4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
110 ctdb_control_send_arp, arp);
113 struct takeover_callback_state {
114 struct ctdb_req_control *c;
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132 ctdb_addr_to_str(state->addr),
134 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
139 if (!state->vnn->takeover_ctx) {
140 state->vnn->takeover_ctx = talloc_new(ctdb);
141 if (!state->vnn->takeover_ctx) {
146 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147 if (!arp) goto failed;
150 arp->addr = *state->addr;
151 arp->vnn = state->vnn;
153 tcparray = state->vnn->tcp_array;
155 /* add all of the known tcp connections for this IP to the
156 list of tcp connections to send tickle acks for */
157 arp->tcparray = talloc_steal(arp, tcparray);
159 state->vnn->tcp_array = NULL;
160 state->vnn->tcp_update_needed = true;
163 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
164 timeval_zero(), ctdb_control_send_arp, arp);
166 /* the control succeeded */
167 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
172 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
178 Find the vnn of the node that has a public ip address
179 presumably returns NULL (the return type is a pointer, so it cannot be -1) if the address is not known as a public address
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
183 struct ctdb_vnn *vnn;
185 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186 if (ctdb_same_ip(&vnn->public_address, addr)) {
196 take over an ip address
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
199 struct ctdb_req_control *c,
204 struct takeover_callback_state *state;
205 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206 struct ctdb_vnn *vnn;
208 /* update our vnn list */
209 vnn = find_public_ip_vnn(ctdb, &pip->addr);
211 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
212 ctdb_addr_to_str(&pip->addr)));
217 /* if our kernel already has this IP, do nothing */
218 if (ctdb_sys_have_ip(&pip->addr)) {
222 state = talloc(ctdb, struct takeover_callback_state);
223 CTDB_NO_MEMORY(ctdb, state);
225 state->c = talloc_steal(ctdb, c);
226 state->addr = talloc(ctdb, ctdb_sock_addr);
227 CTDB_NO_MEMORY(ctdb, state->addr);
229 *state->addr = pip->addr;
232 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
233 ctdb_addr_to_str(&pip->addr),
234 vnn->public_netmask_bits,
237 ret = ctdb_event_script_callback(ctdb,
238 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239 state, takeover_ip_callback, state,
242 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243 vnn->public_netmask_bits);
246 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247 ctdb_addr_to_str(&pip->addr),
253 /* tell ctdb_control.c that we will be replying asynchronously */
260 takeover an ip address old v4 style
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
263 struct ctdb_req_control *c,
269 data.dsize = sizeof(struct ctdb_public_ip);
270 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271 CTDB_NO_MEMORY(ctdb, data.dptr);
273 memcpy(data.dptr, indata.dptr, indata.dsize);
274 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
278 kill any clients that are registered with an IP that is being released
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
282 struct ctdb_client_ip *ip;
284 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285 ctdb_addr_to_str(addr)));
287 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288 ctdb_sock_addr tmp_addr;
291 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
293 ctdb_addr_to_str(&ip->addr)));
295 if (ctdb_same_ip(&tmp_addr, addr)) {
296 struct ctdb_client *client = ctdb_reqid_find(ctdb,
299 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
301 ctdb_addr_to_str(&ip->addr),
304 if (client->pid != 0) {
305 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306 (unsigned)client->pid,
307 ctdb_addr_to_str(addr),
309 kill(client->pid, SIGKILL);
316 called when releaseip event finishes
318 static void release_ip_callback(struct ctdb_context *ctdb, int status,
321 struct takeover_callback_state *state =
322 talloc_get_type(private_data, struct takeover_callback_state);
325 /* send a message to all clients of this node telling them
326 that the cluster has been reconfigured and they should
327 release any sockets on this IP */
328 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
330 data.dsize = strlen((char *)data.dptr)+1;
332 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
334 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
336 /* kill clients that have registered with this IP */
337 release_kill_clients(ctdb, state->addr);
339 /* the control succeeded */
340 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
345 release an ip address
347 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
348 struct ctdb_req_control *c,
353 struct takeover_callback_state *state;
354 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
355 struct ctdb_vnn *vnn;
357 /* update our vnn list */
358 vnn = find_public_ip_vnn(ctdb, &pip->addr);
360 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
361 ctdb_addr_to_str(&pip->addr)));
366 /* stop any previous arps */
367 talloc_free(vnn->takeover_ctx);
368 vnn->takeover_ctx = NULL;
370 if (!ctdb_sys_have_ip(&pip->addr)) {
371 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
372 ctdb_addr_to_str(&pip->addr),
373 vnn->public_netmask_bits,
378 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n",
379 ctdb_addr_to_str(&pip->addr),
380 vnn->public_netmask_bits,
383 state = talloc(ctdb, struct takeover_callback_state);
384 CTDB_NO_MEMORY(ctdb, state);
386 state->c = talloc_steal(state, c);
387 state->addr = talloc(state, ctdb_sock_addr);
388 CTDB_NO_MEMORY(ctdb, state->addr);
389 *state->addr = pip->addr;
392 ret = ctdb_event_script_callback(ctdb,
393 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
394 state, release_ip_callback, state,
395 "releaseip %s %s %u",
397 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
398 vnn->public_netmask_bits);
400 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
401 ctdb_addr_to_str(&pip->addr),
407 /* tell the control that we will be replying asynchronously */
413 release an ip address old v4 style
415 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
416 struct ctdb_req_control *c,
422 data.dsize = sizeof(struct ctdb_public_ip);
423 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
424 CTDB_NO_MEMORY(ctdb, data.dptr);
426 memcpy(data.dptr, indata.dptr, indata.dsize);
427 return ctdb_control_release_ip(ctdb, c, data, async_reply);
431 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
433 struct ctdb_vnn *vnn;
435 /* Verify that we dont have an entry for this ip yet */
436 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
437 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
438 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
439 ctdb_addr_to_str(addr)));
444 /* create a new vnn structure for this ip address */
445 vnn = talloc_zero(ctdb, struct ctdb_vnn);
446 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
447 vnn->iface = talloc_strdup(vnn, iface);
448 CTDB_NO_MEMORY(ctdb, vnn->iface);
449 vnn->public_address = *addr;
450 vnn->public_netmask_bits = mask;
453 DLIST_ADD(ctdb->vnn, vnn);
460 setup the event script directory
462 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
464 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
465 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
470 setup the public address lists from a file
472 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
478 lines = file_lines_load(alist, &nlines, ctdb);
480 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
483 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
487 for (i=0;i<nlines;i++) {
495 while ((*line == ' ') || (*line == '\t')) {
501 if (strcmp(line, "") == 0) {
504 tok = strtok(line, " \t");
506 tok = strtok(NULL, " \t");
508 if (NULL == ctdb->default_public_interface) {
509 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
514 iface = ctdb->default_public_interface;
519 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
520 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
524 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
525 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
538 struct ctdb_public_ip_list {
539 struct ctdb_public_ip_list *next;
545 /* Given a physical node, return the number of
546 public addresses that are currently assigned to this node.
548 static int node_ip_coverage(struct ctdb_context *ctdb,
550 struct ctdb_public_ip_list *ips)
554 for (;ips;ips=ips->next) {
555 if (ips->pnn == pnn) {
563 /* Check if this is a public ip known to the node, i.e. can that
564 node takeover this ip ?
566 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
567 struct ctdb_public_ip_list *ip)
569 struct ctdb_all_public_ips *public_ips;
572 public_ips = ctdb->nodes[pnn]->public_ips;
574 if (public_ips == NULL) {
578 for (i=0;i<public_ips->num;i++) {
579 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
580 /* yes, this node can serve this public ip */
589 /* search the node list for a node to take over this ip.
590 pick the node that is currently serving the fewest ips
591 so that the ips get spread out evenly.
593 static int find_takeover_node(struct ctdb_context *ctdb,
594 struct ctdb_node_map *nodemap, uint32_t mask,
595 struct ctdb_public_ip_list *ip,
596 struct ctdb_public_ip_list *all_ips)
602 for (i=0;i<nodemap->num;i++) {
603 if (nodemap->nodes[i].flags & mask) {
604 /* This node is not healthy and can not be used to serve
610 /* verify that this node can serve this ip */
611 if (can_node_serve_ip(ctdb, i, ip)) {
612 /* no it couldnt so skip to the next node */
616 num = node_ip_coverage(ctdb, i, all_ips);
617 /* was this the first node we checked ? */
629 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
630 ctdb_addr_to_str(&ip->addr)));
639 struct ctdb_public_ip_list *
640 add_ip_to_merged_list(struct ctdb_context *ctdb,
642 struct ctdb_public_ip_list *ip_list,
643 struct ctdb_public_ip *ip)
645 struct ctdb_public_ip_list *tmp_ip;
647 /* do we already have this ip in our merged list ?*/
648 for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
650 /* we already have this public ip in the list */
651 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
656 /* this is a new public ip, we must add it to the list */
657 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
658 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
659 tmp_ip->pnn = ip->pnn;
660 tmp_ip->addr = ip->addr;
661 tmp_ip->next = ip_list;
666 struct ctdb_public_ip_list *
667 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
670 struct ctdb_public_ip_list *ip_list = NULL;
671 struct ctdb_all_public_ips *public_ips;
673 for (i=0;i<ctdb->num_nodes;i++) {
674 public_ips = ctdb->nodes[i]->public_ips;
676 /* there were no public ips for this node */
677 if (public_ips == NULL) {
681 for (j=0;j<public_ips->num;j++) {
682 ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
683 ip_list, &public_ips->ips[j]);
691 make any IP alias changes for public addresses that are necessary
693 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
695 int i, num_healthy, retries;
696 struct ctdb_public_ip ip;
697 struct ctdb_public_ipv4 ipv4;
699 struct ctdb_public_ip_list *all_ips, *tmp_ip;
700 int maxnode, maxnum=0, minnode, minnum=0, num;
702 struct timeval timeout;
703 struct client_async_data *async_data;
704 struct ctdb_client_control_state *state;
705 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
710 /* Count how many completely healthy nodes we have */
712 for (i=0;i<nodemap->num;i++) {
713 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
718 if (num_healthy > 0) {
719 /* We have healthy nodes, so only consider them for
720 serving public addresses
722 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
724 /* We didnt have any completely healthy nodes so
725 use "disabled" nodes as a fallback
727 mask = NODE_FLAGS_INACTIVE;
730 /* since nodes only know about those public addresses that
731 can be served by that particular node, no single node has
732 a full list of all public addresses that exist in the cluster.
733 Walk over all node structures and create a merged list of
734 all public addresses that exist in the cluster.
736 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
738 /* If we want deterministic ip allocations, i.e. that the ip addresses
739 will always be allocated the same way for a specific set of
740 available/unavailable nodes.
742 if (1 == ctdb->tunable.deterministic_public_ips) {
743 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
744 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
745 tmp_ip->pnn = i%nodemap->num;
750 /* mark all public addresses with a masked node as being served by
753 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
754 if (tmp_ip->pnn == -1) {
757 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
762 /* verify that the assigned nodes can serve that public ip
763 and set it to -1 if not
765 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
766 if (tmp_ip->pnn == -1) {
769 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
770 /* this node can not serve this ip. */
776 /* now we must redistribute all public addresses with takeover node
777 -1 among the nodes available
781 /* loop over all ip's and find a physical node to cover for
784 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
785 if (tmp_ip->pnn == -1) {
786 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
787 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
788 ctdb_addr_to_str(&tmp_ip->addr)));
793 /* If we don't want ips to fail back after a node becomes healthy
794 again, we won't even try to reallocate the ip addresses so that
795 they are evenly spread out.
796 This can NOT be used at the same time as DeterministicIPs !
798 if (1 == ctdb->tunable.no_ip_failback) {
799 if (1 == ctdb->tunable.deterministic_public_ips) {
800 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
806 /* now, try to make sure the ip addresses are evenly distributed
808 for each ip address, loop over all nodes that can serve this
809 ip and make sure that the difference between the node
810 serving the most and the node serving the fewest ips is not greater
813 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
814 if (tmp_ip->pnn == -1) {
818 /* Get the highest and lowest number of ips served by any
819 valid node which can serve this ip.
823 for (i=0;i<nodemap->num;i++) {
824 if (nodemap->nodes[i].flags & mask) {
828 /* only check nodes that can actually serve this ip */
829 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
830 /* no it couldnt so skip to the next node */
834 num = node_ip_coverage(ctdb, i, all_ips);
855 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
856 ctdb_addr_to_str(&tmp_ip->addr)));
861 /* If we want deterministic IPs then dont try to reallocate
862 them to spread out the load.
864 if (1 == ctdb->tunable.deterministic_public_ips) {
868 /* if the spread between the smallest and largest coverage by
869 a node is >=2 we steal one of the ips from the node with
870 most coverage to even things out a bit.
871 try to do this at most 5 times since we dont want to spend
872 too much time balancing the ip coverage.
874 if ( (maxnum > minnum+1)
876 struct ctdb_public_ip_list *tmp;
878 /* mark one of maxnode's vnn's as unassigned and try
881 for (tmp=all_ips;tmp;tmp=tmp->next) {
882 if (tmp->pnn == maxnode) {
892 /* finished distributing the public addresses, now just send the
893 info out to the nodes
897 /* at this point ->pnn is the node which will own each IP
898 or -1 if there is no node that can cover this ip
901 /* now tell all nodes to delete any alias that they should not
902 have. This will be a NOOP on nodes that don't currently
903 hold the given alias */
904 async_data = talloc_zero(tmp_ctx, struct client_async_data);
905 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
907 for (i=0;i<nodemap->num;i++) {
908 /* don't talk to unconnected nodes, but do talk to banned nodes */
909 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
913 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
914 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
915 /* This node should be serving this
916 vnn so dont tell it to release the ip
920 if (tmp_ip->addr.sa.sa_family == AF_INET) {
921 ipv4.pnn = tmp_ip->pnn;
922 ipv4.sin = tmp_ip->addr.ip;
924 timeout = TAKEOVER_TIMEOUT();
925 data.dsize = sizeof(ipv4);
926 data.dptr = (uint8_t *)&ipv4;
927 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
928 0, CTDB_CONTROL_RELEASE_IPv4, 0,
932 ip.pnn = tmp_ip->pnn;
933 ip.addr = tmp_ip->addr;
935 timeout = TAKEOVER_TIMEOUT();
936 data.dsize = sizeof(ip);
937 data.dptr = (uint8_t *)&ip;
938 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
939 0, CTDB_CONTROL_RELEASE_IP, 0,
945 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
946 talloc_free(tmp_ctx);
950 ctdb_client_async_add(async_data, state);
953 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
954 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
955 talloc_free(tmp_ctx);
958 talloc_free(async_data);
961 /* tell all nodes to get their own IPs */
962 async_data = talloc_zero(tmp_ctx, struct client_async_data);
963 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
964 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
965 if (tmp_ip->pnn == -1) {
966 /* this IP won't be taken over */
970 if (tmp_ip->addr.sa.sa_family == AF_INET) {
971 ipv4.pnn = tmp_ip->pnn;
972 ipv4.sin = tmp_ip->addr.ip;
974 timeout = TAKEOVER_TIMEOUT();
975 data.dsize = sizeof(ipv4);
976 data.dptr = (uint8_t *)&ipv4;
977 state = ctdb_control_send(ctdb, tmp_ip->pnn,
978 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
982 ip.pnn = tmp_ip->pnn;
983 ip.addr = tmp_ip->addr;
985 timeout = TAKEOVER_TIMEOUT();
986 data.dsize = sizeof(ip);
987 data.dptr = (uint8_t *)&ip;
988 state = ctdb_control_send(ctdb, tmp_ip->pnn,
989 0, CTDB_CONTROL_TAKEOVER_IP, 0,
994 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
995 talloc_free(tmp_ctx);
999 ctdb_client_async_add(async_data, state);
1001 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1002 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1003 talloc_free(tmp_ctx);
1007 talloc_free(tmp_ctx);
1013 destroy a ctdb_client_ip structure
1015 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1017 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1018 ctdb_addr_to_str(&ip->addr),
1019 ntohs(ip->addr.ip.sin_port),
1022 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1027 called by a client to inform us of a TCP connection that it is managing
1028 that should be tickled with an ACK when IP takeover is done
1029 we handle both the old ipv4 style of packets as well as the new ipv4/6
1032 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1035 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1036 struct ctdb_control_tcp *old_addr = NULL;
1037 struct ctdb_control_tcp_addr new_addr;
1038 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1039 struct ctdb_tcp_list *tcp;
1040 struct ctdb_control_tcp_vnn t;
1043 struct ctdb_client_ip *ip;
1044 struct ctdb_vnn *vnn;
1045 ctdb_sock_addr addr;
1047 switch (indata.dsize) {
1048 case sizeof(struct ctdb_control_tcp):
1049 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1050 ZERO_STRUCT(new_addr);
1051 tcp_sock = &new_addr;
1052 tcp_sock->src.ip = old_addr->src;
1053 tcp_sock->dest.ip = old_addr->dest;
1055 case sizeof(struct ctdb_control_tcp_addr):
1056 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1059 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
1063 addr = tcp_sock->src;
1064 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1065 addr = tcp_sock->dest;
1066 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1069 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1070 vnn = find_public_ip_vnn(ctdb, &addr);
1072 switch (addr.sa.sa_family) {
1074 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1075 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1076 ctdb_addr_to_str(&addr)));
1080 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1081 ctdb_addr_to_str(&addr)));
1084 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1090 if (vnn->pnn != ctdb->pnn) {
1091 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1092 ctdb_addr_to_str(&addr),
1093 client_id, client->pid));
1094 /* failing this call will tell smbd to die */
1098 ip = talloc(client, struct ctdb_client_ip);
1099 CTDB_NO_MEMORY(ctdb, ip);
1103 ip->client_id = client_id;
1104 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1105 DLIST_ADD(ctdb->client_ip_list, ip);
1107 tcp = talloc(client, struct ctdb_tcp_list);
1108 CTDB_NO_MEMORY(ctdb, tcp);
1110 tcp->connection.src_addr = tcp_sock->src;
1111 tcp->connection.dst_addr = tcp_sock->dest;
1113 DLIST_ADD(client->tcp_list, tcp);
1115 t.src = tcp_sock->src;
1116 t.dest = tcp_sock->dest;
1118 data.dptr = (uint8_t *)&t;
1119 data.dsize = sizeof(t);
1121 switch (addr.sa.sa_family) {
1123 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1124 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1125 ctdb_addr_to_str(&tcp_sock->src),
1126 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1129 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1130 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1131 ctdb_addr_to_str(&tcp_sock->src),
1132 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1135 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1139 /* tell all nodes about this tcp connection */
1140 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1141 CTDB_CONTROL_TCP_ADD,
1142 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1144 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1152 find a tcp address on a list
1154 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1155 struct ctdb_tcp_connection *tcp)
1159 if (array == NULL) {
1163 for (i=0;i<array->num;i++) {
1164 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1165 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1166 return &array->connections[i];
1173 called by a daemon to inform us of a TCP connection that one of its
1174 clients is managing and that should be tickled with an ACK when IP takeover is
1177 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1179 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1180 struct ctdb_tcp_array *tcparray;
1181 struct ctdb_tcp_connection tcp;
1182 struct ctdb_vnn *vnn;
1184 vnn = find_public_ip_vnn(ctdb, &p->dest);
1186 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1187 ctdb_addr_to_str(&p->dest)));
1193 tcparray = vnn->tcp_array;
1195 /* If this is the first tickle */
1196 if (tcparray == NULL) {
1197 tcparray = talloc_size(ctdb->nodes,
1198 offsetof(struct ctdb_tcp_array, connections) +
1199 sizeof(struct ctdb_tcp_connection) * 1);
1200 CTDB_NO_MEMORY(ctdb, tcparray);
1201 vnn->tcp_array = tcparray;
1204 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1205 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1207 tcparray->connections[tcparray->num].src_addr = p->src;
1208 tcparray->connections[tcparray->num].dst_addr = p->dest;
1214 /* Do we already have this tickle ?*/
1215 tcp.src_addr = p->src;
1216 tcp.dst_addr = p->dest;
1217 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1218 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1219 ctdb_addr_to_str(&tcp.dst_addr),
1220 ntohs(tcp.dst_addr.ip.sin_port),
1225 /* A new tickle, we must add it to the array */
1226 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1227 struct ctdb_tcp_connection,
1229 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1231 vnn->tcp_array = tcparray;
1232 tcparray->connections[tcparray->num].src_addr = p->src;
1233 tcparray->connections[tcparray->num].dst_addr = p->dest;
1236 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1237 ctdb_addr_to_str(&tcp.dst_addr),
1238 ntohs(tcp.dst_addr.ip.sin_port),
1246 called to remove a TCP connection, previously registered by a daemon for one of its
1247 clients, from the list of connections that get tickled with an ACK when IP takeover is
1250 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1252 struct ctdb_tcp_connection *tcpp;
1253 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1256 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1257 ctdb_addr_to_str(&conn->dst_addr)));
1261 /* if the array is empty we cant remove it
1262 and we dont need to do anything
1264 if (vnn->tcp_array == NULL) {
1265 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1266 ctdb_addr_to_str(&conn->dst_addr),
1267 ntohs(conn->dst_addr.ip.sin_port)));
1272 /* See if we know this connection
1273 if we dont know this connection then we dont need to do anything
1275 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1277 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1278 ctdb_addr_to_str(&conn->dst_addr),
1279 ntohs(conn->dst_addr.ip.sin_port)));
1284 /* We need to remove this entry from the array.
1285 Instead of allocating a new array and copying data to it
1286 we cheat and just copy the last entry in the existing array
1287 to the entry that is to be removed and just shrink the
1290 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1291 vnn->tcp_array->num--;
1293 /* If we deleted the last entry we also need to remove the entire array
1295 if (vnn->tcp_array->num == 0) {
1296 talloc_free(vnn->tcp_array);
1297 vnn->tcp_array = NULL;
1300 vnn->tcp_update_needed = true;
1302 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1303 ctdb_addr_to_str(&conn->src_addr),
1304 ntohs(conn->src_addr.ip.sin_port)));
1309 called when a daemon restarts - send all tickles for all public addresses
1310 we are serving immediately to the new node.
1312 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1314 /*XXX here we should send all tickles we are serving to the new node */
1320 called when a client structure goes away - hook to remove
1321 elements from the tcp_list in all daemons
1323 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1325 while (client->tcp_list) {
1326 struct ctdb_tcp_list *tcp = client->tcp_list;
1327 DLIST_REMOVE(client->tcp_list, tcp);
1328 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1334 release all IPs on shutdown
1336 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1338 struct ctdb_vnn *vnn;
1340 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1341 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1344 if (vnn->pnn == ctdb->pnn) {
1347 ctdb_event_script(ctdb, "releaseip %s %s %u",
1349 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1350 vnn->public_netmask_bits);
1351 release_kill_clients(ctdb, &vnn->public_address);
1357 get list of public IPs
/*
  Build the reply blob listing the public addresses held in ctdb->vnn.
  A first pass over the list sizes the reply; the buffer is then allocated
  zeroed as a talloc child of outdata, with length
  offsetof(struct ctdb_all_public_ips, ips) + num * sizeof(struct ctdb_public_ip).
  outdata->dsize / outdata->dptr are pointed at it, and a second pass
  copies each vnn's pnn and public_address into ips->ips[i].
  Allocation failure is handled by CTDB_NO_MEMORY.
  NOTE(review): the declarations and updates of i, num and len, and the
  return statement, are not visible in this excerpt.
 */
1359 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1360 struct ctdb_req_control *c, TDB_DATA *outdata)
1363 struct ctdb_all_public_ips *ips;
1364 struct ctdb_vnn *vnn;
1366 /* count how many public ip structures we have */
1368 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1372 len = offsetof(struct ctdb_all_public_ips, ips) +
1373 num*sizeof(struct ctdb_public_ip);
1374 ips = talloc_zero_size(outdata, len);
1375 CTDB_NO_MEMORY(ctdb, ips);
1377 outdata->dsize = len;
1378 outdata->dptr = (uint8_t *)ips;
1382 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1383 ips->ips[i].pnn = vnn->pnn;
1384 ips->ips[i].addr = vnn->public_address;
1393 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Old-format variant of the public IP listing.
  Both passes over ctdb->vnn test public_address.sa.sa_family against
  AF_INET. The reply is allocated zeroed as a talloc child of outdata, with
  length offsetof(struct ctdb_all_public_ipsv4, ips) +
  num * sizeof(struct ctdb_public_ipv4). Each stored entry receives the
  vnn's pnn and the public_address.ip member (as .sin).
  Allocation failure is handled by CTDB_NO_MEMORY.
  NOTE(review): the bodies of the AF_INET checks, the counter updates and
  the return statement are not visible in this excerpt.
 */
1395 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1396 struct ctdb_req_control *c, TDB_DATA *outdata)
1399 struct ctdb_all_public_ipsv4 *ips;
1400 struct ctdb_vnn *vnn;
1402 /* count how many public ip structures we have */
1404 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1405 if (vnn->public_address.sa.sa_family != AF_INET) {
1411 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1412 num*sizeof(struct ctdb_public_ipv4);
1413 ips = talloc_zero_size(outdata, len);
1414 CTDB_NO_MEMORY(ctdb, ips);
1416 outdata->dsize = len;
1417 outdata->dptr = (uint8_t *)ips;
1421 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1422 if (vnn->public_address.sa.sa_family != AF_INET) {
1425 ips->ips[i].pnn = vnn->pnn;
1426 ips->ips[i].sin = vnn->public_address.ip;
1435 structure containing the listening socket and the list of tcp connections
1436 that the ctdb daemon is to kill
/*
  Per-vnn state for killing TCP connections.
    vnn         - the public address entry this state belongs to; its
                  killtcp pointer is cleared by ctdb_killtcp_destructor()
    ctdb        - owning daemon context, used to reach the event context
    fde         - fd event registered with capture_tcp_handler as callback
    connections - tree of struct ctdb_killtcp_con, keyed by killtcp_key()
  NOTE(review): other code in this file also uses capture_fd and
  private_data members; their declarations are not visible in this excerpt.
 */
1438 struct ctdb_kill_tcp {
1439 struct ctdb_vnn *vnn;
1440 struct ctdb_context *ctdb;
1442 struct fd_event *fde;
1443 trbt_tree_t *connections;
1448 a tcp connection that is to be killed
/*
  One connection queued for killing.
    src_addr, dst_addr - the two endpoints of the connection
    killtcp            - back pointer to the owning ctdb_kill_tcp
  NOTE(review): tickle_connection_traverse() reads a count member whose
  declaration is not visible in this excerpt.
 */
1450 struct ctdb_killtcp_con {
1451 ctdb_sock_addr src_addr;
1452 ctdb_sock_addr dst_addr;
1454 struct ctdb_kill_tcp *killtcp;
1457 /* this function is used to create a key to represent this socketpair
1458 in the killtcp tree.
1459 this key is used to insert and lookup matching socketpairs that are
1460 to be tickled and RST
/*
  Build the KILLTCP_KEYLEN-word lookup key for a (src, dst) socket pair.
  The key lives in a function-static buffer that is zeroed with bzero() on
  every call, so each call overwrites the result of the previous one.
  A mismatch between the two address families is logged.
  Layout when the sockaddr_in members are used:
    key[0..1] = dst, src address     key[2..3] = dst, src port
  Layout when the sockaddr_in6 members are used:
    key[0..7] = dst/src s6_addr32 words interleaved, from word 3 down to 0
    key[8..9] = dst, src port
  An unrecognised family is logged.
  NOTE(review): the case labels, break and return statements are not
  visible in this excerpt - confirm what is returned on the error paths.
 */
1462 #define KILLTCP_KEYLEN 10
1463 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1465 static uint32_t key[KILLTCP_KEYLEN];
1467 bzero(key, sizeof(key));
1469 if (src->sa.sa_family != dst->sa.sa_family) {
1470 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1474 switch (src->sa.sa_family) {
1476 key[0] = dst->ip.sin_addr.s_addr;
1477 key[1] = src->ip.sin_addr.s_addr;
1478 key[2] = dst->ip.sin_port;
1479 key[3] = src->ip.sin_port;
1482 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1483 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1484 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1485 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1486 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1487 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1488 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1489 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1490 key[8] = dst->ip6.sin6_port;
1491 key[9] = src->ip6.sin6_port;
1494 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1502 called when we get a read event on the raw socket
/*
  fd-event callback for the capture socket; private_data is the
  struct ctdb_kill_tcp registered in ctdb_killtcp_add_connection().
  Steps visible here:
    - test flags for EVENT_FD_READ
    - read one packet with ctdb_sys_read_tcp_packet() from
      killtcp->capture_fd, obtaining ack_seq and seq
    - look the packet's (src, dst) pair up in killtcp->connections using
      killtcp_key(&src, &dst)
    - for a known connection, log it and call ctdb_sys_send_tcp() from
      con->dst_addr to con->src_addr with the captured ack_seq and seq and
      a final argument of 1
  NOTE(review): the early-return bodies, the arguments that fill src/dst,
  and the removal of the entry from the tree are not visible in this
  excerpt.
 */
1504 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1505 uint16_t flags, void *private_data)
1507 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1508 struct ctdb_killtcp_con *con;
1509 ctdb_sock_addr src, dst;
1510 uint32_t ack_seq, seq;
1512 if (!(flags & EVENT_FD_READ)) {
1516 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1517 killtcp->private_data,
1519 &ack_seq, &seq) != 0) {
1520 /* probably a non-tcp ACK packet */
1524 /* check if we have this guy in our list of connections
1527 con = trbt_lookuparray32(killtcp->connections,
1528 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1530 /* no this was some other packet we can just ignore */
1534 /* This one has been tickled !
1535 now reset him and remove him from the list.
1537 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1538 ntohs(con->dst_addr.ip.sin_port),
1539 ctdb_addr_to_str(&con->src_addr),
1540 ntohs(con->src_addr.ip.sin_port)));
1542 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1547 /* when traversing the list of all tcp connections to send tickle acks to
1548 (so that we can capture the ack coming back and kill the connection
1550 this callback is called for each connection we are currently trying to kill
/*
  Per-entry callback for trbt_traversearray32(); data is a
  struct ctdb_killtcp_con.
  An entry whose count has reached 5 is given up on; otherwise a call
  taking con->dst_addr and con->src_addr is made to tickle the connection
  again.
  NOTE(review): the give-up action, the counter update and the name of the
  function being called are not visible in this excerpt.
 */
1552 static void tickle_connection_traverse(void *param, void *data)
1554 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1556 /* have tried too many times, just give up */
1557 if (con->count >= 5) {
1562 /* otherwise, try tickling it again */
1565 (ctdb_sock_addr *)&con->dst_addr,
1566 (ctdb_sock_addr *)&con->src_addr,
1572 called every second until all sentenced connections have been reset
/*
  Timed-event callback; private_data is the struct ctdb_kill_tcp.
  Runs tickle_connection_traverse() over every entry in
  killtcp->connections. If the tree pointer or its root is NULL, the whole
  killtcp structure is freed. Otherwise the callback re-arms itself to run
  again one second later, with the new event parented to killtcp.
  NOTE(review): the statement following talloc_free(killtcp) is not visible
  in this excerpt - confirm the function returns before the re-arm, which
  dereferences killtcp.
 */
1574 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1575 struct timeval t, void *private_data)
1577 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1580 /* loop over all connections sending tickle ACKs */
1581 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1584 /* If there are no more connections to kill we can remove the
1585 entire killtcp structure
1587 if ( (killtcp->connections == NULL) ||
1588 (killtcp->connections->root == NULL) ) {
1589 talloc_free(killtcp);
1593 /* try tickling them again in a seconds time
1595 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1596 ctdb_tickle_sentenced_connections, killtcp);
1600 destroy the killtcp structure
/*
  talloc destructor for struct ctdb_kill_tcp (installed in
  ctdb_killtcp_add_connection()). Clears the owning vnn's killtcp pointer
  so that the vnn no longer references the structure being freed.
  NOTE(review): the return statement is not visible in this excerpt.
 */
1602 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1604 killtcp->vnn->killtcp = NULL;
1609 /* nothing fancy here, just unconditionally replace any existing
1610 connection structure with the new one.
1612 dont even free the old one if it did exist, that one is talloc_stolen
1613 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection().
  NOTE(review): the body is not visible in this excerpt - confirm which of
  parm / data is returned as the node's new payload.
 */
1616 static void *add_killtcp_callback(void *parm, void *data)
1622 add a tcp socket to the list of connections we want to RST
/*
  Queue one TCP connection for killing.
  Steps visible here:
    - canonicalize s and d into the local src / dst
    - find the vnn via find_public_ip_vnn(), tried with dst and with src,
      with ctdb->single_ip_vnn as a fallback when its public address
      matches dst; an error is logged if the address is not public
    - reuse vnn->killtcp, or allocate a zeroed ctdb_kill_tcp parented to
      ctdb with capture_fd = -1, a fresh tree and
      ctdb_killtcp_destructor installed
    - allocate a ctdb_killtcp_con parented to killtcp and insert it into
      killtcp->connections under killtcp_key(&con->dst_addr, &con->src_addr)
    - open the capture socket on vnn->iface if capture_fd is still -1
    - if no fd event exists yet, register capture_tcp_handler on the
      capture fd (with EVENT_FD_AUTOCLOSE) and start the one-second
      ctdb_tickle_sentenced_connections timer
  The trailing talloc_free(vnn->killtcp) is presumably the failure path -
  confirm against the full file.
  NOTE(review): the key is built here from (dst, src) while
  capture_tcp_handler() looks up with (src, dst) of the captured packet;
  presumably the captured packet travels in the opposite direction -
  confirm.
  NOTE(review): the remaining parameters (s and d), the NULL checks around
  the vnn lookups, the initial tickle and all return statements are not
  visible in this excerpt.
 */
1624 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1628 ctdb_sock_addr src, dst;
1629 struct ctdb_kill_tcp *killtcp;
1630 struct ctdb_killtcp_con *con;
1631 struct ctdb_vnn *vnn;
1633 ctdb_canonicalize_ip(s, &src);
1634 ctdb_canonicalize_ip(d, &dst);
1636 vnn = find_public_ip_vnn(ctdb, &dst);
1638 vnn = find_public_ip_vnn(ctdb, &src);
1641 /* if it is not a public ip it could be our 'single ip' */
1642 if (ctdb->single_ip_vnn) {
1643 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1644 vnn = ctdb->single_ip_vnn;
1649 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1653 killtcp = vnn->killtcp;
1655 /* If this is the first connection to kill we must allocate
1658 if (killtcp == NULL) {
1659 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1660 CTDB_NO_MEMORY(ctdb, killtcp);
1663 killtcp->ctdb = ctdb;
1664 killtcp->capture_fd = -1;
1665 killtcp->connections = trbt_create(killtcp, 0);
1667 vnn->killtcp = killtcp;
1668 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1673 /* create a structure that describes this connection we want to
1674 RST and store it in killtcp->connections
1676 con = talloc(killtcp, struct ctdb_killtcp_con);
1677 CTDB_NO_MEMORY(ctdb, con);
1678 con->src_addr = src;
1679 con->dst_addr = dst;
1681 con->killtcp = killtcp;
1684 trbt_insertarray32_callback(killtcp->connections,
1685 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1686 add_killtcp_callback, con);
1689 If we dont have a socket to listen on yet we must create it
1691 if (killtcp->capture_fd == -1) {
1692 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1693 if (killtcp->capture_fd == -1) {
1694 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1700 if (killtcp->fde == NULL) {
1701 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1702 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1703 capture_tcp_handler, killtcp);
1705 /* We also need to set up some events to tickle all these connections
1706 until they are all reset
1708 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1709 ctdb_tickle_sentenced_connections, killtcp);
1712 /* tickle him once now */
1721 talloc_free(vnn->killtcp);
1722 vnn->killtcp = NULL;
1727 kill a TCP connection.
/*
  Control entry point: interpret indata.dptr as a
  struct ctdb_control_killtcp and pass its src_addr / dst_addr to
  ctdb_killtcp_add_connection(), returning that function's result.
  NOTE(review): no check of indata.dsize is visible before the cast -
  confirm the caller validates the blob size.
 */
1729 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1731 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1733 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1737 called by a daemon to inform us of the entire list of TCP tickles for
1738 a particular public address.
1739 this control should only be sent by the node that is currently serving
1740 that public address.
/*
  Replace the stored tickle list of one public address with the list
  carried in indata.
  Steps visible here:
    - require indata.dsize to cover the header up to tickles.connections
    - require indata.dsize to cover the header plus tickles.num entries of
      struct ctdb_tcp_connection
    - find the vnn for list->addr; a lookup failure is logged at DEBUG_INFO
    - free any existing vnn->tcp_array
    - allocate a new ctdb_tcp_array (initially parented to ctdb->nodes),
      copy the connections in, then talloc_steal() it onto the vnn
  Allocation failures are handled by CTDB_NO_MEMORY.
  NOTE(review): the return statements and the NULL check on vnn are not
  visible in this excerpt.
 */
1742 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1744 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1745 struct ctdb_tcp_array *tcparray;
1746 struct ctdb_vnn *vnn;
1748 /* We must at least have tickles.num or else we cant verify the size
1749 of the received data blob
1751 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1752 tickles.connections)) {
1753 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1757 /* verify that the size of data matches what we expect */
1758 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1759 tickles.connections)
1760 + sizeof(struct ctdb_tcp_connection)
1761 * list->tickles.num) {
1762 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1766 vnn = find_public_ip_vnn(ctdb, &list->addr);
1768 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1769 ctdb_addr_to_str(&list->addr)));
1774 /* remove any old ticklelist we might have */
1775 talloc_free(vnn->tcp_array);
1776 vnn->tcp_array = NULL;
1778 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1779 CTDB_NO_MEMORY(ctdb, tcparray);
1781 tcparray->num = list->tickles.num;
1783 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1784 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1786 memcpy(tcparray->connections, &list->tickles.connections[0],
1787 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1789 /* We now have a new fresh tickle list array for this vnn */
1790 vnn->tcp_array = talloc_steal(vnn, tcparray);
1796 called to return the full list of tickles for the public address associated
1797 with the provided vnn
/*
  Return the stored tickle list for the public address given in indata.
  indata.dptr is read as a ctdb_sock_addr and resolved with
  find_public_ip_vnn(); a lookup failure is logged. The reply is allocated
  as a talloc child of outdata, sized as the list header plus num entries
  of struct ctdb_tcp_connection, and filled from vnn->tcp_array.
  Allocation failure is handled by CTDB_NO_MEMORY.
  NOTE(review): the handling of a NULL vnn->tcp_array, the assignment of
  list->addr and the return statements are not visible in this excerpt.
  NOTE(review): no check of indata.dsize is visible before the cast.
 */
1799 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1801 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1802 struct ctdb_control_tcp_tickle_list *list;
1803 struct ctdb_tcp_array *tcparray;
1805 struct ctdb_vnn *vnn;
1807 vnn = find_public_ip_vnn(ctdb, addr);
1809 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1810 ctdb_addr_to_str(addr)));
1815 tcparray = vnn->tcp_array;
1817 num = tcparray->num;
1822 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1823 tickles.connections)
1824 + sizeof(struct ctdb_tcp_connection) * num;
1826 outdata->dptr = talloc_size(outdata, outdata->dsize);
1827 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1828 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1831 list->tickles.num = num;
1833 memcpy(&list->tickles.connections[0], tcparray->connections,
1834 sizeof(struct ctdb_tcp_connection) * num);
1842 set the list of all tcp tickles for a public address
/*
  Marshal tcparray into a ctdb_control_tcp_tickle_list blob and send it as
  a CTDB_CONTROL_SET_TCP_TICKLE_LIST control with CTDB_CTRL_FLAG_NOREPLY.
  The blob is allocated on ctdb, sized as the list header plus num entries
  of struct ctdb_tcp_connection, and freed after the send. A send failure
  is logged.
  NOTE(review): in the visible lines the control is always addressed to
  CTDB_BROADCAST_CONNECTED; timeout, destnode and addr are not referenced.
  The handling of a NULL tcparray and the assignment of list->addr are not
  visible in this excerpt.
 */
1844 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1845 struct timeval timeout, uint32_t destnode,
1846 ctdb_sock_addr *addr,
1847 struct ctdb_tcp_array *tcparray)
1851 struct ctdb_control_tcp_tickle_list *list;
1854 num = tcparray->num;
1859 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1860 tickles.connections) +
1861 sizeof(struct ctdb_tcp_connection) * num;
1862 data.dptr = talloc_size(ctdb, data.dsize);
1863 CTDB_NO_MEMORY(ctdb, data.dptr);
1865 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1867 list->tickles.num = num;
1869 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1872 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1873 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1874 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1876 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1880 talloc_free(data.dptr);
1887 perform tickle updates if required
/*
  Timed-event callback; private_data is the ctdb context.
  For each vnn, two checks are made (ctdb->pnn against vnn->pnn, and
  vnn->tcp_update_needed) before ctdb_ctrl_set_tcp_tickles() is called for
  the vnn's public address; a failure is logged. The callback then re-arms
  itself on ctdb->tickle_update_context after
  ctdb->tunable.tickle_update_interval seconds.
  NOTE(review): the bodies of the two checks, the remaining call arguments
  and any reset of tcp_update_needed are not visible in this excerpt.
 */
1889 static void ctdb_update_tcp_tickles(struct event_context *ev,
1890 struct timed_event *te,
1891 struct timeval t, void *private_data)
1893 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1895 struct ctdb_vnn *vnn;
1897 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1898 /* we only send out updates for public addresses that
1901 if (ctdb->pnn != vnn->pnn) {
1904 /* We only send out the updates if we need to */
1905 if (!vnn->tcp_update_needed) {
1908 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1910 CTDB_BROADCAST_CONNECTED,
1911 &vnn->public_address,
1914 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1915 ctdb_addr_to_str(&vnn->public_address)));
1919 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1920 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1921 ctdb_update_tcp_tickles, ctdb);
1926 start periodic update of tcp tickles
/*
  Create ctdb->tickle_update_context as a new talloc context under ctdb and
  schedule the first ctdb_update_tcp_tickles() run on it, after
  ctdb->tunable.tickle_update_interval seconds.
  NOTE(review): the result of talloc_new() is not checked in the visible
  lines.
 */
1928 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1930 ctdb->tickle_update_context = talloc_new(ctdb);
1932 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1933 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1934 ctdb_update_tcp_tickles, ctdb);
/*
  State carried by the send_gratious_arp() timed event.
    ctdb - daemon context, used to reach the event context
    addr - address passed to ctdb_sys_send_arp()
  NOTE(review): send_gratious_arp() also reads iface and count members;
  their declarations are not visible in this excerpt.
 */
1940 struct control_gratious_arp {
1941 struct ctdb_context *ctdb;
1942 ctdb_sock_addr addr;
1948 send a control_gratuitous arp
/*
  Timed-event callback; private_data is a struct control_gratious_arp.
  Calls ctdb_sys_send_arp() for arp->addr on arp->iface and logs a failure
  together with strerror(errno). arp->count is compared against
  CTDB_ARP_REPEAT; the callback re-arms itself on the arp structure after
  CTDB_ARP_INTERVAL seconds.
  NOTE(review): the counter increment, the action taken when the repeat
  limit is reached and the early returns are not visible in this excerpt.
 */
1950 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1951 struct timeval t, void *private_data)
1954 struct control_gratious_arp *arp = talloc_get_type(private_data,
1955 struct control_gratious_arp);
1957 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1959 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1964 if (arp->count == CTDB_ARP_REPEAT) {
1969 event_add_timed(arp->ctdb->ev, arp,
1970 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
1971 send_gratious_arp, arp);
/*
  Control entry point for sending gratuitous ARPs.
  Checks that indata is large enough to hold the fixed part of
  struct ctdb_control_gratious_arp (up to iface), then checks its size
  against that fixed part plus gratious_arp->len. It allocates a
  control_gratious_arp under ctdb, copies the address and a talloc_strdup()
  of the interface name into it, and schedules send_gratious_arp() to run
  immediately (timeval_zero()).
  Allocation failures are handled by CTDB_NO_MEMORY.
  NOTE(review): the comparison operator of the second size check, the
  initialisation of arp->ctdb and arp->count, and the return statements
  are not visible in this excerpt.
 */
1978 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1980 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1981 struct control_gratious_arp *arp;
1983 /* verify the size of indata */
1984 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1985 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
1986 (unsigned)indata.dsize,
1987 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1991 ( offsetof(struct ctdb_control_gratious_arp, iface)
1992 + gratious_arp->len ) ){
1994 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1995 "but should be %u bytes\n",
1996 (unsigned)indata.dsize,
1997 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2002 arp = talloc(ctdb, struct control_gratious_arp);
2003 CTDB_NO_MEMORY(ctdb, arp);
2006 arp->addr = gratious_arp->addr;
2007 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2008 CTDB_NO_MEMORY(ctdb, arp->iface);
2011 event_add_timed(arp->ctdb->ev, arp,
2012 timeval_zero(), send_gratious_arp, arp);
/*
  Control entry point for adding a public address.
  Checks that indata is large enough to hold the fixed part of
  struct ctdb_control_ip_iface (up to iface), performs a second size check
  involving pub->len, and then returns the result of
  ctdb_add_public_address() called with the address, mask and interface
  name from the blob.
  NOTE(review): the full condition of the second size check and the error
  returns are not visible in this excerpt.
 */
2017 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2019 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2022 /* verify the size of indata */
2023 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2024 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2028 ( offsetof(struct ctdb_control_ip_iface, iface)
2031 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2032 "but should be %u bytes\n",
2033 (unsigned)indata.dsize,
2034 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2038 return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2042 called when releaseip event finishes for del_public_address
/*
  Completion callback for the "releaseip" event script started by
  ctdb_control_del_public_address(). Frees private_data, which that caller
  passes as its temporary talloc context; status is not examined in the
  visible lines.
  NOTE(review): the remaining parameter line is not visible in this
  excerpt.
 */
2044 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2047 talloc_free(private_data);
2050 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2052 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2053 struct ctdb_vnn *vnn;
2056 /* verify the size of indata */
2057 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2058 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2062 ( offsetof(struct ctdb_control_ip_iface, iface)
2065 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2066 "but should be %u bytes\n",
2067 (unsigned)indata.dsize,
2068 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2072 /* walk over all public addresses until we find a match */
2073 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2074 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2075 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2077 DLIST_REMOVE(ctdb->vnn, vnn);
2079 ret = ctdb_event_script_callback(ctdb,
2080 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2081 mem_ctx, delete_ip_callback, mem_ctx,
2082 "releaseip %s %s %u",
2084 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2085 vnn->public_netmask_bits);