4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout value for takeover/release controls: expands to a call to
   timeval_current_ofs() with the takeover_timeout tunable as its first
   argument. The expansion refers to a variable named ctdb, so the macro
   can only be used where such a variable is in scope. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* Passed as the first argument of timeval_current_ofs() when
   ctdb_control_send_arp re-arms its timed event. */
33 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp compares arp->count against this value. */
34 #define CTDB_ARP_REPEAT 3
/* State handed as private_data to the timed event ctdb_control_send_arp.
   Only part of the member list is visible in this excerpt; the handler
   also reads addr, vnn and count through this structure. */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* Doubly linked list node (prev/next) holding one ctdb_tcp_connection.
   ctdb_control_tcp_client adds these nodes to client->tcp_list and
   ctdb_takeover_client_destructor_hook walks that list. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* Doubly linked list node kept on ctdb->client_ip_list. Entries are added
   by ctdb_control_tcp_client and unlinked by ctdb_client_ip_destructor.
   Other code in this file also uses addr and client_id members, which are
   not visible in this excerpt. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
/* Timed-event handler; private_data is a struct ctdb_takeover_arp.
   Calls ctdb_sys_send_arp() for arp->addr on arp->vnn->iface, then walks
   arp->tcparray and sends a tcp tickle ack for each recorded connection.
   Failures of either step are logged at DEBUG_CRIT.
   arp->count is compared against CTDB_ARP_REPEAT and the handler re-adds
   itself as a timed event allocated on arp->vnn->takeover_ctx.
   NOTE(review): several lines of this function are missing from this
   excerpt, so the exact control flow around the count check and the
   re-arming cannot be confirmed here. */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
110 ctdb_control_send_arp, arp);
/* Context passed as private_data to takeover_ip_callback and
   release_ip_callback.
   c    - the control request that is answered later through
          ctdb_request_control_reply()
   addr - the public address being taken over or released
   vnn  - the vnn entry for that address (read by takeover_ip_callback) */
113 struct takeover_callback_state {
114 struct ctdb_req_control *c;
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/* Completion callback that ctdb_control_takeover_ip hands to
   ctdb_event_script_callback(); private_data is a
   struct takeover_callback_state.
   Error path: logs "Failed to takeover IP" and replies to the pending
   control with the received status (-ETIME is tested separately).
   Success path: makes sure vnn->takeover_ctx exists, allocates a zeroed
   ctdb_takeover_arp on it, moves the vnn's tcp_array into that structure
   (vnn->tcp_array becomes NULL and tcp_update_needed is set), schedules
   ctdb_control_send_arp with timeval_zero(), and replies with status 0.
   The final reply with -1 is presumably the target of the "goto failed"
   above; the label itself is not visible in this excerpt. */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 if (status == -ETIME) {
134 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
135 ctdb_addr_to_str(state->addr),
137 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
142 if (!state->vnn->takeover_ctx) {
143 state->vnn->takeover_ctx = talloc_new(state->vnn);
144 if (!state->vnn->takeover_ctx) {
149 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
150 if (!arp) goto failed;
153 arp->addr = *state->addr;
154 arp->vnn = state->vnn;
156 tcparray = state->vnn->tcp_array;
158 /* add all of the known tcp connections for this IP to the
159 list of tcp connections to send tickle acks for */
160 arp->tcparray = talloc_steal(arp, tcparray);
162 state->vnn->tcp_array = NULL;
163 state->vnn->tcp_update_needed = true;
166 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
167 timeval_zero(), ctdb_control_send_arp, arp);
169 /* the control succeeded */
170 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
175 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
181 Find the vnn of the node that has a public ip address
182 returns NULL if the address is not known as a public address
/* Walks the ctdb->vnn list and compares each entry's public_address with
   addr using ctdb_same_ip(). The return type is a pointer to the vnn
   entry; the return statements are not visible in this excerpt. */
184 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
186 struct ctdb_vnn *vnn;
188 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
189 if (ctdb_same_ip(&vnn->public_address, addr)) {
199 take over an ip address
/* Control handler for taking over a public address.
   indata.dptr is interpreted as a struct ctdb_public_ip. The address is
   looked up with find_public_ip_vnn(); an address that is not public is
   logged. If ctdb_sys_have_ip() reports that the address is already held,
   nothing more is done. Otherwise a takeover_callback_state is allocated
   on the vnn, the request c is taken over with talloc_steal(), the address
   is copied, and an event script is started with takeover_ip_callback as
   its completion callback; the reply to the control is sent
   asynchronously from that callback.
   NOTE(review): state->c and state->addr are parented to ctdb here,
   whereas ctdb_control_release_ip parents both to state. Freeing state
   would therefore not free them - confirm whether this is intentional. */
201 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
202 struct ctdb_req_control *c,
207 struct takeover_callback_state *state;
208 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
209 struct ctdb_vnn *vnn;
211 /* update our vnn list */
212 vnn = find_public_ip_vnn(ctdb, &pip->addr);
214 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
215 ctdb_addr_to_str(&pip->addr)));
220 /* if our kernel already has this IP, do nothing */
221 if (ctdb_sys_have_ip(&pip->addr)) {
225 state = talloc(vnn, struct takeover_callback_state);
226 CTDB_NO_MEMORY(ctdb, state);
228 state->c = talloc_steal(ctdb, c);
229 state->addr = talloc(ctdb, ctdb_sock_addr);
230 CTDB_NO_MEMORY(ctdb, state->addr);
232 *state->addr = pip->addr;
235 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
236 ctdb_addr_to_str(&pip->addr),
237 vnn->public_netmask_bits,
240 ret = ctdb_event_script_callback(ctdb,
241 state, takeover_ip_callback, state,
246 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
247 vnn->public_netmask_bits);
250 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
251 ctdb_addr_to_str(&pip->addr),
257 /* tell ctdb_control.c that we will be replying asynchronously */
264 takeover an ip address old v4 style
/* Compatibility entry point for the old IPv4-only takeover control.
   Allocates a zeroed struct ctdb_public_ip on c, copies indata into it
   and forwards the request to ctdb_control_takeover_ip().
   NOTE(review): memcpy() copies indata.dsize bytes into a buffer of
   sizeof(struct ctdb_public_ip); no size check is visible here - confirm
   that the caller validates indata.dsize before this is reached. */
266 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
267 struct ctdb_req_control *c,
273 data.dsize = sizeof(struct ctdb_public_ip);
274 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
275 CTDB_NO_MEMORY(ctdb, data.dptr);
277 memcpy(data.dptr, indata.dptr, indata.dsize);
278 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
282 kill any clients that are registered with an IP that is being released
/* Walks ctdb->client_ip_list looking for entries that match addr
   (ctdb_same_ip on a local tmp_addr, presumably a copy of ip->addr; the
   assignment is not visible in this excerpt). For each match the owning
   ctdb_client is looked up with ctdb_reqid_find() and, when its pid is
   non-zero, that process is sent SIGKILL.
   NOTE(review): no NULL check on the result of ctdb_reqid_find() is
   visible before client->pid is read - confirm against the full source. */
284 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
286 struct ctdb_client_ip *ip;
288 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
289 ctdb_addr_to_str(addr)));
291 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
292 ctdb_sock_addr tmp_addr;
295 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
297 ctdb_addr_to_str(&ip->addr)));
299 if (ctdb_same_ip(&tmp_addr, addr)) {
300 struct ctdb_client *client = ctdb_reqid_find(ctdb,
303 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
305 ctdb_addr_to_str(&ip->addr),
308 if (client->pid != 0) {
309 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
310 (unsigned)client->pid,
311 ctdb_addr_to_str(addr),
313 kill(client->pid, SIGKILL);
320 called when releaseip event finishes
/* Completion callback that ctdb_control_release_ip hands to
   ctdb_event_script_callback(); private_data is a
   struct takeover_callback_state.
   Builds a TDB_DATA holding the string form of state->addr (including the
   terminating NUL), sends it to this node (ctdb->pnn) as a
   CTDB_SRVID_RELEASE_IP message, kills the clients registered for the
   address via release_kill_clients(), and replies to the pending control
   with status 0. A status of -ETIME is tested first; what is done in that
   case is not visible in this excerpt. */
322 static void release_ip_callback(struct ctdb_context *ctdb, int status,
325 struct takeover_callback_state *state =
326 talloc_get_type(private_data, struct takeover_callback_state);
329 if (status == -ETIME) {
333 /* send a message to all clients of this node telling them
334 that the cluster has been reconfigured and they should
335 release any sockets on this IP */
336 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
337 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
338 data.dsize = strlen((char *)data.dptr)+1;
340 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
342 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
344 /* kill clients that have registered with this IP */
345 release_kill_clients(ctdb, state->addr);
347 /* the control succeeded */
348 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
353 release an ip address
/* Control handler for releasing a public address.
   indata.dptr is interpreted as a struct ctdb_public_ip. The address is
   looked up with find_public_ip_vnn(); an address that is not public is
   logged. vnn->takeover_ctx is freed and cleared, which stops the ARP
   timers that were allocated on it. If ctdb_sys_have_ip() reports that
   the address is not held, the release is logged as redundant. Otherwise
   a takeover_callback_state is allocated on ctdb, the request c and a
   copy of the address are parented to that state, and the
   CTDB_EVENT_RELEASE_IP event script is started with release_ip_callback
   as its completion callback; the reply is sent asynchronously. */
355 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
356 struct ctdb_req_control *c,
361 struct takeover_callback_state *state;
362 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
363 struct ctdb_vnn *vnn;
365 /* update our vnn list */
366 vnn = find_public_ip_vnn(ctdb, &pip->addr);
368 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
369 ctdb_addr_to_str(&pip->addr)));
374 /* stop any previous arps */
375 talloc_free(vnn->takeover_ctx);
376 vnn->takeover_ctx = NULL;
378 if (!ctdb_sys_have_ip(&pip->addr)) {
379 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
380 ctdb_addr_to_str(&pip->addr),
381 vnn->public_netmask_bits,
386 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%u\n",
387 ctdb_addr_to_str(&pip->addr),
388 vnn->public_netmask_bits,
392 state = talloc(ctdb, struct takeover_callback_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(state, c);
396 state->addr = talloc(state, ctdb_sock_addr);
397 CTDB_NO_MEMORY(ctdb, state->addr);
398 *state->addr = pip->addr;
401 ret = ctdb_event_script_callback(ctdb,
402 state, release_ip_callback, state,
404 CTDB_EVENT_RELEASE_IP,
407 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
408 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
411 ctdb_addr_to_str(&pip->addr),
417 /* tell the control that we will be reply asynchronously */
423 release an ip address old v4 style
/* Compatibility entry point for the old IPv4-only release control.
   Allocates a zeroed struct ctdb_public_ip on c, copies indata into it
   and forwards the request to ctdb_control_release_ip().
   NOTE(review): memcpy() copies indata.dsize bytes into a buffer of
   sizeof(struct ctdb_public_ip); no size check is visible here - confirm
   that the caller validates indata.dsize before this is reached. */
425 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
426 struct ctdb_req_control *c,
432 data.dsize = sizeof(struct ctdb_public_ip);
433 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
434 CTDB_NO_MEMORY(ctdb, data.dptr);
436 memcpy(data.dptr, indata.dptr, indata.dsize);
437 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/* Adds one public address to the ctdb->vnn list.
   addr  - the address to add
   mask  - stored as public_netmask_bits
   iface - interface name; duplicated onto the new vnn
   An address already present in the list (ctdb_same_sockaddr) is reported
   at DEBUG_CRIT. Otherwise a zeroed ctdb_vnn is allocated on ctdb, filled
   in and linked with DLIST_ADD. The return statements are not visible in
   this excerpt; ctdb_set_public_addresses treats a non-zero result as
   failure. */
441 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
443 struct ctdb_vnn *vnn;
445 /* Verify that we dont have an entry for this ip yet */
446 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
447 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
448 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
449 ctdb_addr_to_str(addr)));
454 /* create a new vnn structure for this ip address */
455 vnn = talloc_zero(ctdb, struct ctdb_vnn);
456 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
457 vnn->iface = talloc_strdup(vnn, iface);
458 CTDB_NO_MEMORY(ctdb, vnn->iface);
459 vnn->public_address = *addr;
460 vnn->public_netmask_bits = mask;
463 DLIST_ADD(ctdb->vnn, vnn);
470 setup the event script directory
/* Stores a copy of script_dir (allocated on ctdb) in
   ctdb->event_script_dir. The allocation is checked with CTDB_NO_MEMORY;
   the return statement is not visible in this excerpt. */
472 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
474 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
475 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
480 setup the public address lists from a file
/* Loads the public address list from the file named by alist.
   The file is read with file_lines_load(); a load failure is recorded with
   ctdb_set_error(). Trailing empty lines are dropped. For each remaining
   line, leading spaces and tabs are skipped, empty lines are tested for,
   and the line is split on spaces/tabs with strtok(). When no interface is
   given on the line, ctdb->default_public_interface is used, and its
   absence is reported at DEBUG_CRIT. The address token is parsed with
   parse_ip_mask() and added with ctdb_add_public_address(); a failure of
   either is reported at DEBUG_CRIT with the 1-based line number.
   NOTE(review): strtok() keeps hidden static state and modifies the line
   buffer in place. */
482 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
488 lines = file_lines_load(alist, &nlines, ctdb);
490 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
493 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
497 for (i=0;i<nlines;i++) {
505 while ((*line == ' ') || (*line == '\t')) {
511 if (strcmp(line, "") == 0) {
514 tok = strtok(line, " \t");
516 tok = strtok(NULL, " \t");
518 if (NULL == ctdb->default_public_interface) {
519 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
524 iface = ctdb->default_public_interface;
529 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
530 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
534 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
535 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Singly linked list node for one public address in the merged,
   cluster-wide list built by create_merged_ip_list(). Code in this file
   also reads and writes pnn and addr members, which are not visible in
   this excerpt. */
548 struct ctdb_public_ip_list {
549 struct ctdb_public_ip_list *next;
555 /* Given a physical node, return the number of
556 public addresses that is currently assigned to this node.
/* Iterates over the ips list and tests each entry's pnn against the given
   pnn. The counting and return lines are not visible in this excerpt;
   callers store the result as the number of addresses served by a node. */
558 static int node_ip_coverage(struct ctdb_context *ctdb,
560 struct ctdb_public_ip_list *ips)
564 for (;ips;ips=ips->next) {
565 if (ips->pnn == pnn) {
573 /* Check if this is a public ip known to the node, i.e. can that
574 node takeover this ip ?
/* Searches ctdb->nodes[pnn]->public_ips for ip->addr using ctdb_same_ip().
   A node whose public_ips pointer is NULL is handled separately.
   The return statements are not visible in this excerpt; the callers
   (find_takeover_node, ctdb_takeover_run) treat a non-zero result as
   "this node can not serve the address". */
576 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
577 struct ctdb_public_ip_list *ip)
579 struct ctdb_all_public_ips *public_ips;
582 public_ips = ctdb->nodes[pnn]->public_ips;
584 if (public_ips == NULL) {
588 for (i=0;i<public_ips->num;i++) {
589 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
590 /* yes, this node can serve this public ip */
599 /* search the node lists list for a node to takeover this ip.
600 pick the node that currently are serving the least number of ips
601 so that the ips get spread out evenly.
/* Looks for a node to take over the address in ip.
   nodemap - nodes to consider; entries whose flags intersect mask are
             passed over
   mask    - node flags that disqualify a node
   ip      - the address that needs a node
   all_ips - full address list, used to obtain each candidate's current
             coverage through node_ip_coverage()
   Nodes for which can_node_serve_ip() returns non-zero are also passed
   over. When no node is found a DEBUG_WARNING is logged. The selection and
   return lines are not visible in this excerpt; ctdb_takeover_run treats a
   non-zero result as failure. */
603 static int find_takeover_node(struct ctdb_context *ctdb,
604 struct ctdb_node_map *nodemap, uint32_t mask,
605 struct ctdb_public_ip_list *ip,
606 struct ctdb_public_ip_list *all_ips)
612 for (i=0;i<nodemap->num;i++) {
613 if (nodemap->nodes[i].flags & mask) {
614 /* This node is not healty and can not be used to serve
620 /* verify that this node can serve this ip */
621 if (can_node_serve_ip(ctdb, i, ip)) {
622 /* no it couldnt so skip to the next node */
626 num = node_ip_coverage(ctdb, i, all_ips);
627 /* was this the first node we checked ? */
639 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
640 ctdb_addr_to_str(&ip->addr)));
/* Builds an IP_KEYLEN-word lookup key for an address in a function-local
   static buffer, which is zeroed on every call. For the IPv4 case the
   address is stored in key[3]; for the IPv6 case the four 32-bit words of
   the address fill key[0..3]. An unknown address family is logged at
   DEBUG_ERR.
   Because the buffer is static, each call overwrites the key produced by
   the previous one. The return statement is not visible in this excerpt;
   presumably the static buffer is returned. */
650 static uint32_t *ip_key(ctdb_sock_addr *ip)
652 static uint32_t key[IP_KEYLEN];
654 bzero(key, sizeof(key));
656 switch (ip->sa.sa_family) {
658 key[3] = htonl(ip->ip.sin_addr.s_addr);
661 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
662 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
663 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
664 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
667 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/* Callback taking two opaque pointers and returning a pointer. The body is
   not visible in this excerpt.
   NOTE(review): presumably the insert callback handed to
   trbt_insertarray32_callback() in create_merged_ip_list - confirm. */
674 static void *add_ip_callback(void *parm, void *data)
/* Tree traversal callback used by create_merged_ip_list().
   param - address of the list head (struct ctdb_public_ip_list **)
   data  - the tree entry being visited (struct ctdb_public_ip_list *)
   Links the visited entry in front of the current head. The statement that
   updates the head itself is not visible in this excerpt.
   NOTE(review): unlike the neighbouring helpers this function is not
   declared static. */
679 void getips_count_callback(void *param, void *data)
681 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
682 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
684 new_ip->next = *ip_list;
/* Builds the cluster-wide list of public addresses.
   Any existing ctdb->ip_tree is freed and a fresh tree is created with
   trbt_create(). For every node that is not flagged NODE_FLAGS_DELETED and
   that has a public_ips array, one ctdb_public_ip_list entry per address
   is allocated on the tree, filled with the address and its current pnn,
   and inserted under the key produced by ip_key(). The tree is then
   traversed with getips_count_callback to chain the entries into ip_list.
   All entries are allocated on ctdb->ip_tree, so the list handed back by
   one call becomes invalid when the next call frees the tree. */
688 static struct ctdb_public_ip_list *
689 create_merged_ip_list(struct ctdb_context *ctdb)
692 struct ctdb_public_ip_list *ip_list;
693 struct ctdb_all_public_ips *public_ips;
695 if (ctdb->ip_tree != NULL) {
696 talloc_free(ctdb->ip_tree);
697 ctdb->ip_tree = NULL;
699 ctdb->ip_tree = trbt_create(ctdb, 0);
701 for (i=0;i<ctdb->num_nodes;i++) {
702 public_ips = ctdb->nodes[i]->public_ips;
704 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
708 /* there were no public ips for this node */
709 if (public_ips == NULL) {
713 for (j=0;j<public_ips->num;j++) {
714 struct ctdb_public_ip_list *tmp_ip;
716 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
717 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
718 tmp_ip->pnn = public_ips->ips[j].pnn;
719 tmp_ip->addr = public_ips->ips[j].addr;
722 trbt_insertarray32_callback(ctdb->ip_tree,
723 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
730 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
736 make any IP alias changes for public addresses that are necessary
/* Recomputes which node serves each public address and pushes the result
   to the cluster. Phases, as far as they are visible in this excerpt:
   1. Count nodes without NODE_FLAGS_INACTIVE or NODE_FLAGS_DISABLED. If
      there is at least one, both flags disqualify a node (mask);
      otherwise only NODE_FLAGS_INACTIVE does, so disabled nodes act as a
      fallback.
   2. Build the merged address list with create_merged_ip_list() and count
      its entries.
   3. With the deterministic_public_ips tunable set to 1, assign every
      address to node (list index modulo nodemap->num).
   4. Examine addresses whose node is masked or can not serve them
      (can_node_serve_ip); per the in-line comments these become
      unassigned (-1).
   5. For each unassigned address call find_takeover_node().
   6. With the no_ip_failback tunable set to 1 the rebalancing step is
      bypassed; combining it with deterministic IPs is logged as an error.
   7. Rebalance: when the spread between the most and least loaded
      eligible node exceeds one and fewer than num_ips + 5 retries were
      made, one of the busiest node's addresses is picked for
      re-assignment.
   8. Send CTDB_CONTROL_RELEASE_IP (CTDB_CONTROL_RELEASE_IPv4 for AF_INET
      addresses) to every node that is not NODE_FLAGS_DISCONNECTED for
      each address it should not hold, and wait for completion.
   9. Send CTDB_CONTROL_TAKEOVER_IP (or the IPv4 variant) to the node
      chosen for each assigned address, and wait for completion.
   10. Run the "ipreallocated" event scripts on the connected nodes.
   Temporary allocations hang off tmp_ctx, which is freed on the exit
   paths visible here.
   NOTE(review): step 3 computes i % nodemap->num; confirm that callers
   never pass an empty nodemap. */
738 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
740 int i, num_healthy, retries, num_ips;
741 struct ctdb_public_ip ip;
742 struct ctdb_public_ipv4 ipv4;
743 uint32_t mask, *nodes;
744 struct ctdb_public_ip_list *all_ips, *tmp_ip;
745 int maxnode, maxnum=0, minnode, minnum=0, num;
747 struct timeval timeout;
748 struct client_async_data *async_data;
749 struct ctdb_client_control_state *state;
750 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
754 /* Count how many completely healthy nodes we have */
756 for (i=0;i<nodemap->num;i++) {
757 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
762 if (num_healthy > 0) {
763 /* We have healthy nodes, so only consider them for
764 serving public addresses
766 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
768 /* We didnt have any completely healthy nodes so
769 use "disabled" nodes as a fallback
771 mask = NODE_FLAGS_INACTIVE;
774 /* since nodes only know about those public addresses that
775 can be served by that particular node, no single node has
776 a full list of all public addresses that exist in the cluster.
777 Walk over all node structures and create a merged list of
778 all public addresses that exist in the cluster.
780 keep the tree of ips around as ctdb->ip_tree
782 all_ips = create_merged_ip_list(ctdb);
784 /* Count how many ips we have */
786 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
790 /* If we want deterministic ip allocations, i.e. that the ip addresses
791 will always be allocated the same way for a specific set of
792 available/unavailable nodes.
794 if (1 == ctdb->tunable.deterministic_public_ips) {
795 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
796 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
797 tmp_ip->pnn = i%nodemap->num;
802 /* mark all public addresses with a masked node as being served by
805 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
806 if (tmp_ip->pnn == -1) {
809 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
814 /* verify that the assigned nodes can serve that public ip
815 and set it to -1 if not
817 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
818 if (tmp_ip->pnn == -1) {
821 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
822 /* this node can not serve this ip. */
828 /* now we must redistribute all public addresses with takeover node
829 -1 among the nodes available
833 /* loop over all ip's and find a physical node to cover for
836 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
837 if (tmp_ip->pnn == -1) {
838 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
839 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
840 ctdb_addr_to_str(&tmp_ip->addr)));
845 /* If we dont want ips to fail back after a node becomes healthy
846 again, we wont even try to reallocat the ip addresses so that
847 they are evenly spread out.
848 This can NOT be used at the same time as DeterministicIPs !
850 if (1 == ctdb->tunable.no_ip_failback) {
851 if (1 == ctdb->tunable.deterministic_public_ips) {
852 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
858 /* now, try to make sure the ip adresses are evenly distributed
860 for each ip address, loop over all nodes that can serve this
861 ip and make sure that the difference between the node
862 serving the most and the node serving the least ip's are not greater
865 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
866 if (tmp_ip->pnn == -1) {
870 /* Get the highest and lowest number of ips's served by any
871 valid node which can serve this ip.
875 for (i=0;i<nodemap->num;i++) {
876 if (nodemap->nodes[i].flags & mask) {
880 /* only check nodes that can actually serve this ip */
881 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
882 /* no it couldnt so skip to the next node */
886 num = node_ip_coverage(ctdb, i, all_ips);
907 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
908 ctdb_addr_to_str(&tmp_ip->addr)));
913 /* If we want deterministic IPs then dont try to reallocate
914 them to spread out the load.
916 if (1 == ctdb->tunable.deterministic_public_ips) {
920 /* if the spread between the smallest and largest coverage by
921 a node is >=2 we steal one of the ips from the node with
922 most coverage to even things out a bit.
923 try to do this a limited number of times since we dont
924 want to spend too much time balancing the ip coverage.
926 if ( (maxnum > minnum+1)
927 && (retries < (num_ips + 5)) ){
928 struct ctdb_public_ip_list *tmp;
930 /* mark one of maxnode's vnn's as unassigned and try
933 for (tmp=all_ips;tmp;tmp=tmp->next) {
934 if (tmp->pnn == maxnode) {
944 /* finished distributing the public addresses, now just send the
945 info out to the nodes
949 /* at this point ->pnn is the node which will own each IP
950 or -1 if there is no node that can cover this ip
953 /* now tell all nodes to delete any alias that they should not
954 have. This will be a NOOP on nodes that don't currently
955 hold the given alias */
956 async_data = talloc_zero(tmp_ctx, struct client_async_data);
957 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
959 for (i=0;i<nodemap->num;i++) {
960 /* don't talk to unconnected nodes, but do talk to banned nodes */
961 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
965 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
966 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
967 /* This node should be serving this
968 vnn so dont tell it to release the ip
972 if (tmp_ip->addr.sa.sa_family == AF_INET) {
973 ipv4.pnn = tmp_ip->pnn;
974 ipv4.sin = tmp_ip->addr.ip;
976 timeout = TAKEOVER_TIMEOUT();
977 data.dsize = sizeof(ipv4);
978 data.dptr = (uint8_t *)&ipv4;
979 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
980 0, CTDB_CONTROL_RELEASE_IPv4, 0,
984 ip.pnn = tmp_ip->pnn;
985 ip.addr = tmp_ip->addr;
987 timeout = TAKEOVER_TIMEOUT();
988 data.dsize = sizeof(ip);
989 data.dptr = (uint8_t *)&ip;
990 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
991 0, CTDB_CONTROL_RELEASE_IP, 0,
997 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
998 talloc_free(tmp_ctx);
1002 ctdb_client_async_add(async_data, state);
1005 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1006 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1007 talloc_free(tmp_ctx);
1010 talloc_free(async_data);
1013 /* tell all nodes to get their own IPs */
1014 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1015 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1016 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1017 if (tmp_ip->pnn == -1) {
1018 /* this IP won't be taken over */
1022 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1023 ipv4.pnn = tmp_ip->pnn;
1024 ipv4.sin = tmp_ip->addr.ip;
1026 timeout = TAKEOVER_TIMEOUT();
1027 data.dsize = sizeof(ipv4);
1028 data.dptr = (uint8_t *)&ipv4;
1029 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1030 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1034 ip.pnn = tmp_ip->pnn;
1035 ip.addr = tmp_ip->addr;
1037 timeout = TAKEOVER_TIMEOUT();
1038 data.dsize = sizeof(ip);
1039 data.dptr = (uint8_t *)&ip;
1040 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1041 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1045 if (state == NULL) {
1046 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1047 talloc_free(tmp_ctx);
1051 ctdb_client_async_add(async_data, state);
1053 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1054 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1055 talloc_free(tmp_ctx);
1060 /* tell all nodes to update natwg */
1061 /* send the flags update natgw on all connected nodes */
1062 data.dptr = discard_const("ipreallocated");
1063 data.dsize = strlen((char *)data.dptr) + 1;
1064 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1065 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1066 nodes, 0, TAKEOVER_TIMEOUT(),
1070 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1073 talloc_free(tmp_ctx);
1079 destroy a ctdb_client_ip structure
/* talloc destructor installed on ctdb_client_ip entries by
   ctdb_control_tcp_client(). Logs the address and port of the entry at
   DEBUG_DEBUG and unlinks it from ip->ctdb->client_ip_list. The return
   statement is not visible in this excerpt. */
1081 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1083 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1084 ctdb_addr_to_str(&ip->addr),
1085 ntohs(ip->addr.ip.sin_port),
1088 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1093 called by a client to inform us of a TCP connection that it is managing
1094 that should be tickled with an ACK when IP takeover is done
1095 we handle both the old ipv4 style of packets as well as the new ipv4/6
/* Control handler through which a client registers a TCP connection.
   indata is accepted in two layouts, selected by indata.dsize:
   - sizeof(struct ctdb_control_tcp): old IPv4 layout, converted into a
     zeroed local struct ctdb_control_tcp_addr;
   - sizeof(struct ctdb_control_tcp_addr): used directly.
   Any other size is logged at DEBUG_ERR.
   Source and destination are passed through ctdb_canonicalize_ip() and
   the destination is looked up with find_public_ip_vnn(). For an address
   that is not public, a message is logged per address family (for IPv4
   only when the address is not INADDR_LOOPBACK). The request is failed
   when vnn->pnn differs from ctdb->pnn, i.e. this node does not hold the
   address.
   On success a ctdb_client_ip (with ctdb_client_ip_destructor) is
   allocated on the client and linked into ctdb->client_ip_list, a
   ctdb_tcp_list entry is allocated on the client and linked into
   client->tcp_list, and CTDB_CONTROL_TCP_ADD is broadcast to the
   connected nodes with CTDB_CTRL_FLAG_NOREPLY.
   NOTE(review): client comes from ctdb_reqid_find() and is dereferenced
   (client->pid, talloc parent) with no NULL check visible in this
   excerpt - confirm against the full source. */
1098 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1101 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1102 struct ctdb_control_tcp *old_addr = NULL;
1103 struct ctdb_control_tcp_addr new_addr;
1104 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1105 struct ctdb_tcp_list *tcp;
1106 struct ctdb_control_tcp_vnn t;
1109 struct ctdb_client_ip *ip;
1110 struct ctdb_vnn *vnn;
1111 ctdb_sock_addr addr;
1113 switch (indata.dsize) {
1114 case sizeof(struct ctdb_control_tcp):
1115 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1116 ZERO_STRUCT(new_addr);
1117 tcp_sock = &new_addr;
1118 tcp_sock->src.ip = old_addr->src;
1119 tcp_sock->dest.ip = old_addr->dest;
1121 case sizeof(struct ctdb_control_tcp_addr):
1122 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1125 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1126 "to ctdb_control_tcp_client. size was %d but "
1127 "only allowed sizes are %lu and %lu\n",
1129 (long unsigned)sizeof(struct ctdb_control_tcp),
1130 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1134 addr = tcp_sock->src;
1135 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1136 addr = tcp_sock->dest;
1137 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1140 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1141 vnn = find_public_ip_vnn(ctdb, &addr);
1143 switch (addr.sa.sa_family) {
1145 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1146 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1147 ctdb_addr_to_str(&addr)));
1151 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1152 ctdb_addr_to_str(&addr)));
1155 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1161 if (vnn->pnn != ctdb->pnn) {
1162 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1163 ctdb_addr_to_str(&addr),
1164 client_id, client->pid));
1165 /* failing this call will tell smbd to die */
1169 ip = talloc(client, struct ctdb_client_ip);
1170 CTDB_NO_MEMORY(ctdb, ip);
1174 ip->client_id = client_id;
1175 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1176 DLIST_ADD(ctdb->client_ip_list, ip);
1178 tcp = talloc(client, struct ctdb_tcp_list);
1179 CTDB_NO_MEMORY(ctdb, tcp);
1181 tcp->connection.src_addr = tcp_sock->src;
1182 tcp->connection.dst_addr = tcp_sock->dest;
1184 DLIST_ADD(client->tcp_list, tcp);
1186 t.src = tcp_sock->src;
1187 t.dest = tcp_sock->dest;
1189 data.dptr = (uint8_t *)&t;
1190 data.dsize = sizeof(t);
1192 switch (addr.sa.sa_family) {
1194 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1195 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1196 ctdb_addr_to_str(&tcp_sock->src),
1197 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1200 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1201 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1202 ctdb_addr_to_str(&tcp_sock->src),
1203 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1206 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1210 /* tell all nodes about this tcp connection */
1211 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1212 CTDB_CONTROL_TCP_ADD,
1213 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1215 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1223 find a tcp address on a list
/* Linear search of a tcp array for a connection whose source and
   destination both match tcp (ctdb_same_sockaddr on each).
   Returns a pointer to the matching element inside array->connections.
   A NULL array is tested up front; the return statements for that case
   and for "not found" are not visible in this excerpt, but callers
   compare the result against NULL. */
1225 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1226 struct ctdb_tcp_connection *tcp)
1230 if (array == NULL) {
1234 for (i=0;i<array->num;i++) {
1235 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1236 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1237 return &array->connections[i];
1244 called by a daemon to inform us of a TCP connection that one of its
1245 clients is managing and that should be tickled with an ACK when IP takeover is
/* Control handler that records a TCP connection in the tickle array of
   the public address it is connected to.
   indata.dptr is interpreted as a struct ctdb_control_tcp_vnn. The
   destination is looked up with find_public_ip_vnn(); a destination that
   is not a public address is logged at DEBUG_INFO.
   First connection for the address: a ctdb_tcp_array is allocated (parent
   ctdb->nodes) and stored in vnn->tcp_array, a one-element connections
   array is allocated on it, and the connection is stored.
   Otherwise: a connection already present (ctdb_tcp_find) is only logged;
   a new one is appended after growing connections with talloc_realloc().
   NOTE(review): the talloc_realloc() result is assigned straight to
   tcparray->connections, so on allocation failure the only pointer to the
   existing array is overwritten with NULL - confirm whether that matters
   for the error path taken by CTDB_NO_MEMORY. */
1248 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1250 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1251 struct ctdb_tcp_array *tcparray;
1252 struct ctdb_tcp_connection tcp;
1253 struct ctdb_vnn *vnn;
1255 vnn = find_public_ip_vnn(ctdb, &p->dest);
1257 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1258 ctdb_addr_to_str(&p->dest)));
1264 tcparray = vnn->tcp_array;
1266 /* If this is the first tickle */
1267 if (tcparray == NULL) {
1268 tcparray = talloc_size(ctdb->nodes,
1269 offsetof(struct ctdb_tcp_array, connections) +
1270 sizeof(struct ctdb_tcp_connection) * 1);
1271 CTDB_NO_MEMORY(ctdb, tcparray);
1272 vnn->tcp_array = tcparray;
1275 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1276 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1278 tcparray->connections[tcparray->num].src_addr = p->src;
1279 tcparray->connections[tcparray->num].dst_addr = p->dest;
1285 /* Do we already have this tickle ?*/
1286 tcp.src_addr = p->src;
1287 tcp.dst_addr = p->dest;
1288 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1289 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1290 ctdb_addr_to_str(&tcp.dst_addr),
1291 ntohs(tcp.dst_addr.ip.sin_port),
1296 /* A new tickle, we must add it to the array */
1297 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1298 struct ctdb_tcp_connection,
1300 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1302 vnn->tcp_array = tcparray;
1303 tcparray->connections[tcparray->num].src_addr = p->src;
1304 tcparray->connections[tcparray->num].dst_addr = p->dest;
1307 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1308 ctdb_addr_to_str(&tcp.dst_addr),
1309 ntohs(tcp.dst_addr.ip.sin_port),
1317 remove a TCP connection from the tickle array of the public address it is
1318 connected to, so that it is no longer tickled with an ACK when IP takeover is
/* Removes one connection from the tickle array of the public address it
   is connected to.
   The vnn is found from conn->dst_addr; an unknown address is logged at
   DEBUG_ERR. An empty array, or a connection that is not in the array
   (ctdb_tcp_find), is only logged at DEBUG_INFO.
   Removal overwrites the found slot with the last element and decrements
   num, so the order of the remaining entries is not preserved. When num
   reaches zero the whole array is freed and vnn->tcp_array is cleared.
   vnn->tcp_update_needed is set afterwards. */
1321 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1323 struct ctdb_tcp_connection *tcpp;
1324 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1327 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1328 ctdb_addr_to_str(&conn->dst_addr)));
1332 /* if the array is empty we cant remove it
1333 and we dont need to do anything
1335 if (vnn->tcp_array == NULL) {
1336 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1337 ctdb_addr_to_str(&conn->dst_addr),
1338 ntohs(conn->dst_addr.ip.sin_port)));
1343 /* See if we know this connection
1344 if we dont know this connection then we dont need to do anything
1346 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1348 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1349 ctdb_addr_to_str(&conn->dst_addr),
1350 ntohs(conn->dst_addr.ip.sin_port)));
1355 /* We need to remove this entry from the array.
1356 Instead of allocating a new array and copying data to it
1357 we cheat and just copy the last entry in the existing array
1358 to the entry that is to be removed and just shring the
1361 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1362 vnn->tcp_array->num--;
1364 /* If we deleted the last entry we also need to remove the entire array
1366 if (vnn->tcp_array->num == 0) {
1367 talloc_free(vnn->tcp_array);
1368 vnn->tcp_array = NULL;
1371 vnn->tcp_update_needed = true;
1373 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1374 ctdb_addr_to_str(&conn->src_addr),
1375 ntohs(conn->src_addr.ip.sin_port)));
1380 called when a daemon restarts - send all tickles for all public addresses
1381 we are serving immediately to the new node.
/* Control handler invoked for a (re)starting node identified by vnn.
   The only visible content is a reminder that the tickles served by this
   node still need to be sent to the new node; no such code is visible in
   this excerpt. */
1383 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1385 /*XXX here we should send all tickles we are serving to the new node */
1391 called when a client structure goes away - hook to remove
1392 elements from the tcp_list in all daemons
/*
  Drains client->tcp_list: while the list is non-empty its head entry is
  unlinked with DLIST_REMOVE and the connection it holds is handed to
  ctdb_remove_tcp_connection() using the client's ctdb context.
 */
1394 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1396 while (client->tcp_list) {
1397 struct ctdb_tcp_list *tcp = client->tcp_list;
1398 DLIST_REMOVE(client->tcp_list, tcp);
1399 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1405 release all IPs on shutdown
/*
  Walks every vnn on ctdb->vnn. The visible code tests whether the system
  holds the public address (ctdb_sys_have_ip) and whether vnn->pnn equals
  ctdb->pnn, runs the CTDB_EVENT_RELEASE_IP event script with a "%s %s %u"
  argument list that includes the address string and the netmask bits, and
  then calls release_kill_clients() for that address.
  NOTE(review): the bodies of the two if statements are not visible here.
  NOTE(review): the address string is duplicated onto the ctdb talloc
  context and no matching free is visible - confirm this is intended.
 */
1407 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1409 struct ctdb_vnn *vnn;
1411 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1412 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1415 if (vnn->pnn == ctdb->pnn) {
1418 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1420 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1421 vnn->public_netmask_bits);
1422 release_kill_clients(ctdb, &vnn->public_address);
1428 get list of public IPs
/*
  Builds the reply for the get-public-ips control.
  A struct ctdb_all_public_ips with room for num ctdb_public_ip entries is
  zero-allocated as a talloc child of outdata and published through
  outdata->dptr / outdata->dsize. A second walk over ctdb->vnn stores each
  vnn's pnn and public address into ips->ips[i].
  Allocation failure is handled by CTDB_NO_MEMORY.
  NOTE(review): the counting statement and the index increment are not
  visible in this listing.
 */
1430 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1431 struct ctdb_req_control *c, TDB_DATA *outdata)
1434 struct ctdb_all_public_ips *ips;
1435 struct ctdb_vnn *vnn;
1437 /* count how many public ip structures we have */
1439 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1443 len = offsetof(struct ctdb_all_public_ips, ips) +
1444 num*sizeof(struct ctdb_public_ip);
1445 ips = talloc_zero_size(outdata, len);
1446 CTDB_NO_MEMORY(ctdb, ips);
1448 outdata->dsize = len;
1449 outdata->dptr = (uint8_t *)ips;
1453 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1454 ips->ips[i].pnn = vnn->pnn;
1455 ips->ips[i].addr = vnn->public_address;
1464 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  IPv4-only variant of the public-ip listing.
  Both walks over ctdb->vnn test the address family against AF_INET. The
  reply is a zero-allocated struct ctdb_all_public_ipsv4 (talloc child of
  outdata) sized for num ctdb_public_ipv4 entries; each stored entry gets
  the vnn's pnn and the sockaddr_in view (public_address.ip) of its address.
  NOTE(review): the statements inside the AF_INET tests (presumably a skip
  of non-IPv4 entries) are not visible in this listing.
 */
1466 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1467 struct ctdb_req_control *c, TDB_DATA *outdata)
1470 struct ctdb_all_public_ipsv4 *ips;
1471 struct ctdb_vnn *vnn;
1473 /* count how many public ip structures we have */
1475 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1476 if (vnn->public_address.sa.sa_family != AF_INET) {
1482 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1483 num*sizeof(struct ctdb_public_ipv4);
1484 ips = talloc_zero_size(outdata, len);
1485 CTDB_NO_MEMORY(ctdb, ips);
1487 outdata->dsize = len;
1488 outdata->dptr = (uint8_t *)ips;
1492 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1493 if (vnn->public_address.sa.sa_family != AF_INET) {
1496 ips->ips[i].pnn = vnn->pnn;
1497 ips->ips[i].sin = vnn->public_address.ip;
1506 structure containing the listening socket and the list of tcp connections
1507 that the ctdb daemon is to kill
/*
  Per-vnn state used while killing TCP connections: back pointers to the
  vnn and the ctdb context, the fd event registered on the capture socket,
  and the tree of connections that are still to be reset.
  NOTE(review): other members used elsewhere in this file (capture_fd,
  private_data) are not visible in this listing.
 */
1509 struct ctdb_kill_tcp {
1510 struct ctdb_vnn *vnn;
1511 struct ctdb_context *ctdb;
1513 struct fd_event *fde;
1514 trbt_tree_t *connections;
1519 a tcp connection that is to be killed
/*
  One connection queued for reset: its source and destination socket
  addresses and a pointer back to the owning ctdb_kill_tcp.
  NOTE(review): the count member read by tickle_connection_traverse() is
  not visible in this listing.
 */
1521 struct ctdb_killtcp_con {
1522 ctdb_sock_addr src_addr;
1523 ctdb_sock_addr dst_addr;
1525 struct ctdb_kill_tcp *killtcp;
1528 /* this function is used to create a key to represent this socketpair
1529 in the killtcp tree.
1530 this key is used to insert and lookup matching socketpairs that are
1531 to be tickled and RST
/*
  Builds the KILLTCP_KEYLEN-word (10 x uint32_t) tree key for a src/dst
  socket pair.
  The key is kept in a function-local static array that is zeroed on every
  call, so its contents are overwritten by the next call.
  sockaddr_in pairs fill key[0..3] with dst address, src address, dst port
  and src port. sockaddr_in6 pairs interleave the four 32-bit address words
  of dst and src, starting from word 3 down to word 0, into key[0..7] and
  put the dst and src ports in key[8] and key[9].
  A family mismatch between src and dst, or an unknown family, is logged.
  NOTE(review): the case labels and return statements are not visible in
  this listing; presumably the static array is what gets returned.
 */
1533 #define KILLTCP_KEYLEN 10
1534 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1536 static uint32_t key[KILLTCP_KEYLEN];
1538 bzero(key, sizeof(key));
1540 if (src->sa.sa_family != dst->sa.sa_family) {
1541 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1545 switch (src->sa.sa_family) {
1547 key[0] = dst->ip.sin_addr.s_addr;
1548 key[1] = src->ip.sin_addr.s_addr;
1549 key[2] = dst->ip.sin_port;
1550 key[3] = src->ip.sin_port;
1553 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1554 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1555 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1556 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1557 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1558 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1559 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1560 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1561 key[8] = dst->ip6.sin6_port;
1562 key[9] = src->ip6.sin6_port;
1565 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1573 called when we get a read event on the raw socket
/*
  fd-event handler for the capture socket of a ctdb_kill_tcp (passed as
  private_data).
  Only read events are of interest. One packet is read with
  ctdb_sys_read_tcp_packet(), which also yields ack_seq and seq; the
  connection is then looked up in killtcp->connections under
  killtcp_key(&src, &dst). For a known connection the handler logs it and
  calls ctdb_sys_send_tcp() from con->dst_addr to con->src_addr with the
  captured ack_seq and seq and a final argument of 1 (the log message
  describes this as sending a tcp reset).
  NOTE(review): the early returns and the removal of the connection from
  the tree are not visible in this listing.
 */
1575 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1576 uint16_t flags, void *private_data)
1578 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1579 struct ctdb_killtcp_con *con;
1580 ctdb_sock_addr src, dst;
1581 uint32_t ack_seq, seq;
1583 if (!(flags & EVENT_FD_READ)) {
1587 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1588 killtcp->private_data,
1590 &ack_seq, &seq) != 0) {
1591 /* probably a non-tcp ACK packet */
1595 /* check if we have this guy in our list of connections
1598 con = trbt_lookuparray32(killtcp->connections,
1599 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1601 /* no this was some other packet we can just ignore */
1605 /* This one has been tickled !
1606 now reset him and remove him from the list.
1608 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1609 ntohs(con->dst_addr.ip.sin_port),
1610 ctdb_addr_to_str(&con->src_addr),
1611 ntohs(con->src_addr.ip.sin_port)));
1613 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1618 /* when traversing the list of all tcp connections to send tickle acks to
1619 (so that we can capture the ack coming back and kill the connection
1621 this callback is called for each connection we are currently trying to kill
/*
  Tree-traverse callback, run once per connection still queued for reset.
  param is the talloc context that collects connections to be deleted
  after the traverse; data is the ctdb_killtcp_con being visited.
  Once con->count has reached 5 the connection is re-parented onto param
  with talloc_steal(), because entries cannot be deleted during the
  traverse. Otherwise the connection is tickled again.
  NOTE(review): the tickle call itself and the count update are only
  partly visible in this listing.
 */
1623 static void tickle_connection_traverse(void *param, void *data)
1625 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1627 /* have tried too many times, just give up */
1628 if (con->count >= 5) {
1629 /* can't delete in traverse: reparent to delete_cons */
1630 talloc_steal(param, con);
1634 /* otherwise, try tickling it again */
1637 (ctdb_sock_addr *)&con->dst_addr,
1638 (ctdb_sock_addr *)&con->src_addr,
1644 called every second until all sentenced connections have been reset
/*
  Timed-event handler driving the kill of all queued connections for one
  ctdb_kill_tcp (passed as private_data).
  It traverses killtcp->connections with tickle_connection_traverse(),
  giving it a temporary talloc context that collects abandoned
  connections, and frees that context once the traverse is finished.
  If the tree pointer is NULL or the tree has no root, the killtcp
  structure is freed. The visible tail schedules this handler again one
  second later, with killtcp as the owner of the timed event.
 */
1646 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1647 struct timeval t, void *private_data)
1649 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1650 void *delete_cons = talloc_new(NULL);
1652 /* loop over all connections sending tickle ACKs */
1653 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
1655 /* now we've finished traverse, it's safe to do deletion. */
1656 talloc_free(delete_cons);
1658 /* If there are no more connections to kill we can remove the
1659 entire killtcp structure
1661 if ( (killtcp->connections == NULL) ||
1662 (killtcp->connections->root == NULL) ) {
1663 talloc_free(killtcp);
1667 /* try tickling them again in a seconds time
1669 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1670 ctdb_tickle_sentenced_connections, killtcp);
1674 destroy the killtcp structure
/*
  talloc destructor for a ctdb_kill_tcp: clears the owning vnn's pointer
  to this structure.
  NOTE(review): ctdb_control_del_public_address() sets killtcp->vnn to
  NULL; confirm the dereference below is guarded in the lines that are not
  visible in this listing.
 */
1676 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1679 killtcp->vnn->killtcp = NULL;
1685 /* nothing fancy here, just unconditionally replace any existing
1686 connection structure with the new one.
1688 dont even free the old one if it did exist, that one is talloc_stolen
1689 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection(), which passes the new connection as parm.
  NOTE(review): the body is not visible in this listing.
 */
1692 static void *add_killtcp_callback(void *parm, void *data)
1698 add a tcp socket to the list of connections we want to RST
/*
  Queue one TCP connection for reset.
  Both endpoints are canonicalized into src and dst. The owning vnn is
  searched by dst, then by src, and finally by comparing dst with the
  public address of ctdb->single_ip_vnn; failure to find one is logged as
  not being a public address.
  If the vnn has no ctdb_kill_tcp yet, one is allocated on ctdb with
  capture_fd set to -1, a fresh connection tree and
  ctdb_killtcp_destructor installed. A ctdb_killtcp_con is then allocated
  under it and inserted into the tree keyed on
  killtcp_key(&con->dst_addr, &con->src_addr).
  The capture socket is opened on vnn->iface when capture_fd is still -1.
  When no fd event exists yet, one is registered for capture_tcp_handler
  and, in the lines visible right after it, a one-second timed event for
  ctdb_tickle_sentenced_connections() is added.
  NOTE(review): the trailing talloc_free(vnn->killtcp) lines look like a
  failure path; the label and return values are not visible here.
 */
1700 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1704 ctdb_sock_addr src, dst;
1705 struct ctdb_kill_tcp *killtcp;
1706 struct ctdb_killtcp_con *con;
1707 struct ctdb_vnn *vnn;
1709 ctdb_canonicalize_ip(s, &src);
1710 ctdb_canonicalize_ip(d, &dst);
1712 vnn = find_public_ip_vnn(ctdb, &dst);
1714 vnn = find_public_ip_vnn(ctdb, &src);
1717 /* if it is not a public ip it could be our 'single ip' */
1718 if (ctdb->single_ip_vnn) {
1719 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1720 vnn = ctdb->single_ip_vnn;
1725 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1729 killtcp = vnn->killtcp;
1731 /* If this is the first connection to kill we must allocate
1734 if (killtcp == NULL) {
1735 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1736 CTDB_NO_MEMORY(ctdb, killtcp);
1739 killtcp->ctdb = ctdb;
1740 killtcp->capture_fd = -1;
1741 killtcp->connections = trbt_create(killtcp, 0);
1743 vnn->killtcp = killtcp;
1744 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1749 /* create a structure that describes this connection we want to
1750 RST and store it in killtcp->connections
1752 con = talloc(killtcp, struct ctdb_killtcp_con);
1753 CTDB_NO_MEMORY(ctdb, con);
1754 con->src_addr = src;
1755 con->dst_addr = dst;
1757 con->killtcp = killtcp;
1760 trbt_insertarray32_callback(killtcp->connections,
1761 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1762 add_killtcp_callback, con);
1765 If we dont have a socket to listen on yet we must create it
1767 if (killtcp->capture_fd == -1) {
1768 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1769 if (killtcp->capture_fd == -1) {
1770 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1776 if (killtcp->fde == NULL) {
1777 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1778 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1779 capture_tcp_handler, killtcp);
1781 /* We also need to set up some events to tickle all these connections
1782 until they are all reset
1784 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1785 ctdb_tickle_sentenced_connections, killtcp);
1788 /* tickle him once now */
1797 talloc_free(vnn->killtcp);
1798 vnn->killtcp = NULL;
1803 kill a TCP connection.
/*
  Control entry point for killing a TCP connection.
  indata.dptr is interpreted as a struct ctdb_control_killtcp and its
  src_addr / dst_addr are forwarded to ctdb_killtcp_add_connection(),
  whose result is returned. No size check on indata is done in this
  function.
 */
1805 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1807 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1809 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1813 called by a daemon to inform us of the entire list of TCP tickles for
1814 a particular public address.
1815 this control should only be sent by the node that is currently serving
1816 that public address.
/*
  Replaces the stored tickle list of one public address with the list
  carried in indata.
  indata must be at least as large as the fixed part of
  struct ctdb_control_tcp_tickle_list, and at least as large as that plus
  list->tickles.num connection entries; each failed check is logged.
  The vnn is looked up from list->addr. Its old tcp_array is freed, a new
  ctdb_tcp_array is allocated on ctdb->nodes, its connections array is
  filled by memcpy from the request, and the result is re-parented onto
  the vnn with talloc_steal() and stored in vnn->tcp_array.
  NOTE(review): return values are not visible in this listing.
 */
1818 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1820 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1821 struct ctdb_tcp_array *tcparray;
1822 struct ctdb_vnn *vnn;
1824 /* We must at least have tickles.num or else we cant verify the size
1825 of the received data blob
1827 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1828 tickles.connections)) {
1829 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1833 /* verify that the size of data matches what we expect */
1834 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1835 tickles.connections)
1836 + sizeof(struct ctdb_tcp_connection)
1837 * list->tickles.num) {
1838 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1842 vnn = find_public_ip_vnn(ctdb, &list->addr);
1844 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1845 ctdb_addr_to_str(&list->addr)));
1850 /* remove any old ticklelist we might have */
1851 talloc_free(vnn->tcp_array);
1852 vnn->tcp_array = NULL;
1854 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1855 CTDB_NO_MEMORY(ctdb, tcparray);
1857 tcparray->num = list->tickles.num;
1859 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1860 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1862 memcpy(tcparray->connections, &list->tickles.connections[0],
1863 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1865 /* We now have a new fresh tickle list array for this vnn */
1866 vnn->tcp_array = talloc_steal(vnn, tcparray);
1872 called to return the full list of tickles for the public address associated
1873 with the provided vnn
/*
  Returns the tickle list held for the public address given in indata
  (interpreted as a ctdb_sock_addr).
  outdata->dptr is allocated as a talloc child of outdata, sized for the
  fixed part of struct ctdb_control_tcp_tickle_list plus num connection
  entries; tickles.num is set to num and the connections are copied from
  vnn->tcp_array.
  An address that is not a public address is logged.
  NOTE(review): vnn->tcp_array is set to NULL elsewhere in this file when
  it becomes empty; confirm that the reads of tcparray->num and
  tcparray->connections are guarded in the lines not visible here.
 */
1875 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1877 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1878 struct ctdb_control_tcp_tickle_list *list;
1879 struct ctdb_tcp_array *tcparray;
1881 struct ctdb_vnn *vnn;
1883 vnn = find_public_ip_vnn(ctdb, addr);
1885 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1886 ctdb_addr_to_str(addr)));
1891 tcparray = vnn->tcp_array;
1893 num = tcparray->num;
1898 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1899 tickles.connections)
1900 + sizeof(struct ctdb_tcp_connection) * num;
1902 outdata->dptr = talloc_size(outdata, outdata->dsize);
1903 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1904 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1907 list->tickles.num = num;
1909 memcpy(&list->tickles.connections[0], tcparray->connections,
1910 sizeof(struct ctdb_tcp_connection) * num);
1918 set the list of all tcp tickles for a public address
/*
  Marshals tcparray into a struct ctdb_control_tcp_tickle_list allocated
  on ctdb and sends it with ctdb_daemon_send_control() as
  CTDB_CONTROL_SET_TCP_TICKLE_LIST to CTDB_BROADCAST_CONNECTED with
  CTDB_CTRL_FLAG_NOREPLY. A failed send is logged. The marshalling buffer
  is freed after the send.
  The timeout and destnode parameters are not used by the visible code.
  NOTE(review): confirm that the failure path after the DEBUG message also
  frees data.dptr; the lines between it and talloc_free are not visible.
 */
1920 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1921 struct timeval timeout, uint32_t destnode,
1922 ctdb_sock_addr *addr,
1923 struct ctdb_tcp_array *tcparray)
1927 struct ctdb_control_tcp_tickle_list *list;
1930 num = tcparray->num;
1935 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1936 tickles.connections) +
1937 sizeof(struct ctdb_tcp_connection) * num;
1938 data.dptr = talloc_size(ctdb, data.dsize);
1939 CTDB_NO_MEMORY(ctdb, data.dptr);
1941 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1943 list->tickles.num = num;
1945 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1948 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1949 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1950 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1952 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1956 talloc_free(data.dptr);
1963 perform tickle updates if required
/*
  Periodic timed-event handler (private_data is the ctdb context).
  For every vnn it compares vnn->pnn with ctdb->pnn and tests
  vnn->tcp_update_needed, calls ctdb_ctrl_set_tcp_tickles() for the vnn's
  public address and logs a failed send. It then schedules itself again on
  ctdb->tickle_update_context after tunable.tickle_update_interval seconds.
  NOTE(review): the bodies of the two tests (presumably skipping the vnn)
  and any reset of tcp_update_needed are not visible in this listing.
 */
1965 static void ctdb_update_tcp_tickles(struct event_context *ev,
1966 struct timed_event *te,
1967 struct timeval t, void *private_data)
1969 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1971 struct ctdb_vnn *vnn;
1973 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1974 /* we only send out updates for public addresses that
1977 if (ctdb->pnn != vnn->pnn) {
1980 /* We only send out the updates if we need to */
1981 if (!vnn->tcp_update_needed) {
1984 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1986 CTDB_BROADCAST_CONNECTED,
1987 &vnn->public_address,
1990 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1991 ctdb_addr_to_str(&vnn->public_address)));
1995 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1996 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1997 ctdb_update_tcp_tickles, ctdb);
2002 start periodic update of tcp tickles
/*
  Creates ctdb->tickle_update_context as a talloc child of ctdb and
  schedules the first run of ctdb_update_tcp_tickles() on it,
  tunable.tickle_update_interval seconds from now.
 */
2004 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2006 ctdb->tickle_update_context = talloc_new(ctdb);
2008 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2009 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2010 ctdb_update_tcp_tickles, ctdb);
/*
  State for a series of gratuitous ARPs requested through a control: the
  ctdb context and the address to announce.
  NOTE(review): the iface and count members used by send_gratious_arp()
  are not visible in this listing.
 */
2016 struct control_gratious_arp {
2017 struct ctdb_context *ctdb;
2018 ctdb_sock_addr addr;
2024 send a control_gratuitous arp
/*
  Timed-event handler (private_data is a control_gratious_arp).
  Sends one ARP for arp->addr on arp->iface with ctdb_sys_send_arp() and
  logs a failure together with strerror(errno). arp->count is compared
  with CTDB_ARP_REPEAT, and the handler schedules itself again
  CTDB_ARP_INTERVAL seconds later with arp as the owner of the event.
  NOTE(review): the count increment and what happens once the repeat
  limit is reached are not visible in this listing.
 */
2026 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2027 struct timeval t, void *private_data)
2030 struct control_gratious_arp *arp = talloc_get_type(private_data,
2031 struct control_gratious_arp);
2033 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2035 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2040 if (arp->count == CTDB_ARP_REPEAT) {
2045 event_add_timed(arp->ctdb->ev, arp,
2046 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2047 send_gratious_arp, arp);
/*
  Control handler that starts a series of gratuitous ARPs.
  indata is checked against the fixed part of
  struct ctdb_control_gratious_arp and then against that size plus
  gratious_arp->len; both failures are logged with the sizes involved.
  A control_gratious_arp is allocated on ctdb, the address is copied, the
  interface name is duplicated under it, and send_gratious_arp() is
  scheduled with timeval_zero().
  NOTE(review): talloc_strdup() relies on iface being NUL-terminated
  inside indata - confirm the sender guarantees this.
 */
2054 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2056 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2057 struct control_gratious_arp *arp;
2059 /* verify the size of indata */
2060 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2061 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2062 (unsigned)indata.dsize,
2063 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2067 ( offsetof(struct ctdb_control_gratious_arp, iface)
2068 + gratious_arp->len ) ){
2070 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2071 "but should be %u bytes\n",
2072 (unsigned)indata.dsize,
2073 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2078 arp = talloc(ctdb, struct control_gratious_arp);
2079 CTDB_NO_MEMORY(ctdb, arp);
2082 arp->addr = gratious_arp->addr;
2083 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2084 CTDB_NO_MEMORY(ctdb, arp->iface);
2087 event_add_timed(arp->ctdb->ev, arp,
2088 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler that adds a public address.
  indata is checked against the fixed part of struct ctdb_control_ip_iface
  and against that size plus pub->len; failures are logged. The address,
  mask and interface name are then passed to ctdb_add_public_address(),
  and a failure of that call is logged.
  NOTE(review): return values are not visible in this listing.
 */
2093 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2095 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2098 /* verify the size of indata */
2099 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2100 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2104 ( offsetof(struct ctdb_control_ip_iface, iface)
2107 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2108 "but should be %u bytes\n",
2109 (unsigned)indata.dsize,
2110 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2114 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2117 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2125 called when releaseip event finishes for del_public_address
/*
  Completion callback for the release-ip event script started by
  ctdb_control_del_public_address(): frees private_data, which that
  caller sets to its temporary talloc context.
 */
2127 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2130 talloc_free(private_data);
/*
  Control handler that deletes a public address.
  indata is checked against the fixed part of struct ctdb_control_ip_iface
  and against that size plus pub->len; failures are logged.
  The vnn list is walked until an entry matches pub->addr. That vnn is
  unlinked from ctdb->vnn and a CTDB_EVENT_RELEASE_IP event script is
  started with delete_ip_callback and a temporary talloc context, which
  also owns the duplicated address string. The vnn's killtcp state is
  detached by clearing its back pointer to the vnn.
  NOTE(review): confirm that vnn->killtcp is checked for NULL before the
  dereference; the surrounding lines are not visible in this listing.
 */
2133 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2135 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2136 struct ctdb_vnn *vnn;
2139 /* verify the size of indata */
2140 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2141 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2145 ( offsetof(struct ctdb_control_ip_iface, iface)
2148 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2149 "but should be %u bytes\n",
2150 (unsigned)indata.dsize,
2151 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2155 /* walk over all public addresses until we find a match */
2156 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2157 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2158 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2160 DLIST_REMOVE(ctdb->vnn, vnn);
2162 ret = ctdb_event_script_callback(ctdb,
2163 mem_ctx, delete_ip_callback, mem_ctx,
2165 CTDB_EVENT_RELEASE_IP,
2168 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2169 vnn->public_netmask_bits);
2171 vnn->killtcp->vnn = NULL;
2184 /* This function is called from the recovery daemon to verify that a remote
2185 node has the expected ip allocation.
2186 This is verified against ctdb->ip_tree
/*
  Compares the public-ip list reported by a remote node with the expected
  assignment held in ctdb->ip_tree.
  When no tree exists yet the expected allocation is unknown. Otherwise
  each reported address is looked up under ip_key(); an address missing
  from the tree is logged, entries where either side has pnn -1 get
  separate handling, and a pnn mismatch is logged as an inconsistent
  allocation.
  NOTE(review): return values and the body of the pnn == -1 branch are not
  visible in this listing.
 */
2188 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2190 struct ctdb_public_ip_list *tmp_ip;
2193 if (ctdb->ip_tree == NULL) {
2194 /* dont know the expected allocation yet, assume remote node
2203 for (i=0; i<ips->num; i++) {
2204 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2205 if (tmp_ip == NULL) {
2206 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2210 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2214 if (tmp_ip->pnn != ips->ips[i].pnn) {
2215 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2223 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2225 struct ctdb_public_ip_list *tmp_ip;
2227 if (ctdb->ip_tree == NULL) {
2228 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2232 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2233 if (tmp_ip == NULL) {
2234 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2238 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2239 tmp_ip->pnn = ip->pnn;