4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
36 /* Flags used in IP allocation algorithms. */
43 struct ctdb_iface *prev, *next;
49 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
52 return vnn->iface->name;
58 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
62 /* Verify that we don't have an entry for this interface yet */
63 for (i=ctdb->ifaces;i;i=i->next) {
64 if (strcmp(i->name, iface) == 0) {
69 /* create a new structure for this interface */
70 i = talloc_zero(ctdb, struct ctdb_iface);
71 CTDB_NO_MEMORY_FATAL(ctdb, i);
72 i->name = talloc_strdup(i, iface);
73 CTDB_NO_MEMORY(ctdb, i->name);
75 * If link_up defaults to true then IPs can be allocated to a
76 * node during the first recovery. However, then an interface
77 * could have its link marked down during the startup event,
78 * causing the IP to move almost immediately. If link_up
79 * defaults to false then, during normal operation, IPs added
80 * to a new interface can't be assigned until a monitor cycle
81 * has occurred and marked the new interfaces up. This makes
82 * IP allocation unpredictable. The following is a neat
83 * compromise: early in startup link_up defaults to false, so
84 * IPs can't be assigned, and after startup IPs can be
85 * assigned immediately.
87 i->link_up = (ctdb->runstate == CTDB_RUNSTATE_RUNNING);
89 DLIST_ADD(ctdb->ifaces, i);
94 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
99 for (n = 0; vnn->ifaces[n] != NULL; n++) {
100 if (strcmp(name, vnn->ifaces[n]) == 0) {
108 /* If any interfaces now have no possible IPs then delete them. This
109 * implementation is naive (i.e. simple) rather than clever
110 * (i.e. complex). Given that this is run on delip and that operation
111 * is rare, this doesn't need to be efficient - it needs to be
112 * foolproof. One alternative is reference counting, where the logic
113 * is distributed and can, therefore, be broken in multiple places.
114 * Another alternative is to build a red-black tree of interfaces that
115 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
116 * once) and then walking ctdb->ifaces once and deleting those not in
117 * the tree. Let's go to one of those if the naive implementation
118 * causes problems... :-)
120 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
121 struct ctdb_vnn *vnn,
124 struct ctdb_iface *i;
126 /* For each interface, check if there's an IP using it. */
127 for(i=ctdb->ifaces; i; i=i->next) {
131 /* Only consider interfaces named in the given VNN. */
132 if (!vnn_has_interface_with_name(vnn, i->name)) {
136 /* Is the "single IP" on this interface? */
137 if ((ctdb->single_ip_vnn != NULL) &&
138 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
139 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
140 /* Found, next interface please... */
143 /* Search for a vnn with this interface. */
145 for (tv=ctdb->vnn; tv; tv=tv->next) {
146 if (vnn_has_interface_with_name(tv, i->name)) {
153 /* None of the VNNs are using this interface. */
154 DLIST_REMOVE(ctdb->ifaces, i);
155 /* Caller will free mem_ctx when convenient. */
156 talloc_steal(mem_ctx, i);
162 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
165 struct ctdb_iface *i;
167 for (i=ctdb->ifaces;i;i=i->next) {
168 if (strcmp(i->name, iface) == 0) {
176 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
177 struct ctdb_vnn *vnn)
180 struct ctdb_iface *cur = NULL;
181 struct ctdb_iface *best = NULL;
183 for (i=0; vnn->ifaces[i]; i++) {
185 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
199 if (cur->references < best->references) {
208 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
209 struct ctdb_vnn *vnn)
211 struct ctdb_iface *best = NULL;
214 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
215 "still assigned to iface '%s'\n",
216 ctdb_addr_to_str(&vnn->public_address),
217 ctdb_vnn_iface_string(vnn)));
221 best = ctdb_vnn_best_iface(ctdb, vnn);
223 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
224 "cannot assign to iface any iface\n",
225 ctdb_addr_to_str(&vnn->public_address)));
231 vnn->pnn = ctdb->pnn;
233 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
234 "now assigned to iface '%s' refs[%d]\n",
235 ctdb_addr_to_str(&vnn->public_address),
236 ctdb_vnn_iface_string(vnn),
241 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
242 struct ctdb_vnn *vnn)
244 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
245 "now unassigned (old iface '%s' refs[%d])\n",
246 ctdb_addr_to_str(&vnn->public_address),
247 ctdb_vnn_iface_string(vnn),
248 vnn->iface?vnn->iface->references:0));
250 vnn->iface->references--;
253 if (vnn->pnn == ctdb->pnn) {
258 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
259 struct ctdb_vnn *vnn)
263 if (vnn->iface && vnn->iface->link_up) {
267 for (i=0; vnn->ifaces[i]; i++) {
268 struct ctdb_iface *cur;
270 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
283 struct ctdb_takeover_arp {
284 struct ctdb_context *ctdb;
287 struct ctdb_tcp_array *tcparray;
288 struct ctdb_vnn *vnn;
293 lists of tcp endpoints
295 struct ctdb_tcp_list {
296 struct ctdb_tcp_list *prev, *next;
297 struct ctdb_tcp_connection connection;
301 list of clients to kill on IP release
303 struct ctdb_client_ip {
304 struct ctdb_client_ip *prev, *next;
305 struct ctdb_context *ctdb;
312 send a gratuitous arp
314 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
315 struct timeval t, void *private_data)
317 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
318 struct ctdb_takeover_arp);
320 struct ctdb_tcp_array *tcparray;
321 const char *iface = ctdb_vnn_iface_string(arp->vnn);
323 ret = ctdb_sys_send_arp(&arp->addr, iface);
325 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
326 iface, strerror(errno)));
329 tcparray = arp->tcparray;
331 for (i=0;i<tcparray->num;i++) {
332 struct ctdb_tcp_connection *tcon;
334 tcon = &tcparray->connections[i];
335 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
336 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
337 ctdb_addr_to_str(&tcon->src_addr),
338 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
339 ret = ctdb_sys_send_tcp(
344 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
345 ctdb_addr_to_str(&tcon->src_addr)));
352 if (arp->count == CTDB_ARP_REPEAT) {
357 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
358 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
359 ctdb_control_send_arp, arp);
362 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
363 struct ctdb_vnn *vnn)
365 struct ctdb_takeover_arp *arp;
366 struct ctdb_tcp_array *tcparray;
368 if (!vnn->takeover_ctx) {
369 vnn->takeover_ctx = talloc_new(vnn);
370 if (!vnn->takeover_ctx) {
375 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
381 arp->addr = vnn->public_address;
384 tcparray = vnn->tcp_array;
386 /* add all of the known tcp connections for this IP to the
387 list of tcp connections to send tickle acks for */
388 arp->tcparray = talloc_steal(arp, tcparray);
390 vnn->tcp_array = NULL;
391 vnn->tcp_update_needed = true;
394 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
395 timeval_zero(), ctdb_control_send_arp, arp);
400 struct takeover_callback_state {
401 struct ctdb_req_control *c;
402 ctdb_sock_addr *addr;
403 struct ctdb_vnn *vnn;
406 struct ctdb_do_takeip_state {
407 struct ctdb_req_control *c;
408 struct ctdb_vnn *vnn;
412 called when takeip event finishes
414 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
417 struct ctdb_do_takeip_state *state =
418 talloc_get_type(private_data, struct ctdb_do_takeip_state);
423 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
425 if (status == -ETIME) {
428 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
429 ctdb_addr_to_str(&state->vnn->public_address),
430 ctdb_vnn_iface_string(state->vnn)));
431 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433 node->flags |= NODE_FLAGS_UNHEALTHY;
438 if (ctdb->do_checkpublicip) {
440 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
442 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
449 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450 data.dsize = strlen((char *)data.dptr) + 1;
451 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
453 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
456 /* the control succeeded */
457 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
464 state->vnn->update_in_flight = false;
469 take over an ip address
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472 struct ctdb_req_control *c,
473 struct ctdb_vnn *vnn)
476 struct ctdb_do_takeip_state *state;
478 if (vnn->update_in_flight) {
479 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480 "update for this IP already in flight\n",
481 ctdb_addr_to_str(&vnn->public_address),
482 vnn->public_netmask_bits));
486 ret = ctdb_vnn_assign_iface(ctdb, vnn);
488 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489 "assign a usable interface\n",
490 ctdb_addr_to_str(&vnn->public_address),
491 vnn->public_netmask_bits));
495 state = talloc(vnn, struct ctdb_do_takeip_state);
496 CTDB_NO_MEMORY(ctdb, state);
498 state->c = talloc_steal(ctdb, c);
501 vnn->update_in_flight = true;
502 talloc_set_destructor(state, ctdb_takeip_destructor);
504 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505 ctdb_addr_to_str(&vnn->public_address),
506 vnn->public_netmask_bits,
507 ctdb_vnn_iface_string(vnn)));
509 ret = ctdb_event_script_callback(ctdb,
511 ctdb_do_takeip_callback,
516 ctdb_vnn_iface_string(vnn),
517 ctdb_addr_to_str(&vnn->public_address),
518 vnn->public_netmask_bits);
521 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
522 ctdb_addr_to_str(&vnn->public_address),
523 ctdb_vnn_iface_string(vnn)));
531 struct ctdb_do_updateip_state {
532 struct ctdb_req_control *c;
533 struct ctdb_iface *old;
534 struct ctdb_vnn *vnn;
538 called when updateip event finishes
540 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
543 struct ctdb_do_updateip_state *state =
544 talloc_get_type(private_data, struct ctdb_do_updateip_state);
548 if (status == -ETIME) {
551 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
552 ctdb_addr_to_str(&state->vnn->public_address),
554 ctdb_vnn_iface_string(state->vnn)));
557 * All we can do is reset the old interface
558 * and let the next run fix it
560 ctdb_vnn_unassign_iface(ctdb, state->vnn);
561 state->vnn->iface = state->old;
562 state->vnn->iface->references++;
564 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
569 if (ctdb->do_checkpublicip) {
571 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
573 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
580 /* the control succeeded */
581 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
586 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
588 state->vnn->update_in_flight = false;
593 update (move) an ip address
595 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
596 struct ctdb_req_control *c,
597 struct ctdb_vnn *vnn)
600 struct ctdb_do_updateip_state *state;
601 struct ctdb_iface *old = vnn->iface;
602 const char *new_name;
604 if (vnn->update_in_flight) {
605 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
606 "update for this IP already in flight\n",
607 ctdb_addr_to_str(&vnn->public_address),
608 vnn->public_netmask_bits));
612 ctdb_vnn_unassign_iface(ctdb, vnn);
613 ret = ctdb_vnn_assign_iface(ctdb, vnn);
615 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
616 "assin a usable interface (old iface '%s')\n",
617 ctdb_addr_to_str(&vnn->public_address),
618 vnn->public_netmask_bits,
623 new_name = ctdb_vnn_iface_string(vnn);
624 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
625 /* A benign update from one interface onto itself.
626 * no need to run the eventscripts in this case, just return
629 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
633 state = talloc(vnn, struct ctdb_do_updateip_state);
634 CTDB_NO_MEMORY(ctdb, state);
636 state->c = talloc_steal(ctdb, c);
640 vnn->update_in_flight = true;
641 talloc_set_destructor(state, ctdb_updateip_destructor);
643 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
644 "interface %s to %s\n",
645 ctdb_addr_to_str(&vnn->public_address),
646 vnn->public_netmask_bits,
650 ret = ctdb_event_script_callback(ctdb,
652 ctdb_do_updateip_callback,
655 CTDB_EVENT_UPDATE_IP,
659 ctdb_addr_to_str(&vnn->public_address),
660 vnn->public_netmask_bits);
662 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
663 ctdb_addr_to_str(&vnn->public_address),
664 old->name, new_name));
673 Find the vnn of the node that has a public ip address
674 returns NULL if the address is not known as a public address
676 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
678 struct ctdb_vnn *vnn;
680 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
681 if (ctdb_same_ip(&vnn->public_address, addr)) {
690 take over an ip address
692 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
693 struct ctdb_req_control *c,
698 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
699 struct ctdb_vnn *vnn;
700 bool have_ip = false;
701 bool do_updateip = false;
702 bool do_takeip = false;
703 struct ctdb_iface *best_iface = NULL;
705 if (pip->pnn != ctdb->pnn) {
706 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
707 "with pnn %d, but we're node %d\n",
708 ctdb_addr_to_str(&pip->addr),
709 pip->pnn, ctdb->pnn));
713 /* update our vnn list */
714 vnn = find_public_ip_vnn(ctdb, &pip->addr);
716 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
717 ctdb_addr_to_str(&pip->addr)));
721 if (ctdb->do_checkpublicip) {
722 have_ip = ctdb_sys_have_ip(&pip->addr);
724 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
725 if (best_iface == NULL) {
726 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
727 "a usable interface (old %s, have_ip %d)\n",
728 ctdb_addr_to_str(&vnn->public_address),
729 vnn->public_netmask_bits,
730 ctdb_vnn_iface_string(vnn),
735 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
736 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
741 if (vnn->iface == NULL && have_ip) {
742 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
743 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
744 ctdb_addr_to_str(&vnn->public_address)));
748 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
749 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
750 "and we have it on iface[%s], but it was assigned to node %d"
751 "and we are node %d, banning ourself\n",
752 ctdb_addr_to_str(&vnn->public_address),
753 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
758 if (vnn->pnn == -1 && have_ip) {
759 vnn->pnn = ctdb->pnn;
760 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
761 "and we already have it on iface[%s], update local daemon\n",
762 ctdb_addr_to_str(&vnn->public_address),
763 ctdb_vnn_iface_string(vnn)));
768 if (vnn->iface != best_iface) {
769 if (!vnn->iface->link_up) {
771 } else if (vnn->iface->references > (best_iface->references + 1)) {
772 /* only move when the rebalance gains something */
780 ctdb_vnn_unassign_iface(ctdb, vnn);
787 ret = ctdb_do_takeip(ctdb, c, vnn);
791 } else if (do_updateip) {
792 ret = ctdb_do_updateip(ctdb, c, vnn);
798 * The interface is up and the kernel knows the ip
801 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
802 ctdb_addr_to_str(&pip->addr),
803 vnn->public_netmask_bits,
804 ctdb_vnn_iface_string(vnn)));
808 /* tell ctdb_control.c that we will be replying asynchronously */
815 takeover an ip address old v4 style
817 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
818 struct ctdb_req_control *c,
824 data.dsize = sizeof(struct ctdb_public_ip);
825 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
826 CTDB_NO_MEMORY(ctdb, data.dptr);
828 memcpy(data.dptr, indata.dptr, indata.dsize);
829 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
833 kill any clients that are registered with an IP that is being released
835 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
837 struct ctdb_client_ip *ip;
839 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
840 ctdb_addr_to_str(addr)));
842 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
843 ctdb_sock_addr tmp_addr;
846 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
848 ctdb_addr_to_str(&ip->addr)));
850 if (ctdb_same_ip(&tmp_addr, addr)) {
851 struct ctdb_client *client = ctdb_reqid_find(ctdb,
854 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
856 ctdb_addr_to_str(&ip->addr),
859 if (client->pid != 0) {
860 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
861 (unsigned)client->pid,
862 ctdb_addr_to_str(addr),
864 ctdb_kill(ctdb, client->pid, SIGKILL);
871 called when releaseip event finishes
873 static void release_ip_callback(struct ctdb_context *ctdb, int status,
876 struct takeover_callback_state *state =
877 talloc_get_type(private_data, struct takeover_callback_state);
880 if (status == -ETIME) {
884 /* send a message to all clients of this node telling them
885 that the cluster has been reconfigured and they should
886 release any sockets on this IP */
887 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
888 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
889 data.dsize = strlen((char *)data.dptr)+1;
891 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
893 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
895 /* kill clients that have registered with this IP */
896 release_kill_clients(ctdb, state->addr);
898 ctdb_vnn_unassign_iface(ctdb, state->vnn);
900 /* the control succeeded */
901 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
905 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
907 state->vnn->update_in_flight = false;
912 release an ip address
914 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
915 struct ctdb_req_control *c,
920 struct takeover_callback_state *state;
921 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
922 struct ctdb_vnn *vnn;
925 /* update our vnn list */
926 vnn = find_public_ip_vnn(ctdb, &pip->addr);
928 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
929 ctdb_addr_to_str(&pip->addr)));
934 /* stop any previous arps */
935 talloc_free(vnn->takeover_ctx);
936 vnn->takeover_ctx = NULL;
938 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
939 * lazy multicast to drop an IP from any node that isn't the
940 * intended new node. The following makes ctdbd ignore
941 * a release for any address it doesn't host.
943 if (ctdb->do_checkpublicip) {
944 if (!ctdb_sys_have_ip(&pip->addr)) {
945 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
946 ctdb_addr_to_str(&pip->addr),
947 vnn->public_netmask_bits,
948 ctdb_vnn_iface_string(vnn)));
949 ctdb_vnn_unassign_iface(ctdb, vnn);
953 if (vnn->iface == NULL) {
954 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
955 ctdb_addr_to_str(&pip->addr),
956 vnn->public_netmask_bits));
961 /* There is a potential race between take_ip and us because we
962 * update the VNN via a callback that runs when the
963 * eventscripts have been run. Avoid the race by allowing one
964 * update to be in flight at a time.
966 if (vnn->update_in_flight) {
967 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
968 "update for this IP already in flight\n",
969 ctdb_addr_to_str(&vnn->public_address),
970 vnn->public_netmask_bits));
974 if (ctdb->do_checkpublicip) {
975 iface = ctdb_sys_find_ifname(&pip->addr);
977 DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
981 iface = strdup(ctdb_vnn_iface_string(vnn));
984 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
985 ctdb_addr_to_str(&pip->addr),
986 vnn->public_netmask_bits,
990 state = talloc(ctdb, struct takeover_callback_state);
991 CTDB_NO_MEMORY(ctdb, state);
993 state->c = talloc_steal(state, c);
994 state->addr = talloc(state, ctdb_sock_addr);
995 CTDB_NO_MEMORY(ctdb, state->addr);
996 *state->addr = pip->addr;
999 vnn->update_in_flight = true;
1000 talloc_set_destructor(state, ctdb_releaseip_destructor);
1002 ret = ctdb_event_script_callback(ctdb,
1003 state, release_ip_callback, state,
1005 CTDB_EVENT_RELEASE_IP,
1008 ctdb_addr_to_str(&pip->addr),
1009 vnn->public_netmask_bits);
1012 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1013 ctdb_addr_to_str(&pip->addr),
1014 ctdb_vnn_iface_string(vnn)));
1019 /* tell the control that we will be replying asynchronously */
1020 *async_reply = true;
1025 release an ip address old v4 style
1027 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
1028 struct ctdb_req_control *c,
1034 data.dsize = sizeof(struct ctdb_public_ip);
1035 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
1036 CTDB_NO_MEMORY(ctdb, data.dptr);
1038 memcpy(data.dptr, indata.dptr, indata.dsize);
1039 return ctdb_control_release_ip(ctdb, c, data, async_reply);
1043 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1044 ctdb_sock_addr *addr,
1045 unsigned mask, const char *ifaces,
1048 struct ctdb_vnn *vnn;
1055 tmp = strdup(ifaces);
1056 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1057 if (!ctdb_sys_check_iface_exists(iface)) {
1058 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1065 /* Verify that we don't have an entry for this ip yet */
1066 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1067 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1068 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1069 ctdb_addr_to_str(addr)));
1074 /* create a new vnn structure for this ip address */
1075 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1076 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1077 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1078 tmp = talloc_strdup(vnn, ifaces);
1079 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1080 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1081 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1082 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1083 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1084 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1088 vnn->ifaces[num] = NULL;
1089 vnn->public_address = *addr;
1090 vnn->public_netmask_bits = mask;
1092 if (check_address) {
1093 if (ctdb_sys_have_ip(addr)) {
1094 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1095 vnn->pnn = ctdb->pnn;
1099 for (i=0; vnn->ifaces[i]; i++) {
1100 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1102 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1103 "for public_address[%s]\n",
1104 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1110 DLIST_ADD(ctdb->vnn, vnn);
1116 setup the event script directory
1118 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
1120 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
1121 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
1125 static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
1126 struct timeval t, void *private_data)
1128 struct ctdb_context *ctdb = talloc_get_type(private_data,
1129 struct ctdb_context);
1130 struct ctdb_vnn *vnn;
1132 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1135 for (i=0; vnn->ifaces[i] != NULL; i++) {
1136 if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
1137 DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
1139 ctdb_addr_to_str(&vnn->public_address)));
1144 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
1145 timeval_current_ofs(30, 0),
1146 ctdb_check_interfaces_event, ctdb);
1150 int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
1152 if (ctdb->check_public_ifaces_ctx != NULL) {
1153 talloc_free(ctdb->check_public_ifaces_ctx);
1154 ctdb->check_public_ifaces_ctx = NULL;
1157 ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
1158 if (ctdb->check_public_ifaces_ctx == NULL) {
1159 ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
1162 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
1163 timeval_current_ofs(30, 0),
1164 ctdb_check_interfaces_event, ctdb);
1171 setup the public address lists from a file
1173 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1179 lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb);
1180 if (lines == NULL) {
1181 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1184 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1188 for (i=0;i<nlines;i++) {
1190 ctdb_sock_addr addr;
1191 const char *addrstr;
1196 while ((*line == ' ') || (*line == '\t')) {
1202 if (strcmp(line, "") == 0) {
1205 tok = strtok(line, " \t");
1207 tok = strtok(NULL, " \t");
1209 if (NULL == ctdb->default_public_interface) {
1210 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1215 ifaces = ctdb->default_public_interface;
1220 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1221 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1225 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1226 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1237 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1241 struct ctdb_vnn *svnn;
1242 struct ctdb_iface *cur = NULL;
1246 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1247 CTDB_NO_MEMORY(ctdb, svnn);
1249 svnn->ifaces = talloc_array(svnn, const char *, 2);
1250 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1251 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1252 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1253 svnn->ifaces[1] = NULL;
1255 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1261 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1263 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1264 "for single_ip[%s]\n",
1266 ctdb_addr_to_str(&svnn->public_address)));
1271 /* assume the single public ip interface is initially "good" */
1272 cur = ctdb_find_iface(ctdb, iface);
1274 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1277 cur->link_up = true;
1279 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1285 ctdb->single_ip_vnn = svnn;
1289 /* Given a physical node, return the number of
1290 public addresses that are currently assigned to this node.
1292 static int node_ip_coverage(struct ctdb_context *ctdb,
1294 struct ctdb_public_ip_list *ips)
1298 for (;ips;ips=ips->next) {
1299 if (ips->pnn == pnn) {
1307 /* Can the given node host the given IP: is the public IP known to the
1308 * node and is NOIPHOST unset?
1310 static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
1311 struct ctdb_ipflags ipflags,
1312 struct ctdb_public_ip_list *ip)
1314 struct ctdb_all_public_ips *public_ips;
1317 if (ipflags.noiphost) {
1321 public_ips = ctdb->nodes[pnn]->available_public_ips;
1323 if (public_ips == NULL) {
1327 for (i=0; i<public_ips->num; i++) {
1328 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1329 /* yes, this node can serve this public ip */
1337 static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
1338 struct ctdb_ipflags ipflags,
1339 struct ctdb_public_ip_list *ip)
1341 if (ipflags.noiptakeover) {
1345 return can_node_host_ip(ctdb, pnn, ipflags, ip);
1348 /* search the node list for a node to take over this ip.
1349 pick the node that is currently serving the least number of ips
1350 so that the ips get spread out evenly.
1352 static int find_takeover_node(struct ctdb_context *ctdb,
1353 struct ctdb_ipflags *ipflags,
1354 struct ctdb_public_ip_list *ip,
1355 struct ctdb_public_ip_list *all_ips)
1357 int pnn, min=0, num;
1360 numnodes = talloc_array_length(ipflags);
1362 for (i=0; i<numnodes; i++) {
1363 /* verify that this node can serve this ip */
1364 if (!can_node_takeover_ip(ctdb, i, ipflags[i], ip)) {
1365 /* no it couldnt so skip to the next node */
1369 num = node_ip_coverage(ctdb, i, all_ips);
1370 /* was this the first node we checked ? */
1382 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1383 ctdb_addr_to_str(&ip->addr)));
1393 static uint32_t *ip_key(ctdb_sock_addr *ip)
1395 static uint32_t key[IP_KEYLEN];
1397 bzero(key, sizeof(key));
1399 switch (ip->sa.sa_family) {
1401 key[3] = htonl(ip->ip.sin_addr.s_addr);
1404 uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
1405 key[0] = htonl(s6_a32[0]);
1406 key[1] = htonl(s6_a32[1]);
1407 key[2] = htonl(s6_a32[2]);
1408 key[3] = htonl(s6_a32[3]);
1412 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1419 static void *add_ip_callback(void *parm, void *data)
1421 struct ctdb_public_ip_list *this_ip = parm;
1422 struct ctdb_public_ip_list *prev_ip = data;
1424 if (prev_ip == NULL) {
1427 if (this_ip->pnn == -1) {
1428 this_ip->pnn = prev_ip->pnn;
1434 static int getips_count_callback(void *param, void *data)
1436 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1437 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1439 new_ip->next = *ip_list;
// Build one list of every public address known to any node.
// The previous ctdb->ip_tree is discarded and a new tree is created; each
// list entry is allocated as a talloc child of that tree.
// Presumably returns the head of the list built by the final traversal
// (the return statement is not visible in this listing).
1444 static struct ctdb_public_ip_list *
1445 create_merged_ip_list(struct ctdb_context *ctdb)
1448 struct ctdb_public_ip_list *ip_list;
1449 struct ctdb_all_public_ips *public_ips;
// Throw away the tree left over from an earlier run before building a new one.
1451 if (ctdb->ip_tree != NULL) {
1452 talloc_free(ctdb->ip_tree);
1453 ctdb->ip_tree = NULL;
1455 ctdb->ip_tree = trbt_create(ctdb, 0);
1457 for (i=0;i<ctdb->num_nodes;i++) {
1458 public_ips = ctdb->nodes[i]->known_public_ips;
// Deleted nodes are tested for here (presumably skipped; body not visible).
1460 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1464 /* there were no public ips for this node */
1465 if (public_ips == NULL) {
1469 for (j=0;j<public_ips->num;j++) {
1470 struct ctdb_public_ip_list *tmp_ip;
1472 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1473 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1474 /* Do not use information about IP addresses hosted
1475 * on other nodes, it may not be accurate */
1476 if (public_ips->ips[j].pnn == ctdb->nodes[i]->pnn) {
1477 tmp_ip->pnn = public_ips->ips[j].pnn;
1481 tmp_ip->addr = public_ips->ips[j].addr;
1482 tmp_ip->next = NULL;
// Insert keyed by address so the same IP reported by several nodes shares
// one tree slot.
1484 trbt_insertarray32_callback(ctdb->ip_tree,
1485 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
// Walk the tree and chain every entry into ip_list.
1492 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1498 * This is the length of the longtest common prefix between the IPs.
1499 * It is calculated by XOR-ing the 2 IPs together and counting the
1500 * number of leading zeroes. The implementation means that all
1501 * addresses end up being 128 bits long.
1503 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1504 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1505 * lots of nodes and IP addresses?
// ip1, ip2: the two addresses to compare.
// Works on the IP_KEYLEN-word keys produced by ip_key().
1507 static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1509 uint32_t ip1_k[IP_KEYLEN];
1514 uint32_t distance = 0;
// ip_key() writes into a static buffer, so the first key is copied out
// before a key for the other address is requested.
1516 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1518 for (i=0; i<IP_KEYLEN; i++) {
// XOR leaves a 1 bit wherever the two keys differ in this word.
1519 x = ip1_k[i] ^ t[i];
1523 /* Count number of leading zeroes.
1524 * FIXME? This could be optimised...
// NOTE(review): 1 << 31 shifts a signed int into its sign bit, which ISO C
// leaves undefined for a 32-bit int; 1U << 31 is the portable spelling.
1526 while ((x & (1 << 31)) == 0) {
// Sum of squared ip_distance() values between ip and the addresses in ips
// that are assigned to node pnn.
// ip:  address being evaluated.
// ips: list of candidate addresses to compare against.
// pnn: node whose addresses are considered (the parameter declaration is
//      not visible in this listing; it is used in the loop below).
1536 /* Calculate the IP distance for the given IP relative to IPs on the
1537 given node. The ips argument is generally the all_ips variable
1538 used in the main part of the algorithm.
1540 static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1541 struct ctdb_public_ip_list *ips,
1544 struct ctdb_public_ip_list *t;
1549 for (t=ips; t != NULL; t=t->next) {
// Only addresses currently on node pnn contribute.
1550 if (t->pnn != pnn) {
1554 /* Optimisation: We never calculate the distance
1555 * between an address and itself. This allows us to
1556 * calculate the effect of removing an address from a
1557 * node by simply calculating the distance between
1558 * that address and all of the exitsing addresses.
1559 * Moreover, we assume that we're only ever dealing
1560 * with addresses from all_ips so we can identify an
1561 * address via a pointer rather than doing a more
1562 * expensive address comparison. */
1563 if (&(t->addr) == ip) {
1567 d = ip_distance(ip, &(t->addr));
1568 sum += d * d; /* Cheaper than pulling in math.h :-) */
// all_ips: full list of public addresses.
// pnn:     node whose imbalance is computed.
// For every address on pnn, the squared distances to the later addresses
// on the same node are accumulated into the returned imbalance.
1574 /* Return the LCP2 imbalance metric for addresses currently assigned
1577 static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1579 struct ctdb_public_ip_list *t;
1581 uint32_t imbalance = 0;
1583 for (t=all_ips; t!=NULL; t=t->next) {
// Addresses that are not on this node are filtered out here.
1584 if (t->pnn != pnn) {
1587 /* Pass the rest of the IPs rather than the whole
// Starting at t->next means each pair of addresses is visited only once.
1590 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
// ipflags: per-node IP allocation flags, passed through to find_takeover_node().
// all_ips: list of public addresses; entries with pnn == -1 are unassigned.
1596 /* Allocate any unassigned IPs just by looping through the IPs and
1597 * finding the best node for each.
1599 static void basic_allocate_unassigned(struct ctdb_context *ctdb,
1600 struct ctdb_ipflags *ipflags,
1601 struct ctdb_public_ip_list *all_ips)
1603 struct ctdb_public_ip_list *tmp_ip;
1605 /* loop over all ip's and find a physical node to cover for
1608 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1609 if (tmp_ip->pnn == -1) {
// A non-zero result from find_takeover_node() is reported as a warning.
1610 if (find_takeover_node(ctdb, ipflags, tmp_ip, all_ips)) {
1611 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1612 ctdb_addr_to_str(&tmp_ip->addr)));
// ipflags: per-node IP allocation flags; its talloc array length gives the
//          number of nodes.
// all_ips: list of public addresses with their current owner in ->pnn.
// The last parameter is not visible in this listing; num_ips is used below
// to bound the number of retries.
1618 /* Basic non-deterministic rebalancing algorithm.
1620 static void basic_failback(struct ctdb_context *ctdb,
1621 struct ctdb_ipflags *ipflags,
1622 struct ctdb_public_ip_list *all_ips,
1626 int maxnode, maxnum, minnode, minnum, num, retries;
1627 struct ctdb_public_ip_list *tmp_ip;
1629 numnodes = talloc_array_length(ipflags);
1636 /* for each ip address, loop over all nodes that can serve
1637 this ip and make sure that the difference between the node
1638 serving the most and the node serving the least ip's are
1641 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
// Unassigned addresses are tested for here (handling not visible).
1642 if (tmp_ip->pnn == -1) {
1646 /* Get the highest and lowest number of ips's served by any
1647 valid node which can serve this ip.
1651 for (i=0; i<numnodes; i++) {
1652 /* only check nodes that can actually serve this ip */
1653 if (!can_node_takeover_ip(ctdb, i, ipflags[i], tmp_ip)) {
1654 /* no it couldnt so skip to the next node */
// num is the number of addresses node i currently covers.
1658 num = node_ip_coverage(ctdb, i, all_ips);
1659 if (maxnode == -1) {
1668 if (minnode == -1) {
// maxnode still -1 after the scan: no node was accepted by the loop above.
1678 if (maxnode == -1) {
1679 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1680 ctdb_addr_to_str(&tmp_ip->addr)));
1685 /* if the spread between the smallest and largest coverage by
1686 a node is >=2 we steal one of the ips from the node with
1687 most coverage to even things out a bit.
1688 try to do this a limited number of times since we dont
1689 want to spend too much time balancing the ip coverage.
1691 if ( (maxnum > minnum+1)
1692 && (retries < (num_ips + 5)) ){
1693 struct ctdb_public_ip_list *tmp;
1695 /* Reassign one of maxnode's VNNs */
1696 for (tmp=all_ips;tmp;tmp=tmp->next) {
1697 if (tmp->pnn == maxnode) {
// The result is deliberately ignored here (explicit void cast).
1698 (void)find_takeover_node(ctdb, ipflags, tmp, all_ips);
// Node of a singly linked list of nodes queued for a forced rebalance.
// Entries are pushed by lcp2_forcerebalance() and consumed and freed by
// lcp2_init(). Both read a pnn member whose declaration is not visible in
// this listing.
1707 struct ctdb_rebalancenodes {
1708 struct ctdb_rebalancenodes *next;
// Head of the pending list; NULL when nothing is queued.
1711 static struct ctdb_rebalancenodes *force_rebalance_list = NULL;
1714 /* set this flag to force the node to be rebalanced even if it just didnt
1715 become healthy again.
1717 void lcp2_forcerebalance(struct ctdb_context *ctdb, uint32_t pnn)
1719 struct ctdb_rebalancenodes *rebalance;
1721 for (rebalance = force_rebalance_list; rebalance; rebalance = rebalance->next) {
1722 if (rebalance->pnn == pnn) {
1727 rebalance = talloc(ctdb, struct ctdb_rebalancenodes);
1728 rebalance->pnn = pnn;
1729 rebalance->next = force_rebalance_list;
1730 force_rebalance_list = rebalance;
1733 /* Do necessary LCP2 initialisation. Bury it in a function here so
1734 * that we can unit test it.
1736 static void lcp2_init(struct ctdb_context *tmp_ctx,
1737 struct ctdb_ipflags *ipflags,
1738 struct ctdb_public_ip_list *all_ips,
1739 uint32_t **lcp2_imbalances,
1740 bool **rebalance_candidates)
1743 struct ctdb_public_ip_list *tmp_ip;
1745 numnodes = talloc_array_length(ipflags);
1747 *rebalance_candidates = talloc_array(tmp_ctx, bool, numnodes);
1748 CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
1749 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, numnodes);
1750 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1752 for (i=0; i<numnodes; i++) {
1753 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1754 /* First step: assume all nodes are candidates */
1755 (*rebalance_candidates)[i] = true;
1758 /* 2nd step: if a node has IPs assigned then it must have been
1759 * healthy before, so we remove it from consideration. This
1760 * is overkill but is all we have because we don't maintain
1761 * state between takeover runs. An alternative would be to
1762 * keep state and invalidate it every time the recovery master
1765 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1766 if (tmp_ip->pnn != -1) {
1767 (*rebalance_candidates)[tmp_ip->pnn] = false;
1771 /* 3rd step: if a node is forced to re-balance then
1772 we allow failback onto the node */
1773 while (force_rebalance_list != NULL) {
1774 struct ctdb_rebalancenodes *next = force_rebalance_list->next;
1776 if (force_rebalance_list->pnn <= numnodes) {
1777 (*rebalance_candidates)[force_rebalance_list->pnn] = true;
1780 DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
1781 talloc_free(force_rebalance_list);
1782 force_rebalance_list = next;
// ipflags:         per-node flags; its talloc array length is the node count.
// all_ips:         list of public addresses; pnn == -1 marks unassigned ones.
// lcp2_imbalances: per-node imbalance values, updated when an address is
//                  assigned to a node.
// Each pass of the outer loop evaluates every (unassigned address, eligible
// node) pair and remembers the candidate with the smallest added distance sum.
1786 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1787 * the IP/node combination that will cost the least.
1789 static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1790 struct ctdb_ipflags *ipflags,
1791 struct ctdb_public_ip_list *all_ips,
1792 uint32_t *lcp2_imbalances)
1794 struct ctdb_public_ip_list *tmp_ip;
1795 int dstnode, numnodes;
1798 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1799 struct ctdb_public_ip_list *minip;
1801 bool should_loop = true;
1802 bool have_unassigned = true;
1804 numnodes = talloc_array_length(ipflags);
// Keep going while addresses remain unassigned and should_loop is still set;
// should_loop is cleared at the top of each pass (where it is set again is
// not visible in this listing).
1806 while (have_unassigned && should_loop) {
1807 should_loop = false;
1809 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1810 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1816 /* loop over each unassigned ip. */
1817 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1818 if (tmp_ip->pnn != -1) {
1822 for (dstnode=0; dstnode<numnodes; dstnode++) {
1823 /* only check nodes that can actually takeover this ip */
1824 if (!can_node_takeover_ip(ctdb, dstnode,
1827 /* no it couldnt so skip to the next node */
// Cost of placing this address on dstnode, and the node's imbalance afterwards.
1831 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1832 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1833 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1834 ctdb_addr_to_str(&(tmp_ip->addr)),
1836 dstimbl - lcp2_imbalances[dstnode]));
// First candidate seen, or one cheaper than the best so far.
1839 if ((minnode == -1) || (dstdsum < mindsum)) {
1849 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1851 /* If we found one then assign it to the given node. */
1852 if (minnode != -1) {
1853 minip->pnn = minnode;
1854 lcp2_imbalances[minnode] = minimbl;
1855 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1856 ctdb_addr_to_str(&(minip->addr)),
1861 /* There might be a better way but at least this is clear. */
1862 have_unassigned = false;
1863 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1864 if (tmp_ip->pnn == -1) {
1865 have_unassigned = true;
1870 /* We know if we have an unassigned addresses so we might as
// Report every address that could not be placed.
1873 if (have_unassigned) {
1874 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1875 if (tmp_ip->pnn == -1) {
1876 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1877 ctdb_addr_to_str(&tmp_ip->addr)));
1883 /* LCP2 algorithm for rebalancing the cluster. Given a candidate node
1884 * to move IPs from, determines the best IP/destination node
1885 * combination to move from the source node.
1887 static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
1888 struct ctdb_ipflags *ipflags,
1889 struct ctdb_public_ip_list *all_ips,
1892 uint32_t *lcp2_imbalances,
1893 bool *rebalance_candidates)
1895 int dstnode, mindstnode, numnodes;
1896 uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
1897 uint32_t minsrcimbl, mindstimbl;
1898 struct ctdb_public_ip_list *minip;
1899 struct ctdb_public_ip_list *tmp_ip;
1901 /* Find an IP and destination node that best reduces imbalance. */
1907 numnodes = talloc_array_length(ipflags);
1909 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1910 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
1912 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1913 /* Only consider addresses on srcnode. */
1914 if (tmp_ip->pnn != srcnode) {
1918 /* What is this IP address costing the source node? */
1919 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
1920 srcimbl = candimbl - srcdsum;
1922 /* Consider this IP address would cost each potential
1923 * destination node. Destination nodes are limited to
1924 * those that are newly healthy, since we don't want
1925 * to do gratuitous failover of IPs just to make minor
1926 * balance improvements.
1928 for (dstnode=0; dstnode<numnodes; dstnode++) {
1929 if (!rebalance_candidates[dstnode]) {
1933 /* only check nodes that can actually takeover this ip */
1934 if (!can_node_takeover_ip(ctdb, dstnode,
1935 ipflags[dstnode], tmp_ip)) {
1936 /* no it couldnt so skip to the next node */
1940 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1941 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1942 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1943 srcnode, srcimbl - lcp2_imbalances[srcnode],
1944 ctdb_addr_to_str(&(tmp_ip->addr)),
1945 dstnode, dstimbl - lcp2_imbalances[dstnode]));
1947 if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
1948 ((mindstnode == -1) || \
1949 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1952 minsrcimbl = srcimbl;
1953 mindstnode = dstnode;
1954 mindstimbl = dstimbl;
1958 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1960 if (mindstnode != -1) {
1961 /* We found a move that makes things better... */
1962 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1963 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1964 ctdb_addr_to_str(&(minip->addr)),
1965 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
1968 lcp2_imbalances[srcnode] = srcimbl;
1969 lcp2_imbalances[mindstnode] = mindstimbl;
1970 minip->pnn = mindstnode;
// Array element used by lcp2_failback() to sort nodes by imbalance.
// lcp2_failback() fills in an imbalance member and lcp2_cmp_imbalance_pnn()
// compares on it; the member declarations are not visible in this listing.
1979 struct lcp2_imbalance_pnn {
// qsort() comparator over struct lcp2_imbalance_pnn elements, ordering by
// the imbalance member.
// a, b: pointers to the two elements being compared.
// The returned values are not visible in this listing, so the sort
// direction cannot be confirmed from here.
1984 static int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
1986 const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
1987 const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
1989 if (lipa->imbalance > lipb->imbalance) {
1991 } else if (lipa->imbalance == lipb->imbalance) {
// ipflags:              per-node flags; its talloc array length is the node count.
// all_ips:              list of public addresses with their current owner.
// lcp2_imbalances:      per-node imbalance values.
// rebalance_candidates: per-node flags marking nodes IPs may be moved onto.
1998 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1999 * node with the highest LCP2 imbalance, and then determines the best
2000 * IP/destination node combination to move from the source node.
2002 static void lcp2_failback(struct ctdb_context *ctdb,
2003 struct ctdb_ipflags *ipflags,
2004 struct ctdb_public_ip_list *all_ips,
2005 uint32_t *lcp2_imbalances,
2006 bool *rebalance_candidates)
2008 int i, num_rebalance_candidates, numnodes;
2009 struct lcp2_imbalance_pnn * lips;
2012 numnodes = talloc_array_length(ipflags);
2016 /* It is only worth continuing if we have suitable target
2017 * nodes to transfer IPs to. This check is much cheaper than
2020 num_rebalance_candidates = 0;
2021 for (i=0; i<numnodes; i++) {
2022 if (rebalance_candidates[i]) {
2023 num_rebalance_candidates++;
2026 if (num_rebalance_candidates == 0) {
2030 /* Put the imbalances and nodes into an array, sort them and
2031 * iterate through candidates. Usually the 1st one will be
2032 * used, so this doesn't cost much...
// NOTE(review): the result of talloc_array() is written through in the
// loop below without a NULL check; consider CTDB_NO_MEMORY_FATAL() as used
// in lcp2_init().
2034 lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, numnodes);
2035 for (i=0; i<numnodes; i++) {
2036 lips[i].imbalance = lcp2_imbalances[i];
2039 qsort(lips, numnodes, sizeof(struct lcp2_imbalance_pnn),
2040 lcp2_cmp_imbalance_pnn);
// Try the sorted nodes in order as the source of a move.
2043 for (i=0; i<numnodes; i++) {
2044 /* This means that all nodes had 0 or 1 addresses, so
2045 * can't be imbalanced.
2047 if (lips[i].imbalance == 0) {
// A true result means lcp2_failback_candidate() performed a move.
2051 if (lcp2_failback_candidate(ctdb,
2057 rebalance_candidates)) {
// Check every assigned address against the node that currently holds it.
// ipflags: per-node flags, indexed by node number.
// all_ips: list of public addresses with their current owner in ->pnn.
2069 static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
2070 struct ctdb_ipflags *ipflags,
2071 struct ctdb_public_ip_list *all_ips)
2073 struct ctdb_public_ip_list *tmp_ip;
2075 /* verify that the assigned nodes can serve that public ip
2076 and set it to -1 if not
2078 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
// Already unassigned entries are tested for here (handling not visible).
2079 if (tmp_ip->pnn == -1) {
// The condition parses as (!can_node_host_ip(...)) != 0, i.e. it is true
// when the node cannot host the address.
2082 if (!can_node_host_ip(ctdb, tmp_ip->pnn,
2083 ipflags[tmp_ip->pnn], tmp_ip) != 0) {
2084 /* this node can not serve this ip. */
2085 DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
2086 ctdb_addr_to_str(&(tmp_ip->addr)),
// Deterministic allocation: assign addresses to nodes round-robin by list
// position, drop assignments the node cannot serve, then place whatever is
// left unassigned with the basic allocator.
// ipflags: per-node flags; its talloc array length is the node count.
// all_ips: list of public addresses; every ->pnn is overwritten.
2093 static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
2094 struct ctdb_ipflags *ipflags,
2095 struct ctdb_public_ip_list *all_ips)
2097 struct ctdb_public_ip_list *tmp_ip;
2100 numnodes = talloc_array_length(ipflags);
2102 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
2103 /* Allocate IPs to nodes in a modulo fashion so that IPs will
2104 * always be allocated the same way for a specific set of
2105 * available/unavailable nodes.
2108 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
2109 tmp_ip->pnn = i % numnodes;
2112 /* IP failback doesn't make sense with deterministic
2113 * IPs, since the modulo step above implicitly fails
2114 * back IPs to their "home" node.
2116 if (1 == ctdb->tunable.no_ip_failback) {
2117 DEBUG(DEBUG_WARNING, ("WARNING: 'NoIPFailback' set but ignored - incompatible with 'DeterministicIPs\n"));
2120 unassign_unsuitable_ips(ctdb, ipflags, all_ips);
2122 basic_allocate_unassigned(ctdb, ipflags, all_ips);
2124 /* No failback here! */
// Non-deterministic allocation: drop unsuitable assignments, place
// unassigned addresses, then rebalance with basic_failback().
// ipflags: per-node flags.
// all_ips: list of public addresses with their current owner in ->pnn.
2127 static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
2128 struct ctdb_ipflags *ipflags,
2129 struct ctdb_public_ip_list *all_ips)
2131 /* This should be pushed down into basic_failback. */
2132 struct ctdb_public_ip_list *tmp_ip;
// Walk the list; presumably this counts the entries into num_ips, which is
// passed to basic_failback() below (the loop body is not visible here).
2134 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2138 unassign_unsuitable_ips(ctdb, ipflags, all_ips);
2140 basic_allocate_unassigned(ctdb, ipflags, all_ips);
2142 /* If we don't want IPs to fail back then don't rebalance IPs. */
2143 if (1 == ctdb->tunable.no_ip_failback) {
2147 /* Now, try to make sure the ip adresses are evenly distributed
2150 basic_failback(ctdb, ipflags, all_ips, num_ips);
// LCP2 allocation: drop unsuitable assignments, initialise the LCP2 state,
// place unassigned addresses and then rebalance with lcp2_failback().
// ipflags: per-node flags.
// all_ips: list of public addresses with their current owner in ->pnn.
2153 static void ip_alloc_lcp2(struct ctdb_context *ctdb,
2154 struct ctdb_ipflags *ipflags,
2155 struct ctdb_public_ip_list *all_ips)
2157 uint32_t *lcp2_imbalances;
2158 bool *rebalance_candidates;
// Scratch context: the arrays created by lcp2_init() hang off it and are
// released by the talloc_free() at the end.
2160 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2162 unassign_unsuitable_ips(ctdb, ipflags, all_ips);
2164 lcp2_init(tmp_ctx, ipflags, all_ips,
2165 &lcp2_imbalances, &rebalance_candidates);
2167 lcp2_allocate_unassigned(ctdb, ipflags, all_ips, lcp2_imbalances);
2169 /* If we don't want IPs to fail back then don't rebalance IPs. */
2170 if (1 == ctdb->tunable.no_ip_failback) {
2174 /* Now, try to make sure the ip adresses are evenly distributed
2177 lcp2_failback(ctdb, ipflags, all_ips,
2178 lcp2_imbalances, rebalance_candidates);
2181 talloc_free(tmp_ctx);
// Returns true when no node in nodemap is free of both the INACTIVE and
// DISABLED flags, i.e. when the healthy-node count is zero.
// nodemap: node map whose per-node flags are examined.
2184 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
2188 /* Count how many completely healthy nodes we have */
2190 for (i=0;i<nodemap->num;i++) {
// A node is tested here for having neither INACTIVE nor DISABLED set.
2191 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
2196 return num_healthy == 0;
// ipflags:   per-node flags consumed by the allocation algorithms.
// all_ips_p: out, receives the merged list of public addresses with the
//            computed owner in each entry's ->pnn.
// The algorithm is selected by tunables: LCP2 first, then deterministic,
// otherwise the non-deterministic allocator.
2199 /* The calculation part of the IP allocation algorithm. */
2200 static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
2201 struct ctdb_ipflags *ipflags,
2202 struct ctdb_public_ip_list **all_ips_p)
2204 /* since nodes only know about those public addresses that
2205 can be served by that particular node, no single node has
2206 a full list of all public addresses that exist in the cluster.
2207 Walk over all node structures and create a merged list of
2208 all public addresses that exist in the cluster.
2210 keep the tree of ips around as ctdb->ip_tree
2212 *all_ips_p = create_merged_ip_list(ctdb);
2214 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
2215 ip_alloc_lcp2(ctdb, ipflags, *all_ips_p);
2216 } else if (1 == ctdb->tunable.deterministic_public_ips) {
2217 ip_alloc_deterministic_ips(ctdb, ipflags, *all_ips_p);
2219 ip_alloc_nondeterministic_ips(ctdb, ipflags, *all_ips_p);
2222 /* at this point ->pnn is the node which will own each IP
2223 or -1 if there is no node that can cover this ip
// State shared between get_tunable_from_nodes() and its two callbacks.
// Besides the tunable name, the code in this file also uses members named
// out (per-node result array) and fatal; their declarations are not visible
// in this listing.
2229 struct get_tunable_callback_data {
// Name of the tunable being queried; used in log messages.
2230 const char *tunable;
// Success callback for the GET_TUNABLE control sent by
// get_tunable_from_nodes(): stores the value returned by node pnn.
// pnn:     node that replied.
// res:     result code of the control.
// outdata: reply payload; must be exactly one uint32_t.
// The final parameter is not visible in this listing; it is used below as
// "callback" and cast to struct get_tunable_callback_data.
2235 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
2236 int32_t res, TDB_DATA outdata,
2239 struct get_tunable_callback_data *cd =
2240 (struct get_tunable_callback_data *)callback;
2244 /* Already handled in fail callback */
// Reject replies whose payload is not the size of a uint32_t.
2248 if (outdata.dsize != sizeof(uint32_t)) {
2249 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
2250 cd->tunable, pnn, (int)sizeof(uint32_t),
2251 (int)outdata.dsize));
// The output array length bounds the node numbers that can be stored.
2256 size = talloc_array_length(cd->out);
2258 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
2259 cd->tunable, pnn, size));
2264 cd->out[pnn] = *(uint32_t *)outdata.dptr;
// Failure callback for the GET_TUNABLE control sent by
// get_tunable_from_nodes(): logs why node pnn did not deliver the tunable.
// Three outcomes are distinguished by message: timeout, tunable not
// implemented on the node, and any other error. The conditions selecting
// them, and any update of the shared state, are not visible in this listing.
2267 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
2268 int32_t res, TDB_DATA outdata,
2271 struct get_tunable_callback_data *cd =
2272 (struct get_tunable_callback_data *)callback;
2277 ("Timed out getting tunable \"%s\" from node %d\n",
2283 DEBUG(DEBUG_WARNING,
2284 ("Tunable \"%s\" not implemented on node %d\n",
2289 ("Unexpected error getting tunable \"%s\" from node %d\n",
// Query one tunable on the connected nodes and collect the per-node values.
// tmp_ctx:       talloc parent for the result array and request buffer.
// nodemap:       node map; nodemap->num sizes the result array.
// tunable:       name of the tunable to read.
// default_value: value pre-filled for every node.
// Returns NULL if the result array cannot be allocated (via
// CTDB_NO_MEMORY_NULL); the other return paths are not visible in this
// listing.
2295 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
2296 TALLOC_CTX *tmp_ctx,
2297 struct ctdb_node_map *nodemap,
2298 const char *tunable,
2299 uint32_t default_value)
2302 struct ctdb_control_get_tunable *t;
2305 struct get_tunable_callback_data callback_data;
2308 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
2309 CTDB_NO_MEMORY_NULL(ctdb, tvals);
2310 for (i=0; i<nodemap->num; i++) {
2311 tvals[i] = default_value;
2314 callback_data.out = tvals;
2315 callback_data.tunable = tunable;
2316 callback_data.fatal = false;
// Request payload: the control header followed by the NUL-terminated name.
2318 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
// NOTE(review): the talloc_size() result is used on the next lines without
// a NULL check; consider CTDB_NO_MEMORY_NULL() as done for tvals above.
2319 data.dptr = talloc_size(tmp_ctx, data.dsize);
2320 t = (struct ctdb_control_get_tunable *)data.dptr;
2321 t->length = strlen(tunable)+1;
2322 memcpy(t->name, tunable, t->length);
2323 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2324 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
2325 nodes, 0, TAKEOVER_TIMEOUT(),
2327 get_tunable_callback,
2328 get_tunable_fail_callback,
2329 &callback_data) != 0) {
// The fatal flag in the shared callback state is checked after a failed
// control (the handling is not visible in this listing).
2330 if (callback_data.fatal) {
2336 talloc_free(data.dptr);
// tmp_ctx: talloc parent for the returned array.
// nodemap: node map; nodemap->num sizes the array and supplies node flags.
// tval_noiptakeover:          per-node NoIPTakeover values.
// tval_noiphostonalldisabled: per-node NoIPHostOnAllDisabled values.
// Returns NULL if the array cannot be allocated (via CTDB_NO_MEMORY_NULL);
// presumably returns the filled-in array otherwise (the return statement is
// not visible in this listing).
2341 /* Set internal flags for IP allocation:
2343 * Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
2344 * Set NOIPHOST ip flag for each INACTIVE node
2345 * if all nodes are disabled:
2346 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
2348 * Set NOIPHOST ip flags for disabled nodes
2350 static struct ctdb_ipflags *
2351 set_ipflags_internal(struct ctdb_context *ctdb,
2352 TALLOC_CTX *tmp_ctx,
2353 struct ctdb_node_map *nodemap,
2354 uint32_t *tval_noiptakeover,
2355 uint32_t *tval_noiphostonalldisabled)
2358 struct ctdb_ipflags *ipflags;
2360 /* Clear IP flags - implicit due to talloc_zero */
2361 ipflags = talloc_zero_array(tmp_ctx, struct ctdb_ipflags, nodemap->num);
2362 CTDB_NO_MEMORY_NULL(ctdb, ipflags);
2364 for (i=0;i<nodemap->num;i++) {
2365 /* Can not take IPs on node with NoIPTakeover set */
2366 if (tval_noiptakeover[i] != 0) {
2367 ipflags[i].noiptakeover = true;
2370 /* Can not host IPs on INACTIVE node */
2371 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
2372 ipflags[i].noiphost = true;
2376 if (all_nodes_are_disabled(nodemap)) {
2377 /* If all nodes are disabled, can not host IPs on node
2378 * with NoIPHostOnAllDisabled set
2380 for (i=0;i<nodemap->num;i++) {
2381 if (tval_noiphostonalldisabled[i] != 0) {
2382 ipflags[i].noiphost = true;
2386 /* If some nodes are not disabled, then can not host
2387 * IPs on DISABLED node
2389 for (i=0;i<nodemap->num;i++) {
2390 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
2391 ipflags[i].noiphost = true;
// Fetch the two per-node tunables needed for IP allocation from the
// cluster and turn them into the per-node ipflags array.
// tmp_ctx: talloc parent for the temporary arrays and the result.
// nodemap: node map describing the cluster.
// A NULL result from either tunable query is tested for (the handling is
// not visible in this listing).
2399 static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb,
2400 TALLOC_CTX *tmp_ctx,
2401 struct ctdb_node_map *nodemap)
2403 uint32_t *tval_noiptakeover;
2404 uint32_t *tval_noiphostonalldisabled;
2405 struct ctdb_ipflags *ipflags;
2407 tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
2409 if (tval_noiptakeover == NULL) {
2413 tval_noiphostonalldisabled =
2414 get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
2415 "NoIPHostOnAllDisabled", 0);
2416 if (tval_noiphostonalldisabled == NULL) {
2417 /* Caller frees tmp_ctx */
2421 ipflags = set_ipflags_internal(ctdb, tmp_ctx, nodemap,
2423 tval_noiphostonalldisabled);
// The raw tunable arrays are no longer needed once the flags are derived.
2425 talloc_free(tval_noiptakeover);
2426 talloc_free(tval_noiphostonalldisabled);
2432 make any IP alias changes for public addresses that are necessary
// nodemap:       node map describing the cluster.
// fail_callback: invoked for nodes whose control fails.
// callback_data: opaque pointer handed to fail_callback.
// Visible phases: compute the new assignment, pause IP sanity checks, ask
// nodes to release addresses they should not hold, ask the new owners to
// take their addresses, then trigger the "ipreallocated" event.
// The failure paths free tmp_ctx; the values returned are not visible in
// this listing.
2434 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
2435 client_async_callback fail_callback, void *callback_data)
2438 struct ctdb_public_ip ip;
2439 struct ctdb_public_ipv4 ipv4;
2441 struct ctdb_public_ip_list *all_ips, *tmp_ip;
2443 struct timeval timeout;
2444 struct client_async_data *async_data;
2445 struct ctdb_client_control_state *state;
2446 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2447 uint32_t disable_timeout;
2448 struct ctdb_ipflags *ipflags;
2451 * ip failover is completely disabled, just send out the
2452 * ipreallocated event.
2454 if (ctdb->tunable.disable_ip_failover != 0) {
2458 ipflags = set_ipflags(ctdb, tmp_ctx, nodemap);
2459 if (ipflags == NULL) {
2460 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
2461 talloc_free(tmp_ctx);
2467 /* Do the IP reassignment calculations */
2468 ctdb_takeover_run_core(ctdb, ipflags, &all_ips);
2470 /* The IP flags need to be cleared because they should never
2471 * be seen outside the IP allocation code.
2474 /* The recovery daemon does regular sanity checks of the IPs.
2475 * However, sometimes it is overzealous and thinks changes are
2476 * required when they're already underway. This stops the
2477 * checks for a while before we start moving IPs.
// The message payload is the takeover timeout value.
2479 disable_timeout = ctdb->tunable.takeover_timeout;
2480 data.dptr = (uint8_t*)&disable_timeout;
2481 data.dsize = sizeof(disable_timeout);
2482 if (ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
2483 CTDB_SRVID_DISABLE_IP_CHECK, data) != 0) {
2484 DEBUG(DEBUG_INFO,("Failed to disable ip verification\n"));
2487 /* now tell all nodes to delete any alias that they should not
2488 have. This will be a NOOP on nodes that don't currently
2489 hold the given alias */
2490 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2491 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2493 async_data->fail_callback = fail_callback;
2494 async_data->callback_data = callback_data;
2496 for (i=0;i<nodemap->num;i++) {
2497 /* don't talk to unconnected nodes, but do talk to banned nodes */
2498 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
2502 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2503 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
2504 /* This node should be serving this
2505 vnn so dont tell it to release the ip
// IPv4 addresses use the IPv4-specific payload and RELEASE_IPv4 control;
// everything else uses the generic payload and RELEASE_IP.
2509 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2510 ipv4.pnn = tmp_ip->pnn;
2511 ipv4.sin = tmp_ip->addr.ip;
2513 timeout = TAKEOVER_TIMEOUT();
2514 data.dsize = sizeof(ipv4);
2515 data.dptr = (uint8_t *)&ipv4;
2516 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2517 0, CTDB_CONTROL_RELEASE_IPv4, 0,
2521 ip.pnn = tmp_ip->pnn;
2522 ip.addr = tmp_ip->addr;
2524 timeout = TAKEOVER_TIMEOUT();
2525 data.dsize = sizeof(ip);
2526 data.dptr = (uint8_t *)&ip;
2527 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2528 0, CTDB_CONTROL_RELEASE_IP, 0,
2533 if (state == NULL) {
2534 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
2535 talloc_free(tmp_ctx);
2539 ctdb_client_async_add(async_data, state);
// Wait for all release controls queued above.
2542 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2543 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
2544 talloc_free(tmp_ctx);
2547 talloc_free(async_data);
2550 /* tell all nodes to get their own IPs */
2551 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2552 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2554 async_data->fail_callback = fail_callback;
2555 async_data->callback_data = callback_data;
2557 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2558 if (tmp_ip->pnn == -1) {
2559 /* this IP won't be taken over */
// Same IPv4 / generic split as in the release phase, sent to the new owner.
2563 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2564 ipv4.pnn = tmp_ip->pnn;
2565 ipv4.sin = tmp_ip->addr.ip;
2567 timeout = TAKEOVER_TIMEOUT();
2568 data.dsize = sizeof(ipv4);
2569 data.dptr = (uint8_t *)&ipv4;
2570 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2571 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
2575 ip.pnn = tmp_ip->pnn;
2576 ip.addr = tmp_ip->addr;
2578 timeout = TAKEOVER_TIMEOUT();
2579 data.dsize = sizeof(ip);
2580 data.dptr = (uint8_t *)&ip;
2581 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2582 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2586 if (state == NULL) {
2587 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2588 talloc_free(tmp_ctx);
2592 ctdb_client_async_add(async_data, state);
// Wait for all takeover controls queued above.
2594 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2595 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2596 talloc_free(tmp_ctx);
2602 * Tell all nodes to run eventscripts to process the
2603 * "ipreallocated" event. This can do a lot of things,
2604 * including restarting services to reconfigure them if public
2605 * IPs have moved. Once upon a time this event only used to
2608 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2609 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
2610 nodes, 0, TAKEOVER_TIMEOUT(),
2612 NULL, fail_callback,
2613 callback_data) != 0) {
2614 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
2617 talloc_free(tmp_ctx);
2623 destroy a ctdb_client_ip structure
// talloc destructor registered in ctdb_control_tcp_client(): logs the
// entry and unlinks it from ctdb->client_ip_list.
// ip: the entry being destroyed.
2625 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2627 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2628 ctdb_addr_to_str(&ip->addr),
2629 ntohs(ip->addr.ip.sin_port),
2632 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
2637 called by a client to inform us of a TCP connection that it is managing
2638 that should tickled with an ACK when IP takeover is done
2639 we handle both the old ipv4 style of packets as well as the new ipv4/6
// client_id: request id used to look up the calling struct ctdb_client.
// The remaining parameter is not visible in this listing; it is used below
// as indata, whose size selects the old or the new packet layout.
// On success the connection is recorded on the client and announced to all
// connected nodes with CTDB_CONTROL_TCP_ADD. Return values are not visible
// in this listing.
2642 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
// NOTE(review): client is dereferenced further down (client->pid, talloc
// parent) and no NULL check is visible in this listing; confirm that
// ctdb_reqid_find() cannot return NULL here.
2645 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2646 struct ctdb_control_tcp *old_addr = NULL;
2647 struct ctdb_control_tcp_addr new_addr;
2648 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2649 struct ctdb_tcp_list *tcp;
2650 struct ctdb_tcp_connection t;
2653 struct ctdb_client_ip *ip;
2654 struct ctdb_vnn *vnn;
2655 ctdb_sock_addr addr;
// The payload size decides which structure the client sent.
2657 switch (indata.dsize) {
// Old IPv4-only layout: convert it into the new layout in new_addr.
2658 case sizeof(struct ctdb_control_tcp):
2659 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2660 ZERO_STRUCT(new_addr);
2661 tcp_sock = &new_addr;
2662 tcp_sock->src.ip = old_addr->src;
2663 tcp_sock->dest.ip = old_addr->dest;
// New layout: use the payload in place.
2665 case sizeof(struct ctdb_control_tcp_addr):
2666 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2669 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2670 "to ctdb_control_tcp_client. size was %d but "
2671 "only allowed sizes are %lu and %lu\n",
2673 (long unsigned)sizeof(struct ctdb_control_tcp),
2674 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
// Canonicalize both endpoints, using addr as a scratch copy of the input.
2678 addr = tcp_sock->src;
2679 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2680 addr = tcp_sock->dest;
2681 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
// Look up the public address entry for the destination address.
2684 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2685 vnn = find_public_ip_vnn(ctdb, &addr);
2687 switch (addr.sa.sa_family) {
// For IPv4 the "not a public address" error is only logged when the
// destination is not the loopback address.
2689 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2690 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2691 ctdb_addr_to_str(&addr)));
2695 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2696 ctdb_addr_to_str(&addr)));
2699 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
// Refuse registrations for an address this node does not currently hold.
2705 if (vnn->pnn != ctdb->pnn) {
2706 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2707 ctdb_addr_to_str(&addr),
2708 client_id, client->pid));
2709 /* failing this call will tell smbd to die */
// Both records are talloc children of client.
2713 ip = talloc(client, struct ctdb_client_ip);
2714 CTDB_NO_MEMORY(ctdb, ip);
2718 ip->client_id = client_id;
2719 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2720 DLIST_ADD(ctdb->client_ip_list, ip);
2722 tcp = talloc(client, struct ctdb_tcp_list);
2723 CTDB_NO_MEMORY(ctdb, tcp);
2725 tcp->connection.src_addr = tcp_sock->src;
2726 tcp->connection.dst_addr = tcp_sock->dest;
2728 DLIST_ADD(client->tcp_list, tcp);
// Payload for the TCP_ADD broadcast below.
2730 t.src_addr = tcp_sock->src;
2731 t.dst_addr = tcp_sock->dest;
2733 data.dptr = (uint8_t *)&t;
2734 data.dsize = sizeof(t);
2736 switch (addr.sa.sa_family) {
// NOTE(review): both log lines print the destination port followed by the
// source address and source port; confirm that this pairing is intended.
2738 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2739 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2740 ctdb_addr_to_str(&tcp_sock->src),
2741 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2744 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2745 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2746 ctdb_addr_to_str(&tcp_sock->src),
2747 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2750 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2754 /* tell all nodes about this tcp connection */
2755 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2756 CTDB_CONTROL_TCP_ADD,
2757 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2759 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2767 find a tcp address on a list
/*
  Linear search of 'array' for a connection whose source and destination
  addresses both match those of 'tcp' (compared with ctdb_same_sockaddr).
  On a match, a pointer to the element inside array->connections is
  returned.  A NULL 'array' is tested for explicitly before the scan.
  Callers in this file compare the result against NULL to detect
  "not found".
 */
2769 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2770 struct ctdb_tcp_connection *tcp)
2774 if (array == NULL) {
2778 for (i=0;i<array->num;i++) {
2779 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2780 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2781 return &array->connections[i];
2790 called by a daemon to inform us of a TCP connection that one of its
2791 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Control handler: record a TCP connection in the tickle array of the
  public address it terminates on.

  indata.dptr is interpreted as a struct ctdb_tcp_connection; no check of
  indata.dsize is visible in this function.  The vnn is looked up from the
  connection's destination address.  If the vnn has no tcp_array yet, one
  is allocated under ctdb->nodes together with storage for a single
  connection.  Otherwise the array is searched with ctdb_tcp_find() so a
  known connection is only logged, and an unknown one is appended after a
  talloc_realloc().  When tcp_update_needed is true, vnn->tcp_update_needed
  is set on the paths that add an entry.

  NOTE(review): the talloc_realloc() result is assigned directly to
  tcparray->connections, so a failed realloc would overwrite the only
  pointer to the old storage - confirm that the CTDB_NO_MEMORY error path
  makes this harmless.
 */
2794 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2796 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2797 struct ctdb_tcp_array *tcparray;
2798 struct ctdb_tcp_connection tcp;
2799 struct ctdb_vnn *vnn;
2801 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2803 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2804 ctdb_addr_to_str(&p->dst_addr)));
2810 tcparray = vnn->tcp_array;
2812 /* If this is the first tickle */
2813 if (tcparray == NULL) {
2814 tcparray = talloc_size(ctdb->nodes,
2815 offsetof(struct ctdb_tcp_array, connections) +
2816 sizeof(struct ctdb_tcp_connection) * 1);
2817 CTDB_NO_MEMORY(ctdb, tcparray);
2818 vnn->tcp_array = tcparray;
2821 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2822 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2824 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2825 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2828 if (tcp_update_needed) {
2829 vnn->tcp_update_needed = true;
2835 /* Do we already have this tickle ?*/
2836 tcp.src_addr = p->src_addr;
2837 tcp.dst_addr = p->dst_addr;
2838 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2839 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2840 ctdb_addr_to_str(&tcp.dst_addr),
2841 ntohs(tcp.dst_addr.ip.sin_port),
2846 /* A new tickle, we must add it to the array */
2847 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2848 struct ctdb_tcp_connection,
2850 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2852 vnn->tcp_array = tcparray;
2853 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2854 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2857 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2858 ctdb_addr_to_str(&tcp.dst_addr),
2859 ntohs(tcp.dst_addr.ip.sin_port),
2862 if (tcp_update_needed) {
2863 vnn->tcp_update_needed = true;
2871 called by a daemon to inform us of a TCP connection that one of its
2872 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Remove 'conn' from the tickle array of the public address matching
  conn->dst_addr.

  Visible steps: look up the vnn; log a message when the vnn has no
  tcp_array or when ctdb_tcp_find() does not know the connection; overwrite
  the found entry with the last array element and decrement num; free the
  whole array and reset vnn->tcp_array to NULL once num reaches zero; set
  vnn->tcp_update_needed.  The final "Removed tickle info" message prints
  conn->src_addr, whereas the earlier messages print conn->dst_addr.
 */
2875 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2877 struct ctdb_tcp_connection *tcpp;
2878 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2881 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2882 ctdb_addr_to_str(&conn->dst_addr)));
2886 /* if the array is empty we cant remove it
2887 and we dont need to do anything
2889 if (vnn->tcp_array == NULL) {
2890 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2891 ctdb_addr_to_str(&conn->dst_addr),
2892 ntohs(conn->dst_addr.ip.sin_port)));
2897 /* See if we know this connection
2898 if we dont know this connection then we dont need to do anything
2900 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2902 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2903 ctdb_addr_to_str(&conn->dst_addr),
2904 ntohs(conn->dst_addr.ip.sin_port)));
2909 /* We need to remove this entry from the array.
2910 Instead of allocating a new array and copying data to it
2911 we cheat and just copy the last entry in the existing array
2912 to the entry that is to be removed and just shring the
2915 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2916 vnn->tcp_array->num--;
2918 /* If we deleted the last entry we also need to remove the entire array
2920 if (vnn->tcp_array->num == 0) {
2921 talloc_free(vnn->tcp_array);
2922 vnn->tcp_array = NULL;
2925 vnn->tcp_update_needed = true;
2927 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2928 ctdb_addr_to_str(&conn->src_addr),
2929 ntohs(conn->src_addr.ip.sin_port)));
2934 called by a daemon to inform us of a TCP connection that one of its
2935 clients used is no longer needed in the tickle database
/*
  Control handler: treat indata.dptr as a struct ctdb_tcp_connection and
  hand it to ctdb_remove_tcp_connection().  No validation of indata.dsize
  is visible here.
 */
2937 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2939 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2941 ctdb_remove_tcp_connection(ctdb, conn);
2948 called when a daemon restarts - send all tickles for all public addresses
2949 we are serving immediately to the new node.
/*
  Control handler invoked with the number of a node in 'vnn'.  The only
  body content visible here is a to-do note: pushing the tickles we serve
  to the new node is not implemented.
 */
2951 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2953 /*XXX here we should send all tickles we are serving to the new node */
2959 called when a client structure goes away - hook to remove
2960 elements from the tcp_list in all daemons
/*
  Client teardown hook: repeatedly take the head of client->tcp_list,
  unlink it from the list and remove its connection from the tickle
  database with ctdb_remove_tcp_connection(), until the list is empty.
 */
2962 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2964 while (client->tcp_list) {
2965 struct ctdb_tcp_list *tcp = client->tcp_list;
2966 DLIST_REMOVE(client->tcp_list, tcp);
2967 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
2973 release all IPs on shutdown
/*
  Walk every vnn in ctdb->vnn.  When ctdb_sys_have_ip() reports that the
  public address is not held, only the interface assignment is dropped.
  For the addresses that are processed further, the RELEASE_IP event is
  run with "<iface> <address> <netmask bits>", client connections to the
  address are killed via release_kill_clients(), and the interface is
  unassigned.
 */
2975 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2977 struct ctdb_vnn *vnn;
2979 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2980 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2981 ctdb_vnn_unassign_iface(ctdb, vnn);
2987 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2988 ctdb_vnn_iface_string(vnn),
2989 ctdb_addr_to_str(&vnn->public_address),
2990 vnn->public_netmask_bits);
2991 release_kill_clients(ctdb, &vnn->public_address);
2992 ctdb_vnn_unassign_iface(ctdb, vnn);
2998 get list of public IPs
/*
  Control handler: return the public addresses known to this node as a
  struct ctdb_all_public_ips in 'outdata'.

  The reply buffer is zero-allocated (parented to outdata) for 'num'
  entries.  If the request carries CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
  vnns for which ctdb_vnn_available() is false are skipped while filling.
  Because entries may be skipped, the length is recomputed from the number
  of entries actually written ('i') before outdata->dsize is set.
 */
3000 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
3001 struct ctdb_req_control *c, TDB_DATA *outdata)
3004 struct ctdb_all_public_ips *ips;
3005 struct ctdb_vnn *vnn;
3006 bool only_available = false;
3008 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
3009 only_available = true;
3012 /* count how many public ip structures we have */
3014 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3018 len = offsetof(struct ctdb_all_public_ips, ips) +
3019 num*sizeof(struct ctdb_public_ip);
3020 ips = talloc_zero_size(outdata, len);
3021 CTDB_NO_MEMORY(ctdb, ips);
3024 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3025 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
3028 ips->ips[i].pnn = vnn->pnn;
3029 ips->ips[i].addr = vnn->public_address;
3033 len = offsetof(struct ctdb_all_public_ips, ips) +
3034 i*sizeof(struct ctdb_public_ip);
3036 outdata->dsize = len;
3037 outdata->dptr = (uint8_t *)ips;
3044 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Control handler for the old IPv4-only reply format
  (struct ctdb_all_public_ipsv4).

  Both loops test the address family against AF_INET, so only IPv4 public
  addresses are counted and written.  The reply buffer is zero-allocated
  (parented to outdata) and published in outdata before it is filled; each
  entry receives the vnn's pnn and the sockaddr_in view of its address.
 */
3046 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
3047 struct ctdb_req_control *c, TDB_DATA *outdata)
3050 struct ctdb_all_public_ipsv4 *ips;
3051 struct ctdb_vnn *vnn;
3053 /* count how many public ip structures we have */
3055 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3056 if (vnn->public_address.sa.sa_family != AF_INET) {
3062 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
3063 num*sizeof(struct ctdb_public_ipv4);
3064 ips = talloc_zero_size(outdata, len);
3065 CTDB_NO_MEMORY(ctdb, ips);
3067 outdata->dsize = len;
3068 outdata->dptr = (uint8_t *)ips;
3072 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3073 if (vnn->public_address.sa.sa_family != AF_INET) {
3076 ips->ips[i].pnn = vnn->pnn;
3077 ips->ips[i].sin = vnn->public_address.ip;
/*
  Control handler: describe one public address and the interfaces it may
  be hosted on, as a struct ctdb_control_public_ip_info.

  indata.dptr is read as a ctdb_sock_addr (no size check is visible here).
  The address is looked up among the public addresses, with the node's
  'single ip' vnn as a fallback.  The reply carries the address, its pnn,
  and one ctdb_control_iface_info per name in vnn->ifaces (name, link
  state, reference count).  active_idx starts as 0xFFFFFFFF and is set to
  the index of the interface currently assigned to the vnn, if any.

  NOTE(review): interface names are copied with strcpy(); confirm that
  cur->name always fits the destination name field.
 */
3084 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
3085 struct ctdb_req_control *c,
3090 ctdb_sock_addr *addr;
3091 struct ctdb_control_public_ip_info *info;
3092 struct ctdb_vnn *vnn;
3094 addr = (ctdb_sock_addr *)indata.dptr;
3096 vnn = find_public_ip_vnn(ctdb, addr);
3098 /* if it is not a public ip it could be our 'single ip' */
3099 if (ctdb->single_ip_vnn) {
3100 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
3101 vnn = ctdb->single_ip_vnn;
3106 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
3107 "'%s'not a public address\n",
3108 ctdb_addr_to_str(addr)));
3112 /* count how many interfaces are listed for this public ip */
3114 for (;vnn->ifaces[num];) {
3118 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
3119 num*sizeof(struct ctdb_control_iface_info);
3120 info = talloc_zero_size(outdata, len);
3121 CTDB_NO_MEMORY(ctdb, info);
3123 info->ip.addr = vnn->public_address;
3124 info->ip.pnn = vnn->pnn;
3125 info->active_idx = 0xFFFFFFFF;
3127 for (i=0; vnn->ifaces[i]; i++) {
3128 struct ctdb_iface *cur;
3130 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
3132 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
3136 if (vnn->iface == cur) {
3137 info->active_idx = i;
3139 strcpy(info->ifaces[i].name, cur->name);
3140 info->ifaces[i].link_state = cur->link_up;
3141 info->ifaces[i].references = cur->references;
3144 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
3145 i*sizeof(struct ctdb_control_iface_info);
3147 outdata->dsize = len;
3148 outdata->dptr = (uint8_t *)info;
/*
  Control handler: return every interface on ctdb->ifaces as a
  struct ctdb_control_get_ifaces.

  The reply buffer is zero-allocated (parented to outdata) and each entry
  receives the interface name, its link_up flag (as link_state) and its
  reference count.  The length is recomputed from the number of entries
  written before outdata is set.

  NOTE(review): interface names are copied with strcpy(); confirm that
  cur->name always fits the destination name field.
 */
3153 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
3154 struct ctdb_req_control *c,
3158 struct ctdb_control_get_ifaces *ifaces;
3159 struct ctdb_iface *cur;
3161 /* count how many interfaces we have */
3163 for (cur=ctdb->ifaces;cur;cur=cur->next) {
3167 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
3168 num*sizeof(struct ctdb_control_iface_info);
3169 ifaces = talloc_zero_size(outdata, len);
3170 CTDB_NO_MEMORY(ctdb, ifaces);
3173 for (cur=ctdb->ifaces;cur;cur=cur->next) {
3174 strcpy(ifaces->ifaces[i].name, cur->name);
3175 ifaces->ifaces[i].link_state = cur->link_up;
3176 ifaces->ifaces[i].references = cur->references;
3180 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
3181 i*sizeof(struct ctdb_control_iface_info);
3183 outdata->dsize = len;
3184 outdata->dptr = (uint8_t *)ifaces;
/*
  Control handler: set the link state of a named interface.

  indata.dptr is read as a struct ctdb_control_iface_info.  The request is
  checked in three ways before use: the byte at name[CTDB_IFACE_SIZE] must
  be NUL, link_state must be one of the values handled by the switch, and
  references must be 0.  The interface is then looked up by name and, when
  the requested state differs from iface->link_up, the change is logged
  and stored.  The log level is DEBUG_ERR when the interface was up
  (i.e. it is going down) and DEBUG_NOTICE otherwise.
 */
3189 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
3190 struct ctdb_req_control *c,
3193 struct ctdb_control_iface_info *info;
3194 struct ctdb_iface *iface;
3195 bool link_up = false;
3197 info = (struct ctdb_control_iface_info *)indata.dptr;
3199 if (info->name[CTDB_IFACE_SIZE] != '\0') {
3200 int len = strnlen(info->name, CTDB_IFACE_SIZE);
3201 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
3202 len, len, info->name));
3206 switch (info->link_state) {
3214 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
3215 (unsigned int)info->link_state));
3219 if (info->references != 0) {
3220 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
3221 (unsigned int)info->references));
3225 iface = ctdb_find_iface(ctdb, info->name);
3226 if (iface == NULL) {
3230 if (link_up == iface->link_up) {
3234 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
3235 ("iface[%s] has changed it's link status %s => %s\n",
3237 iface->link_up?"up":"down",
3238 link_up?"up":"down"));
3240 iface->link_up = link_up;
3246 structure containing the listening socket and the list of tcp connections
3247 that the ctdb daemon is to kill
/*
  Per-vnn state for killing TCP connections.  One instance is allocated on
  the vnn by ctdb_killtcp_add_connection() and stored in vnn->killtcp.
 */
3249 struct ctdb_kill_tcp {
/* public address (vnn) this state belongs to */
3250 struct ctdb_vnn *vnn;
/* owning ctdb context; its event context is used for the retry timer */
3251 struct ctdb_context *ctdb;
/* fd event that runs capture_tcp_handler() on the capture socket */
3253 struct fd_event *fde;
/* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
3254 trbt_tree_t *connections;
3259 a tcp connection that is to be killed
/*
  One TCP connection queued for killing; stored in
  ctdb_kill_tcp.connections.
 */
3261 struct ctdb_killtcp_con {
/* the two endpoints of the connection, in canonicalized form */
3262 ctdb_sock_addr src_addr;
3263 ctdb_sock_addr dst_addr;
/* back pointer to the owning kill state */
3265 struct ctdb_kill_tcp *killtcp;
3268 /* this function is used to create a key to represent this socketpair
3269 in the killtcp tree.
3270 this key is used to insert and lookup matching socketpairs that are
3271 to be tickled and RST
/*
  Build the KILLTCP_KEYLEN-word lookup key for a (src, dst) socket pair.

  The key is written into a function-local static array that is zeroed on
  every call, so presumably the result is only valid until the next call -
  confirm against the return statement.  Both addresses must be of the
  same family.  For IPv4 the key is dst address, src address, dst port,
  src port (the remaining words stay zero).  For IPv6 the four 32-bit
  words of each address are interleaved dst/src from word 3 down to
  word 0, followed by the dst and src ports.  Values are stored exactly as
  found in the sockaddr; no byte-order conversion is applied here.
 */
3273 #define KILLTCP_KEYLEN 10
3274 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
3276 static uint32_t key[KILLTCP_KEYLEN];
3278 bzero(key, sizeof(key));
3280 if (src->sa.sa_family != dst->sa.sa_family) {
3281 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
3285 switch (src->sa.sa_family) {
3287 key[0] = dst->ip.sin_addr.s_addr;
3288 key[1] = src->ip.sin_addr.s_addr;
3289 key[2] = dst->ip.sin_port;
3290 key[3] = src->ip.sin_port;
3293 uint32_t *dst6_addr32 =
3294 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
3295 uint32_t *src6_addr32 =
3296 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
3297 key[0] = dst6_addr32[3];
3298 key[1] = src6_addr32[3];
3299 key[2] = dst6_addr32[2];
3300 key[3] = src6_addr32[2];
3301 key[4] = dst6_addr32[1];
3302 key[5] = src6_addr32[1];
3303 key[6] = dst6_addr32[0];
3304 key[7] = src6_addr32[0];
3305 key[8] = dst->ip6.sin6_port;
3306 key[9] = src->ip6.sin6_port;
3310 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
3318 called when we get a read event on the raw socket
/*
  fd-event callback for the capture socket of a struct ctdb_kill_tcp
  (passed as private_data).

  Only read events are of interest.  A packet is parsed with
  ctdb_sys_read_tcp_packet(), yielding its endpoints and sequence numbers;
  a non-zero result from that call is treated as "not a packet we want".
  The endpoints are turned into a key with killtcp_key(&src, &dst) and
  looked up in killtcp->connections.  For a known connection a message is
  logged and ctdb_sys_send_tcp() is called with the stored endpoints, the
  captured ack_seq/seq values and a final argument of 1.
 */
3320 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
3321 uint16_t flags, void *private_data)
3323 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
3324 struct ctdb_killtcp_con *con;
3325 ctdb_sock_addr src, dst;
3326 uint32_t ack_seq, seq;
3328 if (!(flags & EVENT_FD_READ)) {
3332 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
3333 killtcp->private_data,
3335 &ack_seq, &seq) != 0) {
3336 /* probably a non-tcp ACK packet */
3340 /* check if we have this guy in our list of connections
3343 con = trbt_lookuparray32(killtcp->connections,
3344 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
3346 /* no this was some other packet we can just ignore */
3350 /* This one has been tickled !
3351 now reset him and remove him from the list.
3353 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
3354 ntohs(con->dst_addr.ip.sin_port),
3355 ctdb_addr_to_str(&con->src_addr),
3356 ntohs(con->src_addr.ip.sin_port)));
3358 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
3363 /* when traversing the list of all tcp connections to send tickle acks to
3364 (so that we can capture the ack coming back and kill the connection
3366 this callback is called for each connection we are currently trying to kill
/*
  Tree-traversal callback used by ctdb_tickle_sentenced_connections().
  'data' is a struct ctdb_killtcp_con; 'param' is the temporary talloc
  context the caller frees after the traversal.

  A connection whose count has reached 5 is reparented onto 'param' so it
  is released by the caller instead of being deleted during the walk.
  Any other connection is tickled again using its dst/src addresses.
 */
3368 static int tickle_connection_traverse(void *param, void *data)
3370 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
3372 /* have tried too many times, just give up */
3373 if (con->count >= 5) {
3374 /* can't delete in traverse: reparent to delete_cons */
3375 talloc_steal(param, con);
3379 /* otherwise, try tickling it again */
3382 (ctdb_sock_addr *)&con->dst_addr,
3383 (ctdb_sock_addr *)&con->src_addr,
3390 called every second until all sentenced connections have been reset
/*
  Timer callback; private_data is the struct ctdb_kill_tcp.

  Every queued connection is visited with tickle_connection_traverse().
  Connections that gave up are collected on the temporary 'delete_cons'
  context and freed after the traversal.  If the connection tree is then
  missing or empty, the whole killtcp structure is freed; otherwise the
  timer is re-armed to fire again one second later.
 */
3392 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
3393 struct timeval t, void *private_data)
3395 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
3396 void *delete_cons = talloc_new(NULL);
3398 /* loop over all connections sending tickle ACKs */
3399 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
3401 /* now we've finished traverse, it's safe to do deletion. */
3402 talloc_free(delete_cons);
3404 /* If there are no more connections to kill we can remove the
3405 entire killtcp structure
3407 if ( (killtcp->connections == NULL) ||
3408 (killtcp->connections->root == NULL) ) {
3409 talloc_free(killtcp);
3413 /* try tickling them again in a seconds time
3415 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3416 ctdb_tickle_sentenced_connections, killtcp);
3420 destroy the killtcp structure
/*
  talloc destructor for struct ctdb_kill_tcp (installed by
  ctdb_killtcp_add_connection()).

  Before touching killtcp->vnn it checks that the vnn is still on
  ctdb->vnn and that the vnn still points back at this structure.  Only
  then is vnn->killtcp cleared, so a stale back pointer is not left behind.
 */
3422 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
3424 struct ctdb_vnn *tmpvnn;
3426 /* verify that this vnn is still active */
3427 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
3428 if (tmpvnn == killtcp->vnn) {
3433 if (tmpvnn == NULL) {
3437 if (killtcp->vnn->killtcp != killtcp) {
3441 killtcp->vnn->killtcp = NULL;
3447 /* nothing fancy here, just unconditionally replace any existing
3448 connection structure with the new one.
3450 dont even free the old one if it did exist, that one is talloc_stolen
3451 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback given to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection(), which passes the new connection record as
  the callback argument.
 */
3454 static void *add_killtcp_callback(void *parm, void *data)
3460 add a tcp socket to the list of connections we want to RST
/*
  Queue a TCP connection for killing.

  Both endpoints are canonicalized first.  The owning vnn is searched by
  destination address, then by source address, with the 'single ip' vnn as
  a last resort (matched against the destination).  The first request for
  a vnn allocates its struct ctdb_kill_tcp (capture_fd = -1, a fresh
  connection tree, destructor installed).  A ctdb_killtcp_con record is
  then allocated on killtcp and inserted into the tree; note that the
  insert key is built as killtcp_key(dst, src), i.e. with the endpoints
  swapped relative to how they are stored in the record.  If no capture
  socket exists yet it is opened on the vnn's interface, and when no fd
  event exists one is registered for capture_tcp_handler() (with
  auto-close) together with a one-second timer for
  ctdb_tickle_sentenced_connections().  The trailing lines free
  vnn->killtcp and reset it to NULL.
 */
3462 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
3466 ctdb_sock_addr src, dst;
3467 struct ctdb_kill_tcp *killtcp;
3468 struct ctdb_killtcp_con *con;
3469 struct ctdb_vnn *vnn;
3471 ctdb_canonicalize_ip(s, &src);
3472 ctdb_canonicalize_ip(d, &dst);
3474 vnn = find_public_ip_vnn(ctdb, &dst);
3476 vnn = find_public_ip_vnn(ctdb, &src);
3479 /* if it is not a public ip it could be our 'single ip' */
3480 if (ctdb->single_ip_vnn) {
3481 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
3482 vnn = ctdb->single_ip_vnn;
3487 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
3491 killtcp = vnn->killtcp;
3493 /* If this is the first connection to kill we must allocate
3496 if (killtcp == NULL) {
3497 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
3498 CTDB_NO_MEMORY(ctdb, killtcp);
3501 killtcp->ctdb = ctdb;
3502 killtcp->capture_fd = -1;
3503 killtcp->connections = trbt_create(killtcp, 0);
3505 vnn->killtcp = killtcp;
3506 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
3511 /* create a structure that describes this connection we want to
3512 RST and store it in killtcp->connections
3514 con = talloc(killtcp, struct ctdb_killtcp_con);
3515 CTDB_NO_MEMORY(ctdb, con);
3516 con->src_addr = src;
3517 con->dst_addr = dst;
3519 con->killtcp = killtcp;
3522 trbt_insertarray32_callback(killtcp->connections,
3523 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
3524 add_killtcp_callback, con);
3527 If we dont have a socket to listen on yet we must create it
3529 if (killtcp->capture_fd == -1) {
3530 const char *iface = ctdb_vnn_iface_string(vnn);
3531 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
3532 if (killtcp->capture_fd == -1) {
3533 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
3534 "socket on iface '%s' for killtcp (%s)\n",
3535 iface, strerror(errno)));
3541 if (killtcp->fde == NULL) {
3542 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
3544 capture_tcp_handler, killtcp);
3545 tevent_fd_set_auto_close(killtcp->fde);
3547 /* We also need to set up some events to tickle all these connections
3548 until they are all reset
3550 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3551 ctdb_tickle_sentenced_connections, killtcp);
3554 /* tickle him once now */
3563 talloc_free(vnn->killtcp);
3564 vnn->killtcp = NULL;
3569 kill a TCP connection.
/*
  Control handler: read indata.dptr as a struct ctdb_control_killtcp and
  pass its source and destination addresses to
  ctdb_killtcp_add_connection(), returning that function's result.
  No validation of indata.dsize is visible here.
 */
3571 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
3573 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
3575 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
3579 called by a daemon to inform us of the entire list of TCP tickles for
3580 a particular public address.
3581 this control should only be sent by the node that is currently serving
3582 that public address.
/*
  Control handler: replace the complete tickle list of one public address.

  indata is a struct ctdb_control_tcp_tickle_list.  Its size is validated
  in two steps: it must reach up to tickles.connections, and it must be
  large enough for tickles.num connection entries.  The vnn is looked up
  from list->addr.  Any existing vnn->tcp_array is freed, a new array is
  built under ctdb->nodes, the received connections are copied into it
  with memcpy(), and the array is finally reparented onto the vnn with
  talloc_steal().
 */
3584 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
3586 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
3587 struct ctdb_tcp_array *tcparray;
3588 struct ctdb_vnn *vnn;
3590 /* We must at least have tickles.num or else we cant verify the size
3591 of the received data blob
3593 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3594 tickles.connections)) {
3595 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
3599 /* verify that the size of data matches what we expect */
3600 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3601 tickles.connections)
3602 + sizeof(struct ctdb_tcp_connection)
3603 * list->tickles.num) {
3604 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3608 vnn = find_public_ip_vnn(ctdb, &list->addr);
3610 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3611 ctdb_addr_to_str(&list->addr)));
3616 /* remove any old ticklelist we might have */
3617 talloc_free(vnn->tcp_array);
3618 vnn->tcp_array = NULL;
3620 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3621 CTDB_NO_MEMORY(ctdb, tcparray);
3623 tcparray->num = list->tickles.num;
3625 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3626 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3628 memcpy(tcparray->connections, &list->tickles.connections[0],
3629 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3631 /* We now have a new fresh tickle list array for this vnn */
3632 vnn->tcp_array = talloc_steal(vnn, tcparray);
3638 called to return the full list of tickles for the public address associated
3639 with the provided vnn
/*
  Control handler: return the tickle list of the public address given in
  indata (read as a ctdb_sock_addr; no size check is visible here).

  The reply is a struct ctdb_control_tcp_tickle_list allocated with
  talloc_size() on outdata, sized for 'num' connections.  The connection
  entries are copied from vnn->tcp_array with memcpy().
 */
3641 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3643 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3644 struct ctdb_control_tcp_tickle_list *list;
3645 struct ctdb_tcp_array *tcparray;
3647 struct ctdb_vnn *vnn;
3649 vnn = find_public_ip_vnn(ctdb, addr);
3651 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3652 ctdb_addr_to_str(addr)));
3657 tcparray = vnn->tcp_array;
3659 num = tcparray->num;
3664 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3665 tickles.connections)
3666 + sizeof(struct ctdb_tcp_connection) * num;
3668 outdata->dptr = talloc_size(outdata, outdata->dsize);
3669 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3670 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3673 list->tickles.num = num;
3675 memcpy(&list->tickles.connections[0], tcparray->connections,
3676 sizeof(struct ctdb_tcp_connection) * num);
3684 set the list of all tcp tickles for a public address
/*
  Marshal 'tcparray' into a struct ctdb_control_tcp_tickle_list and send it
  as CTDB_CONTROL_SET_TCP_TICKLE_LIST.

  The message buffer is allocated on ctdb, filled with the connection
  count and a memcpy() of the connections, sent with
  ctdb_daemon_send_control() to CTDB_BROADCAST_CONNECTED using
  CTDB_CTRL_FLAG_NOREPLY, and freed afterwards.

  NOTE(review): the visible send call does not reference the 'timeout' or
  'destnode' parameters - confirm whether they are intentionally unused.
  NOTE(review): the only visible talloc_free(data.dptr) comes after the
  failure message; if the failure branch returns early the buffer would
  stay allocated on ctdb - confirm.
 */
3686 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3687 struct timeval timeout, uint32_t destnode,
3688 ctdb_sock_addr *addr,
3689 struct ctdb_tcp_array *tcparray)
3693 struct ctdb_control_tcp_tickle_list *list;
3696 num = tcparray->num;
3701 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3702 tickles.connections) +
3703 sizeof(struct ctdb_tcp_connection) * num;
3704 data.dptr = talloc_size(ctdb, data.dsize);
3705 CTDB_NO_MEMORY(ctdb, data.dptr);
3707 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3709 list->tickles.num = num;
3711 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3714 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3715 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3716 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3718 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3722 talloc_free(data.dptr);
3729 perform tickle updates if required
/*
  Periodic timer callback; private_data is the ctdb context.

  For every vnn whose pnn equals this node's pnn and whose
  tcp_update_needed flag is set, the tickle list is pushed out with
  ctdb_ctrl_set_tcp_tickles(); a failure is only logged.  The callback then
  re-arms itself on ctdb->tickle_update_context after
  tunable.tickle_update_interval seconds.
 */
3731 static void ctdb_update_tcp_tickles(struct event_context *ev,
3732 struct timed_event *te,
3733 struct timeval t, void *private_data)
3735 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3737 struct ctdb_vnn *vnn;
3739 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3740 /* we only send out updates for public addresses that
3743 if (ctdb->pnn != vnn->pnn) {
3746 /* We only send out the updates if we need to */
3747 if (!vnn->tcp_update_needed) {
3750 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3752 CTDB_BROADCAST_CONNECTED,
3753 &vnn->public_address,
3756 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3757 ctdb_addr_to_str(&vnn->public_address)));
3761 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3762 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3763 ctdb_update_tcp_tickles, ctdb);
3768 start periodic update of tcp tickles
/*
  Create ctdb->tickle_update_context (a talloc child of ctdb) and schedule
  the first ctdb_update_tcp_tickles() run on it after
  tunable.tickle_update_interval seconds.  Because the timer is parented
  to that context, freeing the context presumably cancels the periodic
  update - confirm against event_add_timed() semantics.
 */
3770 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3772 ctdb->tickle_update_context = talloc_new(ctdb);
3774 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3775 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3776 ctdb_update_tcp_tickles, ctdb);
/*
  State for a series of gratuitous ARPs started by
  ctdb_control_send_gratious_arp() and driven by send_gratious_arp().
 */
3782 struct control_gratious_arp {
/* ctdb context whose event loop runs the repeat timer */
3783 struct ctdb_context *ctdb;
/* address to announce */
3784 ctdb_sock_addr addr;
3790 send a control_gratuitous arp
/*
  Timer callback; private_data is a struct control_gratious_arp.

  Sends one ARP for arp->addr on arp->iface via ctdb_sys_send_arp() and
  logs a failure together with strerror(errno).  arp->count is compared
  with CTDB_ARP_REPEAT to decide when the series is finished; until then
  the callback re-arms itself CTDB_ARP_INTERVAL seconds later, with the
  timer parented to the state object.
 */
3792 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3793 struct timeval t, void *private_data)
3796 struct control_gratious_arp *arp = talloc_get_type(private_data,
3797 struct control_gratious_arp);
3799 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3801 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3802 arp->iface, strerror(errno)));
3807 if (arp->count == CTDB_ARP_REPEAT) {
3812 event_add_timed(arp->ctdb->ev, arp,
3813 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3814 send_gratious_arp, arp);
/*
  Control handler: start sending gratuitous ARPs for an address.

  indata is a struct ctdb_control_gratious_arp.  Its size is checked
  against the fixed header (up to 'iface') and against header plus
  gratious_arp->len.  A control_gratious_arp state object is allocated on
  ctdb, the address and a copy of the interface name are stored in it, and
  send_gratious_arp() is scheduled to run immediately (timeval_zero()).

  NOTE(review): the interface name is duplicated with talloc_strdup(),
  which relies on a NUL terminator inside the received bytes - confirm the
  sender guarantees one.
 */
3821 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3823 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3824 struct control_gratious_arp *arp;
3826 /* verify the size of indata */
3827 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3828 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3829 (unsigned)indata.dsize,
3830 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3834 ( offsetof(struct ctdb_control_gratious_arp, iface)
3835 + gratious_arp->len ) ){
3837 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3838 "but should be %u bytes\n",
3839 (unsigned)indata.dsize,
3840 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
3845 arp = talloc(ctdb, struct control_gratious_arp);
3846 CTDB_NO_MEMORY(ctdb, arp);
3849 arp->addr = gratious_arp->addr;
3850 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3851 CTDB_NO_MEMORY(ctdb, arp->iface);
3854 event_add_timed(arp->ctdb->ev, arp,
3855 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler: add a public address at runtime.

  indata is a struct ctdb_control_ip_iface.  Its size is checked against
  the fixed header (up to 'iface') and against header plus pub->len.  The
  address, mask and interface string are then passed to
  ctdb_add_public_address() with a final argument of true, and a failure
  of that call is logged.
 */
3860 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3862 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3865 /* verify the size of indata */
3866 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3867 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3871 ( offsetof(struct ctdb_control_ip_iface, iface)
3874 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3875 "but should be %u bytes\n",
3876 (unsigned)indata.dsize,
3877 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3881 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
3883 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3886 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
3894 called when releaseip event finishes for del_public_address
/*
  Event-script completion callback used by
  ctdb_control_del_public_address().  private_data is the temporary talloc
  context created there (it holds the removed vnn); freeing it here
  releases everything that was parented to it.  'status' is not used on
  the visible line.
 */
3896 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
3899 talloc_free(private_data);
/*
  Control handler: delete a public address at runtime.

  indata is a struct ctdb_control_ip_iface, size-checked against the fixed
  header and against header plus pub->len.  The vnn list is searched for
  the address with ctdb_same_ip().  A match is unlinked from ctdb->vnn and
  reparented onto a temporary context, and orphaned interfaces are cleaned
  up.  If this node does not hold the address (vnn->pnn != ctdb->pnn), the
  interface is unassigned and the temporary context is freed at once.
  Otherwise the RELEASE_IP event is started with delete_ip_callback(),
  which frees the temporary context on completion, and the interface is
  unassigned.

  NOTE(review): mem_ctx is freed either directly or by the callback; if
  ctdb_event_script_callback() fails without ever invoking the callback,
  the context would stay attached to ctdb - confirm.
 */
3902 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3904 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3905 struct ctdb_vnn *vnn;
3908 /* verify the size of indata */
3909 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3910 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3914 ( offsetof(struct ctdb_control_ip_iface, iface)
3917 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3918 "but should be %u bytes\n",
3919 (unsigned)indata.dsize,
3920 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3924 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
3926 /* walk over all public addresses until we find a match */
3927 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3928 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3929 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
3931 DLIST_REMOVE(ctdb->vnn, vnn);
3932 talloc_steal(mem_ctx, vnn);
3933 ctdb_remove_orphaned_ifaces(ctdb, vnn, mem_ctx);
3934 if (vnn->pnn != ctdb->pnn) {
3935 if (vnn->iface != NULL) {
3936 ctdb_vnn_unassign_iface(ctdb, vnn);
3938 talloc_free(mem_ctx);
3943 ret = ctdb_event_script_callback(ctdb,
3944 mem_ctx, delete_ip_callback, mem_ctx,
3946 CTDB_EVENT_RELEASE_IP,
3948 ctdb_vnn_iface_string(vnn),
3949 ctdb_addr_to_str(&vnn->public_address),
3950 vnn->public_netmask_bits);
3951 if (vnn->iface != NULL) {
3952 ctdb_vnn_unassign_iface(ctdb, vnn);
/*
  State carried from ctdb_control_ipreallocated() to
  ctdb_ipreallocated_callback().
 */
3965 struct ipreallocated_callback_state {
/* the control request to answer once the event script has finished */
3966 struct ctdb_req_control *c;
/*
  Completion callback for the "ipreallocated" event script; 'p' is the
  struct ipreallocated_callback_state.

  A failure is logged with its status, and a status of -ETIME additionally
  makes the node ban itself via ctdb_ban_self().  The stored control
  request is then answered with 'status' as its result.
 */
3969 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3970 int status, void *p)
3972 struct ipreallocated_callback_state *state =
3973 talloc_get_type(p, struct ipreallocated_callback_state);
3977 (" \"ipreallocated\" event script failed (status %d)\n",
3979 if (status == -ETIME) {
3980 ctdb_ban_self(ctdb);
3984 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3988 /* A control to run the ipreallocated event */
/*
  Control handler that runs the "ipreallocated" event and replies
  asynchronously.

  A callback state is allocated on ctdb and the CTDB_EVENT_IPREALLOCATED
  script is started with ctdb_ipreallocated_callback() as its completion
  handler; a failure to start it is logged.  On success the request 'c' is
  reparented onto the state (so it stays alive until the callback replies)
  and *async_reply is set to true.
 */
3989 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3990 struct ctdb_req_control *c,
3994 struct ipreallocated_callback_state *state;
3996 state = talloc(ctdb, struct ipreallocated_callback_state);
3997 CTDB_NO_MEMORY(ctdb, state);
3999 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
4001 ret = ctdb_event_script_callback(ctdb, state,
4002 ctdb_ipreallocated_callback, state,
4003 false, CTDB_EVENT_IPREALLOCATED,
4007 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
4012 /* tell the control that we will reply asynchronously */
4013 state->c = talloc_steal(state, c);
4014 *async_reply = true;
4020 /* This function is called from the recovery daemon to verify that a remote
4021 node has the expected ip allocation.
4022 This is verified against ctdb->ip_tree
/*
  Compare the public-IP assignment reported by a remote node ('ips') with
  the expected assignment stored in ctdb->ip_tree.

  Without an ip_tree no expectation exists yet and that case is handled up
  front.  Each reported address is looked up in the tree by ip_key(); an
  address missing from the tree is logged.  Entries where either side has
  pnn == -1 get separate treatment before the comparison, and a differing
  pnn is logged as an inconsistent allocation.
 */
4024 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
4026 struct ctdb_public_ip_list *tmp_ip;
4029 if (ctdb->ip_tree == NULL) {
4030 /* dont know the expected allocation yet, assume remote node
4039 for (i=0; i<ips->num; i++) {
4040 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
4041 if (tmp_ip == NULL) {
4042 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4046 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
4050 if (tmp_ip->pnn != ips->ips[i].pnn) {
4051 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
  Record in ctdb->ip_tree that the address in 'ip' is now held by
  ip->pnn.

  A missing ip_tree, or an address with no record in the tree, is logged
  as an error.  Otherwise the change (old pnn to new pnn) is logged at
  NOTICE level and the record's pnn is overwritten.
 */
4059 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
4061 struct ctdb_public_ip_list *tmp_ip;
4063 if (ctdb->ip_tree == NULL) {
4064 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
4068 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
4069 if (tmp_ip == NULL) {
4070 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
4074 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
4075 tmp_ip->pnn = ip->pnn;
// State for one in-progress "reload public ips" request; allocated in
// ctdb_control_reload_public_ips.
// NOTE(review): members named status, fd and child are used elsewhere in
// this file but are declared on lines not visible in this view.
4081 struct ctdb_reloadips_handle {
// Daemon context this handle belongs to.
4082 struct ctdb_context *ctdb;
// Control request that is answered from ctdb_reloadips_destructor.
4083 struct ctdb_req_control *c;
// fd event returned by event_add_fd() for fd[0].
4087 struct fd_event *fde;
// talloc destructor for a reload handle (installed in
// ctdb_control_reload_public_ips). It clears ctdb->reload_ips when that
// pointer still refers to this handle, sends the control reply carrying
// h->status, and sends SIGKILL to the child process.
4090 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
// Only reset the context pointer if it still refers to this handle.
4092 if (h == h->ctdb->reload_ips) {
4093 h->ctdb->reload_ips = NULL;
// Answer the stored control request with the recorded status.
4096 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
4099 ctdb_kill(h->ctdb, h->child, SIGKILL);
// Timer callback armed by ctdb_control_reload_public_ips with a 120 second
// timeout; private_data is the ctdb_reloadips_handle for the request.
// NOTE(review): what is done with h is on lines not visible in this view.
4103 static void ctdb_reloadips_timeout_event(struct event_context *ev,
4104 struct timed_event *te,
4105 struct timeval t, void *private_data)
4107 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
// fd event callback for the read end of the reloadips pipe; private_data is
// the ctdb_reloadips_handle for the request.
// NOTE(review): the lines that follow the error check are not visible in
// this view.
4112 static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde,
4113 uint16_t flags, void *private_data)
4115 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
// The child writes a single result byte; a short read or a non-zero byte is
// reported as a failure.
4120 ret = read(h->fd[0], &res, 1);
4121 if (ret < 1 || res != 0) {
4122 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
// Worker called from the h->child == 0 branch of
// ctdb_control_reload_public_ips. It fetches the public IPs currently known
// to the local node, re-reads the public addresses file, then issues
// del-public-ip controls for addresses that disappeared and add-public-ip
// controls for addresses that are new.
// NOTE(review): return statements and some loop-control lines are not
// visible in this view.
4130 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
// Temporary talloc context; it is freed on the visible error paths.
4132 TALLOC_CTX *mem_ctx = talloc_new(NULL);
4133 struct ctdb_all_public_ips *ips;
4134 struct ctdb_vnn *vnn;
4137 /* read the ip allocation from the local node */
4138 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
4140 DEBUG(DEBUG_ERR, ("Unable to get public ips from local node\n"));
4141 talloc_free(mem_ctx);
4145 /* re-read the public ips file */
4147 if (ctdb_set_public_addresses(ctdb, false) != 0) {
4148 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
4149 talloc_free(mem_ctx);
4154 /* check the previous list of ips and scan for ips that have been
// Outer loop walks the previously known addresses; the inner loop searches
// the vnn list for the same address.
4157 for (i = 0; i < ips->num; i++) {
4158 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
4159 if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
4164 /* we need to delete this ip, no longer available on this node */
4166 struct ctdb_control_ip_iface pub;
4168 DEBUG(DEBUG_NOTICE,("RELOADIPS: IP%s is no longer available on this node. Deleting it.\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4169 pub.addr = ips->ips[i].addr;
4173 ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
4175 DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4182 /* loop over all new ones and check the ones we need to add */
4183 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
4184 for (i = 0; i < ips->num; i++) {
4185 if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
// i reaching ips->num means the inner loop found no match, i.e. this vnn
// address was not in the previous list.
4189 if (i == ips->num) {
4190 struct ctdb_control_ip_iface pub;
4191 const char *ifaces = NULL;
4194 DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
4196 pub.addr = vnn->public_address;
4197 pub.mask = vnn->public_netmask_bits;
// Build a comma-separated list of the interface names of this vnn, starting
// from the first entry.
4200 ifaces = vnn->ifaces[0];
// NOTE(review): every iteration allocates a new string on vnn and the
// previous intermediate string is not freed in the visible code.
4202 while (vnn->ifaces[iface] != NULL) {
4203 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
// NOTE(review): pub is a stack object and strlen(ifaces)+1 bytes are copied
// into pub.iface; confirm that the iface member is large enough to hold the
// whole list, otherwise this writes past the end of pub.
4206 pub.len = strlen(ifaces)+1;
4207 memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
4209 ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
4211 DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
4220 /* This control is sent to force the node to re-read the public addresses file
4221 and drop any addresses we should no longer host, and add new addresses
4222 that we are now able to host
// Control handler that reloads the public addresses in a forked child.
// It creates a pipe, forks, lets the child run ctdb_reloadips_child and
// report a one-byte result over the pipe, and registers an fd event plus a
// 120 second timer in the parent. The control is answered later, from
// ctdb_reloadips_destructor.
// NOTE(review): error-path cleanup, returns and the initialisation of some
// handle members are on lines not visible in this view.
4224 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
4226 struct ctdb_reloadips_handle *h;
// Remember our own pid so the child can later poll whether we still exist.
4227 pid_t parent = getpid();
// Drop any handle left from an earlier reload request before starting a
// new one.
4229 if (ctdb->reload_ips != NULL) {
4230 talloc_free(ctdb->reload_ips);
4231 ctdb->reload_ips = NULL;
4234 h = talloc(ctdb, struct ctdb_reloadips_handle);
4235 CTDB_NO_MEMORY(ctdb, h);
// NOTE(review): the message below names ctdb_freeze_lock although this is
// the reloadips pipe; it looks copied from elsewhere - confirm and correct.
4240 if (pipe(h->fd) == -1) {
4241 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
4246 h->child = ctdb_fork(ctdb);
4247 if (h->child == (pid_t)-1) {
4248 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
// A zero result from ctdb_fork is presumably the child side of the fork
// (ctdb_fork is not shown here).
4256 if (h->child == 0) {
4257 signed char res = 0;
4260 debug_extra = talloc_asprintf(NULL, "reloadips:");
4262 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
4263 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
4266 res = ctdb_reloadips_child(ctdb);
4268 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
// Report the one-byte result on the write end of the pipe.
// NOTE(review): the return value of write() is ignored.
4272 write(h->fd[1], &res, 1);
4273 /* make sure we die when our parent dies */
// Signal 0 only probes for the parent; the loop ends once the probe fails
// with ESRCH.
4274 while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
// Re-parent the request onto the handle so the destructor can reply to it.
4280 h->c = talloc_steal(h, c);
4283 set_close_on_exec(h->fd[0]);
4285 talloc_set_destructor(h, ctdb_reloadips_destructor);
// Watch the read end of the pipe for the child's result byte; the event is
// parented to h.
4288 h->fde = event_add_fd(ctdb->ev, h, h->fd[0],
4289 EVENT_FD_READ, ctdb_reloadips_child_handler,
4291 tevent_fd_set_auto_close(h->fde);
// Arm a 120 second timer, also parented to h, that calls
// ctdb_reloadips_timeout_event.
4293 event_add_timed(ctdb->ev, h,
4294 timeval_current_ofs(120, 0),
4295 ctdb_reloadips_timeout_event, h);
4297 /* we reply later */
4298 *async_reply = true;