4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
118 * once) and then walking ctdb->ifaces once and deleting those not in
119 * the tree. Let's go to one of those if the naive implementation
120 * causes problems... :-)
122 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_interface *i, *next;
127 /* For each interface, check if there's an IP using it. */
128 for (i = ctdb->ifaces; i != NULL; i = next) {
133 /* Only consider interfaces named in the given VNN. */
134 if (!vnn_has_interface_with_name(vnn, i->name)) {
138 /* Is the "single IP" on this interface? */
139 if ((ctdb->single_ip_vnn != NULL) &&
140 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
141 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
142 /* Found, next interface please... */
145 /* Search for a vnn with this interface. */
147 for (tv=ctdb->vnn; tv; tv=tv->next) {
148 if (vnn_has_interface_with_name(tv, i->name)) {
155 /* None of the VNNs are using this interface. */
156 DLIST_REMOVE(ctdb->ifaces, i);
163 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
166 struct ctdb_interface *i;
168 for (i=ctdb->ifaces;i;i=i->next) {
169 if (strcmp(i->name, iface) == 0) {
177 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
178 struct ctdb_vnn *vnn)
181 struct ctdb_interface *cur = NULL;
182 struct ctdb_interface *best = NULL;
184 for (i=0; vnn->ifaces[i]; i++) {
186 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
200 if (cur->references < best->references) {
209 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
210 struct ctdb_vnn *vnn)
212 struct ctdb_interface *best = NULL;
215 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
216 "still assigned to iface '%s'\n",
217 ctdb_addr_to_str(&vnn->public_address),
218 ctdb_vnn_iface_string(vnn)));
222 best = ctdb_vnn_best_iface(ctdb, vnn);
224 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
225 "cannot assign to iface any iface\n",
226 ctdb_addr_to_str(&vnn->public_address)));
232 vnn->pnn = ctdb->pnn;
234 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
235 "now assigned to iface '%s' refs[%d]\n",
236 ctdb_addr_to_str(&vnn->public_address),
237 ctdb_vnn_iface_string(vnn),
242 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
243 struct ctdb_vnn *vnn)
245 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
246 "now unassigned (old iface '%s' refs[%d])\n",
247 ctdb_addr_to_str(&vnn->public_address),
248 ctdb_vnn_iface_string(vnn),
249 vnn->iface?vnn->iface->references:0));
251 vnn->iface->references--;
254 if (vnn->pnn == ctdb->pnn) {
259 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
260 struct ctdb_vnn *vnn)
264 /* Nodes that are not RUNNING can not host IPs */
265 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
269 if (vnn->delete_pending) {
273 if (vnn->iface && vnn->iface->link_up) {
277 for (i=0; vnn->ifaces[i]; i++) {
278 struct ctdb_interface *cur;
280 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
293 struct ctdb_takeover_arp {
294 struct ctdb_context *ctdb;
297 struct ctdb_tcp_array *tcparray;
298 struct ctdb_vnn *vnn;
303 lists of tcp endpoints
305 struct ctdb_tcp_list {
306 struct ctdb_tcp_list *prev, *next;
307 struct ctdb_connection connection;
311 list of clients to kill on IP release
313 struct ctdb_client_ip {
314 struct ctdb_client_ip *prev, *next;
315 struct ctdb_context *ctdb;
322 send a gratuitous arp
324 static void ctdb_control_send_arp(struct tevent_context *ev,
325 struct tevent_timer *te,
326 struct timeval t, void *private_data)
328 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
329 struct ctdb_takeover_arp);
331 struct ctdb_tcp_array *tcparray;
332 const char *iface = ctdb_vnn_iface_string(arp->vnn);
334 ret = ctdb_sys_send_arp(&arp->addr, iface);
336 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
337 iface, strerror(errno)));
340 tcparray = arp->tcparray;
342 for (i=0;i<tcparray->num;i++) {
343 struct ctdb_connection *tcon;
345 tcon = &tcparray->connections[i];
346 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
347 (unsigned)ntohs(tcon->dst.ip.sin_port),
348 ctdb_addr_to_str(&tcon->src),
349 (unsigned)ntohs(tcon->src.ip.sin_port)));
350 ret = ctdb_sys_send_tcp(
355 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
356 ctdb_addr_to_str(&tcon->src)));
363 if (arp->count == CTDB_ARP_REPEAT) {
368 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
369 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
370 ctdb_control_send_arp, arp);
373 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
374 struct ctdb_vnn *vnn)
376 struct ctdb_takeover_arp *arp;
377 struct ctdb_tcp_array *tcparray;
379 if (!vnn->takeover_ctx) {
380 vnn->takeover_ctx = talloc_new(vnn);
381 if (!vnn->takeover_ctx) {
386 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
392 arp->addr = vnn->public_address;
395 tcparray = vnn->tcp_array;
397 /* add all of the known tcp connections for this IP to the
398 list of tcp connections to send tickle acks for */
399 arp->tcparray = talloc_steal(arp, tcparray);
401 vnn->tcp_array = NULL;
402 vnn->tcp_update_needed = true;
405 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
406 timeval_zero(), ctdb_control_send_arp, arp);
411 struct takeover_callback_state {
412 struct ctdb_req_control_old *c;
413 ctdb_sock_addr *addr;
414 struct ctdb_vnn *vnn;
417 struct ctdb_do_takeip_state {
418 struct ctdb_req_control_old *c;
419 struct ctdb_vnn *vnn;
423 called when takeip event finishes
425 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
428 struct ctdb_do_takeip_state *state =
429 talloc_get_type(private_data, struct ctdb_do_takeip_state);
434 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
436 if (status == -ETIME) {
439 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
440 ctdb_addr_to_str(&state->vnn->public_address),
441 ctdb_vnn_iface_string(state->vnn)));
442 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
444 node->flags |= NODE_FLAGS_UNHEALTHY;
449 if (ctdb->do_checkpublicip) {
451 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
453 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
460 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
461 data.dsize = strlen((char *)data.dptr) + 1;
462 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
464 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
467 /* the control succeeded */
468 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
473 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
475 state->vnn->update_in_flight = false;
480 take over an ip address
482 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
483 struct ctdb_req_control_old *c,
484 struct ctdb_vnn *vnn)
487 struct ctdb_do_takeip_state *state;
489 if (vnn->update_in_flight) {
490 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
491 "update for this IP already in flight\n",
492 ctdb_addr_to_str(&vnn->public_address),
493 vnn->public_netmask_bits));
497 ret = ctdb_vnn_assign_iface(ctdb, vnn);
499 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
500 "assign a usable interface\n",
501 ctdb_addr_to_str(&vnn->public_address),
502 vnn->public_netmask_bits));
506 state = talloc(vnn, struct ctdb_do_takeip_state);
507 CTDB_NO_MEMORY(ctdb, state);
509 state->c = talloc_steal(ctdb, c);
512 vnn->update_in_flight = true;
513 talloc_set_destructor(state, ctdb_takeip_destructor);
515 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits,
518 ctdb_vnn_iface_string(vnn)));
520 ret = ctdb_event_script_callback(ctdb,
522 ctdb_do_takeip_callback,
526 ctdb_vnn_iface_string(vnn),
527 ctdb_addr_to_str(&vnn->public_address),
528 vnn->public_netmask_bits);
531 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
532 ctdb_addr_to_str(&vnn->public_address),
533 ctdb_vnn_iface_string(vnn)));
541 struct ctdb_do_updateip_state {
542 struct ctdb_req_control_old *c;
543 struct ctdb_interface *old;
544 struct ctdb_vnn *vnn;
548 called when updateip event finishes
550 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
553 struct ctdb_do_updateip_state *state =
554 talloc_get_type(private_data, struct ctdb_do_updateip_state);
558 if (status == -ETIME) {
561 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
562 ctdb_addr_to_str(&state->vnn->public_address),
564 ctdb_vnn_iface_string(state->vnn)));
567 * All we can do is reset the old interface
568 * and let the next run fix it
570 ctdb_vnn_unassign_iface(ctdb, state->vnn);
571 state->vnn->iface = state->old;
572 state->vnn->iface->references++;
574 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
579 if (ctdb->do_checkpublicip) {
581 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
583 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
590 /* the control succeeded */
591 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
596 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
598 state->vnn->update_in_flight = false;
603 update (move) an ip address
605 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
606 struct ctdb_req_control_old *c,
607 struct ctdb_vnn *vnn)
610 struct ctdb_do_updateip_state *state;
611 struct ctdb_interface *old = vnn->iface;
612 const char *new_name;
614 if (vnn->update_in_flight) {
615 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
616 "update for this IP already in flight\n",
617 ctdb_addr_to_str(&vnn->public_address),
618 vnn->public_netmask_bits));
622 ctdb_vnn_unassign_iface(ctdb, vnn);
623 ret = ctdb_vnn_assign_iface(ctdb, vnn);
625 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
626 "assin a usable interface (old iface '%s')\n",
627 ctdb_addr_to_str(&vnn->public_address),
628 vnn->public_netmask_bits,
633 new_name = ctdb_vnn_iface_string(vnn);
634 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
635 /* A benign update from one interface onto itself.
636 * no need to run the eventscripts in this case, just return
639 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
643 state = talloc(vnn, struct ctdb_do_updateip_state);
644 CTDB_NO_MEMORY(ctdb, state);
646 state->c = talloc_steal(ctdb, c);
650 vnn->update_in_flight = true;
651 talloc_set_destructor(state, ctdb_updateip_destructor);
653 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
654 "interface %s to %s\n",
655 ctdb_addr_to_str(&vnn->public_address),
656 vnn->public_netmask_bits,
660 ret = ctdb_event_script_callback(ctdb,
662 ctdb_do_updateip_callback,
664 CTDB_EVENT_UPDATE_IP,
668 ctdb_addr_to_str(&vnn->public_address),
669 vnn->public_netmask_bits);
671 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
672 ctdb_addr_to_str(&vnn->public_address),
673 old->name, new_name));
682 Find the vnn of the node that has a public ip address
683 returns NULL if the address is not known as a public address
685 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
687 struct ctdb_vnn *vnn;
689 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
690 if (ctdb_same_ip(&vnn->public_address, addr)) {
699 take over an ip address
701 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
702 struct ctdb_req_control_old *c,
707 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
708 struct ctdb_vnn *vnn;
709 bool have_ip = false;
710 bool do_updateip = false;
711 bool do_takeip = false;
712 struct ctdb_interface *best_iface = NULL;
714 if (pip->pnn != ctdb->pnn) {
715 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
716 "with pnn %d, but we're node %d\n",
717 ctdb_addr_to_str(&pip->addr),
718 pip->pnn, ctdb->pnn));
722 /* update our vnn list */
723 vnn = find_public_ip_vnn(ctdb, &pip->addr);
725 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
726 ctdb_addr_to_str(&pip->addr)));
730 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
731 have_ip = ctdb_sys_have_ip(&pip->addr);
733 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
734 if (best_iface == NULL) {
735 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
736 "a usable interface (old %s, have_ip %d)\n",
737 ctdb_addr_to_str(&vnn->public_address),
738 vnn->public_netmask_bits,
739 ctdb_vnn_iface_string(vnn),
744 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
745 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
750 if (vnn->iface == NULL && have_ip) {
751 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
752 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
753 ctdb_addr_to_str(&vnn->public_address)));
757 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
758 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759 "and we have it on iface[%s], but it was assigned to node %d"
760 "and we are node %d, banning ourself\n",
761 ctdb_addr_to_str(&vnn->public_address),
762 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
767 if (vnn->pnn == -1 && have_ip) {
768 vnn->pnn = ctdb->pnn;
769 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
770 "and we already have it on iface[%s], update local daemon\n",
771 ctdb_addr_to_str(&vnn->public_address),
772 ctdb_vnn_iface_string(vnn)));
777 if (vnn->iface != best_iface) {
778 if (!vnn->iface->link_up) {
780 } else if (vnn->iface->references > (best_iface->references + 1)) {
781 /* only move when the rebalance gains something */
789 ctdb_vnn_unassign_iface(ctdb, vnn);
796 ret = ctdb_do_takeip(ctdb, c, vnn);
800 } else if (do_updateip) {
801 ret = ctdb_do_updateip(ctdb, c, vnn);
807 * The interface is up and the kernel knows the ip
810 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
811 ctdb_addr_to_str(&pip->addr),
812 vnn->public_netmask_bits,
813 ctdb_vnn_iface_string(vnn)));
817 /* tell ctdb_control.c that we will be replying asynchronously */
823 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
825 DLIST_REMOVE(ctdb->vnn, vnn);
826 ctdb_vnn_unassign_iface(ctdb, vnn);
827 ctdb_remove_orphaned_ifaces(ctdb, vnn);
832 called when releaseip event finishes
834 static void release_ip_callback(struct ctdb_context *ctdb, int status,
837 struct takeover_callback_state *state =
838 talloc_get_type(private_data, struct takeover_callback_state);
841 if (status == -ETIME) {
845 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
846 if (ctdb_sys_have_ip(state->addr)) {
848 ("IP %s still hosted during release IP callback, failing\n",
849 ctdb_addr_to_str(state->addr)));
850 ctdb_request_control_reply(ctdb, state->c,
857 /* send a message to all clients of this node telling them
858 that the cluster has been reconfigured and they should
859 release any sockets on this IP */
860 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
861 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
862 data.dsize = strlen((char *)data.dptr)+1;
864 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
866 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
868 ctdb_vnn_unassign_iface(ctdb, state->vnn);
870 /* Process the IP if it has been marked for deletion */
871 if (state->vnn->delete_pending) {
872 do_delete_ip(ctdb, state->vnn);
876 /* the control succeeded */
877 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
881 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
883 if (state->vnn != NULL) {
884 state->vnn->update_in_flight = false;
890 release an ip address
892 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
893 struct ctdb_req_control_old *c,
898 struct takeover_callback_state *state;
899 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
900 struct ctdb_vnn *vnn;
903 /* update our vnn list */
904 vnn = find_public_ip_vnn(ctdb, &pip->addr);
906 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
907 ctdb_addr_to_str(&pip->addr)));
912 /* stop any previous arps */
913 talloc_free(vnn->takeover_ctx);
914 vnn->takeover_ctx = NULL;
916 /* Some ctdb tool commands (e.g. moveip) send
917 * lazy multicast to drop an IP from any node that isn't the
918 * intended new node. The following makes ctdbd ignore
919 * a release for any address it doesn't host.
921 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
922 if (!ctdb_sys_have_ip(&pip->addr)) {
923 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
924 ctdb_addr_to_str(&pip->addr),
925 vnn->public_netmask_bits,
926 ctdb_vnn_iface_string(vnn)));
927 ctdb_vnn_unassign_iface(ctdb, vnn);
931 if (vnn->iface == NULL) {
932 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
933 ctdb_addr_to_str(&pip->addr),
934 vnn->public_netmask_bits));
939 /* There is a potential race between take_ip and us because we
940 * update the VNN via a callback that runs when the
941 * eventscripts have been run. Avoid the race by allowing one
942 * update to be in flight at a time.
944 if (vnn->update_in_flight) {
945 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
946 "update for this IP already in flight\n",
947 ctdb_addr_to_str(&vnn->public_address),
948 vnn->public_netmask_bits));
952 iface = strdup(ctdb_vnn_iface_string(vnn));
954 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
955 ctdb_addr_to_str(&pip->addr),
956 vnn->public_netmask_bits,
960 state = talloc(ctdb, struct takeover_callback_state);
962 ctdb_set_error(ctdb, "Out of memory at %s:%d",
968 state->c = talloc_steal(state, c);
969 state->addr = talloc(state, ctdb_sock_addr);
970 if (state->addr == NULL) {
971 ctdb_set_error(ctdb, "Out of memory at %s:%d",
977 *state->addr = pip->addr;
980 vnn->update_in_flight = true;
981 talloc_set_destructor(state, ctdb_releaseip_destructor);
983 ret = ctdb_event_script_callback(ctdb,
984 state, release_ip_callback, state,
985 CTDB_EVENT_RELEASE_IP,
988 ctdb_addr_to_str(&pip->addr),
989 vnn->public_netmask_bits);
992 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
993 ctdb_addr_to_str(&pip->addr),
994 ctdb_vnn_iface_string(vnn)));
999 /* tell the control that we will be replying asynchronously */
1000 *async_reply = true;
1004 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1005 ctdb_sock_addr *addr,
1006 unsigned mask, const char *ifaces,
1009 struct ctdb_vnn *vnn;
1016 tmp = strdup(ifaces);
1017 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1018 if (!ctdb_sys_check_iface_exists(iface)) {
1019 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1026 /* Verify that we don't have an entry for this ip yet */
1027 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1028 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1029 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1030 ctdb_addr_to_str(addr)));
1035 /* create a new vnn structure for this ip address */
1036 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1037 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1038 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1039 tmp = talloc_strdup(vnn, ifaces);
1040 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1041 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1042 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1043 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1044 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1045 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1049 vnn->ifaces[num] = NULL;
1050 vnn->public_address = *addr;
1051 vnn->public_netmask_bits = mask;
1053 if (check_address) {
1054 if (ctdb_sys_have_ip(addr)) {
1055 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1056 vnn->pnn = ctdb->pnn;
1060 for (i=0; vnn->ifaces[i]; i++) {
1061 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1063 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1064 "for public_address[%s]\n",
1065 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1071 DLIST_ADD(ctdb->vnn, vnn);
1077 setup the public address lists from a file
1079 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1085 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1086 if (lines == NULL) {
1087 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1090 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1094 for (i=0;i<nlines;i++) {
1096 ctdb_sock_addr addr;
1097 const char *addrstr;
1102 while ((*line == ' ') || (*line == '\t')) {
1108 if (strcmp(line, "") == 0) {
1111 tok = strtok(line, " \t");
1113 tok = strtok(NULL, " \t");
1115 if (NULL == ctdb->default_public_interface) {
1116 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1121 ifaces = ctdb->default_public_interface;
1126 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1127 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1131 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1132 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1143 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1147 struct ctdb_vnn *svnn;
1148 struct ctdb_interface *cur = NULL;
1152 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1153 CTDB_NO_MEMORY(ctdb, svnn);
1155 svnn->ifaces = talloc_array(svnn, const char *, 2);
1156 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1157 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1158 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1159 svnn->ifaces[1] = NULL;
1161 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1167 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1169 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1170 "for single_ip[%s]\n",
1172 ctdb_addr_to_str(&svnn->public_address)));
1177 /* assume the single public ip interface is initially "good" */
1178 cur = ctdb_find_iface(ctdb, iface);
1180 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1183 cur->link_up = true;
1185 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1191 ctdb->single_ip_vnn = svnn;
1195 static void *add_ip_callback(void *parm, void *data)
1197 struct public_ip_list *this_ip = parm;
1198 struct public_ip_list *prev_ip = data;
1200 if (prev_ip == NULL) {
1203 if (this_ip->pnn == -1) {
1204 this_ip->pnn = prev_ip->pnn;
1210 static int getips_count_callback(void *param, void *data)
1212 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1213 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1215 new_ip->next = *ip_list;
1220 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1221 struct ctdb_public_ip_list *ips,
1224 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1225 struct ipalloc_state *ipalloc_state,
1226 struct ctdb_node_map_old *nodemap)
1230 struct ctdb_public_ip_list_old *ip_list;
1232 if (ipalloc_state->num != nodemap->num) {
1235 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1236 ipalloc_state->num, nodemap->num));
1240 for (j=0; j<nodemap->num; j++) {
1241 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1245 /* Retrieve the list of known public IPs from the node */
1246 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1249 ipalloc_state->known_public_ips,
1254 ("Failed to read known public IPs from node: %u\n",
1258 ipalloc_state->known_public_ips[j].num = ip_list->num;
1259 /* This could be copied and freed. However, ip_list
1260 * is allocated off ipalloc_state->known_public_ips,
1261 * so this is a safe hack. This will go away in a
1262 * while anyway... */
1263 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1265 if (ctdb->do_checkpublicip) {
1266 verify_remote_ip_allocation(
1268 &ipalloc_state->known_public_ips[j],
1272 /* Retrieve the list of available public IPs from the node */
1273 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1276 ipalloc_state->available_public_ips,
1277 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1281 ("Failed to read available public IPs from node: %u\n",
1285 ipalloc_state->available_public_ips[j].num = ip_list->num;
1286 /* This could be copied and freed. However, ip_list
1287 * is allocated off ipalloc_state->available_public_ips,
1288 * so this is a safe hack. This will go away in a
1289 * while anyway... */
1290 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1296 static struct public_ip_list *
1297 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1300 struct public_ip_list *ip_list;
1301 struct ctdb_public_ip_list *public_ips;
1303 TALLOC_FREE(ctdb->ip_tree);
1304 ctdb->ip_tree = trbt_create(ctdb, 0);
1306 for (i=0; i < ctdb->num_nodes; i++) {
1308 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1312 /* there were no public ips for this node */
1313 if (ipalloc_state->known_public_ips == NULL) {
1317 public_ips = &ipalloc_state->known_public_ips[i];
1319 for (j=0; j < public_ips->num; j++) {
1320 struct public_ip_list *tmp_ip;
1322 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1323 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1324 /* Do not use information about IP addresses hosted
1325 * on other nodes, it may not be accurate */
1326 if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1327 tmp_ip->pnn = public_ips->ip[j].pnn;
1331 tmp_ip->addr = public_ips->ip[j].addr;
1332 tmp_ip->next = NULL;
1334 trbt_insertarray32_callback(ctdb->ip_tree,
1335 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1342 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1347 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1351 for (i=0;i<nodemap->num;i++) {
1352 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1353 /* Found one completely healthy node */
1361 struct get_tunable_callback_data {
1362 const char *tunable;
1367 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1368 int32_t res, TDB_DATA outdata,
1371 struct get_tunable_callback_data *cd =
1372 (struct get_tunable_callback_data *)callback;
1376 /* Already handled in fail callback */
1380 if (outdata.dsize != sizeof(uint32_t)) {
1381 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1382 cd->tunable, pnn, (int)sizeof(uint32_t),
1383 (int)outdata.dsize));
1388 size = talloc_array_length(cd->out);
1390 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1391 cd->tunable, pnn, size));
1396 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1399 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1400 int32_t res, TDB_DATA outdata,
1403 struct get_tunable_callback_data *cd =
1404 (struct get_tunable_callback_data *)callback;
1409 ("Timed out getting tunable \"%s\" from node %d\n",
1415 DEBUG(DEBUG_WARNING,
1416 ("Tunable \"%s\" not implemented on node %d\n",
1421 ("Unexpected error getting tunable \"%s\" from node %d\n",
1427 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1428 TALLOC_CTX *tmp_ctx,
1429 struct ctdb_node_map_old *nodemap,
1430 const char *tunable,
1431 uint32_t default_value)
1434 struct ctdb_control_get_tunable *t;
1437 struct get_tunable_callback_data callback_data;
1440 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1441 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1442 for (i=0; i<nodemap->num; i++) {
1443 tvals[i] = default_value;
1446 callback_data.out = tvals;
1447 callback_data.tunable = tunable;
1448 callback_data.fatal = false;
1450 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1451 data.dptr = talloc_size(tmp_ctx, data.dsize);
1452 t = (struct ctdb_control_get_tunable *)data.dptr;
1453 t->length = strlen(tunable)+1;
1454 memcpy(t->name, tunable, t->length);
1455 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1456 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1457 nodes, 0, TAKEOVER_TIMEOUT(),
1459 get_tunable_callback,
1460 get_tunable_fail_callback,
1461 &callback_data) != 0) {
1462 if (callback_data.fatal) {
1468 talloc_free(data.dptr);
1473 /* Set internal flags for IP allocation:
1475 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1476 * Set NOIPHOST ip flag for each INACTIVE node
1477 * if all nodes are disabled:
1478 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1480 * Set NOIPHOST ip flags for disabled nodes
1482 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1483 struct ctdb_node_map_old *nodemap,
1484 uint32_t *tval_noiptakeover,
1485 uint32_t *tval_noiphostonalldisabled)
1489 for (i=0;i<nodemap->num;i++) {
1490 /* Can not take IPs on node with NoIPTakeover set */
1491 if (tval_noiptakeover[i] != 0) {
1492 ipalloc_state->noiptakeover[i] = true;
1495 /* Can not host IPs on INACTIVE node */
1496 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1497 ipalloc_state->noiphost[i] = true;
1501 if (all_nodes_are_disabled(nodemap)) {
1502 /* If all nodes are disabled, can not host IPs on node
1503 * with NoIPHostOnAllDisabled set
1505 for (i=0;i<nodemap->num;i++) {
1506 if (tval_noiphostonalldisabled[i] != 0) {
1507 ipalloc_state->noiphost[i] = true;
1511 /* If some nodes are not disabled, then can not host
1512 * IPs on DISABLED node
1514 for (i=0;i<nodemap->num;i++) {
1515 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1516 ipalloc_state->noiphost[i] = true;
1522 static bool set_ipflags(struct ctdb_context *ctdb,
1523 struct ipalloc_state *ipalloc_state,
1524 struct ctdb_node_map_old *nodemap)
1526 uint32_t *tval_noiptakeover;
1527 uint32_t *tval_noiphostonalldisabled;
1529 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1531 if (tval_noiptakeover == NULL) {
1535 tval_noiphostonalldisabled =
1536 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1537 "NoIPHostOnAllDisabled", 0);
1538 if (tval_noiphostonalldisabled == NULL) {
1539 /* Caller frees tmp_ctx */
1543 set_ipflags_internal(ipalloc_state, nodemap,
1545 tval_noiphostonalldisabled);
1547 talloc_free(tval_noiptakeover);
1548 talloc_free(tval_noiphostonalldisabled);
1553 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1554 TALLOC_CTX *mem_ctx)
1556 struct ipalloc_state *ipalloc_state =
1557 talloc_zero(mem_ctx, struct ipalloc_state);
1558 if (ipalloc_state == NULL) {
1559 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1563 ipalloc_state->num = ctdb->num_nodes;
1565 ipalloc_state->known_public_ips =
1566 talloc_zero_array(ipalloc_state,
1567 struct ctdb_public_ip_list,
1568 ipalloc_state->num);
1569 if (ipalloc_state->known_public_ips == NULL) {
1570 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1574 ipalloc_state->available_public_ips =
1575 talloc_zero_array(ipalloc_state,
1576 struct ctdb_public_ip_list,
1577 ipalloc_state->num);
1578 if (ipalloc_state->available_public_ips == NULL) {
1579 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1582 ipalloc_state->noiptakeover =
1583 talloc_zero_array(ipalloc_state,
1585 ipalloc_state->num);
1586 if (ipalloc_state->noiptakeover == NULL) {
1587 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1590 ipalloc_state->noiphost =
1591 talloc_zero_array(ipalloc_state,
1593 ipalloc_state->num);
1594 if (ipalloc_state->noiphost == NULL) {
1595 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1599 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1600 ipalloc_state->algorithm = IPALLOC_LCP2;
1601 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1602 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1604 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1607 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1609 return ipalloc_state;
1611 talloc_free(ipalloc_state);
1615 struct iprealloc_callback_data {
1618 client_async_callback fail_callback;
1619 void *fail_callback_data;
1620 struct ctdb_node_map_old *nodemap;
1623 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1624 int32_t res, TDB_DATA outdata,
1628 struct iprealloc_callback_data *cd =
1629 (struct iprealloc_callback_data *)callback;
1631 numnodes = talloc_array_length(cd->retry_nodes);
1632 if (pnn > numnodes) {
1634 ("ipreallocated failure from node %d, "
1635 "but only %d nodes in nodemap\n",
1640 /* Can't run the "ipreallocated" event on a INACTIVE node */
1641 if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1642 DEBUG(DEBUG_WARNING,
1643 ("ipreallocated failed on inactive node %d, ignoring\n",
1650 /* If the control timed out then that's a real error,
1651 * so call the real fail callback
1653 if (cd->fail_callback) {
1654 cd->fail_callback(ctdb, pnn, res, outdata,
1655 cd->fail_callback_data);
1657 DEBUG(DEBUG_WARNING,
1658 ("iprealloc timed out but no callback registered\n"));
1662 /* If not a timeout then either the ipreallocated
1663 * eventscript (or some setup) failed. This might
1664 * have failed because the IPREALLOCATED control isn't
1665 * implemented - right now there is no way of knowing
1666 * because the error codes are all folded down to -1.
1667 * Consider retrying using EVENTSCRIPT control...
1669 DEBUG(DEBUG_WARNING,
1670 ("ipreallocated failure from node %d, flagging retry\n",
1672 cd->retry_nodes[pnn] = true;
1677 struct takeover_callback_data {
1679 client_async_callback fail_callback;
1680 void *fail_callback_data;
1681 struct ctdb_node_map_old *nodemap;
1684 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1685 uint32_t node_pnn, int32_t res,
1686 TDB_DATA outdata, void *callback_data)
1688 struct takeover_callback_data *cd =
1689 talloc_get_type_abort(callback_data,
1690 struct takeover_callback_data);
1693 for (i = 0; i < cd->nodemap->num; i++) {
1694 if (node_pnn == cd->nodemap->nodes[i].pnn) {
1699 if (i == cd->nodemap->num) {
1700 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1704 if (!cd->node_failed[i]) {
1705 cd->node_failed[i] = true;
1706 cd->fail_callback(ctdb, node_pnn, res, outdata,
1707 cd->fail_callback_data);
1712 * Recalculate the allocation of public IPs to nodes and have the
1713 * nodes host their allocated addresses.
1715 * - Allocate memory for IP allocation state, including per node
1717 * - Populate IP allocation algorithm in IP allocation state
1718 * - Populate local value of tunable NoIPFailback in IP allocation
1719 state - this is really a cluster-wide configuration variable and
1720 only the value from the master node is used
1721 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1722 * connected nodes - this is done separately so tunable values can
1723 * be faked in unit testing
1724 * - Populate NoIPTakeover tunable in IP allocation state
1725 * - Populate NoIPHost in IP allocation state, derived from node flags
1726 * and NoIPHostOnAllDisabled tunable
1727 * - Retrieve and populate known and available IP lists in IP
1729 * - If no available IP addresses then early exit
1730 * - Build list of (known IPs, currently assigned node)
1731 * - Populate list of nodes to force rebalance - internal structure,
1732 * currently no way to fetch, only used by LCP2 for nodes that have
1733 * had new IP addresses added
1734 * - Run IP allocation algorithm
1735 * - Send RELEASE_IP to all nodes for IPs they should not host
1736 * - Send TAKE_IP to all nodes for IPs they should host
1737 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Parameters, as far as the visible lines show:
 *   ctdb                  - daemon context; also the talloc parent of the
 *                           temporary context used for all scratch data
 *   nodemap               - node map used to pick the nodes that are sent
 *                           RELEASE_IP / TAKEOVER_IP / IPREALLOCATED
 *   force_rebalance_nodes - stored in the IP allocation state before the
 *                           allocation algorithm is run
 *   fail_callback,
 *   callback_data         - caller's failure callback and its argument;
 *                           used directly for TAKEOVER_IP and the
 *                           eventscript retry, and through the wrapper
 *                           callbacks for RELEASE_IP and IPREALLOCATED
 *
 * The temporary context is freed on the failure paths visible below and
 * at the end of the function.
 *
 * NOTE(review): this extract is missing lines of the body (several
 * returns, conditions, loop bookkeeping and the target of the
 * disable_ip_failover shortcut), so the exact return values and the
 * handling of the "no nodes available" case must be confirmed against
 * the complete file.
 */
1739 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1740 uint32_t *force_rebalance_nodes,
1741 client_async_callback fail_callback, void *callback_data)
1744 struct ctdb_public_ip ip;
1746 struct public_ip_list *all_ips, *tmp_ip;
1748 struct timeval timeout;
1749 struct client_async_data *async_data;
1750 struct ctdb_client_control_state *state;
1751 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1752 struct ipalloc_state *ipalloc_state;
1753 struct takeover_callback_data *takeover_data;
1754 struct iprealloc_callback_data iprealloc_data;
1759 * ip failover is completely disabled, just send out the
1760 * ipreallocated event.
1762 if (ctdb->tunable.disable_ip_failover != 0) {
1766 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1767 if (ipalloc_state == NULL) {
1768 talloc_free(tmp_ctx);
1772 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1773 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1774 talloc_free(tmp_ctx);
1778 /* Fetch known/available public IPs from each active node */
1779 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1781 talloc_free(tmp_ctx);
1785 /* Short-circuit IP allocation if no node has available IPs */
1786 can_host_ips = false;
1787 for (i=0; i < ipalloc_state->num; i++) {
1788 if (ipalloc_state->available_public_ips[i].num != 0) {
1789 can_host_ips = true;
1792 if (!can_host_ips) {
1793 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1797 /* since nodes only know about those public addresses that
1798 can be served by that particular node, no single node has
1799 a full list of all public addresses that exist in the cluster.
1800 Walk over all node structures and create a merged list of
1801 all public addresses that exist in the cluster.
1803 keep the tree of ips around as ctdb->ip_tree
1805 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1806 ipalloc_state->all_ips = all_ips;
1808 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1810 /* Do the IP reassignment calculations */
1811 ipalloc(ipalloc_state);
1813 /* Now tell all nodes to release any public IPs should not
1814 * host. This will be a NOOP on nodes that don't currently
1815 * hold the given IP.
1817 takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1818 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1820 takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1821 bool, nodemap->num);
1822 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1823 takeover_data->fail_callback = fail_callback;
1824 takeover_data->fail_callback_data = callback_data;
1825 takeover_data->nodemap = nodemap;
1827 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1828 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1830 async_data->fail_callback = takeover_run_fail_callback;
1831 async_data->callback_data = takeover_data;
1833 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1835 /* Send a RELEASE_IP to all nodes that should not be hosting
1836 * each IP. For each IP, all but one of these will be
1837 * redundant. However, the redundant ones are used to tell
1838 * nodes which node should be hosting the IP so that commands
1839 * like "ctdb ip" can display a particular nodes idea of who
1840 * is hosting what. */
1841 for (i=0;i<nodemap->num;i++) {
1842 /* don't talk to unconnected nodes, but do talk to banned nodes */
1843 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1847 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1848 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1849 /* This node should be serving this
1850 vnn so don't tell it to release the ip
1854 ip.pnn = tmp_ip->pnn;
1855 ip.addr = tmp_ip->addr;
1857 timeout = TAKEOVER_TIMEOUT();
1858 data.dsize = sizeof(ip);
1859 data.dptr = (uint8_t *)&ip;
1860 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1861 0, CTDB_CONTROL_RELEASE_IP, 0,
1864 if (state == NULL) {
1865 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1866 talloc_free(tmp_ctx);
1870 ctdb_client_async_add(async_data, state);
1873 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1874 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1875 talloc_free(tmp_ctx);
1878 talloc_free(async_data);
1881 /* For each IP, send a TAKOVER_IP to the node that should be
1882 * hosting it. Many of these will often be redundant (since
1883 * the allocation won't have changed) but they can be useful
1884 * to recover from inconsistencies. */
1885 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1886 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1888 async_data->fail_callback = fail_callback;
1889 async_data->callback_data = callback_data;
1891 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1892 if (tmp_ip->pnn == -1) {
1893 /* this IP won't be taken over */
1897 ip.pnn = tmp_ip->pnn;
1898 ip.addr = tmp_ip->addr;
1900 timeout = TAKEOVER_TIMEOUT();
1901 data.dsize = sizeof(ip);
1902 data.dptr = (uint8_t *)&ip;
1903 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1904 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1905 data, async_data, &timeout, NULL);
1906 if (state == NULL) {
1907 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1908 talloc_free(tmp_ctx);
1912 ctdb_client_async_add(async_data, state);
1914 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1915 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1916 talloc_free(tmp_ctx);
1922 * Tell all nodes to run eventscripts to process the
1923 * "ipreallocated" event. This can do a lot of things,
1924 * including restarting services to reconfigure them if public
1925 * IPs have moved. Once upon a time this event only used to
1928 retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1929 CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1930 iprealloc_data.retry_nodes = retry_data;
1931 iprealloc_data.retry_count = 0;
1932 iprealloc_data.fail_callback = fail_callback;
1933 iprealloc_data.fail_callback_data = callback_data;
1934 iprealloc_data.nodemap = nodemap;
1936 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1937 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1938 nodes, 0, TAKEOVER_TIMEOUT(),
1940 NULL, iprealloc_fail_callback,
1943 /* If the control failed then we should retry to any
1944 * nodes flagged by iprealloc_fail_callback using the
1945 * EVENTSCRIPT control. This is a best-effort at
1946 * backward compatiblity when running a mixed cluster
1947 * where some nodes have not yet been upgraded to
1948 * support the IPREALLOCATED control.
1950 DEBUG(DEBUG_WARNING,
1951 ("Retry ipreallocated to some nodes using eventscript control\n"));
1953 nodes = talloc_array(tmp_ctx, uint32_t,
1954 iprealloc_data.retry_count);
1955 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1958 for (i=0; i<nodemap->num; i++) {
1959 if (iprealloc_data.retry_nodes[i]) {
1965 data.dptr = discard_const("ipreallocated");
1966 data.dsize = strlen((char *)data.dptr) + 1;
1967 ret = ctdb_client_async_control(ctdb,
1968 CTDB_CONTROL_RUN_EVENTSCRIPTS,
1969 nodes, 0, TAKEOVER_TIMEOUT(),
1971 NULL, fail_callback,
1974 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
1978 talloc_free(tmp_ctx);
1984 destroy a ctdb_client_ip structure
1986 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1988 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1989 ctdb_addr_to_str(&ip->addr),
1990 ntohs(ip->addr.ip.sin_port),
1993 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1998 called by a client to inform us of a TCP connection that it is managing
1999 that should be tickled with an ACK when IP takeover is done
/*
 * Steps visible in this extract:
 *   - the registering client is looked up by client_id
 *   - nothing is recorded when this node has no public IPs
 *   - source and destination of the connection carried in indata are
 *     canonicalised in place
 *   - the destination must be a known public address; otherwise a
 *     message is logged (loopback IPv4 destinations are not reported)
 *   - registration is refused when the public address is hosted by
 *     another node; the existing comment notes that failing the call
 *     makes smbd die
 *   - a ctdb_client_ip entry (with destructor) and a ctdb_tcp_list
 *     entry are allocated as talloc children of the client and linked
 *     into ctdb->client_ip_list and client->tcp_list
 *   - the connection is broadcast to all connected nodes with
 *     CTDB_CONTROL_TCP_ADD (no reply requested)
 *
 * NOTE(review): in the visible lines "client" is dereferenced without a
 * NULL check after reqid_find(), and indata.dsize is not checked before
 * indata.dptr is cast - confirm against the complete file.  Return
 * values, switch labels and some assignments are not part of this
 * extract.
 */
2001 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2004 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2005 struct ctdb_connection *tcp_sock = NULL;
2006 struct ctdb_tcp_list *tcp;
2007 struct ctdb_connection t;
2010 struct ctdb_client_ip *ip;
2011 struct ctdb_vnn *vnn;
2012 ctdb_sock_addr addr;
2014 /* If we don't have public IPs, tickles are useless */
2015 if (ctdb->vnn == NULL) {
2019 tcp_sock = (struct ctdb_connection *)indata.dptr;
2021 addr = tcp_sock->src;
2022 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2023 addr = tcp_sock->dst;
2024 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
2027 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
2028 vnn = find_public_ip_vnn(ctdb, &addr);
2030 switch (addr.sa.sa_family) {
2032 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2033 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2034 ctdb_addr_to_str(&addr)));
2038 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2039 ctdb_addr_to_str(&addr)));
2042 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2048 if (vnn->pnn != ctdb->pnn) {
2049 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2050 ctdb_addr_to_str(&addr),
2051 client_id, client->pid));
2052 /* failing this call will tell smbd to die */
2056 ip = talloc(client, struct ctdb_client_ip);
2057 CTDB_NO_MEMORY(ctdb, ip);
2061 ip->client_id = client_id;
2062 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2063 DLIST_ADD(ctdb->client_ip_list, ip);
2065 tcp = talloc(client, struct ctdb_tcp_list);
2066 CTDB_NO_MEMORY(ctdb, tcp);
2068 tcp->connection.src = tcp_sock->src;
2069 tcp->connection.dst = tcp_sock->dst;
2071 DLIST_ADD(client->tcp_list, tcp);
2073 t.src = tcp_sock->src;
2074 t.dst = tcp_sock->dst;
2076 data.dptr = (uint8_t *)&t;
2077 data.dsize = sizeof(t);
2079 switch (addr.sa.sa_family) {
2081 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2082 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2083 ctdb_addr_to_str(&tcp_sock->src),
2084 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2087 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2088 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2089 ctdb_addr_to_str(&tcp_sock->src),
2090 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2093 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2097 /* tell all nodes about this tcp connection */
2098 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2099 CTDB_CONTROL_TCP_ADD,
2100 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2102 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2110 find a tcp address on a list
2112 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2113 struct ctdb_connection *tcp)
2117 if (array == NULL) {
2121 for (i=0;i<array->num;i++) {
2122 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2123 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2124 return &array->connections[i];
2133 called by a daemon to inform us of a TCP connection that one of its
2134 clients managing that should tickled with an ACK when IP takeover is
2137 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2139 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2140 struct ctdb_tcp_array *tcparray;
2141 struct ctdb_connection tcp;
2142 struct ctdb_vnn *vnn;
2144 /* If we don't have public IPs, tickles are useless */
2145 if (ctdb->vnn == NULL) {
2149 vnn = find_public_ip_vnn(ctdb, &p->dst);
2151 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2152 ctdb_addr_to_str(&p->dst)));
2158 tcparray = vnn->tcp_array;
2160 /* If this is the first tickle */
2161 if (tcparray == NULL) {
2162 tcparray = talloc(vnn, struct ctdb_tcp_array);
2163 CTDB_NO_MEMORY(ctdb, tcparray);
2164 vnn->tcp_array = tcparray;
2167 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2168 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2170 tcparray->connections[tcparray->num].src = p->src;
2171 tcparray->connections[tcparray->num].dst = p->dst;
2174 if (tcp_update_needed) {
2175 vnn->tcp_update_needed = true;
2181 /* Do we already have this tickle ?*/
2184 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2185 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2186 ctdb_addr_to_str(&tcp.dst),
2187 ntohs(tcp.dst.ip.sin_port),
2192 /* A new tickle, we must add it to the array */
2193 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2194 struct ctdb_connection,
2196 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2198 tcparray->connections[tcparray->num].src = p->src;
2199 tcparray->connections[tcparray->num].dst = p->dst;
2202 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2203 ctdb_addr_to_str(&tcp.dst),
2204 ntohs(tcp.dst.ip.sin_port),
2207 if (tcp_update_needed) {
2208 vnn->tcp_update_needed = true;
2215 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2217 struct ctdb_connection *tcpp;
2223 /* if the array is empty we cant remove it
2224 and we don't need to do anything
2226 if (vnn->tcp_array == NULL) {
2227 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2228 ctdb_addr_to_str(&conn->dst),
2229 ntohs(conn->dst.ip.sin_port)));
2234 /* See if we know this connection
2235 if we don't know this connection then we dont need to do anything
2237 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2239 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2240 ctdb_addr_to_str(&conn->dst),
2241 ntohs(conn->dst.ip.sin_port)));
2246 /* We need to remove this entry from the array.
2247 Instead of allocating a new array and copying data to it
2248 we cheat and just copy the last entry in the existing array
2249 to the entry that is to be removed and just shring the
2252 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2253 vnn->tcp_array->num--;
2255 /* If we deleted the last entry we also need to remove the entire array
2257 if (vnn->tcp_array->num == 0) {
2258 talloc_free(vnn->tcp_array);
2259 vnn->tcp_array = NULL;
2262 vnn->tcp_update_needed = true;
2264 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2265 ctdb_addr_to_str(&conn->src),
2266 ntohs(conn->src.ip.sin_port)));
2271 called by a daemon to inform us of a TCP connection that one of its
2272 clients used are no longer needed in the tickle database
2274 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2276 struct ctdb_vnn *vnn;
2277 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2279 /* If we don't have public IPs, tickles are useless */
2280 if (ctdb->vnn == NULL) {
2284 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2287 (__location__ " unable to find public address %s\n",
2288 ctdb_addr_to_str(&conn->dst)));
2292 ctdb_remove_connection(vnn, conn);
2299 Called when another daemon starts - causes all tickles for all
2300 public addresses we are serving to be sent to the new node on the
2301 next check. This actually causes the next scheduled call to
2302 tdb_update_tcp_tickles() to update all nodes. This is simple and
2303 doesn't require careful error handling.
2305 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2307 struct ctdb_vnn *vnn;
2309 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2310 (unsigned long) pnn));
2312 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2313 vnn->tcp_update_needed = true;
2321 called when a client structure goes away - hook to remove
2322 elements from the tcp_list in all daemons
2324 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2326 while (client->tcp_list) {
2327 struct ctdb_vnn *vnn;
2328 struct ctdb_tcp_list *tcp = client->tcp_list;
2329 struct ctdb_connection *conn = &tcp->connection;
2331 DLIST_REMOVE(client->tcp_list, tcp);
2333 vnn = find_public_ip_vnn(client->ctdb,
2337 (__location__ " unable to find public address %s\n",
2338 ctdb_addr_to_str(&conn->dst)));
2342 /* If the IP address is hosted on this node then
2343 * remove the connection. */
2344 if (vnn->pnn == client->ctdb->pnn) {
2345 ctdb_remove_connection(vnn, conn);
2348 /* Otherwise this function has been called because the
2349 * server IP address has been released to another node
2350 * and the client has exited. This means that we
2351 * should not delete the connection information. The
2352 * takeover node processes connections too. */
/*
 * Release the public IP addresses held by this node.
 *
 * Steps visible in this extract, per public address:
 *   - an address the system does not actually have only gets its
 *     interface assignment dropped
 *   - an address with an update already in flight is reported with a
 *     warning (see the existing comment about re-entrancy)
 *   - otherwise update_in_flight is set, the CTDB_EVENT_RELEASE_IP
 *     event is run with "<iface> <address> <netmask bits>", a
 *     CTDB_SRVID_RELEASE_IP message carrying the address string is sent
 *     to this node, the interface assignment is dropped and
 *     update_in_flight is cleared again
 * The number of released addresses is logged at the end.
 *
 * There is a check of the disable_ip_failover tunable (== 1) at the
 * top.
 *
 * NOTE(review): the lines that skip to the next address, any further
 * per-address checks, the counter update and what the
 * disable_ip_failover check does are not part of this extract -
 * confirm against the complete file.
 */
2357 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2359 struct ctdb_vnn *vnn;
2363 if (ctdb->tunable.disable_ip_failover == 1) {
2367 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2368 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2369 ctdb_vnn_unassign_iface(ctdb, vnn);
2376 /* Don't allow multiple releases at once. Some code,
2377 * particularly ctdb_tickle_sentenced_connections() is
2379 if (vnn->update_in_flight) {
2380 DEBUG(DEBUG_WARNING,
2382 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2383 ctdb_addr_to_str(&vnn->public_address),
2384 vnn->public_netmask_bits,
2385 ctdb_vnn_iface_string(vnn)));
2388 vnn->update_in_flight = true;
2390 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2391 ctdb_addr_to_str(&vnn->public_address),
2392 vnn->public_netmask_bits,
2393 ctdb_vnn_iface_string(vnn)));
2395 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2396 ctdb_vnn_iface_string(vnn),
2397 ctdb_addr_to_str(&vnn->public_address),
2398 vnn->public_netmask_bits);
2400 data.dptr = (uint8_t *)talloc_strdup(
2401 vnn, ctdb_addr_to_str(&vnn->public_address));
2402 if (data.dptr != NULL) {
2403 data.dsize = strlen((char *)data.dptr) + 1;
2404 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2405 CTDB_SRVID_RELEASE_IP, data);
2406 talloc_free(data.dptr);
2409 ctdb_vnn_unassign_iface(ctdb, vnn);
2410 vnn->update_in_flight = false;
2414 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2419 get list of public IPs
2421 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2422 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2425 struct ctdb_public_ip_list_old *ips;
2426 struct ctdb_vnn *vnn;
2427 bool only_available = false;
2429 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2430 only_available = true;
2433 /* count how many public ip structures we have */
2435 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2439 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2440 num*sizeof(struct ctdb_public_ip);
2441 ips = talloc_zero_size(outdata, len);
2442 CTDB_NO_MEMORY(ctdb, ips);
2445 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2446 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2449 ips->ips[i].pnn = vnn->pnn;
2450 ips->ips[i].addr = vnn->public_address;
2454 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2455 i*sizeof(struct ctdb_public_ip);
2457 outdata->dsize = len;
2458 outdata->dptr = (uint8_t *)ips;
2464 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2465 struct ctdb_req_control_old *c,
2470 ctdb_sock_addr *addr;
2471 struct ctdb_public_ip_info_old *info;
2472 struct ctdb_vnn *vnn;
2474 addr = (ctdb_sock_addr *)indata.dptr;
2476 vnn = find_public_ip_vnn(ctdb, addr);
2478 /* if it is not a public ip it could be our 'single ip' */
2479 if (ctdb->single_ip_vnn) {
2480 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2481 vnn = ctdb->single_ip_vnn;
2486 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2487 "'%s'not a public address\n",
2488 ctdb_addr_to_str(addr)));
2492 /* count how many public ip structures we have */
2494 for (;vnn->ifaces[num];) {
2498 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2499 num*sizeof(struct ctdb_iface);
2500 info = talloc_zero_size(outdata, len);
2501 CTDB_NO_MEMORY(ctdb, info);
2503 info->ip.addr = vnn->public_address;
2504 info->ip.pnn = vnn->pnn;
2505 info->active_idx = 0xFFFFFFFF;
2507 for (i=0; vnn->ifaces[i]; i++) {
2508 struct ctdb_interface *cur;
2510 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2512 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2516 if (vnn->iface == cur) {
2517 info->active_idx = i;
2519 strncpy(info->ifaces[i].name, cur->name,
2520 sizeof(info->ifaces[i].name));
2521 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2522 info->ifaces[i].link_state = cur->link_up;
2523 info->ifaces[i].references = cur->references;
2526 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2527 i*sizeof(struct ctdb_iface);
2529 outdata->dsize = len;
2530 outdata->dptr = (uint8_t *)info;
2535 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2536 struct ctdb_req_control_old *c,
2540 struct ctdb_iface_list_old *ifaces;
2541 struct ctdb_interface *cur;
2543 /* count how many public ip structures we have */
2545 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2549 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2550 num*sizeof(struct ctdb_iface);
2551 ifaces = talloc_zero_size(outdata, len);
2552 CTDB_NO_MEMORY(ctdb, ifaces);
2555 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2556 strncpy(ifaces->ifaces[i].name, cur->name,
2557 sizeof(ifaces->ifaces[i].name));
2558 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2559 ifaces->ifaces[i].link_state = cur->link_up;
2560 ifaces->ifaces[i].references = cur->references;
2564 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2565 i*sizeof(struct ctdb_iface);
2567 outdata->dsize = len;
2568 outdata->dptr = (uint8_t *)ifaces;
/*
 * Set the link state of one interface from a struct ctdb_iface carried
 * in indata.
 *
 * Checks visible in this extract, each logging an error when violated:
 *   - the byte at name[CTDB_IFACE_SIZE] must be NUL, i.e. the name has
 *     to be terminated
 *   - link_state must be one of the accepted values
 *   - references must be 0
 * The interface is then looked up by name.  When the requested state
 * differs from the recorded one, the change is logged (at error level
 * if the link was up before, notice level otherwise) and recorded.
 *
 * NOTE(review): the return statements and the case labels mapping
 * link_state to link_up are not part of this extract - confirm against
 * the complete file.
 */
2573 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2574 struct ctdb_req_control_old *c,
2577 struct ctdb_iface *info;
2578 struct ctdb_interface *iface;
2579 bool link_up = false;
2581 info = (struct ctdb_iface *)indata.dptr;
2583 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2584 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2585 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2586 len, len, info->name));
2590 switch (info->link_state) {
2598 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2599 (unsigned int)info->link_state));
2603 if (info->references != 0) {
2604 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2605 (unsigned int)info->references));
2609 iface = ctdb_find_iface(ctdb, info->name);
2610 if (iface == NULL) {
2614 if (link_up == iface->link_up) {
2618 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2619 ("iface[%s] has changed it's link status %s => %s\n",
2621 iface->link_up?"up":"down",
2622 link_up?"up":"down"));
2624 iface->link_up = link_up;
2630 called by a daemon to inform us of the entire list of TCP tickles for
2631 a particular public address.
2632 this control should only be sent by the node that is currently serving
2633 that public address.
2635 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2637 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2638 struct ctdb_tcp_array *tcparray;
2639 struct ctdb_vnn *vnn;
2641 /* We must at least have tickles.num or else we cant verify the size
2642 of the received data blob
2644 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2645 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2649 /* verify that the size of data matches what we expect */
2650 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2651 + sizeof(struct ctdb_connection) * list->num) {
2652 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2656 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2657 ctdb_addr_to_str(&list->addr)));
2659 vnn = find_public_ip_vnn(ctdb, &list->addr);
2661 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2662 ctdb_addr_to_str(&list->addr)));
2667 if (vnn->pnn == ctdb->pnn) {
2669 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2670 ctdb_addr_to_str(&list->addr)));
2674 /* remove any old ticklelist we might have */
2675 talloc_free(vnn->tcp_array);
2676 vnn->tcp_array = NULL;
2678 tcparray = talloc(vnn, struct ctdb_tcp_array);
2679 CTDB_NO_MEMORY(ctdb, tcparray);
2681 tcparray->num = list->num;
2683 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2684 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2686 memcpy(tcparray->connections, &list->connections[0],
2687 sizeof(struct ctdb_connection)*tcparray->num);
2689 /* We now have a new fresh tickle list array for this vnn */
2690 vnn->tcp_array = tcparray;
2696 called to return the full list of tickles for the puclic address associated
2697 with the provided vnn
/*
 * Control handler: return the TCP tickle list stored for one public
 * address.
 *
 * ctdb    - daemon context, passed to find_public_ip_vnn()
 * indata  - indata.dptr is read as a ctdb_sock_addr naming the address
 * outdata - receives a struct ctdb_tickle_list_old: the fixed header
 *           followed by the connections copied from vnn->tcp_array.
 *           The buffer is allocated with talloc_size() as a child of
 *           outdata.
 *
 * An "is not a public address" error is logged after the lookup,
 * presumably when find_public_ip_vnn() finds nothing - confirm.
 * NOTE(review): the return statements and any handling of a NULL
 * vnn->tcp_array are not visible here - confirm before relying on them.
 */
2699 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2701 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2702 struct ctdb_tickle_list_old *list;
2703 struct ctdb_tcp_array *tcparray;
2705 struct ctdb_vnn *vnn;
2707 vnn = find_public_ip_vnn(ctdb, addr);
2709 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2710 ctdb_addr_to_str(addr)));
2715 tcparray = vnn->tcp_array;
2717 num = tcparray->num;
/* reply size = header up to connections[] plus num connection entries */
2722 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2723 + sizeof(struct ctdb_connection) * num;
2725 outdata->dptr = talloc_size(outdata, outdata->dsize);
2726 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2727 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2732 memcpy(&list->connections[0], tcparray->connections,
2733 sizeof(struct ctdb_connection) * num);
2741 set the list of all tcp tickles for a public address
/*
 * Marshal a tickle list and broadcast it to the cluster.
 *
 * ctdb     - daemon context; also the talloc parent of the temporary
 *            marshalling buffer
 * addr     - public address the tickles belong to
 * tcparray - connections to send; tcparray->num entries are copied
 *
 * The buffer is laid out as a struct ctdb_tickle_list_old (header plus
 * num connections) and sent with CTDB_CONTROL_SET_TCP_TICKLE_LIST to
 * CTDB_BROADCAST_ALL using CTDB_CTRL_FLAG_NOREPLY, so no reply is
 * requested. The buffer is freed with talloc_free() afterwards.
 *
 * NOTE(review): the return statements and any handling of a NULL
 * tcparray are not visible here - confirm.
 */
2743 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2744 ctdb_sock_addr *addr,
2745 struct ctdb_tcp_array *tcparray)
2749 struct ctdb_tickle_list_old *list;
2752 num = tcparray->num;
/* buffer size = header up to connections[] plus num connection entries */
2757 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2758 sizeof(struct ctdb_connection) * num;
2759 data.dptr = talloc_size(ctdb, data.dsize);
2760 CTDB_NO_MEMORY(ctdb, data.dptr);
2762 list = (struct ctdb_tickle_list_old *)data.dptr;
2766 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2769 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2770 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2771 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2773 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
/* the marshalled copy is no longer needed once the control was handed off */
2777 talloc_free(data.dptr);
2784 perform tickle updates if required
/*
 * Timer callback that pushes pending tickle-list updates to the cluster.
 *
 * private_data is the struct ctdb_context. The callback walks ctdb->vnn,
 * looks at vnn->pnn versus ctdb->pnn and at vnn->tcp_update_needed, and
 * calls ctdb_send_set_tcp_tickles_for_ip() for the addresses that
 * qualify. tcp_update_needed is cleared on the path that logs
 * "Sent tickle update".
 *
 * At the end the callback re-arms itself on ctdb->tickle_update_context
 * using tunable.tickle_update_interval, so it keeps running
 * periodically once started by ctdb_start_tcp_tickle_update().
 */
2786 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2787 struct tevent_timer *te,
2788 struct timeval t, void *private_data)
2790 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2792 struct ctdb_vnn *vnn;
2794 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2795 /* we only send out updates for public addresses that
2798 if (ctdb->pnn != vnn->pnn) {
2801 /* We only send out the updates if we need to */
2802 if (!vnn->tcp_update_needed) {
2805 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2806 &vnn->public_address,
2809 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2810 ctdb_addr_to_str(&vnn->public_address)));
2813 ("Sent tickle update for public address %s\n",
2814 ctdb_addr_to_str(&vnn->public_address)));
2815 vnn->tcp_update_needed = false;
/* schedule the next run of this callback */
2819 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2820 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2821 ctdb_update_tcp_tickles, ctdb);
2825 start periodic update of tcp tickles
/*
 * Start the periodic tickle update.
 *
 * Creates ctdb->tickle_update_context as a talloc child of ctdb and
 * registers the first ctdb_update_tcp_tickles() timer on it, offset by
 * tunable.tickle_update_interval from now. The timer is owned by
 * tickle_update_context.
 */
2827 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2829 ctdb->tickle_update_context = talloc_new(ctdb);
2831 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2832 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2833 ctdb_update_tcp_tickles, ctdb);
/*
 * State carried between repeated gratuitous ARP transmissions; used as
 * the private data of send_gratious_arp().
 *
 * ctdb - daemon context (its event context schedules the timers)
 * addr - address to announce
 *
 * NOTE(review): send_gratious_arp() also reads arp->iface and
 * arp->count; those members are not visible in this view of the struct.
 */
2839 struct control_gratious_arp {
2840 struct ctdb_context *ctdb;
2841 ctdb_sock_addr addr;
2847 send a control_gratuitous arp
/*
 * Timer callback: send one gratuitous ARP and schedule the next one.
 *
 * private_data is a struct control_gratious_arp. The ARP is sent with
 * ctdb_sys_send_arp() for arp->addr on arp->iface; a failure is logged
 * together with strerror(errno).
 *
 * arp->count is compared against CTDB_ARP_REPEAT to decide when the
 * series is finished; otherwise the callback re-arms itself with an
 * offset of CTDB_ARP_INTERVAL, using arp as the talloc parent of the
 * timer.
 * NOTE(review): the count update and what happens once the repeat
 * limit is reached are not visible here - confirm.
 */
2849 static void send_gratious_arp(struct tevent_context *ev,
2850 struct tevent_timer *te,
2851 struct timeval t, void *private_data)
2854 struct control_gratious_arp *arp = talloc_get_type(private_data,
2855 struct control_gratious_arp);
2857 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2859 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2860 arp->iface, strerror(errno)));
2865 if (arp->count == CTDB_ARP_REPEAT) {
2870 tevent_add_timer(arp->ctdb->ev, arp,
2871 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2872 send_gratious_arp, arp);
/*
 * Control handler: start a series of gratuitous ARPs for an address.
 *
 * indata.dptr is read as a struct ctdb_addr_info_old. The size is
 * validated in two steps: it must cover the fixed part up to iface,
 * and it is then compared with the fixed part plus the len field.
 *
 * On success a struct control_gratious_arp is allocated on ctdb, filled
 * with the address and a talloc_strdup() copy of the interface name,
 * and send_gratious_arp() is scheduled with timeval_zero().
 *
 * NOTE(review): talloc_strdup() reads iface up to a NUL byte; nothing
 * visible here checks that one exists within len bytes - confirm.
 */
2879 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2881 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2882 struct control_gratious_arp *arp;
2884 /* verify the size of indata */
2885 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2886 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2887 (unsigned)indata.dsize,
2888 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2892 ( offsetof(struct ctdb_addr_info_old, iface)
2893 + gratious_arp->len ) ){
2895 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2896 "but should be %u bytes\n",
2897 (unsigned)indata.dsize,
2898 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
/* the state is a talloc child of ctdb; the timers are children of the state */
2903 arp = talloc(ctdb, struct control_gratious_arp);
2904 CTDB_NO_MEMORY(ctdb, arp);
2907 arp->addr = gratious_arp->addr;
2908 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2909 CTDB_NO_MEMORY(ctdb, arp->iface);
/* timeval_zero(): request the first transmission without an added delay */
2912 tevent_add_timer(arp->ctdb->ev, arp,
2913 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address to this node.
 *
 * indata.dptr is read as a struct ctdb_addr_info_old. indata must at
 * least cover the fixed part up to iface; a "Wrong size" error reports
 * the expected total of fixed part plus pub->len.
 *
 * The address, netmask bits and interface string are handed to
 * ctdb_add_public_address() with true as the last argument; a failure
 * is logged.
 * NOTE(review): the return statements are not visible here - confirm
 * the returned codes.
 */
2918 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2920 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2923 /* verify the size of indata */
2924 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2925 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2929 ( offsetof(struct ctdb_addr_info_old, iface)
2932 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2933 "but should be %u bytes\n",
2934 (unsigned)indata.dsize,
2935 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2939 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2941 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2944 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * State kept while a release-IP control issued by
 * ctdb_control_del_public_address() is outstanding.
 *
 * c - the original control request; delete_ip_callback() replies to it
 */
2951 struct delete_ip_callback_state {
2952 struct ctdb_req_control_old *c;
2956 called when releaseip event finishes for del_public_address
/*
 * Completion callback for the release-IP control sent while deleting a
 * public address.
 *
 * private_data is a struct delete_ip_callback_state. The status and
 * errormsg received here are forwarded unchanged as the reply to the
 * original control (state->c), after which the state is freed.
 */
2958 static void delete_ip_callback(struct ctdb_context *ctdb,
2959 int32_t status, TDB_DATA data,
2960 const char *errormsg,
2963 struct delete_ip_callback_state *state =
2964 talloc_get_type(private_data, struct delete_ip_callback_state);
2966 /* If release failed then fail. */
2967 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2968 talloc_free(private_data);
/*
 * Control handler: delete a public address from this node.
 *
 * ctdb        - daemon context
 * c           - the control request; taken over with talloc_steal()
 *               when the reply is deferred
 * indata      - indata.dptr is read as a struct ctdb_addr_info_old
 * async_reply - set to true when the reply will be sent later from
 *               delete_ip_callback()
 *
 * After the size checks, ctdb->vnn is searched for the address. If this
 * node currently hosts it (vnn->pnn == ctdb->pnn), the vnn is marked
 * delete_pending and a CTDB_CONTROL_RELEASE_IP control is sent first;
 * otherwise do_delete_ip() is called for the vnn. An unknown address is
 * logged as an error.
 * NOTE(review): the return statements and the remaining arguments of
 * the release control are not visible here - confirm.
 */
2971 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2972 struct ctdb_req_control_old *c,
2973 TDB_DATA indata, bool *async_reply)
2975 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2976 struct ctdb_vnn *vnn;
2978 /* verify the size of indata */
2979 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2980 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2984 ( offsetof(struct ctdb_addr_info_old, iface)
2987 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2988 "but should be %u bytes\n",
2989 (unsigned)indata.dsize,
2990 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2994 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2996 /* walk over all public addresses until we find a match */
2997 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2998 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
/* hosted locally: the address has to be released before it is removed */
2999 if (vnn->pnn == ctdb->pnn) {
3000 struct delete_ip_callback_state *state;
3001 struct ctdb_public_ip *ip;
3005 vnn->delete_pending = true;
3007 state = talloc(ctdb,
3008 struct delete_ip_callback_state);
3009 CTDB_NO_MEMORY(ctdb, state);
/* the control payload is a child of state, so it is freed with it */
3012 ip = talloc(state, struct ctdb_public_ip);
3015 (__location__ " Out of memory\n"));
3020 ip->addr = pub->addr;
3022 data.dsize = sizeof(struct ctdb_public_ip);
3023 data.dptr = (unsigned char *)ip;
3025 ret = ctdb_daemon_send_control(ctdb,
3028 CTDB_CONTROL_RELEASE_IP,
3035 (__location__ "Unable to send "
3036 "CTDB_CONTROL_RELEASE_IP\n"));
/* keep the request alive until delete_ip_callback() replies to it */
3041 state->c = talloc_steal(state, c);
3042 *async_reply = true;
3044 /* This IP is not hosted on the
3045 * current node so just delete it
3047 do_delete_ip(ctdb, vnn);
3054 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3055 ctdb_addr_to_str(&pub->addr)));
/*
 * State kept while the "ipreallocated" event started by
 * ctdb_control_ipreallocated() is running.
 *
 * c - the original control request; ctdb_ipreallocated_callback()
 *     replies to it
 */
3060 struct ipreallocated_callback_state {
3061 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event script.
 *
 * p is a struct ipreallocated_callback_state. A failure is logged with
 * the status; when the status is -ETIME the node bans itself via
 * ctdb_ban_self(). The status is then sent back as the reply to the
 * original control (state->c).
 * NOTE(review): the condition guarding the failure log and the release
 * of state are not visible here - confirm.
 */
3064 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3065 int status, void *p)
3067 struct ipreallocated_callback_state *state =
3068 talloc_get_type(p, struct ipreallocated_callback_state);
3072 (" \"ipreallocated\" event script failed (status %d)\n",
3074 if (status == -ETIME) {
3075 ctdb_ban_self(ctdb);
3079 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3083 /* A control to run the ipreallocated event.
 *
 * ctdb        - daemon context
 * c           - the control request; taken over with talloc_steal()
 *               once the event has been started
 * async_reply - set to true because the reply is sent later from
 *               ctdb_ipreallocated_callback()
 *
 * The callback state is allocated on ctdb and is passed to
 * ctdb_event_script_callback() both as its second argument and as the
 * callback's private data. A failure to start the event is logged.
 * NOTE(review): the return statements are not visible here - confirm.
 */
3084 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3085 struct ctdb_req_control_old *c,
3089 struct ipreallocated_callback_state *state;
3091 state = talloc(ctdb, struct ipreallocated_callback_state);
3092 CTDB_NO_MEMORY(ctdb, state);
3094 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3096 ret = ctdb_event_script_callback(ctdb, state,
3097 ctdb_ipreallocated_callback, state,
3098 CTDB_EVENT_IPREALLOCATED,
3102 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3107 /* tell the control that we will reply asynchronously */
3108 state->c = talloc_steal(state, c);
3109 *async_reply = true;
3115 /* This function is called from the recovery daemon to verify that a remote
3116 node has the expected ip allocation.
3117 This is verified against ctdb->ip_tree
/*
 * Compare a node's reported public IP assignments with ctdb->ip_tree.
 *
 * ctdb - context holding the expected assignment in ip_tree
 * ips  - list of public IPs as reported by the node being checked
 *
 * Each reported address is looked up in ip_tree via ip_key(). An
 * address missing from the tree is logged as new or unknown. Entries
 * where either side has pnn -1 get separate treatment, and a pnn
 * mismatch is logged as an inconsistent allocation.
 * NOTE(review): the remaining parameter (used as pnn in the log
 * messages) and the return statements are not visible here - confirm.
 */
3119 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3120 struct ctdb_public_ip_list *ips,
3123 struct public_ip_list *tmp_ip;
3126 if (ctdb->ip_tree == NULL) {
3127 /* don't know the expected allocation yet, assume remote node
3136 for (i=0; i<ips->num; i++) {
3137 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3138 if (tmp_ip == NULL) {
3139 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3143 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3147 if (tmp_ip->pnn != ips->ip[i].pnn) {
3149 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3151 ctdb_addr_to_str(&ips->ip[i].addr),
3152 ips->ip[i].pnn, tmp_ip->pnn));
/*
 * Record a new owner for one public address in ctdb->ip_tree.
 *
 * ctdb - context holding ip_tree and the tunables
 * ip   - address and the pnn that should now be recorded for it
 *
 * Nothing is updated when the disable_ip_failover tunable is non-zero.
 * A missing tree or a missing record for the address is logged as an
 * error. Otherwise the change is logged and the record's pnn is set to
 * ip->pnn.
 * NOTE(review): the return statements are not visible here - confirm
 * the returned codes.
 */
3160 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3162 struct public_ip_list *tmp_ip;
3164 /* IP tree is never built if DisableIPFailover is set */
3165 if (ctdb->tunable.disable_ip_failover != 0) {
3169 if (ctdb->ip_tree == NULL) {
3170 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3174 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3175 if (tmp_ip == NULL) {
3176 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* log the old and new owner before overwriting the record */
3180 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3181 tmp_ip->pnn = ip->pnn;
/*
 * Discard the recorded IP assignment: releases ctdb->ip_tree with
 * TALLOC_FREE(). Afterwards update_ip_assignment_tree() and
 * verify_remote_ip_allocation() take their ip_tree == NULL branches
 * until a tree exists again.
 */
3186 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3188 TALLOC_FREE(ctdb->ip_tree);
/*
 * State of one in-flight "reload public IPs" operation, created by
 * ctdb_control_reload_public_ips().
 *
 * ctdb - daemon context
 * c    - the control request that is answered when the handle is
 *        destroyed
 * fde  - fd event watching the read end of the pipe from the child
 *
 * NOTE(review): the code also uses h->status, h->fd[] and h->child;
 * those members are not visible in this view of the struct.
 */
3191 struct ctdb_reloadips_handle {
3192 struct ctdb_context *ctdb;
3193 struct ctdb_req_control_old *c;
3197 struct tevent_fd *fde;
/*
 * Talloc destructor for a reloadips handle (installed by
 * ctdb_control_reload_public_ips()).
 *
 * Clears ctdb->reload_ips if it still points at this handle, sends
 * h->status as the reply to the pending control h->c, and sends SIGKILL
 * to the child process through ctdb_kill().
 */
3200 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3202 if (h == h->ctdb->reload_ips) {
3203 h->ctdb->reload_ips = NULL;
3206 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3209 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer callback registered by ctdb_control_reload_public_ips() to
 * bound how long the reloadips child may run.
 *
 * private_data is the struct ctdb_reloadips_handle of the operation.
 * NOTE(review): the action taken on timeout is not visible here;
 * presumably the handle is freed so that its destructor replies and
 * kills the child - confirm.
 */
3213 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3214 struct tevent_timer *te,
3215 struct timeval t, void *private_data)
3217 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd event callback: the reloadips child has reported its result.
 *
 * private_data is the struct ctdb_reloadips_handle. One byte is read
 * from the read end of the pipe (h->fd[0]); the child writes its result
 * there in ctdb_control_reload_public_ips(). A short read or a non-zero
 * byte is logged as a child error.
 * NOTE(review): how h->status is set and when the handle is freed are
 * not visible here - confirm.
 */
3222 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3223 struct tevent_fd *fde,
3224 uint16_t flags, void *private_data)
3226 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3231 ret = sys_read(h->fd[0], &res, 1);
3232 if (ret < 1 || res != 0) {
3233 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Body of the reloadips child process: bring the daemon's set of public
 * addresses in line with the public addresses file.
 *
 * Steps, all using the temporary talloc context mem_ctx:
 *  1. fetch the public IPs currently known to the local node;
 *  2. re-read the public addresses file into ctdb->vnn of this process;
 *  3. for every node IP that has no matching vnn, queue a
 *     CTDB_CONTROL_DEL_PUBLIC_IP control;
 *  4. for every vnn that has no matching node IP, queue a
 *     CTDB_CONTROL_ADD_PUBLIC_IP control carrying address, netmask bits
 *     and a comma-separated interface list;
 *  5. wait for all queued controls with ctdb_client_async_wait().
 *
 * mem_ctx is freed on the visible error paths and at the end.
 * NOTE(review): the return statements are not visible here - confirm
 * the returned codes.
 */
3241 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3243 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3244 struct ctdb_public_ip_list_old *ips;
3245 struct ctdb_vnn *vnn;
3246 struct client_async_data *async_data;
3247 struct timeval timeout;
3249 struct ctdb_client_control_state *state;
3253 CTDB_NO_MEMORY(ctdb, mem_ctx);
3255 /* Read IPs from local node */
3256 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3257 CTDB_CURRENT_NODE, mem_ctx, &ips);
3260 ("Unable to fetch public IPs from local node\n"));
3261 talloc_free(mem_ctx);
3265 /* Read IPs file - this is safe since this is a child process */
3267 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3268 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3269 talloc_free(mem_ctx);
/* collects the add/delete controls so they can be waited for together */
3273 async_data = talloc_zero(mem_ctx, struct client_async_data);
3274 CTDB_NO_MEMORY(ctdb, async_data);
3276 /* Compare IPs between node and file for IPs to be deleted */
3277 for (i = 0; i < ips->num; i++) {
3279 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3280 if (ctdb_same_ip(&vnn->public_address,
3281 &ips->ips[i].addr)) {
3282 /* IP is still in file */
3288 /* Delete IP ips->ips[i] */
3289 struct ctdb_addr_info_old *pub;
3292 ("IP %s no longer configured, deleting it\n",
3293 ctdb_addr_to_str(&ips->ips[i].addr)));
/* zeroed payload: only the address is filled in for a delete */
3295 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3296 CTDB_NO_MEMORY(ctdb, pub);
3298 pub->addr = ips->ips[i].addr;
3302 timeout = TAKEOVER_TIMEOUT();
3304 data.dsize = offsetof(struct ctdb_addr_info_old,
3306 data.dptr = (uint8_t *)pub;
3308 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3309 CTDB_CONTROL_DEL_PUBLIC_IP,
3310 0, data, async_data,
3312 if (state == NULL) {
3315 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3319 ctdb_client_async_add(async_data, state);
3323 /* Compare IPs between node and file for IPs to be added */
3325 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3326 for (i = 0; i < ips->num; i++) {
3327 if (ctdb_same_ip(&vnn->public_address,
3328 &ips->ips[i].addr)) {
3329 /* IP already on node */
3333 if (i == ips->num) {
3334 /* Add vnn->public_address: no entry of ips matched it, so i == ips->num */
3335 struct ctdb_addr_info_old *pub;
3336 const char *ifaces = NULL;
3341 ("New IP %s configured, adding it\n",
3342 ctdb_addr_to_str(&vnn->public_address)));
/*
 * Broadcast this node's pnn on CTDB_SRVID_REBALANCE_NODE to the
 * connected nodes; a failure is logged as a warning.
 * NOTE(review): the condition guarding this broadcast is not visible
 * here - confirm how often it is sent.
 */
3344 uint32_t pnn = ctdb_get_pnn(ctdb);
3346 data.dsize = sizeof(pnn);
3347 data.dptr = (uint8_t *)&pnn;
3349 ret = ctdb_client_send_message(
3351 CTDB_BROADCAST_CONNECTED,
3352 CTDB_SRVID_REBALANCE_NODE,
3355 DEBUG(DEBUG_WARNING,
3356 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/*
 * Join the vnn's interface names into one comma-separated string.
 * NOTE(review): the joined strings are allocated on vnn rather than
 * mem_ctx and no NULL check of talloc_asprintf() is visible - confirm
 * this is intended.
 */
3362 ifaces = vnn->ifaces[0];
3364 while (vnn->ifaces[iface] != NULL) {
3365 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3366 vnn->ifaces[iface]);
/* len includes the terminating NUL of the interface string */
3370 len = strlen(ifaces) + 1;
3371 pub = talloc_zero_size(mem_ctx,
3372 offsetof(struct ctdb_addr_info_old, iface) + len);
3373 CTDB_NO_MEMORY(ctdb, pub);
3375 pub->addr = vnn->public_address;
3376 pub->mask = vnn->public_netmask_bits;
3378 memcpy(&pub->iface[0], ifaces, pub->len);
3380 timeout = TAKEOVER_TIMEOUT();
3382 data.dsize = offsetof(struct ctdb_addr_info_old,
3384 data.dptr = (uint8_t *)pub;
3386 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3387 CTDB_CONTROL_ADD_PUBLIC_IP,
3388 0, data, async_data,
3390 if (state == NULL) {
3393 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3397 ctdb_client_async_add(async_data, state);
/* block until every queued add/delete control has completed */
3401 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3402 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3406 talloc_free(mem_ctx);
3410 talloc_free(mem_ctx);
3414 /* This control is sent to force the node to re-read the public addresses file
3415 and drop any addresses we should no longer host, and add new addresses
3416 that we are now able to host
/*
 * Control handler: reload the public addresses file in a child process.
 *
 * ctdb        - daemon context
 * c           - the control request; taken over with talloc_steal() and
 *               answered from the handle's destructor
 * async_reply - set to true because the reply is sent later
 *
 * Any reload already in progress (ctdb->reload_ips) is freed first. A
 * new handle is allocated on ctdb, a pipe is created and a child is
 * forked with ctdb_fork().
 *
 * Child: switches to client mode, runs ctdb_reloadips_child(), writes
 * its one-byte result to the pipe and then waits for the parent to
 * exit.
 *
 * Parent: installs ctdb_reloadips_destructor(), watches the read end
 * of the pipe with ctdb_reloadips_child_handler() and arms
 * ctdb_reloadips_timeout_event() with an offset of 120 (presumably
 * seconds - confirm).
 *
 * NOTE(review): the pipe failure message mentions ctdb_freeze_lock,
 * which looks copied from elsewhere - confirm. The return statements
 * and the error-path cleanup are not visible here.
 */
3418 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3420 struct ctdb_reloadips_handle *h;
3421 pid_t parent = getpid();
/* only one reload at a time: drop a previous handle before starting over */
3423 if (ctdb->reload_ips != NULL) {
3424 talloc_free(ctdb->reload_ips);
3425 ctdb->reload_ips = NULL;
3428 h = talloc(ctdb, struct ctdb_reloadips_handle);
3429 CTDB_NO_MEMORY(ctdb, h);
3434 if (pipe(h->fd) == -1) {
3435 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3440 h->child = ctdb_fork(ctdb);
3441 if (h->child == (pid_t)-1) {
3442 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* child side */
3450 if (h->child == 0) {
3451 signed char res = 0;
3454 debug_extra = talloc_asprintf(NULL, "reloadips:");
3456 prctl_set_comment("ctdb_reloadips");
3457 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3458 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3461 res = ctdb_reloadips_child(ctdb);
3463 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
/* report the result byte to the parent through the pipe */
3467 sys_write(h->fd[1], &res, 1);
3468 ctdb_wait_for_process_to_exit(parent);
/* parent side: the handle now owns the request until the reply is sent */
3472 h->c = talloc_steal(h, c);
3475 set_close_on_exec(h->fd[0]);
3477 talloc_set_destructor(h, ctdb_reloadips_destructor);
3480 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3481 ctdb_reloadips_child_handler, (void *)h);
3482 tevent_fd_set_auto_close(h->fde);
3484 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3485 ctdb_reloadips_timeout_event, h);
3487 /* we reply later */
3488 *async_reply = true;