4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
/*
 * Timing helpers for takeover handling.
 *
 * TAKEOVER_TIMEOUT() expands to timeval_current_ofs() applied to the
 * takeover_timeout tunable; it relies on a variable named ctdb being in
 * scope at the point of use.
 * CTDB_ARP_INTERVAL is the first argument given to timeval_current_ofs()
 * when ctdb_control_send_arp() re-arms its timer, and CTDB_ARP_REPEAT is
 * the value that handler compares arp->count against.
 */
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
/*
 * One entry in the ctdb->ifaces list. prev and next are the list links
 * maintained by DLIST_ADD() and DLIST_REMOVE(); code in this file also
 * reads the name, references and link_up members of these entries.
 */
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
/*
 * Return a printable name for the interface currently assigned to vnn,
 * taken from vnn->iface->name.
 *
 * NOTE(review): several callers pass a vnn whose iface may be NULL (for
 * example ctdb_vnn_unassign_iface() tests vnn->iface right after calling
 * this), so a NULL guard and fallback string presumably exist here.
 * Confirm before relying on it.
 */
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
/*
 * Register an interface name in ctdb->ifaces.
 *
 * ctdb:  daemon context owning the interface list.
 * iface: interface name; names longer than CTDB_IFACE_SIZE are logged as
 *        an error.
 *
 * The list is searched by name first so an existing entry can be detected.
 * Otherwise a zeroed struct ctdb_interface is allocated with ctdb as its
 * talloc parent, the name is duplicated onto the new entry, and the entry
 * is linked in with DLIST_ADD().
 *
 * Returns int. NOTE(review): presumably 0 on success and non-zero on
 * failure, matching how ctdb_add_public_address() uses the result; verify.
 */
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
/*
 * Scan the NULL-terminated vnn->ifaces array for an entry that compares
 * equal (strcmp) to the given name.
 *
 * Returns bool. NOTE(review): presumably true when a match is found and
 * false otherwise; confirm.
 */
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
/*
 * Remove from ctdb->ifaces every interface that is named by the given vnn
 * but is not named by any VNN on the ctdb->vnn list.
 *
 * ctdb: daemon context holding both the ifaces and vnn lists.
 * vnn:  the VNN whose interface names restrict which entries are examined.
 *
 * The next pointer is kept in a separate local so that the walk can
 * continue after an entry has been unlinked with DLIST_REMOVE().
 */
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn once) and then walking
118 * ctdb->ifaces once and deleting those not in the tree. Let's go to
119 * one of those if the naive implementation causes problems... :-)
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122 struct ctdb_vnn *vnn)
124 struct ctdb_interface *i, *next;
126 /* For each interface, check if there's an IP using it. */
127 for (i = ctdb->ifaces; i != NULL; i = next) {
132 /* Only consider interfaces named in the given VNN. */
133 if (!vnn_has_interface_with_name(vnn, i->name)) {
137 /* Search for a vnn with this interface. */
139 for (tv=ctdb->vnn; tv; tv=tv->next) {
140 if (vnn_has_interface_with_name(tv, i->name)) {
147 /* None of the VNNs are using this interface. */
148 DLIST_REMOVE(ctdb->ifaces, i);
/*
 * Look up an interface by name in ctdb->ifaces using strcmp().
 *
 * Returns a struct ctdb_interface pointer. NOTE(review): presumably the
 * matching entry, or NULL when no entry has that name; confirm.
 */
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
158 struct ctdb_interface *i;
160 for (i=ctdb->ifaces;i;i=i->next) {
161 if (strcmp(i->name, iface) == 0) {
/*
 * Pick the preferred interface for vnn from the names in vnn->ifaces.
 *
 * Each name is resolved with ctdb_find_iface() and the candidate is
 * compared with the best one so far by reference count, a lower count
 * being preferred.
 *
 * Returns the chosen interface; ctdb_control_takeover_ip() checks the
 * result against NULL, so NULL means no interface could be chosen.
 */
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170 struct ctdb_vnn *vnn)
173 struct ctdb_interface *cur = NULL;
174 struct ctdb_interface *best = NULL;
176 for (i=0; vnn->ifaces[i]; i++) {
178 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
192 if (cur->references < best->references) {
/*
 * Assign an interface to the public address described by vnn.
 *
 * Logs at INFO level when the address is still assigned to an interface.
 * Otherwise the interface is chosen with ctdb_vnn_best_iface(), and an
 * error is logged when none can be assigned. On assignment vnn->pnn is
 * set to this node (ctdb->pnn) and the new interface is logged together
 * with a reference count.
 *
 * Returns int32_t. NOTE(review): ctdb_do_takeip() and ctdb_do_updateip()
 * log a failure based on the result, so non-zero presumably means no
 * usable interface; confirm.
 */
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202 struct ctdb_vnn *vnn)
204 struct ctdb_interface *best = NULL;
207 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208 "still assigned to iface '%s'\n",
209 ctdb_addr_to_str(&vnn->public_address),
210 ctdb_vnn_iface_string(vnn)));
214 best = ctdb_vnn_best_iface(ctdb, vnn);
216 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217 "cannot assign to iface any iface\n",
218 ctdb_addr_to_str(&vnn->public_address)));
224 vnn->pnn = ctdb->pnn;
226 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227 "now assigned to iface '%s' refs[%d]\n",
228 ctdb_addr_to_str(&vnn->public_address),
229 ctdb_vnn_iface_string(vnn),
/*
 * Detach vnn from its currently assigned interface.
 *
 * Logs the address, the old interface name and its reference count (0 is
 * printed when vnn->iface is NULL), then decrements the reference count
 * of the interface.
 *
 * NOTE(review): the final test on vnn->pnn == ctdb->pnn presumably resets
 * the owner of the address when this node held it; confirm.
 */
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238 "now unassigned (old iface '%s' refs[%d])\n",
239 ctdb_addr_to_str(&vnn->public_address),
240 ctdb_vnn_iface_string(vnn),
241 vnn->iface?vnn->iface->references:0));
243 vnn->iface->references--;
246 if (vnn->pnn == ctdb->pnn) {
/*
 * Decide whether this node is able to host the address described by vnn.
 *
 * The checks, in order: the daemon runstate must be CTDB_RUNSTATE_RUNNING,
 * the vnn must not be marked delete_pending, the currently assigned
 * interface (if any) is tested for link_up, and finally each interface
 * named in vnn->ifaces is looked up with ctdb_find_iface().
 *
 * Returns bool. NOTE(review): which outcome each check produces should be
 * confirmed against the full function body.
 */
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252 struct ctdb_vnn *vnn)
256 /* Nodes that are not RUNNING can not host IPs */
257 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
261 if (vnn->delete_pending) {
265 if (vnn->iface && vnn->iface->link_up) {
269 for (i=0; vnn->ifaces[i]; i++) {
270 struct ctdb_interface *cur;
272 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/*
 * State for the repeated gratuitous ARP and tickle announcements made by
 * ctdb_control_send_arp(). It is allocated in ctdb_announce_vnn_iface(),
 * which stores the public address in addr and moves the TCP connection
 * array of the vnn into tcparray. The handler also reads a count member.
 */
285 struct ctdb_takeover_arp {
286 struct ctdb_context *ctdb;
289 struct ctdb_tcp_array *tcparray;
290 struct ctdb_vnn *vnn;
/*
 * Doubly linked list node (prev and next links) wrapping one
 * struct ctdb_connection.
 */
295 lists of tcp endpoints
297 struct ctdb_tcp_list {
298 struct ctdb_tcp_list *prev, *next;
299 struct ctdb_connection connection;
/*
 * Doubly linked list node (prev and next links) that also records the
 * daemon context it belongs to.
 */
303 list of clients to kill on IP release
305 struct ctdb_client_ip {
306 struct ctdb_client_ip *prev, *next;
307 struct ctdb_context *ctdb;
/*
 * Timer handler that announces a taken-over address.
 *
 * private_data is the struct ctdb_takeover_arp set up by
 * ctdb_announce_vnn_iface(). The handler sends an ARP for arp->addr on
 * the interface currently assigned to arp->vnn (a failure is logged at
 * CRIT level with strerror(errno)), then walks arp->tcparray and sends a
 * TCP tickle for every recorded connection, logging each failure.
 *
 * arp->count is compared with CTDB_ARP_REPEAT, and the handler re-arms
 * itself on arp->vnn->takeover_ctx with an offset of CTDB_ARP_INTERVAL
 * plus 100000 in the second argument of timeval_current_ofs().
 *
 * NOTE(review): the count presumably ends the repetition once the limit
 * is reached; confirm against the full body.
 */
314 send a gratuitous arp
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317 struct tevent_timer *te,
318 struct timeval t, void *private_data)
320 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
321 struct ctdb_takeover_arp);
323 struct ctdb_tcp_array *tcparray;
324 const char *iface = ctdb_vnn_iface_string(arp->vnn);
326 ret = ctdb_sys_send_arp(&arp->addr, iface);
328 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329 iface, strerror(errno)));
332 tcparray = arp->tcparray;
334 for (i=0;i<tcparray->num;i++) {
335 struct ctdb_connection *tcon;
337 tcon = &tcparray->connections[i];
338 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339 (unsigned)ntohs(tcon->dst.ip.sin_port),
340 ctdb_addr_to_str(&tcon->src),
341 (unsigned)ntohs(tcon->src.ip.sin_port)));
342 ret = ctdb_sys_send_tcp(
347 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348 ctdb_addr_to_str(&tcon->src)));
355 if (arp->count == CTDB_ARP_REPEAT) {
360 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362 ctdb_control_send_arp, arp);
/*
 * Start announcing the public address of vnn on its interface.
 *
 * Creates vnn->takeover_ctx (a talloc context under vnn) when it does not
 * exist yet and allocates the ARP state beneath it, so freeing
 * takeover_ctx cancels the announcements. The public address is copied
 * into the state and the TCP connection array of the vnn is handed over
 * with talloc_steal(); vnn->tcp_array is then cleared and
 * vnn->tcp_update_needed is set. A zero-delay timer is scheduled that
 * runs ctdb_control_send_arp().
 *
 * Returns int32_t. NOTE(review): callers reply with -1 based on the
 * result, so non-zero presumably signals failure; confirm.
 */
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366 struct ctdb_vnn *vnn)
368 struct ctdb_takeover_arp *arp;
369 struct ctdb_tcp_array *tcparray;
371 if (!vnn->takeover_ctx) {
372 vnn->takeover_ctx = talloc_new(vnn);
373 if (!vnn->takeover_ctx) {
378 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
384 arp->addr = vnn->public_address;
387 tcparray = vnn->tcp_array;
389 /* add all of the known tcp connections for this IP to the
390 list of tcp connections to send tickle acks for */
391 arp->tcparray = talloc_steal(arp, tcparray);
393 vnn->tcp_array = NULL;
394 vnn->tcp_update_needed = true;
397 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398 timeval_zero(), ctdb_control_send_arp, arp);
/*
 * Context handed to release_ip_callback(): the pending control request
 * (c), a copy of the address being released (addr) and the vnn that the
 * address belongs to.
 */
403 struct takeover_callback_state {
404 struct ctdb_req_control_old *c;
405 ctdb_sock_addr *addr;
406 struct ctdb_vnn *vnn;
/*
 * Context handed to ctdb_do_takeip_callback(): the pending control
 * request (c) and the vnn being taken over.
 */
409 struct ctdb_do_takeip_state {
410 struct ctdb_req_control_old *c;
411 struct ctdb_vnn *vnn;
/*
 * Completion handler for the event script started by ctdb_do_takeip().
 *
 * private_data is the struct ctdb_do_takeip_state. A failure is logged
 * and reported to the requester by replying with the script status;
 * -ETIME is tested separately. Otherwise, when ctdb->do_checkpublicip is
 * set, the address is announced with ctdb_announce_vnn_iface(), and -1 is
 * replied if that goes wrong. The address string is then sent to this
 * node as a CTDB_SRVID_TAKE_IP message and the control is answered with
 * status 0.
 *
 * NOTE(review): what the -ETIME branch does is not documented here;
 * confirm from the full body.
 */
415 called when takeip event finishes
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
420 struct ctdb_do_takeip_state *state =
421 talloc_get_type(private_data, struct ctdb_do_takeip_state);
426 if (status == -ETIME) {
429 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430 ctdb_addr_to_str(&state->vnn->public_address),
431 ctdb_vnn_iface_string(state->vnn)));
432 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
438 if (ctdb->do_checkpublicip) {
440 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
442 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
449 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450 data.dsize = strlen((char *)data.dptr) + 1;
451 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
453 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
456 /* the control succeeded */
457 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor for struct ctdb_do_takeip_state: clears
 * update_in_flight on the vnn so that a later update for the same
 * address is accepted again.
 */
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
464 state->vnn->update_in_flight = false;
/*
 * Begin taking over the public address described by vnn.
 *
 * ctdb: daemon context.
 * c:    control request to answer once the event script has finished.
 * vnn:  address to take over.
 *
 * The request is rejected (NOTICE log) while another update for the same
 * address is in flight. An interface is assigned with
 * ctdb_vnn_assign_iface(), with an error logged when none is usable.
 * The state is allocated under vnn, the request c is re-parented to ctdb
 * with talloc_steal(), update_in_flight is set, and a destructor is
 * installed to clear the flag again. The event script is then started
 * with ctdb_do_takeip_callback() as its completion handler and the
 * interface name, address and netmask bits as arguments. A failure to
 * start it is logged.
 *
 * Returns int32_t. NOTE(review): presumably 0 when the script has been
 * started and -1 on every rejection or failure; confirm.
 */
469 take over an ip address
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472 struct ctdb_req_control_old *c,
473 struct ctdb_vnn *vnn)
476 struct ctdb_do_takeip_state *state;
478 if (vnn->update_in_flight) {
479 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480 "update for this IP already in flight\n",
481 ctdb_addr_to_str(&vnn->public_address),
482 vnn->public_netmask_bits));
486 ret = ctdb_vnn_assign_iface(ctdb, vnn);
488 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489 "assign a usable interface\n",
490 ctdb_addr_to_str(&vnn->public_address),
491 vnn->public_netmask_bits));
495 state = talloc(vnn, struct ctdb_do_takeip_state);
496 CTDB_NO_MEMORY(ctdb, state);
498 state->c = talloc_steal(ctdb, c);
501 vnn->update_in_flight = true;
502 talloc_set_destructor(state, ctdb_takeip_destructor);
504 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505 ctdb_addr_to_str(&vnn->public_address),
506 vnn->public_netmask_bits,
507 ctdb_vnn_iface_string(vnn)));
509 ret = ctdb_event_script_callback(ctdb,
511 ctdb_do_takeip_callback,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
520 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521 ctdb_addr_to_str(&vnn->public_address),
522 ctdb_vnn_iface_string(vnn)));
/*
 * Context handed to ctdb_do_updateip_callback(): the pending control
 * request (c), the interface the address was on before the move (old,
 * restored if the move fails) and the vnn being moved.
 */
530 struct ctdb_do_updateip_state {
531 struct ctdb_req_control_old *c;
532 struct ctdb_interface *old;
533 struct ctdb_vnn *vnn;
/*
 * Completion handler for the event script started by ctdb_do_updateip().
 *
 * private_data is the struct ctdb_do_updateip_state. When the move
 * failed it is logged, the new interface is unassigned, the previous
 * interface (state->old) is put back on the vnn with its reference count
 * incremented, and the requester receives the script status. -ETIME is
 * tested separately. Otherwise, when ctdb->do_checkpublicip is set, the
 * address is announced with ctdb_announce_vnn_iface(), and -1 is replied
 * if that goes wrong. On success the control is answered with status 0.
 */
537 called when updateip event finishes
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
542 struct ctdb_do_updateip_state *state =
543 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547 if (status == -ETIME) {
550 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551 ctdb_addr_to_str(&state->vnn->public_address),
553 ctdb_vnn_iface_string(state->vnn)));
556 * All we can do is reset the old interface
557 * and let the next run fix it
559 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560 state->vnn->iface = state->old;
561 state->vnn->iface->references++;
563 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
568 if (ctdb->do_checkpublicip) {
570 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
572 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
579 /* the control succeeded */
580 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor for struct ctdb_do_updateip_state: clears
 * update_in_flight on the vnn so that a later update for the same
 * address is accepted again.
 */
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
587 state->vnn->update_in_flight = false;
/*
 * Move the public address of vnn from its current interface to the best
 * one available.
 *
 * ctdb: daemon context.
 * c:    control request to answer when the move has completed.
 * vnn:  address to move; its current interface is remembered as old.
 *
 * The request is rejected (NOTICE log) while another update for the same
 * address is in flight. The vnn is unassigned and then reassigned with
 * ctdb_vnn_assign_iface(). If the new interface has the same name as the
 * old one, nothing needs to run and the control is answered with status
 * 0 straight away. Otherwise the state is allocated under vnn, c is
 * re-parented to ctdb, update_in_flight is set with a destructor that
 * clears it, and the CTDB_EVENT_UPDATE_IP event script is started with
 * ctdb_do_updateip_callback() as its completion handler.
 *
 * NOTE(review): old is dereferenced (old->name) without a visible NULL
 * check; the caller appears to guarantee vnn->iface is set. Confirm.
 * NOTE(review): the failure log message spells assign as assin.
 */
592 update (move) an ip address
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595 struct ctdb_req_control_old *c,
596 struct ctdb_vnn *vnn)
599 struct ctdb_do_updateip_state *state;
600 struct ctdb_interface *old = vnn->iface;
601 const char *new_name;
603 if (vnn->update_in_flight) {
604 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605 "update for this IP already in flight\n",
606 ctdb_addr_to_str(&vnn->public_address),
607 vnn->public_netmask_bits));
611 ctdb_vnn_unassign_iface(ctdb, vnn);
612 ret = ctdb_vnn_assign_iface(ctdb, vnn);
614 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615 "assin a usable interface (old iface '%s')\n",
616 ctdb_addr_to_str(&vnn->public_address),
617 vnn->public_netmask_bits,
622 new_name = ctdb_vnn_iface_string(vnn);
623 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624 /* A benign update from one interface onto itself.
625 * no need to run the eventscripts in this case, just return
628 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632 state = talloc(vnn, struct ctdb_do_updateip_state);
633 CTDB_NO_MEMORY(ctdb, state);
635 state->c = talloc_steal(ctdb, c);
639 vnn->update_in_flight = true;
640 talloc_set_destructor(state, ctdb_updateip_destructor);
642 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643 "interface %s to %s\n",
644 ctdb_addr_to_str(&vnn->public_address),
645 vnn->public_netmask_bits,
649 ret = ctdb_event_script_callback(ctdb,
651 ctdb_do_updateip_callback,
653 CTDB_EVENT_UPDATE_IP,
657 ctdb_addr_to_str(&vnn->public_address),
658 vnn->public_netmask_bits);
660 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661 ctdb_addr_to_str(&vnn->public_address),
662 old->name, new_name));
/*
 * Search ctdb->vnn for the entry whose public_address matches addr
 * according to ctdb_same_ip().
 *
 * NOTE(review): the older description below speaks of returning -1, but
 * the return type is a struct ctdb_vnn pointer; presumably NULL is
 * returned when the address is not a known public address. Confirm and
 * update the older text.
 */
671 Find the vnn of the node that has a public ip address
672 returns -1 if the address is not known as a public address
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
676 struct ctdb_vnn *vnn;
678 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679 if (ctdb_same_ip(&vnn->public_address, addr)) {
/*
 * Control handler asking this node to take over a public address.
 *
 * indata.dptr is interpreted as a struct ctdb_public_ip. The request is
 * refused with an error log when pip->pnn is not this node, and logged at
 * INFO level when the address is not one of the public addresses.
 *
 * When IP failover is enabled and ctdb->do_checkpublicip is set, the
 * kernel is asked whether the address is already configured (have_ip).
 * The best interface is looked up, and a series of consistency checks
 * compares have_ip, vnn->iface and vnn->pnn, logging each inconsistent
 * combination. If the vnn sits on a different interface than the best
 * one, the handler looks at link_up and at the reference counts to
 * decide whether the address should be moved.
 *
 * The outcome is one of: ctdb_do_takeip(), ctdb_do_updateip(), or an
 * INFO log stating that the takeover is redundant because the address is
 * already held.
 *
 * NOTE(review): vnn->iface is dereferenced in the branch taken when it
 * differs from best_iface; verify that a NULL iface cannot reach it.
 * NOTE(review): two log messages join string pieces without a separating
 * space (failed to find + a usable, node %d + and we are).
 */
688 take over an ip address
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691 struct ctdb_req_control_old *c,
696 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697 struct ctdb_vnn *vnn;
698 bool have_ip = false;
699 bool do_updateip = false;
700 bool do_takeip = false;
701 struct ctdb_interface *best_iface = NULL;
703 if (pip->pnn != ctdb->pnn) {
704 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705 "with pnn %d, but we're node %d\n",
706 ctdb_addr_to_str(&pip->addr),
707 pip->pnn, ctdb->pnn));
711 /* update our vnn list */
712 vnn = find_public_ip_vnn(ctdb, &pip->addr);
714 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715 ctdb_addr_to_str(&pip->addr)));
719 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720 have_ip = ctdb_sys_have_ip(&pip->addr);
722 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723 if (best_iface == NULL) {
724 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725 "a usable interface (old %s, have_ip %d)\n",
726 ctdb_addr_to_str(&vnn->public_address),
727 vnn->public_netmask_bits,
728 ctdb_vnn_iface_string(vnn),
733 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
739 if (vnn->iface == NULL && have_ip) {
740 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742 ctdb_addr_to_str(&vnn->public_address)));
746 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748 "and we have it on iface[%s], but it was assigned to node %d"
749 "and we are node %d, banning ourself\n",
750 ctdb_addr_to_str(&vnn->public_address),
751 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
756 if (vnn->pnn == -1 && have_ip) {
757 vnn->pnn = ctdb->pnn;
758 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759 "and we already have it on iface[%s], update local daemon\n",
760 ctdb_addr_to_str(&vnn->public_address),
761 ctdb_vnn_iface_string(vnn)));
766 if (vnn->iface != best_iface) {
767 if (!vnn->iface->link_up) {
769 } else if (vnn->iface->references > (best_iface->references + 1)) {
770 /* only move when the rebalance gains something */
778 ctdb_vnn_unassign_iface(ctdb, vnn);
785 ret = ctdb_do_takeip(ctdb, c, vnn);
789 } else if (do_updateip) {
790 ret = ctdb_do_updateip(ctdb, c, vnn);
796 * The interface is up and the kernel known the ip
799 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800 ctdb_addr_to_str(&pip->addr),
801 vnn->public_netmask_bits,
802 ctdb_vnn_iface_string(vnn)));
806 /* tell ctdb_control.c that we will be replying asynchronously */
/*
 * Remove a public address from this node: unlink vnn from ctdb->vnn,
 * drop its interface assignment, and then delete any interface that no
 * remaining VNN refers to.
 */
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
814 DLIST_REMOVE(ctdb->vnn, vnn);
815 ctdb_vnn_unassign_iface(ctdb, vnn);
816 ctdb_remove_orphaned_ifaces(ctdb, vnn);
/*
 * Completion handler for the event script started by
 * ctdb_control_release_ip().
 *
 * private_data is the struct takeover_callback_state. -ETIME is tested
 * first. When IP failover is enabled and ctdb->do_checkpublicip is set,
 * the kernel is asked whether the address is still configured; if it is,
 * the release is treated as failed and the requester is answered
 * accordingly. Otherwise the address string is duplicated onto state and
 * sent to this node as a CTDB_SRVID_RELEASE_IP message, the interface is
 * unassigned from the vnn, a vnn marked delete_pending is removed with
 * do_delete_ip(), and the control is answered with status 0.
 */
821 called when releaseip event finishes
823 static void release_ip_callback(struct ctdb_context *ctdb, int status,
826 struct takeover_callback_state *state =
827 talloc_get_type(private_data, struct takeover_callback_state);
830 if (status == -ETIME) {
834 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835 if (ctdb_sys_have_ip(state->addr)) {
837 ("IP %s still hosted during release IP callback, failing\n",
838 ctdb_addr_to_str(state->addr)));
839 ctdb_request_control_reply(ctdb, state->c,
846 /* send a message to all clients of this node telling them
847 that the cluster has been reconfigured and they should
848 release any sockets on this IP */
849 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851 data.dsize = strlen((char *)data.dptr)+1;
853 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
855 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
857 ctdb_vnn_unassign_iface(ctdb, state->vnn);
859 /* Process the IP if it has been marked for deletion */
860 if (state->vnn->delete_pending) {
861 do_delete_ip(ctdb, state->vnn);
865 /* the control succeeded */
866 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor for struct takeover_callback_state: clears
 * update_in_flight on the vnn, provided state->vnn is still set.
 */
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
872 if (state->vnn != NULL) {
873 state->vnn->update_in_flight = false;
/*
 * Control handler asking this node to release a public address.
 *
 * indata.dptr is interpreted as a struct ctdb_public_ip. An address that
 * is not a public address is only logged. Any announcement still running
 * for the address is cancelled by freeing vnn->takeover_ctx.
 *
 * A release is treated as redundant, and only logged at DEBUG level, in
 * two cases: IP failover checking is active and the kernel does not hold
 * the address (the interface is unassigned as well), or the vnn has no
 * interface assigned. A release is rejected while another update for the
 * same address is in flight.
 *
 * Otherwise a struct takeover_callback_state is allocated on ctdb, the
 * request c and a copy of the address are attached to it,
 * update_in_flight is set with a destructor that clears it, and the
 * CTDB_EVENT_RELEASE_IP event script is started with
 * release_ip_callback() as its completion handler.
 *
 * NOTE(review): iface is obtained with strdup(); verify that it is
 * checked for NULL and released with free() on every exit path.
 */
879 release an ip address
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
882 struct ctdb_req_control_old *c,
887 struct takeover_callback_state *state;
888 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889 struct ctdb_vnn *vnn;
892 /* update our vnn list */
893 vnn = find_public_ip_vnn(ctdb, &pip->addr);
895 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896 ctdb_addr_to_str(&pip->addr)));
901 /* stop any previous arps */
902 talloc_free(vnn->takeover_ctx);
903 vnn->takeover_ctx = NULL;
905 /* Some ctdb tool commands (e.g. moveip) send
906 * lazy multicast to drop an IP from any node that isn't the
907 * intended new node. The following causes makes ctdbd ignore
908 * a release for any address it doesn't host.
910 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911 if (!ctdb_sys_have_ip(&pip->addr)) {
912 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913 ctdb_addr_to_str(&pip->addr),
914 vnn->public_netmask_bits,
915 ctdb_vnn_iface_string(vnn)));
916 ctdb_vnn_unassign_iface(ctdb, vnn);
920 if (vnn->iface == NULL) {
921 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922 ctdb_addr_to_str(&pip->addr),
923 vnn->public_netmask_bits));
928 /* There is a potential race between take_ip and us because we
929 * update the VNN via a callback that run when the
930 * eventscripts have been run. Avoid the race by allowing one
931 * update to be in flight at a time.
933 if (vnn->update_in_flight) {
934 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935 "update for this IP already in flight\n",
936 ctdb_addr_to_str(&vnn->public_address),
937 vnn->public_netmask_bits));
941 iface = strdup(ctdb_vnn_iface_string(vnn));
943 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
944 ctdb_addr_to_str(&pip->addr),
945 vnn->public_netmask_bits,
949 state = talloc(ctdb, struct takeover_callback_state);
951 ctdb_set_error(ctdb, "Out of memory at %s:%d",
957 state->c = talloc_steal(state, c);
958 state->addr = talloc(state, ctdb_sock_addr);
959 if (state->addr == NULL) {
960 ctdb_set_error(ctdb, "Out of memory at %s:%d",
966 *state->addr = pip->addr;
969 vnn->update_in_flight = true;
970 talloc_set_destructor(state, ctdb_releaseip_destructor);
972 ret = ctdb_event_script_callback(ctdb,
973 state, release_ip_callback, state,
974 CTDB_EVENT_RELEASE_IP,
977 ctdb_addr_to_str(&pip->addr),
978 vnn->public_netmask_bits);
981 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982 ctdb_addr_to_str(&pip->addr),
983 ctdb_vnn_iface_string(vnn)));
988 /* tell the control that we will reply asynchronously */
/*
 * Add one public address, with its candidate interfaces, to ctdb->vnn.
 *
 * ctdb:   daemon context.
 * addr:   public address to add.
 * mask:   netmask bits stored in vnn->public_netmask_bits.
 * ifaces: comma-separated list of interface names.
 *
 * A copy of ifaces is tokenised first to check that every named
 * interface exists, and an address already present in ctdb->vnn is
 * refused as a duplicate. A zeroed struct ctdb_vnn is then allocated on
 * ctdb, its NULL-terminated ifaces array is built from a second
 * tokenisation, and the address and mask are stored. When check_address
 * is set and the kernel already holds the address, vnn->pnn is set to
 * this node. Each interface is registered with ctdb_add_local_iface()
 * and the vnn is finally linked in with DLIST_ADD().
 *
 * NOTE(review): the first copy is made with strdup(); verify it is
 * checked for NULL and freed. strtok() keeps hidden static state, so
 * this function is not reentrant.
 * NOTE(review): no NULL check is visible on the initial talloc_array()
 * result for vnn->ifaces; confirm.
 */
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994 ctdb_sock_addr *addr,
995 unsigned mask, const char *ifaces,
998 struct ctdb_vnn *vnn;
1005 tmp = strdup(ifaces);
1006 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007 if (!ctdb_sys_check_iface_exists(iface)) {
1008 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1015 /* Verify that we don't have an entry for this ip yet */
1016 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1019 ctdb_addr_to_str(addr)));
1024 /* create a new vnn structure for this ip address */
1025 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028 tmp = talloc_strdup(vnn, ifaces);
1029 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038 vnn->ifaces[num] = NULL;
1039 vnn->public_address = *addr;
1040 vnn->public_netmask_bits = mask;
1042 if (check_address) {
1043 if (ctdb_sys_have_ip(addr)) {
1044 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045 vnn->pnn = ctdb->pnn;
1049 for (i=0; vnn->ifaces[i]; i++) {
1050 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1052 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053 "for public_address[%s]\n",
1054 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1060 DLIST_ADD(ctdb->vnn, vnn);
/*
 * Load the public address list from ctdb->public_addresses_file and add
 * every entry with ctdb_add_public_address().
 *
 * ctdb:            daemon context.
 * check_addresses: passed on to ctdb_add_public_address().
 *
 * Trailing empty lines are dropped from the count. For each remaining
 * line, leading spaces and tabs are skipped, empty lines are ignored,
 * and the line is split on spaces and tabs. When a line names no
 * interface, ctdb->default_public_interface is used; if that is unset
 * too, the line is reported at CRIT level. Lines that fail
 * parse_ip_mask() or cannot be added are reported with their line
 * number.
 *
 * NOTE(review): presumably the first token is the address with mask and
 * the second the interface list; confirm. strtok() keeps hidden static
 * state, so this function is not reentrant.
 */
1066 setup the public address lists from a file
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1074 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075 if (lines == NULL) {
1076 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1079 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083 for (i=0;i<nlines;i++) {
1085 ctdb_sock_addr addr;
1086 const char *addrstr;
1091 while ((*line == ' ') || (*line == '\t')) {
1097 if (strcmp(line, "") == 0) {
1100 tok = strtok(line, " \t");
1102 tok = strtok(NULL, " \t");
1104 if (NULL == ctdb->default_public_interface) {
1105 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1110 ifaces = ctdb->default_public_interface;
1115 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
 * Callback deciding what to keep when an address is inserted into the
 * merged IP tree more than once.
 *
 * parm is the entry being inserted (this_ip) and data is the entry that
 * is already stored (prev_ip), which is NULL for a first insertion. When
 * the new entry has no owner (pnn of -1) it inherits the owner recorded
 * in the previous entry.
 *
 * NOTE(review): presumably this_ip is returned as the value to store;
 * confirm.
 */
1132 static void *add_ip_callback(void *parm, void *data)
1134 struct public_ip_list *this_ip = parm;
1135 struct public_ip_list *prev_ip = data;
1137 if (prev_ip == NULL) {
1140 if (this_ip->pnn == -1) {
1141 this_ip->pnn = prev_ip->pnn;
/*
 * Tree traversal callback used to turn the merged IP tree into a linked
 * list. param points at the list head and data is the tree entry; the
 * entry is linked in front of the current head.
 *
 * NOTE(review): presumably the head is then updated to the new entry and
 * 0 is returned to continue the traversal; confirm.
 */
1147 static int getips_count_callback(void *param, void *data)
1149 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1152 new_ip->next = *ip_list;
/*
 * Collect the public IP lists of all active nodes.
 *
 * ctdb:            daemon context.
 * mem_ctx:         talloc parent of the returned array.
 * nodemap:         nodes to query; entries flagged NODE_FLAGS_INACTIVE
 *                  are skipped.
 * public_ip_flags: passed through to ctdb_ctrl_get_public_ips_flags().
 *
 * A zeroed array of struct ctdb_public_ip_list is allocated and indexed
 * by node number. For each queried node the reply is copied into a
 * freshly allocated ip array owned by the result, and the reply itself
 * is freed. If a node cannot be queried, or memory runs out, the whole
 * result is freed.
 *
 * NOTE(review): presumably NULL is returned on those failure paths;
 * confirm.
 */
1157 static struct ctdb_public_ip_list *
1158 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1159 TALLOC_CTX *mem_ctx,
1160 struct ctdb_node_map_old *nodemap,
1161 uint32_t public_ip_flags)
1164 struct ctdb_public_ip_list_old *ip_list;
1165 struct ctdb_public_ip_list *public_ips;
1167 public_ips = talloc_zero_array(mem_ctx,
1168 struct ctdb_public_ip_list,
1170 if (public_ips == NULL) {
1171 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1175 for (j = 0; j < nodemap->num; j++) {
1176 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1180 /* Retrieve the list of public IPs from the
1181 * node. Flags says whether it is known or
1183 ret = ctdb_ctrl_get_public_ips_flags(
1184 ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1185 public_ip_flags, &ip_list);
1188 ("Failed to read public IPs from node: %u\n", j));
1189 talloc_free(public_ips);
1192 public_ips[j].num = ip_list->num;
1193 if (ip_list->num == 0) {
1194 talloc_free(ip_list);
1197 public_ips[j].ip = talloc_zero_array(public_ips,
1198 struct ctdb_public_ip,
1200 if (public_ips[j].ip == NULL) {
1201 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1202 talloc_free(public_ips);
1205 memcpy(public_ips[j].ip, &ip_list->ips[0],
1206 sizeof(struct ctdb_public_ip) * ip_list->num);
1207 talloc_free(ip_list);
/*
 * Merge the per-node known public IP lists in ipalloc_state into one
 * linked list with a single entry per address.
 *
 * A temporary tree keyed by ip_key() of the address is used to merge
 * duplicates. For every address reported by node i an entry is allocated
 * on ipalloc_state; the reported owner is copied only when the reporting
 * node names itself as the owner, because what a node says about
 * addresses hosted elsewhere may be stale. The tree is then traversed
 * with getips_count_callback() to build the list, and freed.
 *
 * An error is logged when ipalloc_state->known_public_ips is not set or
 * when memory runs out.
 *
 * NOTE(review): presumably NULL is returned on those failure paths, and
 * entries not owned by the reporting node get a pnn of -1; confirm.
 */
1213 static struct public_ip_list *
1214 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
1217 struct public_ip_list *ip_list;
1218 struct ctdb_public_ip_list *public_ips;
1219 struct trbt_tree *ip_tree;
1221 ip_tree = trbt_create(ipalloc_state, 0);
1223 if (ipalloc_state->known_public_ips == NULL) {
1224 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1228 for (i=0; i < ipalloc_state->num; i++) {
1230 public_ips = &ipalloc_state->known_public_ips[i];
1232 for (j=0; j < public_ips->num; j++) {
1233 struct public_ip_list *tmp_ip;
1235 /* This is returned as part of ip_list */
1236 tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
1237 if (tmp_ip == NULL) {
1239 (__location__ " out of memory\n"));
1240 talloc_free(ip_tree);
1244 /* Do not use information about IP addresses hosted
1245 * on other nodes, it may not be accurate */
1246 if (public_ips->ip[j].pnn == i) {
1247 tmp_ip->pnn = public_ips->ip[j].pnn;
1251 tmp_ip->addr = public_ips->ip[j].addr;
1252 tmp_ip->next = NULL;
1254 trbt_insertarray32_callback(ip_tree,
1255 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1262 trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1263 talloc_free(ip_tree);
/*
 * Report whether every node in nodemap is flagged inactive or disabled.
 * A node with neither NODE_FLAGS_INACTIVE nor NODE_FLAGS_DISABLED set
 * counts as completely healthy.
 *
 * NOTE(review): presumably false is returned as soon as a healthy node
 * is found and true otherwise; confirm.
 */
1268 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1272 for (i=0;i<nodemap->num;i++) {
1273 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1274 /* Found one completely healthy node */
/*
 * Shared state for the callbacks used by get_tunable_from_nodes(). The
 * tunable member names the tunable being fetched; that function also
 * initialises an out array for the per-node values and a fatal flag.
 */
1282 struct get_tunable_callback_data {
1283 const char *tunable;
/*
 * Success callback for the GET_TUNABLE control sent by
 * get_tunable_from_nodes().
 *
 * The reply must be exactly sizeof(uint32_t) bytes, and pnn is checked
 * against the length of the cd->out array; both mismatches are logged as
 * errors. A valid value is stored in cd->out[pnn].
 *
 * NOTE(review): the early exit for cases already handled in the fail
 * callback presumably depends on res; confirm.
 */
1288 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1289 int32_t res, TDB_DATA outdata,
1292 struct get_tunable_callback_data *cd =
1293 (struct get_tunable_callback_data *)callback;
1297 /* Already handled in fail callback */
1301 if (outdata.dsize != sizeof(uint32_t)) {
1302 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1303 cd->tunable, pnn, (int)sizeof(uint32_t),
1304 (int)outdata.dsize));
1309 size = talloc_array_length(cd->out);
1311 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1312 cd->tunable, pnn, size));
1317 cd->out[pnn] = *(uint32_t *)outdata.dptr;
/*
 * Failure callback for the GET_TUNABLE control sent by
 * get_tunable_from_nodes(). Three situations are logged: a timeout, a
 * tunable that the remote node does not implement (warning level only),
 * and any other unexpected error.
 *
 * NOTE(review): presumably the branch is selected from res and the fatal
 * flag is set for everything except the not-implemented case; confirm.
 */
1320 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1321 int32_t res, TDB_DATA outdata,
1324 struct get_tunable_callback_data *cd =
1325 (struct get_tunable_callback_data *)callback;
1330 ("Timed out getting tunable \"%s\" from node %d\n",
1336 DEBUG(DEBUG_WARNING,
1337 ("Tunable \"%s\" not implemented on node %d\n",
1342 ("Unexpected error getting tunable \"%s\" from node %d\n",
/*
 * Fetch the value of one tunable from the connected nodes.
 *
 * ctdb:          daemon context.
 * tmp_ctx:       talloc parent of the result and of temporary buffers.
 * nodemap:       determines the size of the result (nodemap->num).
 * tunable:       name of the tunable to read.
 * default_value: value preset for every node before the replies arrive.
 *
 * The request payload is a struct ctdb_control_get_tunable carrying the
 * name including its terminating NUL. It is sent to the nodes returned
 * by list_of_connected_nodes() with CTDB_CONTROL_GET_TUNABLE and
 * TAKEOVER_TIMEOUT(); replies are stored by get_tunable_callback() and
 * failures handled by get_tunable_fail_callback(). The payload buffer is
 * freed afterwards.
 *
 * Returns the array of per-node values. NOTE(review): presumably NULL is
 * returned when a fatal failure was flagged; set_ipflags() checks the
 * result against NULL.
 * NOTE(review): no NULL check is visible on the talloc_size() result
 * before it is written to; confirm.
 */
1348 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1349 TALLOC_CTX *tmp_ctx,
1350 struct ctdb_node_map_old *nodemap,
1351 const char *tunable,
1352 uint32_t default_value)
1355 struct ctdb_control_get_tunable *t;
1358 struct get_tunable_callback_data callback_data;
1361 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1362 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1363 for (i=0; i<nodemap->num; i++) {
1364 tvals[i] = default_value;
1367 callback_data.out = tvals;
1368 callback_data.tunable = tunable;
1369 callback_data.fatal = false;
1371 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1372 data.dptr = talloc_size(tmp_ctx, data.dsize);
1373 t = (struct ctdb_control_get_tunable *)data.dptr;
1374 t->length = strlen(tunable)+1;
1375 memcpy(t->name, tunable, t->length);
1376 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1377 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1378 nodes, 0, TAKEOVER_TIMEOUT(),
1380 get_tunable_callback,
1381 get_tunable_fail_callback,
1382 &callback_data) != 0) {
1383 if (callback_data.fatal) {
1389 talloc_free(data.dptr);
/*
 * Fill the noiptakeover and noiphost arrays of ipalloc_state.
 *
 * ipalloc_state:              receives the flags, indexed by node.
 * nodemap:                    supplies the per-node flags.
 * tval_noiptakeover:          per-node NoIPTakeover values.
 * tval_noiphostonalldisabled: per-node NoIPHostOnAllDisabled values.
 *
 * Both value arrays are indexed up to nodemap->num, so they need at
 * least that many entries.
 */
1394 /* Set internal flags for IP allocation:
1396 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1397 * Set NOIPHOST ip flag for each INACTIVE node
1398 * if all nodes are disabled:
1399 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1401 * Set NOIPHOST ip flags for disabled nodes
1403 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1404 struct ctdb_node_map_old *nodemap,
1405 uint32_t *tval_noiptakeover,
1406 uint32_t *tval_noiphostonalldisabled)
1410 for (i=0;i<nodemap->num;i++) {
1411 /* Can not take IPs on node with NoIPTakeover set */
1412 if (tval_noiptakeover[i] != 0) {
1413 ipalloc_state->noiptakeover[i] = true;
1416 /* Can not host IPs on INACTIVE node */
1417 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1418 ipalloc_state->noiphost[i] = true;
1422 if (all_nodes_are_disabled(nodemap)) {
1423 /* If all nodes are disabled, can not host IPs on node
1424 * with NoIPHostOnAllDisabled set
1426 for (i=0;i<nodemap->num;i++) {
1427 if (tval_noiphostonalldisabled[i] != 0) {
1428 ipalloc_state->noiphost[i] = true;
1432 /* If some nodes are not disabled, then can not host
1433 * IPs on DISABLED node
1435 for (i=0;i<nodemap->num;i++) {
1436 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1437 ipalloc_state->noiphost[i] = true;
/*
 * Fetch the per-node tunables that restrict IP hosting and apply them to
 * ipalloc_state.
 *
 * Two tunables are read with get_tunable_from_nodes(), using
 * ipalloc_state as the talloc parent; NoIPHostOnAllDisabled is the
 * second, with a default of 0. Both arrays are handed to
 * set_ipflags_internal() and freed afterwards.
 *
 * Returns bool. NOTE(review): presumably false when either tunable could
 * not be fetched and true otherwise, and the first tunable is presumably
 * NoIPTakeover; confirm.
 */
1443 static bool set_ipflags(struct ctdb_context *ctdb,
1444 struct ipalloc_state *ipalloc_state,
1445 struct ctdb_node_map_old *nodemap)
1447 uint32_t *tval_noiptakeover;
1448 uint32_t *tval_noiphostonalldisabled;
1450 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1452 if (tval_noiptakeover == NULL) {
1456 tval_noiphostonalldisabled =
1457 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1458 "NoIPHostOnAllDisabled", 0);
1459 if (tval_noiphostonalldisabled == NULL) {
1460 /* Caller frees tmp_ctx */
1464 set_ipflags_internal(ipalloc_state, nodemap,
1466 tval_noiphostonalldisabled);
1468 talloc_free(tval_noiptakeover);
1469 talloc_free(tval_noiphostonalldisabled);
/*
 * Allocate and initialise the state used for one IP allocation run.
 *
 * ctdb:    daemon context supplying the node count and tunables.
 * mem_ctx: talloc parent of the returned state.
 *
 * The state is zeroed, sized from ctdb->num_nodes, and given zeroed
 * noiptakeover and noiphost arrays of that size. The algorithm is
 * chosen from the tunables: LCP2 when lcp2_public_ip_assignment is 1,
 * otherwise deterministic when deterministic_public_ips is 1, otherwise
 * non-deterministic. The no_ip_failback tunable is copied as well.
 *
 * Returns the new state. On an allocation failure an error is logged and
 * the partly built state is freed. NOTE(review): presumably NULL is
 * returned in that case; confirm.
 */
1474 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1475 TALLOC_CTX *mem_ctx)
1477 struct ipalloc_state *ipalloc_state =
1478 talloc_zero(mem_ctx, struct ipalloc_state);
1479 if (ipalloc_state == NULL) {
1480 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1484 ipalloc_state->num = ctdb->num_nodes;
1486 ipalloc_state->noiptakeover =
1487 talloc_zero_array(ipalloc_state,
1489 ipalloc_state->num);
1490 if (ipalloc_state->noiptakeover == NULL) {
1491 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1494 ipalloc_state->noiphost =
1495 talloc_zero_array(ipalloc_state,
1497 ipalloc_state->num);
1498 if (ipalloc_state->noiphost == NULL) {
1499 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1503 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1504 ipalloc_state->algorithm = IPALLOC_LCP2;
1505 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1506 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1508 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1511 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1513 return ipalloc_state;
1515 talloc_free(ipalloc_state);
// Failure bookkeeping shared by the async controls of one takeover run.
// fail_count is indexed by node PNN and incremented by
// takeover_run_fail_callback(). Code in this file also uses a
// num_nodes member whose declaration is not visible in this view.
1519 struct takeover_callback_data {
1521 unsigned int *fail_count;
1524 static struct takeover_callback_data *
1525 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1528 static struct takeover_callback_data *takeover_data;
1530 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1531 if (takeover_data == NULL) {
1532 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1536 takeover_data->fail_count = talloc_zero_array(takeover_data,
1537 unsigned int, num_nodes);
1538 if (takeover_data->fail_count == NULL) {
1539 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1540 talloc_free(takeover_data);
1544 takeover_data->num_nodes = num_nodes;
1546 return takeover_data;
// Async-control failure callback: bumps the failure counter of the
// node that failed. callback_data must be a struct
// takeover_callback_data (talloc_get_type_abort aborts otherwise).
1549 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1550 uint32_t node_pnn, int32_t res,
1551 TDB_DATA outdata, void *callback_data)
1553 struct takeover_callback_data *cd =
1554 talloc_get_type_abort(callback_data,
1555 struct takeover_callback_data);
// Guard the array index: node_pnn must be below num_nodes.
1557 if (node_pnn >= cd->num_nodes) {
1558 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
// Only the first failure of a node is logged; later ones just count.
1562 if (cd->fail_count[node_pnn] == 0) {
1564 ("Node %u failed the takeover run\n", node_pnn));
1567 cd->fail_count[node_pnn]++;
// Find the node with the highest failure count in tcd and, if any
// failure was recorded, send a message carrying that PNN to all
// connected nodes via ctdb_client_send_message().
1570 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1571 struct takeover_callback_data *tcd)
1573 unsigned int max_fails = 0;
1574 uint32_t max_pnn = -1;
// Strict greater-than: on a tie the lowest index keeps the maximum.
1577 for (i = 0; i < tcd->num_nodes; i++) {
1578 if (tcd->fail_count[i] > max_fails) {
1580 max_fails = tcd->fail_count[i];
1584 if (max_fails > 0) {
1589 ("Sending banning credits to %u with fail count %u\n",
1590 max_pnn, max_fails));
// The message payload is the raw uint32_t PNN of the worst node.
1592 data.dptr = (uint8_t *)&max_pnn;
1593 data.dsize = sizeof(uint32_t);
1594 ret = ctdb_client_send_message(ctdb,
1595 CTDB_BROADCAST_CONNECTED,
1600 ("Failed to set banning credits for node %u\n",
1607 * Recalculate the allocation of public IPs to nodes and have the
1608 * nodes host their allocated addresses.
1610 * - Allocate memory for IP allocation state, including per node
1612 * - Populate IP allocation algorithm in IP allocation state
1613 * - Populate local value of tunable NoIPFailback in IP allocation
1614 state - this is really a cluster-wide configuration variable and
1615 only the value from the master node is used
1616 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1617 * connected nodes - this is done separately so tunable values can
1618 * be faked in unit testing
1619 * - Populate NoIPTakeover tunable in IP allocation state
1620 * - Populate NoIPHost in IP allocation state, derived from node flags
1621 * and NoIPHostOnAllDisabled tunable
1622 * - Retrieve known and available IP addresses (done separately so
1623 * values can be faked in unit testing)
1624 * - Use ipalloc_set_public_ips() to set known and available IP
1625 addresses for allocation
1626 * - If no available IP addresses then early exit
1627 * - Build list of (known IPs, currently assigned node)
1628 * - Populate list of nodes to force rebalance - internal structure,
1629 * currently no way to fetch, only used by LCP2 for nodes that have
1630 * had new IP addresses added
1631 * - Run IP allocation algorithm
1632 * - Send RELEASE_IP to all nodes for IPs they should not host
1633 * - Send TAKE_IP to all nodes for IPs they should host
1634 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
// Run one public IP reallocation: build the allocation state, fetch
// known and available IPs, run ipalloc(), then send RELEASE_IP,
// TAKEOVER_IP and IPREALLOCATED controls to the nodes. All temporary
// allocations hang off tmp_ctx, which is freed on every visible exit.
// force_rebalance_nodes is stored in the allocation state unchanged.
1636 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1637 uint32_t *force_rebalance_nodes)
1640 struct ctdb_public_ip ip;
1642 struct public_ip_list *all_ips, *tmp_ip;
1644 struct timeval timeout;
1645 struct client_async_data *async_data;
1646 struct ctdb_client_control_state *state;
// NOTE(review): no NULL check on tmp_ctx is visible in this view.
1647 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1648 struct ipalloc_state *ipalloc_state;
1649 struct ctdb_public_ip_list *known_ips, *available_ips;
1650 struct takeover_callback_data *takeover_data;
1653 /* Initialise fail callback data to be used with
1654 * takeover_run_fail_callback(). A failure in any of the
1655 * following steps will cause an early return, so this can be
1656 * reused for each of those steps without re-initialising. */
1657 takeover_data = takeover_callback_data_init(tmp_ctx,
1659 if (takeover_data == NULL) {
1660 talloc_free(tmp_ctx);
1665 * ip failover is completely disabled, just send out the
1666 * ipreallocated event.
1668 if (ctdb->tunable.disable_ip_failover != 0) {
1672 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1673 if (ipalloc_state == NULL) {
1674 talloc_free(tmp_ctx);
1678 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1679 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1680 talloc_free(tmp_ctx);
1684 /* Fetch known/available public IPs from each active node */
1685 /* Fetch lists of known public IPs from all nodes */
1686 known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1688 if (known_ips == NULL) {
1689 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1690 talloc_free(tmp_ctx);
// Second fetch restricts the result to available IPs via the flag.
1693 available_ips = ctdb_fetch_remote_public_ips(
1694 ctdb, ipalloc_state, nodemap,
1695 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1696 if (available_ips == NULL) {
1697 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1698 talloc_free(tmp_ctx);
1702 if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1703 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1704 talloc_free(tmp_ctx);
1708 /* Short-circuit IP allocation if no node has available IPs */
1709 can_host_ips = false;
1710 for (i=0; i < ipalloc_state->num; i++) {
1711 if (ipalloc_state->available_public_ips[i].num != 0) {
1712 can_host_ips = true;
1715 if (!can_host_ips) {
1716 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1720 /* since nodes only know about those public addresses that
1721 can be served by that particular node, no single node has
1722 a full list of all public addresses that exist in the cluster.
1723 Walk over all node structures and create a merged list of
1724 all public addresses that exist in the cluster.
1726 all_ips = create_merged_ip_list(ipalloc_state);
1727 if (all_ips == NULL) {
1728 talloc_free(tmp_ctx);
1731 ipalloc_state->all_ips = all_ips;
1733 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1735 /* Do the IP reassignment calculations */
1736 ipalloc(ipalloc_state);
1738 /* Now tell all nodes to release any public IPs should not
1739 * host. This will be a NOOP on nodes that don't currently
1740 * hold the given IP.
1742 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1743 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
// Failures of the async controls are counted per node in takeover_data.
1745 async_data->fail_callback = takeover_run_fail_callback;
1746 async_data->callback_data = takeover_data;
1748 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1750 /* Send a RELEASE_IP to all nodes that should not be hosting
1751 * each IP. For each IP, all but one of these will be
1752 * redundant. However, the redundant ones are used to tell
1753 * nodes which node should be hosting the IP so that commands
1754 * like "ctdb ip" can display a particular nodes idea of who
1755 * is hosting what. */
1756 for (i=0;i<nodemap->num;i++) {
1757 /* don't talk to unconnected nodes, but do talk to banned nodes */
1758 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1762 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1763 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1764 /* This node should be serving this
1765 vnn so don't tell it to release the ip
// The control payload is the struct ctdb_public_ip naming the new owner.
1769 ip.pnn = tmp_ip->pnn;
1770 ip.addr = tmp_ip->addr;
1772 timeout = TAKEOVER_TIMEOUT();
1773 data.dsize = sizeof(ip);
1774 data.dptr = (uint8_t *)&ip;
1775 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1776 0, CTDB_CONTROL_RELEASE_IP, 0,
1779 if (state == NULL) {
1780 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1781 talloc_free(tmp_ctx);
1785 ctdb_client_async_add(async_data, state);
// Wait for all RELEASE_IP controls before any TAKEOVER_IP is sent.
1788 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1790 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1793 talloc_free(async_data);
1796 /* For each IP, send a TAKOVER_IP to the node that should be
1797 * hosting it. Many of these will often be redundant (since
1798 * the allocation won't have changed) but they can be useful
1799 * to recover from inconsistencies. */
1800 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1801 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1803 async_data->fail_callback = takeover_run_fail_callback;
1804 async_data->callback_data = takeover_data;
1806 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1807 if (tmp_ip->pnn == -1) {
1808 /* this IP won't be taken over */
1812 ip.pnn = tmp_ip->pnn;
1813 ip.addr = tmp_ip->addr;
1815 timeout = TAKEOVER_TIMEOUT();
1816 data.dsize = sizeof(ip);
1817 data.dptr = (uint8_t *)&ip;
1818 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1819 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1820 data, async_data, &timeout, NULL);
1821 if (state == NULL) {
1822 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1823 talloc_free(tmp_ctx);
1827 ctdb_client_async_add(async_data, state);
1829 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1831 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1837 * Tell all nodes to run eventscripts to process the
1838 * "ipreallocated" event. This can do a lot of things,
1839 * including restarting services to reconfigure them if public
1840 * IPs have moved. Once upon a time this event only used to
1843 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1844 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1845 nodes, 0, TAKEOVER_TIMEOUT(),
1847 NULL, takeover_run_fail_callback,
1851 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1855 talloc_free(tmp_ctx);
// Failure exit: report the worst-failing node, then free everything.
1859 takeover_run_process_failures(ctdb, takeover_data);
1860 talloc_free(tmp_ctx);
1866 destroy a ctdb_client_ip structure
// talloc destructor for struct ctdb_client_ip: logs the address and
// unlinks the entry from ctdb->client_ip_list.
1868 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1870 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1871 ctdb_addr_to_str(&ip->addr),
1872 ntohs(ip->addr.ip.sin_port),
1875 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1880 called by a client to inform us of a TCP connection that it is managing
1881 that should be tickled with an ACK when IP takeover is done
// Register a TCP connection reported by a local client. Both endpoints
// are canonicalized, the destination is looked up as a public IP, a
// ctdb_client_ip and a ctdb_tcp_list entry are allocated as talloc
// children of the client, and CTDB_CONTROL_TCP_ADD is broadcast to the
// connected nodes with the NOREPLY flag.
1883 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
// NOTE(review): client is dereferenced below (client->pid) and no NULL
// check on the reqid_find() result is visible in this view - confirm.
1886 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1887 struct ctdb_connection *tcp_sock = NULL;
1888 struct ctdb_tcp_list *tcp;
1889 struct ctdb_connection t;
1892 struct ctdb_client_ip *ip;
1893 struct ctdb_vnn *vnn;
1894 ctdb_sock_addr addr;
1896 /* If we don't have public IPs, tickles are useless */
1897 if (ctdb->vnn == NULL) {
// NOTE(review): indata.dptr is cast without a visible size check here;
// presumably the control dispatcher validates the size - verify.
1901 tcp_sock = (struct ctdb_connection *)indata.dptr;
// Canonicalize src and dst in place, using addr as the scratch copy.
1903 addr = tcp_sock->src;
1904 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1905 addr = tcp_sock->dst;
1906 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1909 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1910 vnn = find_public_ip_vnn(ctdb, &addr);
// Diagnostics for a destination that is not a public address; the
// IPv4 loopback address is not reported as an error.
1912 switch (addr.sa.sa_family) {
1914 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1915 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1916 ctdb_addr_to_str(&addr)));
1920 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1921 ctdb_addr_to_str(&addr)));
1924 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1930 if (vnn->pnn != ctdb->pnn) {
1931 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1932 ctdb_addr_to_str(&addr),
1933 client_id, client->pid));
1934 /* failing this call will tell smbd to die */
// The ip entry unlinks itself from client_ip_list through its
// destructor when it is freed.
1938 ip = talloc(client, struct ctdb_client_ip);
1939 CTDB_NO_MEMORY(ctdb, ip);
1943 ip->client_id = client_id;
1944 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1945 DLIST_ADD(ctdb->client_ip_list, ip);
1947 tcp = talloc(client, struct ctdb_tcp_list);
1948 CTDB_NO_MEMORY(ctdb, tcp);
1950 tcp->connection.src = tcp_sock->src;
1951 tcp->connection.dst = tcp_sock->dst;
1953 DLIST_ADD(client->tcp_list, tcp);
// t is the payload of the broadcast control sent below.
1955 t.src = tcp_sock->src;
1956 t.dst = tcp_sock->dst;
1958 data.dptr = (uint8_t *)&t;
1959 data.dsize = sizeof(t);
1961 switch (addr.sa.sa_family) {
1963 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1964 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1965 ctdb_addr_to_str(&tcp_sock->src),
1966 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1969 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1970 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1971 ctdb_addr_to_str(&tcp_sock->src),
1972 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1975 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1979 /* tell all nodes about this tcp connection */
1980 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1981 CTDB_CONTROL_TCP_ADD,
1982 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1984 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1992 find a tcp address on a list
// Linear search of array for a connection whose src and dst both
// match tcp according to ctdb_same_sockaddr(). Returns a pointer into
// array->connections for the first match. A NULL array is checked for
// explicitly before the search.
1994 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1995 struct ctdb_connection *tcp)
1999 if (array == NULL) {
2003 for (i=0;i<array->num;i++) {
2004 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2005 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2006 return &array->connections[i];
2015 called by a daemon to inform us of a TCP connection that one of its
2016 clients is managing that should be tickled with an ACK when IP takeover is
2019 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2021 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2022 struct ctdb_tcp_array *tcparray;
2023 struct ctdb_connection tcp;
2024 struct ctdb_vnn *vnn;
2026 /* If we don't have public IPs, tickles are useless */
2027 if (ctdb->vnn == NULL) {
2031 vnn = find_public_ip_vnn(ctdb, &p->dst);
2033 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2034 ctdb_addr_to_str(&p->dst)));
2040 tcparray = vnn->tcp_array;
2042 /* If this is the first tickle */
2043 if (tcparray == NULL) {
2044 tcparray = talloc(vnn, struct ctdb_tcp_array);
2045 CTDB_NO_MEMORY(ctdb, tcparray);
2046 vnn->tcp_array = tcparray;
2049 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2050 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2052 tcparray->connections[tcparray->num].src = p->src;
2053 tcparray->connections[tcparray->num].dst = p->dst;
2056 if (tcp_update_needed) {
2057 vnn->tcp_update_needed = true;
2063 /* Do we already have this tickle ?*/
2066 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2067 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2068 ctdb_addr_to_str(&tcp.dst),
2069 ntohs(tcp.dst.ip.sin_port),
2074 /* A new tickle, we must add it to the array */
2075 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2076 struct ctdb_connection,
2078 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2080 tcparray->connections[tcparray->num].src = p->src;
2081 tcparray->connections[tcparray->num].dst = p->dst;
2084 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2085 ctdb_addr_to_str(&tcp.dst),
2086 ntohs(tcp.dst.ip.sin_port),
2089 if (tcp_update_needed) {
2090 vnn->tcp_update_needed = true;
// Remove conn from the tickle array of vnn, if present. The entry is
// overwritten with the last array element and the count decremented;
// when the count reaches zero the whole array is freed. The vnn is
// then flagged as needing a tickle update.
2097 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2099 struct ctdb_connection *tcpp;
2105 /* if the array is empty we cant remove it
2106 and we don't need to do anything
2108 if (vnn->tcp_array == NULL) {
2109 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2110 ctdb_addr_to_str(&conn->dst),
2111 ntohs(conn->dst.ip.sin_port)));
2116 /* See if we know this connection
2117 if we don't know this connection then we dont need to do anything
2119 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2121 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2122 ctdb_addr_to_str(&conn->dst),
2123 ntohs(conn->dst.ip.sin_port)));
2128 /* We need to remove this entry from the array.
2129 Instead of allocating a new array and copying data to it
2130 we cheat and just copy the last entry in the existing array
2131 to the entry that is to be removed and just shring the
// Element order is not preserved by this swap-with-last removal.
2134 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2135 vnn->tcp_array->num--;
2137 /* If we deleted the last entry we also need to remove the entire array
2139 if (vnn->tcp_array->num == 0) {
2140 talloc_free(vnn->tcp_array);
2141 vnn->tcp_array = NULL;
2144 vnn->tcp_update_needed = true;
2146 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2147 ctdb_addr_to_str(&conn->src),
2148 ntohs(conn->src.ip.sin_port)));
2153 called by a daemon to inform us of a TCP connection that one of its
2154 clients used are no longer needed in the tickle database
// Control handler: look up the public IP vnn for the destination of
// the connection in indata and remove that connection from its tickle
// array via ctdb_remove_connection().
// NOTE(review): indata.dptr is cast without a visible size check.
2156 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2158 struct ctdb_vnn *vnn;
2159 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2161 /* If we don't have public IPs, tickles are useless */
2162 if (ctdb->vnn == NULL) {
2166 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2169 (__location__ " unable to find public address %s\n",
2170 ctdb_addr_to_str(&conn->dst)));
2174 ctdb_remove_connection(vnn, conn);
2181 Called when another daemon starts - causes all tickles for all
2182 public addresses we are serving to be sent to the new node on the
2183 next check. This actually causes the next scheduled call to
2184 ctdb_update_tcp_tickles() to update all nodes. This is simple and
2185 doesn't require careful error handling.
// Control handler for a startup notification from node pnn: logs it
// and marks every vnn as needing a tickle update. pnn is only used
// for the log message in the visible code.
2187 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2189 struct ctdb_vnn *vnn;
2191 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2192 (unsigned long) pnn));
2194 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2195 vnn->tcp_update_needed = true;
2203 called when a client structure goes away - hook to remove
2204 elements from the tcp_list in all daemons
// Drain client->tcp_list: each entry is unlinked from the list and,
// if its destination is a public IP currently hosted by this node,
// the connection is removed from that vnn.
2206 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2208 while (client->tcp_list) {
2209 struct ctdb_vnn *vnn;
2210 struct ctdb_tcp_list *tcp = client->tcp_list;
2211 struct ctdb_connection *conn = &tcp->connection;
// Unlinking the head entry is what advances the while loop.
2213 DLIST_REMOVE(client->tcp_list, tcp);
2215 vnn = find_public_ip_vnn(client->ctdb,
2219 (__location__ " unable to find public address %s\n",
2220 ctdb_addr_to_str(&conn->dst)));
2224 /* If the IP address is hosted on this node then
2225 * remove the connection. */
2226 if (vnn->pnn == client->ctdb->pnn) {
2227 ctdb_remove_connection(vnn, conn);
2230 /* Otherwise this function has been called because the
2231 * server IP address has been released to another node
2232 * and the client has exited. This means that we
2233 * should not delete the connection information. The
2234 * takeover node processes connections too. */
// Release every public IP this host currently has configured. For each
// such vnn the release-ip event script is run, a message with the
// address string is sent to this node on CTDB_SRVID_RELEASE_IP, and
// the interface assignment is dropped. The final log line reports a
// count whose updates are not visible in this view.
2239 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2241 struct ctdb_vnn *vnn;
2245 if (ctdb->tunable.disable_ip_failover == 1) {
2249 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Address not present on this system: only drop the iface assignment.
2250 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2251 ctdb_vnn_unassign_iface(ctdb, vnn);
2258 /* Don't allow multiple releases at once. Some code,
2259 * particularly ctdb_tickle_sentenced_connections() is
2261 if (vnn->update_in_flight) {
2262 DEBUG(DEBUG_WARNING,
2264 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2265 ctdb_addr_to_str(&vnn->public_address),
2266 vnn->public_netmask_bits,
2267 ctdb_vnn_iface_string(vnn)));
// update_in_flight brackets the release and is cleared again below.
2270 vnn->update_in_flight = true;
2272 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2273 ctdb_addr_to_str(&vnn->public_address),
2274 vnn->public_netmask_bits,
2275 ctdb_vnn_iface_string(vnn)));
2277 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2278 ctdb_vnn_iface_string(vnn),
2279 ctdb_addr_to_str(&vnn->public_address),
2280 vnn->public_netmask_bits);
// The message payload is the NUL-terminated address string; it is
// skipped silently if the string copy cannot be allocated.
2282 data.dptr = (uint8_t *)talloc_strdup(
2283 vnn, ctdb_addr_to_str(&vnn->public_address));
2284 if (data.dptr != NULL) {
2285 data.dsize = strlen((char *)data.dptr) + 1;
2286 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2287 CTDB_SRVID_RELEASE_IP, data);
2288 talloc_free(data.dptr);
2291 ctdb_vnn_unassign_iface(ctdb, vnn);
2292 vnn->update_in_flight = false;
2296 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2301 get list of public IPs
// Control handler: return the public IPs of this node in outdata as a
// struct ctdb_public_ip_list_old. When the request flags contain
// CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE, vnns for which
// ctdb_vnn_available() is false are left out.
2303 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2304 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2307 struct ctdb_public_ip_list_old *ips;
2308 struct ctdb_vnn *vnn;
2309 bool only_available = false;
2311 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2312 only_available = true;
2315 /* count how many public ip structures we have */
2317 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// The buffer is sized for num entries, before any filtering is applied.
2321 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2322 num*sizeof(struct ctdb_public_ip);
2323 ips = talloc_zero_size(outdata, len);
2324 CTDB_NO_MEMORY(ctdb, ips);
2327 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2328 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2331 ips->ips[i].pnn = vnn->pnn;
2332 ips->ips[i].addr = vnn->public_address;
// The reported size is recomputed from i, the number of entries written.
2336 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2337 i*sizeof(struct ctdb_public_ip);
2339 outdata->dsize = len;
2340 outdata->dptr = (uint8_t *)ips;
// Control handler: for the public address given in indata, return a
// struct ctdb_public_ip_info_old in outdata listing every interface
// configured for that vnn, with link state and reference count.
// NOTE(review): indata.dptr is cast without a visible size check.
2346 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2347 struct ctdb_req_control_old *c,
2352 ctdb_sock_addr *addr;
2353 struct ctdb_public_ip_info_old *info;
2354 struct ctdb_vnn *vnn;
2356 addr = (ctdb_sock_addr *)indata.dptr;
2358 vnn = find_public_ip_vnn(ctdb, addr);
2360 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2361 "'%s'not a public address\n",
2362 ctdb_addr_to_str(addr)));
2366 /* count how many public ip structures we have */
// vnn->ifaces is walked until a NULL entry, so it is NULL-terminated.
2368 for (;vnn->ifaces[num];) {
2372 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2373 num*sizeof(struct ctdb_iface);
2374 info = talloc_zero_size(outdata, len);
2375 CTDB_NO_MEMORY(ctdb, info);
2377 info->ip.addr = vnn->public_address;
2378 info->ip.pnn = vnn->pnn;
// 0xFFFFFFFF stays in active_idx unless an interface matches vnn->iface.
2379 info->active_idx = 0xFFFFFFFF;
2381 for (i=0; vnn->ifaces[i]; i++) {
2382 struct ctdb_interface *cur;
2384 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2386 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2390 if (vnn->iface == cur) {
2391 info->active_idx = i;
// strncpy may leave the name unterminated, so the last byte is forced
// to NUL on the following line.
2393 strncpy(info->ifaces[i].name, cur->name,
2394 sizeof(info->ifaces[i].name));
2395 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2396 info->ifaces[i].link_state = cur->link_up;
2397 info->ifaces[i].references = cur->references;
2400 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2401 i*sizeof(struct ctdb_iface);
2403 outdata->dsize = len;
2404 outdata->dptr = (uint8_t *)info;
// Control handler: return every interface on ctdb->ifaces in outdata
// as a struct ctdb_iface_list_old, with name, link state and
// reference count for each entry.
2409 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2410 struct ctdb_req_control_old *c,
2414 struct ctdb_iface_list_old *ifaces;
2415 struct ctdb_interface *cur;
2417 /* count how many public ip structures we have */
2419 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2423 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2424 num*sizeof(struct ctdb_iface);
2425 ifaces = talloc_zero_size(outdata, len);
2426 CTDB_NO_MEMORY(ctdb, ifaces);
2429 for (cur=ctdb->ifaces;cur;cur=cur->next) {
// strncpy may leave the name unterminated, so the last byte is forced
// to NUL on the following line.
2430 strncpy(ifaces->ifaces[i].name, cur->name,
2431 sizeof(ifaces->ifaces[i].name));
2432 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2433 ifaces->ifaces[i].link_state = cur->link_up;
2434 ifaces->ifaces[i].references = cur->references;
2438 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2439 i*sizeof(struct ctdb_iface);
2441 outdata->dsize = len;
2442 outdata->dptr = (uint8_t *)ifaces;
// Control handler: set the link state of the interface named in
// indata. The request is validated first (name terminated, link_state
// value accepted by the switch, references equal to 0), then the
// interface is looked up and its link_up flag updated if it changed.
2447 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2448 struct ctdb_req_control_old *c,
2451 struct ctdb_iface *info;
2452 struct ctdb_interface *iface;
2453 bool link_up = false;
2455 info = (struct ctdb_iface *)indata.dptr;
// The byte at index CTDB_IFACE_SIZE must be NUL for the name to be
// treated as terminated.
2457 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2458 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2459 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2460 len, len, info->name));
2464 switch (info->link_state) {
2472 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2473 (unsigned int)info->link_state));
2477 if (info->references != 0) {
2478 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2479 (unsigned int)info->references));
2483 iface = ctdb_find_iface(ctdb, info->name);
2484 if (iface == NULL) {
2488 if (link_up == iface->link_up) {
// A link that was up (so is now going down) is logged at DEBUG_ERR,
// the opposite transition at DEBUG_NOTICE.
2492 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2493 ("iface[%s] has changed it's link status %s => %s\n",
2495 iface->link_up?"up":"down",
2496 link_up?"up":"down"));
2498 iface->link_up = link_up;
2504 called by a daemon to inform us of the entire list of TCP tickles for
2505 a particular public address.
2506 this control should only be sent by the node that is currently serving
2507 that public address.
// Control handler: replace the tickle array of the vnn for list->addr
// with a copy of the connections carried in indata. The update is
// ignored when this node itself hosts the address.
2509 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2511 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2512 struct ctdb_tcp_array *tcparray;
2513 struct ctdb_vnn *vnn;
2515 /* We must at least have tickles.num or else we cant verify the size
2516 of the received data blob
2518 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2519 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2523 /* verify that the size of data matches what we expect */
// list->num comes from the request; this check bounds the memcpy below.
2524 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2525 + sizeof(struct ctdb_connection) * list->num) {
2526 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2530 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2531 ctdb_addr_to_str(&list->addr)));
2533 vnn = find_public_ip_vnn(ctdb, &list->addr);
2535 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2536 ctdb_addr_to_str(&list->addr)));
2541 if (vnn->pnn == ctdb->pnn) {
2543 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2544 ctdb_addr_to_str(&list->addr)));
2548 /* remove any old ticklelist we might have */
2549 talloc_free(vnn->tcp_array);
2550 vnn->tcp_array = NULL;
// The new array is attached to the vnn only after it is fully built.
2552 tcparray = talloc(vnn, struct ctdb_tcp_array);
2553 CTDB_NO_MEMORY(ctdb, tcparray);
2555 tcparray->num = list->num;
2557 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2558 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2560 memcpy(tcparray->connections, &list->connections[0],
2561 sizeof(struct ctdb_connection)*tcparray->num);
2563 /* We now have a new fresh tickle list array for this vnn */
2564 vnn->tcp_array = tcparray;
2570 called to return the full list of tickles for the public address associated
2571 with the provided vnn
// Control handler: return the tickles recorded for the public address
// in indata as a struct ctdb_tickle_list_old in outdata. The port of
// the address selects which connections are returned.
// NOTE(review): the port test itself is not fully visible in this
// view; the existing comments suggest one case returns all connections
// and the other only those whose destination port matches - confirm.
2573 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2575 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2576 struct ctdb_tickle_list_old *list;
2577 struct ctdb_tcp_array *tcparray;
2579 struct ctdb_vnn *vnn;
2582 vnn = find_public_ip_vnn(ctdb, addr);
2584 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2585 ctdb_addr_to_str(addr)));
2590 port = ctdb_addr_to_port(addr);
2592 tcparray = vnn->tcp_array;
// First pass: work out how many connections will be returned.
2594 if (tcparray != NULL) {
2596 /* All connections */
2597 num = tcparray->num;
2599 /* Count connections for port */
2600 for (i = 0; i < tcparray->num; i++) {
2601 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2608 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2609 + sizeof(struct ctdb_connection) * num;
2611 outdata->dptr = talloc_size(outdata, outdata->dsize);
2612 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2613 list = (struct ctdb_tickle_list_old *)outdata->dptr;
// Second pass: copy the selected connections into the reply.
2623 for (i = 0; i < tcparray->num; i++) {
2625 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2626 list->connections[num] = tcparray->connections[i];
2636 set the list of all tcp tickles for a public address
2638 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2639 ctdb_sock_addr *addr,
2640 struct ctdb_tcp_array *tcparray)
2644 struct ctdb_tickle_list_old *list;
2647 num = tcparray->num;
2652 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2653 sizeof(struct ctdb_connection) * num;
2654 data.dptr = talloc_size(ctdb, data.dsize);
2655 CTDB_NO_MEMORY(ctdb, data.dptr);
2657 list = (struct ctdb_tickle_list_old *)data.dptr;
2661 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2664 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2665 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2666 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2668 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2672 talloc_free(data.dptr);
2679 perform tickle updates if required
// Timer handler: for every vnn hosted by this node that is flagged
// tcp_update_needed, send its tickle list to the other nodes, then
// re-arm the timer for tickle_update_interval seconds from now.
2681 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2682 struct tevent_timer *te,
2683 struct timeval t, void *private_data)
2685 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2687 struct ctdb_vnn *vnn;
2689 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2690 /* we only send out updates for public addresses that
2693 if (ctdb->pnn != vnn->pnn) {
2696 /* We only send out the updates if we need to */
2697 if (!vnn->tcp_update_needed) {
2700 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2701 &vnn->public_address,
2704 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2705 ctdb_addr_to_str(&vnn->public_address)));
// NOTE(review): the flag appears to be cleared only after a successful
// send, so a failed send is retried on the next tick - confirm.
2708 ("Sent tickle update for public address %s\n",
2709 ctdb_addr_to_str(&vnn->public_address)));
2710 vnn->tcp_update_needed = false;
// The timer is re-armed with this same handler as its callback.
2714 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2715 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2716 ctdb_update_tcp_tickles, ctdb);
2720 start periodic update of tcp tickles
// Create the talloc context that owns the tickle update timer and
// schedule the first call of ctdb_update_tcp_tickles() after
// tickle_update_interval seconds.
2722 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2724 ctdb->tickle_update_context = talloc_new(ctdb);
2726 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2727 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2728 ctdb_update_tcp_tickles, ctdb);
// State for one series of gratuitous ARPs sent by send_gratious_arp().
// Code in this file also uses iface and count members whose
// declarations are not visible in this view.
2734 struct control_gratious_arp {
2735 struct ctdb_context *ctdb;
2736 ctdb_sock_addr addr;
2742 send a control_gratuitous arp
// Timer handler: send one gratuitous ARP for arp->addr on arp->iface
// via ctdb_sys_send_arp(), then re-arm itself CTDB_ARP_INTERVAL
// seconds later until the count test against CTDB_ARP_REPEAT stops
// the series (the handling in that branch is not visible in this view).
2744 static void send_gratious_arp(struct tevent_context *ev,
2745 struct tevent_timer *te,
2746 struct timeval t, void *private_data)
2749 struct control_gratious_arp *arp = talloc_get_type(private_data,
2750 struct control_gratious_arp);
2752 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2754 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2755 arp->iface, strerror(errno)));
2760 if (arp->count == CTDB_ARP_REPEAT) {
// The new timer is a talloc child of arp.
2765 tevent_add_timer(arp->ctdb->ev, arp,
2766 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2767 send_gratious_arp, arp);
2774 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2776 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2777 struct control_gratious_arp *arp;
2779 /* verify the size of indata */
2780 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2781 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2782 (unsigned)indata.dsize,
2783 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2787 ( offsetof(struct ctdb_addr_info_old, iface)
2788 + gratious_arp->len ) ){
2790 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2791 "but should be %u bytes\n",
2792 (unsigned)indata.dsize,
2793 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2798 arp = talloc(ctdb, struct control_gratious_arp);
2799 CTDB_NO_MEMORY(ctdb, arp);
2802 arp->addr = gratious_arp->addr;
2803 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2804 CTDB_NO_MEMORY(ctdb, arp->iface);
2807 tevent_add_timer(arp->ctdb->ev, arp,
2808 timeval_zero(), send_gratious_arp, arp);
2813 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2815 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2818 /* verify the size of indata */
2819 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2820 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2824 ( offsetof(struct ctdb_addr_info_old, iface)
2827 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2828 "but should be %u bytes\n",
2829 (unsigned)indata.dsize,
2830 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2834 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2836 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2839 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/* State handed to delete_ip_callback() */
struct delete_ip_callback_state {
	/* the pending delete-address control that still needs a reply */
	struct ctdb_req_control_old *c;
};
2851 called when releaseip event finishes for del_public_address
2853 static void delete_ip_callback(struct ctdb_context *ctdb,
2854 int32_t status, TDB_DATA data,
2855 const char *errormsg,
2858 struct delete_ip_callback_state *state =
2859 talloc_get_type(private_data, struct delete_ip_callback_state);
2861 /* If release failed then fail. */
2862 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2863 talloc_free(private_data);
2866 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2867 struct ctdb_req_control_old *c,
2868 TDB_DATA indata, bool *async_reply)
2870 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2871 struct ctdb_vnn *vnn;
2873 /* verify the size of indata */
2874 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2875 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2879 ( offsetof(struct ctdb_addr_info_old, iface)
2882 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2883 "but should be %u bytes\n",
2884 (unsigned)indata.dsize,
2885 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2889 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2891 /* walk over all public addresses until we find a match */
2892 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2893 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2894 if (vnn->pnn == ctdb->pnn) {
2895 struct delete_ip_callback_state *state;
2896 struct ctdb_public_ip *ip;
2900 vnn->delete_pending = true;
2902 state = talloc(ctdb,
2903 struct delete_ip_callback_state);
2904 CTDB_NO_MEMORY(ctdb, state);
2907 ip = talloc(state, struct ctdb_public_ip);
2910 (__location__ " Out of memory\n"));
2915 ip->addr = pub->addr;
2917 data.dsize = sizeof(struct ctdb_public_ip);
2918 data.dptr = (unsigned char *)ip;
2920 ret = ctdb_daemon_send_control(ctdb,
2923 CTDB_CONTROL_RELEASE_IP,
2930 (__location__ "Unable to send "
2931 "CTDB_CONTROL_RELEASE_IP\n"));
2936 state->c = talloc_steal(state, c);
2937 *async_reply = true;
2939 /* This IP is not hosted on the
2940 * current node so just delete it
2942 do_delete_ip(ctdb, vnn);
2949 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2950 ctdb_addr_to_str(&pub->addr)));
/* State handed to ctdb_ipreallocated_callback() */
struct ipreallocated_callback_state {
	/* the pending "ipreallocated" control that still needs a reply */
	struct ctdb_req_control_old *c;
};
2959 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2960 int status, void *p)
2962 struct ipreallocated_callback_state *state =
2963 talloc_get_type(p, struct ipreallocated_callback_state);
2967 (" \"ipreallocated\" event script failed (status %d)\n",
2969 if (status == -ETIME) {
2970 ctdb_ban_self(ctdb);
2974 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2978 /* A control to run the ipreallocated event */
2979 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2980 struct ctdb_req_control_old *c,
2984 struct ipreallocated_callback_state *state;
2986 state = talloc(ctdb, struct ipreallocated_callback_state);
2987 CTDB_NO_MEMORY(ctdb, state);
2989 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2991 ret = ctdb_event_script_callback(ctdb, state,
2992 ctdb_ipreallocated_callback, state,
2993 CTDB_EVENT_IPREALLOCATED,
2997 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3002 /* tell the control that we will be reply asynchronously */
3003 state->c = talloc_steal(state, c);
3004 *async_reply = true;
/* Tracks one in-flight "reload public IPs" request and its child process */
struct ctdb_reloadips_handle {
	/* daemon context */
	struct ctdb_context *ctdb;
	/* the control to answer when the handle is destroyed; may be NULL */
	struct ctdb_req_control_old *c;
	/* status sent in that answer */
	int status;
	/* pipe: the child writes its result to fd[1], the parent reads fd[0] */
	int fd[2];
	/* pid of the forked child; killed when the handle is destroyed */
	pid_t child;
	/* read event on fd[0] */
	struct tevent_fd *fde;
};
3019 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3021 if (h == h->ctdb->reload_ips) {
3022 h->ctdb->reload_ips = NULL;
3025 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3028 ctdb_kill(h->ctdb, h->child, SIGKILL);
3032 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3033 struct tevent_timer *te,
3034 struct timeval t, void *private_data)
3036 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3041 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3042 struct tevent_fd *fde,
3043 uint16_t flags, void *private_data)
3045 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3050 ret = sys_read(h->fd[0], &res, 1);
3051 if (ret < 1 || res != 0) {
3052 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3060 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3062 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3063 struct ctdb_public_ip_list_old *ips;
3064 struct ctdb_vnn *vnn;
3065 struct client_async_data *async_data;
3066 struct timeval timeout;
3068 struct ctdb_client_control_state *state;
3072 CTDB_NO_MEMORY(ctdb, mem_ctx);
3074 /* Read IPs from local node */
3075 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3076 CTDB_CURRENT_NODE, mem_ctx, &ips);
3079 ("Unable to fetch public IPs from local node\n"));
3080 talloc_free(mem_ctx);
3084 /* Read IPs file - this is safe since this is a child process */
3086 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3087 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3088 talloc_free(mem_ctx);
3092 async_data = talloc_zero(mem_ctx, struct client_async_data);
3093 CTDB_NO_MEMORY(ctdb, async_data);
3095 /* Compare IPs between node and file for IPs to be deleted */
3096 for (i = 0; i < ips->num; i++) {
3098 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3099 if (ctdb_same_ip(&vnn->public_address,
3100 &ips->ips[i].addr)) {
3101 /* IP is still in file */
3107 /* Delete IP ips->ips[i] */
3108 struct ctdb_addr_info_old *pub;
3111 ("IP %s no longer configured, deleting it\n",
3112 ctdb_addr_to_str(&ips->ips[i].addr)));
3114 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3115 CTDB_NO_MEMORY(ctdb, pub);
3117 pub->addr = ips->ips[i].addr;
3121 timeout = TAKEOVER_TIMEOUT();
3123 data.dsize = offsetof(struct ctdb_addr_info_old,
3125 data.dptr = (uint8_t *)pub;
3127 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3128 CTDB_CONTROL_DEL_PUBLIC_IP,
3129 0, data, async_data,
3131 if (state == NULL) {
3134 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3138 ctdb_client_async_add(async_data, state);
3142 /* Compare IPs between node and file for IPs to be added */
3144 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3145 for (i = 0; i < ips->num; i++) {
3146 if (ctdb_same_ip(&vnn->public_address,
3147 &ips->ips[i].addr)) {
3148 /* IP already on node */
3152 if (i == ips->num) {
3153 /* Add IP ips->ips[i] */
3154 struct ctdb_addr_info_old *pub;
3155 const char *ifaces = NULL;
3160 ("New IP %s configured, adding it\n",
3161 ctdb_addr_to_str(&vnn->public_address)));
3163 uint32_t pnn = ctdb_get_pnn(ctdb);
3165 data.dsize = sizeof(pnn);
3166 data.dptr = (uint8_t *)&pnn;
3168 ret = ctdb_client_send_message(
3170 CTDB_BROADCAST_CONNECTED,
3171 CTDB_SRVID_REBALANCE_NODE,
3174 DEBUG(DEBUG_WARNING,
3175 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3181 ifaces = vnn->ifaces[0];
3183 while (vnn->ifaces[iface] != NULL) {
3184 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3185 vnn->ifaces[iface]);
3189 len = strlen(ifaces) + 1;
3190 pub = talloc_zero_size(mem_ctx,
3191 offsetof(struct ctdb_addr_info_old, iface) + len);
3192 CTDB_NO_MEMORY(ctdb, pub);
3194 pub->addr = vnn->public_address;
3195 pub->mask = vnn->public_netmask_bits;
3197 memcpy(&pub->iface[0], ifaces, pub->len);
3199 timeout = TAKEOVER_TIMEOUT();
3201 data.dsize = offsetof(struct ctdb_addr_info_old,
3203 data.dptr = (uint8_t *)pub;
3205 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3206 CTDB_CONTROL_ADD_PUBLIC_IP,
3207 0, data, async_data,
3209 if (state == NULL) {
3212 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3216 ctdb_client_async_add(async_data, state);
3220 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3221 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3225 talloc_free(mem_ctx);
3229 talloc_free(mem_ctx);
3233 /* This control is sent to force the node to re-read the public addresses file
3234 and drop any addresses we should nnot longer host, and add new addresses
3235 that we are now able to host
3237 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3239 struct ctdb_reloadips_handle *h;
3240 pid_t parent = getpid();
3242 if (ctdb->reload_ips != NULL) {
3243 talloc_free(ctdb->reload_ips);
3244 ctdb->reload_ips = NULL;
3247 h = talloc(ctdb, struct ctdb_reloadips_handle);
3248 CTDB_NO_MEMORY(ctdb, h);
3253 if (pipe(h->fd) == -1) {
3254 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3259 h->child = ctdb_fork(ctdb);
3260 if (h->child == (pid_t)-1) {
3261 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3269 if (h->child == 0) {
3270 signed char res = 0;
3273 debug_extra = talloc_asprintf(NULL, "reloadips:");
3275 prctl_set_comment("ctdb_reloadips");
3276 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3277 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3280 res = ctdb_reloadips_child(ctdb);
3282 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3286 sys_write(h->fd[1], &res, 1);
3287 ctdb_wait_for_process_to_exit(parent);
3291 h->c = talloc_steal(h, c);
3294 set_close_on_exec(h->fd[0]);
3296 talloc_set_destructor(h, ctdb_reloadips_destructor);
3299 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3300 ctdb_reloadips_child_handler, (void *)h);
3301 tevent_fd_set_auto_close(h->fde);
3303 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3304 ctdb_reloadips_timeout_event, h);
3306 /* we reply later */
3307 *async_reply = true;