4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn once) and then walking
118 * ctdb->ifaces once and deleting those not in the tree. Let's go to
119 * one of those if the naive implementation causes problems... :-)
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122 struct ctdb_vnn *vnn)
124 struct ctdb_interface *i, *next;
126 /* For each interface, check if there's an IP using it. */
127 for (i = ctdb->ifaces; i != NULL; i = next) {
132 /* Only consider interfaces named in the given VNN. */
133 if (!vnn_has_interface_with_name(vnn, i->name)) {
137 /* Search for a vnn with this interface. */
139 for (tv=ctdb->vnn; tv; tv=tv->next) {
140 if (vnn_has_interface_with_name(tv, i->name)) {
147 /* None of the VNNs are using this interface. */
148 DLIST_REMOVE(ctdb->ifaces, i);
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
158 struct ctdb_interface *i;
160 for (i=ctdb->ifaces;i;i=i->next) {
161 if (strcmp(i->name, iface) == 0) {
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170 struct ctdb_vnn *vnn)
173 struct ctdb_interface *cur = NULL;
174 struct ctdb_interface *best = NULL;
176 for (i=0; vnn->ifaces[i]; i++) {
178 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
192 if (cur->references < best->references) {
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202 struct ctdb_vnn *vnn)
204 struct ctdb_interface *best = NULL;
207 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208 "still assigned to iface '%s'\n",
209 ctdb_addr_to_str(&vnn->public_address),
210 ctdb_vnn_iface_string(vnn)));
214 best = ctdb_vnn_best_iface(ctdb, vnn);
216 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217 "cannot assign to iface any iface\n",
218 ctdb_addr_to_str(&vnn->public_address)));
224 vnn->pnn = ctdb->pnn;
226 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227 "now assigned to iface '%s' refs[%d]\n",
228 ctdb_addr_to_str(&vnn->public_address),
229 ctdb_vnn_iface_string(vnn),
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238 "now unassigned (old iface '%s' refs[%d])\n",
239 ctdb_addr_to_str(&vnn->public_address),
240 ctdb_vnn_iface_string(vnn),
241 vnn->iface?vnn->iface->references:0));
243 vnn->iface->references--;
246 if (vnn->pnn == ctdb->pnn) {
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252 struct ctdb_vnn *vnn)
256 /* Nodes that are not RUNNING can not host IPs */
257 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
261 if (vnn->delete_pending) {
265 if (vnn->iface && vnn->iface->link_up) {
269 for (i=0; vnn->ifaces[i]; i++) {
270 struct ctdb_interface *cur;
272 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
285 struct ctdb_takeover_arp {
286 struct ctdb_context *ctdb;
289 struct ctdb_tcp_array *tcparray;
290 struct ctdb_vnn *vnn;
295 lists of tcp endpoints
297 struct ctdb_tcp_list {
298 struct ctdb_tcp_list *prev, *next;
299 struct ctdb_connection connection;
303 list of clients to kill on IP release
305 struct ctdb_client_ip {
306 struct ctdb_client_ip *prev, *next;
307 struct ctdb_context *ctdb;
314 send a gratuitous arp
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317 struct tevent_timer *te,
318 struct timeval t, void *private_data)
320 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
321 struct ctdb_takeover_arp);
323 struct ctdb_tcp_array *tcparray;
324 const char *iface = ctdb_vnn_iface_string(arp->vnn);
326 ret = ctdb_sys_send_arp(&arp->addr, iface);
328 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329 iface, strerror(errno)));
332 tcparray = arp->tcparray;
334 for (i=0;i<tcparray->num;i++) {
335 struct ctdb_connection *tcon;
337 tcon = &tcparray->connections[i];
338 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339 (unsigned)ntohs(tcon->dst.ip.sin_port),
340 ctdb_addr_to_str(&tcon->src),
341 (unsigned)ntohs(tcon->src.ip.sin_port)));
342 ret = ctdb_sys_send_tcp(
347 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348 ctdb_addr_to_str(&tcon->src)));
355 if (arp->count == CTDB_ARP_REPEAT) {
360 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362 ctdb_control_send_arp, arp);
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366 struct ctdb_vnn *vnn)
368 struct ctdb_takeover_arp *arp;
369 struct ctdb_tcp_array *tcparray;
371 if (!vnn->takeover_ctx) {
372 vnn->takeover_ctx = talloc_new(vnn);
373 if (!vnn->takeover_ctx) {
378 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
384 arp->addr = vnn->public_address;
387 tcparray = vnn->tcp_array;
389 /* add all of the known tcp connections for this IP to the
390 list of tcp connections to send tickle acks for */
391 arp->tcparray = talloc_steal(arp, tcparray);
393 vnn->tcp_array = NULL;
394 vnn->tcp_update_needed = true;
397 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398 timeval_zero(), ctdb_control_send_arp, arp);
403 struct takeover_callback_state {
404 struct ctdb_req_control_old *c;
405 ctdb_sock_addr *addr;
406 struct ctdb_vnn *vnn;
409 struct ctdb_do_takeip_state {
410 struct ctdb_req_control_old *c;
411 struct ctdb_vnn *vnn;
415 called when takeip event finishes
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
420 struct ctdb_do_takeip_state *state =
421 talloc_get_type(private_data, struct ctdb_do_takeip_state);
426 if (status == -ETIME) {
429 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430 ctdb_addr_to_str(&state->vnn->public_address),
431 ctdb_vnn_iface_string(state->vnn)));
432 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
438 if (ctdb->do_checkpublicip) {
440 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
442 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
449 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450 data.dsize = strlen((char *)data.dptr) + 1;
451 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
453 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
456 /* the control succeeded */
457 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
464 state->vnn->update_in_flight = false;
469 take over an ip address
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472 struct ctdb_req_control_old *c,
473 struct ctdb_vnn *vnn)
476 struct ctdb_do_takeip_state *state;
478 if (vnn->update_in_flight) {
479 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480 "update for this IP already in flight\n",
481 ctdb_addr_to_str(&vnn->public_address),
482 vnn->public_netmask_bits));
486 ret = ctdb_vnn_assign_iface(ctdb, vnn);
488 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489 "assign a usable interface\n",
490 ctdb_addr_to_str(&vnn->public_address),
491 vnn->public_netmask_bits));
495 state = talloc(vnn, struct ctdb_do_takeip_state);
496 CTDB_NO_MEMORY(ctdb, state);
498 state->c = talloc_steal(ctdb, c);
501 vnn->update_in_flight = true;
502 talloc_set_destructor(state, ctdb_takeip_destructor);
504 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505 ctdb_addr_to_str(&vnn->public_address),
506 vnn->public_netmask_bits,
507 ctdb_vnn_iface_string(vnn)));
509 ret = ctdb_event_script_callback(ctdb,
511 ctdb_do_takeip_callback,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
520 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521 ctdb_addr_to_str(&vnn->public_address),
522 ctdb_vnn_iface_string(vnn)));
530 struct ctdb_do_updateip_state {
531 struct ctdb_req_control_old *c;
532 struct ctdb_interface *old;
533 struct ctdb_vnn *vnn;
537 called when updateip event finishes
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
542 struct ctdb_do_updateip_state *state =
543 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547 if (status == -ETIME) {
550 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551 ctdb_addr_to_str(&state->vnn->public_address),
553 ctdb_vnn_iface_string(state->vnn)));
556 * All we can do is reset the old interface
557 * and let the next run fix it
559 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560 state->vnn->iface = state->old;
561 state->vnn->iface->references++;
563 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
568 if (ctdb->do_checkpublicip) {
570 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
572 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
579 /* the control succeeded */
580 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
587 state->vnn->update_in_flight = false;
592 update (move) an ip address
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595 struct ctdb_req_control_old *c,
596 struct ctdb_vnn *vnn)
599 struct ctdb_do_updateip_state *state;
600 struct ctdb_interface *old = vnn->iface;
601 const char *new_name;
603 if (vnn->update_in_flight) {
604 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605 "update for this IP already in flight\n",
606 ctdb_addr_to_str(&vnn->public_address),
607 vnn->public_netmask_bits));
611 ctdb_vnn_unassign_iface(ctdb, vnn);
612 ret = ctdb_vnn_assign_iface(ctdb, vnn);
614 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615 "assin a usable interface (old iface '%s')\n",
616 ctdb_addr_to_str(&vnn->public_address),
617 vnn->public_netmask_bits,
622 new_name = ctdb_vnn_iface_string(vnn);
623 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624 /* A benign update from one interface onto itself.
625 * no need to run the eventscripts in this case, just return
628 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632 state = talloc(vnn, struct ctdb_do_updateip_state);
633 CTDB_NO_MEMORY(ctdb, state);
635 state->c = talloc_steal(ctdb, c);
639 vnn->update_in_flight = true;
640 talloc_set_destructor(state, ctdb_updateip_destructor);
642 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643 "interface %s to %s\n",
644 ctdb_addr_to_str(&vnn->public_address),
645 vnn->public_netmask_bits,
649 ret = ctdb_event_script_callback(ctdb,
651 ctdb_do_updateip_callback,
653 CTDB_EVENT_UPDATE_IP,
657 ctdb_addr_to_str(&vnn->public_address),
658 vnn->public_netmask_bits);
660 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661 ctdb_addr_to_str(&vnn->public_address),
662 old->name, new_name));
671 Find the vnn that has the given public ip address
672 returns NULL if the address is not known as a public address
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
676 struct ctdb_vnn *vnn;
678 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679 if (ctdb_same_ip(&vnn->public_address, addr)) {
688 take over an ip address
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691 struct ctdb_req_control_old *c,
696 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697 struct ctdb_vnn *vnn;
698 bool have_ip = false;
699 bool do_updateip = false;
700 bool do_takeip = false;
701 struct ctdb_interface *best_iface = NULL;
703 if (pip->pnn != ctdb->pnn) {
704 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705 "with pnn %d, but we're node %d\n",
706 ctdb_addr_to_str(&pip->addr),
707 pip->pnn, ctdb->pnn));
711 /* update our vnn list */
712 vnn = find_public_ip_vnn(ctdb, &pip->addr);
714 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715 ctdb_addr_to_str(&pip->addr)));
719 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720 have_ip = ctdb_sys_have_ip(&pip->addr);
722 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723 if (best_iface == NULL) {
724 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725 "a usable interface (old %s, have_ip %d)\n",
726 ctdb_addr_to_str(&vnn->public_address),
727 vnn->public_netmask_bits,
728 ctdb_vnn_iface_string(vnn),
733 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
739 if (vnn->iface == NULL && have_ip) {
740 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742 ctdb_addr_to_str(&vnn->public_address)));
746 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748 "and we have it on iface[%s], but it was assigned to node %d"
749 "and we are node %d, banning ourself\n",
750 ctdb_addr_to_str(&vnn->public_address),
751 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
756 if (vnn->pnn == -1 && have_ip) {
757 vnn->pnn = ctdb->pnn;
758 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759 "and we already have it on iface[%s], update local daemon\n",
760 ctdb_addr_to_str(&vnn->public_address),
761 ctdb_vnn_iface_string(vnn)));
766 if (vnn->iface != best_iface) {
767 if (!vnn->iface->link_up) {
769 } else if (vnn->iface->references > (best_iface->references + 1)) {
770 /* only move when the rebalance gains something */
778 ctdb_vnn_unassign_iface(ctdb, vnn);
785 ret = ctdb_do_takeip(ctdb, c, vnn);
789 } else if (do_updateip) {
790 ret = ctdb_do_updateip(ctdb, c, vnn);
796 * The interface is up and the kernel knows the ip
799 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800 ctdb_addr_to_str(&pip->addr),
801 vnn->public_netmask_bits,
802 ctdb_vnn_iface_string(vnn)));
806 /* tell ctdb_control.c that we will be replying asynchronously */
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
814 DLIST_REMOVE(ctdb->vnn, vnn);
815 ctdb_vnn_unassign_iface(ctdb, vnn);
816 ctdb_remove_orphaned_ifaces(ctdb, vnn);
821 called when releaseip event finishes
823 static void release_ip_callback(struct ctdb_context *ctdb, int status,
826 struct takeover_callback_state *state =
827 talloc_get_type(private_data, struct takeover_callback_state);
830 if (status == -ETIME) {
834 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835 if (ctdb_sys_have_ip(state->addr)) {
837 ("IP %s still hosted during release IP callback, failing\n",
838 ctdb_addr_to_str(state->addr)));
839 ctdb_request_control_reply(ctdb, state->c,
846 /* send a message to all clients of this node telling them
847 that the cluster has been reconfigured and they should
848 release any sockets on this IP */
849 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851 data.dsize = strlen((char *)data.dptr)+1;
853 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
855 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
857 ctdb_vnn_unassign_iface(ctdb, state->vnn);
859 /* Process the IP if it has been marked for deletion */
860 if (state->vnn->delete_pending) {
861 do_delete_ip(ctdb, state->vnn);
865 /* the control succeeded */
866 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
872 if (state->vnn != NULL) {
873 state->vnn->update_in_flight = false;
879 release an ip address
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
882 struct ctdb_req_control_old *c,
887 struct takeover_callback_state *state;
888 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889 struct ctdb_vnn *vnn;
892 /* update our vnn list */
893 vnn = find_public_ip_vnn(ctdb, &pip->addr);
895 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896 ctdb_addr_to_str(&pip->addr)));
901 /* stop any previous arps */
902 talloc_free(vnn->takeover_ctx);
903 vnn->takeover_ctx = NULL;
905 /* Some ctdb tool commands (e.g. moveip) send
906 * lazy multicast to drop an IP from any node that isn't the
907 * intended new node. The following makes ctdbd ignore
908 * a release for any address it doesn't host.
910 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911 if (!ctdb_sys_have_ip(&pip->addr)) {
912 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913 ctdb_addr_to_str(&pip->addr),
914 vnn->public_netmask_bits,
915 ctdb_vnn_iface_string(vnn)));
916 ctdb_vnn_unassign_iface(ctdb, vnn);
920 if (vnn->iface == NULL) {
921 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922 ctdb_addr_to_str(&pip->addr),
923 vnn->public_netmask_bits));
928 /* There is a potential race between take_ip and us because we
929 * update the VNN via a callback that runs when the
930 * eventscripts have been run. Avoid the race by allowing one
931 * update to be in flight at a time.
933 if (vnn->update_in_flight) {
934 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935 "update for this IP already in flight\n",
936 ctdb_addr_to_str(&vnn->public_address),
937 vnn->public_netmask_bits));
941 iface = strdup(ctdb_vnn_iface_string(vnn));
943 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
944 ctdb_addr_to_str(&pip->addr),
945 vnn->public_netmask_bits,
949 state = talloc(ctdb, struct takeover_callback_state);
951 ctdb_set_error(ctdb, "Out of memory at %s:%d",
957 state->c = talloc_steal(state, c);
958 state->addr = talloc(state, ctdb_sock_addr);
959 if (state->addr == NULL) {
960 ctdb_set_error(ctdb, "Out of memory at %s:%d",
966 *state->addr = pip->addr;
969 vnn->update_in_flight = true;
970 talloc_set_destructor(state, ctdb_releaseip_destructor);
972 ret = ctdb_event_script_callback(ctdb,
973 state, release_ip_callback, state,
974 CTDB_EVENT_RELEASE_IP,
977 ctdb_addr_to_str(&pip->addr),
978 vnn->public_netmask_bits);
981 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982 ctdb_addr_to_str(&pip->addr),
983 ctdb_vnn_iface_string(vnn)));
988 /* tell the control that we will be replying asynchronously */
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994 ctdb_sock_addr *addr,
995 unsigned mask, const char *ifaces,
998 struct ctdb_vnn *vnn;
1005 tmp = strdup(ifaces);
1006 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007 if (!ctdb_sys_check_iface_exists(iface)) {
1008 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1015 /* Verify that we don't have an entry for this ip yet */
1016 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1019 ctdb_addr_to_str(addr)));
1024 /* create a new vnn structure for this ip address */
1025 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028 tmp = talloc_strdup(vnn, ifaces);
1029 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038 vnn->ifaces[num] = NULL;
1039 vnn->public_address = *addr;
1040 vnn->public_netmask_bits = mask;
1042 if (check_address) {
1043 if (ctdb_sys_have_ip(addr)) {
1044 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045 vnn->pnn = ctdb->pnn;
1049 for (i=0; vnn->ifaces[i]; i++) {
1050 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1052 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053 "for public_address[%s]\n",
1054 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1060 DLIST_ADD(ctdb->vnn, vnn);
1066 setup the public address lists from a file
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1074 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075 if (lines == NULL) {
1076 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1079 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083 for (i=0;i<nlines;i++) {
1085 ctdb_sock_addr addr;
1086 const char *addrstr;
1091 while ((*line == ' ') || (*line == '\t')) {
1097 if (strcmp(line, "") == 0) {
1100 tok = strtok(line, " \t");
1102 tok = strtok(NULL, " \t");
1104 if (NULL == ctdb->default_public_interface) {
1105 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1110 ifaces = ctdb->default_public_interface;
1115 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1132 static struct ctdb_public_ip_list *
1133 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1134 TALLOC_CTX *mem_ctx,
1135 struct ctdb_node_map_old *nodemap,
1136 uint32_t public_ip_flags)
1139 struct ctdb_public_ip_list_old *ip_list;
1140 struct ctdb_public_ip_list *public_ips;
1142 public_ips = talloc_zero_array(mem_ctx,
1143 struct ctdb_public_ip_list,
1145 if (public_ips == NULL) {
1146 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1150 for (j = 0; j < nodemap->num; j++) {
1151 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1155 /* Retrieve the list of public IPs from the
1156 * node. Flags says whether it is known or
1158 ret = ctdb_ctrl_get_public_ips_flags(
1159 ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1160 public_ip_flags, &ip_list);
1163 ("Failed to read public IPs from node: %u\n", j));
1164 talloc_free(public_ips);
1167 public_ips[j].num = ip_list->num;
1168 if (ip_list->num == 0) {
1169 talloc_free(ip_list);
1172 public_ips[j].ip = talloc_zero_array(public_ips,
1173 struct ctdb_public_ip,
1175 if (public_ips[j].ip == NULL) {
1176 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1177 talloc_free(public_ips);
1180 memcpy(public_ips[j].ip, &ip_list->ips[0],
1181 sizeof(struct ctdb_public_ip) * ip_list->num);
1182 talloc_free(ip_list);
1188 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
1192 for (i=0;i<nodemap->num;i++) {
1193 if (!(nodemap->node[i].flags &
1194 (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1195 /* Found one completely healthy node */
1203 struct get_tunable_callback_data {
1204 const char *tunable;
1209 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1210 int32_t res, TDB_DATA outdata,
1213 struct get_tunable_callback_data *cd =
1214 (struct get_tunable_callback_data *)callback;
1218 /* Already handled in fail callback */
1222 if (outdata.dsize != sizeof(uint32_t)) {
1223 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1224 cd->tunable, pnn, (int)sizeof(uint32_t),
1225 (int)outdata.dsize));
1230 size = talloc_array_length(cd->out);
1232 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1233 cd->tunable, pnn, size));
1238 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1241 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1242 int32_t res, TDB_DATA outdata,
1245 struct get_tunable_callback_data *cd =
1246 (struct get_tunable_callback_data *)callback;
1251 ("Timed out getting tunable \"%s\" from node %d\n",
1257 DEBUG(DEBUG_WARNING,
1258 ("Tunable \"%s\" not implemented on node %d\n",
1263 ("Unexpected error getting tunable \"%s\" from node %d\n",
1269 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1270 TALLOC_CTX *tmp_ctx,
1271 struct ctdb_node_map_old *nodemap,
1272 const char *tunable,
1273 uint32_t default_value)
1276 struct ctdb_control_get_tunable *t;
1279 struct get_tunable_callback_data callback_data;
1282 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1283 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1284 for (i=0; i<nodemap->num; i++) {
1285 tvals[i] = default_value;
1288 callback_data.out = tvals;
1289 callback_data.tunable = tunable;
1290 callback_data.fatal = false;
1292 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1293 data.dptr = talloc_size(tmp_ctx, data.dsize);
1294 t = (struct ctdb_control_get_tunable *)data.dptr;
1295 t->length = strlen(tunable)+1;
1296 memcpy(t->name, tunable, t->length);
1297 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1298 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1299 nodes, 0, TAKEOVER_TIMEOUT(),
1301 get_tunable_callback,
1302 get_tunable_fail_callback,
1303 &callback_data) != 0) {
1304 if (callback_data.fatal) {
1310 talloc_free(data.dptr);
1315 /* Set internal flags for IP allocation:
1317 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1318 * Set NOIPHOST ip flag for each INACTIVE node
1319 * if all nodes are disabled:
1320 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1322 * Set NOIPHOST ip flags for disabled nodes
1324 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1325 struct ctdb_node_map *nodemap,
1326 uint32_t *tval_noiptakeover,
1327 uint32_t *tval_noiphostonalldisabled)
1331 for (i=0;i<nodemap->num;i++) {
1332 /* Can not take IPs on node with NoIPTakeover set */
1333 if (tval_noiptakeover[i] != 0) {
1334 ipalloc_state->noiptakeover[i] = true;
1337 /* Can not host IPs on INACTIVE node */
1338 if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
1339 ipalloc_state->noiphost[i] = true;
1343 if (all_nodes_are_disabled(nodemap)) {
1344 /* If all nodes are disabled, can not host IPs on node
1345 * with NoIPHostOnAllDisabled set
1347 for (i=0;i<nodemap->num;i++) {
1348 if (tval_noiphostonalldisabled[i] != 0) {
1349 ipalloc_state->noiphost[i] = true;
1353 /* If some nodes are not disabled, then can not host
1354 * IPs on DISABLED node
1356 for (i=0;i<nodemap->num;i++) {
1357 if (nodemap->node[i].flags & NODE_FLAGS_DISABLED) {
1358 ipalloc_state->noiphost[i] = true;
1364 static struct ctdb_node_map *
1365 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1366 const struct ctdb_node_map_old *old)
1368 struct ctdb_node_map *new;
1370 new = talloc(mem_ctx, struct ctdb_node_map);
1372 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1375 new->num = old->num;
1376 new->node = talloc_zero_array(new,
1377 struct ctdb_node_and_flags, new->num);
1378 memcpy(new->node, &old->nodes[0],
1379 sizeof(struct ctdb_node_and_flags) * new->num);
1385 static bool set_ipflags(struct ctdb_context *ctdb,
1386 struct ipalloc_state *ipalloc_state,
1387 struct ctdb_node_map_old *nodemap)
1389 uint32_t *tval_noiptakeover;
1390 uint32_t *tval_noiphostonalldisabled;
1391 struct ctdb_node_map *new;
1393 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1395 if (tval_noiptakeover == NULL) {
1399 tval_noiphostonalldisabled =
1400 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1401 "NoIPHostOnAllDisabled", 0);
1402 if (tval_noiphostonalldisabled == NULL) {
1403 /* Caller frees tmp_ctx */
1407 new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
1412 set_ipflags_internal(ipalloc_state, new,
1414 tval_noiphostonalldisabled);
1416 talloc_free(tval_noiptakeover);
1417 talloc_free(tval_noiphostonalldisabled);
1423 static enum ipalloc_algorithm
1424 determine_algorithm(const struct ctdb_tunable_list *tunables)
1426 if (1 == tunables->lcp2_public_ip_assignment) {
1427 return IPALLOC_LCP2;
1428 } else if (1 == tunables->deterministic_public_ips) {
1429 return IPALLOC_DETERMINISTIC;
1431 return IPALLOC_NONDETERMINISTIC;
1435 struct takeover_callback_data {
1437 unsigned int *fail_count;
1440 static struct takeover_callback_data *
1441 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1444 static struct takeover_callback_data *takeover_data;
1446 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1447 if (takeover_data == NULL) {
1448 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1452 takeover_data->fail_count = talloc_zero_array(takeover_data,
1453 unsigned int, num_nodes);
1454 if (takeover_data->fail_count == NULL) {
1455 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1456 talloc_free(takeover_data);
1460 takeover_data->num_nodes = num_nodes;
1462 return takeover_data;
/*
 * Async-control failure callback used during a takeover run.
 *
 * callback_data must be a struct takeover_callback_data
 * (talloc_get_type_abort() aborts otherwise).  A PNN that is not below
 * num_nodes is reported as invalid.  Otherwise the failure counter of
 * node_pnn is incremented; the "failed the takeover run" message is
 * only logged for a node's first failure (counter still 0).
 */
1465 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1466 uint32_t node_pnn, int32_t res,
1467 TDB_DATA outdata, void *callback_data)
1469 struct takeover_callback_data *cd =
1470 talloc_get_type_abort(callback_data,
1471 struct takeover_callback_data);
1473 if (node_pnn >= cd->num_nodes) {
1474 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1478 if (cd->fail_count[node_pnn] == 0) {
1480 ("Node %u failed the takeover run\n", node_pnn));
1483 cd->fail_count[node_pnn]++;
/*
 * Act on the failures recorded in tcd during a takeover run.
 *
 * Scans the per-node failure counters for the largest value (strict
 * '>' comparison, so a later node with an equal count does not replace
 * an earlier one).  If at least one failure was recorded, a message
 * whose payload is the raw uint32_t PNN is sent to
 * CTDB_BROADCAST_CONNECTED so that the node receives banning credits;
 * a failure to send is logged.
 */
1486 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1487 struct takeover_callback_data *tcd)
1489 unsigned int max_fails = 0;
/* -1 converts to the largest uint32_t value, i.e. "no node" */
1490 uint32_t max_pnn = -1;
1493 for (i = 0; i < tcd->num_nodes; i++) {
1494 if (tcd->fail_count[i] > max_fails) {
1496 max_fails = tcd->fail_count[i];
1500 if (max_fails > 0) {
1505 ("Sending banning credits to %u with fail count %u\n",
1506 max_pnn, max_fails));
/* Message payload is the PNN itself, in host representation */
1508 data.dptr = (uint8_t *)&max_pnn;
1509 data.dsize = sizeof(uint32_t);
1510 ret = ctdb_client_send_message(ctdb,
1511 CTDB_BROADCAST_CONNECTED,
1516 ("Failed to set banning credits for node %u\n",
1523 * Recalculate the allocation of public IPs to nodes and have the
1524 * nodes host their allocated addresses.
1526 * - Initialise IP allocation state. Pass:
1527 + algorithm to be used;
1528 + whether IP rebalancing ("failback") should be done (this uses a
1529 cluster-wide configuration variable and only the value from the
1530 master node is used); and
1531 * + list of nodes to force rebalance (internal structure, currently
1532 * no way to fetch, only used by LCP2 for nodes that have had new
1533 * IP addresses added).
1534 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1535 * connected nodes - this is done separately so tunable values can
1536 * be faked in unit testing
1537 * - Populate NoIPTakeover tunable in IP allocation state
1538 * - Populate NoIPHost in IP allocation state, derived from node flags
1539 * and NoIPHostOnAllDisabled tunable
1540 * - Retrieve known and available IP addresses (done separately so
1541 * values can be faked in unit testing)
1542 * - Use ipalloc_set_public_ips() to set known and available IP
1543 addresses for allocation
1544 * - If cluster can't host IP addresses then early exit
1545 * - Run IP allocation algorithm
1546 * - Send RELEASE_IP to all nodes for IPs they should not host
1547 * - Send TAKE_IP to all nodes for IPs they should host
1548 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Implementation notes:
 *
 * - Every temporary allocation (fail-callback data, IP allocation
 *   state, async control state, node list) hangs off tmp_ctx, which
 *   is freed on each exit path visible below.
 * - takeover_data is shared by the RELEASE_IP, TAKEOVER_IP and
 *   IPREALLOCATED phases; takeover_run_fail_callback() counts failures
 *   per node in it and takeover_run_process_failures() is called on it
 *   near the end of the function, before tmp_ctx is freed.
 * - RELEASE_IP is sent to every node that is not flagged
 *   NODE_FLAGS_DISCONNECTED (banned nodes included) for each IP that
 *   the node is not supposed to host.
 * - TAKEOVER_IP is sent only to the node assigned to each IP; IPs
 *   whose pnn is -1 are skipped.
 * - Controls are sent with a TAKEOVER_TIMEOUT() timeout, which is
 *   derived from the takeover_timeout tunable.
 */
1550 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1551 uint32_t *force_rebalance_nodes)
1554 struct ctdb_public_ip ip;
1556 struct public_ip_list *all_ips, *tmp_ip;
1558 struct timeval timeout;
1559 struct client_async_data *async_data;
1560 struct ctdb_client_control_state *state;
1561 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1562 struct ipalloc_state *ipalloc_state;
1563 struct ctdb_public_ip_list *known_ips, *available_ips;
1564 struct takeover_callback_data *takeover_data;
1566 /* Initialise fail callback data to be used with
1567 * takeover_run_fail_callback(). A failure in any of the
1568 * following steps will cause an early return, so this can be
1569 * reused for each of those steps without re-initialising. */
1570 takeover_data = takeover_callback_data_init(tmp_ctx,
1572 if (takeover_data == NULL) {
1573 talloc_free(tmp_ctx);
1578 * ip failover is completely disabled, just send out the
1579 * ipreallocated event.
1581 if (ctdb->tunable.disable_ip_failover != 0) {
1585 ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1586 determine_algorithm(&ctdb->tunable),
1587 (ctdb->tunable.no_ip_failback != 0),
1588 force_rebalance_nodes);
1589 if (ipalloc_state == NULL) {
1590 talloc_free(tmp_ctx);
1594 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1595 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1596 talloc_free(tmp_ctx);
1600 /* Fetch known/available public IPs from each active node */
1601 /* Fetch lists of known public IPs from all nodes */
1602 known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1604 if (known_ips == NULL) {
1605 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1606 talloc_free(tmp_ctx);
1609 available_ips = ctdb_fetch_remote_public_ips(
1610 ctdb, ipalloc_state, nodemap,
1611 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1612 if (available_ips == NULL) {
1613 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1614 talloc_free(tmp_ctx);
1618 if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1619 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1620 talloc_free(tmp_ctx);
1624 if (! ipalloc_can_host_ips(ipalloc_state)) {
1625 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1629 /* Do the IP reassignment calculations */
1630 ipalloc(ipalloc_state);
1631 if (ipalloc_state->all_ips == NULL) {
1632 talloc_free(tmp_ctx);
1635 all_ips = ipalloc_state->all_ips;
1637 /* Now tell all nodes to release any public IPs they should not
1638 * host. This will be a NOOP on nodes that don't currently
1639 * hold the given IP.
1641 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1642 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1644 async_data->fail_callback = takeover_run_fail_callback;
1645 async_data->callback_data = takeover_data;
1647 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1649 /* Send a RELEASE_IP to all nodes that should not be hosting
1650 * each IP. For each IP, all but one of these will be
1651 * redundant. However, the redundant ones are used to tell
1652 * nodes which node should be hosting the IP so that commands
1653 * like "ctdb ip" can display a particular nodes idea of who
1654 * is hosting what. */
1655 for (i=0;i<nodemap->num;i++) {
1656 /* don't talk to unconnected nodes, but do talk to banned nodes */
1657 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1661 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1662 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1663 /* This node should be serving this
1664 vnn so don't tell it to release the ip
1668 ip.pnn = tmp_ip->pnn;
1669 ip.addr = tmp_ip->addr;
1671 timeout = TAKEOVER_TIMEOUT();
1672 data.dsize = sizeof(ip);
1673 data.dptr = (uint8_t *)&ip;
1674 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1675 0, CTDB_CONTROL_RELEASE_IP, 0,
1678 if (state == NULL) {
1679 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1680 talloc_free(tmp_ctx);
1684 ctdb_client_async_add(async_data, state);
1687 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1689 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1692 talloc_free(async_data);
1695 /* For each IP, send a TAKEOVER_IP to the node that should be
1696 * hosting it. Many of these will often be redundant (since
1697 * the allocation won't have changed) but they can be useful
1698 * to recover from inconsistencies. */
1699 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1700 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1702 async_data->fail_callback = takeover_run_fail_callback;
1703 async_data->callback_data = takeover_data;
1705 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1706 if (tmp_ip->pnn == -1) {
1707 /* this IP won't be taken over */
1711 ip.pnn = tmp_ip->pnn;
1712 ip.addr = tmp_ip->addr;
1714 timeout = TAKEOVER_TIMEOUT();
1715 data.dsize = sizeof(ip);
1716 data.dptr = (uint8_t *)&ip;
1717 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1718 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1719 data, async_data, &timeout, NULL);
1720 if (state == NULL) {
1721 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1722 talloc_free(tmp_ctx);
1726 ctdb_client_async_add(async_data, state);
1728 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1730 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1736 * Tell all nodes to run eventscripts to process the
1737 * "ipreallocated" event. This can do a lot of things,
1738 * including restarting services to reconfigure them if public
1739 * IPs have moved. Once upon a time this event only used to
1742 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1743 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1744 nodes, 0, TAKEOVER_TIMEOUT(),
1746 NULL, takeover_run_fail_callback,
1750 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1754 talloc_free(tmp_ctx);
1758 takeover_run_process_failures(ctdb, takeover_data);
1759 talloc_free(tmp_ctx);
1765 destroy a ctdb_client_ip structure
/*
 * talloc destructor for a struct ctdb_client_ip (installed with
 * talloc_set_destructor() in ctdb_control_tcp_client()).
 * Logs the address and port being dropped and unlinks the entry from
 * ctdb->client_ip_list.
 */
1767 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1769 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1770 ctdb_addr_to_str(&ip->addr),
1771 ntohs(ip->addr.ip.sin_port),
1774 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1779 called by a client to inform us of a TCP connection that it is managing
1780 that should be tickled with an ACK when IP takeover is done
/*
 * Register a TCP connection on behalf of a local client.
 *
 * Steps visible in this function:
 *  - look up the client from client_id via reqid_find();
 *  - bail out early when this node has no public IPs (ctdb->vnn is
 *    NULL), since tickles are useless then;
 *  - canonicalise the source and destination addresses of the
 *    connection in place;
 *  - look up the public address (vnn) matching the destination; the
 *    error logs in the first switch report a destination that is not
 *    a public address (the IPv4 log is skipped for INADDR_LOOPBACK);
 *  - refuse the registration when the address is not hosted by this
 *    node (vnn->pnn != ctdb->pnn);
 *  - record a ctdb_client_ip (with destructor, linked into
 *    ctdb->client_ip_list) and a ctdb_tcp_list entry (linked into
 *    client->tcp_list), both as talloc children of the client;
 *  - broadcast CTDB_CONTROL_TCP_ADD carrying the src/dst pair to all
 *    connected nodes with CTDB_CTRL_FLAG_NOREPLY.
 *
 * NOTE(review): 'client' is dereferenced (client->pid, talloc parent)
 * with no NULL check visible here - confirm reqid_find() cannot fail
 * for this caller.
 * NOTE(review): indata.dptr is cast to struct ctdb_connection with no
 * size check visible in this function - confirm it is validated by
 * the caller.
 */
1782 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1785 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1786 struct ctdb_connection *tcp_sock = NULL;
1787 struct ctdb_tcp_list *tcp;
1788 struct ctdb_connection t;
1791 struct ctdb_client_ip *ip;
1792 struct ctdb_vnn *vnn;
1793 ctdb_sock_addr addr;
1795 /* If we don't have public IPs, tickles are useless */
1796 if (ctdb->vnn == NULL) {
1800 tcp_sock = (struct ctdb_connection *)indata.dptr;
1802 addr = tcp_sock->src;
1803 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1804 addr = tcp_sock->dst;
1805 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1808 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1809 vnn = find_public_ip_vnn(ctdb, &addr);
1811 switch (addr.sa.sa_family) {
1813 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1814 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1815 ctdb_addr_to_str(&addr)));
1819 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1820 ctdb_addr_to_str(&addr)));
1823 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1829 if (vnn->pnn != ctdb->pnn) {
1830 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1831 ctdb_addr_to_str(&addr),
1832 client_id, client->pid));
1833 /* failing this call will tell smbd to die */
1837 ip = talloc(client, struct ctdb_client_ip);
1838 CTDB_NO_MEMORY(ctdb, ip);
1842 ip->client_id = client_id;
1843 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1844 DLIST_ADD(ctdb->client_ip_list, ip);
1846 tcp = talloc(client, struct ctdb_tcp_list);
1847 CTDB_NO_MEMORY(ctdb, tcp);
1849 tcp->connection.src = tcp_sock->src;
1850 tcp->connection.dst = tcp_sock->dst;
1852 DLIST_ADD(client->tcp_list, tcp);
1854 t.src = tcp_sock->src;
1855 t.dst = tcp_sock->dst;
1857 data.dptr = (uint8_t *)&t;
1858 data.dsize = sizeof(t);
1860 switch (addr.sa.sa_family) {
1862 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1863 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1864 ctdb_addr_to_str(&tcp_sock->src),
1865 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1868 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1869 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1870 ctdb_addr_to_str(&tcp_sock->src),
1871 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1874 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1878 /* tell all nodes about this tcp connection */
1879 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1880 CTDB_CONTROL_TCP_ADD,
1881 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1883 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1891 find a tcp address on a list
/*
 * Linear search of a tickle array for a connection.
 *
 * An entry matches when both its source and destination compare equal
 * to those of tcp under ctdb_same_sockaddr().  Returns a pointer to
 * the matching element inside array->connections (not a copy), so the
 * caller may overwrite it in place.  A NULL array is handled up front.
 */
1893 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1894 struct ctdb_connection *tcp)
1898 if (array == NULL) {
1902 for (i=0;i<array->num;i++) {
1903 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1904 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1905 return &array->connections[i];
1914 called by a daemon to inform us of a TCP connection that one of its
1915 clients is managing that should be tickled with an ACK when IP takeover is
/*
 * Add a TCP connection to the tickle list of the public address that
 * is its destination.
 *
 * Nothing is done when this node has no public IPs.  The vnn is found
 * from the connection's destination address; an unknown address is
 * only logged at DEBUG_INFO.  If the vnn has no tickle array yet, one
 * is allocated as a talloc child of the vnn and the connection is
 * stored as its first element.  Otherwise a connection that is
 * already present (ctdb_tcp_find()) is not added again, and a new one
 * is appended after growing the array with talloc_realloc().
 * When tcp_update_needed is true, the vnn is flagged so the tickle
 * list is re-sent to the other nodes.
 *
 * NOTE(review): the talloc_realloc() result is assigned directly to
 * tcparray->connections before the CTDB_NO_MEMORY() check, so on
 * allocation failure the previous pointer is overwritten while
 * tcparray->num still counts the old entries - confirm whether this
 * needs a temporary.
 * NOTE(review): indata.dptr is cast to struct ctdb_connection with no
 * size check visible in this function - confirm it is validated by
 * the caller.
 */
1918 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1920 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1921 struct ctdb_tcp_array *tcparray;
1922 struct ctdb_connection tcp;
1923 struct ctdb_vnn *vnn;
1925 /* If we don't have public IPs, tickles are useless */
1926 if (ctdb->vnn == NULL) {
1930 vnn = find_public_ip_vnn(ctdb, &p->dst);
1932 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1933 ctdb_addr_to_str(&p->dst)));
1939 tcparray = vnn->tcp_array;
1941 /* If this is the first tickle */
1942 if (tcparray == NULL) {
1943 tcparray = talloc(vnn, struct ctdb_tcp_array);
1944 CTDB_NO_MEMORY(ctdb, tcparray);
1945 vnn->tcp_array = tcparray;
1948 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1949 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1951 tcparray->connections[tcparray->num].src = p->src;
1952 tcparray->connections[tcparray->num].dst = p->dst;
1955 if (tcp_update_needed) {
1956 vnn->tcp_update_needed = true;
1962 /* Do we already have this tickle ?*/
1965 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1966 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1967 ctdb_addr_to_str(&tcp.dst),
1968 ntohs(tcp.dst.ip.sin_port),
1973 /* A new tickle, we must add it to the array */
1974 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1975 struct ctdb_connection,
1977 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1979 tcparray->connections[tcparray->num].src = p->src;
1980 tcparray->connections[tcparray->num].dst = p->dst;
1983 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1984 ctdb_addr_to_str(&tcp.dst),
1985 ntohs(tcp.dst.ip.sin_port),
1988 if (tcp_update_needed) {
1989 vnn->tcp_update_needed = true;
/*
 * Remove one connection from the tickle array of a public address.
 *
 * Nothing is removed when the vnn has no tickle array or when the
 * connection is not found in it (both cases are logged at
 * DEBUG_INFO).  Otherwise the last array element is copied over the
 * slot being removed and the element count is decremented; the
 * allocation itself is not shrunk.  When the count reaches zero the
 * whole array is freed and vnn->tcp_array is reset to NULL.  A
 * successful removal sets vnn->tcp_update_needed so the change is
 * propagated.
 *
 * Element order is not preserved by the swap-with-last removal.
 */
1996 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1998 struct ctdb_connection *tcpp;
2004 /* if the array is empty we cant remove it
2005 and we don't need to do anything
2007 if (vnn->tcp_array == NULL) {
2008 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2009 ctdb_addr_to_str(&conn->dst),
2010 ntohs(conn->dst.ip.sin_port)));
2015 /* See if we know this connection
2016 if we don't know this connection then we dont need to do anything
2018 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2020 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2021 ctdb_addr_to_str(&conn->dst),
2022 ntohs(conn->dst.ip.sin_port)));
2027 /* We need to remove this entry from the array.
2028 Instead of allocating a new array and copying data to it
2029 we cheat and just copy the last entry in the existing array
2030 to the entry that is to be removed and just shring the
2033 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2034 vnn->tcp_array->num--;
2036 /* If we deleted the last entry we also need to remove the entire array
2038 if (vnn->tcp_array->num == 0) {
2039 talloc_free(vnn->tcp_array);
2040 vnn->tcp_array = NULL;
2043 vnn->tcp_update_needed = true;
2045 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2046 ctdb_addr_to_str(&conn->src),
2047 ntohs(conn->src.ip.sin_port)));
2052 called by a daemon to inform us of a TCP connection that one of its
2053 clients used is no longer needed in the tickle database
/*
 * Control handler: drop a TCP connection from the tickle list.
 *
 * indata is interpreted as a struct ctdb_connection.  Nothing is done
 * when this node has no public IPs.  The public address is looked up
 * from the connection's destination; when it is found the connection
 * is removed with ctdb_remove_connection().
 *
 * NOTE(review): no size check on indata is visible in this function -
 * confirm it is validated by the caller.
 */
2055 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2057 struct ctdb_vnn *vnn;
2058 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2060 /* If we don't have public IPs, tickles are useless */
2061 if (ctdb->vnn == NULL) {
2065 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2068 (__location__ " unable to find public address %s\n",
2069 ctdb_addr_to_str(&conn->dst)));
2073 ctdb_remove_connection(vnn, conn);
2080 Called when another daemon starts - causes all tickles for all
2081 public addresses we are serving to be sent to the new node on the
2082 next check. This actually causes the next scheduled call to
2083 ctdb_update_tcp_tickles() to update all nodes. This is simple and
2084 doesn't require careful error handling.
/*
 * Control handler for the startup notification of node 'pnn'.
 * Logs the sender and sets tcp_update_needed on every vnn in
 * ctdb->vnn; pnn is only used for the log message here.
 */
2086 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2088 struct ctdb_vnn *vnn;
2090 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2091 (unsigned long) pnn));
2093 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2094 vnn->tcp_update_needed = true;
2102 called when a client structure goes away - hook to remove
2103 elements from the tcp_list in all daemons
/*
 * Drain client->tcp_list when a client goes away.
 *
 * Each entry is unlinked from the list first, then the public address
 * for the connection's destination is looked up.  The connection is
 * removed from the tickle array only when that address is currently
 * hosted by this node; otherwise the tickle information is left alone
 * (see the comment at the end of the loop body).
 */
2105 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2107 while (client->tcp_list) {
2108 struct ctdb_vnn *vnn;
2109 struct ctdb_tcp_list *tcp = client->tcp_list;
2110 struct ctdb_connection *conn = &tcp->connection;
2112 DLIST_REMOVE(client->tcp_list, tcp);
2114 vnn = find_public_ip_vnn(client->ctdb,
2118 (__location__ " unable to find public address %s\n",
2119 ctdb_addr_to_str(&conn->dst)));
2123 /* If the IP address is hosted on this node then
2124 * remove the connection. */
2125 if (vnn->pnn == client->ctdb->pnn) {
2126 ctdb_remove_connection(vnn, conn);
2129 /* Otherwise this function has been called because the
2130 * server IP address has been released to another node
2131 * and the client has exited. This means that we
2132 * should not delete the connection information. The
2133 * takeover node processes connections too. */
/*
 * Release every public IP address that this node currently has
 * configured.
 *
 * Nothing is released when the disable_ip_failover tunable is 1.
 * For each vnn:
 *  - if the address is not present on the system
 *    (ctdb_sys_have_ip() is false) only the interface assignment is
 *    dropped;
 *  - if an update is already in flight for the vnn a warning is
 *    logged and the address is not released;
 *  - otherwise update_in_flight is set, the CTDB_EVENT_RELEASE_IP
 *    event script is run with "<iface> <address> <netmask bits>", a
 *    CTDB_SRVID_RELEASE_IP message carrying the address string
 *    (including its terminating NUL) is sent to this node's own PNN,
 *    the interface assignment is dropped and update_in_flight is
 *    cleared again.
 * The number of released addresses is logged at the end.
 *
 * The temporary address string is allocated on the vnn and freed
 * right after the message is sent; if that allocation fails the
 * message is silently skipped.
 */
2138 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2140 struct ctdb_vnn *vnn;
2144 if (ctdb->tunable.disable_ip_failover == 1) {
2148 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2149 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2150 ctdb_vnn_unassign_iface(ctdb, vnn);
2157 /* Don't allow multiple releases at once. Some code,
2158 * particularly ctdb_tickle_sentenced_connections() is
2160 if (vnn->update_in_flight) {
2161 DEBUG(DEBUG_WARNING,
2163 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2164 ctdb_addr_to_str(&vnn->public_address),
2165 vnn->public_netmask_bits,
2166 ctdb_vnn_iface_string(vnn)));
2169 vnn->update_in_flight = true;
2171 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2172 ctdb_addr_to_str(&vnn->public_address),
2173 vnn->public_netmask_bits,
2174 ctdb_vnn_iface_string(vnn)));
2176 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2177 ctdb_vnn_iface_string(vnn),
2178 ctdb_addr_to_str(&vnn->public_address),
2179 vnn->public_netmask_bits);
2181 data.dptr = (uint8_t *)talloc_strdup(
2182 vnn, ctdb_addr_to_str(&vnn->public_address));
2183 if (data.dptr != NULL) {
2184 data.dsize = strlen((char *)data.dptr) + 1;
2185 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2186 CTDB_SRVID_RELEASE_IP, data);
2187 talloc_free(data.dptr);
2190 ctdb_vnn_unassign_iface(ctdb, vnn);
2191 vnn->update_in_flight = false;
2195 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2200 get list of public IPs
/*
 * Control handler: return this node's public IP list in outdata.
 *
 * When CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE is set in the request
 * flags, addresses for which ctdb_vnn_available() is false are left
 * out.  The reply buffer is zero-allocated (talloc child of outdata)
 * with room for the counted vnns; after filling, the reported length
 * is recomputed from the number of entries actually written (i), so
 * filtered-out entries are not sent.  Each entry carries the vnn's
 * current pnn and public address.
 */
2202 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2203 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2206 struct ctdb_public_ip_list_old *ips;
2207 struct ctdb_vnn *vnn;
2208 bool only_available = false;
2210 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2211 only_available = true;
2214 /* count how many public ip structures we have */
2216 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2220 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2221 num*sizeof(struct ctdb_public_ip);
2222 ips = talloc_zero_size(outdata, len);
2223 CTDB_NO_MEMORY(ctdb, ips);
2226 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2227 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2230 ips->ips[i].pnn = vnn->pnn;
2231 ips->ips[i].addr = vnn->public_address;
2235 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2236 i*sizeof(struct ctdb_public_ip);
2238 outdata->dsize = len;
2239 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: return details about one public address.
 *
 * indata is interpreted as a ctdb_sock_addr.  When the address is not
 * a public address of this node an error is logged.  Otherwise the
 * reply (zero-allocated as a talloc child of outdata) contains the
 * address, its current pnn and one ctdb_iface record per interface
 * name listed in vnn->ifaces (a NULL-terminated array): name (always
 * NUL-terminated after the bounded copy), link state and reference
 * count.  active_idx is the index of the interface currently assigned
 * to the vnn, or 0xFFFFFFFF when none of the listed interfaces
 * matches vnn->iface.  An interface name that ctdb_find_iface()
 * cannot resolve is logged at DEBUG_CRIT as an internal error.
 *
 * NOTE(review): no size check on indata is visible in this function -
 * confirm it is validated by the caller.
 */
2245 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2246 struct ctdb_req_control_old *c,
2251 ctdb_sock_addr *addr;
2252 struct ctdb_public_ip_info_old *info;
2253 struct ctdb_vnn *vnn;
2255 addr = (ctdb_sock_addr *)indata.dptr;
2257 vnn = find_public_ip_vnn(ctdb, addr);
2259 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2260 "'%s'not a public address\n",
2261 ctdb_addr_to_str(addr)));
2265 /* count how many interfaces are configured for this public address */
2267 for (;vnn->ifaces[num];) {
2271 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2272 num*sizeof(struct ctdb_iface);
2273 info = talloc_zero_size(outdata, len);
2274 CTDB_NO_MEMORY(ctdb, info);
2276 info->ip.addr = vnn->public_address;
2277 info->ip.pnn = vnn->pnn;
2278 info->active_idx = 0xFFFFFFFF;
2280 for (i=0; vnn->ifaces[i]; i++) {
2281 struct ctdb_interface *cur;
2283 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2285 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2289 if (vnn->iface == cur) {
2290 info->active_idx = i;
2292 strncpy(info->ifaces[i].name, cur->name,
2293 sizeof(info->ifaces[i].name));
2294 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2295 info->ifaces[i].link_state = cur->link_up;
2296 info->ifaces[i].references = cur->references;
2299 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2300 i*sizeof(struct ctdb_iface);
2302 outdata->dsize = len;
2303 outdata->dptr = (uint8_t *)info;
/*
 * Control handler: return the list of interfaces known to this node.
 *
 * The reply is zero-allocated as a talloc child of outdata and holds
 * one ctdb_iface record per entry of ctdb->ifaces: name (bounded copy,
 * always NUL-terminated), link state and reference count.  The
 * reported length is computed from the number of records written.
 */
2308 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2309 struct ctdb_req_control_old *c,
2313 struct ctdb_iface_list_old *ifaces;
2314 struct ctdb_interface *cur;
2316 /* count how many interfaces we have */
2318 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2322 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2323 num*sizeof(struct ctdb_iface);
2324 ifaces = talloc_zero_size(outdata, len);
2325 CTDB_NO_MEMORY(ctdb, ifaces);
2328 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2329 strncpy(ifaces->ifaces[i].name, cur->name,
2330 sizeof(ifaces->ifaces[i].name));
2331 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2332 ifaces->ifaces[i].link_state = cur->link_up;
2333 ifaces->ifaces[i].references = cur->references;
2337 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2338 i*sizeof(struct ctdb_iface);
2340 outdata->dsize = len;
2341 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a named interface.
 *
 * indata is interpreted as a struct ctdb_iface.  The request is
 * checked before use: the byte at name[CTDB_IFACE_SIZE] must be NUL,
 * link_state must be one of the values accepted by the switch, and
 * references must be 0; each violation is logged at DEBUG_ERR.
 * The interface is then looked up by name.  If the requested state
 * equals the current one nothing changes; otherwise the transition is
 * logged (at DEBUG_ERR when the link was up, i.e. it is going down,
 * and at DEBUG_NOTICE when it is coming up) and iface->link_up is
 * updated.
 */
2346 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2347 struct ctdb_req_control_old *c,
2350 struct ctdb_iface *info;
2351 struct ctdb_interface *iface;
2352 bool link_up = false;
2354 info = (struct ctdb_iface *)indata.dptr;
2356 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2357 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2358 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2359 len, len, info->name));
2363 switch (info->link_state) {
2371 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2372 (unsigned int)info->link_state));
2376 if (info->references != 0) {
2377 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2378 (unsigned int)info->references));
2382 iface = ctdb_find_iface(ctdb, info->name);
2383 if (iface == NULL) {
2387 if (link_up == iface->link_up) {
2391 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2392 ("iface[%s] has changed it's link status %s => %s\n",
2394 iface->link_up?"up":"down",
2395 link_up?"up":"down"));
2397 iface->link_up = link_up;
2403 called by a daemon to inform us of the entire list of TCP tickles for
2404 a particular public address.
2405 this control should only be sent by the node that is currently serving
2406 that public address.
/*
 * Control handler: replace the tickle list of a public address with
 * the list received in indata (a struct ctdb_tickle_list_old).
 *
 * The blob size is checked in two steps: first that it reaches the
 * start of the connections array, so that list->num can be read, and
 * then that it holds at least list->num connections.  An address that
 * is not a public address of this node is logged at DEBUG_INFO.  The
 * update is ignored as redundant when this node itself hosts the
 * address.  Otherwise any existing tickle array is freed and a fresh
 * one, allocated as a talloc child of the vnn, receives a copy of the
 * received connections.
 */
2408 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2410 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2411 struct ctdb_tcp_array *tcparray;
2412 struct ctdb_vnn *vnn;
2414 /* We must at least have tickles.num or else we cant verify the size
2415 of the received data blob
2417 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2418 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2422 /* verify that the size of data matches what we expect */
2423 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2424 + sizeof(struct ctdb_connection) * list->num) {
2425 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2429 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2430 ctdb_addr_to_str(&list->addr)));
2432 vnn = find_public_ip_vnn(ctdb, &list->addr);
2434 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2435 ctdb_addr_to_str(&list->addr)));
2440 if (vnn->pnn == ctdb->pnn) {
2442 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2443 ctdb_addr_to_str(&list->addr)));
2447 /* remove any old ticklelist we might have */
2448 talloc_free(vnn->tcp_array);
2449 vnn->tcp_array = NULL;
2451 tcparray = talloc(vnn, struct ctdb_tcp_array);
2452 CTDB_NO_MEMORY(ctdb, tcparray);
2454 tcparray->num = list->num;
2456 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2457 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2459 memcpy(tcparray->connections, &list->connections[0],
2460 sizeof(struct ctdb_connection)*tcparray->num);
2462 /* We now have a new fresh tickle list array for this vnn */
2463 vnn->tcp_array = tcparray;
2469 called to return the full list of tickles for the public address associated
2470 with the provided vnn
/*
 * Control handler: return the tickle list of a public address.
 *
 * indata is interpreted as a ctdb_sock_addr.  An address that is not
 * a public address of this node is logged at DEBUG_ERR.  The port
 * taken from the address acts as a filter: per the comments below,
 * either all connections are counted or only those whose destination
 * port equals it.  The reply is sized for the counted connections,
 * allocated as a talloc child of outdata, and filled with the
 * selected entries of the vnn's tickle array.
 *
 * NOTE(review): no size check on indata is visible in this function -
 * confirm it is validated by the caller.
 */
2472 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2474 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2475 struct ctdb_tickle_list_old *list;
2476 struct ctdb_tcp_array *tcparray;
2478 struct ctdb_vnn *vnn;
2481 vnn = find_public_ip_vnn(ctdb, addr);
2483 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2484 ctdb_addr_to_str(addr)));
2489 port = ctdb_addr_to_port(addr);
2491 tcparray = vnn->tcp_array;
2493 if (tcparray != NULL) {
2495 /* All connections */
2496 num = tcparray->num;
2498 /* Count connections for port */
2499 for (i = 0; i < tcparray->num; i++) {
2500 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2507 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2508 + sizeof(struct ctdb_connection) * num;
2510 outdata->dptr = talloc_size(outdata, outdata->dsize);
2511 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2512 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2522 for (i = 0; i < tcparray->num; i++) {
2524 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2525 list->connections[num] = tcparray->connections[i];
2535 set the list of all tcp tickles for a public address
/*
 * Send the tickle list of one public address to the other nodes.
 *
 * Builds a struct ctdb_tickle_list_old in a temporary buffer
 * (allocated on ctdb) sized for the connections in tcparray, copies
 * the connections into it and sends it as a
 * CTDB_CONTROL_SET_TCP_TICKLE_LIST control to CTDB_BROADCAST_ALL with
 * CTDB_CTRL_FLAG_NOREPLY.  A send failure is logged at DEBUG_ERR.
 * The temporary buffer is freed before returning on the path shown
 * at the end of the function.
 */
2537 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2538 ctdb_sock_addr *addr,
2539 struct ctdb_tcp_array *tcparray)
2543 struct ctdb_tickle_list_old *list;
2546 num = tcparray->num;
2551 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2552 sizeof(struct ctdb_connection) * num;
2553 data.dptr = talloc_size(ctdb, data.dsize);
2554 CTDB_NO_MEMORY(ctdb, data.dptr);
2556 list = (struct ctdb_tickle_list_old *)data.dptr;
2560 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2563 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2564 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2565 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2567 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2571 talloc_free(data.dptr);
2578 perform tickle updates if required
/*
 * Timer handler that pushes pending tickle-list updates.
 *
 * private_data is the ctdb context.  Only vnns hosted by this node
 * (vnn->pnn == ctdb->pnn) whose tcp_update_needed flag is set are
 * considered.  For those the tickle list is sent with
 * ctdb_send_set_tcp_tickles_for_ip(); tcp_update_needed is cleared
 * after the "Sent tickle update" log, while a failed send is logged
 * at DEBUG_ERR.  Finally the handler re-arms itself on
 * ctdb->tickle_update_context to fire again after
 * tickle_update_interval seconds.
 */
2580 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2581 struct tevent_timer *te,
2582 struct timeval t, void *private_data)
2584 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2586 struct ctdb_vnn *vnn;
2588 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2589 /* we only send out updates for public addresses that
2592 if (ctdb->pnn != vnn->pnn) {
2595 /* We only send out the updates if we need to */
2596 if (!vnn->tcp_update_needed) {
2599 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2600 &vnn->public_address,
2603 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2604 ctdb_addr_to_str(&vnn->public_address)));
2607 ("Sent tickle update for public address %s\n",
2608 ctdb_addr_to_str(&vnn->public_address)));
2609 vnn->tcp_update_needed = false;
2613 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2614 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2615 ctdb_update_tcp_tickles, ctdb);
2619 start periodic update of tcp tickles
/*
 * Start the periodic tickle update.
 *
 * Creates ctdb->tickle_update_context as a talloc child of ctdb and
 * schedules the first ctdb_update_tcp_tickles() call on it after
 * tickle_update_interval seconds; the handler re-arms itself.
 * The results of talloc_new() and tevent_add_timer() are not checked.
 */
2621 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2623 ctdb->tickle_update_context = talloc_new(ctdb);
2625 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2626 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2627 ctdb_update_tcp_tickles, ctdb);
/*
 * State for a series of gratuitous ARPs for one address, passed as
 * private data to the send_gratious_arp() timer handler.
 */
2633 struct control_gratious_arp {
2634 struct ctdb_context *ctdb;
2635 ctdb_sock_addr addr;
2641 send a control_gratuitous arp
/*
 * Timer handler: send one gratuitous ARP and schedule the next.
 *
 * private_data is a struct control_gratious_arp.  The ARP is sent
 * with ctdb_sys_send_arp() for arp->addr on arp->iface; a failure is
 * logged together with strerror(errno).  The series stops once
 * arp->count equals CTDB_ARP_REPEAT; otherwise the handler re-arms
 * itself, with the timer owned by the arp state, to run again after
 * CTDB_ARP_INTERVAL seconds.
 */
2643 static void send_gratious_arp(struct tevent_context *ev,
2644 struct tevent_timer *te,
2645 struct timeval t, void *private_data)
2648 struct control_gratious_arp *arp = talloc_get_type(private_data,
2649 struct control_gratious_arp);
2651 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2653 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2654 arp->iface, strerror(errno)));
2659 if (arp->count == CTDB_ARP_REPEAT) {
2664 tevent_add_timer(arp->ctdb->ev, arp,
2665 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2666 send_gratious_arp, arp);
/*
 * Control handler: start sending gratuitous ARPs for an address.
 *
 * indata is interpreted as a struct ctdb_addr_info_old.  Its size is
 * checked against the fixed header (up to the iface member) and then
 * against header plus the len field.  A control_gratious_arp state is
 * allocated on ctdb, the address and a copy of the interface name
 * (talloc child of the state) are stored in it, and
 * send_gratious_arp() is scheduled to run immediately
 * (timeval_zero()).
 *
 * NOTE(review): talloc_strdup() relies on gratious_arp->iface being
 * NUL-terminated within indata; no explicit check is visible here -
 * confirm.
 * NOTE(review): if the iface copy fails, CTDB_NO_MEMORY() appears to
 * leave 'arp' allocated on ctdb - confirm.
 */
2673 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2675 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2676 struct control_gratious_arp *arp;
2678 /* verify the size of indata */
2679 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2680 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2681 (unsigned)indata.dsize,
2682 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2686 ( offsetof(struct ctdb_addr_info_old, iface)
2687 + gratious_arp->len ) ){
2689 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2690 "but should be %u bytes\n",
2691 (unsigned)indata.dsize,
2692 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2697 arp = talloc(ctdb, struct control_gratious_arp);
2698 CTDB_NO_MEMORY(ctdb, arp);
2701 arp->addr = gratious_arp->addr;
2702 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2703 CTDB_NO_MEMORY(ctdb, arp->iface);
2706 tevent_add_timer(arp->ctdb->ev, arp,
2707 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address to this node at run time.
 *
 * indata is interpreted as a struct ctdb_addr_info_old and its size
 * is checked against the fixed header and against header plus the
 * len field.  The address, netmask and interface string are then
 * handed to ctdb_add_public_address() with 'true' as the last
 * argument; a failure is logged at DEBUG_ERR.
 */
2712 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2714 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2717 /* verify the size of indata */
2718 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2719 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2723 ( offsetof(struct ctdb_addr_info_old, iface)
2726 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2727 "but should be %u bytes\n",
2728 (unsigned)indata.dsize,
2729 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2733 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2735 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2738 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * State kept while an asynchronous public address deletion is in
 * progress.  'c' is the original control request, which
 * delete_ip_callback() answers once the release has completed.
 */
2745 struct delete_ip_callback_state {
2746 struct ctdb_req_control_old *c;
2750 called when releaseip event finishes for del_public_address
/*
 * Completion callback for the RELEASE_IP control sent by
 * ctdb_control_del_public_address().
 *
 * private_data is a struct delete_ip_callback_state.  The original
 * control request stored in it is answered with the status and error
 * message of the release (no reply data), then the state is freed.
 */
2752 static void delete_ip_callback(struct ctdb_context *ctdb,
2753 int32_t status, TDB_DATA data,
2754 const char *errormsg,
2757 struct delete_ip_callback_state *state =
2758 talloc_get_type(private_data, struct delete_ip_callback_state);
2760 /* If release failed then fail. */
2761 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2762 talloc_free(private_data);
/*
 * Control handler: delete the public address carried in indata.
 *
 * indata.dptr is interpreted as a struct ctdb_addr_info_old and
 * size-checked in the same way as in ctdb_control_add_public_address().
 * The configured addresses (ctdb->vnn) are then searched for pub->addr:
 *
 *  - if the matching entry is currently assigned to this node
 *    (vnn->pnn == ctdb->pnn) it is flagged delete_pending, a
 *    CTDB_CONTROL_RELEASE_IP is issued through ctdb_daemon_send_control()
 *    and *async_reply is set to true so that the requester is answered
 *    later;
 *  - otherwise do_delete_ip() is called immediately;
 *  - if no entry matches, an error is logged.
 *
 * NOTE(review): some lines (local declarations, return statements, the
 * remaining arguments of ctdb_daemon_send_control(), closing braces) are
 * not visible in this excerpt, so return values are not documented here.
 * NOTE(review): the "Unable to send" message has no space after
 * __location__, unlike the other DEBUG calls in this function.
 */
2765 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2766 struct ctdb_req_control_old *c,
2767 TDB_DATA indata, bool *async_reply)
2769 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2770 struct ctdb_vnn *vnn;
2772 /* verify the size of indata */
2773 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2774 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2778 ( offsetof(struct ctdb_addr_info_old, iface)
2781 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2782 "but should be %u bytes\n",
2783 (unsigned)indata.dsize,
2784 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2788 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2790 /* walk over all public addresses until we find a match */
2791 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2792 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
/* the address is assigned to this node: release it before deleting */
2793 if (vnn->pnn == ctdb->pnn) {
2794 struct delete_ip_callback_state *state;
2795 struct ctdb_public_ip *ip;
2799 vnn->delete_pending = true;
2801 state = talloc(ctdb,
2802 struct delete_ip_callback_state);
2803 CTDB_NO_MEMORY(ctdb, state);
/* the release payload is a talloc child of state */
2806 ip = talloc(state, struct ctdb_public_ip);
2809 (__location__ " Out of memory\n"));
2814 ip->addr = pub->addr;
2816 data.dsize = sizeof(struct ctdb_public_ip);
2817 data.dptr = (unsigned char *)ip;
2819 ret = ctdb_daemon_send_control(ctdb,
2822 CTDB_CONTROL_RELEASE_IP,
2829 (__location__ "Unable to send "
2830 "CTDB_CONTROL_RELEASE_IP\n"));
/* state takes over the request so it can be answered asynchronously */
2835 state->c = talloc_steal(state, c);
2836 *async_reply = true;
2838 /* This IP is not hosted on the
2839 * current node so just delete it
2841 do_delete_ip(ctdb, vnn);
2848 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2849 ctdb_addr_to_str(&pub->addr)));
/*
 * State handed from ctdb_control_ipreallocated() to
 * ctdb_ipreallocated_callback() while the "ipreallocated" event runs.
 */
2854 struct ipreallocated_callback_state {
/* the ipreallocated request that the callback replies to */
2855 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event script started by
 * ctdb_control_ipreallocated().
 *
 * p is the struct ipreallocated_callback_state for the request.  The
 * event status is returned to the original requester through
 * ctdb_request_control_reply().
 *
 * NOTE(review): the condition guarding the failure log message and any
 * cleanup of the state are on lines not visible in this excerpt.
 */
2858 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2859 int status, void *p)
2861 struct ipreallocated_callback_state *state =
2862 talloc_get_type(p, struct ipreallocated_callback_state);
2866 (" \"ipreallocated\" event script failed (status %d)\n",
/* a status of -ETIME makes this node ban itself */
2868 if (status == -ETIME) {
2869 ctdb_ban_self(ctdb);
2873 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2877 /* A control to run the ipreallocated event */
/*
 * Allocates a struct ipreallocated_callback_state on ctdb and starts the
 * CTDB_EVENT_IPREALLOCATED event through ctdb_event_script_callback(),
 * with ctdb_ipreallocated_callback() as completion callback.  Once the
 * event has been started the request c is moved under the state and
 * *async_reply is set to true.
 *
 * NOTE(review): the remaining parameters, the trailing arguments of
 * ctdb_event_script_callback() and the return statements are on lines
 * not visible in this excerpt.
 */
2878 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2879 struct ctdb_req_control_old *c,
2883 struct ipreallocated_callback_state *state;
2885 state = talloc(ctdb, struct ipreallocated_callback_state);
2886 CTDB_NO_MEMORY(ctdb, state);
2888 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
/* state is passed twice: as the second argument and as the callback's private data */
2890 ret = ctdb_event_script_callback(ctdb, state,
2891 ctdb_ipreallocated_callback, state,
2892 CTDB_EVENT_IPREALLOCATED,
2896 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2901 /* take ownership of the request; we will reply asynchronously */
2902 state->c = talloc_steal(state, c);
2903 *async_reply = true;
/*
 * Bookkeeping for one reload-public-IPs request, created by
 * ctdb_control_reload_public_ips().
 *
 * NOTE(review): the code in this file also uses h->status, h->fd[] and
 * h->child; their declarations are on lines not visible in this excerpt.
 */
2909 struct ctdb_reloadips_handle {
/* daemon context the handle belongs to */
2910 struct ctdb_context *ctdb;
/* the request answered from ctdb_reloadips_destructor() */
2911 struct ctdb_req_control_old *c;
/* fd event on the read end of the pipe to the child (h->fd[0]) */
2915 struct tevent_fd *fde;
/*
 * talloc destructor installed on a struct ctdb_reloadips_handle by
 * ctdb_control_reload_public_ips().
 *
 * Clears ctdb->reload_ips if it still points at this handle, sends the
 * reply for the pending request using h->status, and sends SIGKILL to
 * the child process via ctdb_kill().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
2918 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2920 if (h == h->ctdb->reload_ips) {
2921 h->ctdb->reload_ips = NULL;
2924 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2927 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer callback armed by ctdb_control_reload_public_ips() with a 120
 * second offset; private_data is the struct ctdb_reloadips_handle.
 *
 * NOTE(review): the action taken on the handle is on lines not visible
 * in this excerpt.
 */
2931 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2932 struct tevent_timer *te,
2933 struct timeval t, void *private_data)
2935 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd event callback for the read end of the pipe to the reloadips child;
 * private_data is the struct ctdb_reloadips_handle.
 *
 * Reads the one-byte result written by the child.  A short read or a
 * non-zero byte is logged as a child failure.
 *
 * NOTE(review): local declarations and whatever follows the check
 * (for example recording the status or releasing the handle) are on
 * lines not visible in this excerpt.
 */
2940 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2941 struct tevent_fd *fde,
2942 uint16_t flags, void *private_data)
2944 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2949 ret = sys_read(h->fd[0], &res, 1);
2950 if (ret < 1 || res != 0) {
2951 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Work done in the forked reloadips child (called from
 * ctdb_control_reload_public_ips()).
 *
 * Steps visible here:
 *  1. fetch the public IPs currently known to the local node
 *     (ctdb_ctrl_get_public_ips() on CTDB_CURRENT_NODE);
 *  2. re-read the public addresses file into ctdb->vnn
 *     (ctdb_set_public_addresses());
 *  3. for every IP the node knows that is no longer in ctdb->vnn, send
 *     CTDB_CONTROL_DEL_PUBLIC_IP to the local node;
 *  4. for every entry in ctdb->vnn that the node does not know yet, send
 *     a CTDB_SRVID_REBALANCE_NODE message carrying this node's pnn and
 *     then CTDB_CONTROL_ADD_PUBLIC_IP with the address, mask and a
 *     comma-separated interface list;
 *  5. wait for all queued controls with ctdb_client_async_wait().
 *
 * All temporary allocations except the joined interface string hang off
 * mem_ctx, which is freed on the visible exit paths.
 *
 * NOTE(review): local declarations, several conditions, return
 * statements and closing braces are on lines not visible in this
 * excerpt, so return values are not documented here.
 * NOTE(review): the joined interface string is allocated with vnn as
 * talloc parent, not mem_ctx, so talloc_free(mem_ctx) does not release
 * it; no NULL check on it is visible either.
 */
2959 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2961 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2962 struct ctdb_public_ip_list_old *ips;
2963 struct ctdb_vnn *vnn;
2964 struct client_async_data *async_data;
2965 struct timeval timeout;
2967 struct ctdb_client_control_state *state;
2971 CTDB_NO_MEMORY(ctdb, mem_ctx);
2973 /* Read IPs from local node */
2974 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2975 CTDB_CURRENT_NODE, mem_ctx, &ips);
2978 ("Unable to fetch public IPs from local node\n"));
2979 talloc_free(mem_ctx);
2983 /* Read IPs file - this is safe since this is a child process */
2985 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2986 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2987 talloc_free(mem_ctx);
/* collects the controls sent below so they can be waited for together */
2991 async_data = talloc_zero(mem_ctx, struct client_async_data);
2992 CTDB_NO_MEMORY(ctdb, async_data);
2994 /* Compare IPs between node and file for IPs to be deleted */
2995 for (i = 0; i < ips->num; i++) {
2997 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2998 if (ctdb_same_ip(&vnn->public_address,
2999 &ips->ips[i].addr)) {
3000 /* IP is still in file */
3006 /* Delete IP ips->ips[i] */
3007 struct ctdb_addr_info_old *pub;
3010 ("IP %s no longer configured, deleting it\n",
3011 ctdb_addr_to_str(&ips->ips[i].addr)));
3013 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3014 CTDB_NO_MEMORY(ctdb, pub);
3016 pub->addr = ips->ips[i].addr;
3020 timeout = TAKEOVER_TIMEOUT();
3022 data.dsize = offsetof(struct ctdb_addr_info_old,
3024 data.dptr = (uint8_t *)pub;
3026 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3027 CTDB_CONTROL_DEL_PUBLIC_IP,
3028 0, data, async_data,
3030 if (state == NULL) {
3033 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3037 ctdb_client_async_add(async_data, state);
3041 /* Compare IPs between node and file for IPs to be added */
3043 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3044 for (i = 0; i < ips->num; i++) {
3045 if (ctdb_same_ip(&vnn->public_address,
3046 &ips->ips[i].addr)) {
3047 /* IP already on node */
3051 if (i == ips->num) {
3052 /* Add IP vnn->public_address (no entry in ips matched it) */
3053 struct ctdb_addr_info_old *pub;
3054 const char *ifaces = NULL;
3059 ("New IP %s configured, adding it\n",
3060 ctdb_addr_to_str(&vnn->public_address)));
/* message payload is this node's pnn */
3062 uint32_t pnn = ctdb_get_pnn(ctdb);
3064 data.dsize = sizeof(pnn);
3065 data.dptr = (uint8_t *)&pnn;
3067 ret = ctdb_client_send_message(
3069 CTDB_BROADCAST_CONNECTED,
3070 CTDB_SRVID_REBALANCE_NODE,
3073 DEBUG(DEBUG_WARNING,
3074 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/* join the interface names into one comma-separated string */
3080 ifaces = vnn->ifaces[0];
3082 while (vnn->ifaces[iface] != NULL) {
3083 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3084 vnn->ifaces[iface]);
/* len includes the terminating NUL; pub is sized as fixed part + string */
3088 len = strlen(ifaces) + 1;
3089 pub = talloc_zero_size(mem_ctx,
3090 offsetof(struct ctdb_addr_info_old, iface) + len);
3091 CTDB_NO_MEMORY(ctdb, pub);
3093 pub->addr = vnn->public_address;
3094 pub->mask = vnn->public_netmask_bits;
3096 memcpy(&pub->iface[0], ifaces, pub->len);
3098 timeout = TAKEOVER_TIMEOUT();
3100 data.dsize = offsetof(struct ctdb_addr_info_old,
3102 data.dptr = (uint8_t *)pub;
3104 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3105 CTDB_CONTROL_ADD_PUBLIC_IP,
3106 0, data, async_data,
3108 if (state == NULL) {
3111 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3115 ctdb_client_async_add(async_data, state);
/* block until every queued add/delete control has completed */
3119 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3120 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3124 talloc_free(mem_ctx);
3128 talloc_free(mem_ctx);
3132 /* This control is sent to force the node to re-read the public addresses file
3133 and drop any addresses we should no longer host, and add new addresses
3134 that we are now able to host
/*
 * Control handler: reload the public IP configuration using a forked
 * child process.
 *
 * Any handle still recorded in ctdb->reload_ips is freed first.  A new
 * struct ctdb_reloadips_handle is allocated on ctdb, a pipe is created
 * and a child is forked with ctdb_fork().
 *
 * Child: switches to client mode, runs ctdb_reloadips_child(), writes
 * the one-byte result to the pipe and then waits for the parent process
 * to exit.
 *
 * Parent: moves the request c under the handle, installs
 * ctdb_reloadips_destructor(), watches the read end of the pipe with
 * ctdb_reloadips_child_handler(), arms a 120 second timer with
 * ctdb_reloadips_timeout_event() and sets *async_reply to true.
 *
 * NOTE(review): initialisation of the handle fields, closing of the
 * unused pipe ends, error-path cleanup, return statements and the
 * child's exit are on lines not visible in this excerpt.
 * NOTE(review): the pipe failure message mentions "ctdb_freeze_lock",
 * which looks copied from elsewhere; the pipe here is for reloadips.
 */
3136 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3138 struct ctdb_reloadips_handle *h;
/* remembered so the child can wait for this (parent) process to exit */
3139 pid_t parent = getpid();
3141 if (ctdb->reload_ips != NULL) {
3142 talloc_free(ctdb->reload_ips);
3143 ctdb->reload_ips = NULL;
3146 h = talloc(ctdb, struct ctdb_reloadips_handle);
3147 CTDB_NO_MEMORY(ctdb, h);
3152 if (pipe(h->fd) == -1) {
3153 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3158 h->child = ctdb_fork(ctdb);
3159 if (h->child == (pid_t)-1) {
3160 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* child side */
3168 if (h->child == 0) {
3169 signed char res = 0;
3172 debug_extra = talloc_asprintf(NULL, "reloadips:");
3174 prctl_set_comment("ctdb_reloadips");
3175 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3176 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3179 res = ctdb_reloadips_child(ctdb);
3181 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
/* report the single-byte result to the parent through the pipe */
3185 sys_write(h->fd[1], &res, 1);
3186 ctdb_wait_for_process_to_exit(parent);
/* parent side: the handle now owns the request */
3190 h->c = talloc_steal(h, c);
3193 set_close_on_exec(h->fd[0]);
3195 talloc_set_destructor(h, ctdb_reloadips_destructor);
3198 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3199 ctdb_reloadips_child_handler, (void *)h);
3200 tevent_fd_set_auto_close(h->fde);
3202 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3203 ctdb_reloadips_timeout_event, h);
3205 /* we reply later */
3206 *async_reply = true;