4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
36 /* These flags are ONLY valid within IP allocation code and must be
37 * cleared to avoid confusing other recovery daemon functions
39 #define NODE_FLAGS_NOIPTAKEOVER 0x01000000 /* can not takeover additional IPs */
40 #define NODE_FLAGS_NOIPHOST 0x02000000 /* can not host IPs */
43 struct ctdb_iface *prev, *next;
/* Return the name of the interface assigned to this VNN, read from
 * vnn->iface->name.
 * NOTE(review): only part of the body is visible here; confirm how a VNN
 * with no assigned interface (vnn->iface == NULL) is handled. */
49 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
52 return vnn->iface->name;
58 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
62 /* Verify that we don't have an entry for this interface yet */
63 for (i=ctdb->ifaces;i;i=i->next) {
64 if (strcmp(i->name, iface) == 0) {
69 /* create a new structure for this interface */
70 i = talloc_zero(ctdb, struct ctdb_iface);
71 CTDB_NO_MEMORY_FATAL(ctdb, i);
72 i->name = talloc_strdup(i, iface);
73 CTDB_NO_MEMORY(ctdb, i->name);
75 * If link_up defaults to true then IPs can be allocated to a
76 * node during the first recovery. However, then an interface
77 * could have its link marked down during the startup event,
78 * causing the IP to move almost immediately. If link_up
79 * defaults to false then, during normal operation, IPs added
80 * to a new interface can't be assigned until a monitor cycle
81 * has occurred and marked the new interfaces up. This makes
82 * IP allocation unpredictable. The following is a neat
83 * compromise: early in startup link_up defaults to false, so
84 * IPs can't be assigned, and after startup IPs can be
85 * assigned immediately.
87 i->link_up = ctdb->done_startup;
89 DLIST_ADD(ctdb->ifaces, i);
/* Scan the NULL-terminated vnn->ifaces array for an entry that compares
 * equal (strcmp) to the given name.
 * NOTE(review): the return statements are not visible here; presumably
 * true on a match and false otherwise — confirm. */
94 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
99 for (n = 0; vnn->ifaces[n] != NULL; n++) {
100 if (strcmp(name, vnn->ifaces[n]) == 0) {
108 /* If any interfaces now have no possible IPs then delete them. This
109 * implementation is naive (i.e. simple) rather than clever
110 * (i.e. complex). Given that this is run on delip and that operation
111 * is rare, this doesn't need to be efficient - it needs to be
112 * foolproof. One alternative is reference counting, where the logic
113 * is distributed and can, therefore, be broken in multiple places.
114 * Another alternative is to build a red-black tree of interfaces that
115 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
116 * once) and then walking ctdb->ifaces once and deleting those not in
117 * the tree. Let's go to one of those if the naive implementation
118 * causes problems... :-)
120 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
121 struct ctdb_vnn *vnn,
124 struct ctdb_iface *i;
126 /* For each interface, check if there's an IP using it. */
127 for(i=ctdb->ifaces; i; i=i->next) {
131 /* Only consider interfaces named in the given VNN. */
132 if (!vnn_has_interface_with_name(vnn, i->name)) {
136 /* Is the "single IP" on this interface? */
137 if ((ctdb->single_ip_vnn != NULL) &&
138 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
139 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
140 /* Found, next interface please... */
143 /* Search for a vnn with this interface. */
145 for (tv=ctdb->vnn; tv; tv=tv->next) {
146 if (vnn_has_interface_with_name(tv, i->name)) {
153 /* None of the VNNs are using this interface. */
154 DLIST_REMOVE(ctdb->ifaces, i);
155 /* Caller will free mem_ctx when convenient. */
156 talloc_steal(mem_ctx, i);
/* Look up an interface by name in the ctdb->ifaces list. */
162 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
165 struct ctdb_iface *i;
167 /* Walk the known interfaces looking for one whose name matches */
168 for (i=ctdb->ifaces;i;i=i->next) {
169 if (strcmp(i->name, iface) == 0) {
/* Choose an interface for this VNN from the names listed in vnn->ifaces,
 * looking each one up with ctdb_find_iface().  Candidates are compared by
 * reference count: one with fewer references than the current best is
 * singled out (presumably it becomes the new best).
 * NOTE(review): the checks between the lookup and the comparison are not
 * visible here — confirm which candidates are skipped. */
177 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
178 struct ctdb_vnn *vnn)
181 struct ctdb_iface *cur = NULL;
182 struct ctdb_iface *best = NULL;
184 for (i=0; vnn->ifaces[i]; i++) {
186 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
200 if (cur->references < best->references) {
209 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
210 struct ctdb_vnn *vnn)
212 struct ctdb_iface *best = NULL;
215 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
216 "still assigned to iface '%s'\n",
217 ctdb_addr_to_str(&vnn->public_address),
218 ctdb_vnn_iface_string(vnn)));
222 best = ctdb_vnn_best_iface(ctdb, vnn);
224 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
225 "cannot assign to iface any iface\n",
226 ctdb_addr_to_str(&vnn->public_address)));
232 vnn->pnn = ctdb->pnn;
234 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
235 "now assigned to iface '%s' refs[%d]\n",
236 ctdb_addr_to_str(&vnn->public_address),
237 ctdb_vnn_iface_string(vnn),
242 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
243 struct ctdb_vnn *vnn)
245 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
246 "now unassigned (old iface '%s' refs[%d])\n",
247 ctdb_addr_to_str(&vnn->public_address),
248 ctdb_vnn_iface_string(vnn),
249 vnn->iface?vnn->iface->references:0));
251 vnn->iface->references--;
254 if (vnn->pnn == ctdb->pnn) {
259 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
260 struct ctdb_vnn *vnn)
264 if (vnn->iface && vnn->iface->link_up) {
268 for (i=0; vnn->ifaces[i]; i++) {
269 struct ctdb_iface *cur;
271 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
284 struct ctdb_takeover_arp {
285 struct ctdb_context *ctdb;
288 struct ctdb_tcp_array *tcparray;
289 struct ctdb_vnn *vnn;
294 lists of tcp endpoints
296 struct ctdb_tcp_list {
297 struct ctdb_tcp_list *prev, *next;
298 struct ctdb_tcp_connection connection;
302 list of clients to kill on IP release
304 struct ctdb_client_ip {
305 struct ctdb_client_ip *prev, *next;
306 struct ctdb_context *ctdb;
313 send a gratuitous arp
315 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
316 struct timeval t, void *private_data)
318 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
319 struct ctdb_takeover_arp);
321 struct ctdb_tcp_array *tcparray;
322 const char *iface = ctdb_vnn_iface_string(arp->vnn);
324 ret = ctdb_sys_send_arp(&arp->addr, iface);
326 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
327 iface, strerror(errno)));
330 tcparray = arp->tcparray;
332 for (i=0;i<tcparray->num;i++) {
333 struct ctdb_tcp_connection *tcon;
335 tcon = &tcparray->connections[i];
336 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
337 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
338 ctdb_addr_to_str(&tcon->src_addr),
339 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
340 ret = ctdb_sys_send_tcp(
345 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
346 ctdb_addr_to_str(&tcon->src_addr)));
353 if (arp->count == CTDB_ARP_REPEAT) {
358 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
359 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
360 ctdb_control_send_arp, arp);
363 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
364 struct ctdb_vnn *vnn)
366 struct ctdb_takeover_arp *arp;
367 struct ctdb_tcp_array *tcparray;
369 if (!vnn->takeover_ctx) {
370 vnn->takeover_ctx = talloc_new(vnn);
371 if (!vnn->takeover_ctx) {
376 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
382 arp->addr = vnn->public_address;
385 tcparray = vnn->tcp_array;
387 /* add all of the known tcp connections for this IP to the
388 list of tcp connections to send tickle acks for */
389 arp->tcparray = talloc_steal(arp, tcparray);
391 vnn->tcp_array = NULL;
392 vnn->tcp_update_needed = true;
395 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
396 timeval_zero(), ctdb_control_send_arp, arp);
401 struct takeover_callback_state {
402 struct ctdb_req_control *c;
403 ctdb_sock_addr *addr;
404 struct ctdb_vnn *vnn;
407 struct ctdb_do_takeip_state {
408 struct ctdb_req_control *c;
409 struct ctdb_vnn *vnn;
413 called when takeip event finishes
415 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418 struct ctdb_do_takeip_state *state =
419 talloc_get_type(private_data, struct ctdb_do_takeip_state);
424 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
426 if (status == -ETIME) {
429 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430 ctdb_addr_to_str(&state->vnn->public_address),
431 ctdb_vnn_iface_string(state->vnn)));
432 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
434 node->flags |= NODE_FLAGS_UNHEALTHY;
439 if (ctdb->do_checkpublicip) {
441 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
443 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
450 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
451 data.dsize = strlen((char *)data.dptr) + 1;
452 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
454 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
457 /* the control succeeded */
458 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* Destructor registered on the takeip state with talloc_set_destructor():
 * clears the VNN's update_in_flight flag, which ctdb_do_takeip() sets and
 * which causes further updates for this IP to be rejected while set. */
463 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
465 state->vnn->update_in_flight = false;
470 take over an ip address
472 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
473 struct ctdb_req_control *c,
474 struct ctdb_vnn *vnn)
477 struct ctdb_do_takeip_state *state;
479 if (vnn->update_in_flight) {
480 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
481 "update for this IP already in flight\n",
482 ctdb_addr_to_str(&vnn->public_address),
483 vnn->public_netmask_bits));
487 ret = ctdb_vnn_assign_iface(ctdb, vnn);
489 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
490 "assign a usable interface\n",
491 ctdb_addr_to_str(&vnn->public_address),
492 vnn->public_netmask_bits));
496 state = talloc(vnn, struct ctdb_do_takeip_state);
497 CTDB_NO_MEMORY(ctdb, state);
499 state->c = talloc_steal(ctdb, c);
502 vnn->update_in_flight = true;
503 talloc_set_destructor(state, ctdb_takeip_destructor);
505 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
506 ctdb_addr_to_str(&vnn->public_address),
507 vnn->public_netmask_bits,
508 ctdb_vnn_iface_string(vnn)));
510 ret = ctdb_event_script_callback(ctdb,
512 ctdb_do_takeip_callback,
517 ctdb_vnn_iface_string(vnn),
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits);
522 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
523 ctdb_addr_to_str(&vnn->public_address),
524 ctdb_vnn_iface_string(vnn)));
532 struct ctdb_do_updateip_state {
533 struct ctdb_req_control *c;
534 struct ctdb_iface *old;
535 struct ctdb_vnn *vnn;
539 called when updateip event finishes
541 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
544 struct ctdb_do_updateip_state *state =
545 talloc_get_type(private_data, struct ctdb_do_updateip_state);
549 if (status == -ETIME) {
552 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
553 ctdb_addr_to_str(&state->vnn->public_address),
555 ctdb_vnn_iface_string(state->vnn)));
558 * All we can do is reset the old interface
559 * and let the next run fix it
561 ctdb_vnn_unassign_iface(ctdb, state->vnn);
562 state->vnn->iface = state->old;
563 state->vnn->iface->references++;
565 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
570 if (ctdb->do_checkpublicip) {
572 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
574 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
581 /* the control succeeded */
582 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* Destructor registered on the updateip state with talloc_set_destructor():
 * clears the VNN's update_in_flight flag, which ctdb_do_updateip() sets and
 * which causes further updates for this IP to be rejected while set. */
587 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
589 state->vnn->update_in_flight = false;
594 update (move) an ip address
596 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
597 struct ctdb_req_control *c,
598 struct ctdb_vnn *vnn)
601 struct ctdb_do_updateip_state *state;
602 struct ctdb_iface *old = vnn->iface;
603 const char *new_name;
605 if (vnn->update_in_flight) {
606 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
607 "update for this IP already in flight\n",
608 ctdb_addr_to_str(&vnn->public_address),
609 vnn->public_netmask_bits));
613 ctdb_vnn_unassign_iface(ctdb, vnn);
614 ret = ctdb_vnn_assign_iface(ctdb, vnn);
616 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
617 "assin a usable interface (old iface '%s')\n",
618 ctdb_addr_to_str(&vnn->public_address),
619 vnn->public_netmask_bits,
624 new_name = ctdb_vnn_iface_string(vnn);
625 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
626 /* A benign update from one interface onto itself.
627 * no need to run the eventscripts in this case, just return
630 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
634 state = talloc(vnn, struct ctdb_do_updateip_state);
635 CTDB_NO_MEMORY(ctdb, state);
637 state->c = talloc_steal(ctdb, c);
641 vnn->update_in_flight = true;
642 talloc_set_destructor(state, ctdb_updateip_destructor);
644 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
645 "interface %s to %s\n",
646 ctdb_addr_to_str(&vnn->public_address),
647 vnn->public_netmask_bits,
651 ret = ctdb_event_script_callback(ctdb,
653 ctdb_do_updateip_callback,
656 CTDB_EVENT_UPDATE_IP,
660 ctdb_addr_to_str(&vnn->public_address),
661 vnn->public_netmask_bits);
663 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
664 ctdb_addr_to_str(&vnn->public_address),
665 old->name, new_name));
674 Find the vnn of the node that has a public ip address
675 returns -1 if the address is not known as a public address
677 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
679 struct ctdb_vnn *vnn;
681 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
682 if (ctdb_same_ip(&vnn->public_address, addr)) {
691 take over an ip address
693 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
694 struct ctdb_req_control *c,
699 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
700 struct ctdb_vnn *vnn;
701 bool have_ip = false;
702 bool do_updateip = false;
703 bool do_takeip = false;
704 struct ctdb_iface *best_iface = NULL;
706 if (pip->pnn != ctdb->pnn) {
707 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
708 "with pnn %d, but we're node %d\n",
709 ctdb_addr_to_str(&pip->addr),
710 pip->pnn, ctdb->pnn));
714 /* update our vnn list */
715 vnn = find_public_ip_vnn(ctdb, &pip->addr);
717 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
718 ctdb_addr_to_str(&pip->addr)));
722 if (ctdb->do_checkpublicip) {
723 have_ip = ctdb_sys_have_ip(&pip->addr);
725 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
726 if (best_iface == NULL) {
727 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
728 "a usable interface (old %s, have_ip %d)\n",
729 ctdb_addr_to_str(&vnn->public_address),
730 vnn->public_netmask_bits,
731 ctdb_vnn_iface_string(vnn),
736 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
737 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
742 if (vnn->iface == NULL && have_ip) {
743 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
744 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
745 ctdb_addr_to_str(&vnn->public_address)));
749 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
750 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
751 "and we have it on iface[%s], but it was assigned to node %d"
752 "and we are node %d, banning ourself\n",
753 ctdb_addr_to_str(&vnn->public_address),
754 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
759 if (vnn->pnn == -1 && have_ip) {
760 vnn->pnn = ctdb->pnn;
761 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762 "and we already have it on iface[%s], update local daemon\n",
763 ctdb_addr_to_str(&vnn->public_address),
764 ctdb_vnn_iface_string(vnn)));
769 if (vnn->iface != best_iface) {
770 if (!vnn->iface->link_up) {
772 } else if (vnn->iface->references > (best_iface->references + 1)) {
773 /* only move when the rebalance gains something */
781 ctdb_vnn_unassign_iface(ctdb, vnn);
788 ret = ctdb_do_takeip(ctdb, c, vnn);
792 } else if (do_updateip) {
793 ret = ctdb_do_updateip(ctdb, c, vnn);
799 * The interface is up and the kernel knows the ip
802 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
803 ctdb_addr_to_str(&pip->addr),
804 vnn->public_netmask_bits,
805 ctdb_vnn_iface_string(vnn)));
809 /* tell ctdb_control.c that we will be replying asynchronously */
816 takeover an ip address old v4 style
/* Wrapper for the old IPv4-style control: copies the incoming payload
 * into a zeroed struct ctdb_public_ip allocated on c and passes it to
 * ctdb_control_takeover_ip(). */
818 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
819 struct ctdb_req_control *c,
825 data.dsize = sizeof(struct ctdb_public_ip);
826 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
827 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): the copy length is indata.dsize while the destination is
 * sizeof(struct ctdb_public_ip) bytes; assumes the caller has already
 * validated indata.dsize — confirm. */
829 memcpy(data.dptr, indata.dptr, indata.dsize);
830 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
834 kill any clients that are registered with an IP that is being released
836 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
838 struct ctdb_client_ip *ip;
840 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
841 ctdb_addr_to_str(addr)));
843 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
844 ctdb_sock_addr tmp_addr;
847 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
849 ctdb_addr_to_str(&ip->addr)));
851 if (ctdb_same_ip(&tmp_addr, addr)) {
852 struct ctdb_client *client = ctdb_reqid_find(ctdb,
855 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
857 ctdb_addr_to_str(&ip->addr),
860 if (client->pid != 0) {
861 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
862 (unsigned)client->pid,
863 ctdb_addr_to_str(addr),
865 ctdb_kill(ctdb, client->pid, SIGKILL);
872 called when releaseip event finishes
874 static void release_ip_callback(struct ctdb_context *ctdb, int status,
877 struct takeover_callback_state *state =
878 talloc_get_type(private_data, struct takeover_callback_state);
881 if (status == -ETIME) {
885 /* send a message to all clients of this node telling them
886 that the cluster has been reconfigured and they should
887 release any sockets on this IP */
888 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
889 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
890 data.dsize = strlen((char *)data.dptr)+1;
892 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
894 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
896 /* kill clients that have registered with this IP */
897 release_kill_clients(ctdb, state->addr);
899 ctdb_vnn_unassign_iface(ctdb, state->vnn);
901 /* the control succeeded */
902 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* Destructor registered on the release state with talloc_set_destructor():
 * clears the VNN's update_in_flight flag, which ctdb_control_release_ip()
 * sets and which causes further updates for this IP to be rejected. */
906 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
908 state->vnn->update_in_flight = false;
913 release an ip address
915 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
916 struct ctdb_req_control *c,
921 struct takeover_callback_state *state;
922 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
923 struct ctdb_vnn *vnn;
926 /* update our vnn list */
927 vnn = find_public_ip_vnn(ctdb, &pip->addr);
929 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
930 ctdb_addr_to_str(&pip->addr)));
935 /* stop any previous arps */
936 talloc_free(vnn->takeover_ctx);
937 vnn->takeover_ctx = NULL;
939 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
940 * lazy multicast to drop an IP from any node that isn't the
941 * intended new node. The following makes ctdbd ignore
942 * a release for any address it doesn't host.
944 if (ctdb->do_checkpublicip) {
945 if (!ctdb_sys_have_ip(&pip->addr)) {
946 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
947 ctdb_addr_to_str(&pip->addr),
948 vnn->public_netmask_bits,
949 ctdb_vnn_iface_string(vnn)));
950 ctdb_vnn_unassign_iface(ctdb, vnn);
954 if (vnn->iface == NULL) {
955 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
956 ctdb_addr_to_str(&pip->addr),
957 vnn->public_netmask_bits));
962 /* There is a potential race between take_ip and us because we
963 * update the VNN via a callback that runs when the
964 * eventscripts have been run. Avoid the race by allowing one
965 * update to be in flight at a time.
967 if (vnn->update_in_flight) {
968 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
969 "update for this IP already in flight\n",
970 ctdb_addr_to_str(&vnn->public_address),
971 vnn->public_netmask_bits));
975 if (ctdb->do_checkpublicip) {
976 iface = ctdb_sys_find_ifname(&pip->addr);
978 DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
982 iface = strdup(ctdb_vnn_iface_string(vnn));
985 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
986 ctdb_addr_to_str(&pip->addr),
987 vnn->public_netmask_bits,
991 state = talloc(ctdb, struct takeover_callback_state);
992 CTDB_NO_MEMORY(ctdb, state);
994 state->c = talloc_steal(state, c);
995 state->addr = talloc(state, ctdb_sock_addr);
996 CTDB_NO_MEMORY(ctdb, state->addr);
997 *state->addr = pip->addr;
1000 vnn->update_in_flight = true;
1001 talloc_set_destructor(state, ctdb_releaseip_destructor);
1003 ret = ctdb_event_script_callback(ctdb,
1004 state, release_ip_callback, state,
1006 CTDB_EVENT_RELEASE_IP,
1009 ctdb_addr_to_str(&pip->addr),
1010 vnn->public_netmask_bits);
1013 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1014 ctdb_addr_to_str(&pip->addr),
1015 ctdb_vnn_iface_string(vnn)));
1020 /* tell the control that we will be replying asynchronously */
1021 *async_reply = true;
1026 release an ip address old v4 style
/* Wrapper for the old IPv4-style control: copies the incoming payload
 * into a zeroed struct ctdb_public_ip allocated on c and passes it to
 * ctdb_control_release_ip(). */
1028 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
1029 struct ctdb_req_control *c,
1035 data.dsize = sizeof(struct ctdb_public_ip);
1036 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
1037 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): the copy length is indata.dsize while the destination is
 * sizeof(struct ctdb_public_ip) bytes; assumes the caller has already
 * validated indata.dsize — confirm. */
1039 memcpy(data.dptr, indata.dptr, indata.dsize);
1040 return ctdb_control_release_ip(ctdb, c, data, async_reply);
1044 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1045 ctdb_sock_addr *addr,
1046 unsigned mask, const char *ifaces,
1049 struct ctdb_vnn *vnn;
1056 tmp = strdup(ifaces);
1057 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1058 if (!ctdb_sys_check_iface_exists(iface)) {
1059 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1066 /* Verify that we don't have an entry for this ip yet */
1067 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1068 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1069 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1070 ctdb_addr_to_str(addr)));
1075 /* create a new vnn structure for this ip address */
1076 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1077 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1078 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1079 tmp = talloc_strdup(vnn, ifaces);
1080 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1081 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1082 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1083 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1084 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1085 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1089 vnn->ifaces[num] = NULL;
1090 vnn->public_address = *addr;
1091 vnn->public_netmask_bits = mask;
1093 if (check_address) {
1094 if (ctdb_sys_have_ip(addr)) {
1095 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1096 vnn->pnn = ctdb->pnn;
1100 for (i=0; vnn->ifaces[i]; i++) {
1101 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1103 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1104 "for public_address[%s]\n",
1105 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1111 DLIST_ADD(ctdb->vnn, vnn);
1117 setup the event script directory
1119 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
1121 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
1122 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
/* Timed event handler; private_data is the ctdb context.  For every VNN in
 * ctdb->vnn it checks each configured interface name with
 * ctdb_sys_check_iface_exists() and logs a critical message for any that
 * does not exist, then schedules itself again 30 seconds later on
 * ctdb->check_public_ifaces_ctx. */
1126 static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
1127 struct timeval t, void *private_data)
1129 struct ctdb_context *ctdb = talloc_get_type(private_data,
1130 struct ctdb_context);
1131 struct ctdb_vnn *vnn;
1133 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1136 for (i=0; vnn->ifaces[i] != NULL; i++) {
1137 if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
1138 DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
1140 ctdb_addr_to_str(&vnn->public_address)));
1145 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
1146 timeval_current_ofs(30, 0),
1147 ctdb_check_interfaces_event, ctdb);
/* (Re)start the periodic interface check.  Any existing
 * check_public_ifaces_ctx is freed and reset first (presumably discarding
 * a previously scheduled check — confirm), a fresh talloc context is
 * created with ctdb_fatal() on allocation failure, and
 * ctdb_check_interfaces_event() is scheduled 30 seconds from now. */
1151 int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
1153 if (ctdb->check_public_ifaces_ctx != NULL) {
1154 talloc_free(ctdb->check_public_ifaces_ctx);
1155 ctdb->check_public_ifaces_ctx = NULL;
1158 ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
1159 if (ctdb->check_public_ifaces_ctx == NULL) {
1160 ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
1163 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
1164 timeval_current_ofs(30, 0),
1165 ctdb_check_interfaces_event, ctdb);
1172 setup the public address lists from a file
1174 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1180 lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb);
1181 if (lines == NULL) {
1182 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1185 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1189 for (i=0;i<nlines;i++) {
1191 ctdb_sock_addr addr;
1192 const char *addrstr;
1197 while ((*line == ' ') || (*line == '\t')) {
1203 if (strcmp(line, "") == 0) {
1206 tok = strtok(line, " \t");
1208 tok = strtok(NULL, " \t");
1210 if (NULL == ctdb->default_public_interface) {
1211 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1216 ifaces = ctdb->default_public_interface;
1221 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1222 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1226 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1227 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1238 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1242 struct ctdb_vnn *svnn;
1243 struct ctdb_iface *cur = NULL;
1247 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1248 CTDB_NO_MEMORY(ctdb, svnn);
1250 svnn->ifaces = talloc_array(svnn, const char *, 2);
1251 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1252 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1253 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1254 svnn->ifaces[1] = NULL;
1256 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1262 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1264 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1265 "for single_ip[%s]\n",
1267 ctdb_addr_to_str(&svnn->public_address)));
1272 /* assume the single public ip interface is initially "good" */
1273 cur = ctdb_find_iface(ctdb, iface);
1275 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1278 cur->link_up = true;
1280 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1286 ctdb->single_ip_vnn = svnn;
1290 /* Given a physical node, return the number of
1291 public addresses that are currently assigned to this node.
1293 static int node_ip_coverage(struct ctdb_context *ctdb,
1295 struct ctdb_public_ip_list *ips)
1299 for (;ips;ips=ips->next) {
1300 if (ips->pnn == pnn) {
1308 /* Can the given node host the given IP: is the public IP known to the
1309 * node and is NOIPHOST unset?
1311 static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
1312 struct ctdb_node_map *nodemap,
1313 struct ctdb_public_ip_list *ip)
1315 struct ctdb_all_public_ips *public_ips;
1318 if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPHOST) {
1322 public_ips = ctdb->nodes[pnn]->available_public_ips;
1324 if (public_ips == NULL) {
1328 for (i=0;i<public_ips->num;i++) {
1329 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1330 /* yes, this node can serve this public ip */
/* Can the given node take over the given IP?  The node's
 * NODE_FLAGS_NOIPTAKEOVER flag in the nodemap is tested first (presumably
 * refusing when set — that branch body is not visible here); otherwise the
 * answer comes from can_node_host_ip(). */
1338 static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
1339 struct ctdb_node_map *nodemap,
1340 struct ctdb_public_ip_list *ip)
1342 if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPTAKEOVER) {
1346 return can_node_host_ip(ctdb, pnn, nodemap, ip);
1349 /* search the node list for a node to take over this ip.
1350 pick the node that is currently serving the least number of ips
1351 so that the ips get spread out evenly.
1353 static int find_takeover_node(struct ctdb_context *ctdb,
1354 struct ctdb_node_map *nodemap,
1355 struct ctdb_public_ip_list *ip,
1356 struct ctdb_public_ip_list *all_ips)
1358 int pnn, min=0, num;
1362 for (i=0;i<nodemap->num;i++) {
1363 /* verify that this node can serve this ip */
1364 if (!can_node_takeover_ip(ctdb, i, nodemap, ip)) {
1365 /* no it couldn't, so skip to the next node */
1369 num = node_ip_coverage(ctdb, i, all_ips);
1370 /* was this the first node we checked ? */
1382 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1383 ctdb_addr_to_str(&ip->addr)));
/* Build a key of IP_KEYLEN 32-bit words for an address, selected on
 * ip->sa.sa_family.  One branch fills only key[3] from the IPv4 address,
 * another fills key[0..3] from the four 32-bit words of the IPv6 address,
 * and an unknown family is logged as an error.
 * The key is built in a function-static buffer that is zeroed and refilled
 * on every call, so a caller that needs to keep a key must copy it before
 * ip_key() is called again. */
1393 static uint32_t *ip_key(ctdb_sock_addr *ip)
/* single buffer shared by all calls */
1395 static uint32_t key[IP_KEYLEN];
1397 bzero(key, sizeof(key));
1399 switch (ip->sa.sa_family) {
1401 key[3] = htonl(ip->ip.sin_addr.s_addr);
1404 uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
1405 key[0] = htonl(s6_a32[0]);
1406 key[1] = htonl(s6_a32[1]);
1407 key[2] = htonl(s6_a32[2]);
1408 key[3] = htonl(s6_a32[3]);
1412 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1419 static void *add_ip_callback(void *parm, void *data)
1421 struct ctdb_public_ip_list *this_ip = parm;
1422 struct ctdb_public_ip_list *prev_ip = data;
1424 if (prev_ip == NULL) {
1427 if (this_ip->pnn == -1) {
1428 this_ip->pnn = prev_ip->pnn;
1434 static int getips_count_callback(void *param, void *data)
1436 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1437 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1439 new_ip->next = *ip_list;
1444 static struct ctdb_public_ip_list *
1445 create_merged_ip_list(struct ctdb_context *ctdb)
1448 struct ctdb_public_ip_list *ip_list;
1449 struct ctdb_all_public_ips *public_ips;
1451 if (ctdb->ip_tree != NULL) {
1452 talloc_free(ctdb->ip_tree);
1453 ctdb->ip_tree = NULL;
1455 ctdb->ip_tree = trbt_create(ctdb, 0);
1457 for (i=0;i<ctdb->num_nodes;i++) {
1458 public_ips = ctdb->nodes[i]->known_public_ips;
1460 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1464 /* there were no public ips for this node */
1465 if (public_ips == NULL) {
1469 for (j=0;j<public_ips->num;j++) {
1470 struct ctdb_public_ip_list *tmp_ip;
1472 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1473 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1474 /* Do not use information about IP addresses hosted
1475 * on other nodes, it may not be accurate */
1476 if (public_ips->ips[j].pnn == ctdb->nodes[i]->pnn) {
1477 tmp_ip->pnn = public_ips->ips[j].pnn;
1481 tmp_ip->addr = public_ips->ips[j].addr;
1482 tmp_ip->next = NULL;
1484 trbt_insertarray32_callback(ctdb->ip_tree,
1485 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1492 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1498 * This is the length of the longest common prefix between the IPs.
1499 * It is calculated by XOR-ing the 2 IPs together and counting the
1500 * number of leading zeroes. The implementation means that all
1501 * addresses end up being 128 bits long.
1503 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1504 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1505 * lots of nodes and IP addresses?
1507 static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1509 uint32_t ip1_k[IP_KEYLEN];
1514 uint32_t distance = 0;
/* ip_key() returns a pointer to its own static buffer, so the key for
 * ip1 is copied into local storage here - presumably because ip_key()
 * is called again for ip2 (that call is not visible in this listing). */
1516 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1518 for (i=0; i<IP_KEYLEN; i++) {
1519 x = ip1_k[i] ^ t[i];
1523 /* Count number of leading zeroes.
1524 * FIXME? This could be optimised...
 * The top-bit mask is built from an unsigned constant: shifting a
 * signed 1 left by 31 overflows a 32-bit int, which is undefined.
1526 while ((x & (1U << 31)) == 0) {
1536 /* Calculate the IP distance for the given IP relative to IPs on the
1537 given node. The ips argument is generally the all_ips variable
1538 used in the main part of the algorithm.
1540 static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1541 struct ctdb_public_ip_list *ips,
1544 struct ctdb_public_ip_list *t;
1549 for (t=ips; t != NULL; t=t->next) {
1550 if (t->pnn != pnn) {
1554 /* Optimisation: We never calculate the distance
1555 * between an address and itself. This allows us to
1556 * calculate the effect of removing an address from a
1557 * node by simply calculating the distance between
1558 * that address and all of the existing addresses.
1559 * Moreover, we assume that we're only ever dealing
1560 * with addresses from all_ips so we can identify an
1561 * address via a pointer rather than doing a more
1562 * expensive address comparison. */
1563 if (&(t->addr) == ip) {
/* The distance between ip and this list entry is squared and added
 * to the running sum. */
1567 d = ip_distance(ip, &(t->addr));
1568 sum += d * d; /* Cheaper than pulling in math.h :-) */
1574 /* Return the LCP2 imbalance metric for addresses currently assigned
1577 static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1579 struct ctdb_public_ip_list *t;
1581 uint32_t imbalance = 0;
1583 for (t=all_ips; t!=NULL; t=t->next) {
1584 if (t->pnn != pnn) {
1587 /* Pass the rest of the IPs rather than the whole
1590 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
1596 /* Allocate any unassigned IPs just by looping through the IPs and
1597 * finding the best node for each.
1599 static void basic_allocate_unassigned(struct ctdb_context *ctdb,
1600 struct ctdb_node_map *nodemap,
1601 struct ctdb_public_ip_list *all_ips)
1603 struct ctdb_public_ip_list *tmp_ip;
1605 /* loop over all ip's and find a physical node to cover for
1608 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1609 if (tmp_ip->pnn == -1) {
1610 if (find_takeover_node(ctdb, nodemap, tmp_ip, all_ips)) {
1611 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1612 ctdb_addr_to_str(&tmp_ip->addr)));
1618 /* Basic non-deterministic rebalancing algorithm.
1620 static void basic_failback(struct ctdb_context *ctdb,
1621 struct ctdb_node_map *nodemap,
1622 struct ctdb_public_ip_list *all_ips,
1626 int maxnode, maxnum, minnode, minnum, num, retries;
1627 struct ctdb_public_ip_list *tmp_ip;
1635 /* for each ip address, loop over all nodes that can serve
1636 this ip and make sure that the difference between the node
1637 serving the most and the node serving the least ip's are
1640 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1641 if (tmp_ip->pnn == -1) {
1645 /* Get the highest and lowest number of ips's served by any
1646 valid node which can serve this ip.
1650 for (i=0;i<nodemap->num;i++) {
1651 /* only check nodes that can actually serve this ip */
1652 if (!can_node_takeover_ip(ctdb, i, nodemap, tmp_ip)) {
1653 /* no it couldnt so skip to the next node */
1657 num = node_ip_coverage(ctdb, i, all_ips);
1658 if (maxnode == -1) {
1667 if (minnode == -1) {
1677 if (maxnode == -1) {
1678 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1679 ctdb_addr_to_str(&tmp_ip->addr)));
1684 /* if the spread between the smallest and largest coverage by
1685 a node is >=2 we steal one of the ips from the node with
1686 most coverage to even things out a bit.
1687 try to do this a limited number of times since we dont
1688 want to spend too much time balancing the ip coverage.
1690 if ( (maxnum > minnum+1)
1691 && (retries < (num_ips + 5)) ){
1692 struct ctdb_public_ip_list *tmp;
1694 /* Reassign one of maxnode's VNNs */
1695 for (tmp=all_ips;tmp;tmp=tmp->next) {
1696 if (tmp->pnn == maxnode) {
1697 (void)find_takeover_node(ctdb, nodemap, tmp, all_ips);
1706 struct ctdb_rebalancenodes {
1707 struct ctdb_rebalancenodes *next;
1710 static struct ctdb_rebalancenodes *force_rebalance_list = NULL;
1713 /* set this flag to force the node to be rebalanced even if it didn't just
1714 become healthy again.
1716 void lcp2_forcerebalance(struct ctdb_context *ctdb, uint32_t pnn)
1718 struct ctdb_rebalancenodes *rebalance;
/* Look for an existing entry for this pnn before queueing a new one. */
1720 for (rebalance = force_rebalance_list; rebalance; rebalance = rebalance->next) {
1721 if (rebalance->pnn == pnn) {
1726 rebalance = talloc(ctdb, struct ctdb_rebalancenodes);
/* talloc() returns NULL on allocation failure; apply the same
 * out-of-memory check this file uses for its other talloc results
 * instead of dereferencing a NULL pointer below. */
CTDB_NO_MEMORY_FATAL(ctdb, rebalance);
/* Push the new entry onto the head of the global list. */
1727 rebalance->pnn = pnn;
1728 rebalance->next = force_rebalance_list;
1729 force_rebalance_list = rebalance;
1732 /* Do necessary LCP2 initialisation. Bury it in a function here so
1733 * that we can unit test it.
1735 static void lcp2_init(struct ctdb_context * tmp_ctx,
1736 struct ctdb_node_map * nodemap,
1737 struct ctdb_public_ip_list *all_ips,
1738 uint32_t **lcp2_imbalances,
1739 bool **rebalance_candidates)
1742 struct ctdb_public_ip_list *tmp_ip;
1744 *rebalance_candidates = talloc_array(tmp_ctx, bool, nodemap->num);
1745 CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
1746 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1747 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1749 for (i=0;i<nodemap->num;i++) {
1750 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1751 /* First step: assume all nodes are candidates */
1752 (*rebalance_candidates)[i] = true;
1755 /* 2nd step: if a node has IPs assigned then it must have been
1756 * healthy before, so we remove it from consideration. This
1757 * is overkill but is all we have because we don't maintain
1758 * state between takeover runs. An alternative would be to
1759 * keep state and invalidate it every time the recovery master
1762 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1763 if (tmp_ip->pnn != -1) {
1764 (*rebalance_candidates)[tmp_ip->pnn] = false;
1768 /* 3rd step: if a node is forced to re-balance then
1769 we allow failback onto the node */
1770 while (force_rebalance_list != NULL) {
1771 struct ctdb_rebalancenodes *next = force_rebalance_list->next;
/* rebalance_candidates has exactly nodemap->num entries, so only
 * indices strictly below num are valid; accepting pnn == num would
 * write one element past the end of the array. */
1773 if (force_rebalance_list->pnn < nodemap->num) {
1774 (*rebalance_candidates)[force_rebalance_list->pnn] = true;
/* Each queued request is consumed: log it, free it and advance. */
1777 DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
1778 talloc_free(force_rebalance_list);
1779 force_rebalance_list = next;
1783 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1784 * the IP/node combination that will cost the least.
1786 static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1787 struct ctdb_node_map *nodemap,
1788 struct ctdb_public_ip_list *all_ips,
1789 uint32_t *lcp2_imbalances)
1791 struct ctdb_public_ip_list *tmp_ip;
1795 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1796 struct ctdb_public_ip_list *minip;
1798 bool should_loop = true;
1799 bool have_unassigned = true;
1801 while (have_unassigned && should_loop) {
1802 should_loop = false;
1804 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1805 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1811 /* loop over each unassigned ip. */
1812 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1813 if (tmp_ip->pnn != -1) {
1817 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1818 /* only check nodes that can actually takeover this ip */
1819 if (!can_node_takeover_ip(ctdb, dstnode,
1821 /* no it couldnt so skip to the next node */
1825 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1826 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1827 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1828 ctdb_addr_to_str(&(tmp_ip->addr)),
1830 dstimbl - lcp2_imbalances[dstnode]));
1833 if ((minnode == -1) || (dstdsum < mindsum)) {
1843 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1845 /* If we found one then assign it to the given node. */
1846 if (minnode != -1) {
1847 minip->pnn = minnode;
1848 lcp2_imbalances[minnode] = minimbl;
1849 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1850 ctdb_addr_to_str(&(minip->addr)),
1855 /* There might be a better way but at least this is clear. */
1856 have_unassigned = false;
1857 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1858 if (tmp_ip->pnn == -1) {
1859 have_unassigned = true;
1864 /* We know if we have an unassigned addresses so we might as
1867 if (have_unassigned) {
1868 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1869 if (tmp_ip->pnn == -1) {
1870 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1871 ctdb_addr_to_str(&tmp_ip->addr)));
1877 /* LCP2 algorithm for rebalancing the cluster. Given a candidate node
1878 * to move IPs from, determines the best IP/destination node
1879 * combination to move from the source node.
1881 static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
1882 struct ctdb_node_map *nodemap,
1883 struct ctdb_public_ip_list *all_ips,
1886 uint32_t *lcp2_imbalances,
1887 bool *rebalance_candidates)
1889 int dstnode, mindstnode;
1890 uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
1891 uint32_t minsrcimbl, mindstimbl;
1892 struct ctdb_public_ip_list *minip;
1893 struct ctdb_public_ip_list *tmp_ip;
1895 /* Find an IP and destination node that best reduces imbalance. */
1901 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1902 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
1904 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1905 /* Only consider addresses on srcnode. */
1906 if (tmp_ip->pnn != srcnode) {
1910 /* What is this IP address costing the source node? */
1911 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
1912 srcimbl = candimbl - srcdsum;
1914 /* Consider this IP address would cost each potential
1915 * destination node. Destination nodes are limited to
1916 * those that are newly healthy, since we don't want
1917 * to do gratuitous failover of IPs just to make minor
1918 * balance improvements.
1920 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1921 if (!rebalance_candidates[dstnode]) {
1925 /* only check nodes that can actually takeover this ip */
1926 if (!can_node_takeover_ip(ctdb, dstnode,
1928 /* no it couldnt so skip to the next node */
1932 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1933 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1934 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1935 srcnode, srcimbl - lcp2_imbalances[srcnode],
1936 ctdb_addr_to_str(&(tmp_ip->addr)),
1937 dstnode, dstimbl - lcp2_imbalances[dstnode]));
1939 if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
1940 ((mindstnode == -1) || \
1941 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1944 minsrcimbl = srcimbl;
1945 mindstnode = dstnode;
1946 mindstimbl = dstimbl;
1950 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1952 if (mindstnode != -1) {
1953 /* We found a move that makes things better... */
1954 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1955 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1956 ctdb_addr_to_str(&(minip->addr)),
1957 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
/* Record the source imbalance of the move that was actually chosen.
 * srcimbl only holds the value for the last address examined by the
 * loop above, which need not be the selected one; minsrcimbl is the
 * value saved alongside mindstnode/mindstimbl and reported in the
 * log line just above. */
1960 lcp2_imbalances[srcnode] = minsrcimbl;
1961 lcp2_imbalances[mindstnode] = mindstimbl;
1962 minip->pnn = mindstnode;
1971 struct lcp2_imbalance_pnn {
1976 static int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
1978 const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
1979 const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
1981 if (lipa->imbalance > lipb->imbalance) {
1983 } else if (lipa->imbalance == lipb->imbalance) {
1990 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1991 * node with the highest LCP2 imbalance, and then determines the best
1992 * IP/destination node combination to move from the source node.
1994 static void lcp2_failback(struct ctdb_context *ctdb,
1995 struct ctdb_node_map *nodemap,
1996 struct ctdb_public_ip_list *all_ips,
1997 uint32_t *lcp2_imbalances,
1998 bool *rebalance_candidates)
2000 int i, num_rebalance_candidates;
2001 struct lcp2_imbalance_pnn * lips;
2006 /* It is only worth continuing if we have suitable target
2007 * nodes to transfer IPs to. This check is much cheaper than
2010 num_rebalance_candidates = 0;
2011 for (i = 0; i < nodemap->num; i++) {
2012 if (rebalance_candidates[i]) {
2013 num_rebalance_candidates++;
2016 if (num_rebalance_candidates == 0) {
2020 /* Put the imbalances and nodes into an array, sort them and
2021 * iterate through candidates. Usually the 1st one will be
2022 * used, so this doesn't cost much...
2024 lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
2025 for (i = 0; i < nodemap->num; i++) {
2026 lips[i].imbalance = lcp2_imbalances[i];
2029 qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
2030 lcp2_cmp_imbalance_pnn);
2033 for (i = 0; i < nodemap->num; i++) {
2034 /* This means that all nodes had 0 or 1 addresses, so
2035 * can't be imbalanced.
2037 if (lips[i].imbalance == 0) {
2041 if (lcp2_failback_candidate(ctdb,
2047 rebalance_candidates)) {
2059 static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
2060 struct ctdb_node_map *nodemap,
2061 struct ctdb_public_ip_list *all_ips)
2063 struct ctdb_public_ip_list *tmp_ip;
2065 /* verify that the assigned nodes can serve that public ip
2066 and set it to -1 if not
2068 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2069 if (tmp_ip->pnn == -1) {
2072 if (!can_node_host_ip(ctdb, tmp_ip->pnn,
2073 nodemap, tmp_ip) != 0) {
2074 /* this node can not serve this ip. */
2075 DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
2076 ctdb_addr_to_str(&(tmp_ip->addr)),
2083 static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
2084 struct ctdb_node_map *nodemap,
2085 struct ctdb_public_ip_list *all_ips)
2087 struct ctdb_public_ip_list *tmp_ip;
2090 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
2091 /* Allocate IPs to nodes in a modulo fashion so that IPs will
2092 * always be allocated the same way for a specific set of
2093 * available/unavailable nodes.
2096 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
2097 tmp_ip->pnn = i%nodemap->num;
2100 /* IP failback doesn't make sense with deterministic
2101 * IPs, since the modulo step above implicitly fails
2102 * back IPs to their "home" node.
2104 if (1 == ctdb->tunable.no_ip_failback) {
2105 DEBUG(DEBUG_WARNING, ("WARNING: 'NoIPFailback' set but ignored - incompatible with 'DeterministicIPs\n"));
2108 unassign_unsuitable_ips(ctdb, nodemap, all_ips);
2110 basic_allocate_unassigned(ctdb, nodemap, all_ips);
2112 /* No failback here! */
2115 static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
2116 struct ctdb_node_map *nodemap,
2117 struct ctdb_public_ip_list *all_ips)
2119 /* This should be pushed down into basic_failback. */
2120 struct ctdb_public_ip_list *tmp_ip;
2122 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
/* Order matters: assignments to nodes that can not host the address
 * are cleared first, then every unassigned address is given a node. */
2126 unassign_unsuitable_ips(ctdb, nodemap, all_ips);
2128 basic_allocate_unassigned(ctdb, nodemap, all_ips);
2130 /* If we don't want IPs to fail back then don't rebalance IPs. */
2131 if (1 == ctdb->tunable.no_ip_failback) {
2135 /* Now, try to make sure the ip addresses are evenly distributed
2138 basic_failback(ctdb, nodemap, all_ips, num_ips);
2141 static void ip_alloc_lcp2(struct ctdb_context *ctdb,
2142 struct ctdb_node_map *nodemap,
2143 struct ctdb_public_ip_list *all_ips)
2145 uint32_t *lcp2_imbalances;
2146 bool *rebalance_candidates;
/* Scratch talloc context: it is handed to lcp2_init() for the
 * imbalance and candidate arrays and released by the talloc_free()
 * at the end of this function. */
2148 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2150 unassign_unsuitable_ips(ctdb, nodemap, all_ips);
2152 lcp2_init(tmp_ctx, nodemap, all_ips,
2153 &lcp2_imbalances, &rebalance_candidates);
2155 lcp2_allocate_unassigned(ctdb, nodemap, all_ips, lcp2_imbalances);
2157 /* If we don't want IPs to fail back then don't rebalance IPs. */
2158 if (1 == ctdb->tunable.no_ip_failback) {
2162 /* Now, try to make sure the ip addresses are evenly distributed
2165 lcp2_failback(ctdb, nodemap, all_ips,
2166 lcp2_imbalances, rebalance_candidates);
2169 talloc_free(tmp_ctx);
2172 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
2176 /* Count how many completely healthy nodes we have */
2178 for (i=0;i<nodemap->num;i++) {
2179 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
2184 return num_healthy == 0;
2187 /* The calculation part of the IP allocation algorithm. */
2188 static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
2189 struct ctdb_node_map *nodemap,
2190 struct ctdb_public_ip_list **all_ips_p)
2192 /* since nodes only know about those public addresses that
2193 can be served by that particular node, no single node has
2194 a full list of all public addresses that exist in the cluster.
2195 Walk over all node structures and create a merged list of
2196 all public addresses that exist in the cluster.
2198 keep the tree of ips around as ctdb->ip_tree
2200 *all_ips_p = create_merged_ip_list(ctdb);
2202 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
2203 ip_alloc_lcp2(ctdb, nodemap, *all_ips_p);
2204 } else if (1 == ctdb->tunable.deterministic_public_ips) {
2205 ip_alloc_deterministic_ips(ctdb, nodemap, *all_ips_p);
2207 ip_alloc_nondeterministic_ips(ctdb, nodemap, *all_ips_p);
2210 /* at this point ->pnn is the node which will own each IP
2211 or -1 if there is no node that can cover this ip
2217 struct get_tunable_callback_data {
2218 const char *tunable;
2222 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
2223 int32_t res, TDB_DATA outdata,
2226 struct get_tunable_callback_data *cd =
2227 (struct get_tunable_callback_data *)callback;
2232 ("Failure to read \"%s\" tunable from remote node %d\n",
2237 if (outdata.dsize != sizeof(uint32_t)) {
2238 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
2239 cd->tunable, pnn, (int)sizeof(uint32_t),
2240 (int)outdata.dsize));
2244 size = talloc_get_size(cd->out) / sizeof(uint32_t);
2246 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
2247 cd->tunable, pnn, size));
2252 cd->out[pnn] = *(uint32_t *)outdata.dptr;
2255 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
2256 TALLOC_CTX *tmp_ctx,
2257 struct ctdb_node_map *nodemap,
2258 const char *tunable)
2261 struct ctdb_control_get_tunable *t;
2264 struct get_tunable_callback_data callback_data;
2266 tvals = talloc_zero_array(tmp_ctx, uint32_t, nodemap->num);
2267 CTDB_NO_MEMORY_NULL(ctdb, tvals);
2268 callback_data.out = tvals;
2269 callback_data.tunable = tunable;
2271 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
2272 data.dptr = talloc_size(tmp_ctx, data.dsize);
2273 t = (struct ctdb_control_get_tunable *)data.dptr;
2274 t->length = strlen(tunable)+1;
2275 memcpy(t->name, tunable, t->length);
2276 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2277 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
2278 nodes, 0, TAKEOVER_TIMEOUT(),
2280 get_tunable_callback, NULL,
2281 &callback_data) != 0) {
2282 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to get %s tunable failed\n", tunable));
2285 talloc_free(data.dptr);
2290 static void clear_ipflags(struct ctdb_node_map *nodemap)
2294 for (i=0;i<nodemap->num;i++) {
2295 nodemap->nodes[i].flags &=
2296 ~(NODE_FLAGS_NOIPTAKEOVER|NODE_FLAGS_NOIPHOST);
2301 /* Set internal flags for IP allocation:
2303 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
2304 * Set NOIPHOST ip flag for each INACTIVE node
2305 * if all nodes are disabled:
2306 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
2308 * Set NOIPHOST ip flags for disabled nodes
2310 static void set_ipflags_internal(struct ctdb_node_map *nodemap,
2311 uint32_t *tval_noiptakeover,
2312 uint32_t *tval_noiphostonalldisabled)
2316 clear_ipflags(nodemap);
2318 for (i=0;i<nodemap->num;i++) {
2319 /* Can not take IPs on node with NoIPTakeover set */
2320 if (tval_noiptakeover[i] != 0) {
2321 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPTAKEOVER;
2324 /* Can not host IPs on INACTIVE node */
2325 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
2326 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
2330 if (all_nodes_are_disabled(nodemap)) {
2331 /* If all nodes are disabled, can not host IPs on node
2332 * with NoIPHostOnAllDisabled set
2334 for (i=0;i<nodemap->num;i++) {
2335 if (tval_noiphostonalldisabled[i] != 0) {
2336 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
2340 /* If some nodes are not disabled, then can not host
2341 * IPs on DISABLED node
2343 for (i=0;i<nodemap->num;i++) {
2344 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
2345 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
2351 static bool set_ipflags(struct ctdb_context *ctdb,
2352 TALLOC_CTX *tmp_ctx,
2353 struct ctdb_node_map *nodemap)
2355 uint32_t *tval_noiptakeover;
2356 uint32_t *tval_noiphostonalldisabled;
2358 tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
2360 if (tval_noiptakeover == NULL) {
2364 tval_noiphostonalldisabled =
2365 get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
2366 "NoIPHostOnAllDisabled");
2367 if (tval_noiphostonalldisabled == NULL) {
2371 set_ipflags_internal(nodemap,
2372 tval_noiptakeover, tval_noiphostonalldisabled);
2374 talloc_free(tval_noiptakeover);
2375 talloc_free(tval_noiphostonalldisabled);
2381 make any IP alias changes for public addresses that are necessary
2383 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
2384 client_async_callback fail_callback, void *callback_data)
2387 struct ctdb_public_ip ip;
2388 struct ctdb_public_ipv4 ipv4;
2390 struct ctdb_public_ip_list *all_ips, *tmp_ip;
2392 struct timeval timeout;
2393 struct client_async_data *async_data;
2394 struct ctdb_client_control_state *state;
2395 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2396 uint32_t disable_timeout;
2399 * ip failover is completely disabled, just send out the
2400 * ipreallocated event.
2402 if (ctdb->tunable.disable_ip_failover != 0) {
2406 if (!set_ipflags(ctdb, tmp_ctx, nodemap)) {
2407 DEBUG(DEBUG_ERR,("Failed to set IP flags from tunables\n"));
2413 /* Do the IP reassignment calculations */
2414 ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
2416 /* The IP flags need to be cleared because they should never
2417 * be seen outside the IP allocation code.
2419 clear_ipflags(nodemap);
2421 /* The recovery daemon does regular sanity checks of the IPs.
2422 * However, sometimes it is overzealous and thinks changes are
2423 * required when they're already underway. This stops the
2424 * checks for a while before we start moving IPs.
2426 disable_timeout = ctdb->tunable.takeover_timeout;
2427 data.dptr = (uint8_t*)&disable_timeout;
2428 data.dsize = sizeof(disable_timeout);
2429 if (ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
2430 CTDB_SRVID_DISABLE_IP_CHECK, data) != 0) {
2431 DEBUG(DEBUG_INFO,("Failed to disable ip verification\n"));
2434 /* now tell all nodes to delete any alias that they should not
2435 have. This will be a NOOP on nodes that don't currently
2436 hold the given alias */
2437 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2438 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2440 async_data->fail_callback = fail_callback;
2441 async_data->callback_data = callback_data;
2443 for (i=0;i<nodemap->num;i++) {
2444 /* don't talk to unconnected nodes, but do talk to banned nodes */
2445 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
2449 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2450 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
2451 /* This node should be serving this
2452 vnn so dont tell it to release the ip
2456 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2457 ipv4.pnn = tmp_ip->pnn;
2458 ipv4.sin = tmp_ip->addr.ip;
2460 timeout = TAKEOVER_TIMEOUT();
2461 data.dsize = sizeof(ipv4);
2462 data.dptr = (uint8_t *)&ipv4;
2463 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2464 0, CTDB_CONTROL_RELEASE_IPv4, 0,
2468 ip.pnn = tmp_ip->pnn;
2469 ip.addr = tmp_ip->addr;
2471 timeout = TAKEOVER_TIMEOUT();
2472 data.dsize = sizeof(ip);
2473 data.dptr = (uint8_t *)&ip;
2474 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2475 0, CTDB_CONTROL_RELEASE_IP, 0,
2480 if (state == NULL) {
2481 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
2482 talloc_free(tmp_ctx);
2486 ctdb_client_async_add(async_data, state);
2489 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2490 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
2491 talloc_free(tmp_ctx);
2494 talloc_free(async_data);
2497 /* tell all nodes to get their own IPs */
2498 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2499 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2501 async_data->fail_callback = fail_callback;
2502 async_data->callback_data = callback_data;
2504 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2505 if (tmp_ip->pnn == -1) {
2506 /* this IP won't be taken over */
2510 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2511 ipv4.pnn = tmp_ip->pnn;
2512 ipv4.sin = tmp_ip->addr.ip;
2514 timeout = TAKEOVER_TIMEOUT();
2515 data.dsize = sizeof(ipv4);
2516 data.dptr = (uint8_t *)&ipv4;
2517 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2518 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
2522 ip.pnn = tmp_ip->pnn;
2523 ip.addr = tmp_ip->addr;
2525 timeout = TAKEOVER_TIMEOUT();
2526 data.dsize = sizeof(ip);
2527 data.dptr = (uint8_t *)&ip;
2528 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2529 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2533 if (state == NULL) {
2534 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2535 talloc_free(tmp_ctx);
2539 ctdb_client_async_add(async_data, state);
2541 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2542 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2543 talloc_free(tmp_ctx);
2549 * Tell all nodes to run eventscripts to process the
2550 * "ipreallocated" event. This can do a lot of things,
2551 * including restarting services to reconfigure them if public
2552 * IPs have moved. Once upon a time this event only used to
2555 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2556 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
2557 nodes, 0, TAKEOVER_TIMEOUT(),
2559 NULL, fail_callback,
2560 callback_data) != 0) {
2561 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
2564 talloc_free(tmp_ctx);
2570 destroy a ctdb_client_ip structure
2572 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2574 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2575 ctdb_addr_to_str(&ip->addr),
2576 ntohs(ip->addr.ip.sin_port),
2579 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
2584 called by a client to inform us of a TCP connection that it is managing
2585 that should be tickled with an ACK when IP takeover is done
2586 we handle both the old ipv4 style of packets as well as the new ipv4/6
2589 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2592 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2593 struct ctdb_control_tcp *old_addr = NULL;
2594 struct ctdb_control_tcp_addr new_addr;
2595 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2596 struct ctdb_tcp_list *tcp;
2597 struct ctdb_tcp_connection t;
2600 struct ctdb_client_ip *ip;
2601 struct ctdb_vnn *vnn;
2602 ctdb_sock_addr addr;
2604 switch (indata.dsize) {
2605 case sizeof(struct ctdb_control_tcp):
2606 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2607 ZERO_STRUCT(new_addr);
2608 tcp_sock = &new_addr;
2609 tcp_sock->src.ip = old_addr->src;
2610 tcp_sock->dest.ip = old_addr->dest;
2612 case sizeof(struct ctdb_control_tcp_addr):
2613 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2616 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2617 "to ctdb_control_tcp_client. size was %d but "
2618 "only allowed sizes are %lu and %lu\n",
2620 (long unsigned)sizeof(struct ctdb_control_tcp),
2621 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
2625 addr = tcp_sock->src;
2626 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2627 addr = tcp_sock->dest;
2628 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
2631 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2632 vnn = find_public_ip_vnn(ctdb, &addr);
2634 switch (addr.sa.sa_family) {
2636 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2637 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2638 ctdb_addr_to_str(&addr)));
2642 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2643 ctdb_addr_to_str(&addr)));
2646 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2652 if (vnn->pnn != ctdb->pnn) {
2653 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2654 ctdb_addr_to_str(&addr),
2655 client_id, client->pid));
2656 /* failing this call will tell smbd to die */
2660 ip = talloc(client, struct ctdb_client_ip);
2661 CTDB_NO_MEMORY(ctdb, ip);
2665 ip->client_id = client_id;
2666 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2667 DLIST_ADD(ctdb->client_ip_list, ip);
2669 tcp = talloc(client, struct ctdb_tcp_list);
2670 CTDB_NO_MEMORY(ctdb, tcp);
2672 tcp->connection.src_addr = tcp_sock->src;
2673 tcp->connection.dst_addr = tcp_sock->dest;
2675 DLIST_ADD(client->tcp_list, tcp);
2677 t.src_addr = tcp_sock->src;
2678 t.dst_addr = tcp_sock->dest;
2680 data.dptr = (uint8_t *)&t;
2681 data.dsize = sizeof(t);
2683 switch (addr.sa.sa_family) {
2685 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2686 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2687 ctdb_addr_to_str(&tcp_sock->src),
2688 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2691 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2692 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2693 ctdb_addr_to_str(&tcp_sock->src),
2694 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2697 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2701 /* tell all nodes about this tcp connection */
2702 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2703 CTDB_CONTROL_TCP_ADD,
2704 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2706 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2714 find a tcp address on a list
/*
  Look up a TCP connection in a tickle array.

  array: the array to search; it is tested against NULL before the scan.
  tcp:   the connection whose src_addr/dst_addr pair is looked for.

  Scans array->connections[0 .. num-1] and returns a pointer to the first
  element whose source AND destination address both compare equal (via
  ctdb_same_sockaddr) to those of tcp.
  NOTE(review): the results for the NULL-array and the not-found case are
  on lines missing from this listing - presumably NULL; confirm.
 */
2716 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2717 struct ctdb_tcp_connection *tcp)
2721 if (array == NULL) {
2725 for (i=0;i<array->num;i++) {
2726 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2727 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2728 return &array->connections[i];
2737 called by a daemon to inform us of a TCP connection that one of its
2738 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Handle a TCP_ADD control: record a TCP connection in the tickle array of
  the public address it was made to.

  indata:            indata.dptr is read as a struct ctdb_tcp_connection;
                     indata.dsize is not checked in the visible code.
  tcp_update_needed: when true, vnn->tcp_update_needed is set once the
                     connection has been stored.

  The vnn is selected from the connection's dst_addr with
  find_public_ip_vnn(). If the vnn has no tcp_array yet, one is allocated
  as a talloc child of ctdb->nodes and the connection is stored at index
  tcparray->num. Otherwise ctdb_tcp_find() is used to detect a connection
  that is already recorded, and a new one is appended after growing
  tcparray->connections with talloc_realloc().

  NOTE(review): the talloc_realloc() result is assigned straight back to
  tcparray->connections, so on allocation failure the previous pointer has
  already been overwritten when CTDB_NO_MEMORY() runs.
  NOTE(review): the return statements, the updates of tcparray->num and the
  element count passed to talloc_realloc() are on lines missing from this
  listing.
 */
2741 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2743 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2744 struct ctdb_tcp_array *tcparray;
2745 struct ctdb_tcp_connection tcp;
2746 struct ctdb_vnn *vnn;
2748 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2750 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2751 ctdb_addr_to_str(&p->dst_addr)));
2757 tcparray = vnn->tcp_array;
2759 /* If this is the first tickle */
2760 if (tcparray == NULL) {
2761 tcparray = talloc_size(ctdb->nodes,
2762 offsetof(struct ctdb_tcp_array, connections) +
2763 sizeof(struct ctdb_tcp_connection) * 1);
2764 CTDB_NO_MEMORY(ctdb, tcparray);
2765 vnn->tcp_array = tcparray;
2768 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2769 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2771 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2772 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2775 if (tcp_update_needed) {
2776 vnn->tcp_update_needed = true;
2782 /* Do we already have this tickle ?*/
2783 tcp.src_addr = p->src_addr;
2784 tcp.dst_addr = p->dst_addr;
2785 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2786 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2787 ctdb_addr_to_str(&tcp.dst_addr),
2788 ntohs(tcp.dst_addr.ip.sin_port),
2793 /* A new tickle, we must add it to the array */
2794 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2795 struct ctdb_tcp_connection,
2797 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2799 vnn->tcp_array = tcparray;
2800 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2801 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2804 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2805 ctdb_addr_to_str(&tcp.dst_addr),
2806 ntohs(tcp.dst_addr.ip.sin_port),
2809 if (tcp_update_needed) {
2810 vnn->tcp_update_needed = true;
2818 remove a TCP connection from the tickle list, i.e. a connection that one of the
2819 clients was managing and that would have been tickled with an ACK when IP takeover is
/*
  Remove one TCP connection from the tickle array of the public address
  that conn->dst_addr belongs to.

  Three situations are only logged: the address is not known to
  find_public_ip_vnn(), the vnn has no tcp_array, or ctdb_tcp_find() does
  not know the connection (the guarding conditions and early exits are
  partly on lines missing from this listing).
  Otherwise the last array element is copied over the matching slot and
  num is decremented; when num reaches 0 the whole array is freed and
  vnn->tcp_array is reset to NULL. vnn->tcp_update_needed is then set to
  true and the removal is logged using conn->src_addr.
 */
2822 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2824 struct ctdb_tcp_connection *tcpp;
2825 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2828 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2829 ctdb_addr_to_str(&conn->dst_addr)));
2833 /* if the array is empty we cant remove it
2834 and we dont need to do anything
2836 if (vnn->tcp_array == NULL) {
2837 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2838 ctdb_addr_to_str(&conn->dst_addr),
2839 ntohs(conn->dst_addr.ip.sin_port)));
2844 /* See if we know this connection
2845 if we dont know this connection then we dont need to do anything
2847 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2849 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2850 ctdb_addr_to_str(&conn->dst_addr),
2851 ntohs(conn->dst_addr.ip.sin_port)));
2856 /* We need to remove this entry from the array.
2857 Instead of allocating a new array and copying data to it
2858 we cheat and just copy the last entry in the existing array
2859 to the entry that is to be removed and just shring the
2862 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2863 vnn->tcp_array->num--;
2865 /* If we deleted the last entry we also need to remove the entire array
2867 if (vnn->tcp_array->num == 0) {
2868 talloc_free(vnn->tcp_array);
2869 vnn->tcp_array = NULL;
2872 vnn->tcp_update_needed = true;
2874 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2875 ctdb_addr_to_str(&conn->src_addr),
2876 ntohs(conn->src_addr.ip.sin_port)));
2881 called by a daemon to inform us of a TCP connection that one of its
2882 clients used and that is no longer needed in the tickle database
/*
  Control handler: reinterpret indata.dptr as a struct ctdb_tcp_connection
  and hand it to ctdb_remove_tcp_connection().
  indata.dsize is not checked in the visible code; the return value is on
  a line missing from this listing.
 */
2884 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2886 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2888 ctdb_remove_tcp_connection(ctdb, conn);
2895 called when a daemon restarts - send all tickles for all public addresses
2896 we are serving immediately to the new node.
/*
  Control handler for the startup of a node (vnn).
  The only visible content is a to-do note: no tickles are sent to the new
  node yet. The return statement is on a line missing from this listing.
 */
2898 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2900 /*XXX here we should send all tickles we are serving to the new node */
2906 called when a client structure goes away - hook to remove
2907 elements from the tcp_list in all daemons
/*
  Drain client->tcp_list: each entry is unlinked from the list and its
  connection is passed to ctdb_remove_tcp_connection(), which drops it
  from the tickle array of the matching public address.
 */
2909 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2911 while (client->tcp_list) {
2912 struct ctdb_tcp_list *tcp = client->tcp_list;
2913 DLIST_REMOVE(client->tcp_list, tcp);
2914 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
2920 release all IPs on shutdown
/*
  Walk every public address on the ctdb->vnn list.

  An address that ctdb_sys_have_ip() does not report as present only has
  its interface unassigned. For the remaining addresses the
  CTDB_EVENT_RELEASE_IP event script is run with the arguments
  "<iface> <ip> <netmask bits>", release_kill_clients() is called for the
  address and the interface is unassigned.
  NOTE(review): the lines between the two branches are missing from this
  listing (presumably a "continue", possibly further checks) - confirm.
 */
2922 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2924 struct ctdb_vnn *vnn;
2926 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2927 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2928 ctdb_vnn_unassign_iface(ctdb, vnn);
2934 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2935 ctdb_vnn_iface_string(vnn),
2936 ctdb_addr_to_str(&vnn->public_address),
2937 vnn->public_netmask_bits);
2938 release_kill_clients(ctdb, &vnn->public_address);
2939 ctdb_vnn_unassign_iface(ctdb, vnn);
2945 get list of public IPs
/*
  Control handler: build a struct ctdb_all_public_ips reply in *outdata.

  When c->flags has CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE set, each vnn is
  additionally tested with ctdb_vnn_available() while filling the reply
  (the body of that test is not visible - presumably the vnn is skipped).
  The reply buffer is a zeroed talloc child of outdata sized for num
  entries; every written entry carries the vnn's pnn and public address.
  After filling, the length is recomputed from i, so outdata->dsize
  reflects the entries that were actually written.
  NOTE(review): the counting loop body, the setting of ips->num and the
  return value are on lines missing from this listing.
 */
2947 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2948 struct ctdb_req_control *c, TDB_DATA *outdata)
2951 struct ctdb_all_public_ips *ips;
2952 struct ctdb_vnn *vnn;
2953 bool only_available = false;
2955 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2956 only_available = true;
2959 /* count how many public ip structures we have */
2961 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2965 len = offsetof(struct ctdb_all_public_ips, ips) +
2966 num*sizeof(struct ctdb_public_ip);
2967 ips = talloc_zero_size(outdata, len);
2968 CTDB_NO_MEMORY(ctdb, ips);
2971 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2972 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2975 ips->ips[i].pnn = vnn->pnn;
2976 ips->ips[i].addr = vnn->public_address;
2980 len = offsetof(struct ctdb_all_public_ips, ips) +
2981 i*sizeof(struct ctdb_public_ip);
2983 outdata->dsize = len;
2984 outdata->dptr = (uint8_t *)ips;
2991 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Control handler: old-style variant of the public IP list that uses
  struct ctdb_all_public_ipsv4.

  Both the counting loop and the fill loop test each vnn's address family
  against AF_INET (the bodies of those tests are not visible - presumably
  non-IPv4 addresses are skipped). The reply is a zeroed talloc child of
  outdata sized for num entries; each written entry gets the vnn's pnn and
  the IPv4 member of its public address (public_address.ip).
  Unlike the length handling visible in ctdb_control_get_public_ips(),
  outdata->dsize is set once, before the fill loop.
  NOTE(review): counter updates and the return value are on lines missing
  from this listing.
 */
2993 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2994 struct ctdb_req_control *c, TDB_DATA *outdata)
2997 struct ctdb_all_public_ipsv4 *ips;
2998 struct ctdb_vnn *vnn;
3000 /* count how many public ip structures we have */
3002 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3003 if (vnn->public_address.sa.sa_family != AF_INET) {
3009 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
3010 num*sizeof(struct ctdb_public_ipv4);
3011 ips = talloc_zero_size(outdata, len);
3012 CTDB_NO_MEMORY(ctdb, ips);
3014 outdata->dsize = len;
3015 outdata->dptr = (uint8_t *)ips;
3019 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3020 if (vnn->public_address.sa.sa_family != AF_INET) {
3023 ips->ips[i].pnn = vnn->pnn;
3024 ips->ips[i].sin = vnn->public_address.ip;
/*
  Control handler: return a struct ctdb_control_public_ip_info for the
  address passed in indata.

  indata.dptr is read as a ctdb_sock_addr. The vnn is looked up with
  find_public_ip_vnn(); ctdb->single_ip_vnn is used instead when its
  public address matches (ctdb_same_ip). The reply is a zeroed talloc
  child of outdata holding the vnn's address and pnn plus one
  ctdb_control_iface_info (name, link_state, references) per entry of
  vnn->ifaces, which is walked until a NULL entry. active_idx starts as
  0xFFFFFFFF and is set to the index of the interface that is vnn->iface.
  An interface name that ctdb_find_iface() does not know is logged at
  DEBUG_CRIT as an internal error.

  NOTE(review): cur->name is copied with strcpy(); the size of the
  destination name field is not visible here - confirm it cannot overflow.
  NOTE(review): parts of the parameter list, the conditions around the
  lookups and all return values are on lines missing from this listing.
 */
3031 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
3032 struct ctdb_req_control *c,
3037 ctdb_sock_addr *addr;
3038 struct ctdb_control_public_ip_info *info;
3039 struct ctdb_vnn *vnn;
3041 addr = (ctdb_sock_addr *)indata.dptr;
3043 vnn = find_public_ip_vnn(ctdb, addr);
3045 /* if it is not a public ip it could be our 'single ip' */
3046 if (ctdb->single_ip_vnn) {
3047 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
3048 vnn = ctdb->single_ip_vnn;
3053 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
3054 "'%s'not a public address\n",
3055 ctdb_addr_to_str(addr)));
3059 /* count how many interfaces are listed for this public ip */
3061 for (;vnn->ifaces[num];) {
3065 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
3066 num*sizeof(struct ctdb_control_iface_info);
3067 info = talloc_zero_size(outdata, len);
3068 CTDB_NO_MEMORY(ctdb, info);
3070 info->ip.addr = vnn->public_address;
3071 info->ip.pnn = vnn->pnn;
3072 info->active_idx = 0xFFFFFFFF;
3074 for (i=0; vnn->ifaces[i]; i++) {
3075 struct ctdb_iface *cur;
3077 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
3079 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
3083 if (vnn->iface == cur) {
3084 info->active_idx = i;
3086 strcpy(info->ifaces[i].name, cur->name);
3087 info->ifaces[i].link_state = cur->link_up;
3088 info->ifaces[i].references = cur->references;
3091 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
3092 i*sizeof(struct ctdb_control_iface_info);
3094 outdata->dsize = len;
3095 outdata->dptr = (uint8_t *)info;
/*
  Control handler: return a struct ctdb_control_get_ifaces describing
  every interface on the ctdb->ifaces list.

  The reply is a zeroed talloc child of outdata sized for num entries; for
  each interface the name, link state (cur->link_up) and reference count
  are copied. The final length is recomputed from i before it is stored in
  outdata->dsize.

  NOTE(review): cur->name is copied with strcpy(); the size of the
  destination name field is not visible here - confirm it cannot overflow.
  NOTE(review): parts of the parameter list, the counter updates and the
  return value are on lines missing from this listing.
 */
3100 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
3101 struct ctdb_req_control *c,
3105 struct ctdb_control_get_ifaces *ifaces;
3106 struct ctdb_iface *cur;
3108 /* count how many interfaces we have */
3110 for (cur=ctdb->ifaces;cur;cur=cur->next) {
3114 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
3115 num*sizeof(struct ctdb_control_iface_info);
3116 ifaces = talloc_zero_size(outdata, len);
3117 CTDB_NO_MEMORY(ctdb, ifaces);
3120 for (cur=ctdb->ifaces;cur;cur=cur->next) {
3121 strcpy(ifaces->ifaces[i].name, cur->name);
3122 ifaces->ifaces[i].link_state = cur->link_up;
3123 ifaces->ifaces[i].references = cur->references;
3127 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
3128 i*sizeof(struct ctdb_control_iface_info);
3130 outdata->dsize = len;
3131 outdata->dptr = (uint8_t *)ifaces;
/*
  Control handler: set the link state of a named interface.

  indata.dptr is read as a struct ctdb_control_iface_info and validated:
    - name[CTDB_IFACE_SIZE] must be '\0', otherwise the (bounded) name is
      logged as not terminated;
    - link_state is dispatched through a switch; an invalid value is
      logged;
    - references must be 0.
  The interface is then looked up with ctdb_find_iface(). After the
  comparison of the requested state with iface->link_up (its body is not
  visible - presumably an early return when nothing changes) the change is
  logged, at DEBUG_ERR when the link was up and at DEBUG_NOTICE otherwise,
  and iface->link_up is updated.
  NOTE(review): parts of the parameter list, the switch cases and all
  return values are on lines missing from this listing.
 */
3136 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
3137 struct ctdb_req_control *c,
3140 struct ctdb_control_iface_info *info;
3141 struct ctdb_iface *iface;
3142 bool link_up = false;
3144 info = (struct ctdb_control_iface_info *)indata.dptr;
3146 if (info->name[CTDB_IFACE_SIZE] != '\0') {
3147 int len = strnlen(info->name, CTDB_IFACE_SIZE);
3148 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
3149 len, len, info->name));
3153 switch (info->link_state) {
3161 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
3162 (unsigned int)info->link_state));
3166 if (info->references != 0) {
3167 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
3168 (unsigned int)info->references));
3172 iface = ctdb_find_iface(ctdb, info->name);
3173 if (iface == NULL) {
3177 if (link_up == iface->link_up) {
3181 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
3182 ("iface[%s] has changed it's link status %s => %s\n",
3184 iface->link_up?"up":"down",
3185 link_up?"up":"down"));
3187 iface->link_up = link_up;
3193 structure containing the listening socket and the list of tcp connections
3194 that the ctdb daemon is to kill
/*
  Per-vnn state kept while TCP connections are being killed: the owning
  vnn and ctdb context, the fd event for the capture socket, and the tree
  of connections that still have to be reset.
  NOTE(review): further members are on lines missing from this listing;
  capture_fd and private_data are referenced elsewhere in this file.
 */
3196 struct ctdb_kill_tcp {
3197 struct ctdb_vnn *vnn;
3198 struct ctdb_context *ctdb;
3200 struct fd_event *fde;
3201 trbt_tree_t *connections;
3206 a tcp connection that is to be killed
/*
  One TCP connection that is to be reset: its source and destination
  address and a back pointer to the owning ctdb_kill_tcp.
  NOTE(review): a member is missing from this listing; a "count" field is
  referenced elsewhere in this file.
 */
3208 struct ctdb_killtcp_con {
3209 ctdb_sock_addr src_addr;
3210 ctdb_sock_addr dst_addr;
3212 struct ctdb_kill_tcp *killtcp;
3215 /* this function is used to create a key to represent this socketpair
3216 in the killtcp tree.
3217 this key is used to insert and lookup matching socketpairs that are
3218 to be tickled and RST
/*
  Build the KILLTCP_KEYLEN-word key that identifies a socket pair in the
  killtcp tree.

  The key is built in a function-static buffer that is zeroed on every
  call, so a result is only meaningful until the next call.
  Differing address families of src and dst, and an unknown family, are
  logged as errors.
  Key layout, as far as visible:
    IPv4 fields: [0] dst address, [1] src address, [2] dst port,
                 [3] src port; the remaining words stay zero.
    IPv6 fields: [0..7] the four 32-bit address words of dst and src,
                 interleaved dst/src from word 3 down to word 0,
                 [8] dst port, [9] src port.
  Addresses and ports are stored exactly as found in the sockaddr (no
  byte-order conversion is applied here).
  NOTE(review): the case labels and the return statements are on lines
  missing from this listing.
 */
3220 #define KILLTCP_KEYLEN 10
3221 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
3223 static uint32_t key[KILLTCP_KEYLEN];
3225 bzero(key, sizeof(key));
3227 if (src->sa.sa_family != dst->sa.sa_family) {
3228 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
3232 switch (src->sa.sa_family) {
3234 key[0] = dst->ip.sin_addr.s_addr;
3235 key[1] = src->ip.sin_addr.s_addr;
3236 key[2] = dst->ip.sin_port;
3237 key[3] = src->ip.sin_port;
3240 uint32_t *dst6_addr32 =
3241 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
3242 uint32_t *src6_addr32 =
3243 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
3244 key[0] = dst6_addr32[3];
3245 key[1] = src6_addr32[3];
3246 key[2] = dst6_addr32[2];
3247 key[3] = src6_addr32[2];
3248 key[4] = dst6_addr32[1];
3249 key[5] = src6_addr32[1];
3250 key[6] = dst6_addr32[0];
3251 key[7] = src6_addr32[0];
3252 key[8] = dst->ip6.sin6_port;
3253 key[9] = src->ip6.sin6_port;
3257 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
3265 called when we get a read event on the raw socket
/*
  fd event handler for the capture socket of a ctdb_kill_tcp.

  private_data is the struct ctdb_kill_tcp. The handler tests flags for
  EVENT_FD_READ, reads one packet with ctdb_sys_read_tcp_packet() to
  obtain ack_seq and seq (the src/dst arguments are on a line missing from
  this listing), and looks the connection up in killtcp->connections with
  killtcp_key(&src, &dst). For a known connection a reset is logged and
  ctdb_sys_send_tcp() is called with the connection's dst_addr, src_addr,
  ack_seq, seq and a final argument of 1.
  NOTE(review): the early exits and the removal of the connection from the
  list are on lines missing from this listing.
 */
3267 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
3268 uint16_t flags, void *private_data)
3270 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
3271 struct ctdb_killtcp_con *con;
3272 ctdb_sock_addr src, dst;
3273 uint32_t ack_seq, seq;
3275 if (!(flags & EVENT_FD_READ)) {
3279 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
3280 killtcp->private_data,
3282 &ack_seq, &seq) != 0) {
3283 /* probably a non-tcp ACK packet */
3287 /* check if we have this guy in our list of connections
3290 con = trbt_lookuparray32(killtcp->connections,
3291 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
3293 /* no this was some other packet we can just ignore */
3297 /* This one has been tickled !
3298 now reset him and remove him from the list.
3300 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
3301 ntohs(con->dst_addr.ip.sin_port),
3302 ctdb_addr_to_str(&con->src_addr),
3303 ntohs(con->src_addr.ip.sin_port)));
3305 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
3310 /* when traversing the list of all tcp connections to send tickle acks to
3311 (so that we can capture the ack coming back and kill the connection
3313 this callback is called for each connection we are currently trying to kill
/*
  Tree traverse callback, invoked once per connection we are trying to
  kill.

  param: talloc context that collects the connections to be deleted.
  data:  the struct ctdb_killtcp_con being visited.

  Once con->count has reached 5 the connection is re-parented to param
  with talloc_steal(), so the caller can free it after the traverse.
  Otherwise the connection is tickled again using its dst_addr/src_addr
  pair.
  NOTE(review): the tickle call itself, the update of con->count and the
  return values are on lines missing from this listing.
 */
3315 static int tickle_connection_traverse(void *param, void *data)
3317 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
3319 /* have tried too many times, just give up */
3320 if (con->count >= 5) {
3321 /* can't delete in traverse: reparent to delete_cons */
3322 talloc_steal(param, con);
3326 /* otherwise, try tickling it again */
3329 (ctdb_sock_addr *)&con->dst_addr,
3330 (ctdb_sock_addr *)&con->src_addr,
3337 called every second until all sentenced connections have been reset
/*
  Timed event handler that re-tickles every connection still waiting to be
  reset.

  private_data is the struct ctdb_kill_tcp. A temporary talloc context is
  handed to tickle_connection_traverse() as the collection point for
  connections to drop, and is freed once the traverse has finished. When
  the connection tree is missing or has no root, the whole killtcp
  structure is freed; otherwise the handler re-arms itself on
  killtcp->ctdb->ev for one second later.
  NOTE(review): the return after talloc_free(killtcp) is on a line missing
  from this listing.
 */
3339 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
3340 struct timeval t, void *private_data)
3342 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
3343 void *delete_cons = talloc_new(NULL);
3345 /* loop over all connections sending tickle ACKs */
3346 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
3348 /* now we've finished traverse, it's safe to do deletion. */
3349 talloc_free(delete_cons);
3351 /* If there are no more connections to kill we can remove the
3352 entire killtcp structure
3354 if ( (killtcp->connections == NULL) ||
3355 (killtcp->connections->root == NULL) ) {
3356 talloc_free(killtcp);
3360 /* try tickling them again in a seconds time
3362 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3363 ctdb_tickle_sentenced_connections, killtcp);
3367 destroy the killtcp structure
/*
  talloc destructor for struct ctdb_kill_tcp.

  Searches the ctdb->vnn list for killtcp->vnn, and checks that the vnn
  still points back at this killtcp, before clearing vnn->killtcp.
  NOTE(review): the loop exit, the early returns and the return value are
  on lines missing from this listing.
 */
3369 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
3371 struct ctdb_vnn *tmpvnn;
3373 /* verify that this vnn is still active */
3374 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
3375 if (tmpvnn == killtcp->vnn) {
3380 if (tmpvnn == NULL) {
3384 if (killtcp->vnn->killtcp != killtcp) {
3388 killtcp->vnn->killtcp = NULL;
3394 /* nothing fancy here, just unconditionally replace any existing
3395 connection structure with the new one.
3397 dont even free the old one if it did exist, that one is talloc_stolen
3398 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback handed to trbt_insertarray32_callback() for the killtcp
  tree; parm is the new connection, data the existing tree entry.
  NOTE(review): the body is missing from this listing - presumably it
  returns parm so that the new structure replaces the old one; confirm.
 */
3401 static void *add_killtcp_callback(void *parm, void *data)
3407 add a tcp socket to the list of connections we want to RST
/*
  Add the TCP connection s -> d to the set of connections to be reset.

  Both addresses are canonicalized first. The owning vnn is looked up by
  the destination address, then by the source address, and finally
  ctdb->single_ip_vnn is considered when its address matches dst.
  On first use for a vnn a struct ctdb_kill_tcp is allocated as a talloc
  child of the vnn, with its own connection tree and
  ctdb_killtcp_destructor installed. The connection is allocated as a
  child of killtcp and inserted into the tree under
  killtcp_key(dst_addr, src_addr).
  If no capture socket is open yet, one is opened on the vnn's interface
  (failure is logged at DEBUG_CRIT). If no fd event exists yet, one is
  added for capture_tcp_handler with auto-close enabled, together with a
  one second timer running ctdb_tickle_sentenced_connections.
  The last visible statements free vnn->killtcp and reset it to NULL -
  presumably the failure path; confirm.
  NOTE(review): the s/d parameters, the conditions around the lookups, the
  immediate first tickle and all return values are on lines missing from
  this listing.
 */
3409 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
3413 ctdb_sock_addr src, dst;
3414 struct ctdb_kill_tcp *killtcp;
3415 struct ctdb_killtcp_con *con;
3416 struct ctdb_vnn *vnn;
3418 ctdb_canonicalize_ip(s, &src);
3419 ctdb_canonicalize_ip(d, &dst);
3421 vnn = find_public_ip_vnn(ctdb, &dst);
3423 vnn = find_public_ip_vnn(ctdb, &src);
3426 /* if it is not a public ip it could be our 'single ip' */
3427 if (ctdb->single_ip_vnn) {
3428 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
3429 vnn = ctdb->single_ip_vnn;
3434 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
3438 killtcp = vnn->killtcp;
3440 /* If this is the first connection to kill we must allocate
3443 if (killtcp == NULL) {
3444 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
3445 CTDB_NO_MEMORY(ctdb, killtcp);
3448 killtcp->ctdb = ctdb;
3449 killtcp->capture_fd = -1;
3450 killtcp->connections = trbt_create(killtcp, 0);
3452 vnn->killtcp = killtcp;
3453 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
3458 /* create a structure that describes this connection we want to
3459 RST and store it in killtcp->connections
3461 con = talloc(killtcp, struct ctdb_killtcp_con);
3462 CTDB_NO_MEMORY(ctdb, con);
3463 con->src_addr = src;
3464 con->dst_addr = dst;
3466 con->killtcp = killtcp;
3469 trbt_insertarray32_callback(killtcp->connections,
3470 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
3471 add_killtcp_callback, con);
3474 If we dont have a socket to listen on yet we must create it
3476 if (killtcp->capture_fd == -1) {
3477 const char *iface = ctdb_vnn_iface_string(vnn);
3478 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
3479 if (killtcp->capture_fd == -1) {
3480 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
3481 "socket on iface '%s' for killtcp (%s)\n",
3482 iface, strerror(errno)));
3488 if (killtcp->fde == NULL) {
3489 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
3491 capture_tcp_handler, killtcp);
3492 tevent_fd_set_auto_close(killtcp->fde);
3494 /* We also need to set up some events to tickle all these connections
3495 until they are all reset
3497 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3498 ctdb_tickle_sentenced_connections, killtcp);
3501 /* tickle him once now */
3510 talloc_free(vnn->killtcp);
3511 vnn->killtcp = NULL;
3516 kill a TCP connection.
/*
  Control handler: reinterpret indata.dptr as a struct
  ctdb_control_killtcp and pass its src_addr/dst_addr pair to
  ctdb_killtcp_add_connection(), whose result is returned.
  indata.dsize is not checked in the visible code.
 */
3518 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
3520 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
3522 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
3526 called by a daemon to inform us of the entire list of TCP tickles for
3527 a particular public address.
3528 this control should only be sent by the node that is currently serving
3529 that public address.
/*
  Control handler: replace the complete tickle list of one public address.

  indata.dptr is read as a struct ctdb_control_tcp_tickle_list. Its size
  is validated in two steps: the blob must reach up to
  tickles.connections, and must then be large enough for tickles.num
  connection entries. The vnn is selected from list->addr with
  find_public_ip_vnn(). Any previous vnn->tcp_array is freed, a new array
  is allocated (initially a talloc child of ctdb->nodes), filled with a
  copy of the received connections, and finally stolen onto the vnn.
  NOTE(review): if one of the allocations fails after the old list was
  freed, the vnn is left with tcp_array == NULL and the new tcparray
  remains parented to ctdb->nodes.
  NOTE(review): all return values are on lines missing from this listing.
 */
3531 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
3533 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
3534 struct ctdb_tcp_array *tcparray;
3535 struct ctdb_vnn *vnn;
3537 /* We must at least have tickles.num or else we cant verify the size
3538 of the received data blob
3540 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3541 tickles.connections)) {
3542 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
3546 /* verify that the size of data matches what we expect */
3547 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3548 tickles.connections)
3549 + sizeof(struct ctdb_tcp_connection)
3550 * list->tickles.num) {
3551 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3555 vnn = find_public_ip_vnn(ctdb, &list->addr);
3557 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3558 ctdb_addr_to_str(&list->addr)));
3563 /* remove any old ticklelist we might have */
3564 talloc_free(vnn->tcp_array);
3565 vnn->tcp_array = NULL;
3567 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3568 CTDB_NO_MEMORY(ctdb, tcparray);
3570 tcparray->num = list->tickles.num;
3572 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3573 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3575 memcpy(tcparray->connections, &list->tickles.connections[0],
3576 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3578 /* We now have a new fresh tickle list array for this vnn */
3579 vnn->tcp_array = talloc_steal(vnn, tcparray);
3585 called to return the full list of tickles for the public address associated
3586 with the provided vnn
/*
  Control handler: return the tickle list of one public address.

  indata.dptr is read as a ctdb_sock_addr and resolved with
  find_public_ip_vnn(). The reply, a struct ctdb_control_tcp_tickle_list
  allocated as a talloc child of outdata, is sized for num connections
  and receives a copy of tcparray->connections.
  NOTE(review): the conditions around "num = tcparray->num" and around the
  memcpy (i.e. the handling of a vnn without tcp_array), the setting of
  list->addr and the return values are on lines missing from this listing.
 */
3588 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3590 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3591 struct ctdb_control_tcp_tickle_list *list;
3592 struct ctdb_tcp_array *tcparray;
3594 struct ctdb_vnn *vnn;
3596 vnn = find_public_ip_vnn(ctdb, addr);
3598 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3599 ctdb_addr_to_str(addr)));
3604 tcparray = vnn->tcp_array;
3606 num = tcparray->num;
3611 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3612 tickles.connections)
3613 + sizeof(struct ctdb_tcp_connection) * num;
3615 outdata->dptr = talloc_size(outdata, outdata->dsize);
3616 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3617 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3620 list->tickles.num = num;
3622 memcpy(&list->tickles.connections[0], tcparray->connections,
3623 sizeof(struct ctdb_tcp_connection) * num);
3631 set the list of all tcp tickles for a public address
/*
  Send the tickle list of one public address to the other nodes.

  A struct ctdb_control_tcp_tickle_list sized for num connections is
  built in a temporary buffer (talloc child of ctdb), filled from
  tcparray, sent as CTDB_CONTROL_SET_TCP_TICKLE_LIST with
  CTDB_CTRL_FLAG_NOREPLY, and the buffer is freed afterwards.
  NOTE(review): in the visible lines the control is always addressed to
  CTDB_BROADCAST_CONNECTED; timeout and destnode do not appear to be used.
  NOTE(review): the conditions around "num = tcparray->num" (handling of
  a NULL tcparray), the setting of list->addr and the return values are on
  lines missing from this listing.
 */
3633 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3634 struct timeval timeout, uint32_t destnode,
3635 ctdb_sock_addr *addr,
3636 struct ctdb_tcp_array *tcparray)
3640 struct ctdb_control_tcp_tickle_list *list;
3643 num = tcparray->num;
3648 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3649 tickles.connections) +
3650 sizeof(struct ctdb_tcp_connection) * num;
3651 data.dptr = talloc_size(ctdb, data.dsize);
3652 CTDB_NO_MEMORY(ctdb, data.dptr);
3654 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3656 list->tickles.num = num;
3658 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3661 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3662 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3663 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3665 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3669 talloc_free(data.dptr);
3676 perform tickle updates if required
/*
  Periodic timed event: push pending tickle list updates to the cluster.

  private_data is the ctdb context. Every vnn is examined: the checks
  "ctdb->pnn != vnn->pnn" and "!vnn->tcp_update_needed" select the
  addresses to handle (their bodies are not visible - presumably such vnns
  are skipped). For the others ctdb_ctrl_set_tcp_tickles() is called with
  the vnn's public address; a failure is logged. The handler then re-arms
  itself on ctdb->tickle_update_context after
  ctdb->tunable.tickle_update_interval seconds.
  NOTE(review): some call arguments and the clearing of tcp_update_needed
  are on lines missing from this listing.
 */
3678 static void ctdb_update_tcp_tickles(struct event_context *ev,
3679 struct timed_event *te,
3680 struct timeval t, void *private_data)
3682 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3684 struct ctdb_vnn *vnn;
3686 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3687 /* we only send out updates for public addresses that
3690 if (ctdb->pnn != vnn->pnn) {
3693 /* We only send out the updates if we need to */
3694 if (!vnn->tcp_update_needed) {
3697 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3699 CTDB_BROADCAST_CONNECTED,
3700 &vnn->public_address,
3703 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3704 ctdb_addr_to_str(&vnn->public_address)));
3708 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3709 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3710 ctdb_update_tcp_tickles, ctdb);
3715 start periodic update of tcp tickles
/*
  Create ctdb->tickle_update_context (a talloc child of ctdb) and schedule
  the first run of ctdb_update_tcp_tickles() on it after
  ctdb->tunable.tickle_update_interval seconds.
 */
3717 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3719 ctdb->tickle_update_context = talloc_new(ctdb);
3721 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3722 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3723 ctdb_update_tcp_tickles, ctdb);
/*
  State for a series of gratuitous ARPs: the ctdb context and the address
  to announce.
  NOTE(review): further members are on lines missing from this listing;
  "iface" and "count" are referenced elsewhere in this file.
 */
3729 struct control_gratious_arp {
3730 struct ctdb_context *ctdb;
3731 ctdb_sock_addr addr;
3737 send a control_gratuitous arp
/*
  Timed event handler that sends one gratuitous ARP.

  private_data is the struct control_gratious_arp. The ARP is sent with
  ctdb_sys_send_arp() for arp->addr on arp->iface; a failure is logged
  together with strerror(errno). arp->count is compared with
  CTDB_ARP_REPEAT, and the handler is scheduled again on arp->ctdb->ev
  after CTDB_ARP_INTERVAL seconds.
  NOTE(review): the increment of arp->count and what happens once the
  repeat limit is reached are on lines missing from this listing.
 */
3739 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3740 struct timeval t, void *private_data)
3743 struct control_gratious_arp *arp = talloc_get_type(private_data,
3744 struct control_gratious_arp);
3746 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3748 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3749 arp->iface, strerror(errno)));
3754 if (arp->count == CTDB_ARP_REPEAT) {
3759 event_add_timed(arp->ctdb->ev, arp,
3760 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3761 send_gratious_arp, arp);
/*
  Control handler: start sending gratuitous ARPs for an address.

  indata.dptr is read as a struct ctdb_control_gratious_arp. The blob must
  at least reach up to the iface member; a second check relates
  indata.dsize to offsetof(iface) + gratious_arp->len and logs a size
  mismatch. A struct control_gratious_arp is then allocated as a talloc
  child of ctdb, filled with the address and a copy of the interface name,
  and send_gratious_arp() is scheduled to run immediately
  (timeval_zero()).
  NOTE(review): talloc_strdup() relies on gratious_arp->iface being
  NUL-terminated within the received blob; no such check is visible here.
  NOTE(review): the comparison operator of the second size check, the
  remaining field initialisation and the return values are on lines
  missing from this listing.
 */
3768 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3770 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3771 struct control_gratious_arp *arp;
3773 /* verify the size of indata */
3774 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3775 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3776 (unsigned)indata.dsize,
3777 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3781 ( offsetof(struct ctdb_control_gratious_arp, iface)
3782 + gratious_arp->len ) ){
3784 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3785 "but should be %u bytes\n",
3786 (unsigned)indata.dsize,
3787 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
3792 arp = talloc(ctdb, struct control_gratious_arp);
3793 CTDB_NO_MEMORY(ctdb, arp);
3796 arp->addr = gratious_arp->addr;
3797 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3798 CTDB_NO_MEMORY(ctdb, arp->iface);
3801 event_add_timed(arp->ctdb->ev, arp,
3802 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler: add a public address at runtime.

  indata.dptr is read as a struct ctdb_control_ip_iface. The blob must at
  least reach up to the iface member; a second check relates indata.dsize
  to offsetof(iface) + pub->len and logs a size mismatch. The address,
  mask and interface string are then passed to ctdb_add_public_address()
  with a final argument of true; a failure is logged.
  NOTE(review): the second size comparison and the return values are on
  lines missing from this listing.
 */
3807 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3809 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3812 /* verify the size of indata */
3813 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3814 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3818 ( offsetof(struct ctdb_control_ip_iface, iface)
3821 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3822 "but should be %u bytes\n",
3823 (unsigned)indata.dsize,
3824 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3828 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3831 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
3839 called when releaseip event finishes for del_public_address
/*
  Event script completion callback: frees private_data. The status
  argument is not used in the visible lines.
  NOTE(review): the rest of the parameter list is on a line missing from
  this listing.
 */
3841 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
3844 talloc_free(private_data);
/*
  Control handler: delete a public address at runtime.

  indata.dptr is read as a struct ctdb_control_ip_iface and size-checked
  against offsetof(iface) and offsetof(iface) + pub->len. The ctdb->vnn
  list is searched for a vnn whose address matches pub->addr
  (ctdb_same_ip). The match is unlinked from the list and re-parented to a
  temporary talloc context (child of ctdb), and
  ctdb_remove_orphaned_ifaces() is called.
  If the address is hosted by another node (vnn->pnn != ctdb->pnn) the
  interface is unassigned when set and the temporary context is freed.
  Otherwise the CTDB_EVENT_RELEASE_IP event script is started with
  delete_ip_callback and the temporary context as its private data, and
  the interface is unassigned when set.
  NOTE(review): the second size comparison, some event script arguments
  and all return values are on lines missing from this listing.
 */
3847 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3849 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3850 struct ctdb_vnn *vnn;
3853 /* verify the size of indata */
3854 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3855 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3859 ( offsetof(struct ctdb_control_ip_iface, iface)
3862 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3863 "but should be %u bytes\n",
3864 (unsigned)indata.dsize,
3865 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3869 /* walk over all public addresses until we find a match */
3870 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3871 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3872 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
3874 DLIST_REMOVE(ctdb->vnn, vnn);
3875 talloc_steal(mem_ctx, vnn);
3876 ctdb_remove_orphaned_ifaces(ctdb, vnn, mem_ctx);
3877 if (vnn->pnn != ctdb->pnn) {
3878 if (vnn->iface != NULL) {
3879 ctdb_vnn_unassign_iface(ctdb, vnn);
3881 talloc_free(mem_ctx);
3886 ret = ctdb_event_script_callback(ctdb,
3887 mem_ctx, delete_ip_callback, mem_ctx,
3889 CTDB_EVENT_RELEASE_IP,
3891 ctdb_vnn_iface_string(vnn),
3892 ctdb_addr_to_str(&vnn->public_address),
3893 vnn->public_netmask_bits);
3894 if (vnn->iface != NULL) {
3895 ctdb_vnn_unassign_iface(ctdb, vnn);
/*
  State carried through the "ipreallocated" event script run: the control
  request that is replied to once the script has finished.
 */
3908 struct ipreallocated_callback_state {
3909 struct ctdb_req_control *c;
/*
  Completion callback for the "ipreallocated" event script.

  p is the struct ipreallocated_callback_state. A failure is logged with
  the status; when the status is -ETIME the node bans itself
  (ctdb_ban_self). The stored control request is then answered with the
  script status via ctdb_request_control_reply().
  NOTE(review): the condition guarding the failure log and the release of
  the state are on lines missing from this listing.
 */
3912 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3913 int status, void *p)
3915 struct ipreallocated_callback_state *state =
3916 talloc_get_type(p, struct ipreallocated_callback_state);
3920 (" \"ipreallocated\" event script failed (status %d)\n",
3922 if (status == -ETIME) {
3923 ctdb_ban_self(ctdb);
3927 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3931 /* A control to run the ipreallocated event */
/*
  Control handler: run the "ipreallocated" event script and reply
  asynchronously.

  A struct ipreallocated_callback_state is allocated as a talloc child of
  ctdb and the CTDB_EVENT_IPREALLOCATED script is started with
  ctdb_ipreallocated_callback as completion callback. On success the
  control request c is stolen onto the state and *async_reply is set to
  true, so that the reply is sent from the callback.
  NOTE(review): the async_reply parameter declaration, the failure cleanup
  and the return values are on lines missing from this listing.
 */
3932 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3933 struct ctdb_req_control *c,
3937 struct ipreallocated_callback_state *state;
3939 state = talloc(ctdb, struct ipreallocated_callback_state);
3940 CTDB_NO_MEMORY(ctdb, state);
3942 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3944 ret = ctdb_event_script_callback(ctdb, state,
3945 ctdb_ipreallocated_callback, state,
3946 false, CTDB_EVENT_IPREALLOCATED,
3950 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3955 /* tell the control that we will be reply asynchronously */
3956 state->c = talloc_steal(state, c);
3957 *async_reply = true;
3963 /* This function is called from the recovery daemon to verify that a remote
3964 node has the expected ip allocation.
3965 This is verified against ctdb->ip_tree
/*
  Compare the public IP list reported by a remote node with the expected
  allocation stored in ctdb->ip_tree.

  Each of the ips->num entries is looked up in the tree by ip_key() of its
  address. An address that is missing from the tree is logged. Entries
  where either side has pnn == -1 get separate treatment (body not
  visible - presumably they are skipped). A differing pnn is logged as an
  inconsistent allocation.
  NOTE(review): the handling of a NULL ip_tree and all return values are
  on lines missing from this listing.
 */
3967 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
3969 struct ctdb_public_ip_list *tmp_ip;
3972 if (ctdb->ip_tree == NULL) {
3973 /* dont know the expected allocation yet, assume remote node
3982 for (i=0; i<ips->num; i++) {
3983 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3984 if (tmp_ip == NULL) {
3985 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
3989 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3993 if (tmp_ip->pnn != ips->ips[i].pnn) {
3994 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
  Record in ctdb->ip_tree that the address ip->addr is now assigned to
  node ip->pnn.

  A missing tree and an address that is not in the tree are both logged as
  errors. Otherwise the change is logged at DEBUG_NOTICE (old pnn -> new
  pnn) and the tree entry's pnn is overwritten.
  NOTE(review): all return values are on lines missing from this listing.
 */
4002 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
4004 struct ctdb_public_ip_list *tmp_ip;
4006 if (ctdb->ip_tree == NULL) {
4007 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
4011 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
4012 if (tmp_ip == NULL) {
4013 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
4017 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
4018 tmp_ip->pnn = ip->pnn;
/*
  State of one running "reload ips" operation: the ctdb context, the
  control request to reply to, and the fd event watching the child.
  NOTE(review): further members are on lines missing from this listing;
  "status", "fd" and "child" are referenced elsewhere in this file.
 */
4024 struct ctdb_reloadips_handle {
4025 struct ctdb_context *ctdb;
4026 struct ctdb_req_control *c;
4030 struct fd_event *fde;
/*
  talloc destructor for struct ctdb_reloadips_handle.

  Clears ctdb->reload_ips when it still points at this handle, answers the
  pending control request with h->status, and sends SIGKILL to the child
  process through ctdb_kill().
  NOTE(review): the return value is on a line missing from this listing.
 */
4033 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
4035 if (h == h->ctdb->reload_ips) {
4036 h->ctdb->reload_ips = NULL;
4039 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
4042 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
  Timed event handler for a reload-ips operation; private_data is the
  struct ctdb_reloadips_handle.
  NOTE(review): everything after the handle lookup is on lines missing
  from this listing - presumably the handle is freed; confirm.
 */
4046 static void ctdb_reloadips_timeout_event(struct event_context *ev,
4047 struct timed_event *te,
4048 struct timeval t, void *private_data)
4050 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
4055 static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde,
4056 uint16_t flags, void *private_data)
4058 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
4063 ret = read(h->fd[0], &res, 1);
4064 if (ret < 1 || res != 0) {
4065 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
4073 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
4075 TALLOC_CTX *mem_ctx = talloc_new(NULL);
4076 struct ctdb_all_public_ips *ips;
4077 struct ctdb_vnn *vnn;
4080 /* read the ip allocation from the local node */
4081 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
4083 DEBUG(DEBUG_ERR, ("Unable to get public ips from local node\n"));
4084 talloc_free(mem_ctx);
4088 /* re-read the public ips file */
4090 if (ctdb_set_public_addresses(ctdb, false) != 0) {
4091 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
4092 talloc_free(mem_ctx);
4097 /* check the previous list of ips and scan for ips that have been
4100 for (i = 0; i < ips->num; i++) {
4101 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
4102 if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
4107 /* we need to delete this ip, no longer available on this node */
4109 struct ctdb_control_ip_iface pub;
4111 DEBUG(DEBUG_NOTICE,("RELOADIPS: IP%s is no longer available on this node. Deleting it.\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4112 pub.addr = ips->ips[i].addr;
4116 ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
4118 DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4125 /* loop over all new ones and check the ones we need to add */
4126 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
4127 for (i = 0; i < ips->num; i++) {
4128 if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
4132 if (i == ips->num) {
4133 struct ctdb_control_ip_iface pub;
4134 const char *ifaces = NULL;
4137 DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
4139 pub.addr = vnn->public_address;
4140 pub.mask = vnn->public_netmask_bits;
4143 ifaces = vnn->ifaces[0];
4145 while (vnn->ifaces[iface] != NULL) {
4146 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
4149 pub.len = strlen(ifaces)+1;
4150 memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
4152 ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
4154 DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
4163 /* This control is sent to force the node to re-read the public addresses file
4164 and drop any addresses we should no longer host, and add new addresses
4165 that we are now able to host
4167 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
4169 struct ctdb_reloadips_handle *h;
4170 pid_t parent = getpid();
4172 if (ctdb->reload_ips != NULL) {
4173 talloc_free(ctdb->reload_ips);
4174 ctdb->reload_ips = NULL;
4177 h = talloc(ctdb, struct ctdb_reloadips_handle);
4178 CTDB_NO_MEMORY(ctdb, h);
4183 if (pipe(h->fd) == -1) {
4184 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
4189 h->child = ctdb_fork(ctdb);
4190 if (h->child == (pid_t)-1) {
4191 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
4199 if (h->child == 0) {
4200 signed char res = 0;
4203 debug_extra = talloc_asprintf(NULL, "reloadips:");
4205 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
4206 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
4209 res = ctdb_reloadips_child(ctdb);
4211 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
4215 write(h->fd[1], &res, 1);
4216 /* make sure we die when our parent dies */
4217 while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
4223 h->c = talloc_steal(h, c);
4226 set_close_on_exec(h->fd[0]);
4228 talloc_set_destructor(h, ctdb_reloadips_destructor);
4231 h->fde = event_add_fd(ctdb->ev, h, h->fd[0],
4232 EVENT_FD_READ, ctdb_reloadips_child_handler,
4234 tevent_fd_set_auto_close(h->fde);
4236 event_add_timed(ctdb->ev, h,
4237 timeval_current_ofs(120, 0),
4238 ctdb_reloadips_timeout_event, h);
4240 /* we reply later */
4241 *async_reply = true;