4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Builds a timeval from the takeover_timeout tunable via
 * timeval_current_ofs(). The expansion references a variable named
 * `ctdb`, so it only works where such a variable is in scope. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* Used by ctdb_control_send_arp(): CTDB_ARP_INTERVAL is the seconds part of
 * the re-arm offset and CTDB_ARP_REPEAT is compared against arp->count. */
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
/* Return the name of the interface currently assigned to vnn.
 * NOTE(review): the listing skips the lines between the signature and
 * this return, so any handling of vnn->iface == NULL is not visible
 * here - confirm before relying on it. */
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
/* Make sure ctdb->ifaces has an entry for the interface named iface.
 * The list is scanned for an entry with the same name; otherwise a
 * zeroed struct ctdb_iface is allocated under ctdb, the name is
 * duplicated under the new entry, link_up is seeded from
 * ctdb->done_startup and the entry is linked into ctdb->ifaces.
 * NOTE(review): what happens when a match is found, and the return
 * values, are on lines missing from this listing. */
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
69 * If link_up defaults to true then IPs can be allocated to a
70 * node during the first recovery. However, then an interface
71 * could have its link marked down during the startup event,
72 * causing the IP to move almost immediately. If link_up
73 * defaults to false then, during normal operation, IPs added
74 * to a new interface can't be assigned until a monitor cycle
75 * has occurred and marked the new interfaces up. This makes
76 * IP allocation unpredictable. The following is a neat
77 * compromise: early in startup link_up defaults to false, so
78 * IPs can't be assigned, and after startup IPs can be
79 * assigned immediately.
81 i->link_up = ctdb->done_startup;
83 DLIST_ADD(ctdb->ifaces, i);
/* Check whether name equals (strcmp) one of the entries in the
 * NULL-terminated vnn->ifaces array. The return statements are not
 * visible in this listing; presumably true on a match, false otherwise. */
88 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
93 for (n = 0; vnn->ifaces[n] != NULL; n++) {
94 if (strcmp(name, vnn->ifaces[n]) == 0) {
102 /* If any interfaces now have no possible IPs then delete them. This
103 * implementation is naive (i.e. simple) rather than clever
104 * (i.e. complex). Given that this is run on delip and that operation
105 * is rare, this doesn't need to be efficient - it needs to be
106 * foolproof. One alternative is reference counting, where the logic
107 * is distributed and can, therefore, be broken in multiple places.
108 * Another alternative is to build a red-black tree of interfaces that
109 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
110 * once) and then walking ctdb->ifaces once and deleting those not in
111 * the tree. Let's go to one of those if the naive implementation
112 * causes problems... :-)
/* Walk ctdb->ifaces and drop entries that are named in the given vnn but
 * are used neither by ctdb->single_ip_vnn nor by any vnn on ctdb->vnn.
 * Dropped entries are unlinked from ctdb->ifaces and re-parented to
 * mem_ctx rather than freed here.
 * NOTE(review): the mem_ctx parameter declaration and the loop-control
 * statements are on lines missing from this listing. Since entries are
 * unlinked while iterating with i=i->next, check how the full source
 * obtains the next pointer. */
114 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
115 struct ctdb_vnn *vnn,
118 struct ctdb_iface *i;
120 /* For each interface, check if there's an IP using it. */
121 for(i=ctdb->ifaces; i; i=i->next) {
125 /* Only consider interfaces named in the given VNN. */
126 if (!vnn_has_interface_with_name(vnn, i->name)) {
130 /* Is the "single IP" on this interface? */
131 if ((ctdb->single_ip_vnn != NULL) &&
132 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
133 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
134 /* Found, next interface please... */
137 /* Search for a vnn with this interface. */
139 for (tv=ctdb->vnn; tv; tv=tv->next) {
140 if (vnn_has_interface_with_name(tv, i->name)) {
147 /* None of the VNNs are using this interface. */
148 DLIST_REMOVE(ctdb->ifaces, i);
149 /* Caller will free mem_ctx when convenient. */
150 talloc_steal(mem_ctx, i);
/* Look up an entry of ctdb->ifaces by name (exact strcmp match).
 * The return statements are not visible in this listing; presumably the
 * matching entry, or NULL when none matches. */
156 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
159 struct ctdb_iface *i;
161 /* Search the known interfaces for one with this name */
162 for (i=ctdb->ifaces;i;i=i->next) {
163 if (strcmp(i->name, iface) == 0) {
/* Choose an interface for vnn from the names in vnn->ifaces.
 * Each name is resolved with ctdb_find_iface() and candidates are compared
 * by their references counter, a lower count replacing the current best.
 * NOTE(review): the filtering between the lookup and the comparison
 * (e.g. NULL or link-state checks) and the return are on lines missing
 * from this listing. */
171 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
172 struct ctdb_vnn *vnn)
175 struct ctdb_iface *cur = NULL;
176 struct ctdb_iface *best = NULL;
178 for (i=0; vnn->ifaces[i]; i++) {
180 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
194 if (cur->references < best->references) {
/* Bind vnn to an interface chosen by ctdb_vnn_best_iface() and mark the
 * address as hosted by this node (vnn->pnn = ctdb->pnn).
 * The first debug message covers the case where an interface is still
 * assigned; the error message covers the case where no interface could
 * be chosen.
 * NOTE(review): the conditions guarding the messages, the assignment of
 * vnn->iface, the reference accounting and the return values are on
 * lines missing from this listing. */
203 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
204 struct ctdb_vnn *vnn)
206 struct ctdb_iface *best = NULL;
209 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
210 "still assigned to iface '%s'\n",
211 ctdb_addr_to_str(&vnn->public_address),
212 ctdb_vnn_iface_string(vnn)));
216 best = ctdb_vnn_best_iface(ctdb, vnn);
/* NOTE(review): the message below reads "to iface any iface". */
218 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
219 "cannot assign to iface any iface\n",
220 ctdb_addr_to_str(&vnn->public_address)));
226 vnn->pnn = ctdb->pnn;
228 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
229 "now assigned to iface '%s' refs[%d]\n",
230 ctdb_addr_to_str(&vnn->public_address),
231 ctdb_vnn_iface_string(vnn),
/* Release vnn's interface assignment: logs the old interface, decrements
 * the interface's references counter and then inspects whether the
 * address was hosted by this node (vnn->pnn == ctdb->pnn).
 * NOTE(review): the debug call tolerates vnn->iface == NULL, but the
 * decrement dereferences vnn->iface. The guard, if any, and the body
 * of the final if are on lines missing from this listing. */
236 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
237 struct ctdb_vnn *vnn)
239 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
240 "now unassigned (old iface '%s' refs[%d])\n",
241 ctdb_addr_to_str(&vnn->public_address),
242 ctdb_vnn_iface_string(vnn),
243 vnn->iface?vnn->iface->references:0));
245 vnn->iface->references--;
248 if (vnn->pnn == ctdb->pnn) {
/* Decide whether vnn can currently be served. First checks the assigned
 * interface (set and link_up), then resolves each name in vnn->ifaces
 * with ctdb_find_iface().
 * NOTE(review): the per-candidate test and the return statements are on
 * lines missing from this listing. */
253 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
254 struct ctdb_vnn *vnn)
258 if (vnn->iface && vnn->iface->link_up) {
262 for (i=0; vnn->ifaces[i]; i++) {
263 struct ctdb_iface *cur;
265 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/* State handed to ctdb_control_send_arp() as its private_data: the
 * address to announce, the connections to tickle and the owning vnn.
 * Further members (addr and count are used elsewhere) are declared on
 * lines missing from this listing. */
278 struct ctdb_takeover_arp {
279 struct ctdb_context *ctdb;
282 struct ctdb_tcp_array *tcparray;
283 struct ctdb_vnn *vnn;
288 lists of tcp endpoints
/* Node of a doubly linked list (prev/next) holding one tcp connection. */
290 struct ctdb_tcp_list {
291 struct ctdb_tcp_list *prev, *next;
292 struct ctdb_tcp_connection connection;
296 list of clients to kill on IP release
/* Node of the doubly linked ctdb->client_ip_list walked by
 * release_kill_clients(). The addr and client-id members that function
 * reads are declared on lines missing from this listing. */
298 struct ctdb_client_ip {
299 struct ctdb_client_ip *prev, *next;
300 struct ctdb_context *ctdb;
307 send a gratuitous arp
/* Timed-event callback. private_data is a struct ctdb_takeover_arp.
 * Sends an ARP for arp->addr on the vnn's interface, then sends a tcp
 * tickle for every connection in arp->tcparray. arp->count is compared
 * with CTDB_ARP_REPEAT, and the callback re-arms itself on
 * arp->vnn->takeover_ctx with a CTDB_ARP_INTERVAL offset.
 * NOTE(review): the failure checks on ret, any NULL check on tcparray
 * before the loop, the counter update and the action taken once the
 * repeat count is reached are on lines missing from this listing. */
309 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
310 struct timeval t, void *private_data)
312 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
313 struct ctdb_takeover_arp);
315 struct ctdb_tcp_array *tcparray;
316 const char *iface = ctdb_vnn_iface_string(arp->vnn);
318 ret = ctdb_sys_send_arp(&arp->addr, iface);
320 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
321 iface, strerror(errno)));
324 tcparray = arp->tcparray;
326 for (i=0;i<tcparray->num;i++) {
327 struct ctdb_tcp_connection *tcon;
329 tcon = &tcparray->connections[i];
330 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
331 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
332 ctdb_addr_to_str(&tcon->src_addr),
333 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
334 ret = ctdb_sys_send_tcp(
339 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
340 ctdb_addr_to_str(&tcon->src_addr)));
347 if (arp->count == CTDB_ARP_REPEAT) {
/* Re-arm: the timer is owned by takeover_ctx, so freeing that context
 * (as ctdb_control_release_ip() does) stops the announcements. */
352 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
353 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
354 ctdb_control_send_arp, arp);
/* Start announcing vnn's public address. Lazily creates
 * vnn->takeover_ctx, allocates a zeroed ctdb_takeover_arp under it,
 * moves vnn->tcp_array into that state (talloc_steal, then the vnn field
 * is cleared and tcp_update_needed is set) and schedules
 * ctdb_control_send_arp() with a zero timeval.
 * NOTE(review): allocation-failure handling, the remaining arp field
 * setup and the return values are on lines missing from this listing. */
357 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
358 struct ctdb_vnn *vnn)
360 struct ctdb_takeover_arp *arp;
361 struct ctdb_tcp_array *tcparray;
363 if (!vnn->takeover_ctx) {
364 vnn->takeover_ctx = talloc_new(vnn);
365 if (!vnn->takeover_ctx) {
370 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
376 arp->addr = vnn->public_address;
379 tcparray = vnn->tcp_array;
381 /* add all of the known tcp connections for this IP to the
382 list of tcp connections to send tickle acks for */
383 arp->tcparray = talloc_steal(arp, tcparray);
385 vnn->tcp_array = NULL;
386 vnn->tcp_update_needed = true;
389 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
390 timeval_zero(), ctdb_control_send_arp, arp);
/* Per-request state for the release-ip path: the pending control, a copy
 * of the address being released and the vnn it belongs to. Used by
 * ctdb_control_release_ip() and release_ip_callback(). */
395 struct takeover_callback_state {
396 struct ctdb_req_control *c;
397 ctdb_sock_addr *addr;
398 struct ctdb_vnn *vnn;
/* Per-request state for the take-ip path: the pending control and the vnn
 * being taken over. Used by ctdb_do_takeip() and its callback. */
401 struct ctdb_do_takeip_state {
402 struct ctdb_req_control *c;
403 struct ctdb_vnn *vnn;
407 called when takeip event finishes
/* Completion callback for the take-ip event script. private_data is a
 * struct ctdb_do_takeip_state.
 * On the visible failure path it logs, replies to the control with the
 * script status and flags the local node NODE_FLAGS_UNHEALTHY. On the
 * success path it optionally announces the address (when
 * ctdb->do_checkpublicip is set), sends a CTDB_SRVID_TAKE_IP message
 * carrying the address string to this node, and replies with 0.
 * NOTE(review): the -ETIME handling, the status test guarding the
 * failure branch and the cleanup of state are on lines missing from
 * this listing. */
409 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
412 struct ctdb_do_takeip_state *state =
413 talloc_get_type(private_data, struct ctdb_do_takeip_state);
418 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
420 if (status == -ETIME) {
423 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
424 ctdb_addr_to_str(&state->vnn->public_address),
425 ctdb_vnn_iface_string(state->vnn)));
426 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
428 node->flags |= NODE_FLAGS_UNHEALTHY;
433 if (ctdb->do_checkpublicip) {
435 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
437 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
/* NOTE(review): data.dptr aliases the buffer returned by
 * ctdb_addr_to_str() instead of a copy (release_ip_callback() uses
 * talloc_strdup). Confirm that buffer's lifetime covers the send. */
444 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
445 data.dsize = strlen((char *)data.dptr) + 1;
446 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
448 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
451 /* the control succeeded */
452 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_do_takeip(): clears the vnn's
 * update_in_flight flag when the take-ip state is freed. */
457 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
459 state->vnn->update_in_flight = false;
464 take over an ip address
/* Begin taking over vnn's public address on behalf of control c.
 * Refuses when an update for this vnn is already in flight, assigns an
 * interface with ctdb_vnn_assign_iface(), allocates the callback state
 * under vnn, re-parents c to ctdb, sets update_in_flight (cleared again
 * by ctdb_takeip_destructor when the state is freed) and starts the
 * event script with ctdb_do_takeip_callback as completion handler.
 * NOTE(review): the return values, the ret checks and several arguments
 * of ctdb_event_script_callback() are on lines missing from this
 * listing. */
466 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
467 struct ctdb_req_control *c,
468 struct ctdb_vnn *vnn)
471 struct ctdb_do_takeip_state *state;
473 if (vnn->update_in_flight) {
474 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
475 "update for this IP already in flight\n",
476 ctdb_addr_to_str(&vnn->public_address),
477 vnn->public_netmask_bits));
481 ret = ctdb_vnn_assign_iface(ctdb, vnn);
483 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
484 "assign a usable interface\n",
485 ctdb_addr_to_str(&vnn->public_address),
486 vnn->public_netmask_bits));
490 state = talloc(vnn, struct ctdb_do_takeip_state);
491 CTDB_NO_MEMORY(ctdb, state);
493 state->c = talloc_steal(ctdb, c);
496 vnn->update_in_flight = true;
497 talloc_set_destructor(state, ctdb_takeip_destructor);
499 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
500 ctdb_addr_to_str(&vnn->public_address),
501 vnn->public_netmask_bits,
502 ctdb_vnn_iface_string(vnn)));
504 ret = ctdb_event_script_callback(ctdb,
506 ctdb_do_takeip_callback,
511 ctdb_vnn_iface_string(vnn),
512 ctdb_addr_to_str(&vnn->public_address),
513 vnn->public_netmask_bits);
516 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
517 ctdb_addr_to_str(&vnn->public_address),
518 ctdb_vnn_iface_string(vnn)));
/* Per-request state for the update-ip (move between interfaces) path:
 * the pending control, the interface the address was on before the move
 * (restored by the callback on failure) and the vnn being moved. */
526 struct ctdb_do_updateip_state {
527 struct ctdb_req_control *c;
528 struct ctdb_iface *old;
529 struct ctdb_vnn *vnn;
533 called when updateip event finishes
/* Completion callback for the update-ip event script. private_data is a
 * struct ctdb_do_updateip_state.
 * On the visible failure path it logs, unassigns the new interface,
 * points the vnn back at state->old (re-taking a reference) and replies
 * with the script status. On success it optionally announces the
 * address (when ctdb->do_checkpublicip is set) and replies with 0.
 * NOTE(review): the -ETIME handling, the status test guarding the
 * failure branch and the cleanup of state are on lines missing from
 * this listing. */
535 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
538 struct ctdb_do_updateip_state *state =
539 talloc_get_type(private_data, struct ctdb_do_updateip_state);
543 if (status == -ETIME) {
546 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
547 ctdb_addr_to_str(&state->vnn->public_address),
549 ctdb_vnn_iface_string(state->vnn)));
552 * All we can do is reset the old interface
553 * and let the next run fix it
555 ctdb_vnn_unassign_iface(ctdb, state->vnn);
556 state->vnn->iface = state->old;
557 state->vnn->iface->references++;
559 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564 if (ctdb->do_checkpublicip) {
566 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
568 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
575 /* the control succeeded */
576 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_do_updateip(): clears the vnn's
 * update_in_flight flag when the update-ip state is freed. */
581 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
583 state->vnn->update_in_flight = false;
588 update (move) an ip address
/* Move vnn's public address to a (possibly different) interface on
 * behalf of control c.
 * Remembers the current interface in `old`, refuses when an update is
 * already in flight, unassigns and re-assigns the interface, and
 * short-circuits with a success reply when the old and new interface
 * names are equal. Otherwise it allocates callback state under vnn,
 * re-parents c to ctdb, sets update_in_flight (cleared by
 * ctdb_updateip_destructor) and runs the CTDB_EVENT_UPDATE_IP script
 * with ctdb_do_updateip_callback as completion handler.
 * NOTE(review): old is dereferenced (old->name) without a visible NULL
 * check; return values and ret checks are on lines missing from this
 * listing. */
590 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
591 struct ctdb_req_control *c,
592 struct ctdb_vnn *vnn)
595 struct ctdb_do_updateip_state *state;
596 struct ctdb_iface *old = vnn->iface;
597 const char *new_name;
599 if (vnn->update_in_flight) {
600 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
601 "update for this IP already in flight\n",
602 ctdb_addr_to_str(&vnn->public_address),
603 vnn->public_netmask_bits));
607 ctdb_vnn_unassign_iface(ctdb, vnn);
608 ret = ctdb_vnn_assign_iface(ctdb, vnn);
/* NOTE(review): the message below spells "assign" as "assin". */
610 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
611 "assin a usable interface (old iface '%s')\n",
612 ctdb_addr_to_str(&vnn->public_address),
613 vnn->public_netmask_bits,
618 new_name = ctdb_vnn_iface_string(vnn);
619 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
620 /* A benign update from one interface onto itself.
621 * no need to run the eventscripts in this case, just return
624 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
628 state = talloc(vnn, struct ctdb_do_updateip_state);
629 CTDB_NO_MEMORY(ctdb, state);
631 state->c = talloc_steal(ctdb, c);
635 vnn->update_in_flight = true;
636 talloc_set_destructor(state, ctdb_updateip_destructor);
638 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
639 "interface %s to %s\n",
640 ctdb_addr_to_str(&vnn->public_address),
641 vnn->public_netmask_bits,
645 ret = ctdb_event_script_callback(ctdb,
647 ctdb_do_updateip_callback,
650 CTDB_EVENT_UPDATE_IP,
654 ctdb_addr_to_str(&vnn->public_address),
655 vnn->public_netmask_bits);
657 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
658 ctdb_addr_to_str(&vnn->public_address),
659 old->name, new_name));
668 Find the vnn that corresponds to a given public ip address
669 returns NULL if the address is not known as a public address
/* Scan ctdb->vnn for the entry whose public_address matches addr
 * according to ctdb_same_ip(). The function returns a struct ctdb_vnn
 * pointer; the return statements themselves are not visible in this
 * listing (presumably the match, or NULL when nothing matches). */
671 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
673 struct ctdb_vnn *vnn;
675 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
676 if (ctdb_same_ip(&vnn->public_address, addr)) {
685 take over an ip address
/* Control handler: take over the public address described by
 * indata.dptr (interpreted as a struct ctdb_public_ip).
 * Visible flow: reject requests whose pnn is not ours; look up the vnn;
 * when ctdb->do_checkpublicip is set, ask the kernel whether the
 * address is already configured (have_ip); pick the best interface;
 * work through a series of consistency cases between vnn->iface,
 * vnn->pnn and have_ip; then dispatch to ctdb_do_takeip() or
 * ctdb_do_updateip(), or log a redundant takeover.
 * NOTE(review): the remaining parameters, the assignments to
 * do_takeip/do_updateip, every return and the bodies of most branches
 * are on lines missing from this listing, so the exact outcome of each
 * case cannot be confirmed from here. */
687 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
688 struct ctdb_req_control *c,
693 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
694 struct ctdb_vnn *vnn;
695 bool have_ip = false;
696 bool do_updateip = false;
697 bool do_takeip = false;
698 struct ctdb_iface *best_iface = NULL;
700 if (pip->pnn != ctdb->pnn) {
701 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
702 "with pnn %d, but we're node %d\n",
703 ctdb_addr_to_str(&pip->addr),
704 pip->pnn, ctdb->pnn));
708 /* look up our vnn entry for this address */
709 vnn = find_public_ip_vnn(ctdb, &pip->addr);
711 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
712 ctdb_addr_to_str(&pip->addr)));
716 if (ctdb->do_checkpublicip) {
717 have_ip = ctdb_sys_have_ip(&pip->addr);
719 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
720 if (best_iface == NULL) {
/* NOTE(review): the two adjacent literals below concatenate to
 * "failed to finda usable interface" - a space is missing. */
721 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
722 "a usable interface (old %s, have_ip %d)\n",
723 ctdb_addr_to_str(&vnn->public_address),
724 vnn->public_netmask_bits,
725 ctdb_vnn_iface_string(vnn),
730 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
731 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
736 if (vnn->iface == NULL && have_ip) {
737 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
738 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
739 ctdb_addr_to_str(&vnn->public_address)));
743 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
/* NOTE(review): "node %d" and "and we are" below concatenate without
 * a separating space. */
744 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
745 "and we have it on iface[%s], but it was assigned to node %d"
746 "and we are node %d, banning ourself\n",
747 ctdb_addr_to_str(&vnn->public_address),
748 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
753 if (vnn->pnn == -1 && have_ip) {
754 vnn->pnn = ctdb->pnn;
755 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
756 "and we already have it on iface[%s], update local daemon\n",
757 ctdb_addr_to_str(&vnn->public_address),
758 ctdb_vnn_iface_string(vnn)));
/* NOTE(review): vnn->iface is dereferenced below; whether an enclosing
 * check guarantees it is non-NULL is not visible in this listing. */
763 if (vnn->iface != best_iface) {
764 if (!vnn->iface->link_up) {
766 } else if (vnn->iface->references > (best_iface->references + 1)) {
767 /* only move when the rebalance gains something */
775 ctdb_vnn_unassign_iface(ctdb, vnn);
782 ret = ctdb_do_takeip(ctdb, c, vnn);
786 } else if (do_updateip) {
787 ret = ctdb_do_updateip(ctdb, c, vnn);
793 * The interface is up and the kernel known the ip
796 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
797 ctdb_addr_to_str(&pip->addr),
798 vnn->public_netmask_bits,
799 ctdb_vnn_iface_string(vnn)));
803 /* tell ctdb_control.c that we will be replying asynchronously */
810 takeover an ip address old v4 style
/* Legacy (v4-style) entry point: copies the incoming payload into a
 * zeroed struct ctdb_public_ip allocated under c and forwards to
 * ctdb_control_takeover_ip().
 * NOTE(review): memcpy copies indata.dsize bytes into a buffer of
 * sizeof(struct ctdb_public_ip). No bound check on indata.dsize is
 * visible here - confirm the caller validates the size. */
812 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
813 struct ctdb_req_control *c,
819 data.dsize = sizeof(struct ctdb_public_ip);
820 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
821 CTDB_NO_MEMORY(ctdb, data.dptr);
823 memcpy(data.dptr, indata.dptr, indata.dsize);
824 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
828 kill any clients that are registered with an IP that is being released
/* Walk ctdb->client_ip_list and, for every entry whose address matches
 * addr (ctdb_same_ip), look up the owning client via ctdb_reqid_find()
 * and send SIGKILL to its pid when that pid is non-zero.
 * NOTE(review): how tmp_addr is filled from the list entry, and whether
 * client is checked for NULL before client->pid is read, are on lines
 * missing from this listing. */
830 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
832 struct ctdb_client_ip *ip;
834 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
835 ctdb_addr_to_str(addr)));
837 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
838 ctdb_sock_addr tmp_addr;
841 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
843 ctdb_addr_to_str(&ip->addr)));
845 if (ctdb_same_ip(&tmp_addr, addr)) {
846 struct ctdb_client *client = ctdb_reqid_find(ctdb,
849 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
851 ctdb_addr_to_str(&ip->addr),
854 if (client->pid != 0) {
855 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
856 (unsigned)client->pid,
857 ctdb_addr_to_str(addr),
859 ctdb_kill(ctdb, client->pid, SIGKILL);
866 called when releaseip event finishes
/* Completion callback for the release-ip event script. private_data is a
 * struct takeover_callback_state.
 * Sends a CTDB_SRVID_RELEASE_IP message carrying a talloc'd copy of the
 * address string to this node, kills clients registered on the address,
 * unassigns the vnn's interface and replies to the control with 0.
 * NOTE(review): the -ETIME handling and the cleanup of state are on
 * lines missing from this listing. */
868 static void release_ip_callback(struct ctdb_context *ctdb, int status,
871 struct takeover_callback_state *state =
872 talloc_get_type(private_data, struct takeover_callback_state);
875 if (status == -ETIME) {
879 /* send a message to all clients of this node telling them
880 that the cluster has been reconfigured and they should
881 release any sockets on this IP */
882 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
883 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
884 data.dsize = strlen((char *)data.dptr)+1;
886 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
888 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
890 /* kill clients that have registered with this IP */
891 release_kill_clients(ctdb, state->addr);
893 ctdb_vnn_unassign_iface(ctdb, state->vnn);
895 /* the control succeeded */
896 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_control_release_ip(): clears the
 * vnn's update_in_flight flag when the release state is freed. */
900 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
902 state->vnn->update_in_flight = false;
907 release an ip address
/* Control handler: release the public address described by indata.dptr
 * (interpreted as a struct ctdb_public_ip).
 * Visible flow: look up the vnn; free vnn->takeover_ctx, which stops
 * pending ARP announcements; treat the request as redundant when the
 * address is not held (kernel check when do_checkpublicip is set,
 * otherwise vnn->iface == NULL); refuse when an update is already in
 * flight; determine the interface name; allocate callback state under
 * ctdb; set update_in_flight (cleared by ctdb_releaseip_destructor); run
 * the CTDB_EVENT_RELEASE_IP script with release_ip_callback; and set
 * *async_reply so the reply is sent from the callback.
 * NOTE(review): returns, ret checks and the release of the iface
 * string (strdup on one path, ctdb_sys_find_ifname on the other) are on
 * lines missing from this listing - confirm iface is freed on every
 * path, including the CTDB_NO_MEMORY early exits. */
909 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
910 struct ctdb_req_control *c,
915 struct takeover_callback_state *state;
916 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
917 struct ctdb_vnn *vnn;
920 /* look up our vnn entry for this address */
921 vnn = find_public_ip_vnn(ctdb, &pip->addr);
923 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
924 ctdb_addr_to_str(&pip->addr)));
929 /* stop any previous arps */
930 talloc_free(vnn->takeover_ctx);
931 vnn->takeover_ctx = NULL;
933 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
934 * lazy multicast to drop an IP from any node that isn't the
935 * intended new node. The following causes makes ctdbd ignore
936 * a release for any address it doesn't host.
938 if (ctdb->do_checkpublicip) {
939 if (!ctdb_sys_have_ip(&pip->addr)) {
940 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
941 ctdb_addr_to_str(&pip->addr),
942 vnn->public_netmask_bits,
943 ctdb_vnn_iface_string(vnn)));
944 ctdb_vnn_unassign_iface(ctdb, vnn);
948 if (vnn->iface == NULL) {
949 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
950 ctdb_addr_to_str(&pip->addr),
951 vnn->public_netmask_bits));
956 /* There is a potential race between take_ip and us because we
957 * update the VNN via a callback that run when the
958 * eventscripts have been run. Avoid the race by allowing one
959 * update to be in flight at a time.
961 if (vnn->update_in_flight) {
962 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
963 "update for this IP already in flight\n",
964 ctdb_addr_to_str(&vnn->public_address),
965 vnn->public_netmask_bits));
969 if (ctdb->do_checkpublicip) {
970 iface = ctdb_sys_find_ifname(&pip->addr);
972 DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
976 iface = strdup(ctdb_vnn_iface_string(vnn));
979 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
980 ctdb_addr_to_str(&pip->addr),
981 vnn->public_netmask_bits,
985 state = talloc(ctdb, struct takeover_callback_state);
986 CTDB_NO_MEMORY(ctdb, state);
988 state->c = talloc_steal(state, c);
989 state->addr = talloc(state, ctdb_sock_addr);
990 CTDB_NO_MEMORY(ctdb, state->addr);
991 *state->addr = pip->addr;
994 vnn->update_in_flight = true;
995 talloc_set_destructor(state, ctdb_releaseip_destructor);
997 ret = ctdb_event_script_callback(ctdb,
998 state, release_ip_callback, state,
1000 CTDB_EVENT_RELEASE_IP,
1003 ctdb_addr_to_str(&pip->addr),
1004 vnn->public_netmask_bits);
1007 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1008 ctdb_addr_to_str(&pip->addr),
1009 ctdb_vnn_iface_string(vnn)));
1014 /* tell the control that we will reply asynchronously */
1015 *async_reply = true;
1020 release an ip address old v4 style
/* Legacy (v4-style) entry point: copies the incoming payload into a
 * zeroed struct ctdb_public_ip allocated under c and forwards to
 * ctdb_control_release_ip().
 * NOTE(review): memcpy copies indata.dsize bytes into a buffer of
 * sizeof(struct ctdb_public_ip). No bound check on indata.dsize is
 * visible here - confirm the caller validates the size. */
1022 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
1023 struct ctdb_req_control *c,
1029 data.dsize = sizeof(struct ctdb_public_ip);
1030 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
1031 CTDB_NO_MEMORY(ctdb, data.dptr);
1033 memcpy(data.dptr, indata.dptr, indata.dsize);
1034 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/* Register a public address (addr/mask) that may live on any of the
 * comma-separated interface names in ifaces.
 * Visible flow: verify each named interface exists; reject an address
 * already present in ctdb->vnn; allocate a zeroed vnn under ctdb; build
 * the NULL-terminated vnn->ifaces array by tokenising a talloc'd copy of
 * ifaces; record address and mask; when check_address is set and the
 * kernel already has the address, mark it as hosted by this node;
 * register every interface with ctdb_add_local_iface(); finally link
 * the vnn into ctdb->vnn.
 * NOTE(review): return values, the increments of num and the release of
 * the strdup'd tmp are on lines missing from this listing. */
1038 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1039 ctdb_sock_addr *addr,
1040 unsigned mask, const char *ifaces,
1043 struct ctdb_vnn *vnn;
/* NOTE(review): strdup() result is not visibly checked for NULL, and
 * strtok() keeps static state (not reentrant). Confirm tmp is freed on
 * every exit from the loop. */
1050 tmp = strdup(ifaces);
1051 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1052 if (!ctdb_sys_check_iface_exists(iface)) {
1053 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1060 /* Verify that we don't have an entry for this ip yet */
1061 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1062 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1063 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1064 ctdb_addr_to_str(addr)));
1069 /* create a new vnn structure for this ip address */
1070 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1071 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
/* NOTE(review): the talloc_array() result is not checked here; only the
 * talloc_realloc() inside the loop is. */
1072 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1073 tmp = talloc_strdup(vnn, ifaces);
1074 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1075 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1076 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1077 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1078 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1079 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1083 vnn->ifaces[num] = NULL;
1084 vnn->public_address = *addr;
1085 vnn->public_netmask_bits = mask;
1087 if (check_address) {
1088 if (ctdb_sys_have_ip(addr)) {
1089 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1090 vnn->pnn = ctdb->pnn;
1094 for (i=0; vnn->ifaces[i]; i++) {
1095 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1097 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1098 "for public_address[%s]\n",
1099 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1105 DLIST_ADD(ctdb->vnn, vnn);
1111 setup the event script directory
/* Store a talloc'd copy of script_dir (owned by ctdb) in
 * ctdb->event_script_dir. The return statement is not visible in this
 * listing. */
1113 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
1115 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
1116 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
/* Timed-event callback. private_data is the ctdb context.
 * For every vnn on ctdb->vnn it checks that each named interface still
 * exists, logging at DEBUG_CRIT when one does not, and then re-arms
 * itself on ctdb->check_public_ifaces_ctx with a 30 second offset.
 * NOTE(review): any action taken beyond logging when an interface is
 * missing is on lines not visible in this listing. */
1120 static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
1121 struct timeval t, void *private_data)
1123 struct ctdb_context *ctdb = talloc_get_type(private_data,
1124 struct ctdb_context);
1125 struct ctdb_vnn *vnn;
1127 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1130 for (i=0; vnn->ifaces[i] != NULL; i++) {
1131 if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
1132 DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
1134 ctdb_addr_to_str(&vnn->public_address)));
1139 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
1140 timeval_current_ofs(30, 0),
1141 ctdb_check_interfaces_event, ctdb);
/* (Re)start the periodic interface check. Any existing
 * check_public_ifaces_ctx is freed first, which drops the timer that was
 * registered on it. A fresh context is then created (allocation failure
 * is fatal) and ctdb_check_interfaces_event() is scheduled on it with a
 * 30 second offset. The return statement is not visible in this
 * listing. */
1145 int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
1147 if (ctdb->check_public_ifaces_ctx != NULL) {
1148 talloc_free(ctdb->check_public_ifaces_ctx);
1149 ctdb->check_public_ifaces_ctx = NULL;
1152 ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
1153 if (ctdb->check_public_ifaces_ctx == NULL) {
1154 ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
1157 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
1158 timeval_current_ofs(30, 0),
1159 ctdb_check_interfaces_event, ctdb);
1166 setup the public address lists from a file
/* Load ctdb->public_addresses_file and register every entry with
 * ctdb_add_public_address(), passing check_addresses through.
 * Per line: leading spaces and tabs are skipped, empty lines are tested
 * for, the first whitespace-separated token is taken as the address and
 * the second as the interface list. When no interface token is present
 * ctdb->default_public_interface is used, and its absence is logged at
 * DEBUG_CRIT.
 * NOTE(review): the loop bodies (e.g. how trailing empty lines are
 * dropped), token assignments, returns and the release of `lines` are on
 * lines missing from this listing. strtok() is used, which is not
 * reentrant. */
1168 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1174 lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb);
1175 if (lines == NULL) {
1176 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1179 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1183 for (i=0;i<nlines;i++) {
1185 ctdb_sock_addr addr;
1186 const char *addrstr;
1191 while ((*line == ' ') || (*line == '\t')) {
1197 if (strcmp(line, "") == 0) {
1200 tok = strtok(line, " \t");
1202 tok = strtok(NULL, " \t");
1204 if (NULL == ctdb->default_public_interface) {
1205 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1210 ifaces = ctdb->default_public_interface;
1215 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1216 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1220 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1221 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Configure the "single public ip": allocate a vnn (svnn) under ctdb with
 * a two-slot, NULL-terminated ifaces array holding a copy of iface,
 * parse ip into svnn->public_address, register the interface with
 * ctdb_add_local_iface(), force its link_up to true, assign it to the
 * vnn and publish the result as ctdb->single_ip_vnn.
 * NOTE(review): the iface and ip parameter declarations, the checks on
 * ok/ret/cur, the cleanup of svnn on failure and the returns are on
 * lines missing from this listing. */
1232 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1236 struct ctdb_vnn *svnn;
1237 struct ctdb_iface *cur = NULL;
1241 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1242 CTDB_NO_MEMORY(ctdb, svnn);
1244 svnn->ifaces = talloc_array(svnn, const char *, 2);
1245 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1246 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1247 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1248 svnn->ifaces[1] = NULL;
1250 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1256 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1258 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1259 "for single_ip[%s]\n",
1261 ctdb_addr_to_str(&svnn->public_address)));
1266 /* assume the single public ip interface is initially "good" */
1267 cur = ctdb_find_iface(ctdb, iface);
1269 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1272 cur->link_up = true;
1274 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1280 ctdb->single_ip_vnn = svnn;
1284 /* Given a physical node, return the number of
1285 public addresses that are currently assigned to this node.
/* Walk the ips list and test each entry's pnn against the requested pnn.
 * The counter and the return are on lines missing from this listing;
 * presumably the number of matching entries is returned. The ctdb
 * argument is not used on any visible line. */
1287 static int node_ip_coverage(struct ctdb_context *ctdb,
1289 struct ctdb_public_ip_list *ips)
1293 for (;ips;ips=ips->next) {
1294 if (ips->pnn == pnn) {
1302 /* Can the given node host the given IP: is the public IP known to the
1303 * node and is NOIPHOST unset?
/* Decide whether node pnn may host ip: the node's NODE_FLAGS_NOIPHOST
 * flag is tested first, then ip->addr is searched for (ctdb_same_ip) in
 * the node's available_public_ips; a missing list is handled
 * separately. The return statements are on lines missing from this
 * listing.
 * NOTE(review): pnn indexes both nodemap->nodes and ctdb->nodes with no
 * visible range check. */
1305 static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
1306 struct ctdb_node_map *nodemap,
1307 struct ctdb_public_ip_list *ip)
1309 struct ctdb_all_public_ips *public_ips;
1312 if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPHOST) {
1316 public_ips = ctdb->nodes[pnn]->available_public_ips;
1318 if (public_ips == NULL) {
1322 for (i=0;i<public_ips->num;i++) {
1323 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1324 /* yes, this node can serve this public ip */
/* Like can_node_host_ip(), but additionally tests the node's
 * NODE_FLAGS_NOIPTAKEOVER flag first. The result for a flagged node is
 * on a line missing from this listing (presumably false). */
1332 static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
1333 struct ctdb_node_map *nodemap,
1334 struct ctdb_public_ip_list *ip)
1336 if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPTAKEOVER) {
1340 return can_node_host_ip(ctdb, pnn, nodemap, ip);
1343 /* search the node list for a node to take over this ip.
1344 pick the node that is currently serving the least number of ips
1345 so that the ips get spread out evenly.
/* Choose a node to take over ip. Every node in nodemap that passes
 * can_node_takeover_ip() is scored with node_ip_coverage() against
 * all_ips; a warning is logged when no node could be found.
 * NOTE(review): the min/pnn bookkeeping, the assignment of the chosen
 * node and the return values are on lines missing from this listing. */
1347 static int find_takeover_node(struct ctdb_context *ctdb,
1348 struct ctdb_node_map *nodemap,
1349 struct ctdb_public_ip_list *ip,
1350 struct ctdb_public_ip_list *all_ips)
1352 int pnn, min=0, num;
1356 for (i=0;i<nodemap->num;i++) {
1357 /* verify that this node can serve this ip */
1358 if (!can_node_takeover_ip(ctdb, i, nodemap, ip)) {
1359 /* no it couldnt so skip to the next node */
1363 num = node_ip_coverage(ctdb, i, all_ips);
1364 /* was this the first node we checked ? */
1376 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1377 ctdb_addr_to_str(&ip->addr)));
/* Build an IP_KEYLEN-word key from an address. The key is zeroed, then
 * filled depending on the address family: one branch stores the IPv4
 * address in key[3], the other stores the four 32-bit words of the IPv6
 * address in key[0..3]. Each word passes through htonl(). Unknown
 * families are logged.
 * The key lives in a function-local static array, so every call
 * overwrites the previous result; callers that need two keys at once
 * must copy the first (ip_distance() does so with memcpy).
 * NOTE(review): the case labels and the return are on lines missing
 * from this listing. The cast of s6_addr to uint32_t* assumes suitable
 * alignment - confirm. */
1387 static uint32_t *ip_key(ctdb_sock_addr *ip)
1389 static uint32_t key[IP_KEYLEN];
1391 bzero(key, sizeof(key));
1393 switch (ip->sa.sa_family) {
1395 key[3] = htonl(ip->ip.sin_addr.s_addr);
1398 uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
1399 key[0] = htonl(s6_a32[0]);
1400 key[1] = htonl(s6_a32[1]);
1401 key[2] = htonl(s6_a32[2]);
1402 key[3] = htonl(s6_a32[3]);
1406 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/* Callback taking a new entry (parm) and the entry previously stored
 * under the same key (data). When a previous entry exists and the new
 * one has no node assigned (pnn == -1), the new entry inherits the
 * previous pnn.
 * NOTE(review): the returns are on lines missing from this listing;
 * presumably the new entry is returned. */
1413 static void *add_ip_callback(void *parm, void *data)
1415 struct ctdb_public_ip_list *this_ip = parm;
1416 struct ctdb_public_ip_list *prev_ip = data;
1418 if (prev_ip == NULL) {
1421 if (this_ip->pnn == -1) {
1422 this_ip->pnn = prev_ip->pnn;
/* Traversal callback: param is the address of a list head and data is one
 * ctdb_public_ip_list entry. The entry's next pointer is set to the
 * current head, i.e. the entry is being pushed onto the front of the
 * list.
 * NOTE(review): the update of *ip_list and the return value are on
 * lines missing from this listing. */
1428 static int getips_count_callback(void *param, void *data)
1430 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1431 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1433 new_ip->next = *ip_list;
/* Build one list of public addresses merged across all nodes.
 * ctdb->ip_tree is discarded and recreated; for every node (the
 * NODE_FLAGS_DELETED flag and a missing known_public_ips list are
 * tested) each known address is wrapped in a ctdb_public_ip_list entry
 * allocated under the tree and inserted keyed by ip_key(). An entry
 * only carries a pnn when the reporting node itself claims to host the
 * address. Finally the tree is traversed with getips_count_callback to
 * thread the entries into ip_list.
 * The entries are owned by ctdb->ip_tree, so they become invalid when
 * the tree is freed (for example on the next call).
 * NOTE(review): the pnn value used for entries not hosted by the
 * reporting node, the insert callback argument, the initialisation of
 * ip_list and the return are on lines missing from this listing. */
1438 static struct ctdb_public_ip_list *
1439 create_merged_ip_list(struct ctdb_context *ctdb)
1442 struct ctdb_public_ip_list *ip_list;
1443 struct ctdb_all_public_ips *public_ips;
1445 if (ctdb->ip_tree != NULL) {
1446 talloc_free(ctdb->ip_tree);
1447 ctdb->ip_tree = NULL;
1449 ctdb->ip_tree = trbt_create(ctdb, 0);
1451 for (i=0;i<ctdb->num_nodes;i++) {
1452 public_ips = ctdb->nodes[i]->known_public_ips;
1454 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1458 /* there were no public ips for this node */
1459 if (public_ips == NULL) {
1463 for (j=0;j<public_ips->num;j++) {
1464 struct ctdb_public_ip_list *tmp_ip;
1466 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1467 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1468 /* Do not use information about IP addresses hosted
1469 * on other nodes, it may not be accurate */
1470 if (public_ips->ips[j].pnn == ctdb->nodes[i]->pnn) {
1471 tmp_ip->pnn = public_ips->ips[j].pnn;
1475 tmp_ip->addr = public_ips->ips[j].addr;
1476 tmp_ip->next = NULL;
1478 trbt_insertarray32_callback(ctdb->ip_tree,
1479 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1486 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1492 * This is the length of the longest common prefix between the IPs.
1493 * It is calculated by XOR-ing the 2 IPs together and counting the
1494 * number of leading zeroes. The implementation means that all
1495 * addresses end up being 128 bits long.
1497 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1498 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1499 * lots of nodes and IP addresses?
1501 static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1503 uint32_t ip1_k[IP_KEYLEN];
1508 uint32_t distance = 0;
1510 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1512 for (i=0; i<IP_KEYLEN; i++) {
1513 x = ip1_k[i] ^ t[i];
1517 /* Count number of leading zeroes.
1518 * FIXME? This could be optimised...
1520 while ((x & (1 << 31)) == 0) {
1530 /* Calculate the IP distance for the given IP relative to IPs on the
1531 given node. The ips argument is generally the all_ips variable
1532 used in the main part of the algorithm.
1534 static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1535 struct ctdb_public_ip_list *ips,
1538 struct ctdb_public_ip_list *t;
1543 for (t=ips; t != NULL; t=t->next) {
1544 if (t->pnn != pnn) {
1548 /* Optimisation: We never calculate the distance
1549 * between an address and itself. This allows us to
1550 * calculate the effect of removing an address from a
1551 * node by simply calculating the distance between
1552 * that address and all of the existing addresses.
1553 * Moreover, we assume that we're only ever dealing
1554 * with addresses from all_ips so we can identify an
1555 * address via a pointer rather than doing a more
1556 * expensive address comparison. */
1557 if (&(t->addr) == ip) {
1561 d = ip_distance(ip, &(t->addr));
1562 sum += d * d; /* Cheaper than pulling in math.h :-) */
1568 /* Return the LCP2 imbalance metric for addresses currently assigned
1571 static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1573 struct ctdb_public_ip_list *t;
1575 uint32_t imbalance = 0;
1577 for (t=all_ips; t!=NULL; t=t->next) {
1578 if (t->pnn != pnn) {
1581 /* Pass the rest of the IPs rather than the whole
1584 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
1590 /* Allocate any unassigned IPs just by looping through the IPs and
1591 * finding the best node for each.
1593 static void basic_allocate_unassigned(struct ctdb_context *ctdb,
1594 struct ctdb_node_map *nodemap,
1595 struct ctdb_public_ip_list *all_ips)
1597 struct ctdb_public_ip_list *tmp_ip;
1599 /* loop over all ip's and find a physical node to cover for
1602 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1603 if (tmp_ip->pnn == -1) {
1604 if (find_takeover_node(ctdb, nodemap, tmp_ip, all_ips)) {
1605 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1606 ctdb_addr_to_str(&tmp_ip->addr)));
1612 /* Basic non-deterministic rebalancing algorithm.
1614 static void basic_failback(struct ctdb_context *ctdb,
1615 struct ctdb_node_map *nodemap,
1616 struct ctdb_public_ip_list *all_ips,
1620 int maxnode, maxnum, minnode, minnum, num, retries;
1621 struct ctdb_public_ip_list *tmp_ip;
1629 /* for each ip address, loop over all nodes that can serve
1630 this ip and make sure that the difference between the node
1631 serving the most and the node serving the least ip's are
1634 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1635 if (tmp_ip->pnn == -1) {
1639 /* Get the highest and lowest number of IPs served by any
1640 valid node which can serve this ip.
1644 for (i=0;i<nodemap->num;i++) {
1645 /* only check nodes that can actually serve this ip */
1646 if (!can_node_takeover_ip(ctdb, i, nodemap, tmp_ip)) {
1647 /* no it couldnt so skip to the next node */
1651 num = node_ip_coverage(ctdb, i, all_ips);
1652 if (maxnode == -1) {
1661 if (minnode == -1) {
1671 if (maxnode == -1) {
1672 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1673 ctdb_addr_to_str(&tmp_ip->addr)));
1678 /* if the spread between the smallest and largest coverage by
1679 a node is >=2 we steal one of the ips from the node with
1680 most coverage to even things out a bit.
1681 try to do this a limited number of times since we dont
1682 want to spend too much time balancing the ip coverage.
1684 if ( (maxnum > minnum+1)
1685 && (retries < (num_ips + 5)) ){
1686 struct ctdb_public_ip_list *tmp;
1688 /* Reassign one of maxnode's VNNs */
1689 for (tmp=all_ips;tmp;tmp=tmp->next) {
1690 if (tmp->pnn == maxnode) {
1691 (void)find_takeover_node(ctdb, nodemap, tmp, all_ips);
1700 struct ctdb_rebalancenodes {
1701 struct ctdb_rebalancenodes *next;
1704 static struct ctdb_rebalancenodes *force_rebalance_list = NULL;
1707 /* set this flag to force the node to be rebalanced even if it did not
1708 just become healthy again.
1710 void lcp2_forcerebalance(struct ctdb_context *ctdb, uint32_t pnn)
1712 struct ctdb_rebalancenodes *rebalance;
1714 for (rebalance = force_rebalance_list; rebalance; rebalance = rebalance->next) {
1715 if (rebalance->pnn == pnn) {
1720 rebalance = talloc(ctdb, struct ctdb_rebalancenodes);
1721 rebalance->pnn = pnn;
1722 rebalance->next = force_rebalance_list;
1723 force_rebalance_list = rebalance;
1726 /* Do necessary LCP2 initialisation. Bury it in a function here so
1727 * that we can unit test it.
1729 static void lcp2_init(struct ctdb_context * tmp_ctx,
1730 struct ctdb_node_map * nodemap,
1731 struct ctdb_public_ip_list *all_ips,
1732 uint32_t **lcp2_imbalances,
1733 bool **rebalance_candidates)
1736 struct ctdb_public_ip_list *tmp_ip;
1738 *rebalance_candidates = talloc_array(tmp_ctx, bool, nodemap->num);
1739 CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
1740 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1741 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1743 for (i=0;i<nodemap->num;i++) {
1744 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1745 /* First step: assume all nodes are candidates */
1746 (*rebalance_candidates)[i] = true;
1749 /* 2nd step: if a node has IPs assigned then it must have been
1750 * healthy before, so we remove it from consideration. This
1751 * is overkill but is all we have because we don't maintain
1752 * state between takeover runs. An alternative would be to
1753 * keep state and invalidate it every time the recovery master
1756 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1757 if (tmp_ip->pnn != -1) {
1758 (*rebalance_candidates)[tmp_ip->pnn] = false;
1762 /* 3rd step: if a node is forced to re-balance then
1763 we allow failback onto the node */
1764 while (force_rebalance_list != NULL) {
1765 struct ctdb_rebalancenodes *next = force_rebalance_list->next;
1766 /* rebalance_candidates has nodemap->num entries, so a valid index is strictly below num */
1767 if (force_rebalance_list->pnn < nodemap->num) {
1768 (*rebalance_candidates)[force_rebalance_list->pnn] = true;
1771 DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
1772 talloc_free(force_rebalance_list);
1773 force_rebalance_list = next;
1777 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1778 * the IP/node combination that will cost the least.
1780 static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1781 struct ctdb_node_map *nodemap,
1782 struct ctdb_public_ip_list *all_ips,
1783 uint32_t *lcp2_imbalances)
1785 struct ctdb_public_ip_list *tmp_ip;
1789 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1790 struct ctdb_public_ip_list *minip;
1792 bool should_loop = true;
1793 bool have_unassigned = true;
1795 while (have_unassigned && should_loop) {
1796 should_loop = false;
1798 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1799 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1805 /* loop over each unassigned ip. */
1806 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1807 if (tmp_ip->pnn != -1) {
1811 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1812 /* only check nodes that can actually takeover this ip */
1813 if (!can_node_takeover_ip(ctdb, dstnode,
1815 /* no it couldnt so skip to the next node */
1819 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1820 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1821 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1822 ctdb_addr_to_str(&(tmp_ip->addr)),
1824 dstimbl - lcp2_imbalances[dstnode]));
1827 if ((minnode == -1) || (dstdsum < mindsum)) {
1837 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1839 /* If we found one then assign it to the given node. */
1840 if (minnode != -1) {
1841 minip->pnn = minnode;
1842 lcp2_imbalances[minnode] = minimbl;
1843 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1844 ctdb_addr_to_str(&(minip->addr)),
1849 /* There might be a better way but at least this is clear. */
1850 have_unassigned = false;
1851 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1852 if (tmp_ip->pnn == -1) {
1853 have_unassigned = true;
1858 /* We know if we have any unassigned addresses so we might as
1861 if (have_unassigned) {
1862 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1863 if (tmp_ip->pnn == -1) {
1864 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1865 ctdb_addr_to_str(&tmp_ip->addr)));
1871 /* LCP2 algorithm for rebalancing the cluster. Given a candidate node
1872 * to move IPs from, determines the best IP/destination node
1873 * combination to move from the source node.
1875 static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
1876 struct ctdb_node_map *nodemap,
1877 struct ctdb_public_ip_list *all_ips,
1880 uint32_t *lcp2_imbalances,
1881 bool *rebalance_candidates)
1883 int dstnode, mindstnode;
1884 uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
1885 uint32_t minsrcimbl, mindstimbl;
1886 struct ctdb_public_ip_list *minip;
1887 struct ctdb_public_ip_list *tmp_ip;
1889 /* Find an IP and destination node that best reduces imbalance. */
1895 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1896 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
1898 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1899 /* Only consider addresses on srcnode. */
1900 if (tmp_ip->pnn != srcnode) {
1904 /* What is this IP address costing the source node? */
1905 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
1906 srcimbl = candimbl - srcdsum;
1908 /* Consider what this IP address would cost each potential
1909 * destination node. Destination nodes are limited to
1910 * those that are newly healthy, since we don't want
1911 * to do gratuitous failover of IPs just to make minor
1912 * balance improvements.
1914 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1915 if (!rebalance_candidates[dstnode]) {
1919 /* only check nodes that can actually takeover this ip */
1920 if (!can_node_takeover_ip(ctdb, dstnode,
1922 /* no it couldnt so skip to the next node */
1926 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1927 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1928 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1929 srcnode, srcimbl - lcp2_imbalances[srcnode],
1930 ctdb_addr_to_str(&(tmp_ip->addr)),
1931 dstnode, dstimbl - lcp2_imbalances[dstnode]));
1933 if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
1934 ((mindstnode == -1) || \
1935 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1938 minsrcimbl = srcimbl;
1939 mindstnode = dstnode;
1940 mindstimbl = dstimbl;
1944 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1946 if (mindstnode != -1) {
1947 /* We found a move that makes things better... */
1948 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1949 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1950 ctdb_addr_to_str(&(minip->addr)),
1951 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
1953 /* Commit the imbalance of the chosen move (minsrcimbl), not of the last IP examined (srcimbl). */
1954 lcp2_imbalances[srcnode] = minsrcimbl;
1955 lcp2_imbalances[mindstnode] = mindstimbl;
1956 minip->pnn = mindstnode;
1965 struct lcp2_imbalance_pnn {
1970 static int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
1972 const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
1973 const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
1975 if (lipa->imbalance > lipb->imbalance) {
1977 } else if (lipa->imbalance == lipb->imbalance) {
1984 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1985 * node with the highest LCP2 imbalance, and then determines the best
1986 * IP/destination node combination to move from the source node.
1988 static void lcp2_failback(struct ctdb_context *ctdb,
1989 struct ctdb_node_map *nodemap,
1990 struct ctdb_public_ip_list *all_ips,
1991 uint32_t *lcp2_imbalances,
1992 bool *rebalance_candidates)
1994 int i, num_rebalance_candidates;
1995 struct lcp2_imbalance_pnn * lips;
2000 /* It is only worth continuing if we have suitable target
2001 * nodes to transfer IPs to. This check is much cheaper than
2004 num_rebalance_candidates = 0;
2005 for (i = 0; i < nodemap->num; i++) {
2006 if (rebalance_candidates[i]) {
2007 num_rebalance_candidates++;
2010 if (num_rebalance_candidates == 0) {
2014 /* Put the imbalances and nodes into an array, sort them and
2015 * iterate through candidates. Usually the 1st one will be
2016 * used, so this doesn't cost much...
2018 lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
2019 for (i = 0; i < nodemap->num; i++) {
2020 lips[i].imbalance = lcp2_imbalances[i];
2023 qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
2024 lcp2_cmp_imbalance_pnn);
2027 for (i = 0; i < nodemap->num; i++) {
2028 /* This means that all nodes had 0 or 1 addresses, so
2029 * can't be imbalanced.
2031 if (lips[i].imbalance == 0) {
2035 if (lcp2_failback_candidate(ctdb,
2041 rebalance_candidates)) {
2053 static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
2054 struct ctdb_node_map *nodemap,
2055 struct ctdb_public_ip_list *all_ips)
2057 struct ctdb_public_ip_list *tmp_ip;
2059 /* verify that the assigned nodes can serve that public ip
2060 and set it to -1 if not
2062 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2063 if (tmp_ip->pnn == -1) {
2066 if (!can_node_host_ip(ctdb, tmp_ip->pnn,
2067 nodemap, tmp_ip) != 0) {
2068 /* this node can not serve this ip. */
2069 DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
2070 ctdb_addr_to_str(&(tmp_ip->addr)),
2077 static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
2078 struct ctdb_node_map *nodemap,
2079 struct ctdb_public_ip_list *all_ips)
2081 struct ctdb_public_ip_list *tmp_ip;
2084 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
2085 /* Allocate IPs to nodes in a modulo fashion so that IPs will
2086 * always be allocated the same way for a specific set of
2087 * available/unavailable nodes.
2090 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
2091 tmp_ip->pnn = i%nodemap->num;
2094 /* IP failback doesn't make sense with deterministic
2095 * IPs, since the modulo step above implicitly fails
2096 * back IPs to their "home" node.
2098 if (1 == ctdb->tunable.no_ip_failback) {
2099 DEBUG(DEBUG_WARNING, ("WARNING: 'NoIPFailback' set but ignored - incompatible with 'DeterministicIPs\n"));
2102 unassign_unsuitable_ips(ctdb, nodemap, all_ips);
2104 basic_allocate_unassigned(ctdb, nodemap, all_ips);
2106 /* No failback here! */
2109 static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
2110 struct ctdb_node_map *nodemap,
2111 struct ctdb_public_ip_list *all_ips)
2113 /* This should be pushed down into basic_failback. */
2114 struct ctdb_public_ip_list *tmp_ip;
2116 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2120 unassign_unsuitable_ips(ctdb, nodemap, all_ips);
2122 basic_allocate_unassigned(ctdb, nodemap, all_ips);
2124 /* If we don't want IPs to fail back then don't rebalance IPs. */
2125 if (1 == ctdb->tunable.no_ip_failback) {
2129 /* Now, try to make sure the IP addresses are evenly distributed
2132 basic_failback(ctdb, nodemap, all_ips, num_ips);
2135 static void ip_alloc_lcp2(struct ctdb_context *ctdb,
2136 struct ctdb_node_map *nodemap,
2137 struct ctdb_public_ip_list *all_ips)
2139 uint32_t *lcp2_imbalances;
2140 bool *rebalance_candidates;
2142 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2144 unassign_unsuitable_ips(ctdb, nodemap, all_ips);
2146 lcp2_init(tmp_ctx, nodemap, all_ips,
2147 &lcp2_imbalances, &rebalance_candidates);
2149 lcp2_allocate_unassigned(ctdb, nodemap, all_ips, lcp2_imbalances);
2151 /* If we don't want IPs to fail back then don't rebalance IPs. */
2152 if (1 == ctdb->tunable.no_ip_failback) {
2156 /* Now, try to make sure the IP addresses are evenly distributed
2159 lcp2_failback(ctdb, nodemap, all_ips,
2160 lcp2_imbalances, rebalance_candidates);
2163 talloc_free(tmp_ctx);
2166 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
2170 /* Count how many completely healthy nodes we have */
2172 for (i=0;i<nodemap->num;i++) {
2173 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
2178 return num_healthy == 0;
2181 /* The calculation part of the IP allocation algorithm. */
2182 static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
2183 struct ctdb_node_map *nodemap,
2184 struct ctdb_public_ip_list **all_ips_p)
2186 /* since nodes only know about those public addresses that
2187 can be served by that particular node, no single node has
2188 a full list of all public addresses that exist in the cluster.
2189 Walk over all node structures and create a merged list of
2190 all public addresses that exist in the cluster.
2192 keep the tree of ips around as ctdb->ip_tree
2194 *all_ips_p = create_merged_ip_list(ctdb);
2196 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
2197 ip_alloc_lcp2(ctdb, nodemap, *all_ips_p);
2198 } else if (1 == ctdb->tunable.deterministic_public_ips) {
2199 ip_alloc_deterministic_ips(ctdb, nodemap, *all_ips_p);
2201 ip_alloc_nondeterministic_ips(ctdb, nodemap, *all_ips_p);
2204 /* at this point ->pnn is the node which will own each IP
2205 or -1 if there is no node that can cover this ip
2211 struct get_tunable_callback_data {
2212 const char *tunable;
2216 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
2217 int32_t res, TDB_DATA outdata,
2220 struct get_tunable_callback_data *cd =
2221 (struct get_tunable_callback_data *)callback;
2226 ("Failure to read \"%s\" tunable from remote node %d\n",
2231 if (outdata.dsize != sizeof(uint32_t)) {
2232 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
2233 cd->tunable, pnn, (int)sizeof(uint32_t),
2234 (int)outdata.dsize));
2238 size = talloc_get_size(cd->out) / sizeof(uint32_t);
2240 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
2241 cd->tunable, pnn, size));
2246 cd->out[pnn] = *(uint32_t *)outdata.dptr;
2249 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
2250 TALLOC_CTX *tmp_ctx,
2251 struct ctdb_node_map *nodemap,
2252 const char *tunable)
2255 struct ctdb_control_get_tunable *t;
2258 struct get_tunable_callback_data callback_data;
2260 tvals = talloc_zero_array(tmp_ctx, uint32_t, nodemap->num);
2261 CTDB_NO_MEMORY_NULL(ctdb, tvals);
2262 callback_data.out = tvals;
2263 callback_data.tunable = tunable;
2265 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
2266 data.dptr = talloc_size(tmp_ctx, data.dsize);
2267 t = (struct ctdb_control_get_tunable *)data.dptr;
2268 t->length = strlen(tunable)+1;
2269 memcpy(t->name, tunable, t->length);
2270 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2271 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
2272 nodes, 0, TAKEOVER_TIMEOUT(),
2274 get_tunable_callback, NULL,
2275 &callback_data) != 0) {
2276 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to get %s tunable failed\n", tunable));
2279 talloc_free(data.dptr);
2284 static void clear_ipflags(struct ctdb_node_map *nodemap)
2288 for (i=0;i<nodemap->num;i++) {
2289 nodemap->nodes[i].flags &=
2290 ~(NODE_FLAGS_NOIPTAKEOVER|NODE_FLAGS_NOIPHOST);
2295 /* Set internal flags for IP allocation:
2297 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
2298 * Set NOIPHOST ip flag for each INACTIVE node
2299 * if all nodes are disabled:
2300 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
2302 * Set NOIPHOST ip flags for disabled nodes
2304 static void set_ipflags_internal(struct ctdb_node_map *nodemap,
2305 uint32_t *tval_noiptakeover,
2306 uint32_t *tval_noiphostonalldisabled)
2310 clear_ipflags(nodemap);
2312 for (i=0;i<nodemap->num;i++) {
2313 /* Can not take IPs on node with NoIPTakeover set */
2314 if (tval_noiptakeover[i] != 0) {
2315 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPTAKEOVER;
2318 /* Can not host IPs on INACTIVE node */
2319 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
2320 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
2324 if (all_nodes_are_disabled(nodemap)) {
2325 /* If all nodes are disabled, can not host IPs on node
2326 * with NoIPHostOnAllDisabled set
2328 for (i=0;i<nodemap->num;i++) {
2329 if (tval_noiphostonalldisabled[i] != 0) {
2330 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
2334 /* If some nodes are not disabled, then can not host
2335 * IPs on DISABLED node
2337 for (i=0;i<nodemap->num;i++) {
2338 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
2339 nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
2345 static bool set_ipflags(struct ctdb_context *ctdb,
2346 TALLOC_CTX *tmp_ctx,
2347 struct ctdb_node_map *nodemap)
2349 uint32_t *tval_noiptakeover;
2350 uint32_t *tval_noiphostonalldisabled;
2352 tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
2354 if (tval_noiptakeover == NULL) {
2358 tval_noiphostonalldisabled =
2359 get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
2360 "NoIPHostOnAllDisabled");
2361 if (tval_noiphostonalldisabled == NULL) {
2365 set_ipflags_internal(nodemap,
2366 tval_noiptakeover, tval_noiphostonalldisabled);
2368 talloc_free(tval_noiptakeover);
2369 talloc_free(tval_noiphostonalldisabled);
2375 make any IP alias changes for public addresses that are necessary
2377 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
2378 client_async_callback fail_callback, void *callback_data)
2381 struct ctdb_public_ip ip;
2382 struct ctdb_public_ipv4 ipv4;
2384 struct ctdb_public_ip_list *all_ips, *tmp_ip;
2386 struct timeval timeout;
2387 struct client_async_data *async_data;
2388 struct ctdb_client_control_state *state;
2389 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2390 uint32_t disable_timeout;
2393 * ip failover is completely disabled, just send out the
2394 * ipreallocated event.
2396 if (ctdb->tunable.disable_ip_failover != 0) {
2400 if (!set_ipflags(ctdb, tmp_ctx, nodemap)) {
2401 DEBUG(DEBUG_ERR,("Failed to set IP flags from tunables\n"));
2407 /* Do the IP reassignment calculations */
2408 ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
2410 /* The IP flags need to be cleared because they should never
2411 * be seen outside the IP allocation code.
2413 clear_ipflags(nodemap);
2415 /* The recovery daemon does regular sanity checks of the IPs.
2416 * However, sometimes it is overzealous and thinks changes are
2417 * required when they're already underway. This stops the
2418 * checks for a while before we start moving IPs.
2420 disable_timeout = ctdb->tunable.takeover_timeout;
2421 data.dptr = (uint8_t*)&disable_timeout;
2422 data.dsize = sizeof(disable_timeout);
2423 if (ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
2424 CTDB_SRVID_DISABLE_IP_CHECK, data) != 0) {
2425 DEBUG(DEBUG_INFO,("Failed to disable ip verification\n"));
2428 /* now tell all nodes to delete any alias that they should not
2429 have. This will be a NOOP on nodes that don't currently
2430 hold the given alias */
2431 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2432 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2434 async_data->fail_callback = fail_callback;
2435 async_data->callback_data = callback_data;
2437 for (i=0;i<nodemap->num;i++) {
2438 /* don't talk to unconnected nodes, but do talk to banned nodes */
2439 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
2443 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2444 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
2445 /* This node should be serving this
2446 vnn so dont tell it to release the ip
2450 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2451 ipv4.pnn = tmp_ip->pnn;
2452 ipv4.sin = tmp_ip->addr.ip;
2454 timeout = TAKEOVER_TIMEOUT();
2455 data.dsize = sizeof(ipv4);
2456 data.dptr = (uint8_t *)&ipv4;
2457 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2458 0, CTDB_CONTROL_RELEASE_IPv4, 0,
2462 ip.pnn = tmp_ip->pnn;
2463 ip.addr = tmp_ip->addr;
2465 timeout = TAKEOVER_TIMEOUT();
2466 data.dsize = sizeof(ip);
2467 data.dptr = (uint8_t *)&ip;
2468 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2469 0, CTDB_CONTROL_RELEASE_IP, 0,
2474 if (state == NULL) {
2475 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
2476 talloc_free(tmp_ctx);
2480 ctdb_client_async_add(async_data, state);
2483 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2484 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
2485 talloc_free(tmp_ctx);
2488 talloc_free(async_data);
2491 /* tell all nodes to get their own IPs */
2492 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2493 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2495 async_data->fail_callback = fail_callback;
2496 async_data->callback_data = callback_data;
2498 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2499 if (tmp_ip->pnn == -1) {
2500 /* this IP won't be taken over */
2504 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2505 ipv4.pnn = tmp_ip->pnn;
2506 ipv4.sin = tmp_ip->addr.ip;
2508 timeout = TAKEOVER_TIMEOUT();
2509 data.dsize = sizeof(ipv4);
2510 data.dptr = (uint8_t *)&ipv4;
2511 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2512 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
2516 ip.pnn = tmp_ip->pnn;
2517 ip.addr = tmp_ip->addr;
2519 timeout = TAKEOVER_TIMEOUT();
2520 data.dsize = sizeof(ip);
2521 data.dptr = (uint8_t *)&ip;
2522 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2523 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2527 if (state == NULL) {
2528 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2529 talloc_free(tmp_ctx);
2533 ctdb_client_async_add(async_data, state);
2535 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2536 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2537 talloc_free(tmp_ctx);
2543 * Tell all nodes to run eventscripts to process the
2544 * "ipreallocated" event. This can do a lot of things,
2545 * including restarting services to reconfigure them if public
2546 * IPs have moved. Once upon a time this event only used to
2549 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2550 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
2551 nodes, 0, TAKEOVER_TIMEOUT(),
2553 NULL, fail_callback,
2554 callback_data) != 0) {
2555 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
2558 talloc_free(tmp_ctx);
2564 destroy a ctdb_client_ip structure
2566 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2568 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2569 ctdb_addr_to_str(&ip->addr),
2570 ntohs(ip->addr.ip.sin_port),
2573 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
2578 called by a client to inform us of a TCP connection that it is managing
2579 that should be tickled with an ACK when IP takeover is done
2580 we handle both the old ipv4 style of packets as well as the new ipv4/6
2583 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2586 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2587 struct ctdb_control_tcp *old_addr = NULL;
2588 struct ctdb_control_tcp_addr new_addr;
2589 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2590 struct ctdb_tcp_list *tcp;
2591 struct ctdb_tcp_connection t;
2594 struct ctdb_client_ip *ip;
2595 struct ctdb_vnn *vnn;
2596 ctdb_sock_addr addr;
2598 switch (indata.dsize) {
2599 case sizeof(struct ctdb_control_tcp):
2600 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2601 ZERO_STRUCT(new_addr);
2602 tcp_sock = &new_addr;
2603 tcp_sock->src.ip = old_addr->src;
2604 tcp_sock->dest.ip = old_addr->dest;
2606 case sizeof(struct ctdb_control_tcp_addr):
2607 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2610 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2611 "to ctdb_control_tcp_client. size was %d but "
2612 "only allowed sizes are %lu and %lu\n",
2614 (long unsigned)sizeof(struct ctdb_control_tcp),
2615 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
2619 addr = tcp_sock->src;
2620 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2621 addr = tcp_sock->dest;
2622 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
2625 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2626 vnn = find_public_ip_vnn(ctdb, &addr);
2628 switch (addr.sa.sa_family) {
2630 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2631 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2632 ctdb_addr_to_str(&addr)));
2636 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2637 ctdb_addr_to_str(&addr)));
2640 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2646 if (vnn->pnn != ctdb->pnn) {
2647 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2648 ctdb_addr_to_str(&addr),
2649 client_id, client->pid));
2650 /* failing this call will tell smbd to die */
2654 ip = talloc(client, struct ctdb_client_ip);
2655 CTDB_NO_MEMORY(ctdb, ip);
2659 ip->client_id = client_id;
2660 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2661 DLIST_ADD(ctdb->client_ip_list, ip);
2663 tcp = talloc(client, struct ctdb_tcp_list);
2664 CTDB_NO_MEMORY(ctdb, tcp);
2666 tcp->connection.src_addr = tcp_sock->src;
2667 tcp->connection.dst_addr = tcp_sock->dest;
2669 DLIST_ADD(client->tcp_list, tcp);
2671 t.src_addr = tcp_sock->src;
2672 t.dst_addr = tcp_sock->dest;
2674 data.dptr = (uint8_t *)&t;
2675 data.dsize = sizeof(t);
2677 switch (addr.sa.sa_family) {
2679 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2680 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2681 ctdb_addr_to_str(&tcp_sock->src),
2682 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2685 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2686 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2687 ctdb_addr_to_str(&tcp_sock->src),
2688 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2691 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2695 /* tell all nodes about this tcp connection */
2696 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2697 CTDB_CONTROL_TCP_ADD,
2698 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2700 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2708 find a tcp address on a list
// Look up a connection in a tickle array.
// An entry matches only when both its source and its destination socket
// address compare equal (ctdb_same_sockaddr) to those of tcp; the match is
// returned as a pointer into array->connections.
// NOTE(review): the body of the NULL-array guard and the fall-through return
// are not visible in this listing -- presumably both yield NULL; confirm.
2710 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2711 struct ctdb_tcp_connection *tcp)
2715 if (array == NULL) {
// Linear scan over the array->num stored connections.
2719 for (i=0;i<array->num;i++) {
2720 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2721 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2722 return &array->connections[i];
2731 called by a daemon to inform us of a TCP connection that one of its
2732 clients is managing and that should be tickled with an ACK when IP takeover is
2735 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2737 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2738 struct ctdb_tcp_array *tcparray;
2739 struct ctdb_tcp_connection tcp;
2740 struct ctdb_vnn *vnn;
2742 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2744 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2745 ctdb_addr_to_str(&p->dst_addr)));
2751 tcparray = vnn->tcp_array;
2753 /* If this is the first tickle */
2754 if (tcparray == NULL) {
2755 tcparray = talloc_size(ctdb->nodes,
2756 offsetof(struct ctdb_tcp_array, connections) +
2757 sizeof(struct ctdb_tcp_connection) * 1);
2758 CTDB_NO_MEMORY(ctdb, tcparray);
2759 vnn->tcp_array = tcparray;
2762 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2763 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2765 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2766 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2769 if (tcp_update_needed) {
2770 vnn->tcp_update_needed = true;
2776 /* Do we already have this tickle ?*/
2777 tcp.src_addr = p->src_addr;
2778 tcp.dst_addr = p->dst_addr;
2779 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2780 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2781 ctdb_addr_to_str(&tcp.dst_addr),
2782 ntohs(tcp.dst_addr.ip.sin_port),
2787 /* A new tickle, we must add it to the array */
2788 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2789 struct ctdb_tcp_connection,
2791 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2793 vnn->tcp_array = tcparray;
2794 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2795 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2798 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2799 ctdb_addr_to_str(&tcp.dst_addr),
2800 ntohs(tcp.dst_addr.ip.sin_port),
2803 if (tcp_update_needed) {
2804 vnn->tcp_update_needed = true;
2812 remove a TCP connection from the tickle list of the public address it was
2813 registered for and mark that address as needing a tickle update
// Remove one connection from the tickle array of the public address that
// matches conn->dst_addr, then flag that address (tcp_update_needed) so the
// changed list is sent out again.
// Unknown addresses, an absent array and unknown connections are only logged.
2816 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2818 struct ctdb_tcp_connection *tcpp;
// The destination address of the connection selects the public address (vnn).
2819 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2822 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2823 ctdb_addr_to_str(&conn->dst_addr)));
2827 /* if the array is empty we cant remove it
2828 and we dont need to do anything
2830 if (vnn->tcp_array == NULL) {
2831 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2832 ctdb_addr_to_str(&conn->dst_addr),
2833 ntohs(conn->dst_addr.ip.sin_port)));
2838 /* See if we know this connection
2839 if we dont know this connection then we dont need to do anything
2841 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2843 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2844 ctdb_addr_to_str(&conn->dst_addr),
2845 ntohs(conn->dst_addr.ip.sin_port)));
2850 /* We need to remove this entry from the array.
2851 Instead of allocating a new array and copying data to it
2852 we cheat and just copy the last entry in the existing array
2853 to the entry that is to be removed and just shring the
// Overwrite the removed slot with the last entry, then drop the last slot.
// The order of the remaining entries is therefore not preserved.
2856 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2857 vnn->tcp_array->num--;
2859 /* If we deleted the last entry we also need to remove the entire array
2861 if (vnn->tcp_array->num == 0) {
2862 talloc_free(vnn->tcp_array);
2863 vnn->tcp_array = NULL;
2866 vnn->tcp_update_needed = true;
// Note: this message prints the source address and port, whereas the
// messages above print the destination.
2868 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2869 ctdb_addr_to_str(&conn->src_addr),
2870 ntohs(conn->src_addr.ip.sin_port)));
2875 called by a daemon to inform us that a TCP connection one of its
2876 clients used is no longer needed in the tickle database
// Control handler: reads indata.dptr as a struct ctdb_tcp_connection and
// removes that connection through ctdb_remove_tcp_connection().
// NOTE(review): no check of indata.dsize is visible here -- presumably the
// caller validates the payload size; confirm.
2878 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2880 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2882 ctdb_remove_tcp_connection(ctdb, conn);
2889 called when a daemon restarts - send all tickles for all public addresses
2890 we are serving immediately to the new node.
// Control handler for the startup notification of node vnn.
// The only visible body content is the to-do note below: sending our tickles
// to the restarted node is not implemented here.
2892 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2894 /*XXX here we should send all tickes we are serving to the new node */
2900 called when a client structure goes away - hook to remove
2901 elements from the tcp_list in all daemons
// Hook for the teardown of a client structure: every connection still on
// client->tcp_list is unlinked and removed from the tickle array of its
// public address.
2903 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
// Pop entries off the head of the list until it is empty.
2905 while (client->tcp_list) {
2906 struct ctdb_tcp_list *tcp = client->tcp_list;
2907 DLIST_REMOVE(client->tcp_list, tcp);
2908 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
2914 release all IPs on shutdown
// Walk every public address (ctdb->vnn list) and release it.
// Addresses that ctdb_sys_have_ip() reports as not present only get their
// interface assignment dropped. For the others the CTDB_EVENT_RELEASE_IP
// event script is run with "<iface> <ip> <netmask bits>", clients of the
// address are killed, and the interface assignment is dropped.
2916 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2918 struct ctdb_vnn *vnn;
2920 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2921 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2922 ctdb_vnn_unassign_iface(ctdb, vnn);
// NOTE(review): the lines between the branch above and the event call are
// not visible in this listing.
2928 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2929 ctdb_vnn_iface_string(vnn),
2930 ctdb_addr_to_str(&vnn->public_address),
2931 vnn->public_netmask_bits);
2932 release_kill_clients(ctdb, &vnn->public_address);
2933 ctdb_vnn_unassign_iface(ctdb, vnn);
2939 get list of public IPs
// Control handler: build a struct ctdb_all_public_ips reply in outdata.
// With CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE set in c->flags, addresses for
// which ctdb_vnn_available() is false are left out.
// The reply is allocated on outdata, zero-filled and sized for num entries;
// dsize is then recomputed from i, the number of entries actually written.
2941 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2942 struct ctdb_req_control *c, TDB_DATA *outdata)
2945 struct ctdb_all_public_ips *ips;
2946 struct ctdb_vnn *vnn;
2947 bool only_available = false;
2949 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2950 only_available = true;
2953 /* count how many public ip structures we have */
2955 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Header up to the ips member plus one ctdb_public_ip slot per counted address.
2959 len = offsetof(struct ctdb_all_public_ips, ips) +
2960 num*sizeof(struct ctdb_public_ip);
2961 ips = talloc_zero_size(outdata, len);
2962 CTDB_NO_MEMORY(ctdb, ips);
// Second pass: copy owner node and address of every reported public address.
2965 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2966 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2969 ips->ips[i].pnn = vnn->pnn;
2970 ips->ips[i].addr = vnn->public_address;
// Shrink the reported length to the entries that were filled in.
2974 len = offsetof(struct ctdb_all_public_ips, ips) +
2975 i*sizeof(struct ctdb_public_ip);
2977 outdata->dsize = len;
2978 outdata->dptr = (uint8_t *)ips;
2985 get list of public IPs, old ipv4 style. only returns ipv4 addresses
// Control handler for the old IPv4-only reply format: build a
// struct ctdb_all_public_ipsv4 in outdata.
// Both passes test the address family against AF_INET.
// Unlike the newer handler, dsize is set from the counted length before the
// entries are filled in.
2987 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2988 struct ctdb_req_control *c, TDB_DATA *outdata)
2991 struct ctdb_all_public_ipsv4 *ips;
2992 struct ctdb_vnn *vnn;
2994 /* count how many public ip structures we have */
2996 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2997 if (vnn->public_address.sa.sa_family != AF_INET) {
3003 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
3004 num*sizeof(struct ctdb_public_ipv4);
3005 ips = talloc_zero_size(outdata, len);
3006 CTDB_NO_MEMORY(ctdb, ips);
3008 outdata->dsize = len;
3009 outdata->dptr = (uint8_t *)ips;
// Fill pass: owner node plus the sockaddr_in view of the public address.
3013 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3014 if (vnn->public_address.sa.sa_family != AF_INET) {
3017 ips->ips[i].pnn = vnn->pnn;
3018 ips->ips[i].sin = vnn->public_address.ip;
3025 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
3026 struct ctdb_req_control *c,
3031 ctdb_sock_addr *addr;
3032 struct ctdb_control_public_ip_info *info;
3033 struct ctdb_vnn *vnn;
3035 addr = (ctdb_sock_addr *)indata.dptr;
3037 vnn = find_public_ip_vnn(ctdb, addr);
3039 /* if it is not a public ip it could be our 'single ip' */
3040 if (ctdb->single_ip_vnn) {
3041 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
3042 vnn = ctdb->single_ip_vnn;
3047 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
3048 "'%s'not a public address\n",
3049 ctdb_addr_to_str(addr)));
3053 /* count how many public ip structures we have */
3055 for (;vnn->ifaces[num];) {
3059 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
3060 num*sizeof(struct ctdb_control_iface_info);
3061 info = talloc_zero_size(outdata, len);
3062 CTDB_NO_MEMORY(ctdb, info);
3064 info->ip.addr = vnn->public_address;
3065 info->ip.pnn = vnn->pnn;
3066 info->active_idx = 0xFFFFFFFF;
3068 for (i=0; vnn->ifaces[i]; i++) {
3069 struct ctdb_iface *cur;
3071 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
3073 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
3077 if (vnn->iface == cur) {
3078 info->active_idx = i;
3080 strcpy(info->ifaces[i].name, cur->name);
3081 info->ifaces[i].link_state = cur->link_up;
3082 info->ifaces[i].references = cur->references;
3085 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
3086 i*sizeof(struct ctdb_control_iface_info);
3088 outdata->dsize = len;
3089 outdata->dptr = (uint8_t *)info;
3094 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
3095 struct ctdb_req_control *c,
3099 struct ctdb_control_get_ifaces *ifaces;
3100 struct ctdb_iface *cur;
3102 /* count how many public ip structures we have */
3104 for (cur=ctdb->ifaces;cur;cur=cur->next) {
3108 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
3109 num*sizeof(struct ctdb_control_iface_info);
3110 ifaces = talloc_zero_size(outdata, len);
3111 CTDB_NO_MEMORY(ctdb, ifaces);
3114 for (cur=ctdb->ifaces;cur;cur=cur->next) {
3115 strcpy(ifaces->ifaces[i].name, cur->name);
3116 ifaces->ifaces[i].link_state = cur->link_up;
3117 ifaces->ifaces[i].references = cur->references;
3121 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
3122 i*sizeof(struct ctdb_control_iface_info);
3124 outdata->dsize = len;
3125 outdata->dptr = (uint8_t *)ifaces;
// Control handler: set the link state of the interface named in the payload.
// indata.dptr is read as a struct ctdb_control_iface_info. The request is
// checked for a terminated name, a valid link_state and references == 0
// before the interface is looked up by name.
// A state change is logged and stored in iface->link_up.
3130 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
3131 struct ctdb_req_control *c,
3134 struct ctdb_control_iface_info *info;
3135 struct ctdb_iface *iface;
3136 bool link_up = false;
3138 info = (struct ctdb_control_iface_info *)indata.dptr;
// The byte at index CTDB_IFACE_SIZE must be the terminator; otherwise at most
// CTDB_IFACE_SIZE characters of the name are logged.
3140 if (info->name[CTDB_IFACE_SIZE] != '\0') {
3141 int len = strnlen(info->name, CTDB_IFACE_SIZE);
3142 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
3143 len, len, info->name));
// NOTE(review): the case labels of this switch are not visible in this
// listing -- presumably they map link_state to link_up; confirm.
3147 switch (info->link_state) {
3155 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
3156 (unsigned int)info->link_state));
3160 if (info->references != 0) {
3161 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
3162 (unsigned int)info->references));
3166 iface = ctdb_find_iface(ctdb, info->name);
3167 if (iface == NULL) {
// Requested state equals the current state.
3171 if (link_up == iface->link_up) {
// Logged at DEBUG_ERR when the link was up before the change, else DEBUG_NOTICE.
3175 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
3176 ("iface[%s] has changed it's link status %s => %s\n",
3178 iface->link_up?"up":"down",
3179 link_up?"up":"down"));
3181 iface->link_up = link_up;
3187 structure containing the listening socket and the list of tcp connections
3188 that the ctdb daemon is to kill
// Per public address state for killing TCP connections; created on first use
// by ctdb_killtcp_add_connection() and stored in vnn->killtcp.
3190 struct ctdb_kill_tcp {
// Public address this state belongs to.
3191 struct ctdb_vnn *vnn;
3192 struct ctdb_context *ctdb;
// fd event that runs capture_tcp_handler() on the capture socket.
3194 struct fd_event *fde;
// Tree of struct ctdb_killtcp_con, keyed by killtcp_key().
3195 trbt_tree_t *connections;
3200 a tcp connection that is to be killed
// One connection queued for reset; lives in ctdb_kill_tcp.connections.
3202 struct ctdb_killtcp_con {
// Canonicalized endpoints as passed to ctdb_killtcp_add_connection().
3203 ctdb_sock_addr src_addr;
3204 ctdb_sock_addr dst_addr;
// Back pointer to the owning kill state.
3206 struct ctdb_kill_tcp *killtcp;
3209 /* this function is used to create a key to represent this socketpair
3210 in the killtcp tree.
3211 this key is used to insert and lookup matching socketpairs that are
3212 to be tickled and RST
// Number of 32-bit words in a killtcp tree key.
3214 #define KILLTCP_KEYLEN 10
// Build the tree key for a socket pair.
// The key lives in a function-local static buffer that is cleared and
// overwritten on every call, so a key must be consumed before the next call.
// Both addresses must be of the same family; mismatched or unknown families
// are logged.
// NOTE(review): what is returned on those error paths is not visible here.
3215 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
3217 static uint32_t key[KILLTCP_KEYLEN];
3219 bzero(key, sizeof(key));
3221 if (src->sa.sa_family != dst->sa.sa_family) {
3222 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
3226 switch (src->sa.sa_family) {
// IPv4 layout: words 0-1 hold the dst and src address, words 2-3 the dst and
// src port; the remaining words stay zero.
3228 key[0] = dst->ip.sin_addr.s_addr;
3229 key[1] = src->ip.sin_addr.s_addr;
3230 key[2] = dst->ip.sin_port;
3231 key[3] = src->ip.sin_port;
// IPv6 layout: the four 32-bit words of each address are interleaved
// (dst, src), last word first, followed by the two ports in words 8 and 9.
3234 uint32_t *dst6_addr32 =
3235 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
3236 uint32_t *src6_addr32 =
3237 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
3238 key[0] = dst6_addr32[3];
3239 key[1] = src6_addr32[3];
3240 key[2] = dst6_addr32[2];
3241 key[3] = src6_addr32[2];
3242 key[4] = dst6_addr32[1];
3243 key[5] = src6_addr32[1];
3244 key[6] = dst6_addr32[0];
3245 key[7] = src6_addr32[0];
3246 key[8] = dst->ip6.sin6_port;
3247 key[9] = src->ip6.sin6_port;
3251 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
3259 called when we get a read event on the raw socket
// fd event handler for the capture socket of a ctdb_kill_tcp.
// Reads one packet, looks its socket pair up in killtcp->connections and, on
// a hit, sends a TCP packet with the captured sequence numbers and a final
// argument of 1 to reset the connection (see the log message below).
// private_data is the struct ctdb_kill_tcp.
3261 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
3262 uint16_t flags, void *private_data)
3264 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
3265 struct ctdb_killtcp_con *con;
3266 ctdb_sock_addr src, dst;
3267 uint32_t ack_seq, seq;
// Only read events are of interest.
3269 if (!(flags & EVENT_FD_READ)) {
3273 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
3274 killtcp->private_data,
3276 &ack_seq, &seq) != 0) {
3277 /* probably a non-tcp ACK packet */
3281 /* check if we have this guy in our list of connections
// The key is built from (src, dst) of the captured packet, while insertion
// in ctdb_killtcp_add_connection() uses (con->dst_addr, con->src_addr).
3284 con = trbt_lookuparray32(killtcp->connections,
3285 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
3287 /* no this was some other packet we can just ignore */
3291 /* This one has been tickled !
3292 now reset him and remove him from the list.
3294 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
3295 ntohs(con->dst_addr.ip.sin_port),
3296 ctdb_addr_to_str(&con->src_addr),
3297 ntohs(con->src_addr.ip.sin_port)));
3299 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
3304 /* when traversing the list of all tcp connections to send tickle acks to
3305 (so that we can capture the ack coming back and kill the connection
3307 this callback is called for each connection we are currently trying to kill
// Tree traversal callback, run for every connection queued for reset.
// param is the talloc context that collects connections to be deleted after
// the traversal; data is the struct ctdb_killtcp_con.
3309 static int tickle_connection_traverse(void *param, void *data)
3311 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
3313 /* have tried too many times, just give up */
3314 if (con->count >= 5) {
3315 /* can't delete in traverse: reparent to delete_cons */
3316 talloc_steal(param, con);
3320 /* othervise, try tickling it again */
// NOTE(review): the call these arguments belong to is not visible in this
// listing.
3323 (ctdb_sock_addr *)&con->dst_addr,
3324 (ctdb_sock_addr *)&con->src_addr,
3331 called every second until all sentenced connections have been reset
// Timer callback that tickles every connection still queued for reset.
// private_data is the struct ctdb_kill_tcp. Connections that the traversal
// callback gave up on are reparented to delete_cons and freed after the
// traversal. When the tree is empty the whole kill state is freed, otherwise
// the timer is re-armed one second ahead.
3333 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
3334 struct timeval t, void *private_data)
3336 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
// Temporary parent for connections that must not be freed during traversal.
3337 void *delete_cons = talloc_new(NULL);
3339 /* loop over all connections sending tickle ACKs */
3340 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
3342 /* now we've finished traverse, it's safe to do deletion. */
3343 talloc_free(delete_cons);
3345 /* If there are no more connections to kill we can remove the
3346 entire killtcp structure
3348 if ( (killtcp->connections == NULL) ||
3349 (killtcp->connections->root == NULL) ) {
3350 talloc_free(killtcp);
3354 /* try tickling them again in a seconds time
// The timer is allocated on killtcp, so it goes away with the kill state.
3356 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3357 ctdb_tickle_sentenced_connections, killtcp);
3361 destroy the killtcp structure
// talloc destructor of a struct ctdb_kill_tcp.
// Clears the back reference vnn->killtcp, but only after checking that the
// vnn is still on the ctdb->vnn list and that it still points at this
// structure.
3363 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
3365 struct ctdb_vnn *tmpvnn;
3367 /* verify that this vnn is still active */
3368 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
3369 if (tmpvnn == killtcp->vnn) {
// Loop ran off the end of the list: the vnn was not found.
3374 if (tmpvnn == NULL) {
// The vnn now refers to a different (or no) kill state.
3378 if (killtcp->vnn->killtcp != killtcp) {
3382 killtcp->vnn->killtcp = NULL;
3388 /* nothing fancy here, just unconditionally replace any existing
3389 connection structure with the new one.
3391 dont even free the old one if it did exist, that one is talloc_stolen
3392 by the same node in the tree anyway and will be deleted when the new data
// Insert callback passed to trbt_insertarray32_callback() by
// ctdb_killtcp_add_connection(), with the new connection as parm.
// NOTE(review): the body is not visible in this listing; going by the
// comment above, data is presumably the entry already stored under the key
// and the new one replaces it -- confirm.
3395 static void *add_killtcp_callback(void *parm, void *data)
3401 add a tcp socket to the list of connections we want to RST
// Queue one TCP connection for reset.
// Both endpoints are canonicalized, the owning public address is looked up
// (by destination, then by source, then against the 'single ip'), and the
// connection is inserted into that address's kill state. The kill state,
// its capture socket and its fd/timer events are created on first use.
3403 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
3407 ctdb_sock_addr src, dst;
3408 struct ctdb_kill_tcp *killtcp;
3409 struct ctdb_killtcp_con *con;
3410 struct ctdb_vnn *vnn;
3412 ctdb_canonicalize_ip(s, &src);
3413 ctdb_canonicalize_ip(d, &dst);
3415 vnn = find_public_ip_vnn(ctdb, &dst);
3417 vnn = find_public_ip_vnn(ctdb, &src);
3420 /* if it is not a public ip it could be our 'single ip' */
3421 if (ctdb->single_ip_vnn) {
3422 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
3423 vnn = ctdb->single_ip_vnn;
3428 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
3432 killtcp = vnn->killtcp;
3434 /* If this is the first connection to kill we must allocate
3437 if (killtcp == NULL) {
// The kill state is a talloc child of the vnn.
3438 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
3439 CTDB_NO_MEMORY(ctdb, killtcp);
3442 killtcp->ctdb = ctdb;
// -1 marks "no capture socket opened yet".
3443 killtcp->capture_fd = -1;
// NOTE(review): the result of trbt_create() is not checked in the visible
// lines.
3444 killtcp->connections = trbt_create(killtcp, 0);
3446 vnn->killtcp = killtcp;
3447 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
3452 /* create a structure that describes this connection we want to
3453 RST and store it in killtcp->connections
3455 con = talloc(killtcp, struct ctdb_killtcp_con);
3456 CTDB_NO_MEMORY(ctdb, con);
3457 con->src_addr = src;
3458 con->dst_addr = dst;
3460 con->killtcp = killtcp;
// The key is built with the endpoints swapped (dst first), matching the
// lookup on captured packets in capture_tcp_handler().
3463 trbt_insertarray32_callback(killtcp->connections,
3464 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
3465 add_killtcp_callback, con);
3468 If we dont have a socket to listen on yet we must create it
3470 if (killtcp->capture_fd == -1) {
3471 const char *iface = ctdb_vnn_iface_string(vnn);
3472 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
3473 if (killtcp->capture_fd == -1) {
3474 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
3475 "socket on iface '%s' for killtcp (%s)\n",
3476 iface, strerror(errno)));
3482 if (killtcp->fde == NULL) {
// NOTE(review): the result of event_add_fd() is passed to
// tevent_fd_set_auto_close() without a NULL check in the visible lines.
3483 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
3485 capture_tcp_handler, killtcp);
3486 tevent_fd_set_auto_close(killtcp->fde);
3488 /* We also need to set up some events to tickle all these connections
3489 until they are all reset
3491 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3492 ctdb_tickle_sentenced_connections, killtcp);
3495 /* tickle him once now */
// Cleanup: free the kill state and clear the reference on the vnn.
// NOTE(review): the label and jump leading here are not visible.
3504 talloc_free(vnn->killtcp);
3505 vnn->killtcp = NULL;
3510 kill a TCP connection.
// Control handler: reads indata.dptr as a struct ctdb_control_killtcp and
// queues its src/dst pair for reset. Returns the result of
// ctdb_killtcp_add_connection().
3512 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
3514 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
3516 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
3520 called by a daemon to inform us of the entire list of TCP tickles for
3521 a particular public address.
3522 this control should only be sent by the node that is currently serving
3523 that public address.
// Control handler: replace the tickle array of a public address with the
// list carried in indata (a struct ctdb_control_tcp_tickle_list).
// The payload size is validated in two steps: first that the fixed header up
// to tickles.connections is present, then that all tickles.num entries fit.
// The old array is freed before the new one is built.
3525 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
3527 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
3528 struct ctdb_tcp_array *tcparray;
3529 struct ctdb_vnn *vnn;
3531 /* We must at least have tickles.num or else we cant verify the size
3532 of the received data blob
3534 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3535 tickles.connections)) {
3536 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
3540 /* verify that the size of data matches what we expect */
3541 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3542 tickles.connections)
3543 + sizeof(struct ctdb_tcp_connection)
3544 * list->tickles.num) {
3545 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3549 vnn = find_public_ip_vnn(ctdb, &list->addr);
3551 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3552 ctdb_addr_to_str(&list->addr)));
3557 /* remove any old ticklelist we might have */
3558 talloc_free(vnn->tcp_array);
3559 vnn->tcp_array = NULL;
// Built on ctdb->nodes first; reparented to the vnn once complete (below).
3561 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3562 CTDB_NO_MEMORY(ctdb, tcparray);
3564 tcparray->num = list->tickles.num;
3566 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3567 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3569 memcpy(tcparray->connections, &list->tickles.connections[0],
3570 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3572 /* We now have a new fresh tickle list array for this vnn */
3573 vnn->tcp_array = talloc_steal(vnn, tcparray);
3579 called to return the full list of tickles for the public address associated
3580 with the provided vnn
// Control handler: return the tickle list of the public address given in
// indata (read as a ctdb_sock_addr).
// The reply is a struct ctdb_control_tcp_tickle_list allocated on outdata
// and sized for num connections.
3582 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3584 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3585 struct ctdb_control_tcp_tickle_list *list;
3586 struct ctdb_tcp_array *tcparray;
3588 struct ctdb_vnn *vnn;
3590 vnn = find_public_ip_vnn(ctdb, addr);
3592 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3593 ctdb_addr_to_str(addr)));
3598 tcparray = vnn->tcp_array;
// NOTE(review): the condition guarding this read is not visible here --
// presumably num is 0 when the address has no tickle array; confirm.
3600 num = tcparray->num;
3605 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3606 tickles.connections)
3607 + sizeof(struct ctdb_tcp_connection) * num;
3609 outdata->dptr = talloc_size(outdata, outdata->dsize);
3610 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3611 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3614 list->tickles.num = num;
3616 memcpy(&list->tickles.connections[0], tcparray->connections,
3617 sizeof(struct ctdb_tcp_connection) * num);
3625 set the list of all tcp tickles for a public address
// Broadcast the tickle list of one public address to all connected nodes.
// A struct ctdb_control_tcp_tickle_list is marshalled into a temporary
// buffer on ctdb, sent as CTDB_CONTROL_SET_TCP_TICKLE_LIST with
// CTDB_CTRL_FLAG_NOREPLY, and the buffer is freed afterwards.
// Note: the control is sent to CTDB_BROADCAST_CONNECTED; no use of the
// timeout and destnode parameters appears in the visible lines.
3627 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3628 struct timeval timeout, uint32_t destnode,
3629 ctdb_sock_addr *addr,
3630 struct ctdb_tcp_array *tcparray)
3634 struct ctdb_control_tcp_tickle_list *list;
// NOTE(review): the condition guarding this read is not visible here.
3637 num = tcparray->num;
3642 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3643 tickles.connections) +
3644 sizeof(struct ctdb_tcp_connection) * num;
3645 data.dptr = talloc_size(ctdb, data.dsize);
3646 CTDB_NO_MEMORY(ctdb, data.dptr);
3648 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3650 list->tickles.num = num;
3652 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3655 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3656 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3657 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3659 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3663 talloc_free(data.dptr);
3670 perform tickle updates if required
// Periodic timer callback: push out the tickle list of every public address
// whose owner (vnn->pnn) is this node and that is flagged tcp_update_needed,
// then re-arm the timer after tunable.tickle_update_interval seconds.
// private_data is the struct ctdb_context.
3672 static void ctdb_update_tcp_tickles(struct event_context *ev,
3673 struct timed_event *te,
3674 struct timeval t, void *private_data)
3676 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3678 struct ctdb_vnn *vnn;
3680 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3681 /* we only send out updates for public addresses that
3684 if (ctdb->pnn != vnn->pnn) {
3687 /* We only send out the updates if we need to */
3688 if (!vnn->tcp_update_needed) {
3691 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3693 CTDB_BROADCAST_CONNECTED,
3694 &vnn->public_address,
3697 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3698 ctdb_addr_to_str(&vnn->public_address)));
// Re-arm; the event is allocated on ctdb->tickle_update_context.
3702 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3703 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3704 ctdb_update_tcp_tickles, ctdb);
3709 start periodic update of tcp tickles
// Start the periodic tickle update: create the talloc context that owns the
// timer events and schedule the first run of ctdb_update_tcp_tickles()
// after tunable.tickle_update_interval seconds.
3711 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3713 ctdb->tickle_update_context = talloc_new(ctdb);
3715 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3716 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3717 ctdb_update_tcp_tickles, ctdb);
// State of one gratuitous ARP series started by
// ctdb_control_send_gratious_arp() and driven by send_gratious_arp().
3723 struct control_gratious_arp {
3724 struct ctdb_context *ctdb;
// Address to announce.
3725 ctdb_sock_addr addr;
3731 send a control_gratuitous arp
// Timer callback: send one gratuitous ARP for arp->addr on arp->iface and
// re-arm itself CTDB_ARP_INTERVAL seconds ahead.
// private_data is the struct control_gratious_arp; the series is compared
// against CTDB_ARP_REPEAT to decide when to stop.
3733 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3734 struct timeval t, void *private_data)
3737 struct control_gratious_arp *arp = talloc_get_type(private_data,
3738 struct control_gratious_arp);
3740 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3742 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3743 arp->iface, strerror(errno)));
// NOTE(review): the counter update and the body of this branch are not
// visible in this listing.
3748 if (arp->count == CTDB_ARP_REPEAT) {
// The next event is allocated on arp itself.
3753 event_add_timed(arp->ctdb->ev, arp,
3754 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3755 send_gratious_arp, arp);
// Control handler: start a gratuitous ARP series for the address and
// interface carried in indata (a struct ctdb_control_gratious_arp).
// The payload must hold the fixed header, and its size is checked against
// header + len. The state is allocated on ctdb and the first ARP is
// scheduled immediately (timeval_zero).
3762 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3764 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3765 struct control_gratious_arp *arp;
3767 /* verify the size of indata */
3768 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3769 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3770 (unsigned)indata.dsize,
3771 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3775 ( offsetof(struct ctdb_control_gratious_arp, iface)
3776 + gratious_arp->len ) ){
3778 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3779 "but should be %u bytes\n",
3780 (unsigned)indata.dsize,
3781 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
3786 arp = talloc(ctdb, struct control_gratious_arp);
3787 CTDB_NO_MEMORY(ctdb, arp);
3790 arp->addr = gratious_arp->addr;
// The interface name is duplicated as a child of arp.
// NOTE(review): if this duplication fails, arp itself is not freed in the
// visible lines.
3791 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3792 CTDB_NO_MEMORY(ctdb, arp->iface);
3795 event_add_timed(arp->ctdb->ev, arp,
3796 timeval_zero(), send_gratious_arp, arp);
// Control handler: add a public address at runtime.
// indata is read as a struct ctdb_control_ip_iface; after the size checks
// the address, mask and interface name are handed to
// ctdb_add_public_address() with a final argument of true.
3801 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3803 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3806 /* verify the size of indata */
3807 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3808 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3812 ( offsetof(struct ctdb_control_ip_iface, iface)
3815 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3816 "but should be %u bytes\n",
3817 (unsigned)indata.dsize,
3818 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3822 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3825 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
3833 called when releaseip event finishes for del_public_address
// Completion callback of the release-ip event started by
// ctdb_control_del_public_address(): frees private_data, which that caller
// sets to the temporary context holding the removed vnn.
3835 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
3838 talloc_free(private_data);
// Control handler: remove a public address at runtime.
// indata is read as a struct ctdb_control_ip_iface. The matching vnn is
// unlinked from ctdb->vnn and reparented to a temporary context. If another
// node owns the address the context is freed right away; otherwise the
// CTDB_EVENT_RELEASE_IP event is started and delete_ip_callback() frees the
// context when it completes.
3841 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3843 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3844 struct ctdb_vnn *vnn;
3847 /* verify the size of indata */
3848 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3849 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3853 ( offsetof(struct ctdb_control_ip_iface, iface)
3856 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3857 "but should be %u bytes\n",
3858 (unsigned)indata.dsize,
3859 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3863 /* walk over all public addresses until we find a match */
3864 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3865 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
// Temporary owner for the vnn while it is being torn down.
3866 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
3868 DLIST_REMOVE(ctdb->vnn, vnn);
3869 talloc_steal(mem_ctx, vnn);
3870 ctdb_remove_orphaned_ifaces(ctdb, vnn, mem_ctx);
// Address owned by another node: nothing to release locally.
3871 if (vnn->pnn != ctdb->pnn) {
3872 if (vnn->iface != NULL) {
3873 ctdb_vnn_unassign_iface(ctdb, vnn);
3875 talloc_free(mem_ctx);
// mem_ctx is both the talloc parent of the event and the callback argument.
3880 ret = ctdb_event_script_callback(ctdb,
3881 mem_ctx, delete_ip_callback, mem_ctx,
3883 CTDB_EVENT_RELEASE_IP,
3885 ctdb_vnn_iface_string(vnn),
3886 ctdb_addr_to_str(&vnn->public_address),
3887 vnn->public_netmask_bits);
3888 if (vnn->iface != NULL) {
3889 ctdb_vnn_unassign_iface(ctdb, vnn);
// State carried from ctdb_control_ipreallocated() to its completion callback.
3902 struct ipreallocated_callback_state {
// The pending control request that is answered when the event finishes.
3903 struct ctdb_req_control *c;
// Completion callback of the "ipreallocated" event.
// p is the struct ipreallocated_callback_state. A failure is logged, and a
// status of -ETIME makes this node ban itself. The pending control is then
// answered with the event status.
3906 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3907 int status, void *p)
3909 struct ipreallocated_callback_state *state =
3910 talloc_get_type(p, struct ipreallocated_callback_state);
3914 (" \"ipreallocated\" event script failed (status %d)\n",
3916 if (status == -ETIME) {
3917 ctdb_ban_self(ctdb);
3921 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3925 /* A control to run the ipreallocated event */
// Control handler: run the CTDB_EVENT_IPREALLOCATED event asynchronously.
// The callback state is allocated on ctdb and also serves as the talloc
// parent of the event. Once the event has been started, the request c is
// reparented to the state and *async_reply is set, so the reply is sent
// from ctdb_ipreallocated_callback().
3926 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3927 struct ctdb_req_control *c,
3931 struct ipreallocated_callback_state *state;
3933 state = talloc(ctdb, struct ipreallocated_callback_state);
3934 CTDB_NO_MEMORY(ctdb, state);
3936 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3938 ret = ctdb_event_script_callback(ctdb, state,
3939 ctdb_ipreallocated_callback, state,
3940 false, CTDB_EVENT_IPREALLOCATED,
3944 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3949 /* tell the control that we will be reply asynchronously */
3950 state->c = talloc_steal(state, c);
3951 *async_reply = true;
3957 /* This function is called from the recovery daemon to verify that a remote
3958 node has the expected ip allocation.
3959 This is verified against ctdb->ip_tree
// Compare the public address list reported by a remote node (ips) with the
// expected allocation in ctdb->ip_tree.
// Each reported address is looked up in the tree; a missing entry or a
// differing owner node is logged. Entries where either side has pnn == -1
// are tested separately before the owner comparison.
// NOTE(review): the return values are not visible in this listing.
3961 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
3963 struct ctdb_public_ip_list *tmp_ip;
3966 if (ctdb->ip_tree == NULL) {
3967 /* dont know the expected allocation yet, assume remote node
3976 for (i=0; i<ips->num; i++) {
3977 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3978 if (tmp_ip == NULL) {
3979 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
3983 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3987 if (tmp_ip->pnn != ips->ips[i].pnn) {
3988 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
// Record a new owner node for one public address in ctdb->ip_tree.
// A missing tree or a missing entry for the address is logged; otherwise
// the change is logged and the pnn of the entry is overwritten with ip->pnn.
// NOTE(review): the return values are not visible in this listing.
3996 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3998 struct ctdb_public_ip_list *tmp_ip;
4000 if (ctdb->ip_tree == NULL) {
4001 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
4005 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
4006 if (tmp_ip == NULL) {
4007 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
4011 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
4012 tmp_ip->pnn = ip->pnn;
// State of one running "reload ips" operation; its destructor
// (ctdb_reloadips_destructor) answers the pending control.
4018 struct ctdb_reloadips_handle {
4019 struct ctdb_context *ctdb;
// The control request answered when the handle is destroyed.
4020 struct ctdb_req_control *c;
4024 struct fd_event *fde;
// talloc destructor of a reload-ips handle.
// Clears ctdb->reload_ips if it still points at this handle, answers the
// pending control with h->status and sends SIGKILL to the child process.
4027 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
4029 if (h == h->ctdb->reload_ips) {
4030 h->ctdb->reload_ips = NULL;
4033 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
4036 ctdb_kill(h->ctdb, h->child, SIGKILL);
// Timer callback for a reload-ips operation; private_data is the
// struct ctdb_reloadips_handle.
// NOTE(review): the action taken on the handle is not visible in this
// listing.
4040 static void ctdb_reloadips_timeout_event(struct event_context *ev,
4041 struct timed_event *te,
4042 struct timeval t, void *private_data)
4044 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
// fd event handler for the pipe from the reload-ips child process.
// Reads one result byte from h->fd[0]; a short read or a non-zero byte is
// logged as a child failure.
// NOTE(review): how the result is stored and the handle released is not
// visible in this listing.
4049 static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde,
4050 uint16_t flags, void *private_data)
4052 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
4057 ret = read(h->fd[0], &res, 1);
4058 if (ret < 1 || res != 0) {
4059 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
  Work done in the forked reloadips child.

  Fetches the public ip list from the local node, re-reads the public
  addresses configuration via ctdb_set_public_addresses(), then sends
  del_public_ip / add_public_ip controls to the local node for addresses
  that disappeared from, or were added to, the configuration.

  The caller writes the return value back to the parent as a single byte.
  NOTE(review): the return statements and several conditions are on lines
  not visible in this excerpt; presumably 0 on success and non-zero on any
  failure - confirm.
 */
4067 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
/* scratch talloc context; freed on the visible error paths below */
4069 TALLOC_CTX *mem_ctx = talloc_new(NULL);
4070 struct ctdb_all_public_ips *ips;
4071 struct ctdb_vnn *vnn;
4074 /* read the ip allocation from the local node */
4075 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
4077 DEBUG(DEBUG_ERR, ("Unable to get public ips from local node\n"));
4078 talloc_free(mem_ctx);
4082 /* re-read the public ips file */
4084 if (ctdb_set_public_addresses(ctdb, false) != 0) {
4085 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
4086 talloc_free(mem_ctx);
4091 /* check the previous list of ips and scan for ips that have been
4094 for (i = 0; i < ips->num; i++) {
4095 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
4096 if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
4101 /* we need to delete this ip, no longer available on this node */
4103 struct ctdb_control_ip_iface pub;
4105 DEBUG(DEBUG_NOTICE,("RELOADIPS: IP%s is no longer available on this node. Deleting it.\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4106 pub.addr = ips->ips[i].addr;
/* ask the local node to drop the address */
4110 ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
4112 DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
4119 /* loop over all new ones and check the ones we need to add */
4120 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
4121 for (i = 0; i < ips->num; i++) {
4122 if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
/* i reaching ips->num means the scan above found no match for this vnn
   (presumably it breaks out on a match - the break is not visible here) */
4126 if (i == ips->num) {
/* NOTE(review): pub lives on the stack, yet strlen(ifaces)+1 bytes are
   copied into pub.iface below. This is only safe if the iface member is
   large enough for the joined interface list - confirm against the
   declaration of struct ctdb_control_ip_iface. */
4127 struct ctdb_control_ip_iface pub;
4128 const char *ifaces = NULL;
4131 DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
4133 pub.addr = vnn->public_address;
4134 pub.mask = vnn->public_netmask_bits;
/* build a comma separated list of the names in vnn->ifaces */
4137 ifaces = vnn->ifaces[0];
4139 while (vnn->ifaces[iface] != NULL) {
/* NOTE(review): each pass allocates a new string on vnn, and no NULL check
   on the result is visible before strlen() is applied to it */
4140 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
/* the length sent includes the terminating NUL */
4143 pub.len = strlen(ifaces)+1;
4144 memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
/* ask the local node to start hosting the address */
4146 ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
4148 DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
4157 /* This control is sent to force the node to re-read the public addresses file
4158 and drop any addresses we should no longer host, and add new addresses
4159 that we are now able to host
4161 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
/*
  Control handler that performs the reload in a forked child.

  A handle is allocated, a pipe is created and a child is forked. The child
  switches to client mode, runs ctdb_reloadips_child() and writes its result
  byte into the pipe. The parent takes ownership of the request, watches the
  read end of the pipe, arms a 120 second timer and sets *async_reply, so the
  reply is sent later rather than on return from this function.
  NOTE(review): the return statements are on lines not visible in this
  excerpt.
 */
4163 struct ctdb_reloadips_handle *h;
/* remembered before forking so that the child can probe for its parent */
4164 pid_t parent = getpid();
/* a handle left over from an earlier request is freed before starting anew */
4166 if (ctdb->reload_ips != NULL) {
4167 talloc_free(ctdb->reload_ips);
4168 ctdb->reload_ips = NULL;
/* NOTE(review): talloc() does not zero the memory; the member
   initialisation is presumably on lines not visible here - confirm */
4171 h = talloc(ctdb, struct ctdb_reloadips_handle);
4172 CTDB_NO_MEMORY(ctdb, h);
4177 if (pipe(h->fd) == -1) {
/* NOTE(review): the message names ctdb_freeze_lock although this pipe is
   for the reloadips child - looks copy-pasted, confirm and correct */
4178 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
4183 h->child = ctdb_fork(ctdb);
4184 if (h->child == (pid_t)-1) {
4185 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* child side */
4193 if (h->child == 0) {
4194 signed char res = 0;
/* prefix used to tag the child's debug output */
4197 debug_extra = talloc_asprintf(NULL, "reloadips:");
4199 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
4200 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
4203 res = ctdb_reloadips_child(ctdb);
4205 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
/* report the result to the parent as one byte;
   NOTE(review): the return value of write() is ignored */
4209 write(h->fd[1], &res, 1);
4210 /* make sure we die when our parent dies */
4211 while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
/* parent side: make the request a talloc child of the handle */
4217 h->c = talloc_steal(h, c);
4220 set_close_on_exec(h->fd[0]);
/* from here on freeing h runs ctdb_reloadips_destructor */
4222 talloc_set_destructor(h, ctdb_reloadips_destructor);
/* get called back when the child has written its result byte */
4225 h->fde = event_add_fd(ctdb->ev, h, h->fd[0],
4226 EVENT_FD_READ, ctdb_reloadips_child_handler,
4228 tevent_fd_set_auto_close(h->fde);
/* timer for the case that the child does not report back within 120 seconds */
4230 event_add_timed(ctdb->ev, h,
4231 timeval_current_ofs(120, 0),
4232 ctdb_reloadips_timeout_event, h);
4234 /* we reply later */
4235 *async_reply = true;