4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/common.h"
43 #include "common/logging.h"
45 #include "server/ipalloc.h"
47 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
49 #define CTDB_ARP_INTERVAL 1
50 #define CTDB_ARP_REPEAT 3
/* Entry on the list of local interfaces (ctdb->ifaces); prev/next link the
   neighbouring entries of that list. */
52 struct ctdb_interface {
53 struct ctdb_interface *prev, *next;
/* Entry on a VNN's list of candidate interfaces (vnn->ifaces); each entry
   points at one ctdb_interface. */
59 struct vnn_interface {
60 struct vnn_interface *prev, *next;
61 struct ctdb_interface *iface;
/* Entries are linked on ctdb->vnn through prev/next. 'iface' is the interface
   currently assigned to the address (NULL when none is assigned) and 'ifaces'
   lists the interfaces the address may be hosted on. */
64 /* state associated with a public ip address */
66 struct ctdb_vnn *prev, *next;
68 struct ctdb_interface *iface;
69 struct vnn_interface *ifaces;
70 ctdb_sock_addr public_address;
71 uint8_t public_netmask_bits;
73 /* the node number that is serving this public address, if any.
74 If no node serves this ip it is set to -1 */
77 /* List of clients to tickle for this public address */
78 struct ctdb_tcp_array *tcp_array;
80 /* whether we need to update the other nodes with changes to our list
81 of connected clients */
82 bool tcp_update_needed;
84 /* a context to hang sending gratuitous arp events off */
85 TALLOC_CTX *takeover_ctx;
87 /* Set to true any time an update to this VNN is in flight.
88 This helps to avoid races. */
89 bool update_in_flight;
91 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
92 * address then this flag is set. It will be deleted in the
93 * release IP callback. */
97 static const char *iface_string(const struct ctdb_interface *iface)
99 return (iface != NULL ? iface->name : "__none__");
102 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
104 return iface_string(vnn->iface);
107 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
/* Register a local interface by name on ctdb->ifaces.
   Names longer than CTDB_IFACE_SIZE are rejected with an error log. An
   existing entry is looked up with ctdb_find_iface() first; otherwise a
   zeroed ctdb_interface is allocated on the ctdb talloc context, the name is
   duplicated as a child of the new entry and the entry is linked into
   ctdb->ifaces. Allocation failures are logged. */
110 static struct ctdb_interface *
111 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
113 struct ctdb_interface *i;
115 if (strlen(iface) > CTDB_IFACE_SIZE) {
116 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
120 /* Verify that we don't have an entry for this interface yet */
121 i = ctdb_find_iface(ctdb, iface);
126 /* create a new structure for this interface */
127 i = talloc_zero(ctdb, struct ctdb_interface);
129 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
132 i->name = talloc_strdup(i, iface);
133 if (i->name == NULL) {
134 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
141 DLIST_ADD(ctdb->ifaces, i);
/* Check whether 'iface' is one of the candidate interfaces of 'vnn'.
   Walks vnn->ifaces and compares by pointer identity, not by name. */
146 static bool vnn_has_interface(struct ctdb_vnn *vnn,
147 const struct ctdb_interface *iface)
149 struct vnn_interface *i;
151 for (i = vnn->ifaces; i != NULL; i = i->next) {
152 if (iface == i->iface) {
/* Prune ctdb->ifaces after 'vnn' stops using its interfaces: an interface
   listed by 'vnn' is unlinked from ctdb->ifaces when no VNN on ctdb->vnn
   lists it. Interfaces that 'vnn' does not list are left alone. */
160 /* If any interfaces now have no possible IPs then delete them. This
161 * implementation is naive (i.e. simple) rather than clever
162 * (i.e. complex). Given that this is run on delip and that operation
163 * is rare, this doesn't need to be efficient - it needs to be
164 * foolproof. One alternative is reference counting, where the logic
165 * is distributed and can, therefore, be broken in multiple places.
166 * Another alternative is to build a red-black tree of interfaces that
167 * can have addresses (by walking ctdb->vnn once) and then walking
168 * ctdb->ifaces once and deleting those not in the tree. Let's go to
169 * one of those if the naive implementation causes problems... :-)
171 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
172 struct ctdb_vnn *vnn)
174 struct ctdb_interface *i, *next;
176 /* For each interface, check if there's an IP using it. */
177 for (i = ctdb->ifaces; i != NULL; i = next) {
182 /* Only consider interfaces named in the given VNN. */
183 if (!vnn_has_interface(vnn, i)) {
187 /* Search for a vnn with this interface. */
189 for (tv=ctdb->vnn; tv; tv=tv->next) {
190 if (vnn_has_interface(tv, i)) {
197 /* None of the VNNs are using this interface. */
198 DLIST_REMOVE(ctdb->ifaces, i);
/* Look up an interface on ctdb->ifaces by name; names are compared with
   strcmp(), i.e. exact and case-sensitive. */
205 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
208 struct ctdb_interface *i;
210 for (i=ctdb->ifaces;i;i=i->next) {
211 if (strcmp(i->name, iface) == 0) {
/* Choose an interface for 'vnn' from its candidate list (vnn->ifaces).
   Candidates are compared on their 'references' counter: a candidate with
   fewer references than the current best is preferred. */
219 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
220 struct ctdb_vnn *vnn)
222 struct vnn_interface *i;
223 struct ctdb_interface *cur = NULL;
224 struct ctdb_interface *best = NULL;
226 for (i = vnn->ifaces; i != NULL; i = i->next) {
239 if (cur->references < best->references) {
/* Assign an interface to 'vnn' and mark this node as the one serving it.
   Logs when the address is still assigned to an interface, asks
   ctdb_vnn_best_iface() for a candidate, logs an error when none can be
   assigned, and otherwise sets vnn->pnn to the local PNN and logs the new
   assignment. */
248 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
249 struct ctdb_vnn *vnn)
251 struct ctdb_interface *best = NULL;
254 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
255 "still assigned to iface '%s'\n",
256 ctdb_addr_to_str(&vnn->public_address),
257 ctdb_vnn_iface_string(vnn)));
261 best = ctdb_vnn_best_iface(ctdb, vnn);
263 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
264 "cannot assign to iface any iface\n",
265 ctdb_addr_to_str(&vnn->public_address)));
271 vnn->pnn = ctdb->pnn;
273 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
274 "now assigned to iface '%s' refs[%d]\n",
275 ctdb_addr_to_str(&vnn->public_address),
276 ctdb_vnn_iface_string(vnn),
/* Drop the interface assignment of 'vnn': logs the old interface and its
   reference count, decrements that count, and checks whether the VNN was
   being served by the local node (vnn->pnn == ctdb->pnn).
   NOTE(review): the log statement tolerates vnn->iface == NULL; confirm the
   decrement is guarded the same way. */
281 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
282 struct ctdb_vnn *vnn)
284 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
285 "now unassigned (old iface '%s' refs[%d])\n",
286 ctdb_addr_to_str(&vnn->public_address),
287 ctdb_vnn_iface_string(vnn),
288 vnn->iface?vnn->iface->references:0));
290 vnn->iface->references--;
293 if (vnn->pnn == ctdb->pnn) {
/* Decide whether this node can currently host 'vnn'. The checks, in order:
   the daemon run state must be CTDB_RUNSTATE_RUNNING, the VNN's
   delete_pending flag is tested, then the assigned interface's link state,
   and finally the link state of each candidate interface. */
298 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
299 struct ctdb_vnn *vnn)
301 struct vnn_interface *i;
303 /* Nodes that are not RUNNING can not host IPs */
304 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
308 if (vnn->delete_pending) {
312 if (vnn->iface && vnn->iface->link_up) {
316 for (i = vnn->ifaces; i != NULL; i = i->next) {
317 if (i->iface->link_up) {
/* State for the ARP / TCP tickle announcements of one VNN; it is passed to
   ctdb_control_send_arp() as the timer's private data. */
325 struct ctdb_takeover_arp {
326 struct ctdb_context *ctdb;
329 struct ctdb_tcp_array *tcparray;
330 struct ctdb_vnn *vnn;
/* List node holding one ctdb_connection; entries are chained via prev/next. */
335 lists of tcp endpoints
337 struct ctdb_tcp_list {
338 struct ctdb_tcp_list *prev, *next;
339 struct ctdb_connection connection;
/* List node for a client tied to an IP; entries are chained via prev/next and
   carry a back-pointer to the ctdb context. */
343 list of clients to kill on IP release
345 struct ctdb_client_ip {
346 struct ctdb_client_ip *prev, *next;
347 struct ctdb_context *ctdb;
/* Timer handler announcing a taken-over address. private_data is the
   struct ctdb_takeover_arp set up by ctdb_announce_vnn_iface().
   Each run sends an ARP for arp->addr on the VNN's current interface, then
   sends a TCP tickle for every connection in arp->tcparray; failures are
   logged and do not stop the loop. The run counter is compared with
   CTDB_ARP_REPEAT, and the handler re-arms itself on the VNN's takeover_ctx
   using timeval_current_ofs(CTDB_ARP_INTERVAL, 100000). */
354 send a gratuitous arp
356 static void ctdb_control_send_arp(struct tevent_context *ev,
357 struct tevent_timer *te,
358 struct timeval t, void *private_data)
360 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
361 struct ctdb_takeover_arp);
363 struct ctdb_tcp_array *tcparray;
364 const char *iface = ctdb_vnn_iface_string(arp->vnn);
366 ret = ctdb_sys_send_arp(&arp->addr, iface);
368 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
369 iface, strerror(errno)));
372 tcparray = arp->tcparray;
374 for (i=0;i<tcparray->num;i++) {
375 struct ctdb_connection *tcon;
377 tcon = &tcparray->connections[i];
378 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
379 (unsigned)ntohs(tcon->dst.ip.sin_port),
380 ctdb_addr_to_str(&tcon->src),
381 (unsigned)ntohs(tcon->src.ip.sin_port)));
382 ret = ctdb_sys_send_tcp(
387 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
388 ctdb_addr_to_str(&tcon->src)));
395 if (arp->count == CTDB_ARP_REPEAT) {
400 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
401 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
402 ctdb_control_send_arp, arp);
/* Start announcing that this node hosts 'vnn'.
   Creates vnn->takeover_ctx on demand, allocates a ctdb_takeover_arp on it,
   and moves the VNN's tcp_array into that state with talloc_steal(). The
   VNN's own tcp_array is then reset to NULL and tcp_update_needed is set.
   Finally an immediate timer (timeval_zero()) is scheduled that runs
   ctdb_control_send_arp(). Because the timer and the state both hang off
   takeover_ctx, freeing that context cancels the announcements. */
405 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
406 struct ctdb_vnn *vnn)
408 struct ctdb_takeover_arp *arp;
409 struct ctdb_tcp_array *tcparray;
411 if (!vnn->takeover_ctx) {
412 vnn->takeover_ctx = talloc_new(vnn);
413 if (!vnn->takeover_ctx) {
418 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
424 arp->addr = vnn->public_address;
427 tcparray = vnn->tcp_array;
429 /* add all of the known tcp connections for this IP to the
430 list of tcp connections to send tickle acks for */
431 arp->tcparray = talloc_steal(arp, tcparray);
433 vnn->tcp_array = NULL;
434 vnn->tcp_update_needed = true;
437 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
438 timeval_zero(), ctdb_control_send_arp, arp);
/* Context carried from ctdb_do_takeip() to ctdb_do_takeip_callback(): the
   control request to answer and the VNN being taken over. */
443 struct ctdb_do_takeip_state {
444 struct ctdb_req_control_old *c;
445 struct ctdb_vnn *vnn;
/* Completion handler for the take-IP event script; private_data is the
   struct ctdb_do_takeip_state. A status of -ETIME is tested separately.
   On failure the error is logged and the control is answered with 'status'.
   On success, when ctdb->do_checkpublicip is set, the address is announced
   via ctdb_announce_vnn_iface() (the control is answered with -1 if that
   fails). A CTDB_SRVID_TAKE_IP message carrying the address string is sent
   to the local node, and the control is answered with 0. */
449 called when takeip event finishes
451 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
454 struct ctdb_do_takeip_state *state =
455 talloc_get_type(private_data, struct ctdb_do_takeip_state);
460 if (status == -ETIME) {
463 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
464 ctdb_addr_to_str(&state->vnn->public_address),
465 ctdb_vnn_iface_string(state->vnn)));
466 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
472 if (ctdb->do_checkpublicip) {
474 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
476 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
483 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
484 data.dsize = strlen((char *)data.dptr) + 1;
485 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
487 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
490 /* the control succeeded */
491 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor for ctdb_do_takeip_state: clears the VNN's
   update_in_flight flag when the state is freed, whichever path frees it. */
496 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
498 state->vnn->update_in_flight = false;
/* Start taking over 'vnn' on this node.
   Rejects the request when an update for the VNN is already in flight.
   Otherwise an interface is assigned with ctdb_vnn_assign_iface() and a
   state object is allocated as a talloc child of the VNN. update_in_flight
   is set, and the state's destructor clears it again. The event script is
   started with the interface name, address and netmask bits, and
   ctdb_do_takeip_callback() runs when it completes. Once the script has been
   queued, the request 'c' is re-parented onto ctdb with talloc_steal() and
   stored in the state. */
503 take over an ip address
505 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
506 struct ctdb_req_control_old *c,
507 struct ctdb_vnn *vnn)
510 struct ctdb_do_takeip_state *state;
512 if (vnn->update_in_flight) {
513 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
514 "update for this IP already in flight\n",
515 ctdb_addr_to_str(&vnn->public_address),
516 vnn->public_netmask_bits));
520 ret = ctdb_vnn_assign_iface(ctdb, vnn);
522 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
523 "assign a usable interface\n",
524 ctdb_addr_to_str(&vnn->public_address),
525 vnn->public_netmask_bits));
529 state = talloc(vnn, struct ctdb_do_takeip_state);
530 CTDB_NO_MEMORY(ctdb, state);
535 vnn->update_in_flight = true;
536 talloc_set_destructor(state, ctdb_takeip_destructor);
538 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
539 ctdb_addr_to_str(&vnn->public_address),
540 vnn->public_netmask_bits,
541 ctdb_vnn_iface_string(vnn)));
543 ret = ctdb_event_script_callback(ctdb,
545 ctdb_do_takeip_callback,
549 ctdb_vnn_iface_string(vnn),
550 ctdb_addr_to_str(&vnn->public_address),
551 vnn->public_netmask_bits);
554 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
555 ctdb_addr_to_str(&vnn->public_address),
556 ctdb_vnn_iface_string(vnn)));
561 state->c = talloc_steal(ctdb, c);
/* Context carried from ctdb_do_updateip() to ctdb_do_updateip_callback():
   the control request, the interface the address is moving away from (used
   to roll back on failure) and the VNN. */
565 struct ctdb_do_updateip_state {
566 struct ctdb_req_control_old *c;
567 struct ctdb_interface *old;
568 struct ctdb_vnn *vnn;
/* Completion handler for the update-IP event script; private_data is the
   struct ctdb_do_updateip_state. A status of -ETIME is tested separately.
   On failure the move is rolled back: the new interface is unassigned, the
   old one is restored on the VNN and its reference count is bumped, and the
   control is answered with 'status'. On success, when ctdb->do_checkpublicip
   is set, the address is announced via ctdb_announce_vnn_iface() (the control
   is answered with -1 if that fails). The control is then answered with 0. */
572 called when updateip event finishes
574 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
577 struct ctdb_do_updateip_state *state =
578 talloc_get_type(private_data, struct ctdb_do_updateip_state);
582 if (status == -ETIME) {
586 ("Failed update of IP %s from interface %s to %s\n",
587 ctdb_addr_to_str(&state->vnn->public_address),
588 iface_string(state->old),
589 ctdb_vnn_iface_string(state->vnn)));
592 * All we can do is reset the old interface
593 * and let the next run fix it
595 ctdb_vnn_unassign_iface(ctdb, state->vnn);
596 state->vnn->iface = state->old;
597 state->vnn->iface->references++;
599 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
604 if (ctdb->do_checkpublicip) {
606 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
608 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
615 /* the control succeeded */
616 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor for ctdb_do_updateip_state: clears the VNN's
   update_in_flight flag when the state is freed. */
621 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
623 state->vnn->update_in_flight = false;
/* Move 'vnn' to a (possibly) different interface on this node.
   Rejects the request when an update for the VNN is already in flight.
   The current interface is remembered, the VNN is unassigned and a new
   interface is assigned. If the assignment yields the same interface the
   control is answered with 0 straight away and no event script is run.
   Otherwise a state object is allocated as a talloc child of the VNN,
   update_in_flight is set (the state's destructor clears it) and the
   CTDB_EVENT_UPDATE_IP event script is started; ctdb_do_updateip_callback()
   runs when it completes. Once the script has been queued, the request 'c'
   is re-parented onto ctdb with talloc_steal() and stored in the state. */
628 update (move) an ip address
630 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
631 struct ctdb_req_control_old *c,
632 struct ctdb_vnn *vnn)
635 struct ctdb_do_updateip_state *state;
636 struct ctdb_interface *old = vnn->iface;
637 const char *old_name = iface_string(old);
638 const char *new_name;
640 if (vnn->update_in_flight) {
641 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
642 "update for this IP already in flight\n",
643 ctdb_addr_to_str(&vnn->public_address),
644 vnn->public_netmask_bits));
648 ctdb_vnn_unassign_iface(ctdb, vnn);
649 ret = ctdb_vnn_assign_iface(ctdb, vnn);
651 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
652 "assign a usable interface (old iface '%s')\n",
653 ctdb_addr_to_str(&vnn->public_address),
654 vnn->public_netmask_bits,
659 if (old == vnn->iface) {
660 /* A benign update from one interface onto itself.
661 * no need to run the eventscripts in this case, just return
664 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
668 state = talloc(vnn, struct ctdb_do_updateip_state);
669 CTDB_NO_MEMORY(ctdb, state);
675 vnn->update_in_flight = true;
676 talloc_set_destructor(state, ctdb_updateip_destructor);
678 new_name = ctdb_vnn_iface_string(vnn);
679 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
680 "interface %s to %s\n",
681 ctdb_addr_to_str(&vnn->public_address),
682 vnn->public_netmask_bits,
686 ret = ctdb_event_script_callback(ctdb,
688 ctdb_do_updateip_callback,
690 CTDB_EVENT_UPDATE_IP,
694 ctdb_addr_to_str(&vnn->public_address),
695 vnn->public_netmask_bits);
698 ("Failed update IP %s from interface %s to %s\n",
699 ctdb_addr_to_str(&vnn->public_address),
700 old_name, new_name));
705 state->c = talloc_steal(ctdb, c);
/* Linear search of ctdb->vnn for the entry whose public_address matches
   'addr' according to ctdb_same_ip().
   NOTE(review): the return type is a pointer, so the "returns -1" wording
   below looks stale; presumably NULL is returned when nothing matches -
   confirm. */
710 Find the vnn of the node that has a public ip address
711 returns -1 if the address is not known as a public address
713 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
715 struct ctdb_vnn *vnn;
717 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
718 if (ctdb_same_ip(&vnn->public_address, addr)) {
/* Control handler asking this node to host a public address.
   indata carries a struct ctdb_public_ip. The request is refused with an
   error log when pip->pnn is not the local PNN, and logged when the address
   is not a known public address. When IP failover is enabled and
   do_checkpublicip is set, the kernel is asked whether the address is
   already configured (have_ip). The best candidate interface is then
   determined, and the handler chooses between a fresh take
   (ctdb_do_takeip), a move between interfaces (ctdb_do_updateip) and
   treating the request as a redundant takeover that is only logged.
   NOTE(review): two log messages built from adjacent string literals lack a
   separating space ("failed to find" + "a usable interface", and
   "node %d" + "and we are node %d") - confirm and fix the strings. */
727 take over an ip address
729 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
730 struct ctdb_req_control_old *c,
735 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
736 struct ctdb_vnn *vnn;
737 bool have_ip = false;
738 bool do_updateip = false;
739 bool do_takeip = false;
740 struct ctdb_interface *best_iface = NULL;
742 if (pip->pnn != ctdb->pnn) {
743 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
744 "with pnn %d, but we're node %d\n",
745 ctdb_addr_to_str(&pip->addr),
746 pip->pnn, ctdb->pnn));
750 /* update our vnn list */
751 vnn = find_public_ip_vnn(ctdb, &pip->addr);
753 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
754 ctdb_addr_to_str(&pip->addr)));
758 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
759 have_ip = ctdb_sys_have_ip(&pip->addr);
761 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
762 if (best_iface == NULL) {
763 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
764 "a usable interface (old %s, have_ip %d)\n",
765 ctdb_addr_to_str(&vnn->public_address),
766 vnn->public_netmask_bits,
767 ctdb_vnn_iface_string(vnn),
772 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
773 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774 "and we have it on iface[%s], but it was assigned to node %d"
775 "and we are node %d, banning ourself\n",
776 ctdb_addr_to_str(&vnn->public_address),
777 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
782 if (vnn->pnn == -1 && have_ip) {
783 /* This will cause connections to be reset and
784 * reestablished. However, this is a very unusual
785 * situation and doing this will completely repair the
786 * inconsistency in the VNN.
790 " Doing updateip for IP %s already on an interface\n",
791 ctdb_addr_to_str(&vnn->public_address)));
796 if (vnn->iface != best_iface) {
797 if (!vnn->iface->link_up) {
799 } else if (vnn->iface->references > (best_iface->references + 1)) {
800 /* only move when the rebalance gains something */
808 ctdb_vnn_unassign_iface(ctdb, vnn);
815 ret = ctdb_do_takeip(ctdb, c, vnn);
819 } else if (do_updateip) {
820 ret = ctdb_do_updateip(ctdb, c, vnn);
826 * The interface is up and the kernel known the ip
829 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
830 ctdb_addr_to_str(&pip->addr),
831 vnn->public_netmask_bits,
832 ctdb_vnn_iface_string(vnn)));
836 /* tell ctdb_control.c that we will be replying asynchronously */
/* Remove a public address from this node's configuration. The VNN is
   unlinked from ctdb->vnn first, so that the orphan scan (which walks
   ctdb->vnn) no longer counts it. Its interface assignment is then dropped
   and interfaces that no remaining VNN lists are pruned. */
842 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
844 DLIST_REMOVE(ctdb->vnn, vnn);
845 ctdb_vnn_unassign_iface(ctdb, vnn);
846 ctdb_remove_orphaned_ifaces(ctdb, vnn);
/* Bookkeeping after an address has been released: sends a
   CTDB_SRVID_RELEASE_IP message carrying the address string to the local
   node, drops the VNN's interface assignment and, if the VNN is marked
   delete_pending, deletes it via do_delete_ip().
   Returns a VNN pointer that callers store back; presumably NULL when the
   VNN was deleted (the release destructor tests for NULL) - confirm. */
850 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
851 struct ctdb_vnn *vnn,
852 ctdb_sock_addr *addr)
856 /* Send a message to all clients of this node telling them
857 * that the cluster has been reconfigured and they should
858 * close any connections on this IP address
860 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
861 data.dsize = strlen((char *)data.dptr)+1;
862 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
863 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
865 ctdb_vnn_unassign_iface(ctdb, vnn);
867 /* Process the IP if it has been marked for deletion */
868 if (vnn->delete_pending) {
869 do_delete_ip(ctdb, vnn);
/* Context carried from ctdb_control_release_ip() to release_ip_callback():
   the control request, a private copy of the address being released and the
   VNN (which may become NULL once the release has been processed). */
876 struct release_ip_callback_state {
877 struct ctdb_req_control_old *c;
878 ctdb_sock_addr *addr;
879 struct ctdb_vnn *vnn;
/* Completion handler for the release-IP event script; private_data is the
   struct release_ip_callback_state. A status of -ETIME is tested separately.
   When IP failover is enabled and do_checkpublicip is set, the kernel is
   asked whether the address is still configured; if so the release is
   reported as failed. Otherwise the VNN's pnn is set to the target node,
   release_ip_post() does the bookkeeping (its result replaces state->vnn)
   and the control is answered with 0. */
884 called when releaseip event finishes
886 static void release_ip_callback(struct ctdb_context *ctdb, int status,
889 struct release_ip_callback_state *state =
890 talloc_get_type(private_data, struct release_ip_callback_state);
892 if (status == -ETIME) {
896 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
897 if (ctdb_sys_have_ip(state->addr)) {
899 ("IP %s still hosted during release IP callback, failing\n",
900 ctdb_addr_to_str(state->addr)));
901 ctdb_request_control_reply(ctdb, state->c,
908 state->vnn->pnn = state->target_pnn;
909 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
911 /* the control succeeded */
912 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor for release_ip_callback_state: clears the VNN's
   update_in_flight flag, unless state->vnn has been set to NULL in the
   meantime. */
916 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
918 if (state->vnn != NULL) {
919 state->vnn->update_in_flight = false;
/* Control handler asking this node to give up a public address.
   indata carries a struct ctdb_public_ip whose pnn is the node the address
   is moving to. Any running ARP/tickle announcements for the VNN are
   cancelled by freeing takeover_ctx. Redundant releases (address not held by
   the kernel, or no interface assigned) are logged at debug level. A request
   is rejected while another update for the VNN is in flight. Otherwise a
   state object is allocated on ctdb holding a copy of the address and the
   target PNN, update_in_flight is set (the state's destructor clears it),
   and the CTDB_EVENT_RELEASE_IP event script is started with
   release_ip_callback() as completion handler. The reply is then sent
   asynchronously: *async_reply is set and the request is re-parented onto
   the state. */
925 release an ip address
927 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
928 struct ctdb_req_control_old *c,
933 struct release_ip_callback_state *state;
934 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
935 struct ctdb_vnn *vnn;
938 /* update our vnn list */
939 vnn = find_public_ip_vnn(ctdb, &pip->addr);
941 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
942 ctdb_addr_to_str(&pip->addr)));
946 /* stop any previous arps */
947 talloc_free(vnn->takeover_ctx);
948 vnn->takeover_ctx = NULL;
950 /* RELEASE_IP controls are sent to all nodes that should not
951 * be hosting a particular IP. This serves 2 purposes. The
952 * first is to help resolve any inconsistencies. If a node
953 * does unexpectedly host an IP then it will be released. The
954 * 2nd is to use a "redundant release" to tell non-takeover
955 * nodes where an IP is moving to. This is how "ctdb ip" can
956 * report the (likely) location of an IP by only asking the
957 * local node. Redundant releases need to update the PNN but
958 * are otherwise ignored.
960 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
961 if (!ctdb_sys_have_ip(&pip->addr)) {
962 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
963 ctdb_addr_to_str(&pip->addr),
964 vnn->public_netmask_bits,
965 ctdb_vnn_iface_string(vnn)));
967 ctdb_vnn_unassign_iface(ctdb, vnn);
971 if (vnn->iface == NULL) {
972 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
973 ctdb_addr_to_str(&pip->addr),
974 vnn->public_netmask_bits));
980 /* There is a potential race between take_ip and us because we
981 * update the VNN via a callback that runs when the
982 * eventscripts have been run. Avoid the race by allowing one
983 * update to be in flight at a time.
985 if (vnn->update_in_flight) {
986 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
987 "update for this IP already in flight\n",
988 ctdb_addr_to_str(&vnn->public_address),
989 vnn->public_netmask_bits));
993 iface = ctdb_vnn_iface_string(vnn);
995 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
996 ctdb_addr_to_str(&pip->addr),
997 vnn->public_netmask_bits,
1001 state = talloc(ctdb, struct release_ip_callback_state);
1002 if (state == NULL) {
1003 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1004 __FILE__, __LINE__);
1009 state->addr = talloc(state, ctdb_sock_addr);
1010 if (state->addr == NULL) {
1011 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1012 __FILE__, __LINE__);
1016 *state->addr = pip->addr;
1017 state->target_pnn = pip->pnn;
1020 vnn->update_in_flight = true;
1021 talloc_set_destructor(state, ctdb_releaseip_destructor);
1023 ret = ctdb_event_script_callback(ctdb,
1024 state, release_ip_callback, state,
1025 CTDB_EVENT_RELEASE_IP,
1028 ctdb_addr_to_str(&pip->addr),
1029 vnn->public_netmask_bits);
1031 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1032 ctdb_addr_to_str(&pip->addr),
1033 ctdb_vnn_iface_string(vnn)));
1038 /* tell the control that we will be replying asynchronously */
1039 *async_reply = true;
1040 state->c = talloc_steal(state, c);
/* Add one public address to ctdb->vnn.
   'ifaces' is a comma-separated list of interface names. A duplicate
   address (ctdb_same_sockaddr) is reported as an error. A zeroed ctdb_vnn is
   allocated on ctdb, and the interface list is copied before being split
   because strtok() modifies its input. Each named interface is checked with
   ctdb_sys_check_iface_exists(), registered via ctdb_add_local_iface() and
   appended to vnn->ifaces in the order given. Finally the address and
   netmask are stored and the VNN is linked into ctdb->vnn.
   NOTE(review): strtok() keeps hidden static state; this relies on no
   nested tokenising happening inside the loop body. */
1044 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1045 ctdb_sock_addr *addr,
1046 unsigned mask, const char *ifaces,
1049 struct ctdb_vnn *vnn;
1053 /* Verify that we don't have an entry for this IP yet */
1054 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1055 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1057 ("Duplicate public IP address '%s'\n",
1058 ctdb_addr_to_str(addr)));
1063 /* Create a new VNN structure for this IP address */
1064 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1066 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1069 tmp = talloc_strdup(vnn, ifaces);
1071 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1075 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1076 struct vnn_interface *vnn_iface;
1077 struct ctdb_interface *i;
1078 if (!ctdb_sys_check_iface_exists(iface)) {
1080 ("Unknown interface %s for public address %s\n",
1081 iface, ctdb_addr_to_str(addr)));
1086 i = ctdb_add_local_iface(ctdb, iface);
1089 ("Failed to add interface '%s' "
1090 "for public address %s\n",
1091 iface, ctdb_addr_to_str(addr)));
1096 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1097 if (vnn_iface == NULL) {
1098 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1103 vnn_iface->iface = i;
1104 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1107 vnn->public_address = *addr;
1108 vnn->public_netmask_bits = mask;
1111 DLIST_ADD(ctdb->vnn, vnn);
/* Load ctdb->public_addresses_file and register every address in it.
   Trailing empty lines are ignored, leading blanks and tabs on each line are
   skipped, and empty lines are tested for. Each remaining line is split on
   blanks/tabs into an address token and an optional interface list. When no
   interface list is given, ctdb->default_public_interface is used, and a
   missing default is logged as critical. The address is parsed with
   parse_ip_mask() and added through ctdb_add_public_address(); badly formed
   lines and failed additions are logged with their 1-based line number. */
1117 setup the public address lists from a file
1119 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1125 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1126 if (lines == NULL) {
1127 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1130 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1134 for (i=0;i<nlines;i++) {
1136 ctdb_sock_addr addr;
1137 const char *addrstr;
1142 while ((*line == ' ') || (*line == '\t')) {
1148 if (strcmp(line, "") == 0) {
1151 tok = strtok(line, " \t");
1153 tok = strtok(NULL, " \t");
1155 if (NULL == ctdb->default_public_interface) {
1156 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1161 ifaces = ctdb->default_public_interface;
1166 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1167 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1171 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1172 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Collect the public IP lists of the nodes in 'nodemap'.
   Returns a zeroed array, allocated on mem_ctx and indexed by node number.
   Nodes flagged NODE_FLAGS_INACTIVE are skipped. For every other node the
   list is fetched with ctdb_ctrl_get_public_ips_flags() (public_ip_flags
   selects which addresses are reported) and copied into an array that is a
   talloc child of the result; the fetched list is freed after copying.
   A failed fetch or allocation is logged and frees the whole result. */
1183 static struct ctdb_public_ip_list *
1184 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1185 TALLOC_CTX *mem_ctx,
1186 struct ctdb_node_map_old *nodemap,
1187 uint32_t public_ip_flags)
1190 struct ctdb_public_ip_list_old *ip_list;
1191 struct ctdb_public_ip_list *public_ips;
1193 public_ips = talloc_zero_array(mem_ctx,
1194 struct ctdb_public_ip_list,
1196 if (public_ips == NULL) {
1197 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1201 for (j = 0; j < nodemap->num; j++) {
1202 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1206 /* Retrieve the list of public IPs from the
1207 * node. Flags says whether it is known or
1209 ret = ctdb_ctrl_get_public_ips_flags(
1210 ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1211 public_ip_flags, &ip_list);
1214 ("Failed to read public IPs from node: %u\n", j));
1215 talloc_free(public_ips);
1218 public_ips[j].num = ip_list->num;
1219 if (ip_list->num == 0) {
1220 talloc_free(ip_list);
1223 public_ips[j].ip = talloc_zero_array(public_ips,
1224 struct ctdb_public_ip,
1226 if (public_ips[j].ip == NULL) {
1227 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1228 talloc_free(public_ips);
1231 memcpy(public_ips[j].ip, &ip_list->ips[0],
1232 sizeof(struct ctdb_public_ip) * ip_list->num);
1233 talloc_free(ip_list);
1239 static struct ctdb_node_map *
1240 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1241 const struct ctdb_node_map_old *old)
1243 struct ctdb_node_map *new;
1245 new = talloc(mem_ctx, struct ctdb_node_map);
1247 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1250 new->num = old->num;
1251 new->node = talloc_zero_array(new,
1252 struct ctdb_node_and_flags, new->num);
1253 memcpy(new->node, &old->nodes[0],
1254 sizeof(struct ctdb_node_and_flags) * new->num);
/* Feed the node flags from 'nodemap' into the IP allocation state: the map
   is converted with ctdb_node_map_old_to_new(), allocated as a talloc child
   of ipalloc_state, and handed to ipalloc_set_node_flags(). */
1260 static bool set_ipflags(struct ctdb_context *ctdb,
1261 struct ipalloc_state *ipalloc_state,
1262 struct ctdb_node_map_old *nodemap)
1264 struct ctdb_node_map *new;
1266 new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
1271 ipalloc_set_node_flags(ipalloc_state, new);
1278 static enum ipalloc_algorithm
1279 determine_algorithm(const struct ctdb_tunable_list *tunables)
1281 if (1 == tunables->lcp2_public_ip_assignment) {
1282 return IPALLOC_LCP2;
1283 } else if (1 == tunables->deterministic_public_ips) {
1284 return IPALLOC_DETERMINISTIC;
1286 return IPALLOC_NONDETERMINISTIC;
/* Per-takeover-run failure bookkeeping: fail_count is an array of counters
   indexed by node PNN, bounded by the structure's num_nodes member. */
1290 struct takeover_callback_data {
1292 unsigned int *fail_count;
/* Allocate zeroed failure bookkeeping for a takeover run on mem_ctx, with
   one zeroed counter per node (num_nodes entries) as a talloc child of the
   result. Allocation failures are logged, and a partially built result is
   freed.
   NOTE(review): the local pointer is declared 'static' although it is
   reassigned on every call and only returned by value; the static storage
   looks unnecessary - confirm and drop it. */
1295 static struct takeover_callback_data *
1296 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1299 static struct takeover_callback_data *takeover_data;
1301 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1302 if (takeover_data == NULL) {
1303 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1307 takeover_data->fail_count = talloc_zero_array(takeover_data,
1308 unsigned int, num_nodes);
1309 if (takeover_data->fail_count == NULL) {
1310 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1311 talloc_free(takeover_data);
1315 takeover_data->num_nodes = num_nodes;
1317 return takeover_data;
/* Failure callback for the async controls of a takeover run. callback_data
   must be a struct takeover_callback_data (talloc_get_type_abort() enforces
   the type). A PNN outside the counter array is logged and rejected. The
   first failure of a node is logged, and every failure increments that
   node's counter. */
1320 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1321 uint32_t node_pnn, int32_t res,
1322 TDB_DATA outdata, void *callback_data)
1324 struct takeover_callback_data *cd =
1325 talloc_get_type_abort(callback_data,
1326 struct takeover_callback_data);
1328 if (node_pnn >= cd->num_nodes) {
1329 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1333 if (cd->fail_count[node_pnn] == 0) {
1335 ("Node %u failed the takeover run\n", node_pnn));
1338 cd->fail_count[node_pnn]++;
/* Act on the failures recorded during a takeover run. The counters are
   scanned for the highest fail count; the strict '>' comparison means a
   later node with an equal count does not replace an earlier one. If any
   node failed, a message whose payload is that node's PNN (a uint32_t) is
   sent to CTDB_BROADCAST_CONNECTED so that banning credits are assigned; a
   send failure is logged. */
1341 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1342 struct takeover_callback_data *tcd)
1344 unsigned int max_fails = 0;
1345 uint32_t max_pnn = -1;
1348 for (i = 0; i < tcd->num_nodes; i++) {
1349 if (tcd->fail_count[i] > max_fails) {
1351 max_fails = tcd->fail_count[i];
1355 if (max_fails > 0) {
1360 ("Sending banning credits to %u with fail count %u\n",
1361 max_pnn, max_fails));
1363 data.dptr = (uint8_t *)&max_pnn;
1364 data.dsize = sizeof(uint32_t);
1365 ret = ctdb_client_send_message(ctdb,
1366 CTDB_BROADCAST_CONNECTED,
1371 ("Failed to set banning credits for node %u\n",
1378 * Recalculate the allocation of public IPs to nodes and have the
1379 * nodes host their allocated addresses.
1381 * - Initialise IP allocation state. Pass:
1382 + algorithm to be used;
1383 + whether IP rebalancing ("failback") should be done (this uses a
1384 cluster-wide configuration variable and only the value from the
1385 master node is used); and
1386 * + list of nodes to force rebalance (internal structure, currently
1387 * no way to fetch, only used by LCP2 for nodes that have had new
1388 * IP addresses added).
1389 * - Set IP flags for IP allocation based on node map and tunables
1390 * NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
1391 * (tunable fetching done separately so values can be faked in unit
1393 * - Retrieve known and available IP addresses (done separately so
1394 * values can be faked in unit testing)
1395 * - Use ipalloc_set_public_ips() to set known and available IP
1396 addresses for allocation
1397 * - If cluster can't host IP addresses then early exit
1398 * - Run IP allocation algorithm
1399 * - Send RELEASE_IP to all nodes for IPs they should not host
1400 * - Send TAKE_IP to all nodes for IPs they should host
1401 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * ctdb_takeover_run: recalculate which node hosts each public IP and
 * push the result to the cluster.
 *
 * Inputs as used in the lines shown here:
 *   ctdb                  - daemon context; the tunables takeover_timeout,
 *                           disable_ip_failover, no_ip_takeover,
 *                           no_ip_failback and no_ip_host_on_all_disabled
 *                           are read from it
 *   nodemap               - node map; supplies per-node flags and PNNs
 *   force_rebalance_nodes - passed unchanged to ipalloc_state_init()
 *
 * Visible sequence: set up failure-callback data, build the ipalloc
 * state, fetch known and available IPs, run ipalloc(), send RELEASE_IP
 * to every connected node for each IP not assigned to it, send
 * TAKEOVER_IP to the node assigned each IP, then send IPREALLOCATED to
 * the connected nodes.  All three stages share one timeout of
 * 3 * takeover_timeout seconds.
 *
 * Every temporary allocation is parented to tmp_ctx, which is freed on
 * the error paths shown and at the end of the function.
 *
 * NOTE(review): the return statements and jump labels are not visible
 * in this view, so return values are not documented here - confirm
 * against the full file.
 */
1403 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1404 uint32_t *force_rebalance_nodes)
1407 struct ctdb_public_ip ip;
1409 struct public_ip_list *all_ips, *tmp_ip;
1411 struct timeval timeout;
1412 struct client_async_data *async_data;
1413 struct ctdb_client_control_state *state;
1414 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1415 struct ipalloc_state *ipalloc_state;
1416 struct ctdb_public_ip_list *known_ips, *available_ips;
1417 struct takeover_callback_data *takeover_data;
1419 /* Initialise fail callback data to be used with
1420 * takeover_run_fail_callback(). A failure in any of the
1421 * following steps will cause an early return, so this can be
1422 * reused for each of those steps without re-initialising. */
1423 takeover_data = takeover_callback_data_init(tmp_ctx,
1425 if (takeover_data == NULL) {
1426 talloc_free(tmp_ctx);
1430 /* Default timeout for early jump to IPREALLOCATED. See below
1431 * for explanation of 3 times... */
1432 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
1435 * ip failover is completely disabled, just send out the
1436 * ipreallocated event.
1438 if (ctdb->tunable.disable_ip_failover != 0) {
1442 ipalloc_state = ipalloc_state_init(
1443 tmp_ctx, ctdb->num_nodes,
1444 determine_algorithm(&ctdb->tunable),
1445 (ctdb->tunable.no_ip_takeover != 0),
1446 (ctdb->tunable.no_ip_failback != 0),
1447 (ctdb->tunable.no_ip_host_on_all_disabled != 0),
1448 force_rebalance_nodes);
1449 if (ipalloc_state == NULL) {
1450 talloc_free(tmp_ctx);
1454 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1456 ("Failed to set IP flags - aborting takeover run\n"));
1457 talloc_free(tmp_ctx);
1461 /* Fetch known/available public IPs from each active node */
1462 /* Fetch lists of known public IPs from all nodes */
1463 known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1465 if (known_ips == NULL) {
1466 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1467 talloc_free(tmp_ctx);
1470 available_ips = ctdb_fetch_remote_public_ips(
1471 ctdb, ipalloc_state, nodemap,
1472 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1473 if (available_ips == NULL) {
1474 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1475 talloc_free(tmp_ctx);
1479 ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips);
1481 if (! ipalloc_can_host_ips(ipalloc_state)) {
1482 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1486 /* Do the IP reassignment calculations */
1487 all_ips = ipalloc(ipalloc_state);
1488 if (all_ips == NULL) {
1489 talloc_free(tmp_ctx);
1493 /* Now tell all nodes to release any public IPs they should not
1494 * host. This will be a NOOP on nodes that don't currently
1495 * hold the given IP.
1497 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1498 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1500 async_data->fail_callback = takeover_run_fail_callback;
1501 async_data->callback_data = takeover_data;
1503 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1505 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
1506 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1507 * seconds. However, RELEASE_IP can take longer due to TCP
1508 * connection killing, so sometimes needs more time.
1509 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1510 * seconds across all 3 stages. No explicit expiry checks are
1511 * needed before each stage because tevent is smart enough to
1512 * fire the timeouts even if they are in the past. Initialise
1513 * this here so it explicitly covers the stages we're
1514 * interested in but, in particular, not the time taken by the
1517 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
1519 /* Send a RELEASE_IP to all nodes that should not be hosting
1520 * each IP. For each IP, all but one of these will be
1521 * redundant. However, the redundant ones are used to tell
1522 * nodes which node should be hosting the IP so that commands
1523 * like "ctdb ip" can display a particular nodes idea of who
1524 * is hosting what. */
1525 for (i=0;i<nodemap->num;i++) {
1526 /* don't talk to unconnected nodes, but do talk to banned nodes */
1527 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1531 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1532 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1533 /* This node should be serving this
1534 vnn so don't tell it to release the ip
1538 ip.pnn = tmp_ip->pnn;
1539 ip.addr = tmp_ip->addr;
1541 data.dsize = sizeof(ip);
1542 data.dptr = (uint8_t *)&ip;
1543 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1544 0, CTDB_CONTROL_RELEASE_IP, 0,
1547 if (state == NULL) {
1548 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1549 talloc_free(tmp_ctx);
1553 ctdb_client_async_add(async_data, state);
1556 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1558 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1561 talloc_free(async_data);
1564 /* For each IP, send a TAKEOVER_IP to the node that should be
1565 * hosting it. Many of these will often be redundant (since
1566 * the allocation won't have changed) but they can be useful
1567 * to recover from inconsistencies. */
1568 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1569 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1571 async_data->fail_callback = takeover_run_fail_callback;
1572 async_data->callback_data = takeover_data;
1574 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1575 if (tmp_ip->pnn == -1) {
1576 /* this IP won't be taken over */
1580 ip.pnn = tmp_ip->pnn;
1581 ip.addr = tmp_ip->addr;
1583 data.dsize = sizeof(ip);
1584 data.dptr = (uint8_t *)&ip;
1585 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1586 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1587 data, async_data, &timeout, NULL);
1588 if (state == NULL) {
1589 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1590 talloc_free(tmp_ctx);
1594 ctdb_client_async_add(async_data, state);
1596 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1598 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1604 * Tell all nodes to run eventscripts to process the
1605 * "ipreallocated" event. This can do a lot of things,
1606 * including restarting services to reconfigure them if public
1607 * IPs have moved. Once upon a time this event only used to
1610 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1611 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1614 NULL, takeover_run_fail_callback,
1618 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1622 talloc_free(tmp_ctx);
1626 takeover_run_process_failures(ctdb, takeover_data);
1627 talloc_free(tmp_ctx);
1633 destroy a ctdb_client_ip structure
/*
 * Destructor for a struct ctdb_client_ip (installed with
 * talloc_set_destructor() in ctdb_control_tcp_client()).  Logs the
 * client address and port at DEBUG_DEBUG level and unlinks the entry
 * from ip->ctdb->client_ip_list.
 */
1635 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1637 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1638 ctdb_addr_to_str(&ip->addr),
1639 ntohs(ip->addr.ip.sin_port),
1642 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1647 called by a client to inform us of a TCP connection that it is managing
1648 that should be tickled with an ACK when IP takeover is done
/*
 * Control handler: register a TCP connection on behalf of a local
 * client.
 *
 * indata.dptr is read as a struct ctdb_connection.  Both endpoints are
 * canonicalised in place and the destination is looked up with
 * find_public_ip_vnn().  If the address belongs to another node
 * (vnn->pnn != ctdb->pnn) an error is logged and the call fails.
 * Otherwise the connection is recorded twice under the client talloc
 * context: as a ctdb_client_ip on ctdb->client_ip_list and as a
 * ctdb_tcp_list entry on client->tcp_list.  CTDB_CONTROL_TCP_ADD is
 * then broadcast to all connected nodes with CTDB_CTRL_FLAG_NOREPLY.
 *
 * NOTE(review): client comes straight from reqid_find() and is
 * dereferenced without a NULL check in the lines shown; indata.dsize
 * is not checked here either.  Confirm both are guaranteed by the
 * caller.  Return statements are not visible in this view.
 */
1650 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1653 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1654 struct ctdb_connection *tcp_sock = NULL;
1655 struct ctdb_tcp_list *tcp;
1656 struct ctdb_connection t;
1659 struct ctdb_client_ip *ip;
1660 struct ctdb_vnn *vnn;
1661 ctdb_sock_addr addr;
1663 /* If we don't have public IPs, tickles are useless */
1664 if (ctdb->vnn == NULL) {
1668 tcp_sock = (struct ctdb_connection *)indata.dptr;
1670 addr = tcp_sock->src;
1671 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1672 addr = tcp_sock->dst;
1673 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1676 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1677 vnn = find_public_ip_vnn(ctdb, &addr);
1679 switch (addr.sa.sa_family) {
1681 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1682 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1683 ctdb_addr_to_str(&addr)));
1687 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1688 ctdb_addr_to_str(&addr)));
1691 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1697 if (vnn->pnn != ctdb->pnn) {
1698 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1699 ctdb_addr_to_str(&addr),
1700 client_id, client->pid));
1701 /* failing this call will tell smbd to die */
1705 ip = talloc(client, struct ctdb_client_ip);
1706 CTDB_NO_MEMORY(ctdb, ip);
1710 ip->client_id = client_id;
1711 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1712 DLIST_ADD(ctdb->client_ip_list, ip);
1714 tcp = talloc(client, struct ctdb_tcp_list);
1715 CTDB_NO_MEMORY(ctdb, tcp);
1717 tcp->connection.src = tcp_sock->src;
1718 tcp->connection.dst = tcp_sock->dst;
1720 DLIST_ADD(client->tcp_list, tcp);
1722 t.src = tcp_sock->src;
1723 t.dst = tcp_sock->dst;
1725 data.dptr = (uint8_t *)&t;
1726 data.dsize = sizeof(t);
1728 switch (addr.sa.sa_family) {
1730 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1731 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1732 ctdb_addr_to_str(&tcp_sock->src),
1733 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1736 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1737 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1738 ctdb_addr_to_str(&tcp_sock->src),
1739 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1742 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1746 /* tell all nodes about this tcp connection */
1747 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1748 CTDB_CONTROL_TCP_ADD,
1749 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1751 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1759 find a tcp address on a list
/*
 * Linear search of array for an entry whose src and dst both match
 * those of tcp according to ctdb_same_sockaddr().  On a match the
 * returned pointer refers to the element inside array->connections,
 * so it is only valid while that array is.
 *
 * NOTE(review): the result for a NULL array or for no match is not
 * visible in this view - presumably NULL, as callers compare the
 * result against NULL.
 */
1761 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1762 struct ctdb_connection *tcp)
1766 if (array == NULL) {
1770 for (i=0;i<array->num;i++) {
1771 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1772 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1773 return &array->connections[i];
1782 called by a daemon to inform us of a TCP connection that one of its
1783 clients is managing and that should be tickled with an ACK when IP takeover is
1786 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1788 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1789 struct ctdb_tcp_array *tcparray;
1790 struct ctdb_connection tcp;
1791 struct ctdb_vnn *vnn;
1793 /* If we don't have public IPs, tickles are useless */
1794 if (ctdb->vnn == NULL) {
1798 vnn = find_public_ip_vnn(ctdb, &p->dst);
1800 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1801 ctdb_addr_to_str(&p->dst)));
1807 tcparray = vnn->tcp_array;
1809 /* If this is the first tickle */
1810 if (tcparray == NULL) {
1811 tcparray = talloc(vnn, struct ctdb_tcp_array);
1812 CTDB_NO_MEMORY(ctdb, tcparray);
1813 vnn->tcp_array = tcparray;
1816 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1817 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1819 tcparray->connections[tcparray->num].src = p->src;
1820 tcparray->connections[tcparray->num].dst = p->dst;
1823 if (tcp_update_needed) {
1824 vnn->tcp_update_needed = true;
1830 /* Do we already have this tickle ?*/
1833 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1834 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1835 ctdb_addr_to_str(&tcp.dst),
1836 ntohs(tcp.dst.ip.sin_port),
1841 /* A new tickle, we must add it to the array */
1842 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1843 struct ctdb_connection,
1845 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1847 tcparray->connections[tcparray->num].src = p->src;
1848 tcparray->connections[tcparray->num].dst = p->dst;
1851 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1852 ctdb_addr_to_str(&tcp.dst),
1853 ntohs(tcp.dst.ip.sin_port),
1856 if (tcp_update_needed) {
1857 vnn->tcp_update_needed = true;
/*
 * Remove conn from the tickle array of vnn.
 *
 * Two cases are only logged at DEBUG_INFO level: vnn has no tickle
 * array, or conn is not found by ctdb_tcp_find().  Otherwise the last
 * array element is copied over the slot being removed and the count is
 * decremented, so element order is not preserved.  When the count
 * reaches 0 the whole array is freed and vnn->tcp_array reset to NULL.
 * vnn->tcp_update_needed is set after a removal.
 */
1864 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1866 struct ctdb_connection *tcpp;
1872 /* if the array is empty we cant remove it
1873 and we don't need to do anything
1875 if (vnn->tcp_array == NULL) {
1876 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1877 ctdb_addr_to_str(&conn->dst),
1878 ntohs(conn->dst.ip.sin_port)));
1883 /* See if we know this connection
1884 if we don't know this connection then we dont need to do anything
1886 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1888 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1889 ctdb_addr_to_str(&conn->dst),
1890 ntohs(conn->dst.ip.sin_port)));
1895 /* We need to remove this entry from the array.
1896 Instead of allocating a new array and copying data to it
1897 we cheat and just copy the last entry in the existing array
1898 to the entry that is to be removed and just shrink the
1901 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1902 vnn->tcp_array->num--;
1904 /* If we deleted the last entry we also need to remove the entire array
1906 if (vnn->tcp_array->num == 0) {
1907 talloc_free(vnn->tcp_array);
1908 vnn->tcp_array = NULL;
1911 vnn->tcp_update_needed = true;
1913 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1914 ctdb_addr_to_str(&conn->src),
1915 ntohs(conn->src.ip.sin_port)));
1920 called by a daemon to inform us that a TCP connection that one of its
1921 clients used is no longer needed in the tickle database
/*
 * Control handler: indata.dptr is read as a struct ctdb_connection.
 * The vnn for its destination address is looked up with
 * find_public_ip_vnn() and the connection is passed to
 * ctdb_remove_connection().  A failed lookup is logged.
 *
 * NOTE(review): return values are not visible in this view.
 */
1923 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1925 struct ctdb_vnn *vnn;
1926 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1928 /* If we don't have public IPs, tickles are useless */
1929 if (ctdb->vnn == NULL) {
1933 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1936 (__location__ " unable to find public address %s\n",
1937 ctdb_addr_to_str(&conn->dst)));
1941 ctdb_remove_connection(vnn, conn);
1948 Called when another daemon starts - causes all tickles for all
1949 public addresses we are serving to be sent to the new node on the
1950 next check. This actually causes the next scheduled call to
1951 ctdb_update_tcp_tickles() to update all nodes. This is simple and
1952 doesn't require careful error handling.
/*
 * Control handler for a startup notification from node pnn.  Logs the
 * sender at DEBUG_INFO level and sets tcp_update_needed on every entry
 * of ctdb->vnn.
 */
1954 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1956 struct ctdb_vnn *vnn;
1958 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1959 (unsigned long) pnn));
1961 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1962 vnn->tcp_update_needed = true;
1970 called when a client structure goes away - hook to remove
1971 elements from the tcp_list in all daemons
/*
 * Drain client->tcp_list.  Each entry is unlinked from the list and
 * its destination address is looked up with find_public_ip_vnn().
 * The connection is removed with ctdb_remove_connection() only when
 * the vnn is hosted on this node (vnn->pnn == client->ctdb->pnn).
 * A failed lookup is logged.
 */
1973 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1975 while (client->tcp_list) {
1976 struct ctdb_vnn *vnn;
1977 struct ctdb_tcp_list *tcp = client->tcp_list;
1978 struct ctdb_connection *conn = &tcp->connection;
1980 DLIST_REMOVE(client->tcp_list, tcp);
1982 vnn = find_public_ip_vnn(client->ctdb,
1986 (__location__ " unable to find public address %s\n",
1987 ctdb_addr_to_str(&conn->dst)));
1991 /* If the IP address is hosted on this node then
1992 * remove the connection. */
1993 if (vnn->pnn == client->ctdb->pnn) {
1994 ctdb_remove_connection(vnn, conn);
1997 /* Otherwise this function has been called because the
1998 * server IP address has been released to another node
1999 * and the client has exited. This means that we
2000 * should not delete the connection information. The
2001 * takeover node processes connections too. */
/*
 * Walk ctdb->vnn and release the public addresses present on this
 * host.
 *
 * Per vnn, as visible here:
 *  - if ctdb_sys_have_ip() reports the address is not present, only
 *    the interface assignment is dropped (ctdb_vnn_unassign_iface());
 *  - if update_in_flight is already set, a warning is logged;
 *  - otherwise update_in_flight is set, the CTDB_EVENT_RELEASE_IP
 *    event script is run with interface, address and netmask bits,
 *    and ctdb_sys_have_ip() is used again to detect a failed release;
 *  - on success release_ip_post() is called.
 * The number of released addresses is logged at the end.
 *
 * NOTE(review): the guard here tests disable_ip_failover == 1 while
 * ctdb_takeover_run() tests != 0 - confirm the difference is intended.
 */
2006 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2008 struct ctdb_vnn *vnn, *next;
2011 if (ctdb->tunable.disable_ip_failover == 1) {
2015 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
2016 /* vnn can be freed below in release_ip_post() */
2019 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2020 ctdb_vnn_unassign_iface(ctdb, vnn);
2024 /* Don't allow multiple releases at once. Some code,
2025 * particularly ctdb_tickle_sentenced_connections() is
2027 if (vnn->update_in_flight) {
2028 DEBUG(DEBUG_WARNING,
2030 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2031 ctdb_addr_to_str(&vnn->public_address),
2032 vnn->public_netmask_bits,
2033 ctdb_vnn_iface_string(vnn)));
2036 vnn->update_in_flight = true;
2038 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2039 ctdb_addr_to_str(&vnn->public_address),
2040 vnn->public_netmask_bits,
2041 ctdb_vnn_iface_string(vnn)));
2043 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2044 ctdb_vnn_iface_string(vnn),
2045 ctdb_addr_to_str(&vnn->public_address),
2046 vnn->public_netmask_bits);
2047 /* releaseip timeouts are converted to success, so to
2048 * detect failures just check if the IP address is
2051 if (ctdb_sys_have_ip(&vnn->public_address)) {
2054 " IP address %s not released\n",
2055 ctdb_addr_to_str(&vnn->public_address)));
2056 vnn->update_in_flight = false;
2060 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
2062 vnn->update_in_flight = false;
2067 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2072 get list of public IPs
/*
 * Control handler: return the public IPs configured on this node as a
 * struct ctdb_public_ip_list_old in outdata (allocated under outdata).
 *
 * When CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE is set in c->flags, vnns
 * for which ctdb_vnn_available() is false are left out.  The buffer is
 * sized for num entries; outdata->dsize is then recomputed from i, the
 * number of entries actually filled in.
 */
2074 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2075 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2078 struct ctdb_public_ip_list_old *ips;
2079 struct ctdb_vnn *vnn;
2080 bool only_available = false;
2082 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2083 only_available = true;
2086 /* count how many public ip structures we have */
2088 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2092 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2093 num*sizeof(struct ctdb_public_ip);
2094 ips = talloc_zero_size(outdata, len);
2095 CTDB_NO_MEMORY(ctdb, ips);
2098 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2099 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2102 ips->ips[i].pnn = vnn->pnn;
2103 ips->ips[i].addr = vnn->public_address;
2107 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2108 i*sizeof(struct ctdb_public_ip);
2110 outdata->dsize = len;
2111 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: return details of one public address as a struct
 * ctdb_public_ip_info_old in outdata (allocated under outdata).
 *
 * indata.dptr is read as a ctdb_sock_addr and looked up with
 * find_public_ip_vnn(); an unknown address is logged as an error.
 * The reply carries the address, its current pnn and one ctdb_iface
 * entry (name, link state, reference count) per interface on
 * vnn->ifaces.  active_idx starts as 0xFFFFFFFF and is set to the
 * index of the entry whose interface equals vnn->iface.
 */
2117 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2118 struct ctdb_req_control_old *c,
2123 ctdb_sock_addr *addr;
2124 struct ctdb_public_ip_info_old *info;
2125 struct ctdb_vnn *vnn;
2126 struct vnn_interface *iface;
2128 addr = (ctdb_sock_addr *)indata.dptr;
2130 vnn = find_public_ip_vnn(ctdb, addr);
2132 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2133 "'%s'not a public address\n",
2134 ctdb_addr_to_str(addr)));
2138 /* count how many public ip structures we have */
2140 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
2144 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2145 num*sizeof(struct ctdb_iface);
2146 info = talloc_zero_size(outdata, len);
2147 CTDB_NO_MEMORY(ctdb, info);
2149 info->ip.addr = vnn->public_address;
2150 info->ip.pnn = vnn->pnn;
2151 info->active_idx = 0xFFFFFFFF;
2154 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
2155 struct ctdb_interface *cur;
2158 if (vnn->iface == cur) {
2159 info->active_idx = i;
2161 strncpy(info->ifaces[i].name, cur->name,
2162 sizeof(info->ifaces[i].name));
2163 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2164 info->ifaces[i].link_state = cur->link_up;
2165 info->ifaces[i].references = cur->references;
2170 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2171 i*sizeof(struct ctdb_iface);
2173 outdata->dsize = len;
2174 outdata->dptr = (uint8_t *)info;
/*
 * Control handler: return every interface on ctdb->ifaces as a struct
 * ctdb_iface_list_old in outdata (allocated under outdata).  Each
 * entry carries the interface name (explicitly NUL-terminated after
 * strncpy), its link state and its reference count.
 */
2179 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2180 struct ctdb_req_control_old *c,
2184 struct ctdb_iface_list_old *ifaces;
2185 struct ctdb_interface *cur;
2187 /* count how many public ip structures we have */
2189 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2193 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2194 num*sizeof(struct ctdb_iface);
2195 ifaces = talloc_zero_size(outdata, len);
2196 CTDB_NO_MEMORY(ctdb, ifaces);
2199 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2200 strncpy(ifaces->ifaces[i].name, cur->name,
2201 sizeof(ifaces->ifaces[i].name));
2202 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2203 ifaces->ifaces[i].link_state = cur->link_up;
2204 ifaces->ifaces[i].references = cur->references;
2208 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2209 i*sizeof(struct ctdb_iface);
2211 outdata->dsize = len;
2212 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a named interface.
 *
 * indata.dptr is read as a struct ctdb_iface.  The request is checked
 * before use: name[CTDB_IFACE_SIZE] must be NUL, link_state must be a
 * value accepted by the switch, and references must be 0.  Each
 * violation is logged at DEBUG_ERR level.  The interface is then
 * looked up with ctdb_find_iface(); a state change is logged and
 * stored in iface->link_up.
 *
 * NOTE(review): the accepted link_state values and the return
 * statements are not visible in this view.
 */
2217 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2218 struct ctdb_req_control_old *c,
2221 struct ctdb_iface *info;
2222 struct ctdb_interface *iface;
2223 bool link_up = false;
2225 info = (struct ctdb_iface *)indata.dptr;
2227 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2228 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2229 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2230 len, len, info->name));
2234 switch (info->link_state) {
2242 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2243 (unsigned int)info->link_state));
2247 if (info->references != 0) {
2248 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2249 (unsigned int)info->references));
2253 iface = ctdb_find_iface(ctdb, info->name);
2254 if (iface == NULL) {
2258 if (link_up == iface->link_up) {
2263 ("iface[%s] has changed it's link status %s => %s\n",
2265 iface->link_up?"up":"down",
2266 link_up?"up":"down"));
2268 iface->link_up = link_up;
2274 called by a daemon to inform us of the entire list of TCP tickles for
2275 a particular public address.
2276 this control should only be sent by the node that is currently serving
2277 that public address.
/*
 * Control handler: replace the tickle array of a public address with
 * the list carried in indata (a struct ctdb_tickle_list_old).
 *
 * The blob is size-checked twice: first that it holds the fixed
 * header, then that it holds list->num connections.  The vnn is found
 * from list->addr.  If this node hosts the address (vnn->pnn ==
 * ctdb->pnn) the request is logged as redundant.  Otherwise the old
 * array is freed and a new one, parented to vnn, is filled by memcpy
 * from the request and installed as vnn->tcp_array.
 *
 * NOTE(review): return statements are not visible in this view.
 */
2279 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2281 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2282 struct ctdb_tcp_array *tcparray;
2283 struct ctdb_vnn *vnn;
2285 /* We must at least have tickles.num or else we cant verify the size
2286 of the received data blob
2288 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2289 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2293 /* verify that the size of data matches what we expect */
2294 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2295 + sizeof(struct ctdb_connection) * list->num) {
2296 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2300 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2301 ctdb_addr_to_str(&list->addr)));
2303 vnn = find_public_ip_vnn(ctdb, &list->addr);
2305 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2306 ctdb_addr_to_str(&list->addr)));
2311 if (vnn->pnn == ctdb->pnn) {
2313 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2314 ctdb_addr_to_str(&list->addr)));
2318 /* remove any old ticklelist we might have */
2319 talloc_free(vnn->tcp_array);
2320 vnn->tcp_array = NULL;
2322 tcparray = talloc(vnn, struct ctdb_tcp_array);
2323 CTDB_NO_MEMORY(ctdb, tcparray);
2325 tcparray->num = list->num;
2327 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2328 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2330 memcpy(tcparray->connections, &list->connections[0],
2331 sizeof(struct ctdb_connection)*tcparray->num);
2333 /* We now have a new fresh tickle list array for this vnn */
2334 vnn->tcp_array = tcparray;
2340 called to return the full list of tickles for the public address associated
2341 with the provided vnn
/*
 * Control handler: return the tickle list of a public address as a
 * struct ctdb_tickle_list_old in outdata (allocated under outdata).
 *
 * indata.dptr is read as a ctdb_sock_addr.  Its port, obtained with
 * ctdb_addr_to_port(), selects connections by destination port: the
 * function first counts the matching connections to size the reply,
 * then copies them into list->connections.
 *
 * NOTE(review): the condition selecting the "All connections" branch
 * is not visible in this view - presumably a port of 0 means no
 * filtering; confirm against the full file.
 */
2343 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2345 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2346 struct ctdb_tickle_list_old *list;
2347 struct ctdb_tcp_array *tcparray;
2349 struct ctdb_vnn *vnn;
2352 vnn = find_public_ip_vnn(ctdb, addr);
2354 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2355 ctdb_addr_to_str(addr)));
2360 port = ctdb_addr_to_port(addr);
2362 tcparray = vnn->tcp_array;
2364 if (tcparray != NULL) {
2366 /* All connections */
2367 num = tcparray->num;
2369 /* Count connections for port */
2370 for (i = 0; i < tcparray->num; i++) {
2371 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2378 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2379 + sizeof(struct ctdb_connection) * num;
2381 outdata->dptr = talloc_size(outdata, outdata->dsize);
2382 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2383 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2393 for (i = 0; i < tcparray->num; i++) {
2395 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2396 list->connections[num] = tcparray->connections[i];
2406 set the list of all tcp tickles for a public address
/*
 * Send the tickle list for addr to all nodes.
 *
 * A struct ctdb_tickle_list_old sized for num connections is built in
 * a temporary buffer parented to ctdb, filled by memcpy from tcparray,
 * and sent with CTDB_CONTROL_SET_TCP_TICKLE_LIST to CTDB_BROADCAST_ALL
 * using CTDB_CTRL_FLAG_NOREPLY.  A send failure is logged.  The buffer
 * is freed after the send.
 *
 * NOTE(review): how a NULL tcparray is handled is not visible in this
 * view.
 */
2408 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2409 ctdb_sock_addr *addr,
2410 struct ctdb_tcp_array *tcparray)
2414 struct ctdb_tickle_list_old *list;
2417 num = tcparray->num;
2422 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2423 sizeof(struct ctdb_connection) * num;
2424 data.dptr = talloc_size(ctdb, data.dsize);
2425 CTDB_NO_MEMORY(ctdb, data.dptr);
2427 list = (struct ctdb_tickle_list_old *)data.dptr;
2431 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2434 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2435 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2436 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2438 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2442 talloc_free(data.dptr);
2449 perform tickle updates if required
/*
 * Timer callback: push pending tickle list updates to the cluster.
 *
 * private_data is the ctdb context.  Only vnns hosted on this node
 * (ctdb->pnn == vnn->pnn) with tcp_update_needed set are considered.
 * For those, ctdb_send_set_tcp_tickles_for_ip() is called; a failure
 * is logged, and tcp_update_needed is cleared next to the success log.
 * The callback then re-arms itself on ctdb->tickle_update_context
 * after tunable.tickle_update_interval seconds.
 */
2451 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2452 struct tevent_timer *te,
2453 struct timeval t, void *private_data)
2455 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2457 struct ctdb_vnn *vnn;
2459 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2460 /* we only send out updates for public addresses that
2463 if (ctdb->pnn != vnn->pnn) {
2466 /* We only send out the updates if we need to */
2467 if (!vnn->tcp_update_needed) {
2470 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2471 &vnn->public_address,
2474 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2475 ctdb_addr_to_str(&vnn->public_address)));
2478 ("Sent tickle update for public address %s\n",
2479 ctdb_addr_to_str(&vnn->public_address)));
2480 vnn->tcp_update_needed = false;
2484 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2485 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2486 ctdb_update_tcp_tickles, ctdb);
2490 start periodic update of tcp tickles
/*
 * Create ctdb->tickle_update_context (a talloc child of ctdb) and
 * schedule the first run of ctdb_update_tcp_tickles() after
 * tunable.tickle_update_interval seconds.  The timer is parented to
 * that context.
 */
2492 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2494 ctdb->tickle_update_context = talloc_new(ctdb);
2496 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2497 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2498 ctdb_update_tcp_tickles, ctdb);
/*
 * State carried across the repeated gratuitous ARP timer callbacks.
 * Besides the members shown, send_gratious_arp() also reads the
 * members iface and count, whose declarations are not visible in
 * this view.
 */
2504 struct control_gratious_arp {
2505 struct ctdb_context *ctdb;
2506 ctdb_sock_addr addr;
2512 send a control_gratuitous arp
/*
 * Timer callback: send one gratuitous ARP for arp->addr on arp->iface
 * with ctdb_sys_send_arp().  A failure is logged together with
 * strerror(errno).  arp->count is compared with CTDB_ARP_REPEAT, and
 * the callback re-arms itself, parented to arp, after
 * CTDB_ARP_INTERVAL seconds.
 *
 * NOTE(review): the count update and the cleanup once the repeat
 * limit is reached are not visible in this view.
 */
2514 static void send_gratious_arp(struct tevent_context *ev,
2515 struct tevent_timer *te,
2516 struct timeval t, void *private_data)
2519 struct control_gratious_arp *arp = talloc_get_type(private_data,
2520 struct control_gratious_arp);
2522 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2524 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2525 arp->iface, strerror(errno)));
2530 if (arp->count == CTDB_ARP_REPEAT) {
2535 tevent_add_timer(arp->ctdb->ev, arp,
2536 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2537 send_gratious_arp, arp);
/*
 * Control handler: start a series of gratuitous ARPs.
 *
 * indata.dptr is read as a struct ctdb_addr_info_old.  indata.dsize is
 * checked against the fixed header and against header plus
 * gratious_arp->len.  A control_gratious_arp state is allocated under
 * ctdb, the address is copied, the interface name is duplicated under
 * that state, and send_gratious_arp() is scheduled with a zero timeout.
 *
 * NOTE(review): return statements are not visible in this view.
 */
2544 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2546 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2547 struct control_gratious_arp *arp;
2549 /* verify the size of indata */
2550 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2551 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2552 (unsigned)indata.dsize,
2553 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2557 ( offsetof(struct ctdb_addr_info_old, iface)
2558 + gratious_arp->len ) ){
2560 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2561 "but should be %u bytes\n",
2562 (unsigned)indata.dsize,
2563 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2568 arp = talloc(ctdb, struct control_gratious_arp);
2569 CTDB_NO_MEMORY(ctdb, arp);
2572 arp->addr = gratious_arp->addr;
2573 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2574 CTDB_NO_MEMORY(ctdb, arp->iface);
2577 tevent_add_timer(arp->ctdb->ev, arp,
2578 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address to this node.
 *
 * indata.dptr is read as a struct ctdb_addr_info_old and indata.dsize
 * is checked against the fixed header and against header plus
 * pub->len.  The address is logged at DEBUG_NOTICE level and passed,
 * with its mask and interface string, to ctdb_add_public_address().
 * A failure of that call is logged.
 *
 * NOTE(review): return statements are not visible in this view.
 */
2583 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2585 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2588 /* verify the size of indata */
2589 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2590 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2594 ( offsetof(struct ctdb_addr_info_old, iface)
2597 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2598 "but should be %u bytes\n",
2599 (unsigned)indata.dsize,
2600 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2604 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2606 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2609 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * Control handler: delete a public address from this node.
 *
 * indata.dptr is read as a struct ctdb_addr_info_old and indata.dsize
 * is checked against the fixed header and against header plus
 * pub->len.  ctdb->vnn is searched with ctdb_same_ip().  If this node
 * currently hosts the address (vnn->pnn == ctdb->pnn) the vnn is only
 * marked delete_pending; otherwise do_delete_ip() is called.  An
 * address that is not found is logged as an error.
 *
 * NOTE(review): return statements are not visible in this view.
 */
2616 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2618 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2619 struct ctdb_vnn *vnn;
2621 /* verify the size of indata */
2622 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2623 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2627 ( offsetof(struct ctdb_addr_info_old, iface)
2630 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2631 "but should be %u bytes\n",
2632 (unsigned)indata.dsize,
2633 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2637 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2639 /* walk over all public addresses until we find a match */
2640 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2641 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2642 if (vnn->pnn == ctdb->pnn) {
2643 /* This IP is currently being hosted.
2644 * Defer the deletion until the next
2645 * takeover run. "ctdb reloadips" will
2646 * always cause a takeover run. "ctdb
2647 * delip" will now need an explicit
2648 * "ctdb ipreallocated" afterwards. */
2649 vnn->delete_pending = true;
2651 /* This IP is not hosted on the
2652 * current node so just delete it
2654 do_delete_ip(ctdb, vnn);
2661 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2662 ctdb_addr_to_str(&pub->addr)));
/*
 * State handed to ctdb_ipreallocated_callback(): holds the pending
 * control request so the reply can be sent once the event script has
 * finished.
 */
2667 struct ipreallocated_callback_state {
2668 struct ctdb_req_control_old *c;
/*
 * Completion callback for the ipreallocated event script.  p is the
 * ipreallocated_callback_state.  A failure is logged with its status,
 * and a status of -ETIME makes this node ban itself with
 * ctdb_ban_self().  The stored control request is then answered with
 * the script status.
 */
2671 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2672 int status, void *p)
2674 struct ipreallocated_callback_state *state =
2675 talloc_get_type(p, struct ipreallocated_callback_state);
2679 (" \"ipreallocated\" event script failed (status %d)\n",
2681 if (status == -ETIME) {
2682 ctdb_ban_self(ctdb);
2686 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2690 /* A control to run the ipreallocated event */
/*
 * Control handler: run the CTDB_EVENT_IPREALLOCATED event script and
 * reply asynchronously.
 *
 * A callback state is allocated under ctdb and passed to
 * ctdb_event_script_callback() with ctdb_ipreallocated_callback() as
 * the completion callback.  A failure to start the script is logged.
 * On success the request c is stolen onto the state and *async_reply
 * is set to true, so the reply is sent from the callback.
 *
 * NOTE(review): return statements are not visible in this view.
 */
2691 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2692 struct ctdb_req_control_old *c,
2696 struct ipreallocated_callback_state *state;
2698 state = talloc(ctdb, struct ipreallocated_callback_state);
2699 CTDB_NO_MEMORY(ctdb, state);
2701 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2703 ret = ctdb_event_script_callback(ctdb, state,
2704 ctdb_ipreallocated_callback, state,
2705 CTDB_EVENT_IPREALLOCATED,
2709 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2714 /* tell the control that we will be reply asynchronously */
2715 state->c = talloc_steal(state, c);
2716 *async_reply = true;
/*
 * Bookkeeping for one reloadips operation run in a child process.
 * Besides the members shown, the reloadips functions also use the
 * members status, fd and child, whose declarations are not visible in
 * this view.
 */
2722 struct ctdb_reloadips_handle {
2723 struct ctdb_context *ctdb;
2724 struct ctdb_req_control_old *c;
2728 struct tevent_fd *fde;
/*
 * Destructor for a reloadips handle.  If h is the handle currently
 * recorded in ctdb->reload_ips, that pointer is cleared.  The stored
 * control request is answered with h->status and the child process
 * is sent SIGKILL via ctdb_kill().
 */
2731 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2733 if (h == h->ctdb->reload_ips) {
2734 h->ctdb->reload_ips = NULL;
2737 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2740 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer callback for a reloadips operation.  private_data is the
 * ctdb_reloadips_handle.
 *
 * NOTE(review): everything after the cast is missing from this view,
 * so what happens on timeout is not documented here.
 */
2744 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2745 struct tevent_timer *te,
2746 struct timeval t, void *private_data)
2748 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd event callback registered by ctdb_control_reload_public_ips() on
 * h->fd[0].
 *
 * private_data - the struct ctdb_reloadips_handle of the reload
 *
 * Reads one byte from h->fd[0]. A short read or a non-zero byte is
 * logged as a failure of the child process.
 * NOTE(review): the declarations of res and ret and the statements that
 * follow the check are not visible in this listing.
 */
2753 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2754 struct tevent_fd *fde,
2755 uint16_t flags, void *private_data)
2757 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/* The child writes exactly one result byte; zero means success. */
2762 ret = sys_read(h->fd[0], &res, 1);
2763 if (ret < 1 || res != 0) {
2764 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Work done in the reloadips child process; called from
 * ctdb_control_reload_public_ips().
 *
 * Fetches the public IPs currently known to the local node, re-reads the
 * public addresses file, then sends CTDB_CONTROL_DEL_PUBLIC_IP for IPs the
 * node has but the file no longer lists and CTDB_CONTROL_ADD_PUBLIC_IP for
 * IPs configured in the file that the node does not have yet, and finally
 * waits for all of those controls.
 *
 * NOTE(review): this listing has lost lines (declarations of ret, i, data
 * and len, several conditions, the loop exits and the return statements),
 * so only the visible statements are described.
 */
2772 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
/* Scratch talloc context for the IP list, the async bookkeeping and the
 * control payloads. */
2774 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2775 struct ctdb_public_ip_list_old *ips;
2776 struct ctdb_vnn *vnn;
2777 struct client_async_data *async_data;
2778 struct timeval timeout;
2780 struct ctdb_client_control_state *state;
2784 CTDB_NO_MEMORY(ctdb, mem_ctx);
2786 /* Read IPs from local node */
2787 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2788 CTDB_CURRENT_NODE, mem_ctx, &ips);
2791 ("Unable to fetch public IPs from local node\n"));
2792 talloc_free(mem_ctx);
2796 /* Read IPs file - this is safe since this is a child process */
2798 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2799 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2800 talloc_free(mem_ctx);
/* Collects the controls sent below so they can be awaited together. */
2804 async_data = talloc_zero(mem_ctx, struct client_async_data);
2805 CTDB_NO_MEMORY(ctdb, async_data);
2807 /* Compare IPs between node and file for IPs to be deleted */
2808 for (i = 0; i < ips->num; i++) {
/* Look for the node IP among the addresses now in ctdb->vnn. */
2810 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2811 if (ctdb_same_ip(&vnn->public_address,
2812 &ips->ips[i].addr)) {
2813 /* IP is still in file */
2819 /* Delete IP ips->ips[i] */
2820 struct ctdb_addr_info_old *pub;
2823 ("IP %s no longer configured, deleting it\n",
2824 ctdb_addr_to_str(&ips->ips[i].addr)));
/* Zero-initialised payload; addr is the only member set visibly here. */
2826 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2827 CTDB_NO_MEMORY(ctdb, pub);
2829 pub->addr = ips->ips[i].addr;
2833 timeout = TAKEOVER_TIMEOUT();
2835 data.dsize = offsetof(struct ctdb_addr_info_old,
2837 data.dptr = (uint8_t *)pub;
/* Send the delete to the local node and track it in async_data. */
2839 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2840 CTDB_CONTROL_DEL_PUBLIC_IP,
2841 0, data, async_data,
2843 if (state == NULL) {
2846 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2850 ctdb_client_async_add(async_data, state);
2854 /* Compare IPs between node and file for IPs to be added */
2856 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2857 for (i = 0; i < ips->num; i++) {
2858 if (ctdb_same_ip(&vnn->public_address,
2859 &ips->ips[i].addr)) {
2860 /* IP already on node */
/* i reaching ips->num means the scan of the node list ended without a
 * match (presumably the match case leaves the loop early - that line is
 * not visible here). */
2864 if (i == ips->num) {
2865 /* Add IP vnn->public_address */
2866 struct ctdb_addr_info_old *pub;
2867 const char *ifaces = NULL;
2869 struct vnn_interface *iface = NULL;
2872 ("New IP %s configured, adding it\n",
2873 ctdb_addr_to_str(&vnn->public_address)));
/* Broadcast the PNN of this node to connected nodes under
 * CTDB_SRVID_REBALANCE_NODE; a failure is only logged as a warning. */
2875 uint32_t pnn = ctdb_get_pnn(ctdb);
2877 data.dsize = sizeof(pnn);
2878 data.dptr = (uint8_t *)&pnn;
2880 ret = ctdb_client_send_message(
2882 CTDB_BROADCAST_CONNECTED,
2883 CTDB_SRVID_REBALANCE_NODE,
2886 DEBUG(DEBUG_WARNING,
2887 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/* Build a comma-separated list of the interface names of this vnn.
 * NOTE(review): vnn->ifaces is dereferenced without a NULL check, the
 * talloc_asprintf() result is not checked before strlen() below, and the
 * strings are allocated on vnn rather than mem_ctx - confirm this is
 * acceptable for a short-lived child. */
2893 ifaces = vnn->ifaces->iface->name;
2894 iface = vnn->ifaces->next;
2895 while (iface != NULL) {
2896 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2897 iface->iface->name);
2898 iface = iface->next;
/* Payload is the fixed part up to iface plus the NUL-terminated list. */
2901 len = strlen(ifaces) + 1;
2902 pub = talloc_zero_size(mem_ctx,
2903 offsetof(struct ctdb_addr_info_old, iface) + len);
2904 CTDB_NO_MEMORY(ctdb, pub);
2906 pub->addr = vnn->public_address;
2907 pub->mask = vnn->public_netmask_bits;
/* NOTE(review): the assignment of pub->len is not visible in this
 * listing; presumably it is set to len before this copy - confirm. */
2909 memcpy(&pub->iface[0], ifaces, pub->len);
2911 timeout = TAKEOVER_TIMEOUT();
2913 data.dsize = offsetof(struct ctdb_addr_info_old,
2915 data.dptr = (uint8_t *)pub;
/* Send the add to the local node and track it in async_data. */
2917 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2918 CTDB_CONTROL_ADD_PUBLIC_IP,
2919 0, data, async_data,
2921 if (state == NULL) {
2924 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2928 ctdb_client_async_add(async_data, state);
/* Wait for every tracked add/delete control to finish. */
2932 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2933 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2937 talloc_free(mem_ctx);
2941 talloc_free(mem_ctx);
2945 /* This control is sent to force the node to re-read the public addresses file
2946 and drop any addresses we should no longer host, and add new addresses
2947 that we are now able to host
/*
 * Control handler that reloads the public IPs in a forked child and
 * answers the control asynchronously.
 *
 * ctdb        - daemon context
 * c           - the incoming control; re-parented onto the handle and
 *               answered from ctdb_reloadips_destructor()
 * async_reply - set to true at the end of the visible code
 *
 * NOTE(review): this listing has lost lines (error-path cleanup, the end
 * of the child branch, the return statements), so only the visible
 * statements are described.
 */
2949 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2951 struct ctdb_reloadips_handle *h;
/* Our own pid; the child passes it to ctdb_wait_for_process_to_exit(). */
2952 pid_t parent = getpid();
/* Discard a reload that is still recorded before starting a new one. */
2954 if (ctdb->reload_ips != NULL) {
2955 talloc_free(ctdb->reload_ips);
2956 ctdb->reload_ips = NULL;
2959 h = talloc(ctdb, struct ctdb_reloadips_handle);
2960 CTDB_NO_MEMORY(ctdb, h);
/* Pipe for the one-byte result: the child writes fd[1], the parent
 * reads fd[0].
 * NOTE(review): the message below names ctdb_freeze_lock although this
 * function handles reloadips - looks like a copy/paste leftover. */
2965 if (pipe(h->fd) == -1) {
2966 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2971 h->child = ctdb_fork(ctdb);
2972 if (h->child == (pid_t)-1) {
2973 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* Child branch. */
2981 if (h->child == 0) {
2982 signed char res = 0;
2986 prctl_set_comment("ctdb_reloadips");
2987 if (switch_from_server_to_client(ctdb) != 0) {
2988 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2991 res = ctdb_reloadips_child(ctdb);
2993 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
/* Hand the result byte to the parent.
 * NOTE(review): the return value of sys_write() is ignored. */
2997 sys_write(h->fd[1], &res, 1);
2998 ctdb_wait_for_process_to_exit(parent);
/* Presumably parent-only from here on; the end of the child branch is
 * not visible in this listing. */
3002 h->c = talloc_steal(h, c);
3005 set_close_on_exec(h->fd[0]);
/* From now on freeing h answers the control and kills the child. */
3007 talloc_set_destructor(h, ctdb_reloadips_destructor);
/* Watch fd[0] for the result byte.
 * NOTE(review): h->fde is not checked for NULL before it is passed to
 * tevent_fd_set_auto_close(). */
3010 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3011 ctdb_reloadips_child_handler, (void *)h);
3012 tevent_fd_set_auto_close(h->fde);
/* Arm ctdb_reloadips_timeout_event() to fire after 120 seconds; the
 * timer is allocated on h. */
3014 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3015 ctdb_reloadips_timeout_event, h);
3017 /* we reply later */
3018 *async_reply = true;