4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/system_socket.h"
43 #include "common/common.h"
44 #include "common/logging.h"
46 #include "server/ipalloc.h"
/*
 * TAKEOVER_TIMEOUT() builds a timeval from the takeover_timeout tunable.
 * It reads "ctdb" from the enclosing scope, so it can only be used where
 * a variable of that name is visible.
 */
48 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/*
 * CTDB_ARP_INTERVAL is the first argument given to timeval_current_ofs()
 * when ctdb_control_send_arp() re-arms its timer; CTDB_ARP_REPEAT is the
 * value arp->count is compared against in that same handler.
 */
50 #define CTDB_ARP_INTERVAL 1
51 #define CTDB_ARP_REPEAT 3
/*
 * A local network interface.  Instances are linked onto ctdb->ifaces
 * through the prev/next pointers (see DLIST_ADD in ctdb_add_local_iface()).
 */
53 struct ctdb_interface {
54 struct ctdb_interface *prev, *next;
/*
 * List node tying one ctdb_interface to a VNN; these nodes form the
 * vnn->ifaces list that is built in ctdb_add_public_address().
 */
60 struct vnn_interface {
61 struct vnn_interface *prev, *next;
62 struct ctdb_interface *iface;
65 /* state associated with a public ip address */
67 struct ctdb_vnn *prev, *next;
/* interface currently assigned to this address; may be NULL
   (iface_string() then reports "__none__") */
69 struct ctdb_interface *iface;
/* every interface this address may be hosted on */
70 struct vnn_interface *ifaces;
71 ctdb_sock_addr public_address;
72 uint8_t public_netmask_bits;
74 /* the node number that is serving this public address, if any.
75 If no node serves this ip it is set to -1 */
78 /* List of clients to tickle for this public address */
79 struct ctdb_tcp_array *tcp_array;
81 /* whether we need to update the other nodes with changes to our list
82 of connected clients */
83 bool tcp_update_needed;
85 /* a context to hang sending gratuitous arp events off */
86 TALLOC_CTX *takeover_ctx;
88 /* Set to true any time an update to this VNN is in flight.
89 This helps to avoid races. */
90 bool update_in_flight;
92 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
93 * address then this flag is set. It will be deleted in the
94 * release IP callback. */
/*
 * Return the interface's name, or the placeholder "__none__" when iface
 * is NULL, so the result can always be handed to a %s format.
 */
98 static const char *iface_string(const struct ctdb_interface *iface)
100 return (iface != NULL ? iface->name : "__none__");
/*
 * Printable name of the interface currently assigned to vnn; delegates
 * to iface_string(), which supplies "__none__" for a NULL vnn->iface.
 */
103 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
105 return iface_string(vnn->iface);
108 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
/*
 * Register a local interface by name.
 *
 * The name is first looked up with ctdb_find_iface(); a new zeroed
 * ctdb_interface is allocated on the ctdb talloc context, given a
 * talloc_strdup() copy of the name (owned by the new structure) and
 * linked onto ctdb->ifaces.
 *
 * A name longer than CTDB_IFACE_SIZE and both allocation failures are
 * logged at DEBUG_ERR.
 */
111 static struct ctdb_interface *
112 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
114 struct ctdb_interface *i;
116 if (strlen(iface) > CTDB_IFACE_SIZE) {
117 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
121 /* Verify that we don't have an entry for this interface yet */
122 i = ctdb_find_iface(ctdb, iface);
127 /* create a new structure for this interface */
128 i = talloc_zero(ctdb, struct ctdb_interface);
130 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
133 i->name = talloc_strdup(i, iface);
134 if (i->name == NULL) {
135 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
142 DLIST_ADD(ctdb->ifaces, i);
/*
 * Walk vnn->ifaces looking for an entry that refers to the given
 * interface.  The match is by pointer identity, not by interface name.
 */
147 static bool vnn_has_interface(struct ctdb_vnn *vnn,
148 const struct ctdb_interface *iface)
150 struct vnn_interface *i;
152 for (i = vnn->ifaces; i != NULL; i = i->next) {
153 if (iface == i->iface) {
161 /* If any interfaces now have no possible IPs then delete them. This
162 * implementation is naive (i.e. simple) rather than clever
163 * (i.e. complex). Given that this is run on delip and that operation
164 * is rare, this doesn't need to be efficient - it needs to be
165 * foolproof. One alternative is reference counting, where the logic
166 * is distributed and can, therefore, be broken in multiple places.
167 * Another alternative is to build a red-black tree of interfaces that
168 * can have addresses (by walking ctdb->vnn once) and then walking
169 * ctdb->ifaces once and deleting those not in the tree. Let's go to
170 * one of those if the naive implementation causes problems... :-)
/*
 * Drop interfaces that no VNN lists any more.
 *
 * Only interfaces named in the given vnn are candidates.  For each
 * candidate, every VNN on ctdb->vnn is checked with vnn_has_interface();
 * a candidate that none of them lists is unlinked from ctdb->ifaces.
 * The outer loop advances through a separate "next" variable rather
 * than i->next, so unlinking the current entry does not break the walk.
 */
172 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
175 struct ctdb_interface *i, *next;
177 /* For each interface, check if there's an IP using it. */
178 for (i = ctdb->ifaces; i != NULL; i = next) {
183 /* Only consider interfaces named in the given VNN. */
184 if (!vnn_has_interface(vnn, i)) {
188 /* Search for a vnn with this interface. */
190 for (tv=ctdb->vnn; tv; tv=tv->next) {
191 if (vnn_has_interface(tv, i)) {
198 /* None of the VNNs are using this interface. */
199 DLIST_REMOVE(ctdb->ifaces, i);
/*
 * Linear search of ctdb->ifaces for an interface whose name is equal
 * (strcmp) to the requested one.
 */
206 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
209 struct ctdb_interface *i;
211 for (i=ctdb->ifaces;i;i=i->next) {
212 if (strcmp(i->name, iface) == 0) {
/*
 * Choose an interface for vnn from its vnn->ifaces list.  Candidates
 * are compared by reference count: one with fewer references than the
 * best seen so far is preferred.
 */
220 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
221 struct ctdb_vnn *vnn)
223 struct vnn_interface *i;
224 struct ctdb_interface *cur = NULL;
225 struct ctdb_interface *best = NULL;
227 for (i = vnn->ifaces; i != NULL; i = i->next) {
240 if (cur->references < best->references) {
/*
 * Give vnn an interface to be hosted on.
 *
 * An address that already has an interface is only reported at
 * DEBUG_INFO.  Otherwise ctdb_vnn_best_iface() picks a candidate;
 * failure to find one is logged at DEBUG_ERR.  On assignment this
 * node (ctdb->pnn) is recorded as the holder of the address and the
 * new interface is logged together with its reference count.
 *
 * The error text used to read "cannot assign to iface any iface";
 * it now says what actually happened.
 */
249 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
250 struct ctdb_vnn *vnn)
252 struct ctdb_interface *best = NULL;
255 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
256 "still assigned to iface '%s'\n",
257 ctdb_addr_to_str(&vnn->public_address),
258 ctdb_vnn_iface_string(vnn)));
262 best = ctdb_vnn_best_iface(ctdb, vnn);
264 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
265 "cannot be assigned to any iface\n",
266 ctdb_addr_to_str(&vnn->public_address)));
272 vnn->pnn = ctdb->pnn;
274 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
275 "now assigned to iface '%s' refs[%d]\n",
276 ctdb_addr_to_str(&vnn->public_address),
277 ctdb_vnn_iface_string(vnn),
/*
 * Detach vnn from its current interface.
 *
 * The old interface and its reference count are logged first (the log
 * call itself copes with a NULL vnn->iface), then the interface's
 * reference count is decremented.  The final test singles out the case
 * where this node (ctdb->pnn) is recorded as the holder of the address.
 */
282 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
283 struct ctdb_vnn *vnn)
285 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
286 "now unassigned (old iface '%s' refs[%d])\n",
287 ctdb_addr_to_str(&vnn->public_address),
288 ctdb_vnn_iface_string(vnn),
289 vnn->iface?vnn->iface->references:0));
291 vnn->iface->references--;
294 if (vnn->pnn == ctdb->pnn) {
/*
 * Decide whether this node could host vnn right now.
 *
 * Checks, in order: the daemon must be in CTDB_RUNSTATE_RUNNING; this
 * node's own flags are tested for NODE_FLAGS_INACTIVE/NODE_FLAGS_DISABLED;
 * a VNN with delete_pending set is tested next; then the link state of
 * the currently assigned interface and, after that, of every interface
 * listed in vnn->ifaces.
 */
299 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
300 struct ctdb_vnn *vnn)
303 struct vnn_interface *i;
305 /* Nodes that are not RUNNING can not host IPs */
306 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
310 flags = ctdb->nodes[ctdb->pnn]->flags;
311 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
315 if (vnn->delete_pending) {
319 if (vnn->iface && vnn->iface->link_up) {
323 for (i = vnn->ifaces; i != NULL; i = i->next) {
324 if (i->iface->link_up) {
/*
 * Private data of the ctdb_control_send_arp() timer: the daemon
 * context, the connections to tickle and the VNN being announced.
 * It is allocated on the VNN's takeover_ctx by ctdb_announce_vnn_iface().
 */
332 struct ctdb_takeover_arp {
333 struct ctdb_context *ctdb;
336 struct ctdb_tcp_array *tcparray;
337 struct ctdb_vnn *vnn;
342 lists of tcp endpoints
/*
 * One registered TCP connection.  Entries are linked onto a client's
 * tcp_list by ctdb_control_tcp_client().
 */
344 struct ctdb_tcp_list {
345 struct ctdb_tcp_list *prev, *next;
346 struct ctdb_connection connection;
350 list of clients to kill on IP release
/*
 * Entry on ctdb->client_ip_list.  The ctdb back-pointer lets
 * ctdb_client_ip_destructor() unlink the entry when it is freed.
 */
352 struct ctdb_client_ip {
353 struct ctdb_client_ip *prev, *next;
354 struct ctdb_context *ctdb;
361 send a gratuitous arp
/*
 * tevent timer handler that announces a taken-over address.
 *
 * private_data is the ctdb_takeover_arp set up by
 * ctdb_announce_vnn_iface().  Each run sends an ARP for arp->addr on
 * the VNN's current interface, then sends a TCP tickle for every
 * connection in arp->tcparray.  Failures of either send are logged at
 * DEBUG_CRIT.  arp->count is compared with CTDB_ARP_REPEAT before the
 * handler re-arms itself on the VNN's takeover_ctx; freeing that
 * context (as ctdb_control_release_ip() does) therefore stops the
 * sequence.
 */
363 static void ctdb_control_send_arp(struct tevent_context *ev,
364 struct tevent_timer *te,
365 struct timeval t, void *private_data)
367 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
368 struct ctdb_takeover_arp);
370 struct ctdb_tcp_array *tcparray;
371 const char *iface = ctdb_vnn_iface_string(arp->vnn);
373 ret = ctdb_sys_send_arp(&arp->addr, iface);
375 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
376 iface, strerror(errno)));
379 tcparray = arp->tcparray;
381 for (i=0;i<tcparray->num;i++) {
382 struct ctdb_connection *tcon;
384 tcon = &tcparray->connections[i];
385 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
386 (unsigned)ntohs(tcon->dst.ip.sin_port),
387 ctdb_addr_to_str(&tcon->src),
388 (unsigned)ntohs(tcon->src.ip.sin_port)));
389 ret = ctdb_sys_send_tcp(
394 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
395 ctdb_addr_to_str(&tcon->src)));
402 if (arp->count == CTDB_ARP_REPEAT) {
407 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
408 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
409 ctdb_control_send_arp, arp);
/*
 * Start announcing vnn's address on its interface.
 *
 * A takeover_ctx is created on the VNN if it does not have one yet, and
 * a ctdb_takeover_arp is allocated on it.  The VNN's tcp_array is moved
 * (talloc_steal) into that state, so vnn->tcp_array is reset to NULL
 * and tcp_update_needed is set.  The first ctdb_control_send_arp() run
 * is scheduled with a zero timeout.
 */
412 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
413 struct ctdb_vnn *vnn)
415 struct ctdb_takeover_arp *arp;
416 struct ctdb_tcp_array *tcparray;
418 if (!vnn->takeover_ctx) {
419 vnn->takeover_ctx = talloc_new(vnn);
420 if (!vnn->takeover_ctx) {
425 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
431 arp->addr = vnn->public_address;
434 tcparray = vnn->tcp_array;
436 /* add all of the known tcp connections for this IP to the
437 list of tcp connections to send tickle acks for */
438 arp->tcparray = talloc_steal(arp, tcparray);
440 vnn->tcp_array = NULL;
441 vnn->tcp_update_needed = true;
444 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
445 timeval_zero(), ctdb_control_send_arp, arp);
/*
 * State carried from ctdb_do_takeip() to ctdb_do_takeip_callback():
 * the control request to answer and the VNN being taken over.
 */
450 struct ctdb_do_takeip_state {
451 struct ctdb_req_control_old *c;
452 struct ctdb_vnn *vnn;
456 called when takeip event finishes
/*
 * Completion callback for the event started by ctdb_do_takeip().
 *
 * private_data is the ctdb_do_takeip_state.  A status of -ETIME is
 * singled out.  On failure the error is logged and the status is passed
 * back to the requester.  Otherwise, when do_checkpublicip is set, the
 * address is announced with ctdb_announce_vnn_iface() (the requester
 * gets -1 on that error path); a CTDB_SRVID_TAKE_IP message carrying
 * the address string is sent to this node; and the control is answered
 * with 0.
 */
458 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
461 struct ctdb_do_takeip_state *state =
462 talloc_get_type(private_data, struct ctdb_do_takeip_state);
467 if (status == -ETIME) {
470 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
471 ctdb_addr_to_str(&state->vnn->public_address),
472 ctdb_vnn_iface_string(state->vnn)));
473 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
479 if (ctdb->do_checkpublicip) {
481 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
483 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
490 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
491 data.dsize = strlen((char *)data.dptr) + 1;
492 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
494 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
497 /* the control succeeded */
498 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor installed by ctdb_do_takeip(): clears the VNN's
 * update_in_flight flag when the take-IP state is freed.
 */
503 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
505 state->vnn->update_in_flight = false;
510 take over an ip address
/*
 * Begin taking over vnn's address on this node.
 *
 * The request is rejected (and logged) while another update for the
 * same VNN is in flight.  An interface is assigned with
 * ctdb_vnn_assign_iface(), then a state object is allocated on the VNN;
 * its destructor clears update_in_flight, which is set here.  The event
 * is started through ctdb_event_script_callback() with
 * ctdb_do_takeip_callback() as completion handler and the interface
 * name, address and netmask bits as arguments.  Ownership of the
 * control request c is moved to ctdb with talloc_steal() so it can be
 * answered from the callback.
 */
512 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
513 struct ctdb_req_control_old *c,
514 struct ctdb_vnn *vnn)
517 struct ctdb_do_takeip_state *state;
519 if (vnn->update_in_flight) {
520 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
521 "update for this IP already in flight\n",
522 ctdb_addr_to_str(&vnn->public_address),
523 vnn->public_netmask_bits));
527 ret = ctdb_vnn_assign_iface(ctdb, vnn);
529 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
530 "assign a usable interface\n",
531 ctdb_addr_to_str(&vnn->public_address),
532 vnn->public_netmask_bits));
536 state = talloc(vnn, struct ctdb_do_takeip_state);
537 CTDB_NO_MEMORY(ctdb, state);
542 vnn->update_in_flight = true;
543 talloc_set_destructor(state, ctdb_takeip_destructor);
545 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
546 ctdb_addr_to_str(&vnn->public_address),
547 vnn->public_netmask_bits,
548 ctdb_vnn_iface_string(vnn)));
550 ret = ctdb_event_script_callback(ctdb,
552 ctdb_do_takeip_callback,
556 ctdb_vnn_iface_string(vnn),
557 ctdb_addr_to_str(&vnn->public_address),
558 vnn->public_netmask_bits);
561 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
562 ctdb_addr_to_str(&vnn->public_address),
563 ctdb_vnn_iface_string(vnn)));
568 state->c = talloc_steal(ctdb, c);
/*
 * State carried from ctdb_do_updateip() to ctdb_do_updateip_callback():
 * the control request to answer, the interface the address was on
 * before the move (restored by the callback on failure) and the VNN.
 */
572 struct ctdb_do_updateip_state {
573 struct ctdb_req_control_old *c;
574 struct ctdb_interface *old;
575 struct ctdb_vnn *vnn;
579 called when updateip event finishes
/*
 * Completion callback for the event started by ctdb_do_updateip().
 *
 * A status of -ETIME is singled out.  On failure the move is logged,
 * the VNN is unassigned from the new interface and pointed back at the
 * old one (whose reference count is bumped again), and the status is
 * passed back to the requester.  On success the control is answered
 * with 0.
 */
581 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
584 struct ctdb_do_updateip_state *state =
585 talloc_get_type(private_data, struct ctdb_do_updateip_state);
588 if (status == -ETIME) {
592 ("Failed update of IP %s from interface %s to %s\n",
593 ctdb_addr_to_str(&state->vnn->public_address),
594 iface_string(state->old),
595 ctdb_vnn_iface_string(state->vnn)));
598 * All we can do is reset the old interface
599 * and let the next run fix it
601 ctdb_vnn_unassign_iface(ctdb, state->vnn);
602 state->vnn->iface = state->old;
603 state->vnn->iface->references++;
605 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
610 /* the control succeeded */
611 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor installed by ctdb_do_updateip(): clears the VNN's
 * update_in_flight flag when the update-IP state is freed.
 */
616 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
618 state->vnn->update_in_flight = false;
623 update (move) an ip address
/*
 * Move vnn's address to the currently best interface.
 *
 * The request is rejected (and logged) while another update for the
 * same VNN is in flight.  The old interface is remembered, the VNN is
 * unassigned and ctdb_vnn_assign_iface() chooses again.  If the choice
 * is the old interface the control is answered with 0 straight away
 * and no event is run.  Otherwise a state object is allocated on the
 * VNN (its destructor clears update_in_flight, which is set here),
 * CTDB_EVENT_UPDATE_IP is started with ctdb_do_updateip_callback() as
 * completion handler, and ownership of the control request c is moved
 * to ctdb so it can be answered from the callback.
 */
625 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
626 struct ctdb_req_control_old *c,
627 struct ctdb_vnn *vnn)
630 struct ctdb_do_updateip_state *state;
631 struct ctdb_interface *old = vnn->iface;
632 const char *old_name = iface_string(old);
633 const char *new_name;
635 if (vnn->update_in_flight) {
636 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
637 "update for this IP already in flight\n",
638 ctdb_addr_to_str(&vnn->public_address),
639 vnn->public_netmask_bits));
643 ctdb_vnn_unassign_iface(ctdb, vnn);
644 ret = ctdb_vnn_assign_iface(ctdb, vnn);
646 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
647 "assign a usable interface (old iface '%s')\n",
648 ctdb_addr_to_str(&vnn->public_address),
649 vnn->public_netmask_bits,
654 if (old == vnn->iface) {
655 /* A benign update from one interface onto itself.
656 * no need to run the eventscripts in this case, just return
659 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
663 state = talloc(vnn, struct ctdb_do_updateip_state);
664 CTDB_NO_MEMORY(ctdb, state);
670 vnn->update_in_flight = true;
671 talloc_set_destructor(state, ctdb_updateip_destructor);
673 new_name = ctdb_vnn_iface_string(vnn);
674 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
675 "interface %s to %s\n",
676 ctdb_addr_to_str(&vnn->public_address),
677 vnn->public_netmask_bits,
681 ret = ctdb_event_script_callback(ctdb,
683 ctdb_do_updateip_callback,
685 CTDB_EVENT_UPDATE_IP,
689 ctdb_addr_to_str(&vnn->public_address),
690 vnn->public_netmask_bits);
693 ("Failed update IP %s from interface %s to %s\n",
694 ctdb_addr_to_str(&vnn->public_address),
695 old_name, new_name));
700 state->c = talloc_steal(ctdb, c);
705 Find the vnn that holds the state for a public ip address
706 returns NULL if the address is not known as a public address
/*
 * Linear search of ctdb->vnn for the entry whose public_address is the
 * same IP (per ctdb_same_ip()) as addr.
 */
708 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
710 struct ctdb_vnn *vnn;
712 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
713 if (ctdb_same_ip(&vnn->public_address, addr)) {
722 take over an ip address
/*
 * Control handler: this node is asked to take over a public address.
 *
 * indata carries a struct ctdb_public_ip.  The request must name this
 * node (pip->pnn == ctdb->pnn) and a known public address.  When IP
 * failover is enabled and do_checkpublicip is set, the kernel is asked
 * whether the address is already configured (have_ip).  The best
 * interface is determined up front; not finding one is an error.
 *
 * The handler then decides between three outcomes:
 *  - take the address (ctdb_do_takeip()),
 *  - move it to a better interface (ctdb_do_updateip()), e.g. when the
 *    current interface has lost link or carries noticeably more
 *    addresses than the best one, or when the VNN records no holder
 *    (pnn == -1) although the kernel has the address,
 *  - do nothing, logging a redundant takeover.
 *
 * Two log messages were built from adjacent string literals without a
 * separating space ("failed to finda usable interface", "node %dand we
 * are node %d"); the missing spaces are added here.
 */
724 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
725 struct ctdb_req_control_old *c,
730 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
731 struct ctdb_vnn *vnn;
732 bool have_ip = false;
733 bool do_updateip = false;
734 bool do_takeip = false;
735 struct ctdb_interface *best_iface = NULL;
737 if (pip->pnn != ctdb->pnn) {
738 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
739 "with pnn %d, but we're node %d\n",
740 ctdb_addr_to_str(&pip->addr),
741 pip->pnn, ctdb->pnn));
745 /* update our vnn list */
746 vnn = find_public_ip_vnn(ctdb, &pip->addr);
748 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
749 ctdb_addr_to_str(&pip->addr)));
753 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
754 have_ip = ctdb_sys_have_ip(&pip->addr);
756 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
757 if (best_iface == NULL) {
758 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find "
759 "a usable interface (old %s, have_ip %d)\n",
760 ctdb_addr_to_str(&vnn->public_address),
761 vnn->public_netmask_bits,
762 ctdb_vnn_iface_string(vnn),
767 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
768 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
769 "and we have it on iface[%s], but it was assigned to node %d "
770 "and we are node %d, banning ourself\n",
771 ctdb_addr_to_str(&vnn->public_address),
772 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
777 if (vnn->pnn == -1 && have_ip) {
778 /* This will cause connections to be reset and
779 * reestablished. However, this is a very unusual
780 * situation and doing this will completely repair the
781 * inconsistency in the VNN.
785 " Doing updateip for IP %s already on an interface\n",
786 ctdb_addr_to_str(&vnn->public_address)));
791 if (vnn->iface != best_iface) {
792 if (!vnn->iface->link_up) {
794 } else if (vnn->iface->references > (best_iface->references + 1)) {
795 /* only move when the rebalance gains something */
803 ctdb_vnn_unassign_iface(ctdb, vnn);
810 ret = ctdb_do_takeip(ctdb, c, vnn);
814 } else if (do_updateip) {
815 ret = ctdb_do_updateip(ctdb, c, vnn);
821 * The interface is up and the kernel knows the ip
824 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
825 ctdb_addr_to_str(&pip->addr),
826 vnn->public_netmask_bits,
827 ctdb_vnn_iface_string(vnn)));
831 /* tell ctdb_control.c that we will be replying asynchronously */
/*
 * Remove a public address from this node's configuration: unlink the
 * VNN from ctdb->vnn, drop its interface assignment, and then prune
 * interfaces that no remaining VNN lists.  The VNN is unlinked first so
 * it no longer counts as a user of its own interfaces during pruning.
 */
837 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
839 DLIST_REMOVE(ctdb->vnn, vnn);
840 ctdb_vnn_unassign_iface(ctdb, vnn);
841 ctdb_remove_orphaned_ifaces(ctdb, vnn);
/*
 * Bookkeeping after an address has been released.
 *
 * Sends a CTDB_SRVID_RELEASE_IP message carrying the address string to
 * this node, drops the VNN's interface assignment and, if the VNN was
 * marked delete_pending, deletes it with do_delete_ip().  The caller
 * stores the returned pointer back as its VNN reference (see
 * release_ip_callback()).
 */
845 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
846 struct ctdb_vnn *vnn,
847 ctdb_sock_addr *addr)
851 /* Send a message to all clients of this node telling them
852 * that the cluster has been reconfigured and they should
853 * close any connections on this IP address
855 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
856 data.dsize = strlen((char *)data.dptr)+1;
857 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
858 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
860 ctdb_vnn_unassign_iface(ctdb, vnn);
862 /* Process the IP if it has been marked for deletion */
863 if (vnn->delete_pending) {
864 do_delete_ip(ctdb, vnn);
/*
 * State carried from ctdb_control_release_ip() to release_ip_callback():
 * the control request to answer, a private copy of the address, and
 * the VNN (which ctdb_releaseip_destructor() allows to be NULL).
 */
871 struct release_ip_callback_state {
872 struct ctdb_req_control_old *c;
873 ctdb_sock_addr *addr;
874 struct ctdb_vnn *vnn;
879 called when releaseip event finishes
/*
 * Completion callback for the event started by
 * ctdb_control_release_ip().
 *
 * A status of -ETIME is singled out.  When IP failover is enabled and
 * do_checkpublicip is set, the kernel is asked whether the address is
 * still configured; if so the release is logged as failed and the
 * requester is answered on that path.  Otherwise the VNN's pnn is set
 * to the target node, release_ip_post() does the bookkeeping (its
 * result replaces state->vnn), and the control is answered with 0.
 */
881 static void release_ip_callback(struct ctdb_context *ctdb, int status,
884 struct release_ip_callback_state *state =
885 talloc_get_type(private_data, struct release_ip_callback_state);
887 if (status == -ETIME) {
891 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
892 if (ctdb_sys_have_ip(state->addr)) {
894 ("IP %s still hosted during release IP callback, failing\n",
895 ctdb_addr_to_str(state->addr)));
896 ctdb_request_control_reply(ctdb, state->c,
903 state->vnn->pnn = state->target_pnn;
904 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
906 /* the control succeeded */
907 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor installed by ctdb_control_release_ip(): clears the
 * VNN's update_in_flight flag when the release state is freed.  The
 * NULL check covers a state whose vnn pointer has been cleared.
 */
911 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
913 if (state->vnn != NULL) {
914 state->vnn->update_in_flight = false;
920 release an ip address
/*
 * Control handler: this node is asked to release a public address.
 *
 * indata carries a struct ctdb_public_ip whose pnn is the node the
 * address is moving to.  Any ARP/tickle announcements still pending
 * for the VNN are cancelled by freeing its takeover_ctx.  Releases of
 * an address this node does not hold ("redundant" releases) are logged
 * at DEBUG_DEBUG.  A release is rejected while another update for the
 * VNN is in flight.  Otherwise a state object with its own copy of the
 * address and the target pnn is allocated, update_in_flight is set
 * (cleared again by the state's destructor), CTDB_EVENT_RELEASE_IP is
 * started with release_ip_callback() as completion handler, and the
 * control request is handed to the state for the asynchronous reply.
 */
922 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
923 struct ctdb_req_control_old *c,
928 struct release_ip_callback_state *state;
929 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
930 struct ctdb_vnn *vnn;
933 /* update our vnn list */
934 vnn = find_public_ip_vnn(ctdb, &pip->addr);
936 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
937 ctdb_addr_to_str(&pip->addr)));
941 /* stop any previous arps */
942 talloc_free(vnn->takeover_ctx);
943 vnn->takeover_ctx = NULL;
945 /* RELEASE_IP controls are sent to all nodes that should not
946 * be hosting a particular IP. This serves 2 purposes. The
947 * first is to help resolve any inconsistencies. If a node
948 * does unexpectedly host an IP then it will be released. The
949 * 2nd is to use a "redundant release" to tell non-takeover
950 * nodes where an IP is moving to. This is how "ctdb ip" can
951 * report the (likely) location of an IP by only asking the
952 * local node. Redundant releases need to update the PNN but
953 * are otherwise ignored.
955 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
956 if (!ctdb_sys_have_ip(&pip->addr)) {
957 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
958 ctdb_addr_to_str(&pip->addr),
959 vnn->public_netmask_bits,
960 ctdb_vnn_iface_string(vnn)));
962 ctdb_vnn_unassign_iface(ctdb, vnn);
966 if (vnn->iface == NULL) {
967 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
968 ctdb_addr_to_str(&pip->addr),
969 vnn->public_netmask_bits));
975 /* There is a potential race between take_ip and us because we
976 * update the VNN via a callback that runs when the
977 * eventscripts have been run. Avoid the race by allowing one
978 * update to be in flight at a time.
980 if (vnn->update_in_flight) {
981 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
982 "update for this IP already in flight\n",
983 ctdb_addr_to_str(&vnn->public_address),
984 vnn->public_netmask_bits));
988 iface = ctdb_vnn_iface_string(vnn);
990 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
991 ctdb_addr_to_str(&pip->addr),
992 vnn->public_netmask_bits,
996 state = talloc(ctdb, struct release_ip_callback_state);
998 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1004 state->addr = talloc(state, ctdb_sock_addr);
1005 if (state->addr == NULL) {
1006 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1007 __FILE__, __LINE__);
1011 *state->addr = pip->addr;
1012 state->target_pnn = pip->pnn;
1015 vnn->update_in_flight = true;
1016 talloc_set_destructor(state, ctdb_releaseip_destructor);
1018 ret = ctdb_event_script_callback(ctdb,
1019 state, release_ip_callback, state,
1020 CTDB_EVENT_RELEASE_IP,
1023 ctdb_addr_to_str(&pip->addr),
1024 vnn->public_netmask_bits);
1026 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1027 ctdb_addr_to_str(&pip->addr),
1028 ctdb_vnn_iface_string(vnn)));
1033 /* tell the control that we will be replying asynchronously */
1034 *async_reply = true;
1035 state->c = talloc_steal(state, c);
/*
 * Add one public address to ctdb->vnn.
 *
 * addr/mask give the address and netmask bits; ifaces is a
 * comma-separated list of interface names the address may be hosted
 * on.  A duplicate address (per ctdb_same_sockaddr()) is logged as an
 * error.  The interface list is split with strtok() on a private copy
 * so the caller's string is not modified.  Each name is checked with
 * ctdb_sys_check_iface_exists(), registered through
 * ctdb_add_local_iface() and appended to vnn->ifaces in the order
 * given.  The finished VNN is linked onto ctdb->vnn.
 */
1039 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1040 ctdb_sock_addr *addr,
1041 unsigned mask, const char *ifaces,
1044 struct ctdb_vnn *vnn;
1048 /* Verify that we don't have an entry for this IP yet */
1049 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1050 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1052 ("Duplicate public IP address '%s'\n",
1053 ctdb_addr_to_str(addr)));
1058 /* Create a new VNN structure for this IP address */
1059 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1061 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1064 tmp = talloc_strdup(vnn, ifaces);
1066 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1070 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1071 struct vnn_interface *vnn_iface;
1072 struct ctdb_interface *i;
1073 if (!ctdb_sys_check_iface_exists(iface)) {
1075 ("Unknown interface %s for public address %s\n",
1076 iface, ctdb_addr_to_str(addr)));
1081 i = ctdb_add_local_iface(ctdb, iface);
1084 ("Failed to add interface '%s' "
1085 "for public address %s\n",
1086 iface, ctdb_addr_to_str(addr)));
1091 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1092 if (vnn_iface == NULL) {
1093 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1098 vnn_iface->iface = i;
1099 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1102 vnn->public_address = *addr;
1103 vnn->public_netmask_bits = mask;
1106 DLIST_ADD(ctdb->vnn, vnn);
1112 setup the public address lists from a file
/*
 * Load the public addresses file into ctdb->vnn.
 *
 * If ctdb->public_addresses_file is unset, it defaults to
 * "$CTDB_BASE/public_addresses"; an unset CTDB_BASE is reported as an
 * error.  A file that does not exist only produces a warning.
 * Trailing empty lines are ignored.  For each remaining line, leading
 * blanks are skipped and empty lines are tolerated; the line is split
 * on blanks/tabs into an address token and an interface-list token.  A
 * line without interfaces, a line that parse_ip_mask() rejects, or one
 * that ctdb_add_public_address() cannot add is reported with its
 * 1-based line number.
 */
1114 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1121 /* If no public addresses file given then try the default */
1122 if (ctdb->public_addresses_file == NULL) {
1123 const char *b = getenv("CTDB_BASE");
1125 DBG_ERR("CTDB_BASE not set\n");
1128 ctdb->public_addresses_file = talloc_asprintf(
1129 ctdb, "%s/%s", b, "public_addresses");
1130 if (ctdb->public_addresses_file == NULL) {
1131 DBG_ERR("Out of memory\n");
1136 /* If the file doesn't exist then warn and do nothing */
1137 ok = file_exist(ctdb->public_addresses_file);
1139 D_WARNING("Not loading public addresses, no file %s\n",
1140 ctdb->public_addresses_file);
1144 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1145 if (lines == NULL) {
1146 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1149 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1153 for (i=0;i<nlines;i++) {
1155 ctdb_sock_addr addr;
1156 const char *addrstr;
1161 while ((*line == ' ') || (*line == '\t')) {
1167 if (strcmp(line, "") == 0) {
1170 tok = strtok(line, " \t");
1173 tok = strtok(NULL, " \t");
1175 D_ERR("No interface specified at line %u "
1176 "of public addresses file\n", i+1);
1182 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1183 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1187 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1188 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1195 D_NOTICE("Loaded public addresses from %s\n",
1196 ctdb->public_addresses_file);
1203 destroy a ctdb_client_ip structure
/*
 * talloc destructor installed by ctdb_control_tcp_client(): logs the
 * entry being destroyed and unlinks it from ctdb->client_ip_list.
 */
1205 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1207 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1208 ctdb_addr_to_str(&ip->addr),
1209 ntohs(ip->addr.ip.sin_port),
1212 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1217 called by a client to inform us of a TCP connection that it is managing
1218 and that should be tickled with an ACK when IP takeover is done
/*
 * Control handler: a local client registers a TCP connection it owns.
 *
 * indata carries a struct ctdb_connection.  Nothing is done when no
 * public addresses are configured.  Both endpoints are canonicalized
 * in place with ctdb_canonicalize_ip(), then the destination address
 * is looked up among the public addresses; the per-family switch that
 * follows logs destinations that are not public.  Registering a
 * connection for an address this node does not hold is logged as an
 * error.  Otherwise a ctdb_client_ip (with destructor) and a
 * ctdb_tcp_list entry are allocated on the client, so both go away
 * with it, and the connection is broadcast to all connected nodes as
 * CTDB_CONTROL_TCP_ADD without waiting for replies.
 */
1220 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1223 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1224 struct ctdb_connection *tcp_sock = NULL;
1225 struct ctdb_tcp_list *tcp;
1226 struct ctdb_connection t;
1229 struct ctdb_client_ip *ip;
1230 struct ctdb_vnn *vnn;
1231 ctdb_sock_addr addr;
1233 /* If we don't have public IPs, tickles are useless */
1234 if (ctdb->vnn == NULL) {
1238 tcp_sock = (struct ctdb_connection *)indata.dptr;
1240 addr = tcp_sock->src;
1241 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1242 addr = tcp_sock->dst;
1243 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1246 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1247 vnn = find_public_ip_vnn(ctdb, &addr);
1249 switch (addr.sa.sa_family) {
1251 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1252 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1253 ctdb_addr_to_str(&addr)));
1257 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1258 ctdb_addr_to_str(&addr)));
1261 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1267 if (vnn->pnn != ctdb->pnn) {
1268 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1269 ctdb_addr_to_str(&addr),
1270 client_id, client->pid));
1271 /* failing this call will tell smbd to die */
1275 ip = talloc(client, struct ctdb_client_ip);
1276 CTDB_NO_MEMORY(ctdb, ip);
1280 ip->client_id = client_id;
1281 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1282 DLIST_ADD(ctdb->client_ip_list, ip);
1284 tcp = talloc(client, struct ctdb_tcp_list);
1285 CTDB_NO_MEMORY(ctdb, tcp);
1287 tcp->connection.src = tcp_sock->src;
1288 tcp->connection.dst = tcp_sock->dst;
1290 DLIST_ADD(client->tcp_list, tcp);
1292 t.src = tcp_sock->src;
1293 t.dst = tcp_sock->dst;
1295 data.dptr = (uint8_t *)&t;
1296 data.dsize = sizeof(t);
1298 switch (addr.sa.sa_family) {
1300 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1301 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1302 ctdb_addr_to_str(&tcp_sock->src),
1303 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1306 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1307 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1308 ctdb_addr_to_str(&tcp_sock->src),
1309 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1312 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1316 /* tell all nodes about this tcp connection */
1317 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1318 CTDB_CONTROL_TCP_ADD,
1319 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1321 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1329 find a tcp address on a list
/*
 * Find a connection in a tickle array.  A NULL array is accepted.  An
 * entry matches when both its source and destination are equal to
 * tcp's according to ctdb_same_sockaddr(); a pointer into the array's
 * own storage is returned for the first match.
 */
1331 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1332 struct ctdb_connection *tcp)
1336 if (array == NULL) {
1340 for (i=0;i<array->num;i++) {
1341 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1342 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1343 return &array->connections[i];
1352 called by a daemon to inform us of a TCP connection that one of its
1353 clients is managing and that should be tickled with an ACK when IP takeover is
/*
 * Control handler: record a TCP connection in the tickle list of the
 * public address it terminates on.
 *
 * indata carries a struct ctdb_connection; the VNN is found from its
 * destination address.  Nothing is done when no public addresses are
 * configured, and an unknown destination is only logged.  The first
 * connection for a VNN creates the array with a single slot; a
 * connection that is already present (ctdb_tcp_find()) is only logged;
 * any other connection grows the array with talloc_realloc() and is
 * appended.  When tcp_update_needed is true, the VNN is flagged so the
 * change is propagated to other nodes.
 */
1356 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1358 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1359 struct ctdb_tcp_array *tcparray;
1360 struct ctdb_connection tcp;
1361 struct ctdb_vnn *vnn;
1363 /* If we don't have public IPs, tickles are useless */
1364 if (ctdb->vnn == NULL) {
1368 vnn = find_public_ip_vnn(ctdb, &p->dst);
1370 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1371 ctdb_addr_to_str(&p->dst)));
1377 tcparray = vnn->tcp_array;
1379 /* If this is the first tickle */
1380 if (tcparray == NULL) {
1381 tcparray = talloc(vnn, struct ctdb_tcp_array);
1382 CTDB_NO_MEMORY(ctdb, tcparray);
1383 vnn->tcp_array = tcparray;
1386 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1387 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1389 tcparray->connections[tcparray->num].src = p->src;
1390 tcparray->connections[tcparray->num].dst = p->dst;
1393 if (tcp_update_needed) {
1394 vnn->tcp_update_needed = true;
1400 /* Do we already have this tickle ?*/
1403 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1404 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1405 ctdb_addr_to_str(&tcp.dst),
1406 ntohs(tcp.dst.ip.sin_port),
1411 /* A new tickle, we must add it to the array */
1412 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1413 struct ctdb_connection,
1415 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1417 tcparray->connections[tcparray->num].src = p->src;
1418 tcparray->connections[tcparray->num].dst = p->dst;
1421 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1422 ctdb_addr_to_str(&tcp.dst),
1423 ntohs(tcp.dst.ip.sin_port),
1426 if (tcp_update_needed) {
1427 vnn->tcp_update_needed = true;
/*
 * Remove one connection from vnn's tickle array.
 *
 * A missing array or an unknown connection is only logged.  Removal
 * overwrites the matching slot with the last entry and decrements the
 * count, so the order of the remaining entries is not preserved.  When
 * the count reaches zero the array itself is freed and vnn->tcp_array
 * reset to NULL.  tcp_update_needed is set so the change is propagated.
 */
1434 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1436 struct ctdb_connection *tcpp;
1442 /* if the array is empty we cant remove it
1443 and we don't need to do anything
1445 if (vnn->tcp_array == NULL) {
1446 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1447 ctdb_addr_to_str(&conn->dst),
1448 ntohs(conn->dst.ip.sin_port)));
1453 /* See if we know this connection
1454 if we don't know this connection then we dont need to do anything
1456 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1458 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1459 ctdb_addr_to_str(&conn->dst),
1460 ntohs(conn->dst.ip.sin_port)));
1465 /* We need to remove this entry from the array.
1466 Instead of allocating a new array and copying data to it
1467 we cheat and just copy the last entry in the existing array
1468 to the entry that is to be removed and just shring the
1471 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1472 vnn->tcp_array->num--;
1474 /* If we deleted the last entry we also need to remove the entire array
1476 if (vnn->tcp_array->num == 0) {
1477 talloc_free(vnn->tcp_array);
1478 vnn->tcp_array = NULL;
1481 vnn->tcp_update_needed = true;
1483 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1484 ctdb_addr_to_str(&conn->src),
1485 ntohs(conn->src.ip.sin_port)));
1490 called by a daemon to inform us of a TCP connection that one of its
1491 clients used are no longer needed in the tickle database
1493 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1495 struct ctdb_vnn *vnn;
1496 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1498 /* If we don't have public IPs, tickles are useless */
1499 if (ctdb->vnn == NULL) {
1503 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1506 (__location__ " unable to find public address %s\n",
1507 ctdb_addr_to_str(&conn->dst)));
1511 ctdb_remove_connection(vnn, conn);
1517 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1521 Called when another daemon starts - causes all tickles for all
1522 public addresses we are serving to be sent to the new node on the
1523 next check. This actually causes the tickles to be sent to the
1524 other node immediately. In case there is an error, the periodic
1525 timer will send the updates on timer event. This is simple and
1526 doesn't require careful error handling.
1528 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1530 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1531 (unsigned long) pnn));
1533 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1539 called when a client structure goes away - hook to remove
1540 elements from the tcp_list in all daemons
1542 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1544 while (client->tcp_list) {
1545 struct ctdb_vnn *vnn;
1546 struct ctdb_tcp_list *tcp = client->tcp_list;
1547 struct ctdb_connection *conn = &tcp->connection;
1549 DLIST_REMOVE(client->tcp_list, tcp);
1551 vnn = find_public_ip_vnn(client->ctdb,
1555 (__location__ " unable to find public address %s\n",
1556 ctdb_addr_to_str(&conn->dst)));
1560 /* If the IP address is hosted on this node then
1561 * remove the connection. */
1562 if (vnn->pnn == client->ctdb->pnn) {
1563 ctdb_remove_connection(vnn, conn);
1566 /* Otherwise this function has been called because the
1567 * server IP address has been released to another node
1568 * and the client has exited. This means that we
1569 * should not delete the connection information. The
1570 * takeover node processes connections too. */
1575 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1577 struct ctdb_vnn *vnn, *next;
1580 if (ctdb->tunable.disable_ip_failover == 1) {
1584 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1585 /* vnn can be freed below in release_ip_post() */
1588 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1589 ctdb_vnn_unassign_iface(ctdb, vnn);
1593 /* Don't allow multiple releases at once. Some code,
1594 * particularly ctdb_tickle_sentenced_connections() is
1596 if (vnn->update_in_flight) {
1597 DEBUG(DEBUG_WARNING,
1599 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1600 ctdb_addr_to_str(&vnn->public_address),
1601 vnn->public_netmask_bits,
1602 ctdb_vnn_iface_string(vnn)));
1605 vnn->update_in_flight = true;
1607 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1608 ctdb_addr_to_str(&vnn->public_address),
1609 vnn->public_netmask_bits,
1610 ctdb_vnn_iface_string(vnn)));
1612 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1613 ctdb_vnn_iface_string(vnn),
1614 ctdb_addr_to_str(&vnn->public_address),
1615 vnn->public_netmask_bits);
1616 /* releaseip timeouts are converted to success, so to
1617 * detect failures just check if the IP address is
1620 if (ctdb_sys_have_ip(&vnn->public_address)) {
1623 " IP address %s not released\n",
1624 ctdb_addr_to_str(&vnn->public_address)));
1625 vnn->update_in_flight = false;
1629 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1631 vnn->update_in_flight = false;
1636 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1641 get list of public IPs
1643 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1644 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1647 struct ctdb_public_ip_list_old *ips;
1648 struct ctdb_vnn *vnn;
1649 bool only_available = false;
1651 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1652 only_available = true;
1655 /* count how many public ip structures we have */
1657 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1661 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1662 num*sizeof(struct ctdb_public_ip);
1663 ips = talloc_zero_size(outdata, len);
1664 CTDB_NO_MEMORY(ctdb, ips);
1667 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1668 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1671 ips->ips[i].pnn = vnn->pnn;
1672 ips->ips[i].addr = vnn->public_address;
1676 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1677 i*sizeof(struct ctdb_public_ip);
1679 outdata->dsize = len;
1680 outdata->dptr = (uint8_t *)ips;
1686 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1687 struct ctdb_req_control_old *c,
1692 ctdb_sock_addr *addr;
1693 struct ctdb_public_ip_info_old *info;
1694 struct ctdb_vnn *vnn;
1695 struct vnn_interface *iface;
1697 addr = (ctdb_sock_addr *)indata.dptr;
1699 vnn = find_public_ip_vnn(ctdb, addr);
1701 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1702 "'%s'not a public address\n",
1703 ctdb_addr_to_str(addr)));
1707 /* count how many public ip structures we have */
1709 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1713 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1714 num*sizeof(struct ctdb_iface);
1715 info = talloc_zero_size(outdata, len);
1716 CTDB_NO_MEMORY(ctdb, info);
1718 info->ip.addr = vnn->public_address;
1719 info->ip.pnn = vnn->pnn;
1720 info->active_idx = 0xFFFFFFFF;
1723 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1724 struct ctdb_interface *cur;
1727 if (vnn->iface == cur) {
1728 info->active_idx = i;
1730 strncpy(info->ifaces[i].name, cur->name,
1731 sizeof(info->ifaces[i].name));
1732 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1733 info->ifaces[i].link_state = cur->link_up;
1734 info->ifaces[i].references = cur->references;
1739 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1740 i*sizeof(struct ctdb_iface);
1742 outdata->dsize = len;
1743 outdata->dptr = (uint8_t *)info;
1748 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1749 struct ctdb_req_control_old *c,
1753 struct ctdb_iface_list_old *ifaces;
1754 struct ctdb_interface *cur;
1756 /* count how many public ip structures we have */
1758 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1762 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1763 num*sizeof(struct ctdb_iface);
1764 ifaces = talloc_zero_size(outdata, len);
1765 CTDB_NO_MEMORY(ctdb, ifaces);
1768 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1769 strncpy(ifaces->ifaces[i].name, cur->name,
1770 sizeof(ifaces->ifaces[i].name));
1771 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1772 ifaces->ifaces[i].link_state = cur->link_up;
1773 ifaces->ifaces[i].references = cur->references;
1777 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1778 i*sizeof(struct ctdb_iface);
1780 outdata->dsize = len;
1781 outdata->dptr = (uint8_t *)ifaces;
1786 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1787 struct ctdb_req_control_old *c,
1790 struct ctdb_iface *info;
1791 struct ctdb_interface *iface;
1792 bool link_up = false;
1794 info = (struct ctdb_iface *)indata.dptr;
1796 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1797 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1798 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1799 len, len, info->name));
1803 switch (info->link_state) {
1811 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1812 (unsigned int)info->link_state));
1816 if (info->references != 0) {
1817 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1818 (unsigned int)info->references));
1822 iface = ctdb_find_iface(ctdb, info->name);
1823 if (iface == NULL) {
1827 if (link_up == iface->link_up) {
1832 ("iface[%s] has changed it's link status %s => %s\n",
1834 iface->link_up?"up":"down",
1835 link_up?"up":"down"));
1837 iface->link_up = link_up;
1843 called by a daemon to inform us of the entire list of TCP tickles for
1844 a particular public address.
1845 this control should only be sent by the node that is currently serving
1846 that public address.
1848 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1850 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1851 struct ctdb_tcp_array *tcparray;
1852 struct ctdb_vnn *vnn;
1854 /* We must at least have tickles.num or else we cant verify the size
1855 of the received data blob
1857 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1858 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1862 /* verify that the size of data matches what we expect */
1863 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1864 + sizeof(struct ctdb_connection) * list->num) {
1865 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1869 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1870 ctdb_addr_to_str(&list->addr)));
1872 vnn = find_public_ip_vnn(ctdb, &list->addr);
1874 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1875 ctdb_addr_to_str(&list->addr)));
1880 if (vnn->pnn == ctdb->pnn) {
1882 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1883 ctdb_addr_to_str(&list->addr)));
1887 /* remove any old ticklelist we might have */
1888 talloc_free(vnn->tcp_array);
1889 vnn->tcp_array = NULL;
1891 tcparray = talloc(vnn, struct ctdb_tcp_array);
1892 CTDB_NO_MEMORY(ctdb, tcparray);
1894 tcparray->num = list->num;
1896 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1897 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1899 memcpy(tcparray->connections, &list->connections[0],
1900 sizeof(struct ctdb_connection)*tcparray->num);
1902 /* We now have a new fresh tickle list array for this vnn */
1903 vnn->tcp_array = tcparray;
1909 called to return the full list of tickles for the puclic address associated
1910 with the provided vnn
1912 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1914 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1915 struct ctdb_tickle_list_old *list;
1916 struct ctdb_tcp_array *tcparray;
1918 struct ctdb_vnn *vnn;
1921 vnn = find_public_ip_vnn(ctdb, addr);
1923 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1924 ctdb_addr_to_str(addr)));
1929 port = ctdb_addr_to_port(addr);
1931 tcparray = vnn->tcp_array;
1933 if (tcparray != NULL) {
1935 /* All connections */
1936 num = tcparray->num;
1938 /* Count connections for port */
1939 for (i = 0; i < tcparray->num; i++) {
1940 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1947 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1948 + sizeof(struct ctdb_connection) * num;
1950 outdata->dptr = talloc_size(outdata, outdata->dsize);
1951 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1952 list = (struct ctdb_tickle_list_old *)outdata->dptr;
1962 for (i = 0; i < tcparray->num; i++) {
1964 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1965 list->connections[num] = tcparray->connections[i];
1975 set the list of all tcp tickles for a public address
1977 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1978 ctdb_sock_addr *addr,
1979 struct ctdb_tcp_array *tcparray)
1983 struct ctdb_tickle_list_old *list;
1986 num = tcparray->num;
1991 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1992 sizeof(struct ctdb_connection) * num;
1993 data.dptr = talloc_size(ctdb, data.dsize);
1994 CTDB_NO_MEMORY(ctdb, data.dptr);
1996 list = (struct ctdb_tickle_list_old *)data.dptr;
2000 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2003 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2004 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2005 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2007 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2011 talloc_free(data.dptr);
2016 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2019 struct ctdb_vnn *vnn;
2022 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2023 /* we only send out updates for public addresses that
2026 if (ctdb->pnn != vnn->pnn) {
2030 /* We only send out the updates if we need to */
2031 if (!force && !vnn->tcp_update_needed) {
2035 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2036 &vnn->public_address,
2039 D_ERR("Failed to send the tickle update for ip %s\n",
2040 ctdb_addr_to_str(&vnn->public_address));
2041 vnn->tcp_update_needed = true;
2043 D_INFO("Sent tickle update for ip %s\n",
2044 ctdb_addr_to_str(&vnn->public_address));
2045 vnn->tcp_update_needed = false;
2052 perform tickle updates if required
2054 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2055 struct tevent_timer *te,
2056 struct timeval t, void *private_data)
2058 struct ctdb_context *ctdb = talloc_get_type(
2059 private_data, struct ctdb_context);
2061 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2063 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2064 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2065 ctdb_update_tcp_tickles, ctdb);
2069 start periodic update of tcp tickles
2071 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2073 ctdb->tickle_update_context = talloc_new(ctdb);
2075 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2076 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2077 ctdb_update_tcp_tickles, ctdb);
2083 struct control_gratious_arp {
2084 struct ctdb_context *ctdb;
2085 ctdb_sock_addr addr;
2091 send a control_gratuitous arp
2093 static void send_gratious_arp(struct tevent_context *ev,
2094 struct tevent_timer *te,
2095 struct timeval t, void *private_data)
2098 struct control_gratious_arp *arp = talloc_get_type(private_data,
2099 struct control_gratious_arp);
2101 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2103 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2104 arp->iface, strerror(errno)));
2109 if (arp->count == CTDB_ARP_REPEAT) {
2114 tevent_add_timer(arp->ctdb->ev, arp,
2115 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2116 send_gratious_arp, arp);
2123 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2125 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2126 struct control_gratious_arp *arp;
2128 /* verify the size of indata */
2129 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2130 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2131 (unsigned)indata.dsize,
2132 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2136 ( offsetof(struct ctdb_addr_info_old, iface)
2137 + gratious_arp->len ) ){
2139 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2140 "but should be %u bytes\n",
2141 (unsigned)indata.dsize,
2142 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2147 arp = talloc(ctdb, struct control_gratious_arp);
2148 CTDB_NO_MEMORY(ctdb, arp);
2151 arp->addr = gratious_arp->addr;
2152 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2153 CTDB_NO_MEMORY(ctdb, arp->iface);
2156 tevent_add_timer(arp->ctdb->ev, arp,
2157 timeval_zero(), send_gratious_arp, arp);
2162 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2164 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2167 /* verify the size of indata */
2168 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2169 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2173 ( offsetof(struct ctdb_addr_info_old, iface)
2176 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2177 "but should be %u bytes\n",
2178 (unsigned)indata.dsize,
2179 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2183 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2185 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2188 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2195 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2197 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2198 struct ctdb_vnn *vnn;
2200 /* verify the size of indata */
2201 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2202 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2206 ( offsetof(struct ctdb_addr_info_old, iface)
2209 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2210 "but should be %u bytes\n",
2211 (unsigned)indata.dsize,
2212 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2216 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2218 /* walk over all public addresses until we find a match */
2219 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2220 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2221 if (vnn->pnn == ctdb->pnn) {
2222 /* This IP is currently being hosted.
2223 * Defer the deletion until the next
2224 * takeover run. "ctdb reloadips" will
2225 * always cause a takeover run. "ctdb
2226 * delip" will now need an explicit
2227 * "ctdb ipreallocated" afterwards. */
2228 vnn->delete_pending = true;
2230 /* This IP is not hosted on the
2231 * current node so just delete it
2233 do_delete_ip(ctdb, vnn);
2240 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2241 ctdb_addr_to_str(&pub->addr)));
/* carries the pending control until the "ipreallocated" event finishes */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;
};
2250 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2251 int status, void *p)
2253 struct ipreallocated_callback_state *state =
2254 talloc_get_type(p, struct ipreallocated_callback_state);
2258 (" \"ipreallocated\" event script failed (status %d)\n",
2260 if (status == -ETIME) {
2261 ctdb_ban_self(ctdb);
2265 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2269 /* A control to run the ipreallocated event */
2270 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2271 struct ctdb_req_control_old *c,
2275 struct ipreallocated_callback_state *state;
2277 state = talloc(ctdb, struct ipreallocated_callback_state);
2278 CTDB_NO_MEMORY(ctdb, state);
2280 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2282 ret = ctdb_event_script_callback(ctdb, state,
2283 ctdb_ipreallocated_callback, state,
2284 CTDB_EVENT_IPREALLOCATED,
2288 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2293 /* tell the control that we will be reply asynchronously */
2294 state->c = talloc_steal(state, c);
2295 *async_reply = true;
/* state of one in-progress "reloadips" request */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;
	struct ctdb_req_control_old *c;	/* control to reply to, or NULL */
	int status;			/* status sent in the reply */
	int fd[2];			/* pipe: child writes its result to fd[1] */
	pid_t child;			/* pid of the reloadips child */
	struct tevent_fd *fde;		/* read event on fd[0] */
};
2310 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2312 if (h == h->ctdb->reload_ips) {
2313 h->ctdb->reload_ips = NULL;
2316 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2319 ctdb_kill(h->ctdb, h->child, SIGKILL);
2323 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2324 struct tevent_timer *te,
2325 struct timeval t, void *private_data)
2327 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2332 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2333 struct tevent_fd *fde,
2334 uint16_t flags, void *private_data)
2336 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2341 ret = sys_read(h->fd[0], &res, 1);
2342 if (ret < 1 || res != 0) {
2343 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2351 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2353 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2354 struct ctdb_public_ip_list_old *ips;
2355 struct ctdb_vnn *vnn;
2356 struct client_async_data *async_data;
2357 struct timeval timeout;
2359 struct ctdb_client_control_state *state;
2363 CTDB_NO_MEMORY(ctdb, mem_ctx);
2365 /* Read IPs from local node */
2366 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2367 CTDB_CURRENT_NODE, mem_ctx, &ips);
2370 ("Unable to fetch public IPs from local node\n"));
2371 talloc_free(mem_ctx);
2375 /* Read IPs file - this is safe since this is a child process */
2377 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2378 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2379 talloc_free(mem_ctx);
2383 async_data = talloc_zero(mem_ctx, struct client_async_data);
2384 CTDB_NO_MEMORY(ctdb, async_data);
2386 /* Compare IPs between node and file for IPs to be deleted */
2387 for (i = 0; i < ips->num; i++) {
2389 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2390 if (ctdb_same_ip(&vnn->public_address,
2391 &ips->ips[i].addr)) {
2392 /* IP is still in file */
2398 /* Delete IP ips->ips[i] */
2399 struct ctdb_addr_info_old *pub;
2402 ("IP %s no longer configured, deleting it\n",
2403 ctdb_addr_to_str(&ips->ips[i].addr)));
2405 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2406 CTDB_NO_MEMORY(ctdb, pub);
2408 pub->addr = ips->ips[i].addr;
2412 timeout = TAKEOVER_TIMEOUT();
2414 data.dsize = offsetof(struct ctdb_addr_info_old,
2416 data.dptr = (uint8_t *)pub;
2418 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2419 CTDB_CONTROL_DEL_PUBLIC_IP,
2420 0, data, async_data,
2422 if (state == NULL) {
2425 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2429 ctdb_client_async_add(async_data, state);
2433 /* Compare IPs between node and file for IPs to be added */
2435 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2436 for (i = 0; i < ips->num; i++) {
2437 if (ctdb_same_ip(&vnn->public_address,
2438 &ips->ips[i].addr)) {
2439 /* IP already on node */
2443 if (i == ips->num) {
2444 /* Add IP ips->ips[i] */
2445 struct ctdb_addr_info_old *pub;
2446 const char *ifaces = NULL;
2448 struct vnn_interface *iface = NULL;
2451 ("New IP %s configured, adding it\n",
2452 ctdb_addr_to_str(&vnn->public_address)));
2454 uint32_t pnn = ctdb_get_pnn(ctdb);
2456 data.dsize = sizeof(pnn);
2457 data.dptr = (uint8_t *)&pnn;
2459 ret = ctdb_client_send_message(
2461 CTDB_BROADCAST_CONNECTED,
2462 CTDB_SRVID_REBALANCE_NODE,
2465 DEBUG(DEBUG_WARNING,
2466 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2472 ifaces = vnn->ifaces->iface->name;
2473 iface = vnn->ifaces->next;
2474 while (iface != NULL) {
2475 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2476 iface->iface->name);
2477 iface = iface->next;
2480 len = strlen(ifaces) + 1;
2481 pub = talloc_zero_size(mem_ctx,
2482 offsetof(struct ctdb_addr_info_old, iface) + len);
2483 CTDB_NO_MEMORY(ctdb, pub);
2485 pub->addr = vnn->public_address;
2486 pub->mask = vnn->public_netmask_bits;
2488 memcpy(&pub->iface[0], ifaces, pub->len);
2490 timeout = TAKEOVER_TIMEOUT();
2492 data.dsize = offsetof(struct ctdb_addr_info_old,
2494 data.dptr = (uint8_t *)pub;
2496 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2497 CTDB_CONTROL_ADD_PUBLIC_IP,
2498 0, data, async_data,
2500 if (state == NULL) {
2503 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2507 ctdb_client_async_add(async_data, state);
2511 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2512 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2516 talloc_free(mem_ctx);
2520 talloc_free(mem_ctx);
2524 /* This control is sent to force the node to re-read the public addresses file
2525 and drop any addresses we should nnot longer host, and add new addresses
2526 that we are now able to host
2528 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2530 struct ctdb_reloadips_handle *h;
2531 pid_t parent = getpid();
2533 if (ctdb->reload_ips != NULL) {
2534 talloc_free(ctdb->reload_ips);
2535 ctdb->reload_ips = NULL;
2538 h = talloc(ctdb, struct ctdb_reloadips_handle);
2539 CTDB_NO_MEMORY(ctdb, h);
2544 if (pipe(h->fd) == -1) {
2545 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2550 h->child = ctdb_fork(ctdb);
2551 if (h->child == (pid_t)-1) {
2552 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2560 if (h->child == 0) {
2561 signed char res = 0;
2565 prctl_set_comment("ctdb_reloadips");
2566 if (switch_from_server_to_client(ctdb) != 0) {
2567 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2570 res = ctdb_reloadips_child(ctdb);
2572 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2576 sys_write(h->fd[1], &res, 1);
2577 ctdb_wait_for_process_to_exit(parent);
2581 h->c = talloc_steal(h, c);
2584 set_close_on_exec(h->fd[0]);
2586 talloc_set_destructor(h, ctdb_reloadips_destructor);
2589 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2590 ctdb_reloadips_child_handler, (void *)h);
2591 tevent_fd_set_auto_close(h->fde);
2593 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2594 ctdb_reloadips_timeout_event, h);
2596 /* we reply later */
2597 *async_reply = true;