4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tevent/tevent.h"
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/wait.h"
28 #include "../include/ctdb_private.h"
29 #include "../common/rb_tree.h"
32 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
34 #define CTDB_ARP_INTERVAL 1
35 #define CTDB_ARP_REPEAT 3
38 struct ctdb_iface *prev, *next;
44 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
47 return vnn->iface->name;
53 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
57 /* Verify that we don't have an entry for this interface yet */
58 for (i=ctdb->ifaces;i;i=i->next) {
59 if (strcmp(i->name, iface) == 0) {
64 /* create a new structure for this interface */
65 i = talloc_zero(ctdb, struct ctdb_iface);
66 CTDB_NO_MEMORY_FATAL(ctdb, i);
67 i->name = talloc_strdup(i, iface);
68 CTDB_NO_MEMORY(ctdb, i->name);
71 DLIST_ADD(ctdb->ifaces, i);
76 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
81 /* Look for an existing entry with this interface name */
82 for (i=ctdb->ifaces;i;i=i->next) {
83 if (strcmp(i->name, iface) == 0) {
91 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
95 struct ctdb_iface *cur = NULL;
96 struct ctdb_iface *best = NULL;
98 for (i=0; vnn->ifaces[i]; i++) {
100 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
114 if (cur->references < best->references) {
123 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
124 struct ctdb_vnn *vnn)
126 struct ctdb_iface *best = NULL;
129 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
130 "still assigned to iface '%s'\n",
131 ctdb_addr_to_str(&vnn->public_address),
132 ctdb_vnn_iface_string(vnn)));
136 best = ctdb_vnn_best_iface(ctdb, vnn);
138 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
139 "cannot assign to iface any iface\n",
140 ctdb_addr_to_str(&vnn->public_address)));
146 vnn->pnn = ctdb->pnn;
148 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
149 "now assigned to iface '%s' refs[%d]\n",
150 ctdb_addr_to_str(&vnn->public_address),
151 ctdb_vnn_iface_string(vnn),
156 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
157 struct ctdb_vnn *vnn)
159 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
160 "now unassigned (old iface '%s' refs[%d])\n",
161 ctdb_addr_to_str(&vnn->public_address),
162 ctdb_vnn_iface_string(vnn),
163 vnn->iface?vnn->iface->references:0));
165 vnn->iface->references--;
168 if (vnn->pnn == ctdb->pnn) {
173 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
174 struct ctdb_vnn *vnn)
178 if (vnn->iface && vnn->iface->link_up) {
182 for (i=0; vnn->ifaces[i]; i++) {
183 struct ctdb_iface *cur;
185 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
198 struct ctdb_takeover_arp {
199 struct ctdb_context *ctdb;
202 struct ctdb_tcp_array *tcparray;
203 struct ctdb_vnn *vnn;
208 lists of tcp endpoints
210 struct ctdb_tcp_list {
211 struct ctdb_tcp_list *prev, *next;
212 struct ctdb_tcp_connection connection;
216 list of clients to kill on IP release
218 struct ctdb_client_ip {
219 struct ctdb_client_ip *prev, *next;
220 struct ctdb_context *ctdb;
227 send a gratuitous arp
229 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
230 struct timeval t, void *private_data)
232 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
233 struct ctdb_takeover_arp);
235 struct ctdb_tcp_array *tcparray;
236 const char *iface = ctdb_vnn_iface_string(arp->vnn);
238 ret = ctdb_sys_send_arp(&arp->addr, iface);
240 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
241 iface, strerror(errno)));
244 tcparray = arp->tcparray;
246 for (i=0;i<tcparray->num;i++) {
247 struct ctdb_tcp_connection *tcon;
249 tcon = &tcparray->connections[i];
250 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
251 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
252 ctdb_addr_to_str(&tcon->src_addr),
253 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
254 ret = ctdb_sys_send_tcp(
259 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
260 ctdb_addr_to_str(&tcon->src_addr)));
267 if (arp->count == CTDB_ARP_REPEAT) {
272 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
273 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
274 ctdb_control_send_arp, arp);
277 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
278 struct ctdb_vnn *vnn)
280 struct ctdb_takeover_arp *arp;
281 struct ctdb_tcp_array *tcparray;
283 if (!vnn->takeover_ctx) {
284 vnn->takeover_ctx = talloc_new(vnn);
285 if (!vnn->takeover_ctx) {
290 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
296 arp->addr = vnn->public_address;
299 tcparray = vnn->tcp_array;
301 /* add all of the known tcp connections for this IP to the
302 list of tcp connections to send tickle acks for */
303 arp->tcparray = talloc_steal(arp, tcparray);
305 vnn->tcp_array = NULL;
306 vnn->tcp_update_needed = true;
309 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
310 timeval_zero(), ctdb_control_send_arp, arp);
315 struct takeover_callback_state {
316 struct ctdb_req_control *c;
317 ctdb_sock_addr *addr;
318 struct ctdb_vnn *vnn;
321 struct ctdb_do_takeip_state {
322 struct ctdb_req_control *c;
323 struct ctdb_vnn *vnn;
327 called when takeip event finishes
329 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
332 struct ctdb_do_takeip_state *state =
333 talloc_get_type(private_data, struct ctdb_do_takeip_state);
338 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
340 if (status == -ETIME) {
343 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
344 ctdb_addr_to_str(&state->vnn->public_address),
345 ctdb_vnn_iface_string(state->vnn)));
346 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 node->flags |= NODE_FLAGS_UNHEALTHY;
353 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
355 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
360 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
361 data.dsize = strlen((char *)data.dptr) + 1;
362 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
364 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
367 /* the control succeeded */
368 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
374 take over an ip address
376 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
377 struct ctdb_req_control *c,
378 struct ctdb_vnn *vnn)
381 struct ctdb_do_takeip_state *state;
383 ret = ctdb_vnn_assign_iface(ctdb, vnn);
385 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
386 "assin a usable interface\n",
387 ctdb_addr_to_str(&vnn->public_address),
388 vnn->public_netmask_bits));
392 state = talloc(vnn, struct ctdb_do_takeip_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(ctdb, c);
398 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
399 ctdb_addr_to_str(&vnn->public_address),
400 vnn->public_netmask_bits,
401 ctdb_vnn_iface_string(vnn)));
403 ret = ctdb_event_script_callback(ctdb,
405 ctdb_do_takeip_callback,
410 ctdb_vnn_iface_string(vnn),
411 ctdb_addr_to_str(&vnn->public_address),
412 vnn->public_netmask_bits);
415 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
416 ctdb_addr_to_str(&vnn->public_address),
417 ctdb_vnn_iface_string(vnn)));
425 struct ctdb_do_updateip_state {
426 struct ctdb_req_control *c;
427 struct ctdb_iface *old;
428 struct ctdb_vnn *vnn;
432 called when updateip event finishes
434 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
437 struct ctdb_do_updateip_state *state =
438 talloc_get_type(private_data, struct ctdb_do_updateip_state);
442 if (status == -ETIME) {
445 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
446 ctdb_addr_to_str(&state->vnn->public_address),
448 ctdb_vnn_iface_string(state->vnn)));
451 * All we can do is reset the old interface
452 * and let the next run fix it
454 ctdb_vnn_unassign_iface(ctdb, state->vnn);
455 state->vnn->iface = state->old;
456 state->vnn->iface->references++;
458 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
463 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
465 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
470 /* the control succeeded */
471 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
477 update (move) an ip address
479 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
480 struct ctdb_req_control *c,
481 struct ctdb_vnn *vnn)
484 struct ctdb_do_updateip_state *state;
485 struct ctdb_iface *old = vnn->iface;
486 const char *new_name;
488 ctdb_vnn_unassign_iface(ctdb, vnn);
489 ret = ctdb_vnn_assign_iface(ctdb, vnn);
491 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
492 "assin a usable interface (old iface '%s')\n",
493 ctdb_addr_to_str(&vnn->public_address),
494 vnn->public_netmask_bits,
499 new_name = ctdb_vnn_iface_string(vnn);
500 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
501 /* A benign update from one interface onto itself.
502 * no need to run the eventscripts in this case, just return
505 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
509 state = talloc(vnn, struct ctdb_do_updateip_state);
510 CTDB_NO_MEMORY(ctdb, state);
512 state->c = talloc_steal(ctdb, c);
516 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
517 "interface %s to %s\n",
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits,
523 ret = ctdb_event_script_callback(ctdb,
525 ctdb_do_updateip_callback,
528 CTDB_EVENT_UPDATE_IP,
532 ctdb_addr_to_str(&vnn->public_address),
533 vnn->public_netmask_bits);
535 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
536 ctdb_addr_to_str(&vnn->public_address),
537 old->name, new_name));
546 Find the vnn whose public address matches the given ip address
547 returns NULL if the address is not known as a public address
549 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
551 struct ctdb_vnn *vnn;
553 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
554 if (ctdb_same_ip(&vnn->public_address, addr)) {
563 take over an ip address
565 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
566 struct ctdb_req_control *c,
571 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
572 struct ctdb_vnn *vnn;
573 bool have_ip = false;
574 bool do_updateip = false;
575 bool do_takeip = false;
576 struct ctdb_iface *best_iface = NULL;
578 if (pip->pnn != ctdb->pnn) {
579 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
580 "with pnn %d, but we're node %d\n",
581 ctdb_addr_to_str(&pip->addr),
582 pip->pnn, ctdb->pnn));
586 /* update our vnn list */
587 vnn = find_public_ip_vnn(ctdb, &pip->addr);
589 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
590 ctdb_addr_to_str(&pip->addr)));
594 have_ip = ctdb_sys_have_ip(&pip->addr);
595 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
596 if (best_iface == NULL) {
597 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
598 "a usable interface (old %s, have_ip %d)\n",
599 ctdb_addr_to_str(&vnn->public_address),
600 vnn->public_netmask_bits,
601 ctdb_vnn_iface_string(vnn),
606 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
607 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
611 if (vnn->iface == NULL && have_ip) {
612 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
613 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
614 ctdb_addr_to_str(&vnn->public_address)));
618 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
619 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
620 "and we have it on iface[%s], but it was assigned to node %d"
621 "and we are node %d, banning ourself\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
628 if (vnn->pnn == -1 && have_ip) {
629 vnn->pnn = ctdb->pnn;
630 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
631 "and we already have it on iface[%s], update local daemon\n",
632 ctdb_addr_to_str(&vnn->public_address),
633 ctdb_vnn_iface_string(vnn)));
638 if (vnn->iface->link_up) {
639 /* only move when the rebalance gains something */
640 if (vnn->iface->references > (best_iface->references + 1)) {
643 } else if (vnn->iface != best_iface) {
650 ctdb_vnn_unassign_iface(ctdb, vnn);
657 ret = ctdb_do_takeip(ctdb, c, vnn);
661 } else if (do_updateip) {
662 ret = ctdb_do_updateip(ctdb, c, vnn);
668 * The interface is up and the kernel knows the ip
671 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
672 ctdb_addr_to_str(&pip->addr),
673 vnn->public_netmask_bits,
674 ctdb_vnn_iface_string(vnn)));
678 /* tell ctdb_control.c that we will be replying asynchronously */
685 takeover an ip address old v4 style
687 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
688 struct ctdb_req_control *c,
694 data.dsize = sizeof(struct ctdb_public_ip);
695 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
696 CTDB_NO_MEMORY(ctdb, data.dptr);
698 memcpy(data.dptr, indata.dptr, indata.dsize);
699 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
703 kill any clients that are registered with an IP that is being released
705 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
707 struct ctdb_client_ip *ip;
709 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
710 ctdb_addr_to_str(addr)));
712 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
713 ctdb_sock_addr tmp_addr;
716 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
718 ctdb_addr_to_str(&ip->addr)));
720 if (ctdb_same_ip(&tmp_addr, addr)) {
721 struct ctdb_client *client = ctdb_reqid_find(ctdb,
724 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
726 ctdb_addr_to_str(&ip->addr),
729 if (client->pid != 0) {
730 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
731 (unsigned)client->pid,
732 ctdb_addr_to_str(addr),
734 kill(client->pid, SIGKILL);
741 called when releaseip event finishes
743 static void release_ip_callback(struct ctdb_context *ctdb, int status,
746 struct takeover_callback_state *state =
747 talloc_get_type(private_data, struct takeover_callback_state);
750 if (status == -ETIME) {
754 /* send a message to all clients of this node telling them
755 that the cluster has been reconfigured and they should
756 release any sockets on this IP */
757 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
758 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
759 data.dsize = strlen((char *)data.dptr)+1;
761 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
763 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
765 /* kill clients that have registered with this IP */
766 release_kill_clients(ctdb, state->addr);
768 ctdb_vnn_unassign_iface(ctdb, state->vnn);
770 /* the control succeeded */
771 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
776 release an ip address
778 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
779 struct ctdb_req_control *c,
784 struct takeover_callback_state *state;
785 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
786 struct ctdb_vnn *vnn;
788 /* update our vnn list */
789 vnn = find_public_ip_vnn(ctdb, &pip->addr);
791 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
792 ctdb_addr_to_str(&pip->addr)));
797 /* stop any previous arps */
798 talloc_free(vnn->takeover_ctx);
799 vnn->takeover_ctx = NULL;
801 if (!ctdb_sys_have_ip(&pip->addr)) {
802 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
803 ctdb_addr_to_str(&pip->addr),
804 vnn->public_netmask_bits,
805 ctdb_vnn_iface_string(vnn)));
806 ctdb_vnn_unassign_iface(ctdb, vnn);
810 if (vnn->iface == NULL) {
811 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
812 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
813 ctdb_addr_to_str(&vnn->public_address)));
817 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits,
820 ctdb_vnn_iface_string(vnn),
823 state = talloc(ctdb, struct takeover_callback_state);
824 CTDB_NO_MEMORY(ctdb, state);
826 state->c = talloc_steal(state, c);
827 state->addr = talloc(state, ctdb_sock_addr);
828 CTDB_NO_MEMORY(ctdb, state->addr);
829 *state->addr = pip->addr;
832 ret = ctdb_event_script_callback(ctdb,
833 state, release_ip_callback, state,
835 CTDB_EVENT_RELEASE_IP,
837 ctdb_vnn_iface_string(vnn),
838 ctdb_addr_to_str(&pip->addr),
839 vnn->public_netmask_bits);
841 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
842 ctdb_addr_to_str(&pip->addr),
843 ctdb_vnn_iface_string(vnn)));
848 /* tell the control that we will be replying asynchronously */
854 release an ip address old v4 style
856 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
857 struct ctdb_req_control *c,
863 data.dsize = sizeof(struct ctdb_public_ip);
864 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
865 CTDB_NO_MEMORY(ctdb, data.dptr);
867 memcpy(data.dptr, indata.dptr, indata.dsize);
868 return ctdb_control_release_ip(ctdb, c, data, async_reply);
872 static int ctdb_add_public_address(struct ctdb_context *ctdb,
873 ctdb_sock_addr *addr,
874 unsigned mask, const char *ifaces)
876 struct ctdb_vnn *vnn;
883 tmp = strdup(ifaces);
884 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
885 if (!ctdb_sys_check_iface_exists(iface)) {
886 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
893 /* Verify that we dont have an entry for this ip yet */
894 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
895 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
896 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
897 ctdb_addr_to_str(addr)));
902 /* create a new vnn structure for this ip address */
903 vnn = talloc_zero(ctdb, struct ctdb_vnn);
904 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
905 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
906 tmp = talloc_strdup(vnn, ifaces);
907 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
908 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
909 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
910 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
911 vnn->ifaces[num] = talloc_strdup(vnn, iface);
912 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
916 vnn->ifaces[num] = NULL;
917 vnn->public_address = *addr;
918 vnn->public_netmask_bits = mask;
920 if (ctdb_sys_have_ip(addr)) {
921 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
922 vnn->pnn = ctdb->pnn;
925 for (i=0; vnn->ifaces[i]; i++) {
926 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
928 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
929 "for public_address[%s]\n",
930 vnn->ifaces[i], ctdb_addr_to_str(addr)));
935 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
939 DLIST_ADD(ctdb->vnn, vnn);
945 setup the event script directory
947 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
949 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
950 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
954 static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
955 struct timeval t, void *private_data)
957 struct ctdb_context *ctdb = talloc_get_type(private_data,
958 struct ctdb_context);
959 struct ctdb_vnn *vnn;
961 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
964 for (i=0; vnn->ifaces[i] != NULL; i++) {
965 if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
966 DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
968 ctdb_addr_to_str(&vnn->public_address)));
973 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
974 timeval_current_ofs(30, 0),
975 ctdb_check_interfaces_event, ctdb);
979 static int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
981 if (ctdb->check_public_ifaces_ctx != NULL) {
982 talloc_free(ctdb->check_public_ifaces_ctx);
983 ctdb->check_public_ifaces_ctx = NULL;
986 ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
987 if (ctdb->check_public_ifaces_ctx == NULL) {
988 ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
991 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
992 timeval_current_ofs(30, 0),
993 ctdb_check_interfaces_event, ctdb);
1000 setup the public address lists from a file
1002 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
1008 lines = file_lines_load(alist, &nlines, ctdb);
1009 if (lines == NULL) {
1010 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
1013 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1017 for (i=0;i<nlines;i++) {
1019 ctdb_sock_addr addr;
1020 const char *addrstr;
1025 while ((*line == ' ') || (*line == '\t')) {
1031 if (strcmp(line, "") == 0) {
1034 tok = strtok(line, " \t");
1036 tok = strtok(NULL, " \t");
1038 if (NULL == ctdb->default_public_interface) {
1039 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1044 ifaces = ctdb->default_public_interface;
1049 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1050 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1054 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
1055 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1062 ctdb_start_monitoring_interfaces(ctdb);
1068 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1072 struct ctdb_vnn *svnn;
1073 struct ctdb_iface *cur = NULL;
1077 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1078 CTDB_NO_MEMORY(ctdb, svnn);
1080 svnn->ifaces = talloc_array(svnn, const char *, 2);
1081 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1082 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1083 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1084 svnn->ifaces[1] = NULL;
1086 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1092 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1094 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1095 "for single_ip[%s]\n",
1097 ctdb_addr_to_str(&svnn->public_address)));
1102 /* assume the single public ip interface is initially "good" */
1103 cur = ctdb_find_iface(ctdb, iface);
1105 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1108 cur->link_up = true;
1110 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1116 ctdb->single_ip_vnn = svnn;
1120 /* Given a physical node, return the number of
1121 public addresses that are currently assigned to this node.
1123 static int node_ip_coverage(struct ctdb_context *ctdb,
1125 struct ctdb_public_ip_list *ips)
1129 for (;ips;ips=ips->next) {
1130 if (ips->pnn == pnn) {
1138 /* Check if this is a public ip known to the node, i.e. can that
1139 node takeover this ip ?
1141 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1142 struct ctdb_public_ip_list *ip)
1144 struct ctdb_all_public_ips *public_ips;
1147 public_ips = ctdb->nodes[pnn]->available_public_ips;
1149 if (public_ips == NULL) {
1153 for (i=0;i<public_ips->num;i++) {
1154 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1155 /* yes, this node can serve this public ip */
1164 /* Search the node list for a node to take over this ip.
1165 Pick the node that is currently serving the fewest ips
1166 so that the ips get spread out evenly.
1168 static int find_takeover_node(struct ctdb_context *ctdb,
1169 struct ctdb_node_map *nodemap, uint32_t mask,
1170 struct ctdb_public_ip_list *ip,
1171 struct ctdb_public_ip_list *all_ips)
1173 int pnn, min=0, num;
1177 for (i=0;i<nodemap->num;i++) {
1178 if (nodemap->nodes[i].flags & mask) {
1179 /* This node is not healthy and can not be used to serve
1185 /* verify that this node can serve this ip */
1186 if (can_node_serve_ip(ctdb, i, ip)) {
1187 /* no it couldn't, so skip to the next node */
1191 num = node_ip_coverage(ctdb, i, all_ips);
1192 /* was this the first node we checked ? */
1204 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1205 ctdb_addr_to_str(&ip->addr)));
1215 static uint32_t *ip_key(ctdb_sock_addr *ip)
1217 static uint32_t key[IP_KEYLEN];
1219 bzero(key, sizeof(key));
1221 switch (ip->sa.sa_family) {
1223 key[3] = htonl(ip->ip.sin_addr.s_addr);
1226 uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
1227 key[0] = htonl(s6_a32[0]);
1228 key[1] = htonl(s6_a32[1]);
1229 key[2] = htonl(s6_a32[2]);
1230 key[3] = htonl(s6_a32[3]);
1234 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1241 static void *add_ip_callback(void *parm, void *data)
1243 struct ctdb_public_ip_list *this_ip = parm;
1244 struct ctdb_public_ip_list *prev_ip = data;
1246 if (prev_ip == NULL) {
1249 if (this_ip->pnn == -1) {
1250 this_ip->pnn = prev_ip->pnn;
1256 static int getips_count_callback(void *param, void *data)
1258 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1259 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1261 new_ip->next = *ip_list;
1266 static struct ctdb_public_ip_list *
1267 create_merged_ip_list(struct ctdb_context *ctdb)
1270 struct ctdb_public_ip_list *ip_list;
1271 struct ctdb_all_public_ips *public_ips;
1273 if (ctdb->ip_tree != NULL) {
1274 talloc_free(ctdb->ip_tree);
1275 ctdb->ip_tree = NULL;
1277 ctdb->ip_tree = trbt_create(ctdb, 0);
1279 for (i=0;i<ctdb->num_nodes;i++) {
1280 public_ips = ctdb->nodes[i]->known_public_ips;
1282 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1286 /* there were no public ips for this node */
1287 if (public_ips == NULL) {
1291 for (j=0;j<public_ips->num;j++) {
1292 struct ctdb_public_ip_list *tmp_ip;
1294 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1295 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1296 tmp_ip->pnn = public_ips->ips[j].pnn;
1297 tmp_ip->addr = public_ips->ips[j].addr;
1298 tmp_ip->next = NULL;
1300 trbt_insertarray32_callback(ctdb->ip_tree,
1301 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1308 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1314 * This is the length of the longest common prefix between the IPs.
1315 * It is calculated by XOR-ing the 2 IPs together and counting the
1316 * number of leading zeroes. The implementation means that all
1317 * addresses end up being 128 bits long.
1319 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1320 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1321 * lots of nodes and IP addresses?
1323 static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1325 uint32_t ip1_k[IP_KEYLEN];
1330 uint32_t distance = 0;
1332 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1334 for (i=0; i<IP_KEYLEN; i++) {
1335 x = ip1_k[i] ^ t[i];
1339 /* Count number of leading zeroes.
1340 * FIXME? This could be optimised...
1342 while ((x & (1 << 31)) == 0) {
1352 /* Calculate the IP distance for the given IP relative to IPs on the
1353 given node. The ips argument is generally the all_ips variable
1354 used in the main part of the algorithm.
1356 static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1357 struct ctdb_public_ip_list *ips,
1360 struct ctdb_public_ip_list *t;
1365 for (t=ips; t != NULL; t=t->next) {
1366 if (t->pnn != pnn) {
1370 /* Optimisation: We never calculate the distance
1371 * between an address and itself. This allows us to
1372 * calculate the effect of removing an address from a
1373 * node by simply calculating the distance between
1374 * that address and all of the existing addresses.
1375 * Moreover, we assume that we're only ever dealing
1376 * with addresses from all_ips so we can identify an
1377 * address via a pointer rather than doing a more
1378 * expensive address comparison. */
1379 if (&(t->addr) == ip) {
1383 d = ip_distance(ip, &(t->addr));
1384 sum += d * d; /* Cheaper than pulling in math.h :-) */
1390 /* Return the LCP2 imbalance metric for addresses currently assigned
1393 static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1395 struct ctdb_public_ip_list *t;
1397 uint32_t imbalance = 0;
1399 for (t=all_ips; t!=NULL; t=t->next) {
1400 if (t->pnn != pnn) {
1403 /* Pass the rest of the IPs rather than the whole
1406 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
1412 /* Allocate any unassigned IPs just by looping through the IPs and
1413 * finding the best node for each.
1415 static void basic_allocate_unassigned(struct ctdb_context *ctdb,
1416 struct ctdb_node_map *nodemap,
1418 struct ctdb_public_ip_list *all_ips)
1420 struct ctdb_public_ip_list *tmp_ip;
1422 /* loop over all ip's and find a physical node to cover for
1425 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1426 if (tmp_ip->pnn == -1) {
1427 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1428 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1429 ctdb_addr_to_str(&tmp_ip->addr)));
1435 /* Basic non-deterministic rebalancing algorithm.
1437 static bool basic_failback(struct ctdb_context *ctdb,
1438 struct ctdb_node_map *nodemap,
1440 struct ctdb_public_ip_list *all_ips,
1445 int maxnode, maxnum=0, minnode, minnum=0, num;
1446 struct ctdb_public_ip_list *tmp_ip;
1448 /* for each ip address, loop over all nodes that can serve
1449 this ip and make sure that the difference between the node
1450 serving the most and the node serving the least ip's are
1453 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1454 if (tmp_ip->pnn == -1) {
1458 /* Get the highest and lowest number of ips's served by any
1459 valid node which can serve this ip.
1463 for (i=0;i<nodemap->num;i++) {
1464 if (nodemap->nodes[i].flags & mask) {
1468 /* only check nodes that can actually serve this ip */
1469 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1470 /* no it couldn't, so skip to the next node */
1474 num = node_ip_coverage(ctdb, i, all_ips);
1475 if (maxnode == -1) {
1484 if (minnode == -1) {
1494 if (maxnode == -1) {
1495 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1496 ctdb_addr_to_str(&tmp_ip->addr)));
1501 /* If we want deterministic IPs then dont try to reallocate
1502 them to spread out the load.
1504 if (1 == ctdb->tunable.deterministic_public_ips) {
1508 /* if the spread between the smallest and largest coverage by
1509 a node is >=2 we steal one of the ips from the node with
1510 most coverage to even things out a bit.
1511 try to do this a limited number of times since we dont
1512 want to spend too much time balancing the ip coverage.
1514 if ( (maxnum > minnum+1)
1515 && (*retries < (num_ips + 5)) ){
1516 struct ctdb_public_ip_list *tmp;
1518 /* mark one of maxnode's vnn's as unassigned and try
1521 for (tmp=all_ips;tmp;tmp=tmp->next) {
1522 if (tmp->pnn == maxnode) {
/*
 * Entry of the singly linked list of nodes queued for a forced
 * rebalance.  Entries are linked through next and are matched by a pnn
 * member (see lcp2_forcerebalance).  force_rebalance_list is the
 * file-scope head of that list and starts out empty.
 */
1534 struct ctdb_rebalancenodes {
1535 struct ctdb_rebalancenodes *next;
1538 static struct ctdb_rebalancenodes *force_rebalance_list = NULL;
1541 /* set this flag to force the node to be rebalanced even if it just didnt
1542 become healthy again.
1544 void lcp2_forcerebalance(struct ctdb_context *ctdb, uint32_t pnn)
1546 struct ctdb_rebalancenodes *rebalance;
1548 for (rebalance = force_rebalance_list; rebalance; rebalance = rebalance->next) {
1549 if (rebalance->pnn == pnn) {
1554 rebalance = talloc(ctdb, struct ctdb_rebalancenodes);
1555 rebalance->pnn = pnn;
1556 rebalance->next = force_rebalance_list;
1557 force_rebalance_list = rebalance;
1560 /* Do necessary LCP2 initialisation. Bury it in a function here so
1561 * that we can unit test it.
1563 static void lcp2_init(struct ctdb_context * tmp_ctx,
1564 struct ctdb_node_map * nodemap,
1566 struct ctdb_public_ip_list *all_ips,
1567 uint32_t **lcp2_imbalances,
1568 bool **newly_healthy)
1571 struct ctdb_public_ip_list *tmp_ip;
1573 *newly_healthy = talloc_array(tmp_ctx, bool, nodemap->num);
1574 CTDB_NO_MEMORY_FATAL(tmp_ctx, *newly_healthy);
1575 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1576 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1578 for (i=0;i<nodemap->num;i++) {
1579 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1580 /* First step: is the node "healthy"? */
1581 (*newly_healthy)[i] = ! (bool)(nodemap->nodes[i].flags & mask);
1584 /* 2nd step: if a ndoe has IPs assigned then it must have been
1585 * healthy before, so we remove it from consideration... */
1586 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1587 if (tmp_ip->pnn != -1) {
1588 (*newly_healthy)[tmp_ip->pnn] = false;
1592 /* 3rd step: if a node is forced to re-balance then
1593 we allow failback onto the node */
1594 while (force_rebalance_list != NULL) {
1595 struct ctdb_rebalancenodes *next = force_rebalance_list->next;
1597 if (force_rebalance_list->pnn <= nodemap->num) {
1598 (*newly_healthy)[force_rebalance_list->pnn] = true;
1601 DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
1602 talloc_free(force_rebalance_list);
1603 force_rebalance_list = next;
/*
 * lcp2_allocate_unassigned: hand out addresses that have no node yet.
 *
 * Repeats while have_unassigned and should_loop are both true.  Each
 * round looks at the addresses in all_ips together with every node in
 * nodemap; nodes are checked with can_node_serve_ip() (non-zero means
 * the node can not serve the address) and by testing their flags
 * against mask.  For a candidate pair the cost is
 *     dstdsum = ip_distance_2_sum(addr, all_ips, dstnode)
 *     dstimbl = lcp2_imbalances[dstnode] + dstdsum
 * and the pair with the smallest dstdsum seen so far is remembered.
 * If a pair was found, the address is assigned (minip->pnn = minnode)
 * and lcp2_imbalances[minnode] is updated to minimbl.  have_unassigned
 * is then recomputed by scanning all_ips for pnn == -1.
 *
 * After the loop a warning is logged for every address that is still
 * unassigned.
 *
 * NOTE(review): the place where should_loop is set back to true and
 * the bookkeeping of the running minimum are not visible in this
 * excerpt - confirm against the complete source.
 */
1607 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1608 * the IP/node combination that will cost the least.
1610 static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1611 struct ctdb_node_map *nodemap,
1613 struct ctdb_public_ip_list *all_ips,
1614 uint32_t *lcp2_imbalances)
1616 struct ctdb_public_ip_list *tmp_ip;
1620 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1621 struct ctdb_public_ip_list *minip;
1623 bool should_loop = true;
1624 bool have_unassigned = true;
1626 while (have_unassigned && should_loop) {
1627 should_loop = false;
1629 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1630 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1636 /* loop over each unassigned ip. */
1637 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1638 if (tmp_ip->pnn != -1) {
1642 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1643 /* only check nodes that can actually serve this ip */
1644 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1645 /* no it couldnt so skip to the next node */
1648 if (nodemap->nodes[dstnode].flags & mask) {
1652 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1653 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1654 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1655 ctdb_addr_to_str(&(tmp_ip->addr)),
1657 dstimbl - lcp2_imbalances[dstnode]));
1660 if ((minnode == -1) || (dstdsum < mindsum)) {
1670 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1672 /* If we found one then assign it to the given node. */
1673 if (minnode != -1) {
1674 minip->pnn = minnode;
1675 lcp2_imbalances[minnode] = minimbl;
1676 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1677 ctdb_addr_to_str(&(minip->addr)),
1682 /* There might be a better way but at least this is clear. */
1683 have_unassigned = false;
1684 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1685 if (tmp_ip->pnn == -1) {
1686 have_unassigned = true;
1691 /* We know if we have an unassigned addresses so we might as
1694 if (have_unassigned) {
1695 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1696 if (tmp_ip->pnn == -1) {
1697 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1698 ctdb_addr_to_str(&tmp_ip->addr)));
1704 /* LCP2 algorithm for rebalancing the cluster. Given a candidate node
1705 * to move IPs from, determines the best IP/destination node
1706 * combination to move from the source node.
1708 static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
1709 struct ctdb_node_map *nodemap,
1710 struct ctdb_public_ip_list *all_ips,
1713 uint32_t *lcp2_imbalances,
1714 bool *newly_healthy)
1716 int dstnode, mindstnode;
1717 uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
1718 uint32_t minsrcimbl, mindstimbl;
1719 struct ctdb_public_ip_list *minip;
1720 struct ctdb_public_ip_list *tmp_ip;
1722 /* Find an IP and destination node that best reduces imbalance. */
1728 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1729 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
1731 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1732 /* Only consider addresses on srcnode. */
1733 if (tmp_ip->pnn != srcnode) {
1737 /* What is this IP address costing the source node? */
1738 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
1739 srcimbl = candimbl - srcdsum;
1741 /* Consider this IP address would cost each potential
1742 * destination node. Destination nodes are limited to
1743 * those that are newly healthy, since we don't want
1744 * to do gratuitous failover of IPs just to make minor
1745 * balance improvements.
1747 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1748 if (! newly_healthy[dstnode]) {
1751 /* only check nodes that can actually serve this ip */
1752 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1753 /* no it couldnt so skip to the next node */
1757 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1758 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1759 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1760 srcnode, srcimbl - lcp2_imbalances[srcnode],
1761 ctdb_addr_to_str(&(tmp_ip->addr)),
1762 dstnode, dstimbl - lcp2_imbalances[dstnode]));
1764 if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
1765 ((mindstnode == -1) || \
1766 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1769 minsrcimbl = srcimbl;
1770 mindstnode = dstnode;
1771 mindstimbl = dstimbl;
1775 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1777 if (mindstnode != -1) {
1778 /* We found a move that makes things better... */
1779 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1780 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1781 ctdb_addr_to_str(&(minip->addr)),
1782 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
1785 lcp2_imbalances[srcnode] = srcimbl;
1786 lcp2_imbalances[mindstnode] = mindstimbl;
1787 minip->pnn = mindstnode;
/*
 * Sort record used by lcp2_failback: it carries the imbalance of a
 * node so that an array of these records can be ordered with qsort().
 * NOTE(review): the member list is not visible in this excerpt.
 */
1796 struct lcp2_imbalance_pnn {
/*
 * qsort() comparison callback for arrays of struct lcp2_imbalance_pnn.
 * Both arguments are cast to that type and compared on their imbalance
 * member only (greater than, then equal, then the remaining case).
 *
 * NOTE(review): the values returned by each branch are not visible
 * here, so the resulting sort direction must be confirmed against the
 * complete source.
 */
1801 static int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
1803 const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
1804 const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
1806 if (lipa->imbalance > lipb->imbalance) {
1808 } else if (lipa->imbalance == lipb->imbalance) {
1815 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1816 * node with the highest LCP2 imbalance, and then determines the best
1817 * IP/destination node combination to move from the source node.
1819 static bool lcp2_failback(struct ctdb_context *ctdb,
1820 struct ctdb_node_map *nodemap,
1822 struct ctdb_public_ip_list *all_ips,
1823 uint32_t *lcp2_imbalances,
1824 bool *newly_healthy)
1826 int i, num_newly_healthy;
1827 struct lcp2_imbalance_pnn * lips;
1830 /* It is only worth continuing if we have suitable target
1831 * nodes to transfer IPs to. This check is much cheaper than
1834 num_newly_healthy = 0;
1835 for (i = 0; i < nodemap->num; i++) {
1836 if (newly_healthy[i]) {
1837 num_newly_healthy++;
1840 if (num_newly_healthy == 0) {
1844 /* Put the imbalances and nodes into an array, sort them and
1845 * iterate through candidates. Usually the 1st one will be
1846 * used, so this doesn't cost much...
1848 lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
1849 for (i = 0; i < nodemap->num; i++) {
1850 lips[i].imbalance = lcp2_imbalances[i];
1853 qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
1854 lcp2_cmp_imbalance_pnn);
1857 for (i = 0; i < nodemap->num; i++) {
1858 /* This means that all nodes had 0 or 1 addresses, so
1859 * can't be imbalanced.
1861 if (lips[i].imbalance == 0) {
1865 if (lcp2_failback_candidate(ctdb,
/*
 * ctdb_takeover_run_core: compute which node should own each public
 * address, without sending anything to the cluster.
 *
 * Visible steps:
 *  - choose mask: INACTIVE|DISABLED when num_healthy > 0, otherwise
 *    INACTIVE only, so that disabled nodes act as a fallback;
 *  - build the merged address list with create_merged_ip_list() and
 *    publish it through *all_ips_p;
 *  - with tunable deterministic_public_ips == 1, reset every address to
 *    pnn = index % nodemap->num;
 *  - examine addresses whose node matches mask or fails
 *    can_node_serve_ip() (per the existing comments these become -1);
 *  - with tunable lcp2_public_ip_assignment == 1 run lcp2_init() and
 *    lcp2_allocate_unassigned(), otherwise basic_allocate_unassigned();
 *  - with tunable no_ip_failback == 1, log an error if
 *    deterministic_public_ips is also 1;
 *  - run lcp2_failback() or basic_failback() and test the result.
 *
 * NOTE(review): index % nodemap->num divides by zero if nodemap->num is
 * 0 - confirm that callers never pass an empty node map.
 * NOTE(review): what happens after a failback call reports true, and
 * where tmp_ctx is released, is not visible in this excerpt.
 */
1881 /* The calculation part of the IP allocation algorithm. */
1882 static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
1883 struct ctdb_node_map *nodemap,
1884 struct ctdb_public_ip_list **all_ips_p)
1886 int i, num_healthy, retries, num_ips;
1888 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1889 uint32_t *lcp2_imbalances;
1890 bool *newly_healthy;
1892 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1894 /* Count how many completely healthy nodes we have */
1896 for (i=0;i<nodemap->num;i++) {
1897 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1902 if (num_healthy > 0) {
1903 /* We have healthy nodes, so only consider them for
1904 serving public addresses
1906 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1908 /* We didnt have any completely healthy nodes so
1909 use "disabled" nodes as a fallback
1911 mask = NODE_FLAGS_INACTIVE;
1914 /* since nodes only know about those public addresses that
1915 can be served by that particular node, no single node has
1916 a full list of all public addresses that exist in the cluster.
1917 Walk over all node structures and create a merged list of
1918 all public addresses that exist in the cluster.
1920 keep the tree of ips around as ctdb->ip_tree
1922 all_ips = create_merged_ip_list(ctdb);
1923 *all_ips_p = all_ips; /* minimal code changes */
1925 /* Count how many ips we have */
1927 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1931 /* If we want deterministic ip allocations, i.e. that the ip addresses
1932 will always be allocated the same way for a specific set of
1933 available/unavailable nodes.
1935 if (1 == ctdb->tunable.deterministic_public_ips) {
1936 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1937 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1938 tmp_ip->pnn = i%nodemap->num;
1943 /* mark all public addresses with a masked node as being served by
1946 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1947 if (tmp_ip->pnn == -1) {
1950 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1955 /* verify that the assigned nodes can serve that public ip
1956 and set it to -1 if not
1958 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1959 if (tmp_ip->pnn == -1) {
1962 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1963 /* this node can not serve this ip. */
1968 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1969 lcp2_init(tmp_ctx, nodemap, mask, all_ips, &lcp2_imbalances, &newly_healthy);
1972 /* now we must redistribute all public addresses with takeover node
1973 -1 among the nodes available
1977 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1978 lcp2_allocate_unassigned(ctdb, nodemap, mask, all_ips, lcp2_imbalances);
1980 basic_allocate_unassigned(ctdb, nodemap, mask, all_ips);
1983 /* If we dont want ips to fail back after a node becomes healthy
1984 again, we wont even try to reallocat the ip addresses so that
1985 they are evenly spread out.
1986 This can NOT be used at the same time as DeterministicIPs !
1988 if (1 == ctdb->tunable.no_ip_failback) {
1989 if (1 == ctdb->tunable.deterministic_public_ips) {
1990 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1996 /* now, try to make sure the ip adresses are evenly distributed
1999 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
2000 if (lcp2_failback(ctdb, nodemap, mask, all_ips, lcp2_imbalances, newly_healthy)) {
2004 if (basic_failback(ctdb, nodemap, mask, all_ips, num_ips, &retries)) {
2009 /* finished distributing the public addresses, now just send the
2010 info out to the nodes
2014 /* at this point ->pnn is the node which will own each IP
2015 or -1 if there is no node that can cover this ip
/*
 * ctdb_takeover_run: recompute the public address layout and push it
 * out to the cluster.
 *
 * Visible steps:
 *  1. Test tunable disable_ip_failover (per the existing comment only
 *     the ipreallocated event is sent in that case).
 *  2. Call ctdb_takeover_run_core() to fill all_ips.
 *  3. Release phase: for every node that is not DISCONNECTED and every
 *     address not assigned to that node, send an asynchronous
 *     CTDB_CONTROL_RELEASE_IPv4 (AF_INET addresses) or
 *     CTDB_CONTROL_RELEASE_IP control, then wait for all replies with
 *     ctdb_client_async_wait().
 *  4. Takeover phase: for every address with an owner (pnn != -1) send
 *     CTDB_CONTROL_TAKEOVER_IPv4 or CTDB_CONTROL_TAKEOVER_IP to that
 *     owner and wait again.
 *  5. Run the ipreallocated event on all connected nodes through
 *     CTDB_CONTROL_RUN_EVENTSCRIPTS.
 *
 * Every control uses TAKEOVER_TIMEOUT().  Failures to send or to
 * complete a control are logged and tmp_ctx is freed on those paths.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * confirm the success and failure codes against the complete source.
 */
2022 make any IP alias changes for public addresses that are necessary
2024 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
2027 struct ctdb_public_ip ip;
2028 struct ctdb_public_ipv4 ipv4;
2030 struct ctdb_public_ip_list *all_ips, *tmp_ip;
2032 struct timeval timeout;
2033 struct client_async_data *async_data;
2034 struct ctdb_client_control_state *state;
2035 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2038 * ip failover is completely disabled, just send out the
2039 * ipreallocated event.
2041 if (ctdb->tunable.disable_ip_failover != 0) {
2047 /* Do the IP reassignment calculations */
2048 ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
2050 /* now tell all nodes to delete any alias that they should not
2051 have. This will be a NOOP on nodes that don't currently
2052 hold the given alias */
2053 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2054 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2056 for (i=0;i<nodemap->num;i++) {
2057 /* don't talk to unconnected nodes, but do talk to banned nodes */
2058 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
2062 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2063 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
2064 /* This node should be serving this
2065 vnn so dont tell it to release the ip
2069 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2070 ipv4.pnn = tmp_ip->pnn;
2071 ipv4.sin = tmp_ip->addr.ip;
2073 timeout = TAKEOVER_TIMEOUT();
2074 data.dsize = sizeof(ipv4);
2075 data.dptr = (uint8_t *)&ipv4;
2076 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2077 0, CTDB_CONTROL_RELEASE_IPv4, 0,
2081 ip.pnn = tmp_ip->pnn;
2082 ip.addr = tmp_ip->addr;
2084 timeout = TAKEOVER_TIMEOUT();
2085 data.dsize = sizeof(ip);
2086 data.dptr = (uint8_t *)&ip;
2087 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2088 0, CTDB_CONTROL_RELEASE_IP, 0,
2093 if (state == NULL) {
2094 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
2095 talloc_free(tmp_ctx);
2099 ctdb_client_async_add(async_data, state);
2102 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2103 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
2104 talloc_free(tmp_ctx);
2107 talloc_free(async_data);
2110 /* tell all nodes to get their own IPs */
2111 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2112 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2113 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2114 if (tmp_ip->pnn == -1) {
2115 /* this IP won't be taken over */
2119 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2120 ipv4.pnn = tmp_ip->pnn;
2121 ipv4.sin = tmp_ip->addr.ip;
2123 timeout = TAKEOVER_TIMEOUT();
2124 data.dsize = sizeof(ipv4);
2125 data.dptr = (uint8_t *)&ipv4;
2126 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2127 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
2131 ip.pnn = tmp_ip->pnn;
2132 ip.addr = tmp_ip->addr;
2134 timeout = TAKEOVER_TIMEOUT();
2135 data.dsize = sizeof(ip);
2136 data.dptr = (uint8_t *)&ip;
2137 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2138 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2142 if (state == NULL) {
2143 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2144 talloc_free(tmp_ctx);
2148 ctdb_client_async_add(async_data, state);
2150 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2151 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2152 talloc_free(tmp_ctx);
2157 /* tell all nodes to update natwg */
2158 /* send the flags update natgw on all connected nodes */
2159 data.dptr = discard_const("ipreallocated");
2160 data.dsize = strlen((char *)data.dptr) + 1;
2161 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2162 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
2163 nodes, 0, TAKEOVER_TIMEOUT(),
2167 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
2170 talloc_free(tmp_ctx);
/*
 * talloc destructor for struct ctdb_client_ip (installed with
 * talloc_set_destructor in ctdb_control_tcp_client).  Logs the address,
 * port and client id at debug level and unlinks the entry from
 * ip->ctdb->client_ip_list.
 */
2176 destroy a ctdb_client_ip structure
2178 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2180 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2181 ctdb_addr_to_str(&ip->addr),
2182 ntohs(ip->addr.ip.sin_port),
2185 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
/*
 * ctdb_control_tcp_client: a local client registers a TCP connection
 * that should be tickled after an IP takeover.
 *
 * indata must be exactly sizeof(struct ctdb_control_tcp) (old IPv4
 * layout, converted into a zeroed struct ctdb_control_tcp_addr) or
 * sizeof(struct ctdb_control_tcp_addr); any other size is logged as
 * invalid.  Source and destination are canonicalised with
 * ctdb_canonicalize_ip() and the destination is looked up with
 * find_public_ip_vnn().
 *
 * On the visible success path the function
 *  - allocates a ctdb_client_ip on the client, installs
 *    ctdb_client_ip_destructor and links it into ctdb->client_ip_list;
 *  - allocates a ctdb_tcp_list entry on the client holding the
 *    connection and links it into client->tcp_list;
 *  - broadcasts CTDB_CONTROL_TCP_ADD (no reply expected) to all
 *    connected nodes.
 * A destination whose vnn->pnn differs from ctdb->pnn is rejected; per
 * the existing comment this tells smbd to die.
 *
 * NOTE(review): client comes from ctdb_reqid_find() and client->pid is
 * read with no NULL check visible in this excerpt - confirm.
 * NOTE(review): the registered-client log lines print the destination
 * port next to the source address and port - confirm this is intended.
 * NOTE(review): return values are not visible in this excerpt.
 */
2190 called by a client to inform us of a TCP connection that it is managing
2191 that should tickled with an ACK when IP takeover is done
2192 we handle both the old ipv4 style of packets as well as the new ipv4/6
2195 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2198 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2199 struct ctdb_control_tcp *old_addr = NULL;
2200 struct ctdb_control_tcp_addr new_addr;
2201 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2202 struct ctdb_tcp_list *tcp;
2203 struct ctdb_tcp_connection t;
2206 struct ctdb_client_ip *ip;
2207 struct ctdb_vnn *vnn;
2208 ctdb_sock_addr addr;
2210 switch (indata.dsize) {
2211 case sizeof(struct ctdb_control_tcp):
2212 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2213 ZERO_STRUCT(new_addr);
2214 tcp_sock = &new_addr;
2215 tcp_sock->src.ip = old_addr->src;
2216 tcp_sock->dest.ip = old_addr->dest;
2218 case sizeof(struct ctdb_control_tcp_addr):
2219 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2222 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2223 "to ctdb_control_tcp_client. size was %d but "
2224 "only allowed sizes are %lu and %lu\n",
2226 (long unsigned)sizeof(struct ctdb_control_tcp),
2227 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
2231 addr = tcp_sock->src;
2232 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2233 addr = tcp_sock->dest;
2234 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
2237 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2238 vnn = find_public_ip_vnn(ctdb, &addr);
2240 switch (addr.sa.sa_family) {
2242 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2243 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2244 ctdb_addr_to_str(&addr)));
2248 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2249 ctdb_addr_to_str(&addr)));
2252 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2258 if (vnn->pnn != ctdb->pnn) {
2259 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2260 ctdb_addr_to_str(&addr),
2261 client_id, client->pid));
2262 /* failing this call will tell smbd to die */
2266 ip = talloc(client, struct ctdb_client_ip);
2267 CTDB_NO_MEMORY(ctdb, ip);
2271 ip->client_id = client_id;
2272 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2273 DLIST_ADD(ctdb->client_ip_list, ip);
2275 tcp = talloc(client, struct ctdb_tcp_list);
2276 CTDB_NO_MEMORY(ctdb, tcp);
2278 tcp->connection.src_addr = tcp_sock->src;
2279 tcp->connection.dst_addr = tcp_sock->dest;
2281 DLIST_ADD(client->tcp_list, tcp);
2283 t.src_addr = tcp_sock->src;
2284 t.dst_addr = tcp_sock->dest;
2286 data.dptr = (uint8_t *)&t;
2287 data.dsize = sizeof(t);
2289 switch (addr.sa.sa_family) {
2291 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2292 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2293 ctdb_addr_to_str(&tcp_sock->src),
2294 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2297 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2298 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2299 ctdb_addr_to_str(&tcp_sock->src),
2300 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2303 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2307 /* tell all nodes about this tcp connection */
2308 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2309 CTDB_CONTROL_TCP_ADD,
2310 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2312 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
/*
 * ctdb_tcp_find: linear search of a tickle array for a connection.
 *
 * array: array to search; a NULL array is tested for explicitly
 * tcp:   connection to look for
 *
 * An entry matches when ctdb_same_sockaddr() accepts both its source
 * and its destination address.  Returns a pointer to the matching
 * element inside array->connections.
 * NOTE(review): the not-found result is not visible here; callers
 * compare the result against NULL.
 */
2320 find a tcp address on a list
2322 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2323 struct ctdb_tcp_connection *tcp)
2327 if (array == NULL) {
2331 for (i=0;i<array->num;i++) {
2332 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2333 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2334 return &array->connections[i];
/*
 * ctdb_control_tcp_add: record a TCP connection (tickle) against the
 * public address it terminates on.
 *
 * indata.dptr is interpreted as a struct ctdb_tcp_connection.  The
 * destination address is looked up with find_public_ip_vnn(); a
 * non-public destination is logged at info level.
 *
 * If the vnn has no tickle array yet, one is allocated (parent
 * ctdb->nodes) with room for a single connection and the connection is
 * stored.  Otherwise a connection already known to ctdb_tcp_find() is
 * only logged, and a new one is appended after growing
 * tcparray->connections with talloc_realloc().  In both storing paths
 * vnn->tcp_update_needed is set when tcp_update_needed is true.
 *
 * NOTE(review): indata.dsize is not checked in the visible lines;
 * presumably the dispatcher validates it - confirm.
 * NOTE(review): updates of tcparray->num and the return values are not
 * visible in this excerpt.
 */
2343 called by a daemon to inform us of a TCP connection that one of its
2344 clients managing that should tickled with an ACK when IP takeover is
2347 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2349 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2350 struct ctdb_tcp_array *tcparray;
2351 struct ctdb_tcp_connection tcp;
2352 struct ctdb_vnn *vnn;
2354 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2356 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2357 ctdb_addr_to_str(&p->dst_addr)));
2363 tcparray = vnn->tcp_array;
2365 /* If this is the first tickle */
2366 if (tcparray == NULL) {
2367 tcparray = talloc_size(ctdb->nodes,
2368 offsetof(struct ctdb_tcp_array, connections) +
2369 sizeof(struct ctdb_tcp_connection) * 1);
2370 CTDB_NO_MEMORY(ctdb, tcparray);
2371 vnn->tcp_array = tcparray;
2374 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2375 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2377 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2378 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2381 if (tcp_update_needed) {
2382 vnn->tcp_update_needed = true;
2388 /* Do we already have this tickle ?*/
2389 tcp.src_addr = p->src_addr;
2390 tcp.dst_addr = p->dst_addr;
2391 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2392 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2393 ctdb_addr_to_str(&tcp.dst_addr),
2394 ntohs(tcp.dst_addr.ip.sin_port),
2399 /* A new tickle, we must add it to the array */
2400 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2401 struct ctdb_tcp_connection,
2403 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2405 vnn->tcp_array = tcparray;
2406 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2407 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2410 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2411 ctdb_addr_to_str(&tcp.dst_addr),
2412 ntohs(tcp.dst_addr.ip.sin_port),
2415 if (tcp_update_needed) {
2416 vnn->tcp_update_needed = true;
/*
 * ctdb_remove_tcp_connection: drop one connection from the tickle
 * array of the public address it terminates on.
 *
 * The vnn is found from conn->dst_addr with find_public_ip_vnn().  An
 * unknown address, an empty tickle array and an unknown connection are
 * each only logged.  A known connection is removed by overwriting it
 * with the last array element and decrementing num; when num reaches
 * zero the whole array is freed and vnn->tcp_array reset to NULL.
 * vnn->tcp_update_needed is set after a removal.
 *
 * NOTE(review): the final log line prints conn->src_addr while the
 * earlier ones print conn->dst_addr - confirm this is intended.
 */
2424 called by a daemon to inform us of a TCP connection that one of its
2425 clients managing that should tickled with an ACK when IP takeover is
2428 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2430 struct ctdb_tcp_connection *tcpp;
2431 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2434 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2435 ctdb_addr_to_str(&conn->dst_addr)));
2439 /* if the array is empty we cant remove it
2440 and we dont need to do anything
2442 if (vnn->tcp_array == NULL) {
2443 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2444 ctdb_addr_to_str(&conn->dst_addr),
2445 ntohs(conn->dst_addr.ip.sin_port)));
2450 /* See if we know this connection
2451 if we dont know this connection then we dont need to do anything
2453 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2455 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2456 ctdb_addr_to_str(&conn->dst_addr),
2457 ntohs(conn->dst_addr.ip.sin_port)));
2462 /* We need to remove this entry from the array.
2463 Instead of allocating a new array and copying data to it
2464 we cheat and just copy the last entry in the existing array
2465 to the entry that is to be removed and just shring the
2468 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2469 vnn->tcp_array->num--;
2471 /* If we deleted the last entry we also need to remove the entire array
2473 if (vnn->tcp_array->num == 0) {
2474 talloc_free(vnn->tcp_array);
2475 vnn->tcp_array = NULL;
2478 vnn->tcp_update_needed = true;
2480 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2481 ctdb_addr_to_str(&conn->src_addr),
2482 ntohs(conn->src_addr.ip.sin_port)));
/*
 * ctdb_control_tcp_remove: control handler that interprets indata.dptr
 * as a struct ctdb_tcp_connection and passes it to
 * ctdb_remove_tcp_connection().
 * NOTE(review): indata.dsize is not checked in the visible lines and
 * the return value is not visible in this excerpt.
 */
2487 called by a daemon to inform us of a TCP connection that one of its
2488 clients used are no longer needed in the tickle database
2490 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2492 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2494 ctdb_remove_tcp_connection(ctdb, conn);
/*
 * ctdb_control_startup: control handler invoked when a daemon
 * restarts.  The visible body contains only a reminder comment; the
 * sending of tickles to the new node is not implemented in these
 * lines.
 */
2501 called when a daemon restarts - send all tickes for all public addresses
2502 we are serving immediately to the new node.
2504 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2506 /*XXX here we should send all tickes we are serving to the new node */
/*
 * ctdb_takeover_client_destructor_hook: clean up the tickles of a
 * client that is going away.  Pops entries off client->tcp_list until
 * it is empty and calls ctdb_remove_tcp_connection() on each.
 */
2512 called when a client structure goes away - hook to remove
2513 elements from the tcp_list in all daemons
2515 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2517 while (client->tcp_list) {
2518 struct ctdb_tcp_list *tcp = client->tcp_list;
2519 DLIST_REMOVE(client->tcp_list, tcp);
2520 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
/*
 * ctdb_release_all_ips: walk ctdb->vnn and give up every public
 * address.
 *
 * For an address the system does not hold (ctdb_sys_have_ip() false)
 * only the interface assignment is dropped.  Otherwise the releaseip
 * event is run with the arguments interface, address and netmask bits,
 * release_kill_clients() is called for the address, and the interface
 * assignment is dropped.
 * NOTE(review): presumably the first case skips to the next vnn; the
 * statement doing so is not visible in this excerpt.
 */
2526 release all IPs on shutdown
2528 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2530 struct ctdb_vnn *vnn;
2532 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2533 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2534 ctdb_vnn_unassign_iface(ctdb, vnn);
2540 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2541 ctdb_vnn_iface_string(vnn),
2542 ctdb_addr_to_str(&vnn->public_address),
2543 vnn->public_netmask_bits);
2544 release_kill_clients(ctdb, &vnn->public_address);
2545 ctdb_vnn_unassign_iface(ctdb, vnn);
/*
 * ctdb_control_get_public_ips: return the public addresses known to
 * this node as a struct ctdb_all_public_ips in outdata.
 *
 * The reply is sized for every vnn, zero-filled and allocated as a
 * talloc child of outdata.  With CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE
 * set in c->flags, vnns rejected by ctdb_vnn_available() are tested
 * for before an entry is written; outdata->dsize is then recomputed
 * from the number of entries actually written.
 * NOTE(review): the counter updates and return value are not visible
 * in this excerpt.
 */
2551 get list of public IPs
2553 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2554 struct ctdb_req_control *c, TDB_DATA *outdata)
2557 struct ctdb_all_public_ips *ips;
2558 struct ctdb_vnn *vnn;
2559 bool only_available = false;
2561 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2562 only_available = true;
2565 /* count how many public ip structures we have */
2567 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2571 len = offsetof(struct ctdb_all_public_ips, ips) +
2572 num*sizeof(struct ctdb_public_ip);
2573 ips = talloc_zero_size(outdata, len);
2574 CTDB_NO_MEMORY(ctdb, ips);
2577 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2578 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2581 ips->ips[i].pnn = vnn->pnn;
2582 ips->ips[i].addr = vnn->public_address;
2586 len = offsetof(struct ctdb_all_public_ips, ips) +
2587 i*sizeof(struct ctdb_public_ip);
2589 outdata->dsize = len;
2590 outdata->dptr = (uint8_t *)ips;
/*
 * ctdb_control_get_public_ipsv4: old-style variant of the public
 * address query that fills a struct ctdb_all_public_ipsv4.
 *
 * Both passes over ctdb->vnn test for addresses whose family is not
 * AF_INET.  The reply is zero-filled, allocated as a talloc child of
 * outdata and published through outdata->dsize and outdata->dptr
 * before the entries are written.
 * NOTE(review): the counter updates and return value are not visible
 * in this excerpt.
 */
2597 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2599 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2600 struct ctdb_req_control *c, TDB_DATA *outdata)
2603 struct ctdb_all_public_ipsv4 *ips;
2604 struct ctdb_vnn *vnn;
2606 /* count how many public ip structures we have */
2608 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2609 if (vnn->public_address.sa.sa_family != AF_INET) {
2615 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2616 num*sizeof(struct ctdb_public_ipv4);
2617 ips = talloc_zero_size(outdata, len);
2618 CTDB_NO_MEMORY(ctdb, ips);
2620 outdata->dsize = len;
2621 outdata->dptr = (uint8_t *)ips;
2625 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2626 if (vnn->public_address.sa.sa_family != AF_INET) {
2629 ips->ips[i].pnn = vnn->pnn;
2630 ips->ips[i].sin = vnn->public_address.ip;
/*
 * ctdb_control_get_public_ip_info: describe one public address and the
 * interfaces it may live on.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr.  The address is
 * looked up with find_public_ip_vnn(), with ctdb->single_ip_vnn as an
 * alternative when it matches; an address that is neither is logged as
 * not public.  The reply (struct ctdb_control_public_ip_info, talloc
 * child of outdata, zero-filled) carries the address, its pnn, and one
 * ctdb_control_iface_info per entry of vnn->ifaces with name, link
 * state and reference count.  active_idx starts as 0xFFFFFFFF and is
 * set to the index of the interface equal to vnn->iface.
 *
 * NOTE(review): strcpy() into info->ifaces[i].name is unbounded;
 * presumably name is a fixed-size array - confirm cur->name always
 * fits.
 * NOTE(review): return values are not visible in this excerpt.
 */
2637 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2638 struct ctdb_req_control *c,
2643 ctdb_sock_addr *addr;
2644 struct ctdb_control_public_ip_info *info;
2645 struct ctdb_vnn *vnn;
2647 addr = (ctdb_sock_addr *)indata.dptr;
2649 vnn = find_public_ip_vnn(ctdb, addr);
2651 /* if it is not a public ip it could be our 'single ip' */
2652 if (ctdb->single_ip_vnn) {
2653 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2654 vnn = ctdb->single_ip_vnn;
2659 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2660 "'%s'not a public address\n",
2661 ctdb_addr_to_str(addr)));
2665 /* count how many public ip structures we have */
2667 for (;vnn->ifaces[num];) {
2671 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2672 num*sizeof(struct ctdb_control_iface_info);
2673 info = talloc_zero_size(outdata, len);
2674 CTDB_NO_MEMORY(ctdb, info);
2676 info->ip.addr = vnn->public_address;
2677 info->ip.pnn = vnn->pnn;
2678 info->active_idx = 0xFFFFFFFF;
2680 for (i=0; vnn->ifaces[i]; i++) {
2681 struct ctdb_iface *cur;
2683 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2685 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2689 if (vnn->iface == cur) {
2690 info->active_idx = i;
2692 strcpy(info->ifaces[i].name, cur->name);
2693 info->ifaces[i].link_state = cur->link_up;
2694 info->ifaces[i].references = cur->references;
2697 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2698 i*sizeof(struct ctdb_control_iface_info);
2700 outdata->dsize = len;
2701 outdata->dptr = (uint8_t *)info;
/*
 * ctdb_control_get_ifaces: return every interface on ctdb->ifaces as a
 * struct ctdb_control_get_ifaces in outdata.
 *
 * The reply is zero-filled and allocated as a talloc child of outdata.
 * Each entry receives the interface name, link state (cur->link_up)
 * and reference count; outdata->dsize is computed from the number of
 * entries written.
 *
 * NOTE(review): strcpy() into ifaces->ifaces[i].name is unbounded;
 * presumably name is a fixed-size array - confirm cur->name always
 * fits.
 * NOTE(review): the counter updates and return value are not visible
 * in this excerpt.
 */
2706 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2707 struct ctdb_req_control *c,
2711 struct ctdb_control_get_ifaces *ifaces;
2712 struct ctdb_iface *cur;
2714 /* count how many public ip structures we have */
2716 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2720 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2721 num*sizeof(struct ctdb_control_iface_info);
2722 ifaces = talloc_zero_size(outdata, len);
2723 CTDB_NO_MEMORY(ctdb, ifaces);
2726 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2727 strcpy(ifaces->ifaces[i].name, cur->name);
2728 ifaces->ifaces[i].link_state = cur->link_up;
2729 ifaces->ifaces[i].references = cur->references;
2733 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2734 i*sizeof(struct ctdb_control_iface_info);
2736 outdata->dsize = len;
2737 outdata->dptr = (uint8_t *)ifaces;
/*
 * ctdb_control_set_iface_link: update the recorded link state of one
 * interface.
 *
 * indata.dptr is interpreted as a struct ctdb_control_iface_info.  The
 * request is validated first: the byte at name[CTDB_IFACE_SIZE] must
 * be NUL, link_state must be one of the values accepted by the switch,
 * and references must be 0; each violation is logged.  The interface
 * is then looked up by name with ctdb_find_iface().  When the
 * requested state differs from iface->link_up the change is logged (at
 * error level if the link was up before, notice level otherwise) and
 * stored.
 * NOTE(review): the accepted link_state values and all return values
 * are not visible in this excerpt.
 */
2742 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2743 struct ctdb_req_control *c,
2746 struct ctdb_control_iface_info *info;
2747 struct ctdb_iface *iface;
2748 bool link_up = false;
2750 info = (struct ctdb_control_iface_info *)indata.dptr;
2752 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2753 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2754 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2755 len, len, info->name));
2759 switch (info->link_state) {
2767 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2768 (unsigned int)info->link_state));
2772 if (info->references != 0) {
2773 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2774 (unsigned int)info->references));
2778 iface = ctdb_find_iface(ctdb, info->name);
2779 if (iface == NULL) {
2783 if (link_up == iface->link_up) {
2787 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2788 ("iface[%s] has changed it's link status %s => %s\n",
2790 iface->link_up?"up":"down",
2791 link_up?"up":"down"));
2793 iface->link_up = link_up;
2799 structure containing the listening socket and the list of tcp connections
2800 that the ctdb daemon is to kill
// State used while killing TCP connections. It is allocated as a talloc
// child of a vnn and referenced from vnn->killtcp (see
// ctdb_killtcp_add_connection). Some members are not visible in this view;
// capture_fd and private_data are used elsewhere in the file.
2802 struct ctdb_kill_tcp {
// vnn whose killtcp pointer the destructor clears.
2803 struct ctdb_vnn *vnn;
2804 struct ctdb_context *ctdb;
// fd event registered on capture_fd with capture_tcp_handler as handler.
2806 struct fd_event *fde;
// Tree of struct ctdb_killtcp_con entries keyed by killtcp_key().
2807 trbt_tree_t *connections;
2812 a tcp connection that is to be killed
// One TCP connection queued for reset. A count member is used by
// tickle_connection_traverse but is not visible in this view.
2814 struct ctdb_killtcp_con {
// Canonicalized endpoints as stored by ctdb_killtcp_add_connection.
2815 ctdb_sock_addr src_addr;
2816 ctdb_sock_addr dst_addr;
// Back pointer to the owning killtcp state.
2818 struct ctdb_kill_tcp *killtcp;
2821 /* this function is used to create a key to represent this socketpair
2822 in the killtcp tree.
2823 this key is used to insert and lookup matching socketpairs that are
2824 to be tickled and RST
// Number of 32-bit words in a killtcp tree key. The IPv6 layout below fills
// key[0] through key[9].
2826 #define KILLTCP_KEYLEN 10
// Builds the lookup key for a (src, dst) socket pair.
// The returned pointer refers to a function-local static array, so each call
// overwrites the previous result and the function is not reentrant.
2827 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2829 static uint32_t key[KILLTCP_KEYLEN];
// Start from an all-zero key. The IPv4 layout below only writes key[0] to
// key[3], so the remaining words stay zero.
2831 bzero(key, sizeof(key));
// Both endpoints must belong to the same address family.
2833 if (src->sa.sa_family != dst->sa.sa_family) {
2834 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
// Case labels are not visible in this view. The first group of assignments
// uses the sockaddr_in members, the second the sockaddr_in6 members.
2838 switch (src->sa.sa_family) {
2840 key[0] = dst->ip.sin_addr.s_addr;
2841 key[1] = src->ip.sin_addr.s_addr;
2842 key[2] = dst->ip.sin_port;
2843 key[3] = src->ip.sin_port;
// NOTE(review): s6_addr is reinterpreted as uint32_t words through a pointer
// cast; confirm alignment and aliasing are acceptable on all targets.
2846 uint32_t *dst6_addr32 =
2847 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
2848 uint32_t *src6_addr32 =
2849 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
// Address words are interleaved dst/src, starting with the last word [3].
2850 key[0] = dst6_addr32[3];
2851 key[1] = src6_addr32[3];
2852 key[2] = dst6_addr32[2];
2853 key[3] = src6_addr32[2];
2854 key[4] = dst6_addr32[1];
2855 key[5] = src6_addr32[1];
2856 key[6] = dst6_addr32[0];
2857 key[7] = src6_addr32[0];
2858 key[8] = dst->ip6.sin6_port;
2859 key[9] = src->ip6.sin6_port;
2863 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2871 called when we get a read event on the raw socket
// fd event handler for the capture socket. It reads one packet, looks the
// socket pair up in killtcp->connections and, on a match, sends a TCP
// packet built from the captured ack and sequence numbers.
// private_data is the struct ctdb_kill_tcp registered with event_add_fd.
2873 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2874 uint16_t flags, void *private_data)
2876 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2877 struct ctdb_killtcp_con *con;
2878 ctdb_sock_addr src, dst;
2879 uint32_t ack_seq, seq;
// Only read events are handled.
2881 if (!(flags & EVENT_FD_READ)) {
// Parse the captured packet. src and dst are presumably filled in by the
// argument line that is not visible here - TODO confirm.
2885 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2886 killtcp->private_data,
2888 &ack_seq, &seq) != 0) {
2889 /* probably a non-tcp ACK packet */
// The lookup key is built from the captured packet's (src, dst).
// ctdb_killtcp_add_connection inserts with (dst_addr, src_addr), i.e. the
// opposite argument order.
2893 /* check if we have this guy in our list of connections
2896 con = trbt_lookuparray32(killtcp->connections,
2897 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2899 /* no this was some other packet we can just ignore */
2903 /* This one has been tickled !
2904 now reset him and remove him from the list.
// NOTE(review): the message prints only a port for the destination side and
// reads the ports through the .ip member whatever the address family is;
// confirm that is intended.
2906 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2907 ntohs(con->dst_addr.ip.sin_port),
2908 ctdb_addr_to_str(&con->src_addr),
2909 ntohs(con->src_addr.ip.sin_port)));
// The final argument 1 presumably requests a RST - confirm against
// ctdb_sys_send_tcp.
2911 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2916 /* when traversing the list of all tcp connections to send tickle acks to
2917 (so that we can capture the ack coming back and kill the connection
2919 this callback is called for each connection we are currently trying to kill
// Tree traverse callback used by ctdb_tickle_sentenced_connections.
// param is the delete_cons talloc context supplied by that caller.
// data is one struct ctdb_killtcp_con from the tree.
2921 static int tickle_connection_traverse(void *param, void *data)
2923 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2925 /* have tried too many times, just give up */
2926 if (con->count >= 5) {
// The entry is reparented to param rather than freed here. The caller frees
// param once the traverse has finished.
2927 /* can't delete in traverse: reparent to delete_cons */
2928 talloc_steal(param, con);
// The call that receives the two address arguments below is not visible in
// this view.
2932 /* othervise, try tickling it again */
2935 (ctdb_sock_addr *)&con->dst_addr,
2936 (ctdb_sock_addr *)&con->src_addr,
2943 called every second until all sentenced connections have been reset
// Timer callback: tickles every queued connection, drops those that
// tickle_connection_traverse gave up on, then either frees the killtcp
// state (nothing left) or schedules itself again one second later.
// private_data is the struct ctdb_kill_tcp passed to event_add_timed.
2945 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2946 struct timeval t, void *private_data)
2948 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
// Parentless scratch context. The traverse callback moves expired
// connections onto it.
2949 void *delete_cons = talloc_new(NULL);
2951 /* loop over all connections sending tickle ACKs */
2952 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2954 /* now we've finished traverse, it's safe to do deletion. */
2955 talloc_free(delete_cons);
2957 /* If there are no more connections to kill we can remove the
2958 entire killtcp structure
2960 if ( (killtcp->connections == NULL) ||
2961 (killtcp->connections->root == NULL) ) {
2962 talloc_free(killtcp);
// The next timer is allocated with killtcp as its talloc context.
2966 /* try tickling them again in a seconds time
2968 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2969 ctdb_tickle_sentenced_connections, killtcp);
2973 destroy the killtcp structure
// talloc destructor installed on a struct ctdb_kill_tcp by
// ctdb_killtcp_add_connection. It clears the owning vnn's killtcp pointer,
// after checking that the vnn is still on ctdb->vnn and still points at
// this killtcp.
2975 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2977 struct ctdb_vnn *tmpvnn;
// Search ctdb->vnn for killtcp->vnn by pointer identity.
2979 /* verify that this vnn is still active */
2980 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
2981 if (tmpvnn == killtcp->vnn) {
// Loop ran to the end: the vnn is no longer on the list.
2986 if (tmpvnn == NULL) {
// The vnn already refers to a different killtcp state.
2990 if (killtcp->vnn->killtcp != killtcp) {
2994 killtcp->vnn->killtcp = NULL;
3000 /* nothing fancy here, just unconditionally replace any existing
3001 connection structure with the new one.
3003 dont even free the old one if it did exist, that one is talloc_stolen
3004 by the same node in the tree anyway and will be deleted when the new data
// Callback handed to trbt_insertarray32_callback() when
// ctdb_killtcp_add_connection stores a connection in killtcp->connections.
// Only the signature line is visible in this view.
3007 static void *add_killtcp_callback(void *parm, void *data)
3013 add a tcp socket to the list of connections we want to RST
// Queues one TCP connection for reset. It finds the vnn that owns the
// address, creates that vnn's killtcp state on first use, stores the
// connection in the tree, and makes sure the capture socket, its fd event
// and the one-second tickle timer exist.
// Parameter lines of the signature are not visible in this view. s and d are
// the source and destination addresses canonicalized below.
3015 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
3019 ctdb_sock_addr src, dst;
3020 struct ctdb_kill_tcp *killtcp;
3021 struct ctdb_killtcp_con *con;
3022 struct ctdb_vnn *vnn;
3024 ctdb_canonicalize_ip(s, &src);
3025 ctdb_canonicalize_ip(d, &dst);
// The destination is looked up first, then the source. The condition
// between the two lookups is not visible here.
3027 vnn = find_public_ip_vnn(ctdb, &dst);
3029 vnn = find_public_ip_vnn(ctdb, &src);
3032 /* if it is not a public ip it could be our 'single ip' */
3033 if (ctdb->single_ip_vnn) {
3034 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
3035 vnn = ctdb->single_ip_vnn;
3040 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
3044 killtcp = vnn->killtcp;
3046 /* If this is the first connection to kill we must allocate
3049 if (killtcp == NULL) {
// Zero-filled and allocated with vnn as talloc context.
3050 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
3051 CTDB_NO_MEMORY(ctdb, killtcp);
3054 killtcp->ctdb = ctdb;
// -1 marks the capture socket as not yet opened (tested further down).
3055 killtcp->capture_fd = -1;
// NOTE(review): no visible line checks the result of trbt_create().
3056 killtcp->connections = trbt_create(killtcp, 0);
3058 vnn->killtcp = killtcp;
3059 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
3064 /* create a structure that describes this connection we want to
3065 RST and store it in killtcp->connections
// talloc() does not zero the allocation. con->count is presumably set on a
// line not visible here - TODO confirm.
3067 con = talloc(killtcp, struct ctdb_killtcp_con);
3068 CTDB_NO_MEMORY(ctdb, con);
3069 con->src_addr = src;
3070 con->dst_addr = dst;
3072 con->killtcp = killtcp;
// The key is built with (dst_addr, src_addr). capture_tcp_handler looks up
// with the captured packet's (src, dst).
3075 trbt_insertarray32_callback(killtcp->connections,
3076 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
3077 add_killtcp_callback, con);
3080 If we dont have a socket to listen on yet we must create it
3082 if (killtcp->capture_fd == -1) {
3083 const char *iface = ctdb_vnn_iface_string(vnn);
3084 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
3085 if (killtcp->capture_fd == -1) {
3086 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
3087 "socket on iface '%s' for killtcp (%s)\n",
3088 iface, strerror(errno)));
// First use: register the capture handler and start the tickle timer, both
// allocated with killtcp as talloc context.
// NOTE(review): no visible line checks the result of event_add_fd() before
// it is passed to tevent_fd_set_auto_close().
3094 if (killtcp->fde == NULL) {
3095 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
3097 capture_tcp_handler, killtcp);
3098 tevent_fd_set_auto_close(killtcp->fde);
3100 /* We also need to set up some events to tickle all these connections
3101 until they are all reset
3103 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3104 ctdb_tickle_sentenced_connections, killtcp);
3107 /* tickle him once now */
// Presumably the failure path (its label is not visible here). It frees the
// vnn's whole killtcp state.
3116 talloc_free(vnn->killtcp);
3117 vnn->killtcp = NULL;
3122 kill a TCP connection.
// Control handler: interprets indata as a struct ctdb_control_killtcp and
// queues its source/destination pair via ctdb_killtcp_add_connection,
// returning that function's result.
// NOTE(review): no visible line checks indata.dsize before the cast.
3124 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
3126 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
3128 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
3132 called by a daemon to inform us of the entire list of TCP tickles for
3133 a particular public address.
3134 this control should only be sent by the node that is currently serving
3135 that public address.
// Control handler: replaces vnn->tcp_array for the public address named in
// indata with a copy of the connection list carried in indata.
3137 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
3139 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
3140 struct ctdb_tcp_array *tcparray;
3141 struct ctdb_vnn *vnn;
3143 /* We must at least have tickles.num or else we cant verify the size
3144 of the received data blob
3146 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3147 tickles.connections)) {
3148 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
// NOTE(review): the test below is a lower bound (dsize < expected), so a
// blob larger than expected is accepted even though the comment says
// "matches".
3152 /* verify that the size of data matches what we expect */
3153 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3154 tickles.connections)
3155 + sizeof(struct ctdb_tcp_connection)
3156 * list->tickles.num) {
3157 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3161 vnn = find_public_ip_vnn(ctdb, &list->addr);
3163 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3164 ctdb_addr_to_str(&list->addr)));
3169 /* remove any old ticklelist we might have */
3170 talloc_free(vnn->tcp_array);
3171 vnn->tcp_array = NULL;
// The new array is first allocated under ctdb->nodes and only moved to vnn
// by the talloc_steal() at the end.
// NOTE(review): if a CTDB_NO_MEMORY check in between bails out, tcparray
// would stay under ctdb->nodes - confirm that is acceptable.
3173 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3174 CTDB_NO_MEMORY(ctdb, tcparray);
3176 tcparray->num = list->tickles.num;
3178 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3179 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3181 memcpy(tcparray->connections, &list->tickles.connections[0],
3182 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3184 /* We now have a new fresh tickle list array for this vnn */
3185 vnn->tcp_array = talloc_steal(vnn, tcparray);
3191 called to return the full list of tickles for the public address associated
3192 with the provided vnn
// Control handler: builds a struct ctdb_control_tcp_tickle_list in outdata
// holding the tickle connections recorded in vnn->tcp_array for the public
// address passed in indata.
3194 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
// NOTE(review): no visible line checks indata.dsize before indata.dptr is
// used as a ctdb_sock_addr.
3196 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3197 struct ctdb_control_tcp_tickle_list *list;
3198 struct ctdb_tcp_array *tcparray;
3200 struct ctdb_vnn *vnn;
3202 vnn = find_public_ip_vnn(ctdb, addr);
3204 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3205 ctdb_addr_to_str(addr)));
// The conditions around the next two lines are not visible. Presumably num
// is only read from tcparray when tcparray is non-NULL - TODO confirm.
3210 tcparray = vnn->tcp_array;
3212 num = tcparray->num;
// Reply size is the fixed header up to tickles.connections plus num entries.
3217 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3218 tickles.connections)
3219 + sizeof(struct ctdb_tcp_connection) * num;
// talloc_size() does not zero the buffer. It is allocated with outdata as
// talloc context.
3221 outdata->dptr = talloc_size(outdata, outdata->dsize);
3222 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3223 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3226 list->tickles.num = num;
3228 memcpy(&list->tickles.connections[0], tcparray->connections,
3229 sizeof(struct ctdb_tcp_connection) * num);
3237 set the list of all tcp tickles for a public address
// Marshals tcparray into a struct ctdb_control_tcp_tickle_list and sends it
// as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control.
// NOTE(review): the timeout and destnode parameters are not referenced on any
// visible line. The control below is always sent to
// CTDB_BROADCAST_CONNECTED with CTDB_CTRL_FLAG_NOREPLY.
3239 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3240 struct timeval timeout, uint32_t destnode,
3241 ctdb_sock_addr *addr,
3242 struct ctdb_tcp_array *tcparray)
3246 struct ctdb_control_tcp_tickle_list *list;
// The condition guarding this read is not visible. Presumably num stays 0
// when tcparray is NULL - TODO confirm.
3249 num = tcparray->num;
// Message size is the fixed header up to tickles.connections plus num
// entries.
3254 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3255 tickles.connections) +
3256 sizeof(struct ctdb_tcp_connection) * num;
3257 data.dptr = talloc_size(ctdb, data.dsize);
3258 CTDB_NO_MEMORY(ctdb, data.dptr);
3260 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3262 list->tickles.num = num;
3264 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3267 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3268 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3269 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3271 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
// The temporary message buffer is released here.
3275 talloc_free(data.dptr);
3282 perform tickle updates if required
// Periodic timer callback: walks ctdb->vnn, calls ctdb_ctrl_set_tcp_tickles
// for vnns that pass the pnn and tcp_update_needed tests below, then
// schedules itself again.
// private_data is the struct ctdb_context passed to event_add_timed.
3284 static void ctdb_update_tcp_tickles(struct event_context *ev,
3285 struct timed_event *te,
3286 struct timeval t, void *private_data)
3288 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3290 struct ctdb_vnn *vnn;
3292 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Tests whether the vnn's pnn differs from this node's pnn. The body of
// this branch is not visible here.
3293 /* we only send out updates for public addresses that
3296 if (ctdb->pnn != vnn->pnn) {
3299 /* We only send out the updates if we need to */
3300 if (!vnn->tcp_update_needed) {
3303 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3305 CTDB_BROADCAST_CONNECTED,
3306 &vnn->public_address,
3309 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3310 ctdb_addr_to_str(&vnn->public_address)));
// Re-arm after tunable.tickle_update_interval seconds. The timer is
// allocated with ctdb->tickle_update_context as talloc context.
3314 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3315 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3316 ctdb_update_tcp_tickles, ctdb);
3321 start periodic update of tcp tickles
// Creates ctdb->tickle_update_context and schedules the first run of
// ctdb_update_tcp_tickles after tunable.tickle_update_interval seconds.
3323 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
// Dedicated talloc context for the update timers, child of ctdb.
3325 ctdb->tickle_update_context = talloc_new(ctdb);
3327 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3328 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3329 ctdb_update_tcp_tickles, ctdb);
// Per-request state for a series of gratuitous ARPs sent by
// send_gratious_arp. The iface and count members used by that function are
// not visible in this view.
3335 struct control_gratious_arp {
3336 struct ctdb_context *ctdb;
// Address passed to ctdb_sys_send_arp.
3337 ctdb_sock_addr addr;
3343 send a control_gratuitous arp
// Timer callback: sends one ARP for arp->addr on arp->iface and schedules
// itself again after CTDB_ARP_INTERVAL seconds. The branch taken when
// arp->count reaches CTDB_ARP_REPEAT is not visible here.
// private_data is the struct control_gratious_arp passed to event_add_timed.
3345 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3346 struct timeval t, void *private_data)
3349 struct control_gratious_arp *arp = talloc_get_type(private_data,
3350 struct control_gratious_arp);
3352 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3354 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3355 arp->iface, strerror(errno)));
// Presumably the stop condition for the repeat series - TODO confirm.
3360 if (arp->count == CTDB_ARP_REPEAT) {
// The next timer is allocated with arp as talloc context.
3365 event_add_timed(arp->ctdb->ev, arp,
3366 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3367 send_gratious_arp, arp);
// Control handler: validates a struct ctdb_control_gratious_arp in indata,
// copies its address and interface name into a new struct
// control_gratious_arp and schedules send_gratious_arp to run immediately.
3374 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3376 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3377 struct control_gratious_arp *arp;
// First check: the fixed part of the structure, up to the iface member,
// must be present.
3379 /* verify the size of indata */
3380 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3381 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3382 (unsigned)indata.dsize,
3383 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3387 ( offsetof(struct ctdb_control_gratious_arp, iface)
3388 + gratious_arp->len ) ){
// Second check, whose opening line is not visible here. Per the message
// below, the expected size is the fixed part plus gratious_arp->len.
3390 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3391 "but should be %u bytes\n",
3392 (unsigned)indata.dsize,
3393 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
// talloc() does not zero the allocation. arp->ctdb is read below, so it is
// presumably set on a line not visible here - TODO confirm.
3398 arp = talloc(ctdb, struct control_gratious_arp);
3399 CTDB_NO_MEMORY(ctdb, arp);
3402 arp->addr = gratious_arp->addr;
// NOTE(review): talloc_strdup() reads until a NUL byte; confirm the sender
// guarantees iface is NUL-terminated within len bytes.
3403 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3404 CTDB_NO_MEMORY(ctdb, arp->iface);
// Zero timeout for the first send. The timer is allocated with arp as
// talloc context.
3407 event_add_timed(arp->ctdb->ev, arp,
3408 timeval_zero(), send_gratious_arp, arp);
// Control handler: validates a struct ctdb_control_ip_iface in indata and
// passes its address, mask and interface name to ctdb_add_public_address.
3413 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3415 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
// First check: the fixed part of the structure, up to the iface member,
// must be present.
3418 /* verify the size of indata */
3419 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3420 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3424 ( offsetof(struct ctdb_control_ip_iface, iface)
// Second check, only partly visible here. Per the message below, the
// expected size is the fixed part plus pub->len.
3427 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3428 "but should be %u bytes\n",
3429 (unsigned)indata.dsize,
3430 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3434 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
3437 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
3445 called when releaseip event finishes for del_public_address
// Event-script completion callback registered by
// ctdb_control_del_public_address. It frees private_data, which that caller
// sets to the talloc context that owns the removed vnn.
3447 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
3450 talloc_free(private_data);
// Control handler: validates a struct ctdb_control_ip_iface in indata,
// unlinks the matching vnn from ctdb->vnn and, on the path visible from
// line 3490 on, runs the CTDB_EVENT_RELEASE_IP event script for it.
3453 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3455 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3456 struct ctdb_vnn *vnn;
// First check: the fixed part of the structure, up to the iface member,
// must be present.
3459 /* verify the size of indata */
3460 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3461 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3465 ( offsetof(struct ctdb_control_ip_iface, iface)
// Second check, only partly visible here. Per the message below, the
// expected size is the fixed part plus pub->len.
3468 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3469 "but should be %u bytes\n",
3470 (unsigned)indata.dsize,
3471 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3475 /* walk over all public addresses until we find a match */
3476 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3477 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3478 TALLOC_CTX *mem_ctx;
3480 DLIST_REMOVE(ctdb->vnn, vnn);
// Branch for a vnn whose pnn differs from this node's pnn. The visible
// lines only drop its interface assignment.
3481 if (vnn->pnn != ctdb->pnn) {
3482 if (vnn->iface != NULL) {
3483 ctdb_vnn_unassign_iface(ctdb, vnn);
// vnn is reparented onto mem_ctx. mem_ctx is also passed to
// ctdb_event_script_callback next to delete_ip_callback, presumably as its
// private data; that callback frees its private data - TODO confirm.
3490 mem_ctx = talloc_new(ctdb);
3491 talloc_steal(mem_ctx, vnn);
3492 ret = ctdb_event_script_callback(ctdb,
3493 mem_ctx, delete_ip_callback, mem_ctx,
3495 CTDB_EVENT_RELEASE_IP,
3497 ctdb_vnn_iface_string(vnn),
3498 ctdb_addr_to_str(&vnn->public_address),
3499 vnn->public_netmask_bits);
3500 if (vnn->iface != NULL) {
3501 ctdb_vnn_unassign_iface(ctdb, vnn);
3513 /* This function is called from the recovery daemon to verify that a remote
3514 node has the expected ip allocation.
3515 This is verified against ctdb->ip_tree
// Compares every entry of ips against the record for the same address in
// ctdb->ip_tree and logs when an address is unknown or the two sides
// disagree on the pnn. Return statements are not visible in this view.
3517 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
3519 struct ctdb_public_ip_list *tmp_ip;
3522 if (ctdb->ip_tree == NULL) {
3523 /* dont know the expected allocation yet, assume remote node
3532 for (i=0; i<ips->num; i++) {
3533 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3534 if (tmp_ip == NULL) {
3535 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
// Either side has pnn -1. The body of this branch is not visible;
// presumably the entry is skipped - TODO confirm.
3539 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
// NOTE(review): the pnn values are printed with %u although they are
// compared against -1 above; confirm the field type.
3543 if (tmp_ip->pnn != ips->ips[i].pnn) {
3544 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
// Records a new owner for one public address by storing ip->pnn in the
// matching ctdb->ip_tree record. Failures are logged when the tree does not
// exist yet or has no record for the address. Return statements are not
// visible in this view.
3552 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3554 struct ctdb_public_ip_list *tmp_ip;
3556 if (ctdb->ip_tree == NULL) {
3557 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
// Records are keyed by ip_key() of the address, IP_KEYLEN words long.
3561 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3562 if (tmp_ip == NULL) {
3563 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
// The old and new pnn are logged before the record is overwritten.
3567 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3568 tmp_ip->pnn = ip->pnn;