X-Git-Url: http://git.samba.org/samba.git/?p=kai%2Fsamba-autobuild%2F.git;a=blobdiff_plain;f=ctdb%2Fserver%2Fctdb_takeover.c;h=5ca786d7c3035b27129ca73b71251d2f00d1c7bf;hp=91f30302d777f2adebdd78ec5a14cab654e2a45f;hb=2b76e580321d050bf17d4999ee288c86669d6401;hpb=f087a8e2b81fae82fa571ef09d2d1cb682cc8ff8 diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 91f30302d77..5ca786d7c30 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -18,14 +18,27 @@ You should have received a copy of the GNU General Public License along with this program; if not, see . */ -#include "includes.h" -#include "tdb.h" -#include "lib/util/dlinklist.h" +#include "replace.h" #include "system/network.h" #include "system/filesys.h" +#include "system/time.h" #include "system/wait.h" -#include "../include/ctdb_private.h" -#include "../common/rb_tree.h" + +#include +#include + +#include "lib/util/dlinklist.h" +#include "lib/util/debug.h" +#include "lib/util/samba_util.h" + +#include "ctdb_private.h" +#include "ctdb_client.h" +#include "ctdb_logging.h" + +#include "common/rb_tree.h" +#include "common/reqid.h" +#include "common/system.h" +#include "common/common.h" #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0) @@ -37,6 +50,7 @@ struct ctdb_ipflags { bool noiptakeover; bool noiphost; + enum ctdb_runstate runstate; }; struct ctdb_iface { @@ -71,20 +85,8 @@ static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface) CTDB_NO_MEMORY_FATAL(ctdb, i); i->name = talloc_strdup(i, iface); CTDB_NO_MEMORY(ctdb, i->name); - /* - * If link_up defaults to true then IPs can be allocated to a - * node during the first recovery. However, then an interface - * could have its link marked down during the startup event, - * causing the IP to move almost immediately. 
If link_up - * defaults to false then, during normal operation, IPs added - * to a new interface can't be assigned until a monitor cycle - * has occurred and marked the new interfaces up. This makes - * IP allocation unpredictable. The following is a neat - * compromise: early in startup link_up defaults to false, so - * IPs can't be assigned, and after startup IPs can be - * assigned immediately. - */ - i->link_up = (ctdb->runstate == CTDB_RUNSTATE_RUNNING); + + i->link_up = true; DLIST_ADD(ctdb->ifaces, i); @@ -118,15 +120,15 @@ static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn, * causes problems... :-) */ static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb, - struct ctdb_vnn *vnn, - TALLOC_CTX *mem_ctx) + struct ctdb_vnn *vnn) { - struct ctdb_iface *i; + struct ctdb_iface *i, *next; /* For each interface, check if there's an IP using it. */ - for(i=ctdb->ifaces; i; i=i->next) { + for (i = ctdb->ifaces; i != NULL; i = next) { struct ctdb_vnn *tv; bool found; + next = i->next; /* Only consider interfaces named in the given VNN. */ if (!vnn_has_interface_with_name(vnn, i->name)) { @@ -152,8 +154,7 @@ static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb, if (!found) { /* None of the VNNs are using this interface. */ DLIST_REMOVE(ctdb->ifaces, i); - /* Caller will free mem_ctx when convenient. 
*/ - talloc_steal(mem_ctx, i); + talloc_free(i); } } } @@ -260,6 +261,10 @@ static bool ctdb_vnn_available(struct ctdb_context *ctdb, { int i; + if (vnn->delete_pending) { + return false; + } + if (vnn->iface && vnn->iface->link_up) { return true; } @@ -311,7 +316,8 @@ struct ctdb_client_ip { /* send a gratuitous arp */ -static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, +static void ctdb_control_send_arp(struct tevent_context *ev, + struct tevent_timer *te, struct timeval t, void *private_data) { struct ctdb_takeover_arp *arp = talloc_get_type(private_data, @@ -354,9 +360,9 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event * return; } - event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, - timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), - ctdb_control_send_arp, arp); + tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx, + timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), + ctdb_control_send_arp, arp); } static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb, @@ -391,20 +397,20 @@ static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb, vnn->tcp_update_needed = true; } - event_add_timed(arp->ctdb->ev, vnn->takeover_ctx, - timeval_zero(), ctdb_control_send_arp, arp); + tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx, + timeval_zero(), ctdb_control_send_arp, arp); return 0; } struct takeover_callback_state { - struct ctdb_req_control *c; + struct ctdb_req_control_old *c; ctdb_sock_addr *addr; struct ctdb_vnn *vnn; }; struct ctdb_do_takeip_state { - struct ctdb_req_control *c; + struct ctdb_req_control_old *c; struct ctdb_vnn *vnn; }; @@ -469,7 +475,7 @@ static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state) take over an ip address */ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, struct ctdb_vnn *vnn) { int ret; @@ -510,7 +516,6 @@ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, 
state, ctdb_do_takeip_callback, state, - false, CTDB_EVENT_TAKE_IP, "%s %s %u", ctdb_vnn_iface_string(vnn), @@ -529,7 +534,7 @@ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, } struct ctdb_do_updateip_state { - struct ctdb_req_control *c; + struct ctdb_req_control_old *c; struct ctdb_iface *old; struct ctdb_vnn *vnn; }; @@ -593,7 +598,7 @@ static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state) update (move) an ip address */ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, struct ctdb_vnn *vnn) { int ret; @@ -651,7 +656,6 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, state, ctdb_do_updateip_callback, state, - false, CTDB_EVENT_UPDATE_IP, "%s %s %s %u", state->old->name, @@ -690,7 +694,7 @@ static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_ take over an ip address */ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, TDB_DATA indata, bool *async_reply) { @@ -718,7 +722,7 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, return 0; } - if (ctdb->do_checkpublicip) { + if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) { have_ip = ctdb_sys_have_ip(&pip->addr); } best_iface = ctdb_vnn_best_iface(ctdb, vnn); @@ -811,24 +815,6 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, return 0; } -/* - takeover an ip address old v4 style - */ -int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, - struct ctdb_req_control *c, - TDB_DATA indata, - bool *async_reply) -{ - TDB_DATA data; - - data.dsize = sizeof(struct ctdb_public_ip); - data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip); - CTDB_NO_MEMORY(ctdb, data.dptr); - - memcpy(data.dptr, indata.dptr, indata.dsize); - return ctdb_control_takeover_ip(ctdb, c, data, async_reply); -} - /* kill any clients that are registered with a IP that is being 
released */ @@ -848,9 +834,9 @@ static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr ctdb_addr_to_str(&ip->addr))); if (ctdb_same_ip(&tmp_addr, addr)) { - struct ctdb_client *client = ctdb_reqid_find(ctdb, - ip->client_id, - struct ctdb_client); + struct ctdb_client *client = reqid_find(ctdb->idr, + ip->client_id, + struct ctdb_client); DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", ip->client_id, ctdb_addr_to_str(&ip->addr), @@ -867,6 +853,14 @@ static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr } } +static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn) +{ + DLIST_REMOVE(ctdb->vnn, vnn); + ctdb_vnn_unassign_iface(ctdb, vnn); + ctdb_remove_orphaned_ifaces(ctdb, vnn); + talloc_free(vnn); +} + /* called when releaseip event finishes */ @@ -881,12 +875,16 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, ctdb_ban_self(ctdb); } - if (ctdb->do_checkpublicip && ctdb_sys_have_ip(state->addr)) { - DEBUG(DEBUG_ERR, ("IP %s still hosted during release IP callback, failing\n", - ctdb_addr_to_str(state->addr))); - ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL); - talloc_free(state); - return; + if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) { + if (ctdb_sys_have_ip(state->addr)) { + DEBUG(DEBUG_ERR, + ("IP %s still hosted during release IP callback, failing\n", + ctdb_addr_to_str(state->addr))); + ctdb_request_control_reply(ctdb, state->c, + NULL, -1, NULL); + talloc_free(state); + return; + } } /* send a message to all clients of this node telling them @@ -905,6 +903,12 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, ctdb_vnn_unassign_iface(ctdb, state->vnn); + /* Process the IP if it has been marked for deletion */ + if (state->vnn->delete_pending) { + do_delete_ip(ctdb, state->vnn); + state->vnn = NULL; + } + /* the control succeeded */ ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL); 
talloc_free(state); @@ -912,7 +916,9 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, static int ctdb_releaseip_destructor(struct takeover_callback_state *state) { - state->vnn->update_in_flight = false; + if (state->vnn != NULL) { + state->vnn->update_in_flight = false; + } return 0; } @@ -920,7 +926,7 @@ static int ctdb_releaseip_destructor(struct takeover_callback_state *state) release an ip address */ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, TDB_DATA indata, bool *async_reply) { @@ -948,7 +954,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, * intended new node. The following causes makes ctdbd ignore * a release for any address it doesn't host. */ - if (ctdb->do_checkpublicip) { + if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) { if (!ctdb_sys_have_ip(&pip->addr)) { DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", ctdb_addr_to_str(&pip->addr), @@ -979,30 +985,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, return -1; } - if (ctdb->do_checkpublicip) { - iface = ctdb_sys_find_ifname(&pip->addr); - if (iface == NULL) { - DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n")); - return 0; - } - if (vnn->iface == NULL) { - DEBUG(DEBUG_WARNING, - ("Public IP %s is hosted on interface %s but we have no VNN\n", - ctdb_addr_to_str(&pip->addr), - iface)); - } else if (strcmp(iface, ctdb_vnn_iface_string(vnn)) != 0) { - DEBUG(DEBUG_WARNING, - ("Public IP %s is hosted on inteterface %s but VNN says %s\n", - ctdb_addr_to_str(&pip->addr), - iface, - ctdb_vnn_iface_string(vnn))); - /* Should we fix vnn->iface? If we do, what - * happens to reference counts? 
- */ - } - } else { - iface = strdup(ctdb_vnn_iface_string(vnn)); - } + iface = strdup(ctdb_vnn_iface_string(vnn)); DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n", ctdb_addr_to_str(&pip->addr), @@ -1011,11 +994,22 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, pip->pnn)); state = talloc(ctdb, struct takeover_callback_state); - CTDB_NO_MEMORY(ctdb, state); + if (state == NULL) { + ctdb_set_error(ctdb, "Out of memory at %s:%d", + __FILE__, __LINE__); + free(iface); + return -1; + } state->c = talloc_steal(state, c); state->addr = talloc(state, ctdb_sock_addr); - CTDB_NO_MEMORY(ctdb, state->addr); + if (state->addr == NULL) { + ctdb_set_error(ctdb, "Out of memory at %s:%d", + __FILE__, __LINE__); + free(iface); + talloc_free(state); + return -1; + } *state->addr = pip->addr; state->vnn = vnn; @@ -1024,7 +1018,6 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state, - false, CTDB_EVENT_RELEASE_IP, "%s %s %u", iface, @@ -1044,25 +1037,6 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, return 0; } -/* - release an ip address old v4 style - */ -int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, - struct ctdb_req_control *c, - TDB_DATA indata, - bool *async_reply) -{ - TDB_DATA data; - - data.dsize = sizeof(struct ctdb_public_ip); - data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip); - CTDB_NO_MEMORY(ctdb, data.dptr); - - memcpy(data.dptr, indata.dptr, indata.dsize); - return ctdb_control_release_ip(ctdb, c, data, async_reply); -} - - static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *ifaces, @@ -1135,51 +1109,6 @@ static int ctdb_add_public_address(struct ctdb_context *ctdb, return 0; } -static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private_data) -{ - struct ctdb_context *ctdb = 
talloc_get_type(private_data, - struct ctdb_context); - struct ctdb_vnn *vnn; - - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - int i; - - for (i=0; vnn->ifaces[i] != NULL; i++) { - if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) { - DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n", - vnn->ifaces[i], - ctdb_addr_to_str(&vnn->public_address))); - } - } - } - - event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx, - timeval_current_ofs(30, 0), - ctdb_check_interfaces_event, ctdb); -} - - -int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb) -{ - if (ctdb->check_public_ifaces_ctx != NULL) { - talloc_free(ctdb->check_public_ifaces_ctx); - ctdb->check_public_ifaces_ctx = NULL; - } - - ctdb->check_public_ifaces_ctx = talloc_new(ctdb); - if (ctdb->check_public_ifaces_ctx == NULL) { - ctdb_fatal(ctdb, "failed to allocate context for checking interfaces"); - } - - event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx, - timeval_current_ofs(30, 0), - ctdb_check_interfaces_event, ctdb); - - return 0; -} - - /* setup the public address lists from a file */ @@ -1189,7 +1118,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses) int nlines; int i; - lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb); + lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb); if (lines == NULL) { ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file); return -1; @@ -1299,8 +1228,8 @@ int ctdb_set_single_public_ip(struct ctdb_context *ctdb, return 0; } -struct ctdb_public_ip_list { - struct ctdb_public_ip_list *next; +struct public_ip_list { + struct public_ip_list *next; uint32_t pnn; ctdb_sock_addr addr; }; @@ -1308,9 +1237,8 @@ struct ctdb_public_ip_list { /* Given a physical node, return the number of public addresses that is currently assigned to this node. 
*/ -static int node_ip_coverage(struct ctdb_context *ctdb, - int32_t pnn, - struct ctdb_public_ip_list *ips) +static int node_ip_coverage(struct ctdb_context *ctdb, int32_t pnn, + struct public_ip_list *ips) { int num=0; @@ -1326,9 +1254,9 @@ static int node_ip_coverage(struct ctdb_context *ctdb, /* Can the given node host the given IP: is the public IP known to the * node and is NOIPHOST unset? */ -static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn, +static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn, struct ctdb_ipflags ipflags, - struct ctdb_public_ip_list *ip) + struct public_ip_list *ip) { struct ctdb_all_public_ips *public_ips; int i; @@ -1353,9 +1281,9 @@ static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn, return false; } -static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn, +static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn, struct ctdb_ipflags ipflags, - struct ctdb_public_ip_list *ip) + struct public_ip_list *ip) { if (ipflags.noiptakeover) { return false; @@ -1368,10 +1296,10 @@ static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn, pick the node that currently are serving the least number of ips so that the ips get spread out evenly. 
*/ -static int find_takeover_node(struct ctdb_context *ctdb, - struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *ip, - struct ctdb_public_ip_list *all_ips) +static int find_takeover_node(struct ctdb_context *ctdb, + struct ctdb_ipflags *ipflags, + struct public_ip_list *ip, + struct public_ip_list *all_ips) { int pnn, min=0, num; int i, numnodes; @@ -1437,8 +1365,8 @@ static uint32_t *ip_key(ctdb_sock_addr *ip) static void *add_ip_callback(void *parm, void *data) { - struct ctdb_public_ip_list *this_ip = parm; - struct ctdb_public_ip_list *prev_ip = data; + struct public_ip_list *this_ip = parm; + struct public_ip_list *prev_ip = data; if (prev_ip == NULL) { return parm; @@ -1452,19 +1380,19 @@ static void *add_ip_callback(void *parm, void *data) static int getips_count_callback(void *param, void *data) { - struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param; - struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data; + struct public_ip_list **ip_list = (struct public_ip_list **)param; + struct public_ip_list *new_ip = (struct public_ip_list *)data; new_ip->next = *ip_list; *ip_list = new_ip; return 0; } -static struct ctdb_public_ip_list * +static struct public_ip_list * create_merged_ip_list(struct ctdb_context *ctdb) { int i, j; - struct ctdb_public_ip_list *ip_list; + struct public_ip_list *ip_list; struct ctdb_all_public_ips *public_ips; if (ctdb->ip_tree != NULL) { @@ -1486,9 +1414,9 @@ create_merged_ip_list(struct ctdb_context *ctdb) } for (j=0;jnum;j++) { - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; - tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list); + tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list); CTDB_NO_MEMORY_NULL(ctdb, tmp_ip); /* Do not use information about IP addresses hosted * on other nodes, it may not be accurate */ @@ -1557,10 +1485,10 @@ static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2) used in the main part of the 
algorithm. */ static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip, - struct ctdb_public_ip_list *ips, + struct public_ip_list *ips, int pnn) { - struct ctdb_public_ip_list *t; + struct public_ip_list *t; uint32_t d; uint32_t sum = 0; @@ -1593,9 +1521,9 @@ static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip, /* Return the LCP2 imbalance metric for addresses currently assigned to the given node. */ -static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn) +static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn) { - struct ctdb_public_ip_list *t; + struct public_ip_list *t; uint32_t imbalance = 0; @@ -1617,9 +1545,9 @@ static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn) */ static void basic_allocate_unassigned(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips) + struct public_ip_list *all_ips) { - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; /* loop over all ip's and find a physical node to cover for each unassigned ip. 
@@ -1638,12 +1566,12 @@ static void basic_allocate_unassigned(struct ctdb_context *ctdb, */ static void basic_failback(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips, + struct public_ip_list *all_ips, int num_ips) { int i, numnodes; int maxnode, maxnum, minnode, minnum, num, retries; - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; numnodes = talloc_array_length(ipflags); retries = 0; @@ -1709,7 +1637,7 @@ try_again: */ if ( (maxnum > minnum+1) && (retries < (num_ips + 5)) ){ - struct ctdb_public_ip_list *tmp; + struct public_ip_list *tmp; /* Reassign one of maxnode's VNNs */ for (tmp=all_ips;tmp;tmp=tmp->next) { @@ -1725,13 +1653,13 @@ try_again: static void lcp2_init(struct ctdb_context *tmp_ctx, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips, + struct public_ip_list *all_ips, uint32_t *force_rebalance_nodes, uint32_t **lcp2_imbalances, bool **rebalance_candidates) { int i, numnodes; - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; numnodes = talloc_array_length(ipflags); @@ -1783,15 +1711,15 @@ static void lcp2_init(struct ctdb_context *tmp_ctx, */ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips, + struct public_ip_list *all_ips, uint32_t *lcp2_imbalances) { - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; int dstnode, numnodes; int minnode; uint32_t mindsum, dstdsum, dstimbl, minimbl; - struct ctdb_public_ip_list *minip; + struct public_ip_list *minip; bool should_loop = true; bool have_unassigned = true; @@ -1881,17 +1809,16 @@ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb, */ static bool lcp2_failback_candidate(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips, + struct public_ip_list *all_ips, int srcnode, - uint32_t candimbl, uint32_t *lcp2_imbalances, bool *rebalance_candidates) { 
int dstnode, mindstnode, numnodes; uint32_t srcimbl, srcdsum, dstimbl, dstdsum; uint32_t minsrcimbl, mindstimbl; - struct ctdb_public_ip_list *minip; - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *minip; + struct public_ip_list *tmp_ip; /* Find an IP and destination node that best reduces imbalance. */ srcimbl = 0; @@ -1903,7 +1830,8 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb, numnodes = talloc_array_length(ipflags); DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n")); - DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl)); + DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", + srcnode, lcp2_imbalances[srcnode])); for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) { /* Only consider addresses on srcnode. */ @@ -1913,7 +1841,7 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb, /* What is this IP address costing the source node? */ srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode); - srcimbl = candimbl - srcdsum; + srcimbl = lcp2_imbalances[srcnode] - srcdsum; /* Consider this IP address would cost each potential * destination node. 
Destination nodes are limited to @@ -1936,11 +1864,12 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb, dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode); dstimbl = lcp2_imbalances[dstnode] + dstdsum; DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n", - srcnode, srcimbl - lcp2_imbalances[srcnode], + srcnode, -srcdsum, ctdb_addr_to_str(&(tmp_ip->addr)), - dstnode, dstimbl - lcp2_imbalances[dstnode])); + dstnode, dstdsum)); - if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \ + if ((dstimbl < lcp2_imbalances[srcnode]) && + (dstdsum < srcdsum) && \ ((mindstnode == -1) || \ ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) { @@ -1961,7 +1890,7 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb, mindstnode, mindstimbl - lcp2_imbalances[mindstnode])); - lcp2_imbalances[srcnode] = srcimbl; + lcp2_imbalances[srcnode] = minsrcimbl; lcp2_imbalances[mindstnode] = mindstimbl; minip->pnn = mindstnode; @@ -1997,40 +1926,28 @@ static int lcp2_cmp_imbalance_pnn(const void * a, const void * b) */ static void lcp2_failback(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips, + struct public_ip_list *all_ips, uint32_t *lcp2_imbalances, bool *rebalance_candidates) { - int i, num_rebalance_candidates, numnodes; + int i, numnodes; struct lcp2_imbalance_pnn * lips; bool again; numnodes = talloc_array_length(ipflags); try_again: - - /* It is only worth continuing if we have suitable target - * nodes to transfer IPs to. This check is much cheaper than - * continuing on... 
- */ - num_rebalance_candidates = 0; - for (i=0; inext) { num_ips++; @@ -2148,11 +2064,12 @@ static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb, static void ip_alloc_lcp2(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list *all_ips, + struct public_ip_list *all_ips, uint32_t *force_rebalance_nodes) { uint32_t *lcp2_imbalances; bool *rebalance_candidates; + int numnodes, num_rebalance_candidates, i; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); @@ -2168,6 +2085,21 @@ static void ip_alloc_lcp2(struct ctdb_context *ctdb, goto finished; } + /* It is only worth continuing if we have suitable target + * nodes to transfer IPs to. This check is much cheaper than + * continuing on... + */ + numnodes = talloc_array_length(ipflags); + num_rebalance_candidates = 0; + for (i=0; inum;i++) { if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) { - num_healthy++; + /* Found one completely healthy node */ + return false; } } - return num_healthy == 0; + return true; } /* The calculation part of the IP allocation algorithm. 
*/ static void ctdb_takeover_run_core(struct ctdb_context *ctdb, struct ctdb_ipflags *ipflags, - struct ctdb_public_ip_list **all_ips_p, + struct public_ip_list **all_ips_p, uint32_t *force_rebalance_nodes) { /* since nodes only know about those public addresses that @@ -2292,7 +2223,7 @@ static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn, static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, - struct ctdb_node_map *nodemap, + struct ctdb_node_map_old *nodemap, const char *tunable, uint32_t default_value) { @@ -2394,7 +2325,7 @@ static void get_runstate_fail_callback(struct ctdb_context *ctdb, uint32_t pnn, static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, - struct ctdb_node_map *nodemap, + struct ctdb_node_map_old *nodemap, enum ctdb_runstate default_value) { uint32_t *nodes; @@ -2440,7 +2371,7 @@ static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb, static struct ctdb_ipflags * set_ipflags_internal(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, - struct ctdb_node_map *nodemap, + struct ctdb_node_map_old *nodemap, uint32_t *tval_noiptakeover, uint32_t *tval_noiphostonalldisabled, enum ctdb_runstate *runstate) @@ -2467,6 +2398,8 @@ set_ipflags_internal(struct ctdb_context *ctdb, if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) { ipflags[i].noiphost = true; } + /* Remember the runstate */ + ipflags[i].runstate = runstate[i]; } if (all_nodes_are_disabled(nodemap)) { @@ -2494,7 +2427,7 @@ set_ipflags_internal(struct ctdb_context *ctdb, static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, - struct ctdb_node_map *nodemap) + struct ctdb_node_map_old *nodemap) { uint32_t *tval_noiptakeover; uint32_t *tval_noiphostonalldisabled; @@ -2544,7 +2477,7 @@ struct iprealloc_callback_data { int retry_count; client_async_callback fail_callback; void *fail_callback_data; - struct ctdb_node_map *nodemap; + struct 
ctdb_node_map_old *nodemap; }; static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn, @@ -2555,13 +2488,35 @@ static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn, struct iprealloc_callback_data *cd = (struct iprealloc_callback_data *)callback; + numnodes = talloc_array_length(cd->retry_nodes); + if (pnn > numnodes) { + DEBUG(DEBUG_ERR, + ("ipreallocated failure from node %d, " + "but only %d nodes in nodemap\n", + pnn, numnodes)); + return; + } + + /* Can't run the "ipreallocated" event on a INACTIVE node */ + if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) { + DEBUG(DEBUG_WARNING, + ("ipreallocated failed on inactive node %d, ignoring\n", + pnn)); + return; + } + switch (res) { case -ETIME: /* If the control timed out then that's a real error, * so call the real fail callback */ - cd->fail_callback(ctdb, pnn, res, outdata, - cd->fail_callback_data); + if (cd->fail_callback) { + cd->fail_callback(ctdb, pnn, res, outdata, + cd->fail_callback_data); + } else { + DEBUG(DEBUG_WARNING, + ("iprealloc timed out but no callback registered\n")); + } break; default: /* If not a timeout then either the ipreallocated @@ -2571,23 +2526,6 @@ static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn, * because the error codes are all folded down to -1. * Consider retrying using EVENTSCRIPT control... 
*/ - - numnodes = talloc_array_length(cd->retry_nodes); - if (pnn > numnodes) { - DEBUG(DEBUG_ERR, - ("ipreallocated failure from node %d, but only %d nodes in nodemap\n", - pnn, numnodes)); - return; - } - - /* Can't run the "ipreallocated" event on a INACTIVE node */ - if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) { - DEBUG(DEBUG_ERR, - ("ipreallocated failure from node %d, but node is inactive - not flagging a retry\n", - pnn)); - return; - } - DEBUG(DEBUG_WARNING, ("ipreallocated failure from node %d, flagging retry\n", pnn)); @@ -2600,7 +2538,7 @@ struct takeover_callback_data { bool *node_failed; client_async_callback fail_callback; void *fail_callback_data; - struct ctdb_node_map *nodemap; + struct ctdb_node_map_old *nodemap; }; static void takeover_run_fail_callback(struct ctdb_context *ctdb, @@ -2633,15 +2571,14 @@ static void takeover_run_fail_callback(struct ctdb_context *ctdb, /* make any IP alias changes for public addresses that are necessary */ -int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, +int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, uint32_t *force_rebalance_nodes, client_async_callback fail_callback, void *callback_data) { int i, j, ret; struct ctdb_public_ip ip; - struct ctdb_public_ipv4 ipv4; uint32_t *nodes; - struct ctdb_public_ip_list *all_ips, *tmp_ip; + struct public_ip_list *all_ips, *tmp_ip; TDB_DATA data; struct timeval timeout; struct client_async_data *async_data; @@ -2651,6 +2588,7 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, struct takeover_callback_data *takeover_data; struct iprealloc_callback_data iprealloc_data; bool *retry_data; + bool can_host_ips; /* * ip failover is completely disabled, just send out the @@ -2667,7 +2605,18 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, return -1; } - ZERO_STRUCT(ip); + /* Short-circuit IP allocation if no nodes are in the RUNNING + * 
runstate yet, since no nodes will be able to host IPs */ + can_host_ips = false; + for (i=0; inum; i++) { + if (ipflags[i].runstate == CTDB_RUNSTATE_RUNNING) { + can_host_ips = true; + } + } + if (!can_host_ips) { + DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n")); + return 0; + } /* Do the IP reassignment calculations */ ctdb_takeover_run_core(ctdb, ipflags, &all_ips, force_rebalance_nodes); @@ -2692,6 +2641,14 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, async_data->fail_callback = takeover_run_fail_callback; async_data->callback_data = takeover_data; + ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */ + + /* Send a RELEASE_IP to all nodes that should not be hosting + * each IP. For each IP, all but one of these will be + * redundant. However, the redundant ones are used to tell + * nodes which node should be hosting the IP so that commands + * like "ctdb ip" can display a particular nodes idea of who + * is hosting what. 
*/ for (i=0;inum;i++) { /* don't talk to unconnected nodes, but do talk to banned nodes */ if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) { @@ -2705,36 +2662,22 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, */ continue; } - if (tmp_ip->addr.sa.sa_family == AF_INET) { - ipv4.pnn = tmp_ip->pnn; - ipv4.sin = tmp_ip->addr.ip; - - timeout = TAKEOVER_TIMEOUT(); - data.dsize = sizeof(ipv4); - data.dptr = (uint8_t *)&ipv4; - state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn, - 0, CTDB_CONTROL_RELEASE_IPv4, 0, - data, async_data, - &timeout, NULL); - } else { - ip.pnn = tmp_ip->pnn; - ip.addr = tmp_ip->addr; - - timeout = TAKEOVER_TIMEOUT(); - data.dsize = sizeof(ip); - data.dptr = (uint8_t *)&ip; - state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn, - 0, CTDB_CONTROL_RELEASE_IP, 0, - data, async_data, - &timeout, NULL); - } + ip.pnn = tmp_ip->pnn; + ip.addr = tmp_ip->addr; + timeout = TAKEOVER_TIMEOUT(); + data.dsize = sizeof(ip); + data.dptr = (uint8_t *)&ip; + state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn, + 0, CTDB_CONTROL_RELEASE_IP, 0, + data, async_data, + &timeout, NULL); if (state == NULL) { DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn)); talloc_free(tmp_ctx); return -1; } - + ctdb_client_async_add(async_data, state); } } @@ -2746,7 +2689,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, talloc_free(async_data); - /* tell all nodes to get their own IPs */ + /* For each IP, send a TAKOVER_IP to the node that should be + * hosting it. Many of these will often be redundant (since + * the allocation won't have changed) but they can be useful + * to recover from inconsistencies. 
*/ async_data = talloc_zero(tmp_ctx, struct client_async_data); CTDB_NO_MEMORY_FATAL(ctdb, async_data); @@ -2759,35 +2705,21 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, continue; } - if (tmp_ip->addr.sa.sa_family == AF_INET) { - ipv4.pnn = tmp_ip->pnn; - ipv4.sin = tmp_ip->addr.ip; - - timeout = TAKEOVER_TIMEOUT(); - data.dsize = sizeof(ipv4); - data.dptr = (uint8_t *)&ipv4; - state = ctdb_control_send(ctdb, tmp_ip->pnn, - 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, - data, async_data, - &timeout, NULL); - } else { - ip.pnn = tmp_ip->pnn; - ip.addr = tmp_ip->addr; + ip.pnn = tmp_ip->pnn; + ip.addr = tmp_ip->addr; - timeout = TAKEOVER_TIMEOUT(); - data.dsize = sizeof(ip); - data.dptr = (uint8_t *)&ip; - state = ctdb_control_send(ctdb, tmp_ip->pnn, - 0, CTDB_CONTROL_TAKEOVER_IP, 0, - data, async_data, - &timeout, NULL); - } + timeout = TAKEOVER_TIMEOUT(); + data.dsize = sizeof(ip); + data.dptr = (uint8_t *)&ip; + state = ctdb_control_send(ctdb, tmp_ip->pnn, + 0, CTDB_CONTROL_TAKEOVER_IP, 0, + data, async_data, &timeout, NULL); if (state == NULL) { DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn)); talloc_free(tmp_ctx); return -1; } - + ctdb_client_async_add(async_data, state); } if (ctdb_client_async_wait(ctdb, async_data) != 0) { @@ -2797,12 +2729,12 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, } ipreallocated: - /* + /* * Tell all nodes to run eventscripts to process the * "ipreallocated" event. This can do a lot of things, * including restarting services to reconfigure them if public * IPs have moved. Once upon a time this event only used to - * update natwg. + * update natgw. 
*/ retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num); CTDB_NO_MEMORY_FATAL(ctdb, retry_data); @@ -2876,15 +2808,11 @@ static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip) /* called by a client to inform us of a TCP connection that it is managing that should tickled with an ACK when IP takeover is done - we handle both the old ipv4 style of packets as well as the new ipv4/6 - pdus. */ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata) { - struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); - struct ctdb_control_tcp *old_addr = NULL; - struct ctdb_control_tcp_addr new_addr; + struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client); struct ctdb_control_tcp_addr *tcp_sock = NULL; struct ctdb_tcp_list *tcp; struct ctdb_tcp_connection t; @@ -2894,27 +2822,13 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, struct ctdb_vnn *vnn; ctdb_sock_addr addr; - switch (indata.dsize) { - case sizeof(struct ctdb_control_tcp): - old_addr = (struct ctdb_control_tcp *)indata.dptr; - ZERO_STRUCT(new_addr); - tcp_sock = &new_addr; - tcp_sock->src.ip = old_addr->src; - tcp_sock->dest.ip = old_addr->dest; - break; - case sizeof(struct ctdb_control_tcp_addr): - tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr; - break; - default: - DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed " - "to ctdb_control_tcp_client. 
size was %d but " - "only allowed sizes are %lu and %lu\n", - (int)indata.dsize, - (long unsigned)sizeof(struct ctdb_control_tcp), - (long unsigned)sizeof(struct ctdb_control_tcp_addr))); - return -1; + /* If we don't have public IPs, tickles are useless */ + if (ctdb->vnn == NULL) { + return 0; } + tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr; + addr = tcp_sock->src; ctdb_canonicalize_ip(&addr, &tcp_sock->src); addr = tcp_sock->dest; @@ -3038,6 +2952,11 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tc struct ctdb_tcp_connection tcp; struct ctdb_vnn *vnn; + /* If we don't have public IPs, tickles are useless */ + if (ctdb->vnn == NULL) { + return 0; + } + vnn = find_public_ip_vnn(ctdb, &p->dst_addr); if (vnn == NULL) { DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n", @@ -3051,9 +2970,7 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tc /* If this is the first tickle */ if (tcparray == NULL) { - tcparray = talloc_size(ctdb->nodes, - offsetof(struct ctdb_tcp_array, connections) + - sizeof(struct ctdb_tcp_connection) * 1); + tcparray = talloc(vnn, struct ctdb_tcp_array); CTDB_NO_MEMORY(ctdb, tcparray); vnn->tcp_array = tcparray; @@ -3075,7 +2992,7 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tc /* Do we already have this tickle ?*/ tcp.src_addr = p->src_addr; tcp.dst_addr = p->dst_addr; - if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) { + if (ctdb_tcp_find(tcparray, &tcp) != NULL) { DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n", ctdb_addr_to_str(&tcp.dst_addr), ntohs(tcp.dst_addr.ip.sin_port), @@ -3089,11 +3006,10 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tc tcparray->num+1); CTDB_NO_MEMORY(ctdb, tcparray->connections); - vnn->tcp_array = tcparray; tcparray->connections[tcparray->num].src_addr = p->src_addr; 
tcparray->connections[tcparray->num].dst_addr = p->dst_addr; tcparray->num++; - + DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n", ctdb_addr_to_str(&tcp.dst_addr), ntohs(tcp.dst_addr.ip.sin_port), @@ -3178,6 +3094,11 @@ int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata) { struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr; + /* If we don't have public IPs, tickles are useless */ + if (ctdb->vnn == NULL) { + return 0; + } + ctdb_remove_tcp_connection(ctdb, conn); return 0; @@ -3185,12 +3106,23 @@ int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata) /* - called when a daemon restarts - send all tickes for all public addresses - we are serving immediately to the new node. + Called when another daemon starts - causes all tickles for all + public addresses we are serving to be sent to the new node on the + next check. This actually causes the next scheduled call to + ctdb_update_tcp_tickles() to update all nodes. This is simple and + doesn't require careful error handling. 
*/ -int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn) +int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn) { -/*XXX here we should send all tickes we are serving to the new node */ + struct ctdb_vnn *vnn; + + DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n", + (unsigned long) pnn)); + + for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { + vnn->tcp_update_needed = true; + } + return 0; } @@ -3209,14 +3141,15 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) } -/* - release all IPs on shutdown - */ void ctdb_release_all_ips(struct ctdb_context *ctdb) { struct ctdb_vnn *vnn; int count = 0; + if (ctdb->tunable.disable_ip_failover == 1) { + return; + } + for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { if (!ctdb_sys_have_ip(&vnn->public_address)) { ctdb_vnn_unassign_iface(ctdb, vnn); @@ -3226,6 +3159,20 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) continue; } + /* Don't allow multiple releases at once. Some code, + * particularly ctdb_tickle_sentenced_connections() is + * not re-entrant */ + if (vnn->update_in_flight) { + DEBUG(DEBUG_WARNING, + (__location__ + " Not releasing IP %s/%u on interface %s, an update is already in progess\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + continue; + } + vnn->update_in_flight = true; + DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n", ctdb_addr_to_str(&vnn->public_address), vnn->public_netmask_bits, @@ -3237,6 +3184,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) vnn->public_netmask_bits); release_kill_clients(ctdb, &vnn->public_address); ctdb_vnn_unassign_iface(ctdb, vnn); + vnn->update_in_flight = false; count++; } @@ -3248,7 +3196,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) get list of public IPs */ int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, - struct ctdb_req_control *c, TDB_DATA *outdata) + struct ctdb_req_control_old *c, 
TDB_DATA *outdata) { int i, num, len; struct ctdb_all_public_ips *ips; @@ -3290,49 +3238,8 @@ int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, } -/* - get list of public IPs, old ipv4 style. only returns ipv4 addresses - */ -int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, - struct ctdb_req_control *c, TDB_DATA *outdata) -{ - int i, num, len; - struct ctdb_all_public_ipsv4 *ips; - struct ctdb_vnn *vnn; - - /* count how many public ip structures we have */ - num = 0; - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - if (vnn->public_address.sa.sa_family != AF_INET) { - continue; - } - num++; - } - - len = offsetof(struct ctdb_all_public_ipsv4, ips) + - num*sizeof(struct ctdb_public_ipv4); - ips = talloc_zero_size(outdata, len); - CTDB_NO_MEMORY(ctdb, ips); - - outdata->dsize = len; - outdata->dptr = (uint8_t *)ips; - - ips->num = num; - i = 0; - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - if (vnn->public_address.sa.sa_family != AF_INET) { - continue; - } - ips->ips[i].pnn = vnn->pnn; - ips->ips[i].sin = vnn->public_address.ip; - i++; - } - - return 0; -} - int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, TDB_DATA indata, TDB_DATA *outdata) { @@ -3386,7 +3293,7 @@ int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb, if (vnn->iface == cur) { info->active_idx = i; } - strcpy(info->ifaces[i].name, cur->name); + strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1); info->ifaces[i].link_state = cur->link_up; info->ifaces[i].references = cur->references; } @@ -3401,7 +3308,7 @@ int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb, } int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, TDB_DATA *outdata) { int i, num, len; @@ -3437,7 +3344,7 @@ int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb, } int32_t ctdb_control_set_iface_link(struct 
ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, TDB_DATA indata) { struct ctdb_control_iface_info *info; @@ -3500,7 +3407,7 @@ struct ctdb_kill_tcp { struct ctdb_vnn *vnn; struct ctdb_context *ctdb; int capture_fd; - struct fd_event *fde; + struct tevent_fd *fde; trbt_tree_t *connections; void *private_data; }; @@ -3567,7 +3474,8 @@ static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst) /* called when we get a read event on the raw socket */ -static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, +static void capture_tcp_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, void *private_data) { struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp); @@ -3575,7 +3483,7 @@ static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, ctdb_sock_addr src, dst; uint32_t ack_seq, seq; - if (!(flags & EVENT_FD_READ)) { + if (!(flags & TEVENT_FD_READ)) { return; } @@ -3639,7 +3547,8 @@ static int tickle_connection_traverse(void *param, void *data) /* called every second until all sentenced connections have been reset */ -static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, +static void ctdb_tickle_sentenced_connections(struct tevent_context *ev, + struct tevent_timer *te, struct timeval t, void *private_data) { struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp); @@ -3662,8 +3571,9 @@ static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct t /* try tickling them again in a seconds time */ - event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), - ctdb_tickle_sentenced_connections, killtcp); + tevent_add_timer(killtcp->ctdb->ev, killtcp, + timeval_current_ofs(1, 0), + ctdb_tickle_sentenced_connections, killtcp); } /* @@ -3789,16 +3699,17 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, if 
(killtcp->fde == NULL) { - killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, - EVENT_FD_READ, - capture_tcp_handler, killtcp); + killtcp->fde = tevent_add_fd(ctdb->ev, killtcp, + killtcp->capture_fd, + TEVENT_FD_READ, + capture_tcp_handler, killtcp); tevent_fd_set_auto_close(killtcp->fde); /* We also need to set up some events to tickle all these connections until they are all reset */ - event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), - ctdb_tickle_sentenced_connections, killtcp); + tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0), + ctdb_tickle_sentenced_connections, killtcp); } /* tickle him once now */ @@ -3820,7 +3731,7 @@ failed: */ int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata) { - struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr; + struct ctdb_tcp_connection *killtcp = (struct ctdb_tcp_connection *)indata.dptr; return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr); } @@ -3853,11 +3764,14 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind * list->tickles.num) { DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n")); return -1; - } + } + + DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n", + ctdb_addr_to_str(&list->addr))); vnn = find_public_ip_vnn(ctdb, &list->addr); if (vnn == NULL) { - DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", + DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", ctdb_addr_to_str(&list->addr))); return 1; @@ -3867,7 +3781,7 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind talloc_free(vnn->tcp_array); vnn->tcp_array = NULL; - tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array); + tcparray = talloc(vnn, struct ctdb_tcp_array); CTDB_NO_MEMORY(ctdb, tcparray); tcparray->num = list->tickles.num; @@ -3875,12 +3789,12 
@@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num); CTDB_NO_MEMORY(ctdb, tcparray->connections); - memcpy(tcparray->connections, &list->tickles.connections[0], + memcpy(tcparray->connections, &list->tickles.connections[0], sizeof(struct ctdb_tcp_connection)*tcparray->num); /* We now have a new fresh tickle list array for this vnn */ - vnn->tcp_array = talloc_steal(vnn, tcparray); - + vnn->tcp_array = tcparray; + return 0; } @@ -3933,10 +3847,9 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind /* set the list of all tcp tickles for a public address */ -static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, - struct timeval timeout, uint32_t destnode, - ctdb_sock_addr *addr, - struct ctdb_tcp_array *tcparray) +static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb, + ctdb_sock_addr *addr, + struct ctdb_tcp_array *tcparray) { int ret, num; TDB_DATA data; @@ -3961,7 +3874,7 @@ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num); } - ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, + ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0, CTDB_CONTROL_SET_TCP_TICKLE_LIST, 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); if (ret != 0) { @@ -3978,9 +3891,9 @@ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, /* perform tickle updates if required */ -static void ctdb_update_tcp_tickles(struct event_context *ev, - struct timed_event *te, - struct timeval t, void *private_data) +static void ctdb_update_tcp_tickles(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) { struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); int ret; @@ -3997,22 +3910,24 @@ static void 
ctdb_update_tcp_tickles(struct event_context *ev, if (!vnn->tcp_update_needed) { continue; } - ret = ctdb_ctrl_set_tcp_tickles(ctdb, - TAKEOVER_TIMEOUT(), - CTDB_BROADCAST_CONNECTED, - &vnn->public_address, - vnn->tcp_array); + ret = ctdb_send_set_tcp_tickles_for_ip(ctdb, + &vnn->public_address, + vnn->tcp_array); if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n", ctdb_addr_to_str(&vnn->public_address))); + } else { + DEBUG(DEBUG_INFO, + ("Sent tickle update for public address %s\n", + ctdb_addr_to_str(&vnn->public_address))); + vnn->tcp_update_needed = false; } } - event_add_timed(ctdb->ev, ctdb->tickle_update_context, - timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), - ctdb_update_tcp_tickles, ctdb); -} - + tevent_add_timer(ctdb->ev, ctdb->tickle_update_context, + timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), + ctdb_update_tcp_tickles, ctdb); +} /* start periodic update of tcp tickles @@ -4021,9 +3936,9 @@ void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb) { ctdb->tickle_update_context = talloc_new(ctdb); - event_add_timed(ctdb->ev, ctdb->tickle_update_context, - timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), - ctdb_update_tcp_tickles, ctdb); + tevent_add_timer(ctdb->ev, ctdb->tickle_update_context, + timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), + ctdb_update_tcp_tickles, ctdb); } @@ -4039,8 +3954,9 @@ struct control_gratious_arp { /* send a control_gratuitous arp */ -static void send_gratious_arp(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private_data) +static void send_gratious_arp(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) { int ret; struct control_gratious_arp *arp = talloc_get_type(private_data, @@ -4059,9 +3975,9 @@ static void send_gratious_arp(struct event_context *ev, struct timed_event *te, return; } - event_add_timed(arp->ctdb->ev, arp, - 
timeval_current_ofs(CTDB_ARP_INTERVAL, 0), - send_gratious_arp, arp); + tevent_add_timer(arp->ctdb->ev, arp, + timeval_current_ofs(CTDB_ARP_INTERVAL, 0), + send_gratious_arp, arp); } @@ -4100,9 +4016,9 @@ int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indat arp->iface = talloc_strdup(arp, gratious_arp->iface); CTDB_NO_MEMORY(ctdb, arp->iface); arp->count = 0; - - event_add_timed(arp->ctdb->ev, arp, - timeval_zero(), send_gratious_arp, arp); + + tevent_add_timer(arp->ctdb->ev, arp, + timeval_zero(), send_gratious_arp, arp); return 0; } @@ -4140,20 +4056,32 @@ int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA inda return 0; } +struct delete_ip_callback_state { + struct ctdb_req_control_old *c; +}; + /* called when releaseip event finishes for del_public_address */ -static void delete_ip_callback(struct ctdb_context *ctdb, int status, - void *private_data) +static void delete_ip_callback(struct ctdb_context *ctdb, + int32_t status, TDB_DATA data, + const char *errormsg, + void *private_data) { + struct delete_ip_callback_state *state = + talloc_get_type(private_data, struct delete_ip_callback_state); + + /* If release failed then fail. 
*/ + ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg); talloc_free(private_data); } -int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata) +int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata, bool *async_reply) { struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr; struct ctdb_vnn *vnn; - int ret; /* verify the size of indata */ if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) { @@ -4176,44 +4104,69 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda /* walk over all public addresses until we find a match */ for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { if (ctdb_same_ip(&vnn->public_address, &pub->addr)) { - TALLOC_CTX *mem_ctx = talloc_new(ctdb); - - DLIST_REMOVE(ctdb->vnn, vnn); - talloc_steal(mem_ctx, vnn); - ctdb_remove_orphaned_ifaces(ctdb, vnn, mem_ctx); - if (vnn->pnn != ctdb->pnn) { - if (vnn->iface != NULL) { - ctdb_vnn_unassign_iface(ctdb, vnn); + if (vnn->pnn == ctdb->pnn) { + struct delete_ip_callback_state *state; + struct ctdb_public_ip *ip; + TDB_DATA data; + int ret; + + vnn->delete_pending = true; + + state = talloc(ctdb, + struct delete_ip_callback_state); + CTDB_NO_MEMORY(ctdb, state); + state->c = c; + + ip = talloc(state, struct ctdb_public_ip); + if (ip == NULL) { + DEBUG(DEBUG_ERR, + (__location__ " Out of memory\n")); + talloc_free(state); + return -1; + } + ip->pnn = -1; + ip->addr = pub->addr; + + data.dsize = sizeof(struct ctdb_public_ip); + data.dptr = (unsigned char *)ip; + + ret = ctdb_daemon_send_control(ctdb, + ctdb_get_pnn(ctdb), + 0, + CTDB_CONTROL_RELEASE_IP, + 0, 0, + data, + delete_ip_callback, + state); + if (ret == -1) { + DEBUG(DEBUG_ERR, + (__location__ "Unable to send " + "CTDB_CONTROL_RELEASE_IP\n")); + talloc_free(state); + return -1; } - talloc_free(mem_ctx); - return 0; - } - vnn->pnn = -1; - ret = ctdb_event_script_callback(ctdb, - 
mem_ctx, delete_ip_callback, mem_ctx, - false, - CTDB_EVENT_RELEASE_IP, - "%s %s %u", - ctdb_vnn_iface_string(vnn), - ctdb_addr_to_str(&vnn->public_address), - vnn->public_netmask_bits); - if (vnn->iface != NULL) { - ctdb_vnn_unassign_iface(ctdb, vnn); - } - if (ret != 0) { - return -1; + state->c = talloc_steal(state, c); + *async_reply = true; + } else { + /* This IP is not hosted on the + * current node so just delete it + * now. */ + do_delete_ip(ctdb, vnn); } + return 0; } } + DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n", + ctdb_addr_to_str(&pub->addr))); return -1; } struct ipreallocated_callback_state { - struct ctdb_req_control *c; + struct ctdb_req_control_old *c; }; static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb, @@ -4237,7 +4190,7 @@ static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb, /* A control to run the ipreallocated event */ int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb, - struct ctdb_req_control *c, + struct ctdb_req_control_old *c, bool *async_reply) { int ret; @@ -4250,7 +4203,7 @@ int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb, ret = ctdb_event_script_callback(ctdb, state, ctdb_ipreallocated_callback, state, - false, CTDB_EVENT_IPREALLOCATED, + CTDB_EVENT_IPREALLOCATED, "%s", ""); if (ret != 0) { @@ -4275,7 +4228,7 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips, uint32_t pnn) { - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; int i; if (ctdb->ip_tree == NULL) { @@ -4314,7 +4267,12 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip) { - struct ctdb_public_ip_list *tmp_ip; + struct public_ip_list *tmp_ip; + + /* IP tree is never built if DisableIPFailover is set */ + if (ctdb->tunable.disable_ip_failover != 0) { + return 0; + } if (ctdb->ip_tree == NULL) { DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. 
Failed to update ip assignment\n")); @@ -4333,14 +4291,18 @@ int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip * return 0; } +void clear_ip_assignment_tree(struct ctdb_context *ctdb) +{ + TALLOC_FREE(ctdb->ip_tree); +} struct ctdb_reloadips_handle { struct ctdb_context *ctdb; - struct ctdb_req_control *c; + struct ctdb_req_control_old *c; int status; int fd[2]; pid_t child; - struct fd_event *fde; + struct tevent_fd *fde; }; static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h) @@ -4356,24 +4318,25 @@ static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h) return 0; } -static void ctdb_reloadips_timeout_event(struct event_context *ev, - struct timed_event *te, - struct timeval t, void *private_data) +static void ctdb_reloadips_timeout_event(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) { struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle); talloc_free(h); -} +} -static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde, - uint16_t flags, void *private_data) +static void ctdb_reloadips_child_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, void *private_data) { struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle); char res; int ret; - ret = read(h->fd[0], &res, 1); + ret = sys_read(h->fd[0], &res, 1); if (ret < 1 || res != 0) { DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n")); res = 1; @@ -4462,6 +4425,7 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb) goto failed; } + ctdb_client_async_add(async_data, state); } } @@ -4538,6 +4502,8 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb) " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n")); goto failed; } + + ctdb_client_async_add(async_data, state); } } @@ -4558,7 +4524,7 @@ failed: and drop any addresses we should nnot longer 
host, and add new addresses that we are now able to host */ -int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply) +int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply) { struct ctdb_reloadips_handle *h; pid_t parent = getpid(); @@ -4607,7 +4573,7 @@ int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_re } } - write(h->fd[1], &res, 1); + sys_write(h->fd[1], &res, 1); /* make sure we die when our parent dies */ while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) { sleep(5); @@ -4623,14 +4589,12 @@ int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_re talloc_set_destructor(h, ctdb_reloadips_destructor); - h->fde = event_add_fd(ctdb->ev, h, h->fd[0], - EVENT_FD_READ, ctdb_reloadips_child_handler, - (void *)h); + h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ, + ctdb_reloadips_child_handler, (void *)h); tevent_fd_set_auto_close(h->fde); - event_add_timed(ctdb->ev, h, - timeval_current_ofs(120, 0), - ctdb_reloadips_timeout_event, h); + tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0), + ctdb_reloadips_timeout_event, h); /* we reply later */ *async_reply = true;