You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "includes.h"
-#include "lib/tdb/include/tdb.h"
-#include "lib/util/dlinklist.h"
+#include "replace.h"
#include "system/network.h"
#include "system/filesys.h"
+#include "system/time.h"
#include "system/wait.h"
-#include "../include/ctdb_private.h"
-#include "../common/rb_tree.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+#include "ctdb_logging.h"
+
+#include "common/rb_tree.h"
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
struct ctdb_ipflags {
bool noiptakeover;
bool noiphost;
+ enum ctdb_runstate runstate;
};
struct ctdb_iface {
CTDB_NO_MEMORY_FATAL(ctdb, i);
i->name = talloc_strdup(i, iface);
CTDB_NO_MEMORY(ctdb, i->name);
- /*
- * If link_up defaults to true then IPs can be allocated to a
- * node during the first recovery. However, then an interface
- * could have its link marked down during the startup event,
- * causing the IP to move almost immediately. If link_up
- * defaults to false then, during normal operation, IPs added
- * to a new interface can't be assigned until a monitor cycle
- * has occurred and marked the new interfaces up. This makes
- * IP allocation unpredictable. The following is a neat
- * compromise: early in startup link_up defaults to false, so
- * IPs can't be assigned, and after startup IPs can be
- * assigned immediately.
- */
- i->link_up = (ctdb->runstate == CTDB_RUNSTATE_RUNNING);
+
+ i->link_up = true;
DLIST_ADD(ctdb->ifaces, i);
* causes problems... :-)
*/
static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
- struct ctdb_vnn *vnn,
- TALLOC_CTX *mem_ctx)
+ struct ctdb_vnn *vnn)
{
- struct ctdb_iface *i;
+ struct ctdb_iface *i, *next;
/* For each interface, check if there's an IP using it. */
- for(i=ctdb->ifaces; i; i=i->next) {
+ for (i = ctdb->ifaces; i != NULL; i = next) {
struct ctdb_vnn *tv;
bool found;
+ next = i->next;
/* Only consider interfaces named in the given VNN. */
if (!vnn_has_interface_with_name(vnn, i->name)) {
if (!found) {
/* None of the VNNs are using this interface. */
DLIST_REMOVE(ctdb->ifaces, i);
- /* Caller will free mem_ctx when convenient. */
- talloc_steal(mem_ctx, i);
+ talloc_free(i);
}
}
}
{
int i;
+ if (vnn->delete_pending) {
+ return false;
+ }
+
if (vnn->iface && vnn->iface->link_up) {
return true;
}
/*
send a gratuitous arp
*/
-static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
+static void ctdb_control_send_arp(struct tevent_context *ev,
+ struct tevent_timer *te,
struct timeval t, void *private_data)
{
struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
return;
}
- event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
- timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
- ctdb_control_send_arp, arp);
+ tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
+ timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
+ ctdb_control_send_arp, arp);
}
static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
vnn->tcp_update_needed = true;
}
- event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
- timeval_zero(), ctdb_control_send_arp, arp);
+ tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
+ timeval_zero(), ctdb_control_send_arp, arp);
return 0;
}
struct takeover_callback_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
ctdb_sock_addr *addr;
struct ctdb_vnn *vnn;
};
struct ctdb_do_takeip_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
struct ctdb_vnn *vnn;
};
take over an ip address
*/
static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
struct ctdb_vnn *vnn)
{
int ret;
state,
ctdb_do_takeip_callback,
state,
- false,
CTDB_EVENT_TAKE_IP,
"%s %s %u",
ctdb_vnn_iface_string(vnn),
}
struct ctdb_do_updateip_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
struct ctdb_iface *old;
struct ctdb_vnn *vnn;
};
update (move) an ip address
*/
static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
struct ctdb_vnn *vnn)
{
int ret;
state,
ctdb_do_updateip_callback,
state,
- false,
CTDB_EVENT_UPDATE_IP,
"%s %s %s %u",
state->old->name,
take over an ip address
*/
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata,
bool *async_reply)
{
return 0;
}
- if (ctdb->do_checkpublicip) {
+ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
have_ip = ctdb_sys_have_ip(&pip->addr);
}
best_iface = ctdb_vnn_best_iface(ctdb, vnn);
return 0;
}
-/*
- takeover an ip address old v4 style
- */
-int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
- TDB_DATA indata,
- bool *async_reply)
-{
- TDB_DATA data;
-
- data.dsize = sizeof(struct ctdb_public_ip);
- data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
- CTDB_NO_MEMORY(ctdb, data.dptr);
-
- memcpy(data.dptr, indata.dptr, indata.dsize);
- return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
-}
-
/*
kill any clients that are registered with a IP that is being released
*/
ctdb_addr_to_str(&ip->addr)));
if (ctdb_same_ip(&tmp_addr, addr)) {
- struct ctdb_client *client = ctdb_reqid_find(ctdb,
- ip->client_id,
- struct ctdb_client);
+ struct ctdb_client *client = reqid_find(ctdb->idr,
+ ip->client_id,
+ struct ctdb_client);
DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
ip->client_id,
ctdb_addr_to_str(&ip->addr),
(unsigned)client->pid,
ctdb_addr_to_str(addr),
ip->client_id));
- ctdb_kill(ctdb, client->pid, SIGKILL);
+ kill(client->pid, SIGKILL);
}
}
}
}
+static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
+{ /* Remove a public IP (VNN) from this node: unlink it, detach its interface, tidy interfaces, free it */
+ DLIST_REMOVE(ctdb->vnn, vnn); /* take the VNN off the list of known public addresses */
+ ctdb_vnn_unassign_iface(ctdb, vnn); /* detach it from the interface it is currently assigned to */
+ ctdb_remove_orphaned_ifaces(ctdb, vnn); /* drop and free interfaces named in this VNN that no VNN is using */
+ talloc_free(vnn); /* vnn is invalid from here on; callers must not keep a pointer to it */
+}
+
/*
called when releaseip event finishes
*/
ctdb_ban_self(ctdb);
}
+ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
+ if (ctdb_sys_have_ip(state->addr)) {
+ DEBUG(DEBUG_ERR,
+ ("IP %s still hosted during release IP callback, failing\n",
+ ctdb_addr_to_str(state->addr)));
+ ctdb_request_control_reply(ctdb, state->c,
+ NULL, -1, NULL);
+ talloc_free(state);
+ return;
+ }
+ }
+
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
ctdb_vnn_unassign_iface(ctdb, state->vnn);
+ /* Process the IP if it has been marked for deletion */
+ if (state->vnn->delete_pending) {
+ do_delete_ip(ctdb, state->vnn);
+ state->vnn = NULL;
+ }
+
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
{
- state->vnn->update_in_flight = false;
+ if (state->vnn != NULL) {
+ state->vnn->update_in_flight = false;
+ }
return 0;
}
release an ip address
*/
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata,
bool *async_reply)
{
* intended new node. The following causes makes ctdbd ignore
* a release for any address it doesn't host.
*/
- if (ctdb->do_checkpublicip) {
+ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
if (!ctdb_sys_have_ip(&pip->addr)) {
DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
ctdb_addr_to_str(&pip->addr),
return -1;
}
- if (ctdb->do_checkpublicip) {
- iface = ctdb_sys_find_ifname(&pip->addr);
- if (iface == NULL) {
- DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
- return 0;
- }
- } else {
- iface = strdup(ctdb_vnn_iface_string(vnn));
- }
+ iface = strdup(ctdb_vnn_iface_string(vnn));
DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
ctdb_addr_to_str(&pip->addr),
pip->pnn));
state = talloc(ctdb, struct takeover_callback_state);
- CTDB_NO_MEMORY(ctdb, state);
+ if (state == NULL) {
+ ctdb_set_error(ctdb, "Out of memory at %s:%d",
+ __FILE__, __LINE__);
+ free(iface);
+ return -1;
+ }
state->c = talloc_steal(state, c);
state->addr = talloc(state, ctdb_sock_addr);
- CTDB_NO_MEMORY(ctdb, state->addr);
+ if (state->addr == NULL) {
+ ctdb_set_error(ctdb, "Out of memory at %s:%d",
+ __FILE__, __LINE__);
+ free(iface);
+ talloc_free(state);
+ return -1;
+ }
*state->addr = pip->addr;
state->vnn = vnn;
ret = ctdb_event_script_callback(ctdb,
state, release_ip_callback, state,
- false,
CTDB_EVENT_RELEASE_IP,
"%s %s %u",
iface,
return 0;
}
-/*
- release an ip address old v4 style
- */
-int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
- TDB_DATA indata,
- bool *async_reply)
-{
- TDB_DATA data;
-
- data.dsize = sizeof(struct ctdb_public_ip);
- data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
- CTDB_NO_MEMORY(ctdb, data.dptr);
-
- memcpy(data.dptr, indata.dptr, indata.dsize);
- return ctdb_control_release_ip(ctdb, c, data, async_reply);
-}
-
-
static int ctdb_add_public_address(struct ctdb_context *ctdb,
ctdb_sock_addr *addr,
unsigned mask, const char *ifaces,
return 0;
}
-/*
- setup the event script directory
-*/
-int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
-{
- ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
- CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
- return 0;
-}
-
-static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
-{
- struct ctdb_context *ctdb = talloc_get_type(private_data,
- struct ctdb_context);
- struct ctdb_vnn *vnn;
-
- for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
- int i;
-
- for (i=0; vnn->ifaces[i] != NULL; i++) {
- if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
- DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
- vnn->ifaces[i],
- ctdb_addr_to_str(&vnn->public_address)));
- }
- }
- }
-
- event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
- timeval_current_ofs(30, 0),
- ctdb_check_interfaces_event, ctdb);
-}
-
-
-int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
-{
- if (ctdb->check_public_ifaces_ctx != NULL) {
- talloc_free(ctdb->check_public_ifaces_ctx);
- ctdb->check_public_ifaces_ctx = NULL;
- }
-
- ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
- if (ctdb->check_public_ifaces_ctx == NULL) {
- ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
- }
-
- event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
- timeval_current_ofs(30, 0),
- ctdb_check_interfaces_event, ctdb);
-
- return 0;
-}
-
-
/*
setup the public address lists from a file
*/
int nlines;
int i;
- lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb);
+ lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
if (lines == NULL) {
ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
return -1;
return 0;
}
+struct public_ip_list { /* One public IP address in the list built by create_merged_ip_list() */
+ struct public_ip_list *next; /* next entry of the singly linked list, NULL at the end */
+ uint32_t pnn; /* node the address is currently assigned to; the allocation code rewrites this */
+ ctdb_sock_addr addr; /* the public address itself */
+};
+
/* Given a physical node, return the number of
public addresses that is currently assigned to this node.
*/
-static int node_ip_coverage(struct ctdb_context *ctdb,
- int32_t pnn,
- struct ctdb_public_ip_list *ips)
+static int node_ip_coverage(struct ctdb_context *ctdb, int32_t pnn,
+ struct public_ip_list *ips)
{
int num=0;
/* Can the given node host the given IP: is the public IP known to the
* node and is NOIPHOST unset?
*/
-static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
+static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
struct ctdb_ipflags ipflags,
- struct ctdb_public_ip_list *ip)
+ struct public_ip_list *ip)
{
struct ctdb_all_public_ips *public_ips;
int i;
return false;
}
-static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
+static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
struct ctdb_ipflags ipflags,
- struct ctdb_public_ip_list *ip)
+ struct public_ip_list *ip)
{
if (ipflags.noiptakeover) {
return false;
pick the node that currently are serving the least number of ips
so that the ips get spread out evenly.
*/
-static int find_takeover_node(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *ip,
- struct ctdb_public_ip_list *all_ips)
+static int find_takeover_node(struct ctdb_context *ctdb,
+ struct ctdb_ipflags *ipflags,
+ struct public_ip_list *ip,
+ struct public_ip_list *all_ips)
{
int pnn, min=0, num;
int i, numnodes;
static void *add_ip_callback(void *parm, void *data)
{
- struct ctdb_public_ip_list *this_ip = parm;
- struct ctdb_public_ip_list *prev_ip = data;
+ struct public_ip_list *this_ip = parm;
+ struct public_ip_list *prev_ip = data;
if (prev_ip == NULL) {
return parm;
static int getips_count_callback(void *param, void *data)
{
- struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
- struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
+ struct public_ip_list **ip_list = (struct public_ip_list **)param;
+ struct public_ip_list *new_ip = (struct public_ip_list *)data;
new_ip->next = *ip_list;
*ip_list = new_ip;
return 0;
}
-static struct ctdb_public_ip_list *
+static struct public_ip_list *
create_merged_ip_list(struct ctdb_context *ctdb)
{
int i, j;
- struct ctdb_public_ip_list *ip_list;
+ struct public_ip_list *ip_list;
struct ctdb_all_public_ips *public_ips;
if (ctdb->ip_tree != NULL) {
}
for (j=0;j<public_ips->num;j++) {
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
- tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
+ tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
/* Do not use information about IP addresses hosted
* on other nodes, it may not be accurate */
used in the main part of the algorithm.
*/
static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
- struct ctdb_public_ip_list *ips,
+ struct public_ip_list *ips,
int pnn)
{
- struct ctdb_public_ip_list *t;
+ struct public_ip_list *t;
uint32_t d;
uint32_t sum = 0;
/* Return the LCP2 imbalance metric for addresses currently assigned
to the given node.
*/
-static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
+static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn)
{
- struct ctdb_public_ip_list *t;
+ struct public_ip_list *t;
uint32_t imbalance = 0;
*/
static void basic_allocate_unassigned(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+ struct public_ip_list *all_ips)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
/* loop over all ip's and find a physical node to cover for
each unassigned ip.
*/
static void basic_failback(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+ struct public_ip_list *all_ips,
int num_ips)
{
int i, numnodes;
int maxnode, maxnum, minnode, minnum, num, retries;
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
numnodes = talloc_array_length(ipflags);
retries = 0;
*/
if ( (maxnum > minnum+1)
&& (retries < (num_ips + 5)) ){
- struct ctdb_public_ip_list *tmp;
+ struct public_ip_list *tmp;
/* Reassign one of maxnode's VNNs */
for (tmp=all_ips;tmp;tmp=tmp->next) {
}
}
-struct ctdb_rebalancenodes {
- struct ctdb_rebalancenodes *next;
- uint32_t pnn;
-};
-static struct ctdb_rebalancenodes *force_rebalance_list = NULL;
-
-
-/* set this flag to force the node to be rebalanced even if it just didnt
- become healthy again.
-*/
-void lcp2_forcerebalance(struct ctdb_context *ctdb, uint32_t pnn)
-{
- struct ctdb_rebalancenodes *rebalance;
-
- for (rebalance = force_rebalance_list; rebalance; rebalance = rebalance->next) {
- if (rebalance->pnn == pnn) {
- return;
- }
- }
-
- rebalance = talloc(ctdb, struct ctdb_rebalancenodes);
- rebalance->pnn = pnn;
- rebalance->next = force_rebalance_list;
- force_rebalance_list = rebalance;
-}
-
-/* Do necessary LCP2 initialisation. Bury it in a function here so
- * that we can unit test it.
- */
static void lcp2_init(struct ctdb_context *tmp_ctx,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+ struct public_ip_list *all_ips,
+ uint32_t *force_rebalance_nodes,
uint32_t **lcp2_imbalances,
bool **rebalance_candidates)
{
int i, numnodes;
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
numnodes = talloc_array_length(ipflags);
/* 3rd step: if a node is forced to re-balance then
we allow failback onto the node */
- while (force_rebalance_list != NULL) {
- struct ctdb_rebalancenodes *next = force_rebalance_list->next;
-
- if (force_rebalance_list->pnn <= numnodes) {
- (*rebalance_candidates)[force_rebalance_list->pnn] = true;
+ if (force_rebalance_nodes == NULL) {
+ return;
+ }
+ for (i = 0; i < talloc_array_length(force_rebalance_nodes); i++) {
+ uint32_t pnn = force_rebalance_nodes[i];
+ if (pnn >= numnodes) {
+ DEBUG(DEBUG_ERR,
+ (__location__ "unknown node %u\n", pnn));
+ continue;
}
- DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
- talloc_free(force_rebalance_list);
- force_rebalance_list = next;
+ DEBUG(DEBUG_NOTICE,
+ ("Forcing rebalancing of IPs to node %u\n", pnn));
+ (*rebalance_candidates)[pnn] = true;
}
}
*/
static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+ struct public_ip_list *all_ips,
uint32_t *lcp2_imbalances)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
int dstnode, numnodes;
int minnode;
uint32_t mindsum, dstdsum, dstimbl, minimbl;
- struct ctdb_public_ip_list *minip;
+ struct public_ip_list *minip;
bool should_loop = true;
bool have_unassigned = true;
*/
static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+ struct public_ip_list *all_ips,
int srcnode,
- uint32_t candimbl,
uint32_t *lcp2_imbalances,
bool *rebalance_candidates)
{
int dstnode, mindstnode, numnodes;
uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
uint32_t minsrcimbl, mindstimbl;
- struct ctdb_public_ip_list *minip;
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *minip;
+ struct public_ip_list *tmp_ip;
/* Find an IP and destination node that best reduces imbalance. */
+ srcimbl = 0;
minip = NULL;
minsrcimbl = 0;
mindstnode = -1;
numnodes = talloc_array_length(ipflags);
DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
- DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n",
+ srcnode, lcp2_imbalances[srcnode]));
for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
/* Only consider addresses on srcnode. */
/* What is this IP address costing the source node? */
srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
- srcimbl = candimbl - srcdsum;
+ srcimbl = lcp2_imbalances[srcnode] - srcdsum;
/* Consider this IP address would cost each potential
* destination node. Destination nodes are limited to
dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
dstimbl = lcp2_imbalances[dstnode] + dstdsum;
DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
- srcnode, srcimbl - lcp2_imbalances[srcnode],
+ srcnode, -srcdsum,
ctdb_addr_to_str(&(tmp_ip->addr)),
- dstnode, dstimbl - lcp2_imbalances[dstnode]));
+ dstnode, dstdsum));
- if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
+ if ((dstimbl < lcp2_imbalances[srcnode]) &&
+ (dstdsum < srcdsum) && \
((mindstnode == -1) || \
((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
- lcp2_imbalances[srcnode] = srcimbl;
+ lcp2_imbalances[srcnode] = minsrcimbl;
lcp2_imbalances[mindstnode] = mindstimbl;
minip->pnn = mindstnode;
*/
static void lcp2_failback(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+ struct public_ip_list *all_ips,
uint32_t *lcp2_imbalances,
bool *rebalance_candidates)
{
- int i, num_rebalance_candidates, numnodes;
+ int i, numnodes;
struct lcp2_imbalance_pnn * lips;
bool again;
numnodes = talloc_array_length(ipflags);
try_again:
-
- /* It is only worth continuing if we have suitable target
- * nodes to transfer IPs to. This check is much cheaper than
- * continuing on...
- */
- num_rebalance_candidates = 0;
- for (i=0; i<numnodes; i++) {
- if (rebalance_candidates[i]) {
- num_rebalance_candidates++;
- }
- }
- if (num_rebalance_candidates == 0) {
- return;
- }
-
/* Put the imbalances and nodes into an array, sort them and
* iterate through candidates. Usually the 1st one will be
* used, so this doesn't cost much...
*/
+ DEBUG(DEBUG_DEBUG,("+++++++++++++++++++++++++++++++++++++++++\n"));
+ DEBUG(DEBUG_DEBUG,("Selecting most imbalanced node from:\n"));
lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, numnodes);
for (i=0; i<numnodes; i++) {
lips[i].imbalance = lcp2_imbalances[i];
lips[i].pnn = i;
+ DEBUG(DEBUG_DEBUG,(" %d [%d]\n", i, lcp2_imbalances[i]));
}
qsort(lips, numnodes, sizeof(struct lcp2_imbalance_pnn),
lcp2_cmp_imbalance_pnn);
ipflags,
all_ips,
lips[i].pnn,
- lips[i].imbalance,
lcp2_imbalances,
rebalance_candidates)) {
again = true;
static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+ struct public_ip_list *all_ips)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
/* verify that the assigned nodes can serve that public ip
and set it to -1 if not
static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+ struct public_ip_list *all_ips)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
int i, numnodes;
numnodes = talloc_array_length(ipflags);
static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+ struct public_ip_list *all_ips)
{
/* This should be pushed down into basic_failback. */
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
int num_ips = 0;
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
num_ips++;
static void ip_alloc_lcp2(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+ struct public_ip_list *all_ips,
+ uint32_t *force_rebalance_nodes)
{
uint32_t *lcp2_imbalances;
bool *rebalance_candidates;
+ int numnodes, num_rebalance_candidates, i;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
unassign_unsuitable_ips(ctdb, ipflags, all_ips);
- lcp2_init(tmp_ctx, ipflags, all_ips,
+ lcp2_init(tmp_ctx, ipflags, all_ips,force_rebalance_nodes,
&lcp2_imbalances, &rebalance_candidates);
lcp2_allocate_unassigned(ctdb, ipflags, all_ips, lcp2_imbalances);
goto finished;
}
+ /* It is only worth continuing if we have suitable target
+ * nodes to transfer IPs to. This check is much cheaper than
+ * continuing on...
+ */
+ numnodes = talloc_array_length(ipflags);
+ num_rebalance_candidates = 0;
+ for (i=0; i<numnodes; i++) {
+ if (rebalance_candidates[i]) {
+ num_rebalance_candidates++;
+ }
+ }
+ if (num_rebalance_candidates == 0) {
+ goto finished;
+ }
+
/* Now, try to make sure the ip adresses are evenly distributed
across the nodes.
*/
talloc_free(tmp_ctx);
}
-static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
+static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
{
- int i, num_healthy;
+ int i;
- /* Count how many completely healthy nodes we have */
- num_healthy = 0;
for (i=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
- num_healthy++;
+ /* Found one completely healthy node */
+ return false;
}
}
- return num_healthy == 0;
+ return true;
}
/* The calculation part of the IP allocation algorithm. */
static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list **all_ips_p)
+ struct public_ip_list **all_ips_p,
+ uint32_t *force_rebalance_nodes)
{
/* since nodes only know about those public addresses that
can be served by that particular node, no single node has
*all_ips_p = create_merged_ip_list(ctdb);
if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
- ip_alloc_lcp2(ctdb, ipflags, *all_ips_p);
+ ip_alloc_lcp2(ctdb, ipflags, *all_ips_p, force_rebalance_nodes);
} else if (1 == ctdb->tunable.deterministic_public_ips) {
ip_alloc_deterministic_ips(ctdb, ipflags, *all_ips_p);
} else {
struct get_tunable_callback_data {
const char *tunable;
uint32_t *out;
+ bool fatal;
};
static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
int size;
if (res != 0) {
- DEBUG(DEBUG_ERR,
- ("Failure to read \"%s\" tunable from remote node %d\n",
- cd->tunable, pnn));
+ /* Already handled in fail callback */
return;
}
DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
cd->tunable, pnn, (int)sizeof(uint32_t),
(int)outdata.dsize));
+ cd->fatal = true;
return;
}
cd->out[pnn] = *(uint32_t *)outdata.dptr;
}
+/*
+ * Per-node failure callback for the CTDB_CONTROL_GET_TUNABLE query.
+ *
+ * -EINVAL and -1 are taken to mean that the remote node does not
+ * implement the tunable: a warning is logged and the failure is not
+ * marked fatal.  A timeout (-ETIME) or any other error is logged as
+ * an error and marks the whole query as fatal in the callback data.
+ */
+static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+				      int32_t res, TDB_DATA outdata,
+				      void *callback)
+{
+	struct get_tunable_callback_data *state = callback;
+
+	/* Tunable unknown to the remote node: tolerated, not fatal */
+	if (res == -EINVAL || res == -1) {
+		DEBUG(DEBUG_WARNING,
+		      ("Tunable \"%s\" not implemented on node %d\n",
+		       state->tunable, pnn));
+		return;
+	}
+
+	/* Everything else is fatal; only the log message differs */
+	if (res == -ETIME) {
+		DEBUG(DEBUG_ERR,
+		      ("Timed out getting tunable \"%s\" from node %d\n",
+		       state->tunable, pnn));
+	} else {
+		DEBUG(DEBUG_ERR,
+		      ("Unexpected error getting tunable \"%s\" from node %d\n",
+		       state->tunable, pnn));
+	}
+	state->fatal = true;
+}
+
static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap,
+ struct ctdb_node_map_old *nodemap,
const char *tunable,
uint32_t default_value)
{
callback_data.out = tvals;
callback_data.tunable = tunable;
+ callback_data.fatal = false;
data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
data.dptr = talloc_size(tmp_ctx, data.dsize);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
nodes, 0, TAKEOVER_TIMEOUT(),
false, data,
- get_tunable_callback, NULL,
+ get_tunable_callback,
+ get_tunable_fail_callback,
&callback_data) != 0) {
- DEBUG(DEBUG_ERR, (__location__ " ctdb_control to get %s tunable failed\n", tunable));
+ if (callback_data.fatal) {
+ talloc_free(tvals);
+ tvals = NULL;
+ }
}
talloc_free(nodes);
talloc_free(data.dptr);
return tvals;
}
+struct get_runstate_callback_data { /* State shared by the GET_RUNSTATE reply and failure callbacks */
+ enum ctdb_runstate *out; /* per-node results indexed by PNN; its talloc array length bounds the valid PNNs */
+ bool fatal; /* set by a callback when the failure must make the whole query fail */
+};
+
+/*
+ * Per-node reply callback for the CTDB_CONTROL_GET_RUNSTATE query.
+ *
+ * On a successful reply carrying exactly one uint32_t, the value is
+ * stored as the runstate of node pnn in the output array.  A reply of
+ * the wrong size marks the query fatal.  A reply from a node beyond the
+ * end of the output array is logged and ignored.  Failed replies
+ * (res != 0) are left to the failure callback.
+ */
+static void get_runstate_callback(struct ctdb_context *ctdb, uint32_t pnn,
+				  int32_t res, TDB_DATA outdata,
+				  void *callback_data)
+{
+	struct get_runstate_callback_data *state = callback_data;
+	int nslots;
+
+	if (res != 0) {
+		/* Already handled in fail callback */
+		return;
+	}
+
+	/* The payload must be a single uint32_t */
+	if (outdata.dsize != sizeof(uint32_t)) {
+		DEBUG(DEBUG_ERR,("Wrong size of returned data when getting runstate from node %d. Expected %d bytes but received %d bytes\n",
+				 pnn, (int)sizeof(uint32_t),
+				 (int)outdata.dsize));
+		state->fatal = true;
+		return;
+	}
+
+	/* Only record the answer if the node has a slot in the array */
+	nslots = talloc_array_length(state->out);
+	if (pnn < nslots) {
+		state->out[pnn] = (enum ctdb_runstate)*(uint32_t *)outdata.dptr;
+		return;
+	}
+
+	DEBUG(DEBUG_ERR,("Got reply from node %d but nodemap only has %d entries\n",
+			 pnn, nslots));
+}
+
+/*
+ * Per-node failure callback for the CTDB_CONTROL_GET_RUNSTATE query.
+ *
+ * A timeout (-ETIME) is logged as an error and marks the query fatal.
+ * Any other failure is only warned about, on the assumption that the
+ * remote node does not support runstates.
+ */
+static void get_runstate_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+				       int32_t res, TDB_DATA outdata,
+				       void *callback)
+{
+	struct get_runstate_callback_data *state = callback;
+
+	if (res == -ETIME) {
+		DEBUG(DEBUG_ERR,
+		      ("Timed out getting runstate from node %d\n", pnn));
+		state->fatal = true;
+		return;
+	}
+
+	DEBUG(DEBUG_WARNING,
+	      ("Error getting runstate from node %d - assuming runstates not supported\n",
+	       pnn));
+}
+
+/*
+ * Query the runstate of the nodes returned by list_of_connected_nodes().
+ *
+ * Returns a talloc array (child of tmp_ctx) with nodemap->num entries,
+ * indexed by PNN.  Every entry starts out as default_value and is
+ * overwritten for each node that answers successfully, so nodes whose
+ * query fails in a non-fatal way keep the default.
+ *
+ * Returns NULL if the array can not be allocated, or if the control
+ * fails and one of the callbacks flagged the failure as fatal.
+ */
+static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb,
+						    TALLOC_CTX *tmp_ctx,
+						    struct ctdb_node_map_old *nodemap,
+						    enum ctdb_runstate default_value)
+{
+	uint32_t *nodes;
+	enum ctdb_runstate *rs;
+	struct get_runstate_callback_data callback_data;
+	int i;
+
+	rs = talloc_array(tmp_ctx, enum ctdb_runstate, nodemap->num);
+	CTDB_NO_MEMORY_NULL(ctdb, rs);
+	for (i=0; i<nodemap->num; i++) {
+		rs[i] = default_value;
+	}
+
+	callback_data.out = rs;
+	callback_data.fatal = false;
+
+	nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_RUNSTATE,
+				      nodes, 0, TAKEOVER_TIMEOUT(),
+				      true, tdb_null,
+				      get_runstate_callback,
+				      get_runstate_fail_callback,
+				      &callback_data) != 0) {
+		if (callback_data.fatal) {
+			/*
+			 * rs was allocated with talloc_array(), so it
+			 * must be released with talloc_free().  Passing
+			 * a talloc pointer to libc free() is an
+			 * allocator mismatch and corrupts the heap.
+			 */
+			talloc_free(rs);
+			rs = NULL;
+		}
+	}
+	talloc_free(nodes);
+
+	return rs;
+}
+
/* Set internal flags for IP allocation:
* Clear ip flags
* Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
static struct ctdb_ipflags *
set_ipflags_internal(struct ctdb_context *ctdb,
TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap,
+ struct ctdb_node_map_old *nodemap,
uint32_t *tval_noiptakeover,
- uint32_t *tval_noiphostonalldisabled)
+ uint32_t *tval_noiphostonalldisabled,
+ enum ctdb_runstate *runstate)
{
int i;
struct ctdb_ipflags *ipflags;
ipflags[i].noiptakeover = true;
}
+ /* Can not host IPs on node not in RUNNING state */
+ if (runstate[i] != CTDB_RUNSTATE_RUNNING) {
+ ipflags[i].noiphost = true;
+ continue;
+ }
/* Can not host IPs on INACTIVE node */
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
ipflags[i].noiphost = true;
}
+ /* Remember the runstate */
+ ipflags[i].runstate = runstate[i];
}
if (all_nodes_are_disabled(nodemap)) {
static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb,
TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap)
+ struct ctdb_node_map_old *nodemap)
{
uint32_t *tval_noiptakeover;
uint32_t *tval_noiphostonalldisabled;
struct ctdb_ipflags *ipflags;
+ enum ctdb_runstate *runstate;
+
tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
"NoIPTakeover", 0);
return NULL;
}
+ /* Any nodes where CTDB_CONTROL_GET_RUNSTATE is not supported
+ * will default to CTDB_RUNSTATE_RUNNING. This ensures
+ * reasonable behaviour on a mixed cluster during upgrade.
+ */
+ runstate = get_runstate_from_nodes(ctdb, tmp_ctx, nodemap,
+ CTDB_RUNSTATE_RUNNING);
+ if (runstate == NULL) {
+ /* Caller frees tmp_ctx */
+ return NULL;
+ }
+
ipflags = set_ipflags_internal(ctdb, tmp_ctx, nodemap,
tval_noiptakeover,
- tval_noiphostonalldisabled);
+ tval_noiphostonalldisabled,
+ runstate);
talloc_free(tval_noiptakeover);
talloc_free(tval_noiphostonalldisabled);
+ talloc_free(runstate);
return ipflags;
}
+/* State handed to iprealloc_fail_callback() while the IPREALLOCATED
+ * control is outstanding in ctdb_takeover_run(). */
+struct iprealloc_callback_data {
+ /* One flag per node, indexed by PNN; set when that node should be
+ * retried using the RUN_EVENTSCRIPTS control */
+ bool *retry_nodes;
+ /* Number of nodes flagged for retry; sizes the retry node list */
+ int retry_count;
+ /* Caller's failure callback, invoked when the control timed out;
+ * may be NULL */
+ client_async_callback fail_callback;
+ /* Opaque argument passed through to fail_callback */
+ void *fail_callback_data;
+ /* Node map used to check whether the failing node is INACTIVE */
+ struct ctdb_node_map_old *nodemap;
+};
+
+/*
+ * Failure callback for the IPREALLOCATED control.
+ *
+ * callback points to a struct iprealloc_callback_data.  Failures from
+ * INACTIVE nodes are ignored.  A timeout (-ETIME) is passed on to the
+ * caller's fail callback, if one was registered.  Any other failure
+ * flags the node in retry_nodes[] and bumps retry_count so that the
+ * caller can retry that node.
+ */
+static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+				    int32_t res, TDB_DATA outdata,
+				    void *callback)
+{
+	int numnodes;
+	struct iprealloc_callback_data *cd =
+		(struct iprealloc_callback_data *)callback;
+
+	numnodes = talloc_array_length(cd->retry_nodes);
+	/* pnn is used below as an index into retry_nodes[] and
+	 * nodemap->nodes[], so valid values are 0 .. numnodes-1;
+	 * pnn == numnodes is already out of range. */
+	if (pnn >= (uint32_t)numnodes) {
+		DEBUG(DEBUG_ERR,
+		      ("ipreallocated failure from node %d, "
+		       "but only %d nodes in nodemap\n",
+		       pnn, numnodes));
+		return;
+	}
+
+	/* Can't run the "ipreallocated" event on an INACTIVE node */
+	if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
+		DEBUG(DEBUG_WARNING,
+		      ("ipreallocated failed on inactive node %d, ignoring\n",
+		       pnn));
+		return;
+	}
+
+	switch (res) {
+	case -ETIME:
+		/* If the control timed out then that's a real error,
+		 * so call the real fail callback
+		 */
+		if (cd->fail_callback) {
+			cd->fail_callback(ctdb, pnn, res, outdata,
+					  cd->fail_callback_data);
+		} else {
+			DEBUG(DEBUG_WARNING,
+			      ("iprealloc timed out but no callback registered\n"));
+		}
+		break;
+	default:
+		/* If not a timeout then the ipreallocated eventscript
+		 * (or some setup) failed.  This might have failed
+		 * because the IPREALLOCATED control isn't implemented -
+		 * right now there is no way of knowing because the
+		 * error codes are all folded down to -1.  Flag the
+		 * node so the caller can retry using the EVENTSCRIPT
+		 * control.
+		 */
+		DEBUG(DEBUG_WARNING,
+		      ("ipreallocated failure from node %d, flagging retry\n",
+		       pnn));
+		cd->retry_nodes[pnn] = true;
+		cd->retry_count++;
+		break;
+	}
+}
+
+/* State handed to takeover_run_fail_callback() while RELEASE_IP and
+ * TAKEOVER_IP controls are outstanding in ctdb_takeover_run(). */
+struct takeover_callback_data {
+ /* One flag per nodemap entry (indexed by position in nodemap, not
+ * by PNN); set once a failure has been reported for that node */
+ bool *node_failed;
+ /* Caller's failure callback and its opaque argument */
+ client_async_callback fail_callback;
+ void *fail_callback_data;
+ /* Node map used to translate a failing PNN into an index */
+ struct ctdb_node_map_old *nodemap;
+};
+
+/*
+ * Failure callback for the RELEASE_IP/TAKEOVER_IP controls sent by
+ * ctdb_takeover_run().
+ *
+ * callback_data must be a talloc'd struct takeover_callback_data (the
+ * type is checked and a mismatch aborts).  The failing PNN is mapped to
+ * its position in the node map; the caller's fail callback is then
+ * invoked at most once per node, however many controls to that node
+ * fail.
+ */
+static void takeover_run_fail_callback(struct ctdb_context *ctdb,
+				       uint32_t node_pnn, int32_t res,
+				       TDB_DATA outdata, void *callback_data)
+{
+	struct takeover_callback_data *cd =
+		talloc_get_type_abort(callback_data,
+				      struct takeover_callback_data);
+	int i;
+
+	for (i = 0; i < cd->nodemap->num; i++) {
+		if (node_pnn == cd->nodemap->nodes[i].pnn) {
+			break;
+		}
+	}
+
+	if (i == cd->nodemap->num) {
+		DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
+		return;
+	}
+
+	if (!cd->node_failed[i]) {
+		cd->node_failed[i] = true;
+		/* The caller of ctdb_takeover_run() may not have
+		 * registered a fail callback (iprealloc_fail_callback
+		 * guards against this too), so don't call through a
+		 * NULL pointer. */
+		if (cd->fail_callback != NULL) {
+			cd->fail_callback(ctdb, node_pnn, res, outdata,
+					  cd->fail_callback_data);
+		}
+	}
+}
+
/*
make any IP alias changes for public addresses that are necessary
*/
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
+int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
+ uint32_t *force_rebalance_nodes,
client_async_callback fail_callback, void *callback_data)
{
- int i;
+ int i, j, ret;
struct ctdb_public_ip ip;
- struct ctdb_public_ipv4 ipv4;
uint32_t *nodes;
- struct ctdb_public_ip_list *all_ips, *tmp_ip;
+ struct public_ip_list *all_ips, *tmp_ip;
TDB_DATA data;
struct timeval timeout;
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- uint32_t disable_timeout;
struct ctdb_ipflags *ipflags;
+ struct takeover_callback_data *takeover_data;
+ struct iprealloc_callback_data iprealloc_data;
+ bool *retry_data;
+ bool can_host_ips;
/*
* ip failover is completely disabled, just send out the
return -1;
}
- ZERO_STRUCT(ip);
+	/* Short-circuit IP allocation if no nodes are in the RUNNING
+	 * runstate yet, since no nodes will be able to host IPs */
+	can_host_ips = false;
+	for (i=0; i<nodemap->num; i++) {
+		if (ipflags[i].runstate == CTDB_RUNSTATE_RUNNING) {
+			can_host_ips = true;
+			break;
+		}
+	}
+	if (!can_host_ips) {
+		DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
+		/* tmp_ctx was created with talloc_new(ctdb) at the top
+		 * of this function; release it here, as the other exit
+		 * paths do, so this early return does not leak it. */
+		talloc_free(tmp_ctx);
+		return 0;
+	}
/* Do the IP reassignment calculations */
- ctdb_takeover_run_core(ctdb, ipflags, &all_ips);
+ ctdb_takeover_run_core(ctdb, ipflags, &all_ips, force_rebalance_nodes);
- /* The IP flags need to be cleared because they should never
- * be seen outside the IP allocation code.
+ /* Now tell all nodes to release any public IPs they should not
+ * host. This will be a NOOP on nodes that don't currently
+ * hold the given IP.
*/
+ takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
+ CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
- /* The recovery daemon does regular sanity checks of the IPs.
- * However, sometimes it is overzealous and thinks changes are
- * required when they're already underway. This stops the
- * checks for a while before we start moving IPs.
- */
- disable_timeout = ctdb->tunable.takeover_timeout;
- data.dptr = (uint8_t*)&disable_timeout;
- data.dsize = sizeof(disable_timeout);
- if (ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
- CTDB_SRVID_DISABLE_IP_CHECK, data) != 0) {
- DEBUG(DEBUG_INFO,("Failed to disable ip verification\n"));
- }
+ takeover_data->node_failed = talloc_zero_array(tmp_ctx,
+ bool, nodemap->num);
+ CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
+ takeover_data->fail_callback = fail_callback;
+ takeover_data->fail_callback_data = callback_data;
+ takeover_data->nodemap = nodemap;
- /* now tell all nodes to delete any alias that they should not
- have. This will be a NOOP on nodes that don't currently
- hold the given alias */
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
- async_data->fail_callback = fail_callback;
- async_data->callback_data = callback_data;
+ async_data->fail_callback = takeover_run_fail_callback;
+ async_data->callback_data = takeover_data;
+ ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
+
+ /* Send a RELEASE_IP to all nodes that should not be hosting
+ * each IP. For each IP, all but one of these will be
+ * redundant. However, the redundant ones are used to tell
+ * nodes which node should be hosting the IP so that commands
+ * like "ctdb ip" can display a particular node's idea of who
+ * is hosting what. */
for (i=0;i<nodemap->num;i++) {
/* don't talk to unconnected nodes, but do talk to banned nodes */
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
*/
continue;
}
- if (tmp_ip->addr.sa.sa_family == AF_INET) {
- ipv4.pnn = tmp_ip->pnn;
- ipv4.sin = tmp_ip->addr.ip;
-
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ipv4);
- data.dptr = (uint8_t *)&ipv4;
- state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
- 0, CTDB_CONTROL_RELEASE_IPv4, 0,
- data, async_data,
- &timeout, NULL);
- } else {
- ip.pnn = tmp_ip->pnn;
- ip.addr = tmp_ip->addr;
-
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ip);
- data.dptr = (uint8_t *)&ip;
- state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
- 0, CTDB_CONTROL_RELEASE_IP, 0,
- data, async_data,
- &timeout, NULL);
- }
+ ip.pnn = tmp_ip->pnn;
+ ip.addr = tmp_ip->addr;
+ timeout = TAKEOVER_TIMEOUT();
+ data.dsize = sizeof(ip);
+ data.dptr = (uint8_t *)&ip;
+ state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
+ 0, CTDB_CONTROL_RELEASE_IP, 0,
+ data, async_data,
+ &timeout, NULL);
if (state == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
talloc_free(tmp_ctx);
return -1;
}
-
+
ctdb_client_async_add(async_data, state);
}
}
talloc_free(async_data);
- /* tell all nodes to get their own IPs */
+ /* For each IP, send a TAKEOVER_IP to the node that should be
+ * hosting it. Many of these will often be redundant (since
+ * the allocation won't have changed) but they can be useful
+ * to recover from inconsistencies. */
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
continue;
}
- if (tmp_ip->addr.sa.sa_family == AF_INET) {
- ipv4.pnn = tmp_ip->pnn;
- ipv4.sin = tmp_ip->addr.ip;
+ ip.pnn = tmp_ip->pnn;
+ ip.addr = tmp_ip->addr;
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ipv4);
- data.dptr = (uint8_t *)&ipv4;
- state = ctdb_control_send(ctdb, tmp_ip->pnn,
- 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
- data, async_data,
- &timeout, NULL);
- } else {
- ip.pnn = tmp_ip->pnn;
- ip.addr = tmp_ip->addr;
-
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ip);
- data.dptr = (uint8_t *)&ip;
- state = ctdb_control_send(ctdb, tmp_ip->pnn,
- 0, CTDB_CONTROL_TAKEOVER_IP, 0,
- data, async_data,
- &timeout, NULL);
- }
+ timeout = TAKEOVER_TIMEOUT();
+ data.dsize = sizeof(ip);
+ data.dptr = (uint8_t *)&ip;
+ state = ctdb_control_send(ctdb, tmp_ip->pnn,
+ 0, CTDB_CONTROL_TAKEOVER_IP, 0,
+ data, async_data, &timeout, NULL);
if (state == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
talloc_free(tmp_ctx);
return -1;
}
-
+
ctdb_client_async_add(async_data, state);
}
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
}
ipreallocated:
- /*
+ /*
* Tell all nodes to run eventscripts to process the
* "ipreallocated" event. This can do a lot of things,
* including restarting services to reconfigure them if public
* IPs have moved. Once upon a time this event only used to
- * update natwg.
+ * update natgw.
*/
+ retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
+ CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
+ iprealloc_data.retry_nodes = retry_data;
+ iprealloc_data.retry_count = 0;
+ iprealloc_data.fail_callback = fail_callback;
+ iprealloc_data.fail_callback_data = callback_data;
+ iprealloc_data.nodemap = nodemap;
+
nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
- if (ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
- nodes, 0, TAKEOVER_TIMEOUT(),
- false, tdb_null,
- NULL, fail_callback,
- callback_data) != 0) {
- DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
+ ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
+ nodes, 0, TAKEOVER_TIMEOUT(),
+ false, tdb_null,
+ NULL, iprealloc_fail_callback,
+ &iprealloc_data);
+ if (ret != 0) {
+ /* If the control failed then we should retry to any
+ * nodes flagged by iprealloc_fail_callback using the
+ * EVENTSCRIPT control. This is a best-effort at
+ * backward compatibility when running a mixed cluster
+ * where some nodes have not yet been upgraded to
+ * support the IPREALLOCATED control.
+ */
+ DEBUG(DEBUG_WARNING,
+ ("Retry ipreallocated to some nodes using eventscript control\n"));
+
+ nodes = talloc_array(tmp_ctx, uint32_t,
+ iprealloc_data.retry_count);
+ CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+ j = 0;
+ for (i=0; i<nodemap->num; i++) {
+ if (iprealloc_data.retry_nodes[i]) {
+ nodes[j] = i;
+ j++;
+ }
+ }
+
+ data.dptr = discard_const("ipreallocated");
+ data.dsize = strlen((char *)data.dptr) + 1;
+ ret = ctdb_client_async_control(ctdb,
+ CTDB_CONTROL_RUN_EVENTSCRIPTS,
+ nodes, 0, TAKEOVER_TIMEOUT(),
+ false, data,
+ NULL, fail_callback,
+ callback_data);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
+ }
}
talloc_free(tmp_ctx);
- return 0;
+ return ret;
}
/*
called by a client to inform us of a TCP connection that it is managing
that should tickled with an ACK when IP takeover is done
- we handle both the old ipv4 style of packets as well as the new ipv4/6
- pdus.
*/
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
TDB_DATA indata)
{
- struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
- struct ctdb_control_tcp *old_addr = NULL;
- struct ctdb_control_tcp_addr new_addr;
+ struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
struct ctdb_control_tcp_addr *tcp_sock = NULL;
struct ctdb_tcp_list *tcp;
struct ctdb_tcp_connection t;
struct ctdb_vnn *vnn;
ctdb_sock_addr addr;
- switch (indata.dsize) {
- case sizeof(struct ctdb_control_tcp):
- old_addr = (struct ctdb_control_tcp *)indata.dptr;
- ZERO_STRUCT(new_addr);
- tcp_sock = &new_addr;
- tcp_sock->src.ip = old_addr->src;
- tcp_sock->dest.ip = old_addr->dest;
- break;
- case sizeof(struct ctdb_control_tcp_addr):
- tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
- break;
- default:
- DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
- "to ctdb_control_tcp_client. size was %d but "
- "only allowed sizes are %lu and %lu\n",
- (int)indata.dsize,
- (long unsigned)sizeof(struct ctdb_control_tcp),
- (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
- return -1;
+ /* If we don't have public IPs, tickles are useless */
+ if (ctdb->vnn == NULL) {
+ return 0;
}
+ tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
+
addr = tcp_sock->src;
ctdb_canonicalize_ip(&addr, &tcp_sock->src);
addr = tcp_sock->dest;
struct ctdb_tcp_connection tcp;
struct ctdb_vnn *vnn;
+ /* If we don't have public IPs, tickles are useless */
+ if (ctdb->vnn == NULL) {
+ return 0;
+ }
+
vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
if (vnn == NULL) {
DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
/* If this is the first tickle */
if (tcparray == NULL) {
- tcparray = talloc_size(ctdb->nodes,
- offsetof(struct ctdb_tcp_array, connections) +
- sizeof(struct ctdb_tcp_connection) * 1);
+ tcparray = talloc(vnn, struct ctdb_tcp_array);
CTDB_NO_MEMORY(ctdb, tcparray);
vnn->tcp_array = tcparray;
/* Do we already have this tickle ?*/
tcp.src_addr = p->src_addr;
tcp.dst_addr = p->dst_addr;
- if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
+ if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
ctdb_addr_to_str(&tcp.dst_addr),
ntohs(tcp.dst_addr.ip.sin_port),
tcparray->num+1);
CTDB_NO_MEMORY(ctdb, tcparray->connections);
- vnn->tcp_array = tcparray;
tcparray->connections[tcparray->num].src_addr = p->src_addr;
tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
tcparray->num++;
-
+
DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
ctdb_addr_to_str(&tcp.dst_addr),
ntohs(tcp.dst_addr.ip.sin_port),
{
struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
+ /* If we don't have public IPs, tickles are useless */
+ if (ctdb->vnn == NULL) {
+ return 0;
+ }
+
ctdb_remove_tcp_connection(ctdb, conn);
return 0;
/*
- called when a daemon restarts - send all tickes for all public addresses
- we are serving immediately to the new node.
+ Called when another daemon starts - causes all tickles for all
+ public addresses we are serving to be sent to the new node on the
+ next check. This actually causes the next scheduled call to
+ ctdb_update_tcp_tickles() to update all nodes. This is simple and
+ doesn't require careful error handling.
*/
-int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
+int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
{
-/*XXX here we should send all tickes we are serving to the new node */
+ struct ctdb_vnn *vnn;
+
+ DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
+ (unsigned long) pnn));
+
+ for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+ vnn->tcp_update_needed = true;
+ }
+
return 0;
}
}
-/*
- release all IPs on shutdown
- */
void ctdb_release_all_ips(struct ctdb_context *ctdb)
{
struct ctdb_vnn *vnn;
+ int count = 0;
+
+ if (ctdb->tunable.disable_ip_failover == 1) {
+ return;
+ }
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (!ctdb_sys_have_ip(&vnn->public_address)) {
if (!vnn->iface) {
continue;
}
+
+ /* Don't allow multiple releases at once. Some code,
+ * particularly ctdb_tickle_sentenced_connections() is
+ * not re-entrant */
+ if (vnn->update_in_flight) {
+ DEBUG(DEBUG_WARNING,
+ (__location__
+ " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+ continue;
+ }
+ vnn->update_in_flight = true;
+
+ DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+
ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
ctdb_vnn_iface_string(vnn),
ctdb_addr_to_str(&vnn->public_address),
vnn->public_netmask_bits);
release_kill_clients(ctdb, &vnn->public_address);
ctdb_vnn_unassign_iface(ctdb, vnn);
+ vnn->update_in_flight = false;
+ count++;
}
+
+ DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
}
get list of public IPs
*/
int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
- struct ctdb_req_control *c, TDB_DATA *outdata)
+ struct ctdb_req_control_old *c, TDB_DATA *outdata)
{
int i, num, len;
struct ctdb_all_public_ips *ips;
}
-/*
- get list of public IPs, old ipv4 style. only returns ipv4 addresses
- */
-int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
- struct ctdb_req_control *c, TDB_DATA *outdata)
-{
- int i, num, len;
- struct ctdb_all_public_ipsv4 *ips;
- struct ctdb_vnn *vnn;
-
- /* count how many public ip structures we have */
- num = 0;
- for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
- if (vnn->public_address.sa.sa_family != AF_INET) {
- continue;
- }
- num++;
- }
-
- len = offsetof(struct ctdb_all_public_ipsv4, ips) +
- num*sizeof(struct ctdb_public_ipv4);
- ips = talloc_zero_size(outdata, len);
- CTDB_NO_MEMORY(ctdb, ips);
-
- outdata->dsize = len;
- outdata->dptr = (uint8_t *)ips;
-
- ips->num = num;
- i = 0;
- for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
- if (vnn->public_address.sa.sa_family != AF_INET) {
- continue;
- }
- ips->ips[i].pnn = vnn->pnn;
- ips->ips[i].sin = vnn->public_address.ip;
- i++;
- }
-
- return 0;
-}
-
int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata,
TDB_DATA *outdata)
{
if (vnn->iface == cur) {
info->active_idx = i;
}
- strcpy(info->ifaces[i].name, cur->name);
+ strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1);
info->ifaces[i].link_state = cur->link_up;
info->ifaces[i].references = cur->references;
}
}
int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA *outdata)
{
int i, num, len;
}
int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata)
{
struct ctdb_control_iface_info *info;
struct ctdb_vnn *vnn;
struct ctdb_context *ctdb;
int capture_fd;
- struct fd_event *fde;
+ struct tevent_fd *fde;
trbt_tree_t *connections;
void *private_data;
};
/*
called when we get a read event on the raw socket
*/
-static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
+static void capture_tcp_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
uint16_t flags, void *private_data)
{
struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
ctdb_sock_addr src, dst;
uint32_t ack_seq, seq;
- if (!(flags & EVENT_FD_READ)) {
+ if (!(flags & TEVENT_FD_READ)) {
return;
}
/*
called every second until all sentenced connections have been reset
*/
-static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
+static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
+ struct tevent_timer *te,
struct timeval t, void *private_data)
{
struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
/* try tickling them again in a seconds time
*/
- event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
- ctdb_tickle_sentenced_connections, killtcp);
+ tevent_add_timer(killtcp->ctdb->ev, killtcp,
+ timeval_current_ofs(1, 0),
+ ctdb_tickle_sentenced_connections, killtcp);
}
/*
if (killtcp->fde == NULL) {
- killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
- EVENT_FD_READ,
- capture_tcp_handler, killtcp);
+ killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
+ killtcp->capture_fd,
+ TEVENT_FD_READ,
+ capture_tcp_handler, killtcp);
tevent_fd_set_auto_close(killtcp->fde);
/* We also need to set up some events to tickle all these connections
until they are all reset
*/
- event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
- ctdb_tickle_sentenced_connections, killtcp);
+ tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
+ ctdb_tickle_sentenced_connections, killtcp);
}
/* tickle him once now */
*/
int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
+ struct ctdb_tcp_connection *killtcp = (struct ctdb_tcp_connection *)indata.dptr;
return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
}
* list->tickles.num) {
DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
return -1;
- }
+ }
+
+ DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
+ ctdb_addr_to_str(&list->addr)));
vnn = find_public_ip_vnn(ctdb, &list->addr);
if (vnn == NULL) {
- DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
+ DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
ctdb_addr_to_str(&list->addr)));
return 1;
talloc_free(vnn->tcp_array);
vnn->tcp_array = NULL;
- tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
+ tcparray = talloc(vnn, struct ctdb_tcp_array);
CTDB_NO_MEMORY(ctdb, tcparray);
tcparray->num = list->tickles.num;
tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
CTDB_NO_MEMORY(ctdb, tcparray->connections);
- memcpy(tcparray->connections, &list->tickles.connections[0],
+ memcpy(tcparray->connections, &list->tickles.connections[0],
sizeof(struct ctdb_tcp_connection)*tcparray->num);
/* We now have a new fresh tickle list array for this vnn */
- vnn->tcp_array = talloc_steal(vnn, tcparray);
-
+ vnn->tcp_array = tcparray;
+
return 0;
}
/*
set the list of all tcp tickles for a public address
*/
-static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
- struct timeval timeout, uint32_t destnode,
- ctdb_sock_addr *addr,
- struct ctdb_tcp_array *tcparray)
+static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
+ ctdb_sock_addr *addr,
+ struct ctdb_tcp_array *tcparray)
{
int ret, num;
TDB_DATA data;
memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
}
- ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
+ ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
CTDB_CONTROL_SET_TCP_TICKLE_LIST,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
if (ret != 0) {
/*
perform tickle updates if required
*/
-static void ctdb_update_tcp_tickles(struct event_context *ev,
- struct timed_event *te,
- struct timeval t, void *private_data)
+static void ctdb_update_tcp_tickles(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int ret;
if (!vnn->tcp_update_needed) {
continue;
}
- ret = ctdb_ctrl_set_tcp_tickles(ctdb,
- TAKEOVER_TIMEOUT(),
- CTDB_BROADCAST_CONNECTED,
- &vnn->public_address,
- vnn->tcp_array);
+ ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
+ &vnn->public_address,
+ vnn->tcp_array);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
ctdb_addr_to_str(&vnn->public_address)));
+ } else {
+ DEBUG(DEBUG_INFO,
+ ("Sent tickle update for public address %s\n",
+ ctdb_addr_to_str(&vnn->public_address)));
+ vnn->tcp_update_needed = false;
}
}
- event_add_timed(ctdb->ev, ctdb->tickle_update_context,
- timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
- ctdb_update_tcp_tickles, ctdb);
-}
-
+ tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
+ timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+ ctdb_update_tcp_tickles, ctdb);
+}
/*
start periodic update of tcp tickles
{
ctdb->tickle_update_context = talloc_new(ctdb);
- event_add_timed(ctdb->ev, ctdb->tickle_update_context,
- timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
- ctdb_update_tcp_tickles, ctdb);
+ tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
+ timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+ ctdb_update_tcp_tickles, ctdb);
}
/*
send a control_gratuitous arp
*/
-static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+static void send_gratious_arp(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
{
int ret;
struct control_gratious_arp *arp = talloc_get_type(private_data,
return;
}
- event_add_timed(arp->ctdb->ev, arp,
- timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
- send_gratious_arp, arp);
+ tevent_add_timer(arp->ctdb->ev, arp,
+ timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
+ send_gratious_arp, arp);
}
arp->iface = talloc_strdup(arp, gratious_arp->iface);
CTDB_NO_MEMORY(ctdb, arp->iface);
arp->count = 0;
-
- event_add_timed(arp->ctdb->ev, arp,
- timeval_zero(), send_gratious_arp, arp);
+
+ tevent_add_timer(arp->ctdb->ev, arp,
+ timeval_zero(), send_gratious_arp, arp);
return 0;
}
return 0;
}
+/* State carried from ctdb_control_del_public_address() to
+ * delete_ip_callback() while the RELEASE_IP control is in flight. */
+struct delete_ip_callback_state {
+ /* The original request; delete_ip_callback() sends its reply */
+ struct ctdb_req_control_old *c;
+};
+
/*
called when releaseip event finishes for del_public_address
*/
-static void delete_ip_callback(struct ctdb_context *ctdb, int status,
- void *private_data)
+static void delete_ip_callback(struct ctdb_context *ctdb,
+ int32_t status, TDB_DATA data,
+ const char *errormsg,
+ void *private_data)
{
+ struct delete_ip_callback_state *state =
+ talloc_get_type(private_data, struct delete_ip_callback_state);
+
+ /* If release failed then fail. */
+ ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
talloc_free(private_data);
}
-int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
+int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, bool *async_reply)
{
struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
struct ctdb_vnn *vnn;
- int ret;
/* verify the size of indata */
if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
/* walk over all public addresses until we find a match */
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
- TALLOC_CTX *mem_ctx = talloc_new(ctdb);
-
- DLIST_REMOVE(ctdb->vnn, vnn);
- talloc_steal(mem_ctx, vnn);
- ctdb_remove_orphaned_ifaces(ctdb, vnn, mem_ctx);
- if (vnn->pnn != ctdb->pnn) {
- if (vnn->iface != NULL) {
- ctdb_vnn_unassign_iface(ctdb, vnn);
+ if (vnn->pnn == ctdb->pnn) {
+ struct delete_ip_callback_state *state;
+ struct ctdb_public_ip *ip;
+ TDB_DATA data;
+ int ret;
+
+ vnn->delete_pending = true;
+
+ state = talloc(ctdb,
+ struct delete_ip_callback_state);
+ CTDB_NO_MEMORY(ctdb, state);
+ state->c = c;
+
+ ip = talloc(state, struct ctdb_public_ip);
+ if (ip == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Out of memory\n"));
+ talloc_free(state);
+ return -1;
+ }
+ ip->pnn = -1;
+ ip->addr = pub->addr;
+
+ data.dsize = sizeof(struct ctdb_public_ip);
+ data.dptr = (unsigned char *)ip;
+
+ ret = ctdb_daemon_send_control(ctdb,
+ ctdb_get_pnn(ctdb),
+ 0,
+ CTDB_CONTROL_RELEASE_IP,
+ 0, 0,
+ data,
+ delete_ip_callback,
+ state);
+ if (ret == -1) {
+ DEBUG(DEBUG_ERR,
+ (__location__ "Unable to send "
+ "CTDB_CONTROL_RELEASE_IP\n"));
+ talloc_free(state);
+ return -1;
}
- talloc_free(mem_ctx);
- return 0;
- }
- vnn->pnn = -1;
- ret = ctdb_event_script_callback(ctdb,
- mem_ctx, delete_ip_callback, mem_ctx,
- false,
- CTDB_EVENT_RELEASE_IP,
- "%s %s %u",
- ctdb_vnn_iface_string(vnn),
- ctdb_addr_to_str(&vnn->public_address),
- vnn->public_netmask_bits);
- if (vnn->iface != NULL) {
- ctdb_vnn_unassign_iface(ctdb, vnn);
- }
- if (ret != 0) {
- return -1;
+ state->c = talloc_steal(state, c);
+ *async_reply = true;
+ } else {
+ /* This IP is not hosted on the
+ * current node so just delete it
+ * now. */
+ do_delete_ip(ctdb, vnn);
}
+
return 0;
}
}
+ DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
+ ctdb_addr_to_str(&pub->addr)));
return -1;
}
struct ipreallocated_callback_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
};
static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
/* A control to run the ipreallocated event */
int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
bool *async_reply)
{
int ret;
ret = ctdb_event_script_callback(ctdb, state,
ctdb_ipreallocated_callback, state,
- false, CTDB_EVENT_IPREALLOCATED,
+ CTDB_EVENT_IPREALLOCATED,
"%s", "");
if (ret != 0) {
node has the expected ip allocation.
This is verified against ctdb->ip_tree
*/
-int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
+int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+ struct ctdb_all_public_ips *ips,
+ uint32_t pnn)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
int i;
if (ctdb->ip_tree == NULL) {
for (i=0; i<ips->num; i++) {
tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
if (tmp_ip == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+ DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
return -1;
}
}
if (tmp_ip->pnn != ips->ips[i].pnn) {
- DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
+ DEBUG(DEBUG_ERR,
+ ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
+ pnn,
+ ctdb_addr_to_str(&ips->ips[i].addr),
+ ips->ips[i].pnn, tmp_ip->pnn));
return -1;
}
}
int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
+
+ /* IP tree is never built if DisableIPFailover is set */
+ if (ctdb->tunable.disable_ip_failover != 0) {
+ return 0;
+ }
if (ctdb->ip_tree == NULL) {
DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
return 0;
}
+/* Discard the IP assignment tree: TALLOC_FREE() frees ctdb->ip_tree
+ * and resets the pointer to NULL. */
+void clear_ip_assignment_tree(struct ctdb_context *ctdb)
+{
+ TALLOC_FREE(ctdb->ip_tree);
+}
struct ctdb_reloadips_handle {
struct ctdb_context *ctdb;
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
int status;
int fd[2];
pid_t child;
- struct fd_event *fde;
+ struct tevent_fd *fde;
};
static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
return 0;
}
-static void ctdb_reloadips_timeout_event(struct event_context *ev,
- struct timed_event *te,
- struct timeval t, void *private_data)
+static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
{
struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
talloc_free(h);
-}
+}
-static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde,
- uint16_t flags, void *private_data)
+static void ctdb_reloadips_child_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
{
struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
char res;
int ret;
- ret = read(h->fd[0], &res, 1);
+ ret = sys_read(h->fd[0], &res, 1);
if (ret < 1 || res != 0) {
DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
res = 1;
TALLOC_CTX *mem_ctx = talloc_new(NULL);
struct ctdb_all_public_ips *ips;
struct ctdb_vnn *vnn;
+ struct client_async_data *async_data;
+ struct timeval timeout;
+ TDB_DATA data;
+ struct ctdb_client_control_state *state;
+ bool first_add;
int i, ret;
- /* read the ip allocation from the local node */
- ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
+ CTDB_NO_MEMORY(ctdb, mem_ctx);
+
+ /* Read IPs from local node */
+ ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
+ CTDB_CURRENT_NODE, mem_ctx, &ips);
if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get public ips from local node\n"));
+ DEBUG(DEBUG_ERR,
+ ("Unable to fetch public IPs from local node\n"));
talloc_free(mem_ctx);
return -1;
}
- /* re-read the public ips file */
+ /* Read IPs file - this is safe since this is a child process */
ctdb->vnn = NULL;
if (ctdb_set_public_addresses(ctdb, false) != 0) {
DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
talloc_free(mem_ctx);
return -1;
- }
+ }
+ async_data = talloc_zero(mem_ctx, struct client_async_data);
+ CTDB_NO_MEMORY(ctdb, async_data);
- /* check the previous list of ips and scan for ips that have been
- dropped.
- */
+ /* Compare IPs between node and file for IPs to be deleted */
for (i = 0; i < ips->num; i++) {
+ /* Look for this node IP in the re-read public addresses file */
for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
- if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
+ if (ctdb_same_ip(&vnn->public_address,
+ &ips->ips[i].addr)) {
+ /* IP is still in file */
break;
}
}
- /* we need to delete this ip, no longer available on this node */
if (vnn == NULL) {
- struct ctdb_control_ip_iface pub;
+ /* Delete IP ips->ips[i] */
+ struct ctdb_control_ip_iface *pub;
- DEBUG(DEBUG_NOTICE,("RELOADIPS: IP%s is no longer available on this node. Deleting it.\n", ctdb_addr_to_str(&ips->ips[i].addr)));
- pub.addr = ips->ips[i].addr;
- pub.mask = 0;
- pub.len = 0;
+ DEBUG(DEBUG_NOTICE,
+ ("IP %s no longer configured, deleting it\n",
+ ctdb_addr_to_str(&ips->ips[i].addr)));
- ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
- return -1;
+ pub = talloc_zero(mem_ctx,
+ struct ctdb_control_ip_iface);
+ CTDB_NO_MEMORY(ctdb, pub);
+
+ pub->addr = ips->ips[i].addr;
+ pub->mask = 0;
+ pub->len = 0;
+
+ timeout = TAKEOVER_TIMEOUT();
+
+ data.dsize = offsetof(struct ctdb_control_ip_iface,
+ iface) + pub->len;
+ data.dptr = (uint8_t *)pub;
+
+ state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
+ CTDB_CONTROL_DEL_PUBLIC_IP,
+ 0, data, async_data,
+ &timeout, NULL);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
+ goto failed;
}
+
+ ctdb_client_async_add(async_data, state);
}
}
-
- /* loop over all new ones and check the ones we need to add */
+ /* Compare IPs between node and file for IPs to be added */
+ first_add = true;
for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
for (i = 0; i < ips->num; i++) {
- if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
+ if (ctdb_same_ip(&vnn->public_address,
+ &ips->ips[i].addr)) {
+ /* IP already on node */
break;
}
}
if (i == ips->num) {
- struct ctdb_control_ip_iface pub;
+ /* Add IP vnn->public_address */
+ struct ctdb_control_ip_iface *pub;
const char *ifaces = NULL;
+ uint32_t len;
int iface = 0;
- DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
-
- pub.addr = vnn->public_address;
- pub.mask = vnn->public_netmask_bits;
+ DEBUG(DEBUG_NOTICE,
+ ("New IP %s configured, adding it\n",
+ ctdb_addr_to_str(&vnn->public_address)));
+ if (first_add) {
+ uint32_t pnn = ctdb_get_pnn(ctdb);
+
+ data.dsize = sizeof(pnn);
+ data.dptr = (uint8_t *)&pnn;
+
+ ret = ctdb_client_send_message(
+ ctdb,
+ CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_REBALANCE_NODE,
+ data);
+ if (ret != 0) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
+ }
+ first_add = false;
+ }
ifaces = vnn->ifaces[0];
iface = 1;
while (vnn->ifaces[iface] != NULL) {
- ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
+ ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
+ vnn->ifaces[iface]);
iface++;
}
- pub.len = strlen(ifaces)+1;
- memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
- ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
- return -1;
+ len = strlen(ifaces) + 1;
+ pub = talloc_zero_size(mem_ctx,
+ offsetof(struct ctdb_control_ip_iface, iface) + len);
+ CTDB_NO_MEMORY(ctdb, pub);
+
+ pub->addr = vnn->public_address;
+ pub->mask = vnn->public_netmask_bits;
+ pub->len = len;
+ memcpy(&pub->iface[0], ifaces, pub->len);
+
+ timeout = TAKEOVER_TIMEOUT();
+
+ data.dsize = offsetof(struct ctdb_control_ip_iface,
+ iface) + pub->len;
+ data.dptr = (uint8_t *)pub;
+
+ state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
+ CTDB_CONTROL_ADD_PUBLIC_IP,
+ 0, data, async_data,
+ &timeout, NULL);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
+ goto failed;
}
+
+ ctdb_client_async_add(async_data, state);
}
}
+ if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
+ goto failed;
+ }
+
+ talloc_free(mem_ctx);
return 0;
+
+failed:
+ talloc_free(mem_ctx);
+ return -1;
}
/* This control is sent to force the node to re-read the public addresses file
and drop any addresses we should no longer host, and add new addresses
that we are now able to host
*/
-int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
+int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
{
struct ctdb_reloadips_handle *h;
pid_t parent = getpid();
close(h->fd[0]);
debug_extra = talloc_asprintf(NULL, "reloadips:");
+ ctdb_set_process_name("ctdb_reloadips");
if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
res = -1;
}
}
- write(h->fd[1], &res, 1);
+ sys_write(h->fd[1], &res, 1);
/* make sure we die when our parent dies */
while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
talloc_set_destructor(h, ctdb_reloadips_destructor);
- h->fde = event_add_fd(ctdb->ev, h, h->fd[0],
- EVENT_FD_READ, ctdb_reloadips_child_handler,
- (void *)h);
+ h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
+ ctdb_reloadips_child_handler, (void *)h);
tevent_fd_set_auto_close(h->fde);
- event_add_timed(ctdb->ev, h,
- timeval_current_ofs(120, 0),
- ctdb_reloadips_timeout_event, h);
+ tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
+ ctdb_reloadips_timeout_event, h);
/* we reply later */
*async_reply = true;