#include "lib/util/dlinklist.h"
#include "lib/util/debug.h"
#include "lib/util/samba_util.h"
+#include "lib/util/util_process.h"
#include "ctdb_private.h"
#include "ctdb_client.h"
#define CTDB_ARP_REPEAT 3
/* Flags used in IP allocation algorithms. */
-struct ctdb_ipflags {
- bool noiptakeover;
- bool noiphost;
- enum ctdb_runstate runstate;
+enum ipalloc_algorithm { /* selects the routine that ipalloc() dispatches to */
+ IPALLOC_DETERMINISTIC, /* handled by ip_alloc_deterministic_ips() */
+ IPALLOC_NONDETERMINISTIC, /* handled by ip_alloc_nondeterministic_ips() */
+ IPALLOC_LCP2, /* handled by ip_alloc_lcp2() */
+};
+
+struct ipalloc_state { /* inputs and working data for one IP allocation calculation */
+ uint32_t num; /* number of nodes; ipalloc_state_init() sizes every per-node array with it */
+
+ /* Arrays with data for each node */
+ struct ctdb_public_ip_list_old **known_public_ips; /* filled by ctdb_reload_remote_public_ips(); entries for skipped (inactive) nodes stay NULL */
+ struct ctdb_public_ip_list_old **available_public_ips; /* filled the same way, but fetched with CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE */
+ bool *noiptakeover; /* true: node may not take over IPs (checked in can_node_takeover_ip()) */
+ bool *noiphost; /* true: node may not host IPs (checked in can_node_host_ip()) */
+
+ struct public_ip_list *all_ips; /* merged list from create_merged_ip_list(); each entry's pnn is its assigned node or -1 */
+ enum ipalloc_algorithm algorithm; /* set from the tunables in ipalloc_state_init() */
+ uint32_t no_ip_failback; /* copy of ctdb->tunable.no_ip_failback; 1 makes the nondeterministic and LCP2 routines skip failback */
+ uint32_t *force_rebalance_nodes; /* NULL, or a talloc array of node numbers that lcp2_init() marks as rebalance candidates */
};
struct ctdb_interface {
/* Given a physical node, return the number of
public addresses that is currently assigned to this node.
*/
-static int node_ip_coverage(struct ctdb_context *ctdb, int32_t pnn,
- struct public_ip_list *ips)
+static int node_ip_coverage(int32_t pnn, struct public_ip_list *ips)
{
int num=0;
/* Can the given node host the given IP: is the public IP known to the
* node and is NOIPHOST unset?
*/
-static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
- struct ctdb_ipflags ipflags,
+static bool can_node_host_ip(struct ipalloc_state *ipalloc_state,
+ int32_t pnn,
struct public_ip_list *ip)
{
struct ctdb_public_ip_list_old *public_ips;
int i;
- if (ipflags.noiphost) {
+ if (ipalloc_state->noiphost[pnn]) {
return false;
}
- public_ips = ctdb->nodes[pnn]->available_public_ips;
+ public_ips = ipalloc_state->available_public_ips[pnn];
if (public_ips == NULL) {
return false;
return false;
}
-static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
- struct ctdb_ipflags ipflags,
+static bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state,
+ int32_t pnn, /* node number; used as index into ipalloc_state's per-node arrays */
struct public_ip_list *ip)
{
- if (ipflags.noiptakeover) {
+ if (ipalloc_state->noiptakeover[pnn]) { /* this node is flagged as not allowed to take over IPs */
return false;
}
- return can_node_host_ip(ctdb, pnn, ipflags, ip);
+ return can_node_host_ip(ipalloc_state, pnn, ip); /* otherwise takeover is allowed exactly when the node can host ip */
}
/* search the node lists list for a node to takeover this ip.
pick the node that currently are serving the least number of ips
so that the ips get spread out evenly.
*/
-static int find_takeover_node(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *ip,
- struct public_ip_list *all_ips)
+static int find_takeover_node(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *ip)
{
int pnn, min=0, num;
int i, numnodes;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
pnn = -1;
for (i=0; i<numnodes; i++) {
/* verify that this node can serve this ip */
- if (!can_node_takeover_ip(ctdb, i, ipflags[i], ip)) {
+ if (!can_node_takeover_ip(ipalloc_state, i, ip)) {
/* no it couldnt so skip to the next node */
continue;
}
- num = node_ip_coverage(ctdb, i, all_ips);
+ num = node_ip_coverage(i, ipalloc_state->all_ips);
/* was this the first node we checked ? */
if (pnn == -1) {
pnn = i;
min = num;
}
}
- }
+ }
if (pnn == -1) {
DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
ctdb_addr_to_str(&ip->addr)));
uint32_t pnn);
static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
+ struct ipalloc_state *ipalloc_state,
struct ctdb_node_map_old *nodemap)
{
int j;
int ret;
- if (ctdb->num_nodes != nodemap->num) {
- DEBUG(DEBUG_ERR, (__location__ " ctdb->num_nodes (%d) != nodemap->num (%d) invalid param\n",
- ctdb->num_nodes, nodemap->num));
+ if (ipalloc_state->num != nodemap->num) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
+ ipalloc_state->num, nodemap->num));
return -1;
}
for (j=0; j<nodemap->num; j++) {
- /* For readability */
- struct ctdb_node *node = ctdb->nodes[j];
-
- /* release any existing data */
- TALLOC_FREE(node->known_public_ips);
- TALLOC_FREE(node->available_public_ips);
-
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
/* Retrieve the list of known public IPs from the node */
ret = ctdb_ctrl_get_public_ips_flags(ctdb,
TAKEOVER_TIMEOUT(),
- node->pnn,
+ j,
ctdb->nodes,
0,
- &node->known_public_ips);
+ &ipalloc_state->known_public_ips[j]);
if (ret != 0) {
DEBUG(DEBUG_ERR,
("Failed to read known public IPs from node: %u\n",
- node->pnn));
+ j));
return -1;
}
if (ctdb->do_checkpublicip) {
verify_remote_ip_allocation(ctdb,
- node->known_public_ips,
- node->pnn);
+ ipalloc_state->known_public_ips[j],
+ j);
}
/* Retrieve the list of available public IPs from the node */
ret = ctdb_ctrl_get_public_ips_flags(ctdb,
TAKEOVER_TIMEOUT(),
- node->pnn,
+ j,
ctdb->nodes,
CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
- &node->available_public_ips);
+ &ipalloc_state->available_public_ips[j]);
if (ret != 0) {
DEBUG(DEBUG_ERR,
("Failed to read available public IPs from node: %u\n",
- node->pnn));
+ j));
return -1;
}
}
}
static struct public_ip_list *
-create_merged_ip_list(struct ctdb_context *ctdb)
+create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
{
int i, j;
struct public_ip_list *ip_list;
struct ctdb_public_ip_list_old *public_ips;
- if (ctdb->ip_tree != NULL) {
- talloc_free(ctdb->ip_tree);
- ctdb->ip_tree = NULL;
- }
+ TALLOC_FREE(ctdb->ip_tree);
ctdb->ip_tree = trbt_create(ctdb, 0);
- for (i=0;i<ctdb->num_nodes;i++) {
- public_ips = ctdb->nodes[i]->known_public_ips;
+ for (i=0; i < ctdb->num_nodes; i++) {
+ public_ips = ipalloc_state->known_public_ips[i];
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
/* there were no public ips for this node */
if (public_ips == NULL) {
continue;
- }
+ }
- for (j=0;j<public_ips->num;j++) {
+ for (j=0; j < public_ips->num; j++) {
struct public_ip_list *tmp_ip;
tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
uint32_t sum = 0;
- for (t=ips; t != NULL; t=t->next) {
+ for (t = ips; t != NULL; t = t->next) {
if (t->pnn != pnn) {
continue;
}
uint32_t imbalance = 0;
- for (t=all_ips; t!=NULL; t=t->next) {
+ for (t = all_ips; t != NULL; t = t->next) {
if (t->pnn != pnn) {
continue;
}
/* Allocate any unassigned IPs just by looping through the IPs and
* finding the best node for each.
*/
-static void basic_allocate_unassigned(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
+static void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state)
{
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
- /* loop over all ip's and find a physical node to cover for
+ /* loop over all ip's and find a physical node to cover for
each unassigned ip.
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- if (find_takeover_node(ctdb, ipflags, tmp_ip, all_ips)) {
- DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
+ if (find_takeover_node(ipalloc_state, t)) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to find node to cover ip %s\n",
+ ctdb_addr_to_str(&t->addr)));
}
}
}
/* Basic non-deterministic rebalancing algorithm.
*/
-static void basic_failback(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
+static void basic_failback(struct ipalloc_state *ipalloc_state,
int num_ips)
{
int i, numnodes;
int maxnode, maxnum, minnode, minnum, num, retries;
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
retries = 0;
try_again:
serving the most and the node serving the least ip's are
not greater than 1.
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
continue;
}
minnode = -1;
for (i=0; i<numnodes; i++) {
/* only check nodes that can actually serve this ip */
- if (!can_node_takeover_ip(ctdb, i, ipflags[i], tmp_ip)) {
+ if (!can_node_takeover_ip(ipalloc_state, i,
+ t)) {
/* no it couldnt so skip to the next node */
continue;
}
- num = node_ip_coverage(ctdb, i, all_ips);
+ num = node_ip_coverage(i, ipalloc_state->all_ips);
if (maxnode == -1) {
maxnode = i;
maxnum = num;
}
}
if (maxnode == -1) {
- DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
+ DEBUG(DEBUG_WARNING,
+ (__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
+ ctdb_addr_to_str(&t->addr)));
continue;
}
try to do this a limited number of times since we dont
want to spend too much time balancing the ip coverage.
*/
- if ( (maxnum > minnum+1)
- && (retries < (num_ips + 5)) ){
- struct public_ip_list *tmp;
+ if ((maxnum > minnum+1) &&
+ (retries < (num_ips + 5))){
+ struct public_ip_list *tt;
/* Reassign one of maxnode's VNNs */
- for (tmp=all_ips;tmp;tmp=tmp->next) {
- if (tmp->pnn == maxnode) {
- (void)find_takeover_node(ctdb, ipflags, tmp, all_ips);
+ for (tt = ipalloc_state->all_ips; tt != NULL; tt = tt->next) {
+ if (tt->pnn == maxnode) {
+ (void)find_takeover_node(ipalloc_state,
+ tt);
retries++;
goto try_again;;
}
}
}
-static void lcp2_init(struct ctdb_context *tmp_ctx,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- uint32_t *force_rebalance_nodes,
+static bool lcp2_init(struct ipalloc_state *ipalloc_state,
uint32_t **lcp2_imbalances,
bool **rebalance_candidates)
{
int i, numnodes;
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
- *rebalance_candidates = talloc_array(tmp_ctx, bool, numnodes);
- CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
- *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, numnodes);
- CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
+ *rebalance_candidates = talloc_array(ipalloc_state, bool, numnodes);
+ if (*rebalance_candidates == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ return false;
+ }
+ *lcp2_imbalances = talloc_array(ipalloc_state, uint32_t, numnodes);
+ if (*lcp2_imbalances == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ return false;
+ }
for (i=0; i<numnodes; i++) {
- (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
+ (*lcp2_imbalances)[i] =
+ lcp2_imbalance(ipalloc_state->all_ips, i);
/* First step: assume all nodes are candidates */
(*rebalance_candidates)[i] = true;
}
* keep state and invalidate it every time the recovery master
* changes.
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn != -1) {
- (*rebalance_candidates)[tmp_ip->pnn] = false;
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn != -1) {
+ (*rebalance_candidates)[t->pnn] = false;
}
}
/* 3rd step: if a node is forced to re-balance then
we allow failback onto the node */
- if (force_rebalance_nodes == NULL) {
- return;
+ if (ipalloc_state->force_rebalance_nodes == NULL) {
+ return true;
}
- for (i = 0; i < talloc_array_length(force_rebalance_nodes); i++) {
- uint32_t pnn = force_rebalance_nodes[i];
+ for (i = 0;
+ i < talloc_array_length(ipalloc_state->force_rebalance_nodes);
+ i++) {
+ uint32_t pnn = ipalloc_state->force_rebalance_nodes[i];
if (pnn >= numnodes) {
DEBUG(DEBUG_ERR,
(__location__ "unknown node %u\n", pnn));
("Forcing rebalancing of IPs to node %u\n", pnn));
(*rebalance_candidates)[pnn] = true;
}
+
+ return true;
}
/* Allocate any unassigned addresses using the LCP2 algorithm to find
* the IP/node combination that will cost the least.
*/
-static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
+static void lcp2_allocate_unassigned(struct ipalloc_state *ipalloc_state,
uint32_t *lcp2_imbalances)
{
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
int dstnode, numnodes;
int minnode;
bool should_loop = true;
bool have_unassigned = true;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
while (have_unassigned && should_loop) {
should_loop = false;
minip = NULL;
/* loop over each unassigned ip. */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn != -1) {
+ for (t = ipalloc_state->all_ips; t != NULL ; t = t->next) {
+ if (t->pnn != -1) {
continue;
}
- for (dstnode=0; dstnode<numnodes; dstnode++) {
+ for (dstnode = 0; dstnode < numnodes; dstnode++) {
/* only check nodes that can actually takeover this ip */
- if (!can_node_takeover_ip(ctdb, dstnode,
- ipflags[dstnode],
- tmp_ip)) {
+ if (!can_node_takeover_ip(ipalloc_state,
+ dstnode,
+ t)) {
/* no it couldnt so skip to the next node */
continue;
}
- dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
+ dstdsum = ip_distance_2_sum(&(t->addr),
+ ipalloc_state->all_ips,
+ dstnode);
dstimbl = lcp2_imbalances[dstnode] + dstdsum;
- DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
- ctdb_addr_to_str(&(tmp_ip->addr)),
- dstnode,
- dstimbl - lcp2_imbalances[dstnode]));
+ DEBUG(DEBUG_DEBUG,
+ (" %s -> %d [+%d]\n",
+ ctdb_addr_to_str(&(t->addr)),
+ dstnode,
+ dstimbl - lcp2_imbalances[dstnode]));
if ((minnode == -1) || (dstdsum < mindsum)) {
minnode = dstnode;
minimbl = dstimbl;
mindsum = dstdsum;
- minip = tmp_ip;
+ minip = t;
should_loop = true;
}
}
/* There might be a better way but at least this is clear. */
have_unassigned = false;
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
have_unassigned = true;
}
}
* well optimise.
*/
if (have_unassigned) {
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to find node to cover ip %s\n",
+ ctdb_addr_to_str(&t->addr)));
}
}
}
* to move IPs from, determines the best IP/destination node
* combination to move from the source node.
*/
-static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
+static bool lcp2_failback_candidate(struct ipalloc_state *ipalloc_state,
int srcnode,
uint32_t *lcp2_imbalances,
bool *rebalance_candidates)
uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
uint32_t minsrcimbl, mindstimbl;
struct public_ip_list *minip;
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
/* Find an IP and destination node that best reduces imbalance. */
srcimbl = 0;
mindstnode = -1;
mindstimbl = 0;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n",
srcnode, lcp2_imbalances[srcnode]));
- for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
/* Only consider addresses on srcnode. */
- if (tmp_ip->pnn != srcnode) {
+ if (t->pnn != srcnode) {
continue;
}
/* What is this IP address costing the source node? */
- srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
+ srcdsum = ip_distance_2_sum(&(t->addr),
+ ipalloc_state->all_ips,
+ srcnode);
srcimbl = lcp2_imbalances[srcnode] - srcdsum;
/* Consider this IP address would cost each potential
* to do gratuitous failover of IPs just to make minor
* balance improvements.
*/
- for (dstnode=0; dstnode<numnodes; dstnode++) {
+ for (dstnode = 0; dstnode < numnodes; dstnode++) {
if (!rebalance_candidates[dstnode]) {
continue;
}
/* only check nodes that can actually takeover this ip */
- if (!can_node_takeover_ip(ctdb, dstnode,
- ipflags[dstnode], tmp_ip)) {
+ if (!can_node_takeover_ip(ipalloc_state, dstnode,
+ t)) {
/* no it couldnt so skip to the next node */
continue;
}
- dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
+ dstdsum = ip_distance_2_sum(&(t->addr),
+ ipalloc_state->all_ips,
+ dstnode);
dstimbl = lcp2_imbalances[dstnode] + dstdsum;
DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
srcnode, -srcdsum,
- ctdb_addr_to_str(&(tmp_ip->addr)),
+ ctdb_addr_to_str(&(t->addr)),
dstnode, dstdsum));
if ((dstimbl < lcp2_imbalances[srcnode]) &&
((mindstnode == -1) || \
((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
- minip = tmp_ip;
+ minip = t;
minsrcimbl = srcimbl;
mindstnode = dstnode;
mindstimbl = dstimbl;
if (mindstnode != -1) {
/* We found a move that makes things better... */
- DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
- srcnode, minsrcimbl - lcp2_imbalances[srcnode],
- ctdb_addr_to_str(&(minip->addr)),
- mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
+ DEBUG(DEBUG_INFO,
+ ("%d [%d] -> %s -> %d [+%d]\n",
+ srcnode, minsrcimbl - lcp2_imbalances[srcnode],
+ ctdb_addr_to_str(&(minip->addr)),
+ mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
lcp2_imbalances[srcnode] = minsrcimbl;
* node with the highest LCP2 imbalance, and then determines the best
* IP/destination node combination to move from the source node.
*/
-static void lcp2_failback(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
+static void lcp2_failback(struct ipalloc_state *ipalloc_state,
uint32_t *lcp2_imbalances,
bool *rebalance_candidates)
{
struct lcp2_imbalance_pnn * lips;
bool again;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
try_again:
/* Put the imbalances and nodes into an array, sort them and
*/
DEBUG(DEBUG_DEBUG,("+++++++++++++++++++++++++++++++++++++++++\n"));
DEBUG(DEBUG_DEBUG,("Selecting most imbalanced node from:\n"));
- lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, numnodes);
- for (i=0; i<numnodes; i++) {
+ lips = talloc_array(ipalloc_state, struct lcp2_imbalance_pnn, numnodes);
+ for (i = 0; i < numnodes; i++) {
lips[i].imbalance = lcp2_imbalances[i];
lips[i].pnn = i;
DEBUG(DEBUG_DEBUG,(" %d [%d]\n", i, lcp2_imbalances[i]));
lcp2_cmp_imbalance_pnn);
again = false;
- for (i=0; i<numnodes; i++) {
+ for (i = 0; i < numnodes; i++) {
/* This means that all nodes had 0 or 1 addresses, so
* can't be imbalanced.
*/
break;
}
- if (lcp2_failback_candidate(ctdb,
- ipflags,
- all_ips,
+ if (lcp2_failback_candidate(ipalloc_state,
lips[i].pnn,
lcp2_imbalances,
rebalance_candidates)) {
}
}
-static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
+static void unassign_unsuitable_ips(struct ipalloc_state *ipalloc_state) /* resets pnn to -1 for every IP in all_ips whose current node fails can_node_host_ip() */
{
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
/* verify that the assigned nodes can serve that public ip
and set it to -1 if not
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) { /* already unassigned, nothing to check */
continue;
}
- if (!can_node_host_ip(ctdb, tmp_ip->pnn,
- ipflags[tmp_ip->pnn], tmp_ip) != 0) {
+ if (!can_node_host_ip(ipalloc_state, t->pnn, t) != 0) { /* the trailing "!= 0" is redundant: !x is already 0 or 1 */
/* this node can not serve this ip. */
DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
- ctdb_addr_to_str(&(tmp_ip->addr)),
- tmp_ip->pnn));
- tmp_ip->pnn = -1;
+ ctdb_addr_to_str(&(t->addr)),
+ t->pnn));
+ t->pnn = -1; /* logged above first, so the message still shows the old node */
}
}
}
-static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
+static bool ip_alloc_deterministic_ips(struct ipalloc_state *ipalloc_state)
{
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
int i, numnodes;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
/* Allocate IPs to nodes in a modulo fashion so that IPs will
* available/unavailable nodes.
*/
- for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
- tmp_ip->pnn = i % numnodes;
+ for (i = 0, t = ipalloc_state->all_ips; t!= NULL; t = t->next, i++) {
+ t->pnn = i % numnodes;
}
/* IP failback doesn't make sense with deterministic
* IPs, since the modulo step above implicitly fails
* back IPs to their "home" node.
*/
- if (1 == ctdb->tunable.no_ip_failback) {
+ if (1 == ipalloc_state->no_ip_failback) {
DEBUG(DEBUG_WARNING, ("WARNING: 'NoIPFailback' set but ignored - incompatible with 'DeterministicIPs\n"));
}
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
+ unassign_unsuitable_ips(ipalloc_state);
- basic_allocate_unassigned(ctdb, ipflags, all_ips);
+ basic_allocate_unassigned(ipalloc_state);
/* No failback here! */
+
+ return true;
}
-static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
+static bool ip_alloc_nondeterministic_ips(struct ipalloc_state *ipalloc_state) /* unassign unsuitable IPs, allocate unassigned ones, then optionally rebalance; every path returns true */
{
/* This should be pushed down into basic_failback. */
- struct public_ip_list *tmp_ip;
+ struct public_ip_list *t;
int num_ips = 0;
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ for (t = ipalloc_state->all_ips; t != NULL; t = t->next) { /* count the IPs; basic_failback() uses the count to bound its retries */
num_ips++;
}
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
+ unassign_unsuitable_ips(ipalloc_state);
- basic_allocate_unassigned(ctdb, ipflags, all_ips);
+ basic_allocate_unassigned(ipalloc_state);
/* If we don't want IPs to fail back then don't rebalance IPs. */
- if (1 == ctdb->tunable.no_ip_failback) {
- return;
+ if (1 == ipalloc_state->no_ip_failback) { /* read from the state copy, not from ctdb->tunable */
+ return true;
}
/* Now, try to make sure the ip adresses are evenly distributed
across the nodes.
*/
- basic_failback(ctdb, ipflags, all_ips, num_ips);
+ basic_failback(ipalloc_state, num_ips);
+
+ return true;
}
-static void ip_alloc_lcp2(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- uint32_t *force_rebalance_nodes)
+static bool ip_alloc_lcp2(struct ipalloc_state *ipalloc_state)
{
uint32_t *lcp2_imbalances;
bool *rebalance_candidates;
int numnodes, num_rebalance_candidates, i;
+ bool ret = true;
- TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+ unassign_unsuitable_ips(ipalloc_state);
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
-
- lcp2_init(tmp_ctx, ipflags, all_ips,force_rebalance_nodes,
- &lcp2_imbalances, &rebalance_candidates);
+ if (!lcp2_init(ipalloc_state,
+ &lcp2_imbalances, &rebalance_candidates)) {
+ ret = false;
+ goto finished;
+ }
- lcp2_allocate_unassigned(ctdb, ipflags, all_ips, lcp2_imbalances);
+ lcp2_allocate_unassigned(ipalloc_state, lcp2_imbalances);
/* If we don't want IPs to fail back then don't rebalance IPs. */
- if (1 == ctdb->tunable.no_ip_failback) {
+ if (1 == ipalloc_state->no_ip_failback) {
goto finished;
}
* nodes to transfer IPs to. This check is much cheaper than
* continuing on...
*/
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
num_rebalance_candidates = 0;
for (i=0; i<numnodes; i++) {
if (rebalance_candidates[i]) {
/* Now, try to make sure the ip adresses are evenly distributed
across the nodes.
*/
- lcp2_failback(ctdb, ipflags, all_ips,
- lcp2_imbalances, rebalance_candidates);
+ lcp2_failback(ipalloc_state, lcp2_imbalances, rebalance_candidates);
finished:
- talloc_free(tmp_ctx);
+ return ret;
}
static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
}
/* The calculation part of the IP allocation algorithm. */
-static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list **all_ips_p,
- uint32_t *force_rebalance_nodes)
+/* Dispatch to the allocation routine selected by
+ * ipalloc_state->algorithm.  The merged IP list is no longer built
+ * here: ipalloc_state->all_ips is read as-is by the routines.
+ *
+ * Returns the selected routine's result, or false if the algorithm
+ * value matches none of the known enumerators.
+ */
+static bool ipalloc(struct ipalloc_state *ipalloc_state)
{
- /* since nodes only know about those public addresses that
- can be served by that particular node, no single node has
- a full list of all public addresses that exist in the cluster.
- Walk over all node structures and create a merged list of
- all public addresses that exist in the cluster.
-
- keep the tree of ips around as ctdb->ip_tree
- */
- *all_ips_p = create_merged_ip_list(ctdb);
+ /* Start from failure so that an unexpected algorithm value can
+ * never cause an indeterminate value to be returned.  No default:
+ * label is added, so compilers can still warn about enumerators
+ * missing from the switch.
+ */
+ bool ret = false;
- if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
- ip_alloc_lcp2(ctdb, ipflags, *all_ips_p, force_rebalance_nodes);
- } else if (1 == ctdb->tunable.deterministic_public_ips) {
- ip_alloc_deterministic_ips(ctdb, ipflags, *all_ips_p);
- } else {
- ip_alloc_nondeterministic_ips(ctdb, ipflags, *all_ips_p);
+ switch (ipalloc_state->algorithm) {
+ case IPALLOC_LCP2:
+ ret = ip_alloc_lcp2(ipalloc_state);
+ break;
+ case IPALLOC_DETERMINISTIC:
+ ret = ip_alloc_deterministic_ips(ipalloc_state);
+ break;
+ case IPALLOC_NONDETERMINISTIC:
+ ret = ip_alloc_nondeterministic_ips(ipalloc_state);
+ break;
}
/* at this point ->pnn is the node which will own each IP
or -1 if there is no node that can cover this ip
*/
- return;
+ return ret;
}
struct get_tunable_callback_data {
return tvals;
}
-struct get_runstate_callback_data {
- enum ctdb_runstate *out;
- bool fatal;
-};
-
-static void get_runstate_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback_data)
-{
- struct get_runstate_callback_data *cd =
- (struct get_runstate_callback_data *)callback_data;
- int size;
-
- if (res != 0) {
- /* Already handled in fail callback */
- return;
- }
-
- if (outdata.dsize != sizeof(uint32_t)) {
- DEBUG(DEBUG_ERR,("Wrong size of returned data when getting runstate from node %d. Expected %d bytes but received %d bytes\n",
- pnn, (int)sizeof(uint32_t),
- (int)outdata.dsize));
- cd->fatal = true;
- return;
- }
-
- size = talloc_array_length(cd->out);
- if (pnn >= size) {
- DEBUG(DEBUG_ERR,("Got reply from node %d but nodemap only has %d entries\n",
- pnn, size));
- return;
- }
-
- cd->out[pnn] = (enum ctdb_runstate)*(uint32_t *)outdata.dptr;
-}
-
-static void get_runstate_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback)
-{
- struct get_runstate_callback_data *cd =
- (struct get_runstate_callback_data *)callback;
-
- switch (res) {
- case -ETIME:
- DEBUG(DEBUG_ERR,
- ("Timed out getting runstate from node %d\n", pnn));
- cd->fatal = true;
- break;
- default:
- DEBUG(DEBUG_WARNING,
- ("Error getting runstate from node %d - assuming runstates not supported\n",
- pnn));
- }
-}
-
-static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap,
- enum ctdb_runstate default_value)
-{
- uint32_t *nodes;
- enum ctdb_runstate *rs;
- struct get_runstate_callback_data callback_data;
- int i;
-
- rs = talloc_array(tmp_ctx, enum ctdb_runstate, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, rs);
- for (i=0; i<nodemap->num; i++) {
- rs[i] = default_value;
- }
-
- callback_data.out = rs;
- callback_data.fatal = false;
-
- nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
- if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_RUNSTATE,
- nodes, 0, TAKEOVER_TIMEOUT(),
- true, tdb_null,
- get_runstate_callback,
- get_runstate_fail_callback,
- &callback_data) != 0) {
- if (callback_data.fatal) {
- free(rs);
- rs = NULL;
- }
- }
- talloc_free(nodes);
-
- return rs;
-}
-
/* Set internal flags for IP allocation:
* Clear ip flags
* Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
* else
* Set NOIPHOST ip flags for disabled nodes
*/
-static struct ctdb_ipflags *
-set_ipflags_internal(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap,
- uint32_t *tval_noiptakeover,
- uint32_t *tval_noiphostonalldisabled,
- enum ctdb_runstate *runstate)
+static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap,
+ uint32_t *tval_noiptakeover,
+ uint32_t *tval_noiphostonalldisabled)
{
int i;
- struct ctdb_ipflags *ipflags;
-
- /* Clear IP flags - implicit due to talloc_zero */
- ipflags = talloc_zero_array(tmp_ctx, struct ctdb_ipflags, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, ipflags);
for (i=0;i<nodemap->num;i++) {
/* Can not take IPs on node with NoIPTakeover set */
if (tval_noiptakeover[i] != 0) {
- ipflags[i].noiptakeover = true;
+ ipalloc_state->noiptakeover[i] = true;
}
- /* Can not host IPs on node not in RUNNING state */
- if (runstate[i] != CTDB_RUNSTATE_RUNNING) {
- ipflags[i].noiphost = true;
- continue;
- }
/* Can not host IPs on INACTIVE node */
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
- ipflags[i].noiphost = true;
+ ipalloc_state->noiphost[i] = true;
}
- /* Remember the runstate */
- ipflags[i].runstate = runstate[i];
}
if (all_nodes_are_disabled(nodemap)) {
*/
for (i=0;i<nodemap->num;i++) {
if (tval_noiphostonalldisabled[i] != 0) {
- ipflags[i].noiphost = true;
+ ipalloc_state->noiphost[i] = true;
}
}
} else {
*/
for (i=0;i<nodemap->num;i++) {
if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
- ipflags[i].noiphost = true;
+ ipalloc_state->noiphost[i] = true;
}
}
}
-
- return ipflags;
}
-static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap)
+static bool set_ipflags(struct ctdb_context *ctdb,
+ struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap) /* fetches two per-node tunables and applies them through set_ipflags_internal(); false if either fetch returns NULL */
{
uint32_t *tval_noiptakeover;
uint32_t *tval_noiphostonalldisabled;
- struct ctdb_ipflags *ipflags;
- enum ctdb_runstate *runstate;
-
- tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
+ tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap, /* ipalloc_state is passed where the old code passed tmp_ctx */
"NoIPTakeover", 0);
if (tval_noiptakeover == NULL) {
- return NULL;
+ return false;
}
tval_noiphostonalldisabled =
- get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
+ get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
"NoIPHostOnAllDisabled", 0);
if (tval_noiphostonalldisabled == NULL) {
/* Caller frees tmp_ctx */
+ return false; /* NOTE(review): tval_noiptakeover is not freed on this path - presumably reclaimed when the caller frees ipalloc_state; confirm */
+ }
+
+ set_ipflags_internal(ipalloc_state, nodemap,
+ tval_noiptakeover,
+ tval_noiphostonalldisabled); /* the flags are written into ipalloc_state, so no flag array is returned any more */
+
+ talloc_free(tval_noiptakeover);
+ talloc_free(tval_noiphostonalldisabled);
+
+ return true;
+}
+
+static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx) /* allocates a zeroed ipalloc_state under mem_ctx plus its per-node arrays; returns NULL on allocation failure */
+{
+ struct ipalloc_state *ipalloc_state =
+ talloc_zero(mem_ctx, struct ipalloc_state);
+ if (ipalloc_state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
return NULL;
}
- /* Any nodes where CTDB_CONTROL_GET_RUNSTATE is not supported
- * will default to CTDB_RUNSTATE_RUNNING. This ensures
- * reasonable behaviour on a mixed cluster during upgrade.
- */
- runstate = get_runstate_from_nodes(ctdb, tmp_ctx, nodemap,
- CTDB_RUNSTATE_RUNNING);
- if (runstate == NULL) {
- /* Caller frees tmp_ctx */
+ ipalloc_state->num = ctdb->num_nodes; /* every per-node array below gets this many elements */
+ ipalloc_state->known_public_ips =
+ talloc_zero_array(ipalloc_state,
+ struct ctdb_public_ip_list_old *,
+ ipalloc_state->num);
+ if (ipalloc_state->known_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state); /* the arrays use ipalloc_state as their talloc context, so this releases them as well */
+ return NULL;
+ }
+ ipalloc_state->available_public_ips =
+ talloc_zero_array(ipalloc_state,
+ struct ctdb_public_ip_list_old *,
+ ipalloc_state->num);
+ if (ipalloc_state->available_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
+ return NULL;
+ }
+ ipalloc_state->noiptakeover =
+ talloc_zero_array(ipalloc_state,
+ bool,
+ ipalloc_state->num); /* zeroed, so every flag starts out false */
+ if (ipalloc_state->noiptakeover == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
+ return NULL;
+ }
+ ipalloc_state->noiphost =
+ talloc_zero_array(ipalloc_state,
+ bool,
+ ipalloc_state->num);
+ if (ipalloc_state->noiphost == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
return NULL;
}
- ipflags = set_ipflags_internal(ctdb, tmp_ctx, nodemap,
- tval_noiptakeover,
- tval_noiphostonalldisabled,
- runstate);
+ if (1 == ctdb->tunable.lcp2_public_ip_assignment) { /* checked first, so LCP2 wins when both tunables are 1 */
+ ipalloc_state->algorithm = IPALLOC_LCP2;
+ } else if (1 == ctdb->tunable.deterministic_public_ips) {
+ ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
+ } else {
+ ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
+ }
- talloc_free(tval_noiptakeover);
- talloc_free(tval_noiphostonalldisabled);
- talloc_free(runstate);
+ ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback; /* copied so the allocation routines read it from the state */
- return ipflags;
+ return ipalloc_state; /* all_ips and force_rebalance_nodes are still zero/NULL here */
}
struct iprealloc_callback_data {
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- struct ctdb_ipflags *ipflags;
+ struct ipalloc_state *ipalloc_state;
struct takeover_callback_data *takeover_data;
struct iprealloc_callback_data iprealloc_data;
bool *retry_data;
goto ipreallocated;
}
- ipflags = set_ipflags(ctdb, tmp_ctx, nodemap);
- if (ipflags == NULL) {
+ ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
+ if (ipalloc_state == NULL) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
talloc_free(tmp_ctx);
return -1;
}
/* Fetch known/available public IPs from each active node */
- ret = ctdb_reload_remote_public_ips(ctdb, nodemap);
+ ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
if (ret != 0) {
talloc_free(tmp_ctx);
return -1;
/* Short-circuit IP allocation if no node has available IPs */
can_host_ips = false;
- for (i=0; i < ctdb->num_nodes; i++) {
- if (ctdb->nodes[i]->available_public_ips != NULL) {
+ for (i=0; i < ipalloc_state->num; i++) {
+ if (ipalloc_state->available_public_ips[i] != NULL) {
can_host_ips = true;
}
}
return 0;
}
+ /* since nodes only know about those public addresses that
+ can be served by that particular node, no single node has
+ a full list of all public addresses that exist in the cluster.
+ Walk over all node structures and create a merged list of
+ all public addresses that exist in the cluster.
+
+ keep the tree of ips around as ctdb->ip_tree
+ */
+ all_ips = create_merged_ip_list(ctdb, ipalloc_state);
+ ipalloc_state->all_ips = all_ips;
+
+ ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
+
/* Do the IP reassignment calculations */
- ctdb_takeover_run_core(ctdb, ipflags, &all_ips, force_rebalance_nodes);
+ ipalloc(ipalloc_state);
/* Now tell all nodes to release any public IPs they should not
* host. This will be a NOOP on nodes that don't currently
}
-/*
- called by a daemon to inform us of a TCP connection that one of its
- clients managing that should tickled with an ACK when IP takeover is
- done
- */
-static void ctdb_remove_connection(struct ctdb_context *ctdb, struct ctdb_connection *conn)
+static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
{
struct ctdb_connection *tcpp;
- struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst);
if (vnn == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
- ctdb_addr_to_str(&conn->dst)));
return;
}
*/
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
{
+ struct ctdb_vnn *vnn;
struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
/* If we don't have public IPs, tickles are useless */
return 0;
}
- ctdb_remove_connection(ctdb, conn);
+ vnn = find_public_ip_vnn(ctdb, &conn->dst);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " unable to find public address %s\n",
+ ctdb_addr_to_str(&conn->dst)));
+ return 0;
+ }
+
+ ctdb_remove_connection(vnn, conn);
return 0;
}
/*
  Drain client->tcp_list: each entry is unlinked from the list and the
  vnn for its connection's destination address is looked up.  If no
  vnn is found an error is logged and the entry is skipped.  The
  connection is passed to ctdb_remove_connection() only when
  vnn->pnn equals client->ctdb->pnn; otherwise it is left alone.
 */
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
{
while (client->tcp_list) {
+ struct ctdb_vnn *vnn;
struct ctdb_tcp_list *tcp = client->tcp_list;
+ struct ctdb_connection *conn = &tcp->connection;
+
/* Unlink the head entry first so the loop still makes progress
 * on the "continue" path below. */
DLIST_REMOVE(client->tcp_list, tcp);
- ctdb_remove_connection(client->ctdb, &tcp->connection);
+
+ vnn = find_public_ip_vnn(client->ctdb,
+ &conn->dst);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " unable to find public address %s\n",
+ ctdb_addr_to_str(&conn->dst)));
+ continue;
+ }
+
+ /* If the IP address is hosted on this node then
+ * remove the connection. */
+ if (vnn->pnn == client->ctdb->pnn) {
+ ctdb_remove_connection(vnn, conn);
+ }
+
+ /* Otherwise this function has been called because the
+ * server IP address has been released to another node
+ * and the client has exited. This means that we
+ * should not delete the connection information. The
+ * takeover node processes connections too. */
}
}
return 1;
}
+ if (vnn->pnn == ctdb->pnn) {
+ DEBUG(DEBUG_INFO,
+ ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
+ ctdb_addr_to_str(&list->addr)));
+ return 0;
+ }
+
/* remove any old ticklelist we might have */
talloc_free(vnn->tcp_array);
vnn->tcp_array = NULL;
close(h->fd[0]);
debug_extra = talloc_asprintf(NULL, "reloadips:");
- ctdb_set_process_name("ctdb_reloadips");
+ prctl_set_comment("ctdb_reloadips");
if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
res = -1;