You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "includes.h"
-#include "tdb.h"
-#include "lib/util/dlinklist.h"
+#include "replace.h"
#include "system/network.h"
#include "system/filesys.h"
+#include "system/time.h"
#include "system/wait.h"
-#include "../include/ctdb_private.h"
-#include "../common/rb_tree.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+#include "lib/util/util_process.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/rb_tree.h"
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
#define CTDB_ARP_REPEAT 3
/* Flags used in IP allocation algorithms. */
-struct ctdb_ipflags {
- bool noiptakeover;
- bool noiphost;
+enum ipalloc_algorithm { /* value kept in ipalloc_state.algorithm; the code that switches on it is not visible in this excerpt */
+ IPALLOC_DETERMINISTIC,
+ IPALLOC_NONDETERMINISTIC, /* presumably the basic_allocate_unassigned()/basic_failback() path -- TODO confirm */
+ IPALLOC_LCP2, /* presumably the lcp2_*() routines -- TODO confirm */
+};
+
+struct ipalloc_state {
+ uint32_t num; /* node count: used as the node-loop bound and checked against nodemap->num */
+
+ /* Arrays with data for each node, indexed by PNN */
+ struct ctdb_public_ip_list_old **known_public_ips; /* filled by ctdb_reload_remote_public_ips(); a NULL entry is treated as "no public ips for this node" */
+ struct ctdb_public_ip_list_old **available_public_ips; /* fetched with CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE; a NULL entry makes can_node_host_ip() return false */
+ bool *noiptakeover; /* true: can_node_takeover_ip() rejects this node */
+ bool *noiphost; /* true: can_node_host_ip() rejects this node */
+
+ enum ipalloc_algorithm algorithm; /* use not visible in this excerpt */
+ uint32_t no_ip_failback; /* use not visible in this excerpt; presumably mirrors a tunable -- TODO confirm */
};
-struct ctdb_iface {
- struct ctdb_iface *prev, *next;
+struct ctdb_interface {
+ struct ctdb_interface *prev, *next;
const char *name;
bool link_up;
uint32_t references;
static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
{
- struct ctdb_iface *i;
+ struct ctdb_interface *i;
- /* Verify that we dont have an entry for this ip yet */
+ /* Verify that we don't have an entry for this ip yet */
for (i=ctdb->ifaces;i;i=i->next) {
if (strcmp(i->name, iface) == 0) {
return 0;
}
/* create a new structure for this interface */
- i = talloc_zero(ctdb, struct ctdb_iface);
+ i = talloc_zero(ctdb, struct ctdb_interface);
CTDB_NO_MEMORY_FATAL(ctdb, i);
i->name = talloc_strdup(i, iface);
CTDB_NO_MEMORY(ctdb, i->name);
- /*
- * If link_up defaults to true then IPs can be allocated to a
- * node during the first recovery. However, then an interface
- * could have its link marked down during the startup event,
- * causing the IP to move almost immediately. If link_up
- * defaults to false then, during normal operation, IPs added
- * to a new interface can't be assigned until a monitor cycle
- * has occurred and marked the new interfaces up. This makes
- * IP allocation unpredictable. The following is a neat
- * compromise: early in startup link_up defaults to false, so
- * IPs can't be assigned, and after startup IPs can be
- * assigned immediately.
- */
- i->link_up = (ctdb->runstate == CTDB_RUNSTATE_RUNNING);
+
+ i->link_up = true;
DLIST_ADD(ctdb->ifaces, i);
* causes problems... :-)
*/
static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
- struct ctdb_vnn *vnn,
- TALLOC_CTX *mem_ctx)
+ struct ctdb_vnn *vnn)
{
- struct ctdb_iface *i;
+ struct ctdb_interface *i, *next;
/* For each interface, check if there's an IP using it. */
- for(i=ctdb->ifaces; i; i=i->next) {
+ for (i = ctdb->ifaces; i != NULL; i = next) {
struct ctdb_vnn *tv;
bool found;
+ next = i->next;
/* Only consider interfaces named in the given VNN. */
if (!vnn_has_interface_with_name(vnn, i->name)) {
if (!found) {
/* None of the VNNs are using this interface. */
DLIST_REMOVE(ctdb->ifaces, i);
- /* Caller will free mem_ctx when convenient. */
- talloc_steal(mem_ctx, i);
+ talloc_free(i);
}
}
}
-static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
- const char *iface)
+static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
+ const char *iface)
{
- struct ctdb_iface *i;
+ struct ctdb_interface *i;
for (i=ctdb->ifaces;i;i=i->next) {
if (strcmp(i->name, iface) == 0) {
return NULL;
}
-static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
- struct ctdb_vnn *vnn)
+static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
+ struct ctdb_vnn *vnn)
{
int i;
- struct ctdb_iface *cur = NULL;
- struct ctdb_iface *best = NULL;
+ struct ctdb_interface *cur = NULL;
+ struct ctdb_interface *best = NULL;
for (i=0; vnn->ifaces[i]; i++) {
static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
struct ctdb_vnn *vnn)
{
- struct ctdb_iface *best = NULL;
+ struct ctdb_interface *best = NULL;
if (vnn->iface) {
DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
{
int i;
+ /* Nodes that are not RUNNING can not host IPs */
+ if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
+ return false;
+ }
+
+ if (vnn->delete_pending) {
+ return false;
+ }
+
if (vnn->iface && vnn->iface->link_up) {
return true;
}
for (i=0; vnn->ifaces[i]; i++) {
- struct ctdb_iface *cur;
+ struct ctdb_interface *cur;
cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
if (cur == NULL) {
*/
struct ctdb_tcp_list {
struct ctdb_tcp_list *prev, *next;
- struct ctdb_tcp_connection connection;
+ struct ctdb_connection connection;
};
/*
/*
send a gratuitous arp
*/
-static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
+static void ctdb_control_send_arp(struct tevent_context *ev,
+ struct tevent_timer *te,
struct timeval t, void *private_data)
{
struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
tcparray = arp->tcparray;
if (tcparray) {
for (i=0;i<tcparray->num;i++) {
- struct ctdb_tcp_connection *tcon;
+ struct ctdb_connection *tcon;
tcon = &tcparray->connections[i];
DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
- (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
- ctdb_addr_to_str(&tcon->src_addr),
- (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
+ (unsigned)ntohs(tcon->dst.ip.sin_port),
+ ctdb_addr_to_str(&tcon->src),
+ (unsigned)ntohs(tcon->src.ip.sin_port)));
ret = ctdb_sys_send_tcp(
- &tcon->src_addr,
- &tcon->dst_addr,
+ &tcon->src,
+ &tcon->dst,
0, 0, 0);
if (ret != 0) {
DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
- ctdb_addr_to_str(&tcon->src_addr)));
+ ctdb_addr_to_str(&tcon->src)));
}
}
}
return;
}
- event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
- timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
- ctdb_control_send_arp, arp);
+ tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
+ timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
+ ctdb_control_send_arp, arp);
}
static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
vnn->tcp_update_needed = true;
}
- event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
- timeval_zero(), ctdb_control_send_arp, arp);
+ tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
+ timeval_zero(), ctdb_control_send_arp, arp);
return 0;
}
struct takeover_callback_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c; /* control to reply to; the release-IP path talloc_steal()s it onto this state */
ctdb_sock_addr *addr;
struct ctdb_vnn *vnn;
};
struct ctdb_do_takeip_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
struct ctdb_vnn *vnn;
};
take over an ip address
*/
static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
struct ctdb_vnn *vnn)
{
int ret;
state,
ctdb_do_takeip_callback,
state,
- false,
CTDB_EVENT_TAKE_IP,
"%s %s %u",
ctdb_vnn_iface_string(vnn),
}
struct ctdb_do_updateip_state {
- struct ctdb_req_control *c;
- struct ctdb_iface *old;
+ struct ctdb_req_control_old *c;
+ struct ctdb_interface *old;
struct ctdb_vnn *vnn;
};
update (move) an ip address
*/
static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
struct ctdb_vnn *vnn)
{
int ret;
struct ctdb_do_updateip_state *state;
- struct ctdb_iface *old = vnn->iface;
+ struct ctdb_interface *old = vnn->iface;
const char *new_name;
if (vnn->update_in_flight) {
state,
ctdb_do_updateip_callback,
state,
- false,
CTDB_EVENT_UPDATE_IP,
"%s %s %s %u",
state->old->name,
take over an ip address
*/
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata,
bool *async_reply)
{
bool have_ip = false;
bool do_updateip = false;
bool do_takeip = false;
- struct ctdb_iface *best_iface = NULL;
+ struct ctdb_interface *best_iface = NULL;
if (pip->pnn != ctdb->pnn) {
DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
return 0;
}
- if (ctdb->do_checkpublicip) {
+ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
have_ip = ctdb_sys_have_ip(&pip->addr);
}
best_iface = ctdb_vnn_best_iface(ctdb, vnn);
return 0;
}
-/*
- takeover an ip address old v4 style
- */
-int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
- TDB_DATA indata,
- bool *async_reply)
-{
- TDB_DATA data;
-
- data.dsize = sizeof(struct ctdb_public_ip);
- data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
- CTDB_NO_MEMORY(ctdb, data.dptr);
-
- memcpy(data.dptr, indata.dptr, indata.dsize);
- return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
-}
-
/*
kill any clients that are registered with a IP that is being released
*/
ctdb_addr_to_str(&ip->addr)));
if (ctdb_same_ip(&tmp_addr, addr)) {
- struct ctdb_client *client = ctdb_reqid_find(ctdb,
- ip->client_id,
- struct ctdb_client);
+ struct ctdb_client *client = reqid_find(ctdb->idr,
+ ip->client_id,
+ struct ctdb_client);
DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
ip->client_id,
ctdb_addr_to_str(&ip->addr),
}
}
+static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
+{ /* Remove a public address (VNN) from this node and free it; vnn must not be used after return. */
+ DLIST_REMOVE(ctdb->vnn, vnn); /* unlink from the ctdb->vnn list */
+ ctdb_vnn_unassign_iface(ctdb, vnn);
+ ctdb_remove_orphaned_ifaces(ctdb, vnn); /* unlinks and frees interfaces named by vnn that no VNN is using */
+ talloc_free(vnn); /* the visible caller sets its own pointer to NULL afterwards */
+}
+
/*
called when releaseip event finishes
*/
ctdb_ban_self(ctdb);
}
- if (ctdb->do_checkpublicip && ctdb_sys_have_ip(state->addr)) {
- DEBUG(DEBUG_ERR, ("IP %s still hosted during release IP callback, failing\n",
- ctdb_addr_to_str(state->addr)));
- ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
- talloc_free(state);
- return;
+ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
+ if (ctdb_sys_have_ip(state->addr)) {
+ DEBUG(DEBUG_ERR,
+ ("IP %s still hosted during release IP callback, failing\n",
+ ctdb_addr_to_str(state->addr)));
+ ctdb_request_control_reply(ctdb, state->c,
+ NULL, -1, NULL);
+ talloc_free(state);
+ return;
+ }
}
/* send a message to all clients of this node telling them
ctdb_vnn_unassign_iface(ctdb, state->vnn);
+ /* Process the IP if it has been marked for deletion */
+ if (state->vnn->delete_pending) {
+ do_delete_ip(ctdb, state->vnn);
+ state->vnn = NULL;
+ }
+
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
{
- state->vnn->update_in_flight = false;
+ if (state->vnn != NULL) { /* the release callback sets vnn to NULL after do_delete_ip() has freed the VNN */
+ state->vnn->update_in_flight = false; /* the release is over: clear the in-flight marker */
+ }
return 0;
}
release an ip address
*/
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata,
bool *async_reply)
{
* intended new node. The following causes makes ctdbd ignore
* a release for any address it doesn't host.
*/
- if (ctdb->do_checkpublicip) {
+ if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
if (!ctdb_sys_have_ip(&pip->addr)) {
DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
ctdb_addr_to_str(&pip->addr),
return -1;
}
- if (ctdb->do_checkpublicip) {
- iface = ctdb_sys_find_ifname(&pip->addr);
- if (iface == NULL) {
- DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
- return 0;
- }
- if (vnn->iface == NULL) {
- DEBUG(DEBUG_WARNING,
- ("Public IP %s is hosted on interface %s but we have no VNN\n",
- ctdb_addr_to_str(&pip->addr),
- iface));
- } else if (strcmp(iface, ctdb_vnn_iface_string(vnn)) != 0) {
- DEBUG(DEBUG_WARNING,
- ("Public IP %s is hosted on inteterface %s but VNN says %s\n",
- ctdb_addr_to_str(&pip->addr),
- iface,
- ctdb_vnn_iface_string(vnn)));
- /* Should we fix vnn->iface? If we do, what
- * happens to reference counts?
- */
- }
- } else {
- iface = strdup(ctdb_vnn_iface_string(vnn));
- }
+ iface = strdup(ctdb_vnn_iface_string(vnn));
DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
ctdb_addr_to_str(&pip->addr),
pip->pnn));
state = talloc(ctdb, struct takeover_callback_state);
- CTDB_NO_MEMORY(ctdb, state);
+ if (state == NULL) {
+ ctdb_set_error(ctdb, "Out of memory at %s:%d",
+ __FILE__, __LINE__);
+ free(iface);
+ return -1;
+ }
state->c = talloc_steal(state, c);
state->addr = talloc(state, ctdb_sock_addr);
- CTDB_NO_MEMORY(ctdb, state->addr);
+ if (state->addr == NULL) {
+ ctdb_set_error(ctdb, "Out of memory at %s:%d",
+ __FILE__, __LINE__);
+ free(iface);
+ talloc_free(state);
+ return -1;
+ }
*state->addr = pip->addr;
state->vnn = vnn;
ret = ctdb_event_script_callback(ctdb,
state, release_ip_callback, state,
- false,
CTDB_EVENT_RELEASE_IP,
"%s %s %u",
iface,
return 0;
}
-/*
- release an ip address old v4 style
- */
-int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
- TDB_DATA indata,
- bool *async_reply)
-{
- TDB_DATA data;
-
- data.dsize = sizeof(struct ctdb_public_ip);
- data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
- CTDB_NO_MEMORY(ctdb, data.dptr);
-
- memcpy(data.dptr, indata.dptr, indata.dsize);
- return ctdb_control_release_ip(ctdb, c, data, async_reply);
-}
-
-
static int ctdb_add_public_address(struct ctdb_context *ctdb,
ctdb_sock_addr *addr,
unsigned mask, const char *ifaces,
}
free(tmp);
- /* Verify that we dont have an entry for this ip yet */
+ /* Verify that we don't have an entry for this ip yet */
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
return 0;
}
-/*
- setup the event script directory
-*/
-int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
-{
- ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
- CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
- return 0;
-}
-
-static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
-{
- struct ctdb_context *ctdb = talloc_get_type(private_data,
- struct ctdb_context);
- struct ctdb_vnn *vnn;
-
- for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
- int i;
-
- for (i=0; vnn->ifaces[i] != NULL; i++) {
- if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
- DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
- vnn->ifaces[i],
- ctdb_addr_to_str(&vnn->public_address)));
- }
- }
- }
-
- event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
- timeval_current_ofs(30, 0),
- ctdb_check_interfaces_event, ctdb);
-}
-
-
-int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
-{
- if (ctdb->check_public_ifaces_ctx != NULL) {
- talloc_free(ctdb->check_public_ifaces_ctx);
- ctdb->check_public_ifaces_ctx = NULL;
- }
-
- ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
- if (ctdb->check_public_ifaces_ctx == NULL) {
- ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
- }
-
- event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
- timeval_current_ofs(30, 0),
- ctdb_check_interfaces_event, ctdb);
-
- return 0;
-}
-
-
/*
setup the public address lists from a file
*/
int nlines;
int i;
- lines = file_lines_load(ctdb->public_addresses_file, &nlines, ctdb);
+ lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
if (lines == NULL) {
ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
return -1;
const char *ip)
{
struct ctdb_vnn *svnn;
- struct ctdb_iface *cur = NULL;
+ struct ctdb_interface *cur = NULL;
bool ok;
int ret;
return 0;
}
-struct ctdb_public_ip_list {
- struct ctdb_public_ip_list *next;
+struct public_ip_list { /* one public address in the merged allocation list */
+ struct public_ip_list *next; /* singly linked; the allocators test pnn == -1 to find unassigned addresses */
uint32_t pnn;
ctdb_sock_addr addr;
};
/* Given a physical node, return the number of
public addresses that is currently assigned to this node.
*/
-static int node_ip_coverage(struct ctdb_context *ctdb,
- int32_t pnn,
- struct ctdb_public_ip_list *ips)
+static int node_ip_coverage(int32_t pnn, struct public_ip_list *ips)
{
int num=0;
/* Can the given node host the given IP: is the public IP known to the
* node and is NOIPHOST unset?
*/
-static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
- struct ctdb_ipflags ipflags,
- struct ctdb_public_ip_list *ip)
+static bool can_node_host_ip(struct ipalloc_state *ipalloc_state,
+ int32_t pnn,
+ struct public_ip_list *ip)
{
- struct ctdb_all_public_ips *public_ips;
+ struct ctdb_public_ip_list_old *public_ips;
int i;
- if (ipflags.noiphost) {
+ if (ipalloc_state->noiphost[pnn]) {
return false;
}
- public_ips = ctdb->nodes[pnn]->available_public_ips;
+ public_ips = ipalloc_state->available_public_ips[pnn];
if (public_ips == NULL) {
return false;
return false;
}
-static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
- struct ctdb_ipflags ipflags,
- struct ctdb_public_ip_list *ip)
+static bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state,
+ int32_t pnn,
+ struct public_ip_list *ip) /* returns true if node pnn may take over ip */
{
- if (ipflags.noiptakeover) {
+ if (ipalloc_state->noiptakeover[pnn]) { /* per-node flag: this node must not take over addresses */
return false;
}
- return can_node_host_ip(ctdb, pnn, ipflags, ip);
+ return can_node_host_ip(ipalloc_state, pnn, ip); /* otherwise defer to the hosting check */
}
/* search the node lists list for a node to takeover this ip.
pick the node that currently are serving the least number of ips
so that the ips get spread out evenly.
*/
-static int find_takeover_node(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *ip,
- struct ctdb_public_ip_list *all_ips)
+static int find_takeover_node(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *ip,
+ struct public_ip_list *all_ips)
{
int pnn, min=0, num;
int i, numnodes;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
pnn = -1;
for (i=0; i<numnodes; i++) {
/* verify that this node can serve this ip */
- if (!can_node_takeover_ip(ctdb, i, ipflags[i], ip)) {
+ if (!can_node_takeover_ip(ipalloc_state, i, ip)) {
/* no it couldnt so skip to the next node */
continue;
}
- num = node_ip_coverage(ctdb, i, all_ips);
+ num = node_ip_coverage(i, all_ips);
/* was this the first node we checked ? */
if (pnn == -1) {
pnn = i;
min = num;
}
}
- }
+ }
if (pnn == -1) {
DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
ctdb_addr_to_str(&ip->addr)));
static void *add_ip_callback(void *parm, void *data)
{
- struct ctdb_public_ip_list *this_ip = parm;
- struct ctdb_public_ip_list *prev_ip = data;
+ struct public_ip_list *this_ip = parm;
+ struct public_ip_list *prev_ip = data;
if (prev_ip == NULL) {
return parm;
static int getips_count_callback(void *param, void *data)
{
- struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
- struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
+ struct public_ip_list **ip_list = (struct public_ip_list **)param; /* address of the head of the list being built */
+ struct public_ip_list *new_ip = (struct public_ip_list *)data; /* element to push onto the front of that list */
new_ip->next = *ip_list;
*ip_list = new_ip;
return 0;
}
-static struct ctdb_public_ip_list *
-create_merged_ip_list(struct ctdb_context *ctdb)
+static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+ struct ctdb_public_ip_list_old *ips,
+ uint32_t pnn);
+
+static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
+ struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap) /* Fetch the known and available public IP lists of every active node into ipalloc_state; returns 0, or -1 on the first failure. */
{
- int i, j;
- struct ctdb_public_ip_list *ip_list;
- struct ctdb_all_public_ips *public_ips;
+ int j; /* index into nodemap->nodes[] and into the per-node arrays */
+ int ret;
+
+ if (ipalloc_state->num != nodemap->num) { /* the per-node arrays are indexed by nodemap position, so the sizes must agree */
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
+ ipalloc_state->num, nodemap->num)); /* NOTE(review): ipalloc_state->num is uint32_t, so %u would match -- confirm nodemap->num too */
+ return -1;
+ }
+
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { /* inactive nodes are skipped; their array slots are left untouched */
+ continue;
+ }
+
+ /* Retrieve the list of known public IPs from the node (flags argument 0) */
+ ret = ctdb_ctrl_get_public_ips_flags(ctdb,
+ TAKEOVER_TIMEOUT(),
+ j,
+ ctdb->nodes,
+ 0,
+ &ipalloc_state->known_public_ips[j]);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to read known public IPs from node: %u\n",
+ j));
+ return -1;
+ }
+
+ if (ctdb->do_checkpublicip) { /* NOTE(review): the return value of verify_remote_ip_allocation() is ignored here -- confirm this is intentional */
+ verify_remote_ip_allocation(ctdb,
+ ipalloc_state->known_public_ips[j],
+ j);
+ }
- if (ctdb->ip_tree != NULL) {
- talloc_free(ctdb->ip_tree);
- ctdb->ip_tree = NULL;
+ /* Retrieve the list of available public IPs from the node (CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) */
+ ret = ctdb_ctrl_get_public_ips_flags(ctdb,
+ TAKEOVER_TIMEOUT(),
+ j,
+ ctdb->nodes,
+ CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
+ &ipalloc_state->available_public_ips[j]);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to read available public IPs from node: %u\n",
+ j));
+ return -1;
+ }
}
+
+ return 0;
+}
+
+static struct public_ip_list *
+create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
+{
+ int i, j;
+ struct public_ip_list *ip_list;
+ struct ctdb_public_ip_list_old *public_ips;
+
+ TALLOC_FREE(ctdb->ip_tree);
ctdb->ip_tree = trbt_create(ctdb, 0);
- for (i=0;i<ctdb->num_nodes;i++) {
- public_ips = ctdb->nodes[i]->known_public_ips;
+ for (i=0; i < ctdb->num_nodes; i++) {
+ public_ips = ipalloc_state->known_public_ips[i];
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
continue;
/* there were no public ips for this node */
if (public_ips == NULL) {
continue;
- }
+ }
- for (j=0;j<public_ips->num;j++) {
- struct ctdb_public_ip_list *tmp_ip;
+ for (j=0; j < public_ips->num; j++) {
+ struct public_ip_list *tmp_ip;
- tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
+ tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
/* Do not use information about IP addresses hosted
* on other nodes, it may not be accurate */
used in the main part of the algorithm.
*/
static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
- struct ctdb_public_ip_list *ips,
+ struct public_ip_list *ips,
int pnn)
{
- struct ctdb_public_ip_list *t;
+ struct public_ip_list *t;
uint32_t d;
uint32_t sum = 0;
- for (t=ips; t != NULL; t=t->next) {
+ for (t = ips; t != NULL; t = t->next) {
if (t->pnn != pnn) {
continue;
}
/* Return the LCP2 imbalance metric for addresses currently assigned
to the given node.
*/
-static uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
+static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn)
{
- struct ctdb_public_ip_list *t;
+ struct public_ip_list *t;
uint32_t imbalance = 0;
- for (t=all_ips; t!=NULL; t=t->next) {
+ for (t = all_ips; t != NULL; t = t->next) {
if (t->pnn != pnn) {
continue;
}
/* Allocate any unassigned IPs just by looping through the IPs and
* finding the best node for each.
*/
-static void basic_allocate_unassigned(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+static void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
- /* loop over all ip's and find a physical node to cover for
+ /* loop over all ip's and find a physical node to cover for
each unassigned ip.
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- if (find_takeover_node(ctdb, ipflags, tmp_ip, all_ips)) {
- DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
+ for (t = all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
+ if (find_takeover_node(ipalloc_state,
+ t, all_ips)) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to find node to cover ip %s\n",
+ ctdb_addr_to_str(&t->addr)));
}
}
}
/* Basic non-deterministic rebalancing algorithm.
*/
-static void basic_failback(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+static void basic_failback(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
int num_ips)
{
int i, numnodes;
int maxnode, maxnum, minnode, minnum, num, retries;
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
retries = 0;
try_again:
serving the most and the node serving the least ip's are
not greater than 1.
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
+ for (t = all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
continue;
}
minnode = -1;
for (i=0; i<numnodes; i++) {
/* only check nodes that can actually serve this ip */
- if (!can_node_takeover_ip(ctdb, i, ipflags[i], tmp_ip)) {
+ if (!can_node_takeover_ip(ipalloc_state, i,
+ t)) {
/* no it couldnt so skip to the next node */
continue;
}
- num = node_ip_coverage(ctdb, i, all_ips);
+ num = node_ip_coverage(i, all_ips);
if (maxnode == -1) {
maxnode = i;
maxnum = num;
}
}
if (maxnode == -1) {
- DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
+ DEBUG(DEBUG_WARNING,
+ (__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
+ ctdb_addr_to_str(&t->addr)));
continue;
}
try to do this a limited number of times since we dont
want to spend too much time balancing the ip coverage.
*/
- if ( (maxnum > minnum+1)
- && (retries < (num_ips + 5)) ){
- struct ctdb_public_ip_list *tmp;
+ if ((maxnum > minnum+1) &&
+ (retries < (num_ips + 5))){
+ struct public_ip_list *tt;
/* Reassign one of maxnode's VNNs */
- for (tmp=all_ips;tmp;tmp=tmp->next) {
- if (tmp->pnn == maxnode) {
- (void)find_takeover_node(ctdb, ipflags, tmp, all_ips);
+ for (tt = all_ips; tt != NULL; tt = tt->next) {
+ if (tt->pnn == maxnode) {
+ (void)find_takeover_node(ipalloc_state,
+ tt,
+ all_ips);
retries++;
goto try_again;;
}
}
}
-static void lcp2_init(struct ctdb_context *tmp_ctx,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+static bool lcp2_init(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
uint32_t *force_rebalance_nodes,
uint32_t **lcp2_imbalances,
bool **rebalance_candidates)
{
int i, numnodes;
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
- *rebalance_candidates = talloc_array(tmp_ctx, bool, numnodes);
- CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
- *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, numnodes);
- CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
+ *rebalance_candidates = talloc_array(ipalloc_state, bool, numnodes);
+ if (*rebalance_candidates == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ return false;
+ }
+ *lcp2_imbalances = talloc_array(ipalloc_state, uint32_t, numnodes);
+ if (*lcp2_imbalances == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ return false;
+ }
for (i=0; i<numnodes; i++) {
(*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
* keep state and invalidate it every time the recovery master
* changes.
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn != -1) {
- (*rebalance_candidates)[tmp_ip->pnn] = false;
+ for (t = all_ips; t != NULL; t = t->next) {
+ if (t->pnn != -1) {
+ (*rebalance_candidates)[t->pnn] = false;
}
}
/* 3rd step: if a node is forced to re-balance then
we allow failback onto the node */
if (force_rebalance_nodes == NULL) {
- return;
+ return true;
}
for (i = 0; i < talloc_array_length(force_rebalance_nodes); i++) {
uint32_t pnn = force_rebalance_nodes[i];
("Forcing rebalancing of IPs to node %u\n", pnn));
(*rebalance_candidates)[pnn] = true;
}
+
+ return true;
}
/* Allocate any unassigned addresses using the LCP2 algorithm to find
* the IP/node combination that will cost the least.
*/
-static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+static void lcp2_allocate_unassigned(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
uint32_t *lcp2_imbalances)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
int dstnode, numnodes;
int minnode;
uint32_t mindsum, dstdsum, dstimbl, minimbl;
- struct ctdb_public_ip_list *minip;
+ struct public_ip_list *minip;
bool should_loop = true;
bool have_unassigned = true;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
while (have_unassigned && should_loop) {
should_loop = false;
minip = NULL;
/* loop over each unassigned ip. */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn != -1) {
+ for (t = all_ips; t != NULL ; t = t->next) {
+ if (t->pnn != -1) {
continue;
}
- for (dstnode=0; dstnode<numnodes; dstnode++) {
+ for (dstnode = 0; dstnode < numnodes; dstnode++) {
/* only check nodes that can actually takeover this ip */
- if (!can_node_takeover_ip(ctdb, dstnode,
- ipflags[dstnode],
- tmp_ip)) {
+ if (!can_node_takeover_ip(ipalloc_state,
+ dstnode,
+ t)) {
/* no it couldnt so skip to the next node */
continue;
}
- dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
+ dstdsum = ip_distance_2_sum(&(t->addr), all_ips, dstnode);
dstimbl = lcp2_imbalances[dstnode] + dstdsum;
- DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
- ctdb_addr_to_str(&(tmp_ip->addr)),
- dstnode,
- dstimbl - lcp2_imbalances[dstnode]));
+ DEBUG(DEBUG_DEBUG,
+ (" %s -> %d [+%d]\n",
+ ctdb_addr_to_str(&(t->addr)),
+ dstnode,
+ dstimbl - lcp2_imbalances[dstnode]));
if ((minnode == -1) || (dstdsum < mindsum)) {
minnode = dstnode;
minimbl = dstimbl;
mindsum = dstdsum;
- minip = tmp_ip;
+ minip = t;
should_loop = true;
}
}
/* There might be a better way but at least this is clear. */
have_unassigned = false;
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
+ for (t = all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
have_unassigned = true;
}
}
* well optimise.
*/
if (have_unassigned) {
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
+ for (t = all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to find node to cover ip %s\n",
+ ctdb_addr_to_str(&t->addr)));
}
}
}
* to move IPs from, determines the best IP/destination node
* combination to move from the source node.
*/
-static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+static bool lcp2_failback_candidate(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
int srcnode,
- uint32_t candimbl,
uint32_t *lcp2_imbalances,
bool *rebalance_candidates)
{
int dstnode, mindstnode, numnodes;
uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
uint32_t minsrcimbl, mindstimbl;
- struct ctdb_public_ip_list *minip;
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *minip;
+ struct public_ip_list *t;
/* Find an IP and destination node that best reduces imbalance. */
srcimbl = 0;
mindstnode = -1;
mindstimbl = 0;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
- DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n",
+ srcnode, lcp2_imbalances[srcnode]));
- for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
+ for (t = all_ips; t != NULL; t = t->next) {
/* Only consider addresses on srcnode. */
- if (tmp_ip->pnn != srcnode) {
+ if (t->pnn != srcnode) {
continue;
}
/* What is this IP address costing the source node? */
- srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
- srcimbl = candimbl - srcdsum;
+ srcdsum = ip_distance_2_sum(&(t->addr), all_ips, srcnode);
+ srcimbl = lcp2_imbalances[srcnode] - srcdsum;
/* Consider this IP address would cost each potential
* destination node. Destination nodes are limited to
* to do gratuitous failover of IPs just to make minor
* balance improvements.
*/
- for (dstnode=0; dstnode<numnodes; dstnode++) {
+ for (dstnode = 0; dstnode < numnodes; dstnode++) {
if (!rebalance_candidates[dstnode]) {
continue;
}
/* only check nodes that can actually takeover this ip */
- if (!can_node_takeover_ip(ctdb, dstnode,
- ipflags[dstnode], tmp_ip)) {
+ if (!can_node_takeover_ip(ipalloc_state, dstnode,
+ t)) {
/* no it couldnt so skip to the next node */
continue;
}
- dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
+ dstdsum = ip_distance_2_sum(&(t->addr), all_ips, dstnode);
dstimbl = lcp2_imbalances[dstnode] + dstdsum;
DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
- srcnode, srcimbl - lcp2_imbalances[srcnode],
- ctdb_addr_to_str(&(tmp_ip->addr)),
- dstnode, dstimbl - lcp2_imbalances[dstnode]));
+ srcnode, -srcdsum,
+ ctdb_addr_to_str(&(t->addr)),
+ dstnode, dstdsum));
- if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
+ if ((dstimbl < lcp2_imbalances[srcnode]) &&
+ (dstdsum < srcdsum) && \
((mindstnode == -1) || \
((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
- minip = tmp_ip;
+ minip = t;
minsrcimbl = srcimbl;
mindstnode = dstnode;
mindstimbl = dstimbl;
if (mindstnode != -1) {
/* We found a move that makes things better... */
- DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
- srcnode, minsrcimbl - lcp2_imbalances[srcnode],
- ctdb_addr_to_str(&(minip->addr)),
- mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
+ DEBUG(DEBUG_INFO,
+ ("%d [%d] -> %s -> %d [+%d]\n",
+ srcnode, minsrcimbl - lcp2_imbalances[srcnode],
+ ctdb_addr_to_str(&(minip->addr)),
+ mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
- lcp2_imbalances[srcnode] = srcimbl;
+ lcp2_imbalances[srcnode] = minsrcimbl;
lcp2_imbalances[mindstnode] = mindstimbl;
minip->pnn = mindstnode;
* node with the highest LCP2 imbalance, and then determines the best
* IP/destination node combination to move from the source node.
*/
-static void lcp2_failback(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+static void lcp2_failback(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
uint32_t *lcp2_imbalances,
bool *rebalance_candidates)
{
- int i, num_rebalance_candidates, numnodes;
+ int i, numnodes;
struct lcp2_imbalance_pnn * lips;
bool again;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
try_again:
-
- /* It is only worth continuing if we have suitable target
- * nodes to transfer IPs to. This check is much cheaper than
- * continuing on...
- */
- num_rebalance_candidates = 0;
- for (i=0; i<numnodes; i++) {
- if (rebalance_candidates[i]) {
- num_rebalance_candidates++;
- }
- }
- if (num_rebalance_candidates == 0) {
- return;
- }
-
/* Put the imbalances and nodes into an array, sort them and
* iterate through candidates. Usually the 1st one will be
* used, so this doesn't cost much...
*/
- lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, numnodes);
- for (i=0; i<numnodes; i++) {
+ DEBUG(DEBUG_DEBUG,("+++++++++++++++++++++++++++++++++++++++++\n"));
+ DEBUG(DEBUG_DEBUG,("Selecting most imbalanced node from:\n"));
+ lips = talloc_array(ipalloc_state, struct lcp2_imbalance_pnn, numnodes);
+ for (i = 0; i < numnodes; i++) {
lips[i].imbalance = lcp2_imbalances[i];
lips[i].pnn = i;
+ DEBUG(DEBUG_DEBUG,(" %d [%d]\n", i, lcp2_imbalances[i]));
}
qsort(lips, numnodes, sizeof(struct lcp2_imbalance_pnn),
lcp2_cmp_imbalance_pnn);
again = false;
- for (i=0; i<numnodes; i++) {
+ for (i = 0; i < numnodes; i++) {
/* This means that all nodes had 0 or 1 addresses, so
* can't be imbalanced.
*/
break;
}
- if (lcp2_failback_candidate(ctdb,
- ipflags,
+ if (lcp2_failback_candidate(ipalloc_state,
all_ips,
lips[i].pnn,
- lips[i].imbalance,
lcp2_imbalances,
rebalance_candidates)) {
again = true;
}
}
-static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+/*
+ * Walk all_ips and reset pnn to -1 for every IP whose currently
+ * assigned node fails can_node_host_ip(). IPs that are already
+ * unassigned (pnn == -1) are skipped. Nothing is reassigned here;
+ * the callers follow this with an allocation pass.
+ */
+static void unassign_unsuitable_ips(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
/* verify that the assigned nodes can serve that public ip
and set it to -1 if not
*/
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
+ for (t = all_ips; t != NULL; t = t->next) {
+ if (t->pnn == -1) {
continue;
}
- if (!can_node_host_ip(ctdb, tmp_ip->pnn,
- ipflags[tmp_ip->pnn], tmp_ip) != 0) {
+ if (!can_node_host_ip(ipalloc_state, t->pnn, t)) {
/* this node can not serve this ip. */
DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
- ctdb_addr_to_str(&(tmp_ip->addr)),
- tmp_ip->pnn));
- tmp_ip->pnn = -1;
+ ctdb_addr_to_str(&(t->addr)),
+ t->pnn));
+ t->pnn = -1;
}
}
}
-static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+static bool ip_alloc_deterministic_ips(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
int i, numnodes;
- numnodes = talloc_array_length(ipflags);
+ numnodes = ipalloc_state->num;
DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
/* Allocate IPs to nodes in a modulo fashion so that IPs will
* available/unavailable nodes.
*/
- for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
- tmp_ip->pnn = i % numnodes;
+ for (i = 0, t = all_ips; t!= NULL; t = t->next, i++) {
+ t->pnn = i % numnodes;
}
/* IP failback doesn't make sense with deterministic
* IPs, since the modulo step above implicitly fails
* back IPs to their "home" node.
*/
- if (1 == ctdb->tunable.no_ip_failback) {
+ if (1 == ipalloc_state->no_ip_failback) {
DEBUG(DEBUG_WARNING, ("WARNING: 'NoIPFailback' set but ignored - incompatible with 'DeterministicIPs\n"));
}
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
+ unassign_unsuitable_ips(ipalloc_state, all_ips);
- basic_allocate_unassigned(ctdb, ipflags, all_ips);
+ basic_allocate_unassigned(ipalloc_state, all_ips);
/* No failback here! */
+
+ return true;
}
-static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips)
+/*
+ * Non-deterministic allocation: unassign IPs whose node can no
+ * longer host them, allocate every unassigned IP and then, unless
+ * no_ip_failback is 1, call basic_failback() with the total number
+ * of IPs in all_ips. Always returns true.
+ */
+static bool ip_alloc_nondeterministic_ips(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips)
{
/* This should be pushed down into basic_failback. */
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *t;
int num_ips = 0;
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ for (t = all_ips; t != NULL; t = t->next) {
num_ips++;
}
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
+ unassign_unsuitable_ips(ipalloc_state, all_ips);
- basic_allocate_unassigned(ctdb, ipflags, all_ips);
+ basic_allocate_unassigned(ipalloc_state, all_ips);
/* If we don't want IPs to fail back then don't rebalance IPs. */
- if (1 == ctdb->tunable.no_ip_failback) {
- return;
+ if (1 == ipalloc_state->no_ip_failback) {
+ return true;
}
/* Now, try to make sure the ip adresses are evenly distributed
across the nodes.
*/
- basic_failback(ctdb, ipflags, all_ips, num_ips);
+ basic_failback(ipalloc_state, all_ips, num_ips);
+
+ return true;
}
-static void ip_alloc_lcp2(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list *all_ips,
+/*
+ * LCP2 allocation: unassign unsuitable IPs, set up the per-node
+ * imbalance and rebalance-candidate arrays with lcp2_init(),
+ * allocate unassigned IPs and finally call lcp2_failback().
+ * Failback is skipped when no_ip_failback is 1 or when no node is
+ * marked as a rebalance candidate.
+ *
+ * Returns false only when lcp2_init() fails, true otherwise.
+ */
+static bool ip_alloc_lcp2(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
uint32_t *force_rebalance_nodes)
{
uint32_t *lcp2_imbalances;
bool *rebalance_candidates;
+ int numnodes, num_rebalance_candidates, i;
+ bool ret = true;
- TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
-
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
+ unassign_unsuitable_ips(ipalloc_state, all_ips);
- lcp2_init(tmp_ctx, ipflags, all_ips,force_rebalance_nodes,
- &lcp2_imbalances, &rebalance_candidates);
+ if (!lcp2_init(ipalloc_state, all_ips,force_rebalance_nodes,
+ &lcp2_imbalances, &rebalance_candidates)) {
+ ret = false;
+ goto finished;
+ }
- lcp2_allocate_unassigned(ctdb, ipflags, all_ips, lcp2_imbalances);
+ lcp2_allocate_unassigned(ipalloc_state, all_ips, lcp2_imbalances);
/* If we don't want IPs to fail back then don't rebalance IPs. */
- if (1 == ctdb->tunable.no_ip_failback) {
+ if (1 == ipalloc_state->no_ip_failback) {
+ goto finished;
+ }
+
+ /* It is only worth continuing if we have suitable target
+ * nodes to transfer IPs to. This check is much cheaper than
+ * continuing on...
+ */
+ numnodes = ipalloc_state->num;
+ num_rebalance_candidates = 0;
+ for (i=0; i<numnodes; i++) {
+ if (rebalance_candidates[i]) {
+ num_rebalance_candidates++;
+ }
+ }
+ if (num_rebalance_candidates == 0) {
goto finished;
}
/* Now, try to make sure the ip adresses are evenly distributed
across the nodes.
*/
- lcp2_failback(ctdb, ipflags, all_ips,
+ lcp2_failback(ipalloc_state, all_ips,
lcp2_imbalances, rebalance_candidates);
finished:
- talloc_free(tmp_ctx);
+ return ret;
}
-static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
+/*
+ * Return false as soon as a node is found whose flags contain no
+ * NODE_FLAGS_INACTIVE or NODE_FLAGS_DISABLED bit; return true if
+ * there is no such node (which includes an empty nodemap).
+ */
+static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
{
- int i, num_healthy;
+ int i;
- /* Count how many completely healthy nodes we have */
- num_healthy = 0;
for (i=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
- num_healthy++;
+ /* Found one completely healthy node */
+ return false;
}
}
- return num_healthy == 0;
+ return true;
}
/* The calculation part of the IP allocation algorithm. */
-static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct ctdb_public_ip_list **all_ips_p,
+/*
+ * Run the allocator selected by ipalloc_state->algorithm over
+ * all_ips. force_rebalance_nodes is only passed to the LCP2
+ * allocator.
+ *
+ * Returns the allocator's result, or false if the algorithm value
+ * matches none of the cases below.
+ */
+static bool ctdb_takeover_run_core(struct ipalloc_state *ipalloc_state,
+ struct public_ip_list *all_ips,
uint32_t *force_rebalance_nodes)
{
- /* since nodes only know about those public addresses that
- can be served by that particular node, no single node has
- a full list of all public addresses that exist in the cluster.
- Walk over all node structures and create a merged list of
- all public addresses that exist in the cluster.
-
- keep the tree of ips around as ctdb->ip_tree
- */
- *all_ips_p = create_merged_ip_list(ctdb);
+ /* Initialised so the result is defined even if no case matches */
+ bool ret = false;
- if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
- ip_alloc_lcp2(ctdb, ipflags, *all_ips_p, force_rebalance_nodes);
- } else if (1 == ctdb->tunable.deterministic_public_ips) {
- ip_alloc_deterministic_ips(ctdb, ipflags, *all_ips_p);
- } else {
- ip_alloc_nondeterministic_ips(ctdb, ipflags, *all_ips_p);
+ switch (ipalloc_state->algorithm) {
+ case IPALLOC_LCP2:
+ ret = ip_alloc_lcp2(ipalloc_state, all_ips,
+ force_rebalance_nodes);
+ break;
+ case IPALLOC_DETERMINISTIC:
+ ret = ip_alloc_deterministic_ips(ipalloc_state, all_ips);
+ break;
+ case IPALLOC_NONDETERMINISTIC:
+ ret = ip_alloc_nondeterministic_ips(ipalloc_state, all_ips);
+ break;
}
/* at this point ->pnn is the node which will own each IP
or -1 if there is no node that can cover this ip
*/
- return;
+ return ret;
}
struct get_tunable_callback_data {
static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap,
+ struct ctdb_node_map_old *nodemap,
const char *tunable,
uint32_t default_value)
{
return tvals;
}
-struct get_runstate_callback_data {
- enum ctdb_runstate *out;
- bool fatal;
-};
-
-static void get_runstate_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback_data)
+/* Set internal flags for IP allocation:
+ * Flags are only ever set to true here, never cleared; the arrays
+ * are expected to start out all false
+ * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
+ * Set NOIPHOST ip flag for each INACTIVE node
+ * if all nodes are disabled:
+ * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
+ * else
+ * Set NOIPHOST ip flags for disabled nodes
+ *
+ * Every array is indexed by position in nodemap.
+ * NOTE(review): assumes noiptakeover[] and noiphost[] hold at least
+ * nodemap->num entries - confirm against the caller.
+ */
+static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap,
+ uint32_t *tval_noiptakeover,
+ uint32_t *tval_noiphostonalldisabled)
{
- struct get_runstate_callback_data *cd =
- (struct get_runstate_callback_data *)callback_data;
- int size;
+ int i;
- if (res != 0) {
- /* Already handled in fail callback */
- return;
+ for (i=0;i<nodemap->num;i++) {
+ /* Can not take IPs on node with NoIPTakeover set */
+ if (tval_noiptakeover[i] != 0) {
+ ipalloc_state->noiptakeover[i] = true;
+ }
+
+ /* Can not host IPs on INACTIVE node */
+ if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+ ipalloc_state->noiphost[i] = true;
+ }
}
- if (outdata.dsize != sizeof(uint32_t)) {
- DEBUG(DEBUG_ERR,("Wrong size of returned data when getting runstate from node %d. Expected %d bytes but received %d bytes\n",
- pnn, (int)sizeof(uint32_t),
- (int)outdata.dsize));
- cd->fatal = true;
- return;
+ if (all_nodes_are_disabled(nodemap)) {
+ /* If all nodes are disabled, can not host IPs on node
+ * with NoIPHostOnAllDisabled set
+ */
+ for (i=0;i<nodemap->num;i++) {
+ if (tval_noiphostonalldisabled[i] != 0) {
+ ipalloc_state->noiphost[i] = true;
+ }
+ }
+ } else {
+ /* If some nodes are not disabled, then can not host
+ * IPs on DISABLED node
+ */
+ for (i=0;i<nodemap->num;i++) {
+ if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
+ ipalloc_state->noiphost[i] = true;
+ }
+ }
}
+}
- size = talloc_array_length(cd->out);
- if (pnn >= size) {
- DEBUG(DEBUG_ERR,("Got reply from node %d but nodemap only has %d entries\n",
- pnn, size));
- return;
- }
-
- cd->out[pnn] = (enum ctdb_runstate)*(uint32_t *)outdata.dptr;
-}
-
-static void get_runstate_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback)
+/*
+ * Fetch the per-node NoIPTakeover and NoIPHostOnAllDisabled tunables
+ * (default value 0) with get_tunable_from_nodes() and hand them,
+ * together with nodemap, to set_ipflags_internal() to fill in the
+ * flag arrays of ipalloc_state. The two tunable arrays are freed
+ * again before returning.
+ *
+ * Returns false if get_tunable_from_nodes() returns NULL for either
+ * tunable, true otherwise.
+ */
+static bool set_ipflags(struct ctdb_context *ctdb,
+ struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap)
{
- struct get_runstate_callback_data *cd =
- (struct get_runstate_callback_data *)callback;
+ uint32_t *tval_noiptakeover;
+ uint32_t *tval_noiphostonalldisabled;
- switch (res) {
- case -ETIME:
- DEBUG(DEBUG_ERR,
- ("Timed out getting runstate from node %d\n", pnn));
- cd->fatal = true;
- break;
- default:
- DEBUG(DEBUG_WARNING,
- ("Error getting runstate from node %d - assuming runstates not supported\n",
- pnn));
+ tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
+ "NoIPTakeover", 0);
+ if (tval_noiptakeover == NULL) {
+ return false;
}
-}
-
-static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap,
- enum ctdb_runstate default_value)
-{
- uint32_t *nodes;
- enum ctdb_runstate *rs;
- struct get_runstate_callback_data callback_data;
- int i;
- rs = talloc_array(tmp_ctx, enum ctdb_runstate, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, rs);
- for (i=0; i<nodemap->num; i++) {
- rs[i] = default_value;
+ tval_noiphostonalldisabled =
+ get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
+ "NoIPHostOnAllDisabled", 0);
+ if (tval_noiphostonalldisabled == NULL) {
+ /* tval_noiptakeover is not freed on this path.
+ * NOTE(review): presumably it is a talloc child of
+ * ipalloc_state and is released with it - confirm.
+ */
+ return false;
}
- callback_data.out = rs;
- callback_data.fatal = false;
+ set_ipflags_internal(ipalloc_state, nodemap,
+ tval_noiptakeover,
+ tval_noiphostonalldisabled);
- nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
- if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_RUNSTATE,
- nodes, 0, TAKEOVER_TIMEOUT(),
- true, tdb_null,
- get_runstate_callback,
- get_runstate_fail_callback,
- &callback_data) != 0) {
- if (callback_data.fatal) {
- free(rs);
- rs = NULL;
- }
- }
- talloc_free(nodes);
+ talloc_free(tval_noiptakeover);
+ talloc_free(tval_noiphostonalldisabled);
- return rs;
+ return true;
}
-/* Set internal flags for IP allocation:
- * Clear ip flags
- * Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
- * Set NOIPHOST ip flag for each INACTIVE node
- * if all nodes are disabled:
- * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
- * else
- * Set NOIPHOST ip flags for disabled nodes
- */
-static struct ctdb_ipflags *
-set_ipflags_internal(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap,
- uint32_t *tval_noiptakeover,
- uint32_t *tval_noiphostonalldisabled,
- enum ctdb_runstate *runstate)
+/*
+ * Allocate the state used by the IP allocation algorithms as a
+ * talloc child of mem_ctx.
+ *
+ * num is taken from ctdb->num_nodes and the four per-node arrays
+ * (known_public_ips, available_public_ips, noiptakeover, noiphost)
+ * are allocated zero-filled with that many entries, as children of
+ * the new state. The algorithm is picked from the
+ * lcp2_public_ip_assignment and deterministic_public_ips tunables
+ * (LCP2 is checked first, non-deterministic is the fallback) and
+ * no_ip_failback is copied from the tunables.
+ *
+ * Returns NULL if any allocation fails; the partially built state
+ * is freed before returning.
+ */
+static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx)
{
- int i;
- struct ctdb_ipflags *ipflags;
-
- /* Clear IP flags - implicit due to talloc_zero */
- ipflags = talloc_zero_array(tmp_ctx, struct ctdb_ipflags, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, ipflags);
-
- for (i=0;i<nodemap->num;i++) {
- /* Can not take IPs on node with NoIPTakeover set */
- if (tval_noiptakeover[i] != 0) {
- ipflags[i].noiptakeover = true;
- }
-
- /* Can not host IPs on node not in RUNNING state */
- if (runstate[i] != CTDB_RUNSTATE_RUNNING) {
- ipflags[i].noiphost = true;
- continue;
- }
- /* Can not host IPs on INACTIVE node */
- if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
- ipflags[i].noiphost = true;
- }
+ struct ipalloc_state *ipalloc_state =
+ talloc_zero(mem_ctx, struct ipalloc_state);
+ if (ipalloc_state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ return NULL;
}
- if (all_nodes_are_disabled(nodemap)) {
- /* If all nodes are disabled, can not host IPs on node
- * with NoIPHostOnAllDisabled set
- */
- for (i=0;i<nodemap->num;i++) {
- if (tval_noiphostonalldisabled[i] != 0) {
- ipflags[i].noiphost = true;
- }
- }
- } else {
- /* If some nodes are not disabled, then can not host
- * IPs on DISABLED node
- */
- for (i=0;i<nodemap->num;i++) {
- if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
- ipflags[i].noiphost = true;
- }
- }
+ ipalloc_state->num = ctdb->num_nodes;
+ ipalloc_state->known_public_ips =
+ talloc_zero_array(ipalloc_state,
+ struct ctdb_public_ip_list_old *,
+ ipalloc_state->num);
+ if (ipalloc_state->known_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
+ return NULL;
}
-
- return ipflags;
-}
-
-static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map *nodemap)
-{
- uint32_t *tval_noiptakeover;
- uint32_t *tval_noiphostonalldisabled;
- struct ctdb_ipflags *ipflags;
- enum ctdb_runstate *runstate;
-
-
- tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
- "NoIPTakeover", 0);
- if (tval_noiptakeover == NULL) {
+ ipalloc_state->available_public_ips =
+ talloc_zero_array(ipalloc_state,
+ struct ctdb_public_ip_list_old *,
+ ipalloc_state->num);
+ if (ipalloc_state->available_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
return NULL;
}
-
- tval_noiphostonalldisabled =
- get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
- "NoIPHostOnAllDisabled", 0);
- if (tval_noiphostonalldisabled == NULL) {
- /* Caller frees tmp_ctx */
+ ipalloc_state->noiptakeover =
+ talloc_zero_array(ipalloc_state,
+ bool,
+ ipalloc_state->num);
+ if (ipalloc_state->noiptakeover == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
return NULL;
}
-
- /* Any nodes where CTDB_CONTROL_GET_RUNSTATE is not supported
- * will default to CTDB_RUNSTATE_RUNNING. This ensures
- * reasonable behaviour on a mixed cluster during upgrade.
- */
- runstate = get_runstate_from_nodes(ctdb, tmp_ctx, nodemap,
- CTDB_RUNSTATE_RUNNING);
- if (runstate == NULL) {
- /* Caller frees tmp_ctx */
+ ipalloc_state->noiphost =
+ talloc_zero_array(ipalloc_state,
+ bool,
+ ipalloc_state->num);
+ if (ipalloc_state->noiphost == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ talloc_free(ipalloc_state);
return NULL;
}
- ipflags = set_ipflags_internal(ctdb, tmp_ctx, nodemap,
- tval_noiptakeover,
- tval_noiphostonalldisabled,
- runstate);
+ if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
+ ipalloc_state->algorithm = IPALLOC_LCP2;
+ } else if (1 == ctdb->tunable.deterministic_public_ips) {
+ ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
+ } else {
+ ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
+ }
- talloc_free(tval_noiptakeover);
- talloc_free(tval_noiphostonalldisabled);
- talloc_free(runstate);
+ ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
- return ipflags;
+ return ipalloc_state;
}
struct iprealloc_callback_data {
int retry_count;
client_async_callback fail_callback;
void *fail_callback_data;
- struct ctdb_node_map *nodemap;
+ struct ctdb_node_map_old *nodemap;
};
static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
struct iprealloc_callback_data *cd =
(struct iprealloc_callback_data *)callback;
+ numnodes = talloc_array_length(cd->retry_nodes);
+ if (pnn > numnodes) {
+ DEBUG(DEBUG_ERR,
+ ("ipreallocated failure from node %d, "
+ "but only %d nodes in nodemap\n",
+ pnn, numnodes));
+ return;
+ }
+
+ /* Can't run the "ipreallocated" event on a INACTIVE node */
+ if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
+ DEBUG(DEBUG_WARNING,
+ ("ipreallocated failed on inactive node %d, ignoring\n",
+ pnn));
+ return;
+ }
+
switch (res) {
case -ETIME:
/* If the control timed out then that's a real error,
* so call the real fail callback
*/
- cd->fail_callback(ctdb, pnn, res, outdata,
- cd->fail_callback_data);
+ if (cd->fail_callback) {
+ cd->fail_callback(ctdb, pnn, res, outdata,
+ cd->fail_callback_data);
+ } else {
+ DEBUG(DEBUG_WARNING,
+ ("iprealloc timed out but no callback registered\n"));
+ }
break;
default:
/* If not a timeout then either the ipreallocated
* because the error codes are all folded down to -1.
* Consider retrying using EVENTSCRIPT control...
*/
-
- numnodes = talloc_array_length(cd->retry_nodes);
- if (pnn > numnodes) {
- DEBUG(DEBUG_ERR,
- ("ipreallocated failure from node %d, but only %d nodes in nodemap\n",
- pnn, numnodes));
- return;
- }
-
- /* Can't run the "ipreallocated" event on a INACTIVE node */
- if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
- DEBUG(DEBUG_ERR,
- ("ipreallocated failure from node %d, but node is inactive - not flagging a retry\n",
- pnn));
- return;
- }
-
DEBUG(DEBUG_WARNING,
("ipreallocated failure from node %d, flagging retry\n",
pnn));
bool *node_failed;
client_async_callback fail_callback;
void *fail_callback_data;
- struct ctdb_node_map *nodemap;
+ struct ctdb_node_map_old *nodemap;
};
static void takeover_run_fail_callback(struct ctdb_context *ctdb,
/*
make any IP alias changes for public addresses that are necessary
*/
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
+int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
uint32_t *force_rebalance_nodes,
client_async_callback fail_callback, void *callback_data)
{
int i, j, ret;
struct ctdb_public_ip ip;
- struct ctdb_public_ipv4 ipv4;
uint32_t *nodes;
- struct ctdb_public_ip_list *all_ips, *tmp_ip;
+ struct public_ip_list *all_ips, *tmp_ip;
TDB_DATA data;
struct timeval timeout;
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- struct ctdb_ipflags *ipflags;
+ struct ipalloc_state *ipalloc_state;
struct takeover_callback_data *takeover_data;
struct iprealloc_callback_data iprealloc_data;
bool *retry_data;
+ bool can_host_ips;
/*
* ip failover is completely disabled, just send out the
goto ipreallocated;
}
- ipflags = set_ipflags(ctdb, tmp_ctx, nodemap);
- if (ipflags == NULL) {
+ ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
+ if (ipalloc_state == NULL) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
talloc_free(tmp_ctx);
return -1;
}
- ZERO_STRUCT(ip);
+ /* Fetch known/available public IPs from each active node */
+ ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
+ if (ret != 0) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ /* Short-circuit IP allocation if no node has available IPs */
+ can_host_ips = false;
+ for (i=0; i < ipalloc_state->num; i++) {
+ if (ipalloc_state->available_public_ips[i] != NULL) {
+ can_host_ips = true;
+ }
+ }
+ if (!can_host_ips) {
+ DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
+ return 0;
+ }
+
+ /* since nodes only know about those public addresses that
+ can be served by that particular node, no single node has
+ a full list of all public addresses that exist in the cluster.
+ Walk over all node structures and create a merged list of
+ all public addresses that exist in the cluster.
+
+ keep the tree of ips around as ctdb->ip_tree
+ */
+ all_ips = create_merged_ip_list(ctdb, ipalloc_state);
/* Do the IP reassignment calculations */
- ctdb_takeover_run_core(ctdb, ipflags, &all_ips, force_rebalance_nodes);
+ ctdb_takeover_run_core(ipalloc_state,
+ all_ips, force_rebalance_nodes);
/* Now tell all nodes to release any public IPs should not
* host. This will be a NOOP on nodes that don't currently
async_data->fail_callback = takeover_run_fail_callback;
async_data->callback_data = takeover_data;
+ ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
+
+ /* Send a RELEASE_IP to all nodes that should not be hosting
+ * each IP. For each IP, all but one of these will be
+ * redundant. However, the redundant ones are used to tell
+ * nodes which node should be hosting the IP so that commands
+ * like "ctdb ip" can display a particular nodes idea of who
+ * is hosting what. */
for (i=0;i<nodemap->num;i++) {
/* don't talk to unconnected nodes, but do talk to banned nodes */
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
/* This node should be serving this
- vnn so dont tell it to release the ip
+ vnn so don't tell it to release the ip
*/
continue;
}
- if (tmp_ip->addr.sa.sa_family == AF_INET) {
- ipv4.pnn = tmp_ip->pnn;
- ipv4.sin = tmp_ip->addr.ip;
-
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ipv4);
- data.dptr = (uint8_t *)&ipv4;
- state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
- 0, CTDB_CONTROL_RELEASE_IPv4, 0,
- data, async_data,
- &timeout, NULL);
- } else {
- ip.pnn = tmp_ip->pnn;
- ip.addr = tmp_ip->addr;
-
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ip);
- data.dptr = (uint8_t *)&ip;
- state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
- 0, CTDB_CONTROL_RELEASE_IP, 0,
- data, async_data,
- &timeout, NULL);
- }
+ ip.pnn = tmp_ip->pnn;
+ ip.addr = tmp_ip->addr;
+ timeout = TAKEOVER_TIMEOUT();
+ data.dsize = sizeof(ip);
+ data.dptr = (uint8_t *)&ip;
+ state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
+ 0, CTDB_CONTROL_RELEASE_IP, 0,
+ data, async_data,
+ &timeout, NULL);
if (state == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
talloc_free(tmp_ctx);
return -1;
}
-
+
ctdb_client_async_add(async_data, state);
}
}
talloc_free(async_data);
- /* tell all nodes to get their own IPs */
+ /* For each IP, send a TAKEOVER_IP to the node that should be
+ * hosting it. Many of these will often be redundant (since
+ * the allocation won't have changed) but they can be useful
+ * to recover from inconsistencies. */
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
continue;
}
- if (tmp_ip->addr.sa.sa_family == AF_INET) {
- ipv4.pnn = tmp_ip->pnn;
- ipv4.sin = tmp_ip->addr.ip;
-
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ipv4);
- data.dptr = (uint8_t *)&ipv4;
- state = ctdb_control_send(ctdb, tmp_ip->pnn,
- 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
- data, async_data,
- &timeout, NULL);
- } else {
- ip.pnn = tmp_ip->pnn;
- ip.addr = tmp_ip->addr;
+ ip.pnn = tmp_ip->pnn;
+ ip.addr = tmp_ip->addr;
- timeout = TAKEOVER_TIMEOUT();
- data.dsize = sizeof(ip);
- data.dptr = (uint8_t *)&ip;
- state = ctdb_control_send(ctdb, tmp_ip->pnn,
- 0, CTDB_CONTROL_TAKEOVER_IP, 0,
- data, async_data,
- &timeout, NULL);
- }
+ timeout = TAKEOVER_TIMEOUT();
+ data.dsize = sizeof(ip);
+ data.dptr = (uint8_t *)&ip;
+ state = ctdb_control_send(ctdb, tmp_ip->pnn,
+ 0, CTDB_CONTROL_TAKEOVER_IP, 0,
+ data, async_data, &timeout, NULL);
if (state == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
talloc_free(tmp_ctx);
return -1;
}
-
+
ctdb_client_async_add(async_data, state);
}
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
}
ipreallocated:
- /*
+ /*
* Tell all nodes to run eventscripts to process the
* "ipreallocated" event. This can do a lot of things,
* including restarting services to reconfigure them if public
* IPs have moved. Once upon a time this event only used to
- * update natwg.
+ * update natgw.
*/
retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
/*
called by a client to inform us of a TCP connection that it is managing
that should tickled with an ACK when IP takeover is done
- we handle both the old ipv4 style of packets as well as the new ipv4/6
- pdus.
*/
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
TDB_DATA indata)
{
- struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
- struct ctdb_control_tcp *old_addr = NULL;
- struct ctdb_control_tcp_addr new_addr;
- struct ctdb_control_tcp_addr *tcp_sock = NULL;
+ struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
+ struct ctdb_connection *tcp_sock = NULL;
struct ctdb_tcp_list *tcp;
- struct ctdb_tcp_connection t;
+ struct ctdb_connection t;
int ret;
TDB_DATA data;
struct ctdb_client_ip *ip;
struct ctdb_vnn *vnn;
ctdb_sock_addr addr;
- switch (indata.dsize) {
- case sizeof(struct ctdb_control_tcp):
- old_addr = (struct ctdb_control_tcp *)indata.dptr;
- ZERO_STRUCT(new_addr);
- tcp_sock = &new_addr;
- tcp_sock->src.ip = old_addr->src;
- tcp_sock->dest.ip = old_addr->dest;
- break;
- case sizeof(struct ctdb_control_tcp_addr):
- tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
- break;
- default:
- DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
- "to ctdb_control_tcp_client. size was %d but "
- "only allowed sizes are %lu and %lu\n",
- (int)indata.dsize,
- (long unsigned)sizeof(struct ctdb_control_tcp),
- (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
- return -1;
+ /* If we don't have public IPs, tickles are useless */
+ if (ctdb->vnn == NULL) {
+ return 0;
}
+ tcp_sock = (struct ctdb_connection *)indata.dptr;
+
addr = tcp_sock->src;
ctdb_canonicalize_ip(&addr, &tcp_sock->src);
- addr = tcp_sock->dest;
- ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
+ addr = tcp_sock->dst;
+ ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
ZERO_STRUCT(addr);
- memcpy(&addr, &tcp_sock->dest, sizeof(addr));
+ memcpy(&addr, &tcp_sock->dst, sizeof(addr));
vnn = find_public_ip_vnn(ctdb, &addr);
if (vnn == NULL) {
switch (addr.sa.sa_family) {
tcp = talloc(client, struct ctdb_tcp_list);
CTDB_NO_MEMORY(ctdb, tcp);
- tcp->connection.src_addr = tcp_sock->src;
- tcp->connection.dst_addr = tcp_sock->dest;
+ tcp->connection.src = tcp_sock->src;
+ tcp->connection.dst = tcp_sock->dst;
DLIST_ADD(client->tcp_list, tcp);
- t.src_addr = tcp_sock->src;
- t.dst_addr = tcp_sock->dest;
+ t.src = tcp_sock->src;
+ t.dst = tcp_sock->dst;
data.dptr = (uint8_t *)&t;
data.dsize = sizeof(t);
switch (addr.sa.sa_family) {
case AF_INET:
DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
- (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
+ (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
ctdb_addr_to_str(&tcp_sock->src),
(unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
break;
case AF_INET6:
DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
- (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
+ (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
ctdb_addr_to_str(&tcp_sock->src),
(unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
break;
/*
find a tcp address on a list
*/
-static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
- struct ctdb_tcp_connection *tcp)
+static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
+ struct ctdb_connection *tcp)
{
int i;
}
for (i=0;i<array->num;i++) {
- if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
- ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
+ if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
+ ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
return &array->connections[i];
}
}
*/
int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
{
- struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
+ struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
struct ctdb_tcp_array *tcparray;
- struct ctdb_tcp_connection tcp;
+ struct ctdb_connection tcp;
struct ctdb_vnn *vnn;
- vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
+ /* If we don't have public IPs, tickles are useless */
+ if (ctdb->vnn == NULL) {
+ return 0;
+ }
+
+ vnn = find_public_ip_vnn(ctdb, &p->dst);
if (vnn == NULL) {
DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
- ctdb_addr_to_str(&p->dst_addr)));
+ ctdb_addr_to_str(&p->dst)));
return -1;
}
/* If this is the first tickle */
if (tcparray == NULL) {
- tcparray = talloc_size(ctdb->nodes,
- offsetof(struct ctdb_tcp_array, connections) +
- sizeof(struct ctdb_tcp_connection) * 1);
+ tcparray = talloc(vnn, struct ctdb_tcp_array);
CTDB_NO_MEMORY(ctdb, tcparray);
vnn->tcp_array = tcparray;
tcparray->num = 0;
- tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
+ tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
CTDB_NO_MEMORY(ctdb, tcparray->connections);
- tcparray->connections[tcparray->num].src_addr = p->src_addr;
- tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
+ tcparray->connections[tcparray->num].src = p->src;
+ tcparray->connections[tcparray->num].dst = p->dst;
tcparray->num++;
if (tcp_update_needed) {
/* Do we already have this tickle ?*/
- tcp.src_addr = p->src_addr;
- tcp.dst_addr = p->dst_addr;
- if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
+ tcp.src = p->src;
+ tcp.dst = p->dst;
+ if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
- ctdb_addr_to_str(&tcp.dst_addr),
- ntohs(tcp.dst_addr.ip.sin_port),
+ ctdb_addr_to_str(&tcp.dst),
+ ntohs(tcp.dst.ip.sin_port),
vnn->pnn));
return 0;
}
/* A new tickle, we must add it to the array */
tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
- struct ctdb_tcp_connection,
+ struct ctdb_connection,
tcparray->num+1);
CTDB_NO_MEMORY(ctdb, tcparray->connections);
- vnn->tcp_array = tcparray;
- tcparray->connections[tcparray->num].src_addr = p->src_addr;
- tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
+ tcparray->connections[tcparray->num].src = p->src;
+ tcparray->connections[tcparray->num].dst = p->dst;
tcparray->num++;
-
+
DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
- ctdb_addr_to_str(&tcp.dst_addr),
- ntohs(tcp.dst_addr.ip.sin_port),
+ ctdb_addr_to_str(&tcp.dst),
+ ntohs(tcp.dst.ip.sin_port),
vnn->pnn));
if (tcp_update_needed) {
}
-/*
- called by a daemon to inform us of a TCP connection that one of its
- clients managing that should tickled with an ACK when IP takeover is
- done
- */
-static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
+static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
{
- struct ctdb_tcp_connection *tcpp;
- struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
+ struct ctdb_connection *tcpp;
if (vnn == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
- ctdb_addr_to_str(&conn->dst_addr)));
return;
}
/* if the array is empty we cant remove it
- and we dont need to do anything
+ and we don't need to do anything
*/
if (vnn->tcp_array == NULL) {
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
- ctdb_addr_to_str(&conn->dst_addr),
- ntohs(conn->dst_addr.ip.sin_port)));
+ ctdb_addr_to_str(&conn->dst),
+ ntohs(conn->dst.ip.sin_port)));
return;
}
/* See if we know this connection
- if we dont know this connection then we dont need to do anything
+ if we don't know this connection then we don't need to do anything
*/
tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
if (tcpp == NULL) {
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
- ctdb_addr_to_str(&conn->dst_addr),
- ntohs(conn->dst_addr.ip.sin_port)));
+ ctdb_addr_to_str(&conn->dst),
+ ntohs(conn->dst.ip.sin_port)));
return;
}
vnn->tcp_update_needed = true;
DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
- ctdb_addr_to_str(&conn->src_addr),
- ntohs(conn->src_addr.ip.sin_port)));
+ ctdb_addr_to_str(&conn->src),
+ ntohs(conn->src.ip.sin_port)));
}
*/
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
+ struct ctdb_vnn *vnn;
+ struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
- ctdb_remove_tcp_connection(ctdb, conn);
+ /* If we don't have public IPs, tickles are useless */
+ if (ctdb->vnn == NULL) {
+ return 0;
+ }
+
+ vnn = find_public_ip_vnn(ctdb, &conn->dst);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " unable to find public address %s\n",
+ ctdb_addr_to_str(&conn->dst)));
+ return 0;
+ }
+
+ ctdb_remove_connection(vnn, conn);
return 0;
}
/*
- called when a daemon restarts - send all tickes for all public addresses
- we are serving immediately to the new node.
+ Called when another daemon starts - causes all tickles for all
+ public addresses we are serving to be sent to the new node on the
+ next check. This actually causes the next scheduled call to
+ ctdb_update_tcp_tickles() to update all nodes. This is simple and
+ doesn't require careful error handling.
*/
-int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
+int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
{
-/*XXX here we should send all tickes we are serving to the new node */
+ struct ctdb_vnn *vnn;
+
+ DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
+ (unsigned long) pnn));
+
+ for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+ vnn->tcp_update_needed = true;
+ }
+
return 0;
}
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
{
while (client->tcp_list) {
+ struct ctdb_vnn *vnn;
struct ctdb_tcp_list *tcp = client->tcp_list;
+ struct ctdb_connection *conn = &tcp->connection;
+
DLIST_REMOVE(client->tcp_list, tcp);
- ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
+
+ vnn = find_public_ip_vnn(client->ctdb,
+ &conn->dst);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " unable to find public address %s\n",
+ ctdb_addr_to_str(&conn->dst)));
+ continue;
+ }
+
+ /* If the IP address is hosted on this node then
+ * remove the connection. */
+ if (vnn->pnn == client->ctdb->pnn) {
+ ctdb_remove_connection(vnn, conn);
+ }
+
+ /* Otherwise this function has been called because the
+ * server IP address has been released to another node
+ * and the client has exited. This means that we
+ * should not delete the connection information. The
+ * takeover node processes connections too. */
}
}
-/*
- release all IPs on shutdown
- */
void ctdb_release_all_ips(struct ctdb_context *ctdb)
{
struct ctdb_vnn *vnn;
int count = 0;
+ if (ctdb->tunable.disable_ip_failover == 1) {
+ return;
+ }
+
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (!ctdb_sys_have_ip(&vnn->public_address)) {
ctdb_vnn_unassign_iface(ctdb, vnn);
continue;
}
+ /* Don't allow multiple releases at once. Some code,
+ * particularly ctdb_tickle_sentenced_connections() is
+ * not re-entrant */
+ if (vnn->update_in_flight) {
+ DEBUG(DEBUG_WARNING,
+ (__location__
+ " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
+ ctdb_addr_to_str(&vnn->public_address),
+ vnn->public_netmask_bits,
+ ctdb_vnn_iface_string(vnn)));
+ continue;
+ }
+ vnn->update_in_flight = true;
+
DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
ctdb_addr_to_str(&vnn->public_address),
vnn->public_netmask_bits,
vnn->public_netmask_bits);
release_kill_clients(ctdb, &vnn->public_address);
ctdb_vnn_unassign_iface(ctdb, vnn);
+ vnn->update_in_flight = false;
count++;
}
get list of public IPs
*/
int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
- struct ctdb_req_control *c, TDB_DATA *outdata)
+ struct ctdb_req_control_old *c, TDB_DATA *outdata)
{
int i, num, len;
- struct ctdb_all_public_ips *ips;
+ struct ctdb_public_ip_list_old *ips;
struct ctdb_vnn *vnn;
bool only_available = false;
num++;
}
- len = offsetof(struct ctdb_all_public_ips, ips) +
+ len = offsetof(struct ctdb_public_ip_list_old, ips) +
num*sizeof(struct ctdb_public_ip);
ips = talloc_zero_size(outdata, len);
CTDB_NO_MEMORY(ctdb, ips);
i++;
}
ips->num = i;
- len = offsetof(struct ctdb_all_public_ips, ips) +
+ len = offsetof(struct ctdb_public_ip_list_old, ips) +
i*sizeof(struct ctdb_public_ip);
outdata->dsize = len;
}
-/*
- get list of public IPs, old ipv4 style. only returns ipv4 addresses
- */
-int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
- struct ctdb_req_control *c, TDB_DATA *outdata)
-{
- int i, num, len;
- struct ctdb_all_public_ipsv4 *ips;
- struct ctdb_vnn *vnn;
-
- /* count how many public ip structures we have */
- num = 0;
- for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
- if (vnn->public_address.sa.sa_family != AF_INET) {
- continue;
- }
- num++;
- }
-
- len = offsetof(struct ctdb_all_public_ipsv4, ips) +
- num*sizeof(struct ctdb_public_ipv4);
- ips = talloc_zero_size(outdata, len);
- CTDB_NO_MEMORY(ctdb, ips);
-
- outdata->dsize = len;
- outdata->dptr = (uint8_t *)ips;
-
- ips->num = num;
- i = 0;
- for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
- if (vnn->public_address.sa.sa_family != AF_INET) {
- continue;
- }
- ips->ips[i].pnn = vnn->pnn;
- ips->ips[i].sin = vnn->public_address.ip;
- i++;
- }
-
- return 0;
-}
-
int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata,
TDB_DATA *outdata)
{
int i, num, len;
ctdb_sock_addr *addr;
- struct ctdb_control_public_ip_info *info;
+ struct ctdb_public_ip_info_old *info;
struct ctdb_vnn *vnn;
addr = (ctdb_sock_addr *)indata.dptr;
num++;
}
- len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
- num*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
+ num*sizeof(struct ctdb_iface);
info = talloc_zero_size(outdata, len);
CTDB_NO_MEMORY(ctdb, info);
info->active_idx = 0xFFFFFFFF;
for (i=0; vnn->ifaces[i]; i++) {
- struct ctdb_iface *cur;
+ struct ctdb_interface *cur;
cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
if (cur == NULL) {
if (vnn->iface == cur) {
info->active_idx = i;
}
- strcpy(info->ifaces[i].name, cur->name);
+ strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1);
info->ifaces[i].link_state = cur->link_up;
info->ifaces[i].references = cur->references;
}
info->num = i;
- len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
- i*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
+ i*sizeof(struct ctdb_iface);
outdata->dsize = len;
outdata->dptr = (uint8_t *)info;
}
int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA *outdata)
{
int i, num, len;
- struct ctdb_control_get_ifaces *ifaces;
- struct ctdb_iface *cur;
+ struct ctdb_iface_list_old *ifaces;
+ struct ctdb_interface *cur;
/* count how many public ip structures we have */
num = 0;
num++;
}
- len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
- num*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_iface_list_old, ifaces) +
+ num*sizeof(struct ctdb_iface);
ifaces = talloc_zero_size(outdata, len);
CTDB_NO_MEMORY(ctdb, ifaces);
i++;
}
ifaces->num = i;
- len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
- i*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_iface_list_old, ifaces) +
+ i*sizeof(struct ctdb_iface);
outdata->dsize = len;
outdata->dptr = (uint8_t *)ifaces;
}
int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
TDB_DATA indata)
{
- struct ctdb_control_iface_info *info;
- struct ctdb_iface *iface;
+ struct ctdb_iface *info;
+ struct ctdb_interface *iface;
bool link_up = false;
- info = (struct ctdb_control_iface_info *)indata.dptr;
+ info = (struct ctdb_iface *)indata.dptr;
if (info->name[CTDB_IFACE_SIZE] != '\0') {
int len = strnlen(info->name, CTDB_IFACE_SIZE);
struct ctdb_vnn *vnn;
struct ctdb_context *ctdb;
int capture_fd;
- struct fd_event *fde;
+ struct tevent_fd *fde;
trbt_tree_t *connections;
void *private_data;
};
/*
called when we get a read event on the raw socket
*/
-static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
+static void capture_tcp_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
uint16_t flags, void *private_data)
{
struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
ctdb_sock_addr src, dst;
uint32_t ack_seq, seq;
- if (!(flags & EVENT_FD_READ)) {
+ if (!(flags & TEVENT_FD_READ)) {
return;
}
/*
called every second until all sentenced connections have been reset
*/
-static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
+static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
+ struct tevent_timer *te,
struct timeval t, void *private_data)
{
struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
/* try tickling them again in a seconds time
*/
- event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
- ctdb_tickle_sentenced_connections, killtcp);
+ tevent_add_timer(killtcp->ctdb->ev, killtcp,
+ timeval_current_ofs(1, 0),
+ ctdb_tickle_sentenced_connections, killtcp);
}
/*
/* nothing fancy here, just unconditionally replace any existing
connection structure with the new one.
- dont even free the old one if it did exist, that one is talloc_stolen
+ don't even free the old one if it did exist, that one is talloc_stolen
by the same node in the tree anyway and will be deleted when the new data
is deleted
*/
add_killtcp_callback, con);
/*
- If we dont have a socket to listen on yet we must create it
+ If we don't have a socket to listen on yet we must create it
*/
if (killtcp->capture_fd == -1) {
const char *iface = ctdb_vnn_iface_string(vnn);
if (killtcp->fde == NULL) {
- killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
- EVENT_FD_READ,
- capture_tcp_handler, killtcp);
+ killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
+ killtcp->capture_fd,
+ TEVENT_FD_READ,
+ capture_tcp_handler, killtcp);
tevent_fd_set_auto_close(killtcp->fde);
/* We also need to set up some events to tickle all these connections
until they are all reset
*/
- event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
- ctdb_tickle_sentenced_connections, killtcp);
+ tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
+ ctdb_tickle_sentenced_connections, killtcp);
}
/* tickle him once now */
*/
int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
+ struct ctdb_connection *killtcp = (struct ctdb_connection *)indata.dptr;
- return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
+ return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
}
/*
*/
int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
+ struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
struct ctdb_tcp_array *tcparray;
struct ctdb_vnn *vnn;
/* We must at least have tickles.num or else we cant verify the size
of the received data blob
*/
- if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections)) {
- DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
+ if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
+ DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
return -1;
}
/* verify that the size of data matches what we expect */
- if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections)
- + sizeof(struct ctdb_tcp_connection)
- * list->tickles.num) {
- DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
+ if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
+ + sizeof(struct ctdb_connection) * list->num) {
+ DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
return -1;
- }
+ }
+
+ DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
+ ctdb_addr_to_str(&list->addr)));
vnn = find_public_ip_vnn(ctdb, &list->addr);
if (vnn == NULL) {
- DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
+ DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
ctdb_addr_to_str(&list->addr)));
return 1;
}
+ if (vnn->pnn == ctdb->pnn) {
+ DEBUG(DEBUG_INFO,
+ ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
+ ctdb_addr_to_str(&list->addr)));
+ return 0;
+ }
+
/* remove any old ticklelist we might have */
talloc_free(vnn->tcp_array);
vnn->tcp_array = NULL;
- tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
+ tcparray = talloc(vnn, struct ctdb_tcp_array);
CTDB_NO_MEMORY(ctdb, tcparray);
- tcparray->num = list->tickles.num;
+ tcparray->num = list->num;
- tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
+ tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
CTDB_NO_MEMORY(ctdb, tcparray->connections);
- memcpy(tcparray->connections, &list->tickles.connections[0],
- sizeof(struct ctdb_tcp_connection)*tcparray->num);
+ memcpy(tcparray->connections, &list->connections[0],
+ sizeof(struct ctdb_connection)*tcparray->num);
/* We now have a new fresh tickle list array for this vnn */
- vnn->tcp_array = talloc_steal(vnn, tcparray);
-
+ vnn->tcp_array = tcparray;
+
return 0;
}
int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
- struct ctdb_control_tcp_tickle_list *list;
+ struct ctdb_tickle_list_old *list;
struct ctdb_tcp_array *tcparray;
int num;
struct ctdb_vnn *vnn;
num = 0;
}
- outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections)
- + sizeof(struct ctdb_tcp_connection) * num;
+ outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
+ + sizeof(struct ctdb_connection) * num;
outdata->dptr = talloc_size(outdata, outdata->dsize);
CTDB_NO_MEMORY(ctdb, outdata->dptr);
- list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
+ list = (struct ctdb_tickle_list_old *)outdata->dptr;
list->addr = *addr;
- list->tickles.num = num;
+ list->num = num;
if (num) {
- memcpy(&list->tickles.connections[0], tcparray->connections,
- sizeof(struct ctdb_tcp_connection) * num);
+ memcpy(&list->connections[0], tcparray->connections,
+ sizeof(struct ctdb_connection) * num);
}
return 0;
/*
set the list of all tcp tickles for a public address
*/
-static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
- struct timeval timeout, uint32_t destnode,
- ctdb_sock_addr *addr,
- struct ctdb_tcp_array *tcparray)
+static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
+ ctdb_sock_addr *addr,
+ struct ctdb_tcp_array *tcparray)
{
int ret, num;
TDB_DATA data;
- struct ctdb_control_tcp_tickle_list *list;
+ struct ctdb_tickle_list_old *list;
if (tcparray) {
num = tcparray->num;
num = 0;
}
- data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections) +
- sizeof(struct ctdb_tcp_connection) * num;
+ data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
+ sizeof(struct ctdb_connection) * num;
data.dptr = talloc_size(ctdb, data.dsize);
CTDB_NO_MEMORY(ctdb, data.dptr);
- list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
+ list = (struct ctdb_tickle_list_old *)data.dptr;
list->addr = *addr;
- list->tickles.num = num;
+ list->num = num;
if (tcparray) {
- memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
+ memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
}
- ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
+ ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
CTDB_CONTROL_SET_TCP_TICKLE_LIST,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
if (ret != 0) {
/*
perform tickle updates if required
*/
-static void ctdb_update_tcp_tickles(struct event_context *ev,
- struct timed_event *te,
- struct timeval t, void *private_data)
+static void ctdb_update_tcp_tickles(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int ret;
if (!vnn->tcp_update_needed) {
continue;
}
- ret = ctdb_ctrl_set_tcp_tickles(ctdb,
- TAKEOVER_TIMEOUT(),
- CTDB_BROADCAST_CONNECTED,
- &vnn->public_address,
- vnn->tcp_array);
+ ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
+ &vnn->public_address,
+ vnn->tcp_array);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
ctdb_addr_to_str(&vnn->public_address)));
+ } else {
+ DEBUG(DEBUG_INFO,
+ ("Sent tickle update for public address %s\n",
+ ctdb_addr_to_str(&vnn->public_address)));
+ vnn->tcp_update_needed = false;
}
}
- event_add_timed(ctdb->ev, ctdb->tickle_update_context,
- timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
- ctdb_update_tcp_tickles, ctdb);
-}
-
+ tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
+ timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+ ctdb_update_tcp_tickles, ctdb);
+}
/*
start periodic update of tcp tickles
{
ctdb->tickle_update_context = talloc_new(ctdb);
- event_add_timed(ctdb->ev, ctdb->tickle_update_context,
- timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
- ctdb_update_tcp_tickles, ctdb);
+ tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
+ timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
+ ctdb_update_tcp_tickles, ctdb);
}
/*
send a control_gratuitous arp
*/
-static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+static void send_gratious_arp(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
{
int ret;
struct control_gratious_arp *arp = talloc_get_type(private_data,
return;
}
- event_add_timed(arp->ctdb->ev, arp,
- timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
- send_gratious_arp, arp);
+ tevent_add_timer(arp->ctdb->ev, arp,
+ timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
+ send_gratious_arp, arp);
}
*/
int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
+ struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
struct control_gratious_arp *arp;
/* verify the size of indata */
- if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
+ if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
+ (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
return -1;
}
if (indata.dsize !=
- ( offsetof(struct ctdb_control_gratious_arp, iface)
+ ( offsetof(struct ctdb_addr_info_old, iface)
+ gratious_arp->len ) ){
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
"but should be %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
+ (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
return -1;
}
arp->iface = talloc_strdup(arp, gratious_arp->iface);
CTDB_NO_MEMORY(ctdb, arp->iface);
arp->count = 0;
-
- event_add_timed(arp->ctdb->ev, arp,
- timeval_zero(), send_gratious_arp, arp);
+
+ tevent_add_timer(arp->ctdb->ev, arp,
+ timeval_zero(), send_gratious_arp, arp);
return 0;
}
int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
+ struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
int ret;
/* verify the size of indata */
- if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
- DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
+ if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+ DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
return -1;
}
if (indata.dsize !=
- ( offsetof(struct ctdb_control_ip_iface, iface)
+ ( offsetof(struct ctdb_addr_info_old, iface)
+ pub->len ) ){
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
"but should be %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
+ (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
return -1;
}
return 0;
}
+struct delete_ip_callback_state {
+ struct ctdb_req_control_old *c;
+};
+
/*
called when releaseip event finishes for del_public_address
*/
-static void delete_ip_callback(struct ctdb_context *ctdb, int status,
- void *private_data)
+static void delete_ip_callback(struct ctdb_context *ctdb,
+ int32_t status, TDB_DATA data,
+ const char *errormsg,
+ void *private_data)
{
+ struct delete_ip_callback_state *state =
+ talloc_get_type(private_data, struct delete_ip_callback_state);
+
+ /* If release failed then fail. */
+ ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
talloc_free(private_data);
}
-int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
+int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
+ struct ctdb_req_control_old *c,
+ TDB_DATA indata, bool *async_reply)
{
- struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
+ struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
struct ctdb_vnn *vnn;
- int ret;
/* verify the size of indata */
- if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
- DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
+ if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+ DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
return -1;
}
if (indata.dsize !=
- ( offsetof(struct ctdb_control_ip_iface, iface)
+ ( offsetof(struct ctdb_addr_info_old, iface)
+ pub->len ) ){
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
"but should be %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
+ (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
return -1;
}
/* walk over all public addresses until we find a match */
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
- TALLOC_CTX *mem_ctx = talloc_new(ctdb);
-
- DLIST_REMOVE(ctdb->vnn, vnn);
- talloc_steal(mem_ctx, vnn);
- ctdb_remove_orphaned_ifaces(ctdb, vnn, mem_ctx);
- if (vnn->pnn != ctdb->pnn) {
- if (vnn->iface != NULL) {
- ctdb_vnn_unassign_iface(ctdb, vnn);
+ if (vnn->pnn == ctdb->pnn) {
+ struct delete_ip_callback_state *state;
+ struct ctdb_public_ip *ip;
+ TDB_DATA data;
+ int ret;
+
+ vnn->delete_pending = true;
+
+ state = talloc(ctdb,
+ struct delete_ip_callback_state);
+ CTDB_NO_MEMORY(ctdb, state);
+ state->c = c;
+
+ ip = talloc(state, struct ctdb_public_ip);
+ if (ip == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " Out of memory\n"));
+ talloc_free(state);
+ return -1;
+ }
+ ip->pnn = -1;
+ ip->addr = pub->addr;
+
+ data.dsize = sizeof(struct ctdb_public_ip);
+ data.dptr = (unsigned char *)ip;
+
+ ret = ctdb_daemon_send_control(ctdb,
+ ctdb_get_pnn(ctdb),
+ 0,
+ CTDB_CONTROL_RELEASE_IP,
+ 0, 0,
+ data,
+ delete_ip_callback,
+ state);
+ if (ret == -1) {
+ DEBUG(DEBUG_ERR,
+ (__location__ "Unable to send "
+ "CTDB_CONTROL_RELEASE_IP\n"));
+ talloc_free(state);
+ return -1;
}
- talloc_free(mem_ctx);
- return 0;
- }
- vnn->pnn = -1;
- ret = ctdb_event_script_callback(ctdb,
- mem_ctx, delete_ip_callback, mem_ctx,
- false,
- CTDB_EVENT_RELEASE_IP,
- "%s %s %u",
- ctdb_vnn_iface_string(vnn),
- ctdb_addr_to_str(&vnn->public_address),
- vnn->public_netmask_bits);
- if (vnn->iface != NULL) {
- ctdb_vnn_unassign_iface(ctdb, vnn);
- }
- if (ret != 0) {
- return -1;
+ state->c = talloc_steal(state, c);
+ *async_reply = true;
+ } else {
+ /* This IP is not hosted on the
+ * current node so just delete it
+ * now. */
+ do_delete_ip(ctdb, vnn);
}
+
return 0;
}
}
+ DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
+ ctdb_addr_to_str(&pub->addr)));
return -1;
}
struct ipreallocated_callback_state {
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
};
static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
/* A control to run the ipreallocated event */
int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
- struct ctdb_req_control *c,
+ struct ctdb_req_control_old *c,
bool *async_reply)
{
int ret;
ret = ctdb_event_script_callback(ctdb, state,
ctdb_ipreallocated_callback, state,
- false, CTDB_EVENT_IPREALLOCATED,
+ CTDB_EVENT_IPREALLOCATED,
"%s", "");
if (ret != 0) {
node has the expected ip allocation.
This is verified against ctdb->ip_tree
*/
-int verify_remote_ip_allocation(struct ctdb_context *ctdb,
- struct ctdb_all_public_ips *ips,
- uint32_t pnn)
+static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+ struct ctdb_public_ip_list_old *ips,
+ uint32_t pnn)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
int i;
if (ctdb->ip_tree == NULL) {
- /* dont know the expected allocation yet, assume remote node
+ /* don't know the expected allocation yet, assume remote node
is correct. */
return 0;
}
int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
{
- struct ctdb_public_ip_list *tmp_ip;
+ struct public_ip_list *tmp_ip;
+
+ /* IP tree is never built if DisableIPFailover is set */
+ if (ctdb->tunable.disable_ip_failover != 0) {
+ return 0;
+ }
if (ctdb->ip_tree == NULL) {
DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
return 0;
}
+void clear_ip_assignment_tree(struct ctdb_context *ctdb)
+{
+ TALLOC_FREE(ctdb->ip_tree);
+}
struct ctdb_reloadips_handle {
struct ctdb_context *ctdb;
- struct ctdb_req_control *c;
+ struct ctdb_req_control_old *c;
int status;
int fd[2];
pid_t child;
- struct fd_event *fde;
+ struct tevent_fd *fde;
};
static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
return 0;
}
-static void ctdb_reloadips_timeout_event(struct event_context *ev,
- struct timed_event *te,
- struct timeval t, void *private_data)
+static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *private_data)
{
struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
talloc_free(h);
-}
+}
-static void ctdb_reloadips_child_handler(struct event_context *ev, struct fd_event *fde,
- uint16_t flags, void *private_data)
+static void ctdb_reloadips_child_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data)
{
struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
char res;
int ret;
- ret = read(h->fd[0], &res, 1);
+ ret = sys_read(h->fd[0], &res, 1);
if (ret < 1 || res != 0) {
DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
res = 1;
static int ctdb_reloadips_child(struct ctdb_context *ctdb)
{
TALLOC_CTX *mem_ctx = talloc_new(NULL);
- struct ctdb_all_public_ips *ips;
+ struct ctdb_public_ip_list_old *ips;
struct ctdb_vnn *vnn;
+ struct client_async_data *async_data;
+ struct timeval timeout;
+ TDB_DATA data;
+ struct ctdb_client_control_state *state;
+ bool first_add;
int i, ret;
CTDB_NO_MEMORY(ctdb, mem_ctx);
- /* read the ip allocation from the local node */
- ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
+ /* Read IPs from local node */
+ ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
+ CTDB_CURRENT_NODE, mem_ctx, &ips);
if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get public ips from local node\n"));
+ DEBUG(DEBUG_ERR,
+ ("Unable to fetch public IPs from local node\n"));
talloc_free(mem_ctx);
return -1;
}
- /* re-read the public ips file */
+ /* Read IPs file - this is safe since this is a child process */
ctdb->vnn = NULL;
if (ctdb_set_public_addresses(ctdb, false) != 0) {
DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
return -1;
}
+ async_data = talloc_zero(mem_ctx, struct client_async_data);
+ CTDB_NO_MEMORY(ctdb, async_data);
- /* check the previous list of ips and scan for ips that have been
- dropped.
- */
+ /* Compare IPs between node and file for IPs to be deleted */
for (i = 0; i < ips->num; i++) {
+ /* Search the re-read public addresses list for this node IP */
for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
- if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
+ if (ctdb_same_ip(&vnn->public_address,
+ &ips->ips[i].addr)) {
+ /* IP is still in file */
break;
}
}
- /* we need to delete this ip, no longer available on this node */
if (vnn == NULL) {
- struct ctdb_control_ip_iface pub;
+ /* Delete IP ips->ips[i] */
+ struct ctdb_addr_info_old *pub;
- DEBUG(DEBUG_NOTICE,("RELOADIPS: IP%s is no longer available on this node. Deleting it.\n", ctdb_addr_to_str(&ips->ips[i].addr)));
- pub.addr = ips->ips[i].addr;
- pub.mask = 0;
- pub.len = 0;
+ DEBUG(DEBUG_NOTICE,
+ ("IP %s no longer configured, deleting it\n",
+ ctdb_addr_to_str(&ips->ips[i].addr)));
- ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
- if (ret != 0) {
- talloc_free(mem_ctx);
- DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
- return -1;
+ pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
+ CTDB_NO_MEMORY(ctdb, pub);
+
+ pub->addr = ips->ips[i].addr;
+ pub->mask = 0;
+ pub->len = 0;
+
+ timeout = TAKEOVER_TIMEOUT();
+
+ data.dsize = offsetof(struct ctdb_addr_info_old,
+ iface) + pub->len;
+ data.dptr = (uint8_t *)pub;
+
+ state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
+ CTDB_CONTROL_DEL_PUBLIC_IP,
+ 0, data, async_data,
+ &timeout, NULL);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
+ goto failed;
}
+
+ ctdb_client_async_add(async_data, state);
}
}
-
- /* loop over all new ones and check the ones we need to add */
+ /* Compare IPs between node and file for IPs to be added */
+ first_add = true;
for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
for (i = 0; i < ips->num; i++) {
- if (ctdb_same_ip(&vnn->public_address, &ips->ips[i].addr)) {
+ if (ctdb_same_ip(&vnn->public_address,
+ &ips->ips[i].addr)) {
+ /* IP already on node */
break;
}
}
if (i == ips->num) {
- struct ctdb_control_ip_iface *pub;
+ /* Add IP vnn->public_address (not found among the node's IPs) */
+ struct ctdb_addr_info_old *pub;
const char *ifaces = NULL;
+ uint32_t len;
int iface = 0;
- DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
+ DEBUG(DEBUG_NOTICE,
+ ("New IP %s configured, adding it\n",
+ ctdb_addr_to_str(&vnn->public_address)));
+ if (first_add) {
+ uint32_t pnn = ctdb_get_pnn(ctdb);
+
+ data.dsize = sizeof(pnn);
+ data.dptr = (uint8_t *)&pnn;
+
+ ret = ctdb_client_send_message(
+ ctdb,
+ CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_REBALANCE_NODE,
+ data);
+ if (ret != 0) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
+ }
- pub = talloc_zero(mem_ctx, struct ctdb_control_ip_iface);
- pub->addr = vnn->public_address;
- pub->mask = vnn->public_netmask_bits;
+ first_add = false;
+ }
ifaces = vnn->ifaces[0];
iface = 1;
while (vnn->ifaces[iface] != NULL) {
- ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
+ ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
+ vnn->ifaces[iface]);
iface++;
}
- pub->len = strlen(ifaces)+1;
- pub = talloc_realloc_size(mem_ctx, pub,
- offsetof(struct ctdb_control_ip_iface, iface) + pub->len);
- if (pub == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory\n"));
- talloc_free(mem_ctx);
- return -1;
- }
+
+ len = strlen(ifaces) + 1;
+ pub = talloc_zero_size(mem_ctx,
+ offsetof(struct ctdb_addr_info_old, iface) + len);
+ CTDB_NO_MEMORY(ctdb, pub);
+
+ pub->addr = vnn->public_address;
+ pub->mask = vnn->public_netmask_bits;
+ pub->len = len;
memcpy(&pub->iface[0], ifaces, pub->len);
- ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(),
- CTDB_CURRENT_NODE, pub);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
- talloc_free(mem_ctx);
- return -1;
+ timeout = TAKEOVER_TIMEOUT();
+
+ data.dsize = offsetof(struct ctdb_addr_info_old,
+ iface) + pub->len;
+ data.dptr = (uint8_t *)pub;
+
+ state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
+ CTDB_CONTROL_ADD_PUBLIC_IP,
+ 0, data, async_data,
+ &timeout, NULL);
+ if (state == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
+ goto failed;
}
+
+ ctdb_client_async_add(async_data, state);
}
}
+ if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
+ goto failed;
+ }
+
talloc_free(mem_ctx);
return 0;
+
+failed:
+ talloc_free(mem_ctx);
+ return -1;
}
/* This control is sent to force the node to re-read the public addresses file
and drop any addresses we should nnot longer host, and add new addresses
that we are now able to host
*/
-int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
+int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
{
struct ctdb_reloadips_handle *h;
pid_t parent = getpid();
close(h->fd[0]);
debug_extra = talloc_asprintf(NULL, "reloadips:");
- ctdb_set_process_name("ctdb_reloadips");
+ prctl_set_comment("ctdb_reloadips");
if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
res = -1;
}
}
- write(h->fd[1], &res, 1);
+ sys_write(h->fd[1], &res, 1);
/* make sure we die when our parent dies */
while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
talloc_set_destructor(h, ctdb_reloadips_destructor);
- h->fde = event_add_fd(ctdb->ev, h, h->fd[0],
- EVENT_FD_READ, ctdb_reloadips_child_handler,
- (void *)h);
+ h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
+ ctdb_reloadips_child_handler, (void *)h);
tevent_fd_set_auto_close(h->fde);
- event_add_timed(ctdb->ev, h,
- timeval_current_ofs(120, 0),
- ctdb_reloadips_timeout_event, h);
+ tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
+ ctdb_reloadips_timeout_event, h);
/* we reply later */
*async_reply = true;