ctdb-daemon: Switch to using ETIMEDOUT instead of ETIME
[samba.git] / ctdb / server / ctdb_takeover.c
index 28daf4d61cc7f7e6a634bc47648111c6a17ad68b..a7638cfd677bebd93d010ba5930d28a3d659190b 100644 (file)
 #include "lib/util/dlinklist.h"
 #include "lib/util/debug.h"
 #include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
 #include "lib/util/util_process.h"
 
+#include "protocol/protocol_util.h"
+
 #include "ctdb_private.h"
 #include "ctdb_client.h"
 
 #include "common/rb_tree.h"
 #include "common/reqid.h"
 #include "common/system.h"
+#include "common/system_socket.h"
 #include "common/common.h"
 #include "common/logging.h"
 
@@ -103,6 +107,9 @@ static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
        return iface_string(vnn->iface);
 }
 
+static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
+                                             const char *iface);
+
 static struct ctdb_interface *
 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
 {
@@ -114,10 +121,9 @@ ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
        }
 
        /* Verify that we don't have an entry for this ip yet */
-       for (i=ctdb->ifaces;i;i=i->next) {
-               if (strcmp(i->name, iface) == 0) {
-                       return i;
-               }
+       i = ctdb_find_iface(ctdb, iface);
+       if (i != NULL) {
+               return i;
        }
 
        /* create a new structure for this interface */
@@ -140,13 +146,13 @@ ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
        return i;
 }
 
-static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
-                                       const char *name)
+static bool vnn_has_interface(struct ctdb_vnn *vnn,
+                             const struct ctdb_interface *iface)
 {
        struct vnn_interface *i;
 
        for (i = vnn->ifaces; i != NULL; i = i->next) {
-               if (strcmp(name, i->iface->name) == 0) {
+               if (iface == i->iface) {
                        return true;
                }
        }
@@ -177,14 +183,14 @@ static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
                next = i->next;
 
                /* Only consider interfaces named in the given VNN. */
-               if (!vnn_has_interface_with_name(vnn, i->name)) {
+               if (!vnn_has_interface(vnn, i)) {
                        continue;
                }
 
                /* Search for a vnn with this interface. */
                found = false;
                for (tv=ctdb->vnn; tv; tv=tv->next) {
-                       if (vnn_has_interface_with_name(tv, i->name)) {
+                       if (vnn_has_interface(tv, i)) {
                                found = true;
                                break;
                        }
@@ -295,6 +301,7 @@ static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
                               struct ctdb_vnn *vnn)
 {
+       uint32_t flags;
        struct vnn_interface *i;
 
        /* Nodes that are not RUNNING can not host IPs */
@@ -302,6 +309,11 @@ static bool ctdb_vnn_available(struct ctdb_context *ctdb,
                return false;
        }
 
+       flags = ctdb->nodes[ctdb->pnn]->flags;
+       if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
+               return false;
+       }
+
        if (vnn->delete_pending) {
                return false;
        }
@@ -454,7 +466,7 @@ static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
        TDB_DATA data;
 
        if (status != 0) {
-               if (status == -ETIME) {
+               if (status == -ETIMEDOUT) {
                        ctdb_ban_self(ctdb);
                }
                DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
@@ -573,10 +585,9 @@ static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
 {
        struct ctdb_do_updateip_state *state =
                talloc_get_type(private_data, struct ctdb_do_updateip_state);
-       int32_t ret;
 
        if (status != 0) {
-               if (status == -ETIME) {
+               if (status == -ETIMEDOUT) {
                        ctdb_ban_self(ctdb);
                }
                DEBUG(DEBUG_ERR,
@@ -598,17 +609,6 @@ static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
                return;
        }
 
-       if (ctdb->do_checkpublicip) {
-
-       ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
-       if (ret != 0) {
-               ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
-               talloc_free(state);
-               return;
-       }
-
-       }
-
        /* the control succeeded */
        ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
        talloc_free(state);
@@ -653,9 +653,7 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
                return -1;
        }
 
-       new_name = ctdb_vnn_iface_string(vnn);
-       if (old_name != NULL && new_name != NULL &&
-           strcmp(old_name, new_name) == 0) {
+       if (old == vnn->iface) {
                /* A benign update from one interface onto itself.
                 * no need to run the eventscripts in this case, just return
                 * success.
@@ -674,6 +672,7 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
        vnn->update_in_flight = true;
        talloc_set_destructor(state, ctdb_updateip_destructor);
 
+       new_name = ctdb_vnn_iface_string(vnn);
        DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
                            "interface %s to %s\n",
                            ctdb_addr_to_str(&vnn->public_address),
@@ -887,7 +886,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
        struct release_ip_callback_state *state =
                talloc_get_type(private_data, struct release_ip_callback_state);
 
-       if (status == -ETIME) {
+       if (status == -ETIMEDOUT) {
                ctdb_ban_self(ctdb);
        }
 
@@ -1116,10 +1115,34 @@ static int ctdb_add_public_address(struct ctdb_context *ctdb,
 */
 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
 {
+       bool ok;
        char **lines;
        int nlines;
        int i;
 
+       /* If no public addresses file given then try the default */
+       if (ctdb->public_addresses_file == NULL) {
+               const char *b = getenv("CTDB_BASE");
+               if (b == NULL) {
+                       DBG_ERR("CTDB_BASE not set\n");
+                       return -1;
+               }
+               ctdb->public_addresses_file = talloc_asprintf(
+                                       ctdb, "%s/%s", b, "public_addresses");
+               if (ctdb->public_addresses_file == NULL) {
+                       DBG_ERR("Out of memory\n");
+                       return -1;
+               }
+       }
+
+       /* If the file doesn't exist then warn and do nothing */
+       ok = file_exist(ctdb->public_addresses_file);
+       if (!ok) {
+               D_WARNING("Not loading public addresses, no file %s\n",
+                         ctdb->public_addresses_file);
+               return 0;
+       }
+
        lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
        if (lines == NULL) {
                ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
@@ -1135,6 +1158,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
                const char *addrstr;
                const char *ifaces;
                char *tok, *line;
+               int ret;
 
                line = lines[i];
                while ((*line == ' ') || (*line == '\t')) {
@@ -1148,618 +1172,46 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
                }
                tok = strtok(line, " \t");
                addrstr = tok;
+
                tok = strtok(NULL, " \t");
                if (tok == NULL) {
-                       if (NULL == ctdb->default_public_interface) {
-                               DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
-                                        i+1));
-                               talloc_free(lines);
-                               return -1;
-                       }
-                       ifaces = ctdb->default_public_interface;
-               } else {
-                       ifaces = tok;
-               }
-
-               if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
-                       DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
+                       D_ERR("No interface specified at line %u "
+                             "of public addresses file\n", i+1);
                        talloc_free(lines);
                        return -1;
                }
-               if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
-                       DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
+               ifaces = tok;
+
+               if (addrstr == NULL) {
+                       D_ERR("Badly formed line %u in public address list\n",
+                             i+1);
                        talloc_free(lines);
                        return -1;
                }
-       }
-
-
-       talloc_free(lines);
-       return 0;
-}
-
-static struct ctdb_public_ip_list *
-ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
-                            TALLOC_CTX *mem_ctx,
-                            struct ctdb_node_map_old *nodemap,
-                            uint32_t public_ip_flags)
-{
-       int j, ret;
-       struct ctdb_public_ip_list_old *ip_list;
-       struct ctdb_public_ip_list *public_ips;
-
-       public_ips = talloc_zero_array(mem_ctx,
-                                      struct ctdb_public_ip_list,
-                                      nodemap->num);
-       if (public_ips == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return NULL;
-       }
-
-       for (j = 0; j < nodemap->num; j++) {
-               if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-                       continue;
-               }
-
-               /* Retrieve the list of public IPs from the
-                * node. Flags says whether it is known or
-                * available. */
-               ret = ctdb_ctrl_get_public_ips_flags(
-                       ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
-                       public_ip_flags, &ip_list);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR,
-                             ("Failed to read public IPs from node: %u\n", j));
-                       talloc_free(public_ips);
-                       return NULL;
-               }
-               public_ips[j].num = ip_list->num;
-               if (ip_list->num == 0) {
-                       talloc_free(ip_list);
-                       continue;
-               }
-               public_ips[j].ip = talloc_zero_array(public_ips,
-                                                    struct ctdb_public_ip,
-                                                    ip_list->num);
-               if (public_ips[j].ip == NULL) {
-                       DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-                       talloc_free(public_ips);
-                       return NULL;
-               }
-               memcpy(public_ips[j].ip, &ip_list->ips[0],
-                      sizeof(struct ctdb_public_ip) * ip_list->num);
-               talloc_free(ip_list);
-       }
-
-       return public_ips;
-}
-
-struct get_tunable_callback_data {
-       const char *tunable;
-       uint32_t *out;
-       bool fatal;
-};
-
-static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
-                                int32_t res, TDB_DATA outdata,
-                                void *callback)
-{
-       struct get_tunable_callback_data *cd =
-               (struct get_tunable_callback_data *)callback;
-       int size;
-
-       if (res != 0) {
-               /* Already handled in fail callback */
-               return;
-       }
-
-       if (outdata.dsize != sizeof(uint32_t)) {
-               DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
-                                cd->tunable, pnn, (int)sizeof(uint32_t),
-                                (int)outdata.dsize));
-               cd->fatal = true;
-               return;
-       }
-
-       size = talloc_array_length(cd->out);
-       if (pnn >= size) {
-               DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
-                                cd->tunable, pnn, size));
-               return;
-       }
-
-               
-       cd->out[pnn] = *(uint32_t *)outdata.dptr;
-}
-
-static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
-                                      int32_t res, TDB_DATA outdata,
-                                      void *callback)
-{
-       struct get_tunable_callback_data *cd =
-               (struct get_tunable_callback_data *)callback;
-
-       switch (res) {
-       case -ETIME:
-               DEBUG(DEBUG_ERR,
-                     ("Timed out getting tunable \"%s\" from node %d\n",
-                      cd->tunable, pnn));
-               cd->fatal = true;
-               break;
-       case -EINVAL:
-       case -1:
-               DEBUG(DEBUG_WARNING,
-                     ("Tunable \"%s\" not implemented on node %d\n",
-                      cd->tunable, pnn));
-               break;
-       default:
-               DEBUG(DEBUG_ERR,
-                     ("Unexpected error getting tunable \"%s\" from node %d\n",
-                      cd->tunable, pnn));
-               cd->fatal = true;
-       }
-}
-
-static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
-                                       TALLOC_CTX *tmp_ctx,
-                                       struct ctdb_node_map_old *nodemap,
-                                       const char *tunable,
-                                       uint32_t default_value)
-{
-       TDB_DATA data;
-       struct ctdb_control_get_tunable *t;
-       uint32_t *nodes;
-       uint32_t *tvals;
-       struct get_tunable_callback_data callback_data;
-       int i;
-
-       tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
-       CTDB_NO_MEMORY_NULL(ctdb, tvals);
-       for (i=0; i<nodemap->num; i++) {
-               tvals[i] = default_value;
-       }
-               
-       callback_data.out = tvals;
-       callback_data.tunable = tunable;
-       callback_data.fatal = false;
-
-       data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
-       data.dptr  = talloc_size(tmp_ctx, data.dsize);
-       t = (struct ctdb_control_get_tunable *)data.dptr;
-       t->length = strlen(tunable)+1;
-       memcpy(t->name, tunable, t->length);
-       nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
-       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
-                                     nodes, 0, TAKEOVER_TIMEOUT(),
-                                     false, data,
-                                     get_tunable_callback,
-                                     get_tunable_fail_callback,
-                                     &callback_data) != 0) {
-               if (callback_data.fatal) {
-                       talloc_free(tvals);
-                       tvals = NULL;
-               }
-       }
-       talloc_free(nodes);
-       talloc_free(data.dptr);
-
-       return tvals;
-}
-
-static struct ctdb_node_map *
-ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
-                        const struct ctdb_node_map_old *old)
-{
-       struct ctdb_node_map *new;
-
-       new = talloc(mem_ctx, struct ctdb_node_map);
-       if (new == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return NULL;
-       }
-       new->num = old->num;
-       new->node = talloc_zero_array(new,
-                                     struct ctdb_node_and_flags, new->num);
-       memcpy(new->node, &old->nodes[0],
-              sizeof(struct ctdb_node_and_flags) * new->num);
-
-       return new;
-}
-
-
-static bool set_ipflags(struct ctdb_context *ctdb,
-                       struct ipalloc_state *ipalloc_state,
-                       struct ctdb_node_map_old *nodemap)
-{
-       uint32_t *tval_noiptakeover;
-       uint32_t *tval_noiphostonalldisabled;
-       struct ctdb_node_map *new;
-
-       tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
-                                                  "NoIPTakeover", 0);
-       if (tval_noiptakeover == NULL) {
-               return false;
-       }
-
-       tval_noiphostonalldisabled =
-               get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
-                                      "NoIPHostOnAllDisabled", 0);
-       if (tval_noiphostonalldisabled == NULL) {
-               /* Caller frees tmp_ctx */
-               return false;
-       }
-
-       new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
-       if (new == NULL) {
-               return false;
-       }
-
-       ipalloc_set_node_flags(ipalloc_state, new,
-                            tval_noiptakeover,
-                            tval_noiphostonalldisabled);
-
-       talloc_free(tval_noiptakeover);
-       talloc_free(tval_noiphostonalldisabled);
-       talloc_free(new);
-
-       return true;
-}
-
-static enum ipalloc_algorithm
-determine_algorithm(const struct ctdb_tunable_list *tunables)
-{
-       if (1 == tunables->lcp2_public_ip_assignment) {
-               return IPALLOC_LCP2;
-       } else if (1 == tunables->deterministic_public_ips) {
-               return IPALLOC_DETERMINISTIC;
-       } else {
-               return IPALLOC_NONDETERMINISTIC;
-       }
-}
-
-struct takeover_callback_data {
-       uint32_t num_nodes;
-       unsigned int *fail_count;
-};
-
-static struct takeover_callback_data *
-takeover_callback_data_init(TALLOC_CTX *mem_ctx,
-                           uint32_t num_nodes)
-{
-       static struct takeover_callback_data *takeover_data;
-
-       takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
-       if (takeover_data == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return NULL;
-       }
-
-       takeover_data->fail_count = talloc_zero_array(takeover_data,
-                                                     unsigned int, num_nodes);
-       if (takeover_data->fail_count == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               talloc_free(takeover_data);
-               return NULL;
-       }
-
-       takeover_data->num_nodes = num_nodes;
-
-       return takeover_data;
-}
-
-static void takeover_run_fail_callback(struct ctdb_context *ctdb,
-                                      uint32_t node_pnn, int32_t res,
-                                      TDB_DATA outdata, void *callback_data)
-{
-       struct takeover_callback_data *cd =
-               talloc_get_type_abort(callback_data,
-                                     struct takeover_callback_data);
-
-       if (node_pnn >= cd->num_nodes) {
-               DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
-               return;
-       }
-
-       if (cd->fail_count[node_pnn] == 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Node %u failed the takeover run\n", node_pnn));
-       }
-
-       cd->fail_count[node_pnn]++;
-}
-
-static void takeover_run_process_failures(struct ctdb_context *ctdb,
-                                         struct takeover_callback_data *tcd)
-{
-       unsigned int max_fails = 0;
-       uint32_t max_pnn = -1;
-       uint32_t i;
-
-       for (i = 0; i < tcd->num_nodes; i++) {
-               if (tcd->fail_count[i] > max_fails) {
-                       max_pnn = i;
-                       max_fails = tcd->fail_count[i];
-               }
-       }
 
-       if (max_fails > 0) {
-               int ret;
-               TDB_DATA data;
-
-               DEBUG(DEBUG_ERR,
-                     ("Sending banning credits to %u with fail count %u\n",
-                      max_pnn, max_fails));
-
-               data.dptr = (uint8_t *)&max_pnn;
-               data.dsize = sizeof(uint32_t);
-               ret = ctdb_client_send_message(ctdb,
-                                              CTDB_BROADCAST_CONNECTED,
-                                              CTDB_SRVID_BANNING,
-                                              data);
+               ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR,
-                             ("Failed to set banning credits for node %u\n",
-                              max_pnn));
-               }
-       }
-}
-
-/*
- * Recalculate the allocation of public IPs to nodes and have the
- * nodes host their allocated addresses.
- *
- * - Initialise IP allocation state.  Pass:
-     + algorithm to be used;
-     + whether IP rebalancing ("failback") should be done (this uses a
-       cluster-wide configuration variable and only the value form the
-       master node is used); and
- *   + list of nodes to force rebalance (internal structure, currently
- *     no way to fetch, only used by LCP2 for nodes that have had new
- *     IP addresses added).
- * - Set IP flags for IP allocation based on node map and tunables
- *   NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
- *   (tunable fetching done separately so values can be faked in unit
- *   testing)
- * - Retrieve known and available IP addresses (done separately so
- *   values can be faked in unit testing)
- * - Use ipalloc_set_public_ips() to set known and available IP
-     addresses for allocation
- * - If cluster can't host IP addresses then early exit
- * - Run IP allocation algorithm
- * - Send RELEASE_IP to all nodes for IPs they should not host
- * - Send TAKE_IP to all nodes for IPs they should host
- * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
- */
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
-                     uint32_t *force_rebalance_nodes)
-{
-       int i, ret;
-       struct ctdb_public_ip ip;
-       uint32_t *nodes;
-       struct public_ip_list *all_ips, *tmp_ip;
-       TDB_DATA data;
-       struct timeval timeout;
-       struct client_async_data *async_data;
-       struct ctdb_client_control_state *state;
-       TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
-       struct ipalloc_state *ipalloc_state;
-       struct ctdb_public_ip_list *known_ips, *available_ips;
-       struct takeover_callback_data *takeover_data;
-
-       /* Initialise fail callback data to be used with
-        * takeover_run_fail_callback().  A failure in any of the
-        * following steps will cause an early return, so this can be
-        * reused for each of those steps without re-initialising. */
-       takeover_data = takeover_callback_data_init(tmp_ctx,
-                                                   nodemap->num);
-       if (takeover_data == NULL) {
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       /* Default timeout for early jump to IPREALLOCATED.  See below
-        * for explanation of 3 times... */
-       timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
-
-       /*
-        * ip failover is completely disabled, just send out the 
-        * ipreallocated event.
-        */
-       if (ctdb->tunable.disable_ip_failover != 0) {
-               goto ipreallocated;
-       }
-
-       ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
-                                          determine_algorithm(&ctdb->tunable),
-                                          (ctdb->tunable.no_ip_failback != 0),
-                                          force_rebalance_nodes);
-       if (ipalloc_state == NULL) {
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
-               DEBUG(DEBUG_ERR,
-                     ("Failed to set IP flags - aborting takeover run\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       /* Fetch known/available public IPs from each active node */
-       /* Fetch lists of known public IPs from all nodes */
-       known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
-                                                nodemap, 0);
-       if (known_ips == NULL) {
-               DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-       available_ips = ctdb_fetch_remote_public_ips(
-               ctdb, ipalloc_state, nodemap,
-               CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
-       if (available_ips == NULL) {
-               DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
-               DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       if (! ipalloc_can_host_ips(ipalloc_state)) {
-               DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
-               goto ipreallocated;
-       }
-
-       /* Do the IP reassignment calculations */
-       all_ips = ipalloc(ipalloc_state);
-       if (all_ips == NULL) {
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       /* Now tell all nodes to release any public IPs should not
-        * host.  This will be a NOOP on nodes that don't currently
-        * hold the given IP.
-        */
-       async_data = talloc_zero(tmp_ctx, struct client_async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
-       async_data->fail_callback = takeover_run_fail_callback;
-       async_data->callback_data = takeover_data;
-
-       ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
-
-       /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
-        * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
-        * seconds.  However, RELEASE_IP can take longer due to TCP
-        * connection killing, so sometimes needs more time.
-        * Therefore, use a cumulative timeout of TakeoverTimeout * 3
-        * seconds across all 3 stages.  No explicit expiry checks are
-        * needed before each stage because tevent is smart enough to
-        * fire the timeouts even if they are in the past.  Initialise
-        * this here so it explicitly covers the stages we're
-        * interested in but, in particular, not the time taken by the
-        * ipalloc().
-        */
-       timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
-
-       /* Send a RELEASE_IP to all nodes that should not be hosting
-        * each IP.  For each IP, all but one of these will be
-        * redundant.  However, the redundant ones are used to tell
-        * nodes which node should be hosting the IP so that commands
-        * like "ctdb ip" can display a particular nodes idea of who
-        * is hosting what. */
-       for (i=0;i<nodemap->num;i++) {
-               /* don't talk to unconnected nodes, but do talk to banned nodes */
-               if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
-                       continue;
-               }
-
-               for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
-                       if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
-                               /* This node should be serving this
-                                  vnn so don't tell it to release the ip
-                               */
-                               continue;
-                       }
-                       ip.pnn  = tmp_ip->pnn;
-                       ip.addr = tmp_ip->addr;
-
-                       data.dsize = sizeof(ip);
-                       data.dptr  = (uint8_t *)&ip;
-                       state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
-                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
-                                                 data, async_data,
-                                                 &timeout, NULL);
-                       if (state == NULL) {
-                               DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
-                               talloc_free(tmp_ctx);
-                               return -1;
-                       }
-
-                       ctdb_client_async_add(async_data, state);
-               }
-       }
-       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
-               goto fail;
-       }
-       talloc_free(async_data);
-
-
-       /* For each IP, send a TAKOVER_IP to the node that should be
-        * hosting it.  Many of these will often be redundant (since
-        * the allocation won't have changed) but they can be useful
-        * to recover from inconsistencies. */
-       async_data = talloc_zero(tmp_ctx, struct client_async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
-       async_data->fail_callback = takeover_run_fail_callback;
-       async_data->callback_data = takeover_data;
-
-       for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
-               if (tmp_ip->pnn == -1) {
-                       /* this IP won't be taken over */
-                       continue;
+                       D_ERR("Badly formed line %u in public address list\n",
+                             i+1);
+                       talloc_free(lines);
+                       return -1;
                }
 
-               ip.pnn  = tmp_ip->pnn;
-               ip.addr = tmp_ip->addr;
-
-               data.dsize = sizeof(ip);
-               data.dptr  = (uint8_t *)&ip;
-               state = ctdb_control_send(ctdb, tmp_ip->pnn,
-                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
-                                         data, async_data, &timeout, NULL);
-               if (state == NULL) {
-                       DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
-                       talloc_free(tmp_ctx);
+               if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
+                       DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
+                       talloc_free(lines);
                        return -1;
                }
-
-               ctdb_client_async_add(async_data, state);
-       }
-       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
-               goto fail;
        }
 
-ipreallocated:
-       /*
-        * Tell all nodes to run eventscripts to process the
-        * "ipreallocated" event.  This can do a lot of things,
-        * including restarting services to reconfigure them if public
-        * IPs have moved.  Once upon a time this event only used to
-        * update natgw.
-        */
-       nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
-       ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
-                                       nodes, 0, timeout,
-                                       false, tdb_null,
-                                       NULL, takeover_run_fail_callback,
-                                       takeover_data);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
-               goto fail;
-       }
 
-       talloc_free(tmp_ctx);
-       return ret;
+       D_NOTICE("Loaded public addresses from %s\n",
+                ctdb->public_addresses_file);
 
-fail:
-       takeover_run_process_failures(ctdb, takeover_data);
-       talloc_free(tmp_ctx);
-       return -1;
+       talloc_free(lines);
+       return 0;
 }
 
-
 /*
   destroy a ctdb_client_ip structure
  */
@@ -2004,7 +1456,7 @@ static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection
           and we don't need to do anything
         */
        if (vnn->tcp_array == NULL) {
-               DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
+               DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
                        ctdb_addr_to_str(&conn->dst),
                        ntohs(conn->dst.ip.sin_port)));
                return;
@@ -2016,7 +1468,7 @@ static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection
         */
        tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
        if (tcpp == NULL) {
-               DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
+               DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
                        ctdb_addr_to_str(&conn->dst),
                        ntohs(conn->dst.ip.sin_port)));
                return;
@@ -2075,24 +1527,23 @@ int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
 }
 
 
+static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
+                                             bool force);
+
 /*
   Called when another daemon starts - causes all tickles for all
   public addresses we are serving to be sent to the new node on the
-  next check.  This actually causes the next scheduled call to
-  tdb_update_tcp_tickles() to update all nodes.  This is simple and
+  next check.  This actually causes the tickles to be sent to the
+  other node immediately.  If sending fails for an address, the
+  periodic timer retries the update on its next run.  This is simple and
   doesn't require careful error handling.
  */
 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
 {
-       struct ctdb_vnn *vnn;
-
        DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
                           (unsigned long) pnn));
 
-       for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
-               vnn->tcp_update_needed = true;
-       }
-
+       ctdb_send_set_tcp_tickles_for_all(ctdb, true);
        return 0;
 }
 
@@ -2158,7 +1609,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
                if (vnn->update_in_flight) {
                        DEBUG(DEBUG_WARNING,
                              (__location__
-                              " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
+                              " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
                                    ctdb_addr_to_str(&vnn->public_address),
                                    vnn->public_netmask_bits,
                                    ctdb_vnn_iface_string(vnn)));
@@ -2575,43 +2026,53 @@ static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
        return ret;
 }
 
-
-/*
-  perform tickle updates if required
- */
-static void ctdb_update_tcp_tickles(struct tevent_context *ev,
-                                   struct tevent_timer *te,
-                                   struct timeval t, void *private_data)
+static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
+                                             bool force)
 {
-       struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
-       int ret;
        struct ctdb_vnn *vnn;
+       int ret;
 
-       for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
-               /* we only send out updates for public addresses that 
+       for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
+               /* we only send out updates for public addresses that
                   we have taken over
                 */
                if (ctdb->pnn != vnn->pnn) {
                        continue;
                }
+
                /* We only send out the updates if we need to */
-               if (!vnn->tcp_update_needed) {
+               if (!force && !vnn->tcp_update_needed) {
                        continue;
                }
+
                ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
                                                       &vnn->public_address,
                                                       vnn->tcp_array);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
-                               ctdb_addr_to_str(&vnn->public_address)));
+                       D_ERR("Failed to send the tickle update for ip %s\n",
+                             ctdb_addr_to_str(&vnn->public_address));
+                       vnn->tcp_update_needed = true;
                } else {
-                       DEBUG(DEBUG_INFO,
-                             ("Sent tickle update for public address %s\n",
-                              ctdb_addr_to_str(&vnn->public_address)));
+                       D_INFO("Sent tickle update for ip %s\n",
+                              ctdb_addr_to_str(&vnn->public_address));
                        vnn->tcp_update_needed = false;
                }
        }
 
+}
+
+/*
+  perform tickle updates if required
+ */
+static void ctdb_update_tcp_tickles(struct tevent_context *ev,
+                                   struct tevent_timer *te,
+                                   struct timeval t, void *private_data)
+{
+       struct ctdb_context *ctdb = talloc_get_type(
+               private_data, struct ctdb_context);
+
+       ctdb_send_set_tcp_tickles_for_all(ctdb, false);
+
        tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
                         timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
                         ctdb_update_tcp_tickles, ctdb);
@@ -2809,7 +2270,7 @@ static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
                DEBUG(DEBUG_ERR,
                      (" \"ipreallocated\" event script failed (status %d)\n",
                       status));
-               if (status == -ETIME) {
+               if (status == -ETIMEDOUT) {
                        ctdb_ban_self(ctdb);
                }
        }
@@ -3113,10 +2574,9 @@ int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_re
                signed char res = 0;
 
                close(h->fd[0]);
-               debug_extra = talloc_asprintf(NULL, "reloadips:");
 
                prctl_set_comment("ctdb_reloadips");
-               if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
+               if (switch_from_server_to_client(ctdb) != 0) {
                        DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
                        res = -1;
                } else {