recoverd: Remove an orphaned comment
[metze/samba/wip.git] / ctdb / server / ctdb_takeover.c
index d8e77dfb1f04505efbaddacc66456327bc67705a..130df8a3484f775c7ddffc80be498449a1dc7501 100644 (file)
@@ -19,7 +19,7 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -861,7 +861,7 @@ static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr
                                        (unsigned)client->pid,
                                        ctdb_addr_to_str(addr),
                                        ip->client_id));
-                               ctdb_kill(ctdb, client->pid, SIGKILL);
+                               kill(client->pid, SIGKILL);
                        }
                }
        }
@@ -881,6 +881,14 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
                ctdb_ban_self(ctdb);
        }
 
+       if (ctdb->do_checkpublicip && ctdb_sys_have_ip(state->addr)) {
+               DEBUG(DEBUG_ERR, ("IP %s still hosted during release IP callback, failing\n",
+                                 ctdb_addr_to_str(state->addr)));
+               ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
+               talloc_free(state);
+               return;
+       }
+
        /* send a message to all clients of this node telling them
           that the cluster has been reconfigured and they should
           release any sockets on this IP */
@@ -977,6 +985,21 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
                        DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
                        return 0;
                }
+               if (vnn->iface == NULL) {
+                       DEBUG(DEBUG_WARNING,
+                             ("Public IP %s is hosted on interface %s but we have no VNN\n",
+                              ctdb_addr_to_str(&pip->addr),
+                              iface));
+               } else if (strcmp(iface, ctdb_vnn_iface_string(vnn)) != 0) {
+                       DEBUG(DEBUG_WARNING,
+                             ("Public IP %s is hosted on inteterface %s but VNN says %s\n",
+                              ctdb_addr_to_str(&pip->addr),
+                              iface,
+                              ctdb_vnn_iface_string(vnn)));
+                       /* Should we fix vnn->iface?  If we do, what
+                        * happens to reference counts?
+                        */
+               }
        } else {
                iface = strdup(ctdb_vnn_iface_string(vnn));
        }
@@ -1286,6 +1309,12 @@ int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
        return 0;
 }
 
+struct ctdb_public_ip_list {                 /* one entry of a singly linked list of public IPs */
+       struct ctdb_public_ip_list *next;     /* following entry; list walks stop when this is NULL */
+       uint32_t pnn;                         /* number of the node this IP is assigned to */
+       ctdb_sock_addr addr;                  /* the address this entry describes */
+};
+
 /* Given a physical node, return the number of
    public addresses that is currently assigned to this node.
 */
@@ -1324,7 +1353,7 @@ static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
                return false;
        }
 
-       for (i=0;i<public_ips->num;i++) {
+       for (i=0; i<public_ips->num; i++) {
                if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
                        /* yes, this node can serve this public ip */
                        return true;
@@ -1357,9 +1386,9 @@ static int find_takeover_node(struct ctdb_context *ctdb,
        int pnn, min=0, num;
        int i, numnodes;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
        pnn    = -1;
-       for (i=0;i<numnodes;i++) {
+       for (i=0; i<numnodes; i++) {
                /* verify that this node can serve this ip */
                if (!can_node_takeover_ip(ctdb, i, ipflags[i], ip)) {
                        /* no it couldnt   so skip to the next node */
@@ -1626,7 +1655,7 @@ static void basic_failback(struct ctdb_context *ctdb,
        int maxnode, maxnum, minnode, minnum, num, retries;
        struct ctdb_public_ip_list *tmp_ip;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
        retries = 0;
 
 try_again:
@@ -1648,7 +1677,7 @@ try_again:
                */
                maxnode = -1;
                minnode = -1;
-               for (i=0;i<numnodes;i++) {
+               for (i=0; i<numnodes; i++) {
                        /* only check nodes that can actually serve this ip */
                        if (!can_node_takeover_ip(ctdb, i, ipflags[i], tmp_ip)) {
                                /* no it couldnt   so skip to the next node */
@@ -1742,14 +1771,14 @@ static void lcp2_init(struct ctdb_context *tmp_ctx,
        int i, numnodes;
        struct ctdb_public_ip_list *tmp_ip;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
 
        *rebalance_candidates = talloc_array(tmp_ctx, bool, numnodes);
        CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
        *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, numnodes);
        CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
 
-       for (i=0;i<numnodes;i++) {
+       for (i=0; i<numnodes; i++) {
                (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
                /* First step: assume all nodes are candidates */
                (*rebalance_candidates)[i] = true;
@@ -1801,7 +1830,7 @@ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
        bool should_loop = true;
        bool have_unassigned = true;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
 
        while (have_unassigned && should_loop) {
                should_loop = false;
@@ -1819,7 +1848,7 @@ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
                                continue;
                        }
 
-                       for (dstnode=0; dstnode < numnodes; dstnode++) {
+                       for (dstnode=0; dstnode<numnodes; dstnode++) {
                                /* only check nodes that can actually takeover this ip */
                                if (!can_node_takeover_ip(ctdb, dstnode,
                                                          ipflags[dstnode],
@@ -1899,12 +1928,13 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
        struct ctdb_public_ip_list *tmp_ip;
 
        /* Find an IP and destination node that best reduces imbalance. */
+       srcimbl = 0;
        minip = NULL;
        minsrcimbl = 0;
        mindstnode = -1;
        mindstimbl = 0;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
 
        DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
        DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
@@ -1925,7 +1955,7 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
                 * to do gratuitous failover of IPs just to make minor
                 * balance improvements.
                 */
-               for (dstnode=0; dstnode < numnodes; dstnode++) {
+               for (dstnode=0; dstnode<numnodes; dstnode++) {
                        if (!rebalance_candidates[dstnode]) {
                                continue;
                        }
@@ -2009,7 +2039,7 @@ static void lcp2_failback(struct ctdb_context *ctdb,
        struct lcp2_imbalance_pnn * lips;
        bool again;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
 
 try_again:
 
@@ -2018,7 +2048,7 @@ try_again:
         * continuing on...
         */
        num_rebalance_candidates = 0;
-       for (i = 0; i < numnodes; i++) {
+       for (i=0; i<numnodes; i++) {
                if (rebalance_candidates[i]) {
                        num_rebalance_candidates++;
                }
@@ -2032,7 +2062,7 @@ try_again:
         * used, so this doesn't cost much...
         */
        lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, numnodes);
-       for (i = 0; i < numnodes; i++) {
+       for (i=0; i<numnodes; i++) {
                lips[i].imbalance = lcp2_imbalances[i];
                lips[i].pnn = i;
        }
@@ -2040,7 +2070,7 @@ try_again:
              lcp2_cmp_imbalance_pnn);
 
        again = false;
-       for (i = 0; i < numnodes; i++) {
+       for (i=0; i<numnodes; i++) {
                /* This means that all nodes had 0 or 1 addresses, so
                 * can't be imbalanced.
                 */
@@ -2097,7 +2127,7 @@ static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
        struct ctdb_public_ip_list *tmp_ip;
        int i, numnodes;
 
-       numnodes = talloc_get_size(ipflags) / sizeof(struct ctdb_ipflags);
+       numnodes = talloc_array_length(ipflags);
 
        DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
        /* Allocate IPs to nodes in a modulo fashion so that IPs will
@@ -2106,7 +2136,7 @@ static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
        */
 
        for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
-               tmp_ip->pnn = i%numnodes;
+               tmp_ip->pnn = i % numnodes;
        }
 
        /* IP failback doesn't make sense with deterministic
@@ -2229,6 +2259,7 @@ static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
 struct get_tunable_callback_data {    /* state shared by the GET_TUNABLE callbacks */
        const char *tunable;           /* tunable name, used in the log messages */
        uint32_t *out;                 /* per-node values, written at index pnn */
+       bool fatal;                    /* set by the callbacks on errors that invalidate the result */
 };
 
 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
@@ -2240,9 +2271,7 @@ static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
        int size;
 
        if (res != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Failure to read \"%s\" tunable from remote node %d\n",
-                      cd->tunable, pnn));
+               /* Already handled in fail callback */
                return;
        }
 
@@ -2250,10 +2279,11 @@ static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
                DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
                                 cd->tunable, pnn, (int)sizeof(uint32_t),
                                 (int)outdata.dsize));
+               cd->fatal = true;
                return;
        }
 
-       size = talloc_get_size(cd->out) / sizeof(uint32_t);
+       size = talloc_array_length(cd->out);
        if (pnn >= size) {
                DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
                                 cd->tunable, pnn, size));
@@ -2264,21 +2294,56 @@ static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
        cd->out[pnn] = *(uint32_t *)outdata.dptr;
 }
 
+static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+                                      int32_t res, TDB_DATA outdata,
+                                      void *callback)
+{
+       /* Failure handler: log why reading the tunable failed and set
+        * ->fatal for everything except "not implemented" replies. */
+       struct get_tunable_callback_data *state = callback;
+
+       if (res == -EINVAL || res == -1) {
+               /* Logged as a warning only; ->fatal is left untouched */
+               DEBUG(DEBUG_WARNING,
+                     ("Tunable \"%s\" not implemented on node %d\n",
+                      state->tunable, pnn));
+               return;
+       }
+
+       if (res == -ETIME) {
+               DEBUG(DEBUG_ERR,
+                     ("Timed out getting tunable \"%s\" from node %d\n",
+                      state->tunable, pnn));
+       } else {
+               DEBUG(DEBUG_ERR,
+                     ("Unexpected error getting tunable \"%s\" from node %d\n",
+                      state->tunable, pnn));
+       }
+       state->fatal = true;
+}
+
 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
                                        TALLOC_CTX *tmp_ctx,
                                        struct ctdb_node_map *nodemap,
-                                       const char *tunable)
+                                       const char *tunable,
+                                       uint32_t default_value)
 {
        TDB_DATA data;
        struct ctdb_control_get_tunable *t;
        uint32_t *nodes;
        uint32_t *tvals;
        struct get_tunable_callback_data callback_data;
+       int i;
 
-       tvals = talloc_zero_array(tmp_ctx, uint32_t, nodemap->num);
+       tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
        CTDB_NO_MEMORY_NULL(ctdb, tvals);
+       for (i=0; i<nodemap->num; i++) {
+               tvals[i] = default_value;
+       }
+               
        callback_data.out = tvals;
        callback_data.tunable = tunable;
+       callback_data.fatal = false;
 
        data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
        data.dptr  = talloc_size(tmp_ctx, data.dsize);
@@ -2289,9 +2354,13 @@ static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
        if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
                                      nodes, 0, TAKEOVER_TIMEOUT(),
                                      false, data,
-                                     get_tunable_callback, NULL,
+                                     get_tunable_callback,
+                                     get_tunable_fail_callback,
                                      &callback_data) != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " ctdb_control to get %s tunable failed\n", tunable));
+               if (callback_data.fatal) {
+                       talloc_free(tvals);
+                       tvals = NULL;
+               }
        }
        talloc_free(nodes);
        talloc_free(data.dptr);
@@ -2299,6 +2368,98 @@ static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
        return tvals;
 }
 
+struct get_runstate_callback_data {   /* state shared by the GET_RUNSTATE callbacks */
+       enum ctdb_runstate *out;       /* result array, written at index pnn on success */
+       bool fatal;                    /* set when a reply has the wrong size or times out */
+};
+
+static void get_runstate_callback(struct ctdb_context *ctdb, uint32_t pnn,
+                                 int32_t res, TDB_DATA outdata,
+                                 void *callback_data)
+{
+       /* Success handler: store the runstate reported by node pnn */
+       struct get_runstate_callback_data *state = callback_data;
+       int num_slots;
+
+       if (res != 0) {
+               /* Failures are dealt with by the fail callback */
+               return;
+       }
+
+       if (outdata.dsize != sizeof(uint32_t)) {
+               DEBUG(DEBUG_ERR,("Wrong size of returned data when getting runstate from node %d. Expected %d bytes but received %d bytes\n",
+                                pnn, (int)sizeof(uint32_t),
+                                (int)outdata.dsize));
+               state->fatal = true;
+               return;
+       }
+
+       num_slots = talloc_array_length(state->out);
+       if (pnn >= num_slots) {
+               DEBUG(DEBUG_ERR,("Got reply from node %d but nodemap only has %d entries\n",
+                                pnn, num_slots));
+               return;
+       }
+
+       state->out[pnn] = (enum ctdb_runstate)*(uint32_t *)outdata.dptr;
+}
+
+static void get_runstate_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+                                      int32_t res, TDB_DATA outdata,
+                                      void *callback)
+{
+       /* Failure handler: only a timeout marks the request fatal */
+       struct get_runstate_callback_data *state = callback;
+
+       if (res == -ETIME) {
+               DEBUG(DEBUG_ERR,
+                     ("Timed out getting runstate from node %d\n", pnn));
+               state->fatal = true;
+               return;
+       }
+
+       /* Any other error is only warned about; ->fatal stays as it was */
+       DEBUG(DEBUG_WARNING,
+             ("Error getting runstate from node %d - assuming runstates not supported\n",
+              pnn));
+}
+
+/* Query runstates with CTDB_CONTROL_GET_RUNSTATE; NULL result means fatal. */
+static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb,
+                                                   TALLOC_CTX *tmp_ctx,
+                                                   struct ctdb_node_map *nodemap,
+                                                   enum ctdb_runstate default_value)
+{
+       uint32_t *nodes;
+       enum ctdb_runstate *rs;
+       struct get_runstate_callback_data callback_data;
+       int i;
+
+       rs = talloc_array(tmp_ctx, enum ctdb_runstate, nodemap->num);
+       CTDB_NO_MEMORY_NULL(ctdb, rs);
+       for (i=0; i<nodemap->num; i++) {
+               rs[i] = default_value; /* kept unless a reply overwrites it */
+       }
+
+       callback_data.out = rs;
+       callback_data.fatal = false;
+
+       nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_RUNSTATE,
+                                     nodes, 0, TAKEOVER_TIMEOUT(),
+                                     true, tdb_null,
+                                     get_runstate_callback,
+                                     get_runstate_fail_callback,
+                                     &callback_data) != 0 &&
+           callback_data.fatal) {
+               talloc_free(rs); /* talloc memory: must not be passed to free() */
+               rs = NULL;
+       }
+       talloc_free(nodes);
+
+       return rs;
+}
+
 /* Set internal flags for IP allocation:
  *   Clear ip flags
  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
@@ -2313,7 +2474,8 @@ set_ipflags_internal(struct ctdb_context *ctdb,
                     TALLOC_CTX *tmp_ctx,
                     struct ctdb_node_map *nodemap,
                     uint32_t *tval_noiptakeover,
-                    uint32_t *tval_noiphostonalldisabled)
+                    uint32_t *tval_noiphostonalldisabled,
+                    enum ctdb_runstate *runstate)
 {
        int i;
        struct ctdb_ipflags *ipflags;
@@ -2328,6 +2490,11 @@ set_ipflags_internal(struct ctdb_context *ctdb,
                        ipflags[i].noiptakeover = true;
                }
 
+               /* Can not host IPs on node not in RUNNING state */
+               if (runstate[i] != CTDB_RUNSTATE_RUNNING) {
+                       ipflags[i].noiphost = true;
+                       continue;
+               }
                /* Can not host IPs on INACTIVE node */
                if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
                        ipflags[i].noiphost = true;
@@ -2364,37 +2531,144 @@ static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb,
        uint32_t *tval_noiptakeover;
        uint32_t *tval_noiphostonalldisabled;
        struct ctdb_ipflags *ipflags;
+       enum ctdb_runstate *runstate;
+
 
        tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
-                                                  "NoIPTakeover");
+                                                  "NoIPTakeover", 0);
        if (tval_noiptakeover == NULL) {
                return NULL;
        }
 
        tval_noiphostonalldisabled =
                get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
-                                      "NoIPHostOnAllDisabled");
+                                      "NoIPHostOnAllDisabled", 0);
        if (tval_noiphostonalldisabled == NULL) {
+               /* Caller frees tmp_ctx */
+               return NULL;
+       }
+
+       /* Any nodes where CTDB_CONTROL_GET_RUNSTATE is not supported
+        * will default to CTDB_RUNSTATE_RUNNING.  This ensures
+        * reasonable behaviour on a mixed cluster during upgrade.
+        */
+       runstate = get_runstate_from_nodes(ctdb, tmp_ctx, nodemap,
+                                          CTDB_RUNSTATE_RUNNING);
+       if (runstate == NULL) {
+               /* Caller frees tmp_ctx */
                return NULL;
        }
 
        ipflags = set_ipflags_internal(ctdb, tmp_ctx, nodemap,
                                       tval_noiptakeover,
-                                      tval_noiphostonalldisabled);
+                                      tval_noiphostonalldisabled,
+                                      runstate);
 
        talloc_free(tval_noiptakeover);
        talloc_free(tval_noiphostonalldisabled);
+       talloc_free(runstate);
 
        return ipflags;
 }
 
+struct iprealloc_callback_data {              /* state for iprealloc_fail_callback */
+       bool *retry_nodes;                     /* indexed by pnn; true when that node is flagged for a retry */
+       int retry_count;                       /* incremented each time a node is flagged */
+       client_async_callback fail_callback;   /* real fail callback, invoked for timeouts */
+       void *fail_callback_data;              /* argument handed on to fail_callback */
+       struct ctdb_node_map *nodemap;         /* consulted for the node's NODE_FLAGS_STOPPED flag */
+};
+
+static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+                                       int32_t res, TDB_DATA outdata,
+                                       void *callback)
+{
+       /* Timeouts go to the real fail callback; other errors flag a retry */
+       int numnodes;
+       struct iprealloc_callback_data *cd = callback;
+
+       switch (res) {
+       case -ETIME:
+               /* If the control timed out then that's a real error,
+                * so call the real fail callback
+                */
+               cd->fail_callback(ctdb, pnn, res, outdata,
+                                 cd->fail_callback_data);
+               break;
+       default:
+               /* If not a timeout then either the ipreallocated
+                * eventscript (or some setup) failed.  This might
+                * have failed because the IPREALLOCATED control isn't
+                * implemented - right now there is no way of knowing
+                * because the error codes are all folded down to -1.
+                * Consider retrying using EVENTSCRIPT control...
+                */
+
+               numnodes = talloc_array_length(cd->retry_nodes);
+               if (pnn >= numnodes) { /* valid indexes are 0..numnodes-1 */
+                       DEBUG(DEBUG_ERR,
+                             ("ipreallocated failure from node %d, but only %d nodes in nodemap\n",
+                              pnn, numnodes));
+                       return;
+               }
+
+               /* Can't run the "ipreallocated" event on a STOPPED node */
+               if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_STOPPED) {
+                       DEBUG(DEBUG_ERR,
+                             ("ipreallocated failure from node %d, but node is stopped - not flagging a retry\n",
+                              pnn));
+                       return;
+               }
+
+               DEBUG(DEBUG_WARNING,
+                     ("ipreallocated failure from node %d, flagging retry\n",
+                      pnn));
+               cd->retry_nodes[pnn] = true;
+               cd->retry_count++;
+       }
+}
+
+struct takeover_callback_data {               /* state for takeover_run_fail_callback */
+       bool *node_failed;                     /* indexed by nodemap slot; true once a failure was forwarded */
+       client_async_callback fail_callback;   /* wrapped callback that receives the failure */
+       void *fail_callback_data;              /* argument handed on to fail_callback */
+       struct ctdb_node_map *nodemap;         /* searched to map a failing PNN to its slot */
+};
+
+static void takeover_run_fail_callback(struct ctdb_context *ctdb,
+                                      uint32_t node_pnn, int32_t res,
+                                      TDB_DATA outdata, void *callback_data)
+{
+       /* Forward a failure to the wrapped callback once per nodemap slot */
+       struct takeover_callback_data *state =
+               talloc_get_type_abort(callback_data,
+                                     struct takeover_callback_data);
+       int slot = 0;
+
+       /* Find the nodemap index whose PNN matches the failing node */
+       while (slot < state->nodemap->num &&
+              node_pnn != state->nodemap->nodes[slot].pnn) {
+               slot++;
+       }
+       if (slot == state->nodemap->num) {
+               DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
+               return;
+       }
+       if (state->node_failed[slot]) {
+               return; /* this node's failure was already passed on */
+       }
+       state->node_failed[slot] = true;
+       state->fail_callback(ctdb, node_pnn, res, outdata,
+                            state->fail_callback_data);
+}
+
 /*
   make any IP alias changes for public addresses that are necessary 
  */
 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                      client_async_callback fail_callback, void *callback_data)
 {
-       int i;
+       int i, j;
        struct ctdb_public_ip ip;
        struct ctdb_public_ipv4 ipv4;
        uint32_t *nodes;
@@ -2406,6 +2680,9 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
        uint32_t disable_timeout;
        struct ctdb_ipflags *ipflags;
+       struct takeover_callback_data *takeover_data;
+       struct iprealloc_callback_data iprealloc_data;
+       bool *retry_data;
 
        /*
         * ip failover is completely disabled, just send out the 
@@ -2427,10 +2704,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
        /* Do the IP reassignment calculations */
        ctdb_takeover_run_core(ctdb, ipflags, &all_ips);
 
-       /* The IP flags need to be cleared because they should never
-        * be seen outside the IP allocation code.
-        */
-
        /* The recovery daemon does regular sanity checks of the IPs.
         * However, sometimes it is overzealous and thinks changes are
         * required when they're already underway.  This stops the
@@ -2444,14 +2717,25 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                DEBUG(DEBUG_INFO,("Failed to disable ip verification\n"));
        }
 
-       /* now tell all nodes to delete any alias that they should not
-          have.  This will be a NOOP on nodes that don't currently
-          hold the given alias */
+       /* Now tell all nodes to release any public IPs they should not
+        * host.  This will be a NOOP on nodes that don't currently
+        * hold the given IP.
+        */
+       takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
+
+       takeover_data->node_failed = talloc_zero_array(tmp_ctx,
+                                                      bool, nodemap->num);
+       CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
+       takeover_data->fail_callback = fail_callback;
+       takeover_data->fail_callback_data = callback_data;
+       takeover_data->nodemap = nodemap;
+
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);
 
-       async_data->fail_callback = fail_callback;
-       async_data->callback_data = callback_data;
+       async_data->fail_callback = takeover_run_fail_callback;
+       async_data->callback_data = takeover_data;
 
        for (i=0;i<nodemap->num;i++) {
                /* don't talk to unconnected nodes, but do talk to banned nodes */
@@ -2565,13 +2849,53 @@ ipreallocated:
         * IPs have moved.  Once upon a time this event only used to
         * update natwg.
         */
+       retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
+       CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
+       iprealloc_data.retry_nodes = retry_data;
+       iprealloc_data.retry_count = 0;
+       iprealloc_data.fail_callback = fail_callback;
+       iprealloc_data.fail_callback_data = callback_data;
+       iprealloc_data.nodemap = nodemap;
+
        nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
        if (ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
                                      nodes, 0, TAKEOVER_TIMEOUT(),
                                      false, tdb_null,
-                                     NULL, fail_callback,
-                                     callback_data) != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
+                                     NULL, iprealloc_fail_callback,
+                                     &iprealloc_data) != 0) {
+
+               /* If the control failed then we should retry to any
+                * nodes flagged by iprealloc_fail_callback using the
+                * EVENTSCRIPT control.  This is a best-effort at
+                * backward compatibility when running a mixed cluster
+                * where some nodes have not yet been upgraded to
+                * support the IPREALLOCATED control.
+                */
+               DEBUG(DEBUG_WARNING,
+                     ("Retry ipreallocated to some nodes using eventscript control\n"));
+
+               nodes = talloc_array(tmp_ctx, uint32_t,
+                                    iprealloc_data.retry_count);
+               CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+               j = 0;
+               for (i=0; i<nodemap->num; i++) {
+                       if (iprealloc_data.retry_nodes[i]) {
+                               nodes[j] = i;
+                               j++;
+                       }
+               }
+
+               data.dptr  = discard_const("ipreallocated");
+               data.dsize = strlen((char *)data.dptr) + 1; 
+               if (ctdb_client_async_control(ctdb,
+                                             CTDB_CONTROL_RUN_EVENTSCRIPTS,
+                                             nodes, 0, TAKEOVER_TIMEOUT(),
+                                             false, data,
+                                             NULL, fail_callback,
+                                             callback_data) != 0) {
+                       DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
+               }
        }
 
        talloc_free(tmp_ctx);
@@ -2935,6 +3259,7 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
 void ctdb_release_all_ips(struct ctdb_context *ctdb)
 {
        struct ctdb_vnn *vnn;
+       int count = 0;
 
        for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
                if (!ctdb_sys_have_ip(&vnn->public_address)) {
@@ -2944,13 +3269,22 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
                if (!vnn->iface) {
                        continue;
                }
+
+               DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
+                                   ctdb_addr_to_str(&vnn->public_address),
+                                   vnn->public_netmask_bits,
+                                   ctdb_vnn_iface_string(vnn)));
+
                ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
                                  ctdb_vnn_iface_string(vnn),
                                  ctdb_addr_to_str(&vnn->public_address),
                                  vnn->public_netmask_bits);
                release_kill_clients(ctdb, &vnn->public_address);
                ctdb_vnn_unassign_iface(ctdb, vnn);
+               count++;
        }
+
+       DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
 }
 
 
@@ -3981,7 +4315,9 @@ int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
    node has the expected ip allocation.
    This is verified against ctdb->ip_tree
 */
-int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
+int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+                               struct ctdb_all_public_ips *ips,
+                               uint32_t pnn)
 {
        struct ctdb_public_ip_list *tmp_ip; 
        int i;
@@ -3999,7 +4335,7 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_publi
        for (i=0; i<ips->num; i++) {
                tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
                if (tmp_ip == NULL) {
-                       DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+                       DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
                        return -1;
                }
 
@@ -4008,7 +4344,11 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_publi
                }
 
                if (tmp_ip->pnn != ips->ips[i].pnn) {
-                       DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
+                       DEBUG(DEBUG_ERR,
+                             ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
+                              pnn,
+                              ctdb_addr_to_str(&ips->ips[i].addr),
+                              ips->ips[i].pnn, tmp_ip->pnn));
                        return -1;
                }
        }
@@ -4094,6 +4434,8 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
        struct ctdb_vnn *vnn;
        int i, ret;
 
+       CTDB_NO_MEMORY(ctdb, mem_ctx);
+
        /* read the ip allocation from the local node */
        ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
        if (ret != 0) {
@@ -4108,7 +4450,7 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
                DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
                talloc_free(mem_ctx);
                return -1;
-       }               
+       }
 
 
        /* check the previous list of ips and scan for ips that have been
@@ -4132,6 +4474,7 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
 
                        ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
                        if (ret != 0) {
                                DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+                               talloc_free(mem_ctx);
                                return -1;
                        }
@@ -4147,15 +4490,15 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
                        }
                }
                if (i == ips->num) {
-                       struct ctdb_control_ip_iface pub;
+                       struct ctdb_control_ip_iface *pub;
                        const char *ifaces = NULL;
                        int iface = 0;
 
                        DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
 
-                       pub.addr  = vnn->public_address;
-                       pub.mask  = vnn->public_netmask_bits;
-
+                       pub = talloc_zero(mem_ctx, struct ctdb_control_ip_iface);
+                       pub->addr  = vnn->public_address;
+                       pub->mask  = vnn->public_netmask_bits;
 
                        ifaces = vnn->ifaces[0];
                        iface = 1;
@@ -4163,17 +4506,27 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
                                ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
                                iface++;
                        }
-                       pub.len   = strlen(ifaces)+1;
-                       memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
+                       pub->len   = strlen(ifaces)+1;
+                       pub = talloc_realloc_size(mem_ctx, pub,
+                               offsetof(struct ctdb_control_ip_iface, iface) + pub->len);
+                       if (pub == NULL) {
+                               DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory\n"));
+                               talloc_free(mem_ctx);
+                               return -1;
+                       }
+                       memcpy(&pub->iface[0], ifaces, pub->len);
 
-                       ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
+                       ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(),
+                                                     CTDB_CURRENT_NODE, pub);
                        if (ret != 0) {
                                DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
+                               talloc_free(mem_ctx);
                                return -1;
                        }
                }
        }
 
+       talloc_free(mem_ctx);
        return 0;
 }
 
@@ -4219,6 +4572,7 @@ int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_re
                close(h->fd[0]);
                debug_extra = talloc_asprintf(NULL, "reloadips:");
 
+               ctdb_set_process_name("ctdb_reloadips");
                if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
                        DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
                        res = -1;