recoverd: Move disabling of IP checks into do_takeover_run()
author Martin Schwenke <martin@meltin.net>
Tue, 3 Sep 2013 01:21:09 +0000 (11:21 +1000)
committer Amitay Isaacs <amitay@gmail.com>
Thu, 19 Sep 2013 02:54:30 +0000 (12:54 +1000)
Signed-off-by: Martin Schwenke <martin@meltin.net>
(This used to be ctdb commit 48b603fbf16311daa47b01e7a33d477ed51da56d)

ctdb/server/ctdb_recoverd.c
ctdb/server/ctdb_takeover.c

index a94fb314313ac3e6796e839329adf01bb3abf321..2d01e050ab8147d1c84c87d59c884e5bd5fb349e 100644 (file)
@@ -1584,6 +1584,8 @@ static bool do_takeover_run(struct ctdb_recoverd *rec,
                            struct ctdb_node_map *nodemap,
                            bool banning_credits_on_fail)
 {
+       uint32_t disable_timeout;
+       TDB_DATA data;
        int ret;
        bool ok;
 
@@ -1594,10 +1596,34 @@ static bool do_takeover_run(struct ctdb_recoverd *rec,
                goto done;
        }
 
+       /* Disable IP checks while doing this takeover run.  This will
+        * stop those other nodes from triggering takeover runs when
+        * they think they should be hosting an IP but it isn't yet on an
+        * interface.
+        */
+       data.dptr  = (uint8_t*)&disable_timeout;
+       data.dsize = sizeof(disable_timeout);
+
+       disable_timeout = rec->ctdb->tunable.takeover_timeout;
+       if (ctdb_client_send_message(rec->ctdb, CTDB_BROADCAST_CONNECTED,
+                                    CTDB_SRVID_DISABLE_IP_CHECK,
+                                    data) != 0) {
+               DEBUG(DEBUG_INFO,("Failed to disable IP check\n"));
+       }
+
        rec->takeover_run_in_progress = true;
 
        ret = ctdb_takeover_run(rec->ctdb, nodemap, takeover_fail_callback,
                                banning_credits_on_fail ? rec : NULL);
+
+       /* Reenable IP checks */
+       disable_timeout = 0;
+       if (ctdb_client_send_message(rec->ctdb, CTDB_BROADCAST_CONNECTED,
+                                    CTDB_SRVID_DISABLE_IP_CHECK,
+                                    data) != 0) {
+               DEBUG(DEBUG_INFO,("Failed to reenable IP check\n"));
+       }
+
        if (ret != 0) {
                DEBUG(DEBUG_ERR, ("IP reallocation failed\n"));
                ok = false;
index 15619f3e90c7cf1001b68c547db7161426f49e03..4a7cfd6e6f6bb8afca58d51fca9e7c4296202afc 100644 (file)
@@ -2678,7 +2678,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
        struct client_async_data *async_data;
        struct ctdb_client_control_state *state;
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
-       uint32_t disable_timeout;
        struct ctdb_ipflags *ipflags;
        struct takeover_callback_data *takeover_data;
        struct iprealloc_callback_data iprealloc_data;
@@ -2704,19 +2703,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
        /* Do the IP reassignment calculations */
        ctdb_takeover_run_core(ctdb, ipflags, &all_ips);
 
-       /* The recovery daemon does regular sanity checks of the IPs.
-        * However, sometimes it is overzealous and thinks changes are
-        * required when they're already underway.  This stops the
-        * checks for a while before we start moving IPs.
-        */
-       disable_timeout = ctdb->tunable.takeover_timeout;
-       data.dptr  = (uint8_t*)&disable_timeout;
-       data.dsize = sizeof(disable_timeout);
-       if (ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
-                                    CTDB_SRVID_DISABLE_IP_CHECK, data) != 0) {
-               DEBUG(DEBUG_INFO,("Failed to disable ip verification\n"));
-       }
-
        /* Now tell all nodes to release any public IPs should not
         * host.  This will be a NOOP on nodes that don't currently
         * hold the given IP.