merge async recovery changes from Ronnie
author Andrew Tridgell <tridge@samba.org>
Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
committer Andrew Tridgell <tridge@samba.org>
Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
(This used to be ctdb commit 576e317640d25f8059114f15c6f1ebcee5e5b6e2)

ctdb/client/ctdb_client.c
ctdb/config/ctdb.sysconfig
ctdb/include/ctdb.h
ctdb/include/ctdb_private.h
ctdb/server/ctdb_control.c
ctdb/server/ctdb_recover.c
ctdb/server/ctdb_recoverd.c
ctdb/server/ctdb_takeover.c
ctdb/tests/events.d/00.test
ctdb/tools/ctdb_vacuum.c

index 677e02da192169875a2a092ebdaf0a56480c645d..fdd2b99f80a4c250999e751024fd6f0ddcff904f 100644 (file)
@@ -2486,3 +2486,194 @@ int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct time
        return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
 }
 
+/*
+  send a CTDB_CONTROL_END_RECOVERY control to destnode, asking it to
+  execute its "recovered" event script.
+  returns 0 if both the control and its remote status succeed, else -1
+ */
+int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+       int32_t status;
+       int ret = ctdb_control(ctdb, destnode, 0,
+                              CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
+                              NULL, NULL, &status, &timeout, NULL);
+
+       if (ret == 0 && status == 0) {
+               return 0;
+       }
+       DEBUG(0,(__location__ " ctdb_control for end_recovery failed\n"));
+       return -1;
+}
+
+/*
+  completion callback for the async helpers used when sending the same
+  control to multiple nodes in parallel. it accounts for one finished
+  control in the client_async_data the control was added to:
+  count is always decremented, fail_count is bumped on any failure.
+*/
+static void async_callback(struct ctdb_client_control_state *state)
+{
+       struct client_async_data *adata = talloc_get_type(state->async.private_data, struct client_async_data);
+       int32_t status;
+       int rc;
+
+       /* this control is no longer outstanding, whatever its outcome */
+       adata->count--;
+
+       if (state->state == CTDB_CONTROL_DONE) {
+               /* clear the callback before receiving the result */
+               state->async.fn = NULL;
+
+               rc = ctdb_control_recv(state->ctdb, state, adata, NULL, &status, NULL);
+               if (rc == 0 && status == 0) {
+                       return;
+               }
+               if (!adata->dont_log_errors) {
+                       DEBUG(0,("Async operation failed with ret=%d res=%d\n", rc, (int)status));
+               }
+       } else if (!adata->dont_log_errors) {
+               /* the control ended in some state other than DONE */
+               DEBUG(0,("Async operation failed with state %d\n", state->state));
+       }
+
+       /* count the failure so ctdb_client_async_wait() reports it */
+       adata->fail_count++;
+}
+
+
+void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
+{
+       /* one more control to wait for to complete */
+       data->count += 1;
+
+       /* have async_callback() account for its completion in data */
+       state->async.private_data = data;
+       state->async.fn           = async_callback;
+}
+
+
+/* run the event loop until every control added to data has completed.
+   returns 0 if none failed, -1 if data->fail_count is non-zero
+*/
+int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
+{
+       /* no timeout of its own: loops until count reaches zero */
+       while (data->count != 0) {
+               event_loop_once(ctdb->ev);
+       }
+       if (data->fail_count == 0) {
+               return 0;
+       }
+       if (!data->dont_log_errors) {
+               DEBUG(0,("Async wait failed - fail_count=%u\n", data->fail_count));
+       }
+       return -1;
+}
+
+
+/*
+   send the same control to every node in nodes and wait for all replies.
+   nodes must be a talloc array: its length is taken from talloc_get_size().
+   The control cannot return data.
+   returns 0 only if every node succeeded, -1 otherwise
+ */
+int ctdb_client_async_control(struct ctdb_context *ctdb,
+                               enum ctdb_controls opcode,
+                               uint32_t *nodes,
+                               struct timeval timeout,
+                               bool dont_log_errors,
+                               TDB_DATA data)
+{
+       struct client_async_data *batch;
+       int idx, total;
+       int result = -1;
+
+       batch = talloc_zero(ctdb, struct client_async_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, batch);
+       batch->dont_log_errors = dont_log_errors;
+
+       total = talloc_get_size(nodes) / sizeof(uint32_t);
+
+       /* fire off one async control per listed node */
+       for (idx = 0; idx < total; idx++) {
+               struct ctdb_client_control_state *state;
+
+               state = ctdb_control_send(ctdb, nodes[idx], 0, opcode,
+                                         0, data, batch, &timeout, NULL);
+               if (state == NULL) {
+                       DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
+                       goto done;
+               }
+               ctdb_client_async_add(batch, state);
+       }
+
+       if (ctdb_client_async_wait(ctdb, batch) == 0) {
+               result = 0;
+       }
+
+done:
+       talloc_free(batch);
+       return result;
+}
+
+uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_vnn_map *vnn_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self)
+{
+       /* talloc array of vnn_map->map entries; ctdb->pnn is skipped unless include_self */
+       uint32_t *list;
+       int idx, wanted = 0, filled = 0;
+
+       /* first pass: size the result */
+       for (idx = 0; idx < vnn_map->size; idx++) {
+               if (include_self || vnn_map->map[idx] != ctdb->pnn) {
+                       wanted++;
+               }
+       }
+
+       list = talloc_array(mem_ctx, uint32_t, wanted);
+       CTDB_NO_MEMORY_FATAL(ctdb, list);
+
+       /* second pass: copy the entries, preserving map order */
+       for (idx = 0; idx < vnn_map->size; idx++) {
+               if (include_self || vnn_map->map[idx] != ctdb->pnn) {
+                       list[filled++] = vnn_map->map[idx];
+               }
+       }
+       return list;
+}
+
+uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_node_map *node_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self)
+{
+       /* talloc array of pnns without NODE_FLAGS_INACTIVE; ctdb->pnn is skipped unless include_self */
+       uint32_t *list;
+       int idx, wanted = 0, filled = 0;
+
+       /* first pass: size the result */
+       for (idx = 0; idx < node_map->num; idx++) {
+               if (node_map->nodes[idx].flags & NODE_FLAGS_INACTIVE) {
+                       continue;
+               }
+               if (include_self || node_map->nodes[idx].pnn != ctdb->pnn) {
+                       wanted++;
+               }
+       }
+
+       list = talloc_array(mem_ctx, uint32_t, wanted);
+       CTDB_NO_MEMORY_FATAL(ctdb, list);
+
+       /* second pass: copy the pnns, preserving node map order */
+       for (idx = 0; idx < node_map->num; idx++) {
+               if (node_map->nodes[idx].flags & NODE_FLAGS_INACTIVE) {
+                       continue;
+               }
+               if (include_self || node_map->nodes[idx].pnn != ctdb->pnn) {
+                       list[filled++] = node_map->nodes[idx].pnn;
+               }
+       }
+       return list;
+}
index f236cda6e72a0ea6e675b82179aca8ad4e53ee2b..9306884b642bb542b447689ccb2955101ce8a2da 100644 (file)
 # default is to not manage Samba
 # CTDB_MANAGES_SAMBA=yes
 
-# should ctdb manage starting/stopping the http service for you?
-# default is to not manage http 
-# CTDB_MANAGES_HTTPD=yes
-
 # should ctdb manage starting/stopping Winbind service?
 # if left comented out then it will be autodetected based on smb.conf
 # CTDB_MANAGES_WINBIND=yes
index eee698341727ce44bb5a1e6be45de75477045d44..b779b94dcdcddb024028bb440d272ffc99262379 100644 (file)
@@ -499,4 +499,15 @@ struct ctdb_client_control_state *ctdb_ctrl_uptime_send(struct ctdb_context *ctd
 
 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime);
 
+int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
+
+uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_node_map *node_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self);
+uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_vnn_map *vnn_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self);
+
 #endif
index 57501fc68a8e7aba33bc3de6974841d0e2ed7d21..ab875924fa16614f0c7acd0a88d6bb18d8ed6896 100644 (file)
@@ -488,6 +488,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
                    CTDB_CONTROL_WIPE_DATABASE           = 67,
                    CTDB_CONTROL_DELETE_RECORD           = 68,
                    CTDB_CONTROL_UPTIME                  = 69,
+                   CTDB_CONTROL_START_RECOVERY          = 70,
+                   CTDB_CONTROL_END_RECOVERY            = 71,
 };     
 
 /*
@@ -1082,6 +1084,12 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
                                 struct ctdb_req_control *c,
                                 TDB_DATA indata, 
                                 bool *async_reply);
+int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                bool *async_reply);
+int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                bool *async_reply);
 
 struct ctdb_public_ip {
        uint32_t pnn;
@@ -1221,4 +1229,20 @@ void ctdb_unblock_signal(int signum);
 int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb);
 int ctdb_set_child_logging(struct ctdb_context *ctdb);
 
+
+struct client_async_data {
+       bool dont_log_errors;   /* when set, the async helpers skip their DEBUG failure messages */
+       uint32_t count;         /* controls added via ctdb_client_async_add() that have not completed yet */
+       uint32_t fail_count;    /* completed controls that failed; non-zero makes ctdb_client_async_wait() return -1 */
+};
+void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state);
+int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data);
+int ctdb_client_async_control(struct ctdb_context *ctdb,
+                               enum ctdb_controls opcode,
+                               uint32_t *nodes,
+                               struct timeval timeout,
+                               bool dont_log_errors,
+                               TDB_DATA data);
+
+
 #endif
index 01a77fe887cb796a591e8d6acb5a1dedd181b46f..884ed69177f0a74e75eba76adae40c5a30d5c99b 100644 (file)
@@ -355,6 +355,11 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
        case CTDB_CONTROL_UPTIME:
                return ctdb_control_uptime(ctdb, outdata);
 
+       case CTDB_CONTROL_START_RECOVERY:
+               return ctdb_control_start_recovery(ctdb, c, async_reply);
+
+       case CTDB_CONTROL_END_RECOVERY:
+               return ctdb_control_end_recovery(ctdb, c, async_reply);
        default:
                DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
                return -1;
index 65ad47156476f77d2045d0a681e147535231686e..b239554a02a8baeef312004f27635912493eaef8 100644 (file)
@@ -396,27 +396,6 @@ struct ctdb_set_recmode_state {
        pid_t child;
 };
 
-/*
-  called when the 'recovered' event script has finished
- */
-static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void *p)
-{
-       struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state);
-
-       ctdb_enable_monitoring(state->ctdb);
-
-       if (status == 0) {
-               ctdb->recovery_mode = state->recmode;
-       } else {
-               DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status));
-       }
-
-       ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
-       talloc_free(state);
-
-       gettimeofday(&ctdb->last_recovery_time, NULL);
-}
-
 /*
   called if our set_recmode child times out. this would happen if
   ctdb_recovery_lock() would block.
@@ -473,23 +452,11 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde,
                return;
        }
 
+       state->ctdb->recovery_mode = state->recmode;
 
-       ctdb_disable_monitoring(state->ctdb);
-
-       /* call the events script to tell all subsystems that we have recovered */
-       ret = ctdb_event_script_callback(state->ctdb, 
-                                        timeval_current_ofs(state->ctdb->tunable.script_timeout, 0),
-                                        state, 
-                                        ctdb_recovered_callback, 
-                                        state, "recovered");
-
-       if (ret != 0) {
-               ctdb_enable_monitoring(state->ctdb);
-
-               ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "failed to run eventscript from set_recmode");
-               talloc_free(state);
-               return;
-       }
+       ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
+       talloc_free(state);
+       return;
 }
 
 /*
@@ -742,3 +709,122 @@ int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata)
        free(data.dptr);
        return 0;       
 }
+
+
+struct recovery_callback_state {       /* carried from the start/end recovery controls to their event script callbacks */
+       struct ctdb_req_control *c;     /* the control request the callback replies to */
+};
+
+
+/*
+  called when the 'recovered' event script has finished: re-enables
+  monitoring, replies to the pending control, records the recovery time
+ */
+static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+       struct recovery_callback_state *cb = talloc_get_type(p, struct recovery_callback_state);
+
+       ctdb_enable_monitoring(ctdb);
+       if (status != 0) {
+               DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status));
+       }
+
+       /* the script status is passed back as the control's status */
+       ctdb_request_control_reply(ctdb, cb->c, NULL, status, NULL);
+       talloc_free(cb);
+       gettimeofday(&ctdb->last_recovery_time, NULL);
+}
+
+/*
+  recovery has finished: run the 'recovered' event script and reply to
+  the control from ctdb_end_recovery_callback() once it completes.
+  returns -1 without setting *async_reply if the script could not be started
+ */
+int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, 
+                               struct ctdb_req_control *c,
+                               bool *async_reply)
+{
+       int ret;
+       struct recovery_callback_state *state;
+
+       DEBUG(0,("Recovery has finished\n"));
+
+       state = talloc(ctdb, struct recovery_callback_state);
+       CTDB_NO_MEMORY(ctdb, state);
+       /* only borrow c for now: stealing it here would free it on the error path below */
+       state->c = c;
+
+       ctdb_disable_monitoring(ctdb);
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, 
+                                        ctdb_end_recovery_callback, 
+                                        state, "recovered");
+       if (ret != 0) {
+               ctdb_enable_monitoring(ctdb);
+               DEBUG(0,(__location__ " Failed to end recovery\n"));
+               talloc_free(state);
+               return -1;
+       }
+
+       /* the callback will reply (assumed not to have run yet - confirm): own c until then */
+       state->c = talloc_steal(state, c);
+       *async_reply = true;
+       return 0;
+}
+
+/*
+  called when the 'startrecovery' event script has finished:
+  re-enables monitoring and replies to the pending control
+ */
+static void ctdb_start_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+       struct recovery_callback_state *cb = talloc_get_type(p, struct recovery_callback_state);
+
+       ctdb_enable_monitoring(ctdb);
+       if (status != 0) {
+               DEBUG(0,(__location__ " startrecovery event script failed (status %d)\n", status));
+       }
+
+       ctdb_request_control_reply(ctdb, cb->c, NULL, status, NULL);
+       talloc_free(cb);
+}
+
+/*
+  start a recovery: run the 'startrecovery' event script and reply to
+  the control from ctdb_start_recovery_callback() once it completes.
+  returns -1 without setting *async_reply if the script could not be started
+ */
+int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, 
+                               struct ctdb_req_control *c,
+                               bool *async_reply)
+{
+       int ret;
+       struct recovery_callback_state *state;
+
+       DEBUG(0,("Recovery has started\n"));
+
+       state = talloc(ctdb, struct recovery_callback_state);
+       CTDB_NO_MEMORY(ctdb, state);
+       /* only borrow c for now: stealing it here would free it on the error path below */
+       state->c = c;
+
+       ctdb_disable_monitoring(ctdb);
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, 
+                                        ctdb_start_recovery_callback, 
+                                        state, "startrecovery");
+       if (ret != 0) {
+               ctdb_enable_monitoring(ctdb);
+               DEBUG(0,(__location__ " Failed to start recovery\n"));
+               talloc_free(state);
+               return -1;
+       }
+
+       /* the callback will reply (assumed not to have run yet - confirm): own c until then */
+       state->c = talloc_steal(state, c);
+       *async_reply = true;
+       return 0;
+}
+
index c13136e848a737c0be9838ff2dcad64e2cc57a7e..8595706cc0f7fdb0ab2fac61a9e1c01ae91e38d3 100644 (file)
@@ -58,66 +58,6 @@ struct ctdb_recoverd {
 #define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
 
 
-struct async_data {
-       uint32_t count;
-       uint32_t fail_count;
-};
-
-static void async_callback(struct ctdb_client_control_state *state)
-{
-       struct async_data *data = talloc_get_type(state->async.private_data, struct async_data);
-       int ret;
-       int32_t res;
-
-       /* one more node has responded with recmode data */
-       data->count--;
-
-       /* if we failed to push the db, then return an error and let
-          the main loop try again.
-       */
-       if (state->state != CTDB_CONTROL_DONE) {
-               DEBUG(0,("Async operation failed with state %d\n", state->state));
-               data->fail_count++;
-               return;
-       }
-       
-       state->async.fn = NULL;
-
-       ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
-       if ((ret != 0) || (res != 0)) {
-               DEBUG(0,("Async operation failed with ret=%d res=%d\n", ret, (int)res));
-               data->fail_count++;
-       }
-}
-
-
-static void async_add(struct async_data *data, struct ctdb_client_control_state *state)
-{
-       /* set up the callback functions */
-       state->async.fn = async_callback;
-       state->async.private_data = data;
-       
-       /* one more control to wait for to complete */
-       data->count++;
-}
-
-
-/* wait for up to the maximum number of seconds allowed
-   or until all nodes we expect a response from has replied
-*/
-static int async_wait(struct ctdb_context *ctdb, struct async_data *data)
-{
-       while (data->count > 0) {
-               event_loop_once(ctdb->ev);
-       }
-       if (data->fail_count != 0) {
-               DEBUG(0,("Async wait failed - fail_count=%u\n", data->fail_count));
-               return -1;
-       }
-       return 0;
-}
-
-
 /*
   unban a node
  */
@@ -255,50 +195,49 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_
 enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
 
 
-/* 
-   perform a simple control on all active nodes. The control cannot return data
+/*
+  run the "recovered" eventscript on all nodes
  */
-static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_controls opcode,
-                                        struct ctdb_node_map *nodemap, TDB_DATA data, bool include_self)
+static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
 {
-       struct async_data *async_data;
-       struct ctdb_client_control_state *state;
-       int j;
-       struct timeval timeout = CONTROL_TIMEOUT();
-       
-       async_data = talloc_zero(ctdb, struct async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+       TALLOC_CTX *tmp_ctx;
 
-       /* loop over all active nodes and send an async control to each of them */
-       for (j=0; j<nodemap->num; j++) {
-               if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-                       continue;
-               }
-               if (nodemap->nodes[j].pnn == ctdb->pnn && !include_self) {
-                       continue;
-               }
-               state = ctdb_control_send(ctdb, nodemap->nodes[j].pnn, 0, opcode, 
-                                         0, data, async_data, &timeout, NULL);
-               if (state == NULL) {
-                       DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
-                       talloc_free(async_data);
-                       return -1;
-               }
-               
-               async_add(async_data, state);
-       }
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
 
-       if (async_wait(ctdb, async_data) != 0) {
-               DEBUG(0,(__location__ " Failed async control %u\n", (unsigned)opcode));
-               talloc_free(async_data);
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_END_RECOVERY,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, tdb_null) != 0) {
+               DEBUG(0, (__location__ " Unable to run the 'recovered' event. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
                return -1;
        }
 
-       talloc_free(async_data);
+       talloc_free(tmp_ctx);
        return 0;
 }
 
+/*
+  run the "startrecovery" eventscript on all active nodes (this node included); returns 0 on success, -1 on failure
+ */
+static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
+{
+       TALLOC_CTX *tmp_ctx;
 
+       tmp_ctx = talloc_new(ctdb); /* scratch context that holds the node list built below */
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_START_RECOVERY,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), /* include_self = true */
+                       CONTROL_TIMEOUT(), false, tdb_null) != 0) { /* dont_log_errors = false, no payload */
+               DEBUG(0, (__location__ " Unable to run the 'startrecovery' event. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+
+       talloc_free(tmp_ctx);
+       return 0;
+}
 
 /*
   change recovery mode on all nodes
@@ -306,12 +245,21 @@ static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_co
 static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
 {
        TDB_DATA data;
+       uint32_t *nodes;
+       TALLOC_CTX *tmp_ctx;
+
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+       nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
 
        /* freeze all nodes */
        if (rec_mode == CTDB_RECOVERY_ACTIVE) {
-               if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_FREEZE, 
-                                                 nodemap, tdb_null, true) != 0) {
+               if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE,
+                                               nodes, CONTROL_TIMEOUT(),
+                                               false, tdb_null) != 0) {
                        DEBUG(0, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
+                       talloc_free(tmp_ctx);
                        return -1;
                }
        }
@@ -320,20 +268,25 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
        data.dsize = sizeof(uint32_t);
        data.dptr = (unsigned char *)&rec_mode;
 
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMODE, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMODE,
+                                       nodes, CONTROL_TIMEOUT(),
+                                       false, data) != 0) {
                DEBUG(0, (__location__ " Unable to set recovery mode. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
                return -1;
        }
 
        if (rec_mode == CTDB_RECOVERY_NORMAL) {
-               if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_THAW, 
-                                                 nodemap, tdb_null, true) != 0) {
+               if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW,
+                                               nodes, CONTROL_TIMEOUT(),
+                                               false, tdb_null) != 0) {
                        DEBUG(0, (__location__ " Unable to thaw nodes. Recovery failed.\n"));
+                       talloc_free(tmp_ctx);
                        return -1;
                }
        }
 
+       talloc_free(tmp_ctx);
        return 0;
 }
 
@@ -343,16 +296,23 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
 static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t pnn)
 {
        TDB_DATA data;
+       TALLOC_CTX *tmp_ctx;
+
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
 
        data.dsize = sizeof(uint32_t);
        data.dptr = (unsigned char *)&pnn;
 
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMASTER, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMASTER,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                DEBUG(0, (__location__ " Unable to set recmaster. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
                return -1;
        }
 
+       talloc_free(tmp_ctx);
        return 0;
 }
 
@@ -1141,6 +1101,10 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
        struct recdb_data params;
        struct ctdb_control_pulldb_reply *recdata;
        TDB_DATA outdata;
+       TALLOC_CTX *tmp_ctx;
+
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
 
        recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply);
        CTDB_NO_MEMORY(ctdb, recdata);
@@ -1155,12 +1119,14 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
        if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) {
                DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
                talloc_free(params.recdata);
+               talloc_free(tmp_ctx);
                return -1;
        }
 
        if (params.failed) {
                DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
                talloc_free(params.recdata);
+               talloc_free(tmp_ctx);
                return -1;              
        }
 
@@ -1169,9 +1135,12 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
        outdata.dptr = (void *)recdata;
        outdata.dsize = params.len;
 
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_PUSH_DB, nodemap, outdata, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, outdata) != 0) {
                DEBUG(0,(__location__ " Failed to push recdb records to nodes for db 0x%x\n", dbid));
                talloc_free(recdata);
+               talloc_free(tmp_ctx);
                return -1;
        }
 
@@ -1179,6 +1148,7 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
                  dbid, recdata->count));
 
        talloc_free(recdata);
+       talloc_free(tmp_ctx);
 
        return 0;
 }
@@ -1221,9 +1191,11 @@ static int recover_database(struct ctdb_recoverd *rec,
        data.dptr = (void *)&w;
        data.dsize = sizeof(w);
 
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_WIPE_DATABASE, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE,
+                       list_of_active_nodes(ctdb, nodemap, recdb, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                DEBUG(0, (__location__ " Unable to wipe database. Recovery failed.\n"));
+               talloc_free(recdb);
                return -1;
        }
        
@@ -1304,6 +1276,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
 
        DEBUG(0, (__location__ " Recovery - created remote databases\n"));
 
+
        /* set recovery mode to active on all nodes */
        ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
        if (ret!=0) {
@@ -1311,6 +1284,13 @@ static int do_recovery(struct ctdb_recoverd *rec,
                return -1;
        }
 
+       /* execute the "startrecovery" event script on all nodes */
+       ret = run_startrecovery_eventscript(ctdb, nodemap);
+       if (ret!=0) {
+               DEBUG(0, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
+               return -1;
+       }
+
        /* pick a new generation number */
        generation = new_generation();
 
@@ -1334,8 +1314,9 @@ static int do_recovery(struct ctdb_recoverd *rec,
        data.dptr = (void *)&generation;
        data.dsize = sizeof(uint32_t);
 
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_START, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START,
+                       list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                DEBUG(0, (__location__ " Unable to start transactions. Recovery failed.\n"));
                return -1;
        }
@@ -1352,8 +1333,9 @@ static int do_recovery(struct ctdb_recoverd *rec,
        DEBUG(0, (__location__ " Recovery - starting database commits\n"));
 
        /* commit all the changes */
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT,
+                       list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                DEBUG(0, (__location__ " Unable to commit recovery changes. Recovery failed.\n"));
                return -1;
        }
@@ -1417,6 +1399,13 @@ static int do_recovery(struct ctdb_recoverd *rec,
                DEBUG(1, (__location__ " Recovery - done takeover\n"));
        }
 
+       /* execute the "recovered" event script on all nodes */
+       ret = run_recovered_eventscript(ctdb, nodemap);
+       if (ret!=0) {
+               DEBUG(0, (__location__ " Unable to run the 'recovered' event on cluster\n"));
+               return -1;
+       }
+
        /* disable recovery mode */
        ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
        if (ret!=0) {
@@ -2445,12 +2434,29 @@ again:
        /* we might need to change who has what IP assigned */
        if (rec->need_takeover_run) {
                rec->need_takeover_run = false;
+
+               /* execute the "startrecovery" event script on all nodes */
+               ret = run_startrecovery_eventscript(ctdb, nodemap);
+               if (ret!=0) {
+                       DEBUG(0, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
+                       do_recovery(rec, mem_ctx, pnn, num_active, nodemap, 
+                                   vnnmap, ctdb->pnn);
+               }
+
                ret = ctdb_takeover_run(ctdb, nodemap);
                if (ret != 0) {
                        DEBUG(0, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
                        do_recovery(rec, mem_ctx, pnn, num_active, nodemap, 
                                    vnnmap, ctdb->pnn);
                }
+
+               /* execute the "recovered" event script on all nodes */
+               ret = run_recovered_eventscript(ctdb, nodemap);
+               if (ret!=0) {
+                       DEBUG(0, (__location__ " Unable to run the 'recovered' event on cluster\n"));
+                       do_recovery(rec, mem_ctx, pnn, num_active, nodemap, 
+                                   vnnmap, ctdb->pnn);
+               }
        }
 
        goto again;
index b63b88f4c25e9bf3fc9efd1836c663e866b56459..cea3f95e34e3c4e3ff05ecd646034a8f1c60d3d5 100644 (file)
@@ -641,11 +641,14 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
 {
        int i, num_healthy, retries;
-       int ret;
        struct ctdb_public_ip ip;
        uint32_t mask;
        struct ctdb_public_ip_list *all_ips, *tmp_ip;
        int maxnode, maxnum=0, minnode, minnum=0, num;
+       TDB_DATA data;
+       struct timeval timeout;
+       struct client_async_data *async_data;
+       struct ctdb_client_control_state *state;
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
 
 
@@ -813,6 +816,9 @@ try_again:
        /* now tell all nodes to delete any alias that they should not
           have.  This will be a NOOP on nodes that don't currently
           hold the given alias */
+       async_data = talloc_zero(tmp_ctx, struct client_async_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+
        for (i=0;i<nodemap->num;i++) {
                /* don't talk to unconnected nodes, but do talk to banned nodes */
                if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
@@ -830,21 +836,33 @@ try_again:
                        ip.sin.sin_family = AF_INET;
                        ip.sin.sin_addr   = tmp_ip->sin.sin_addr;
 
-                       ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(),
-                                                  nodemap->nodes[i].pnn, 
-                                                  &ip);
-                       if (ret != 0) {
-                               DEBUG(0,("Failed to tell vnn %u to release IP %s\n",
-                                        nodemap->nodes[i].pnn,
-                                        inet_ntoa(tmp_ip->sin.sin_addr)));
+                       timeout = TAKEOVER_TIMEOUT();
+                       data.dsize = sizeof(ip);
+                       data.dptr  = (uint8_t *)&ip;
+                       state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
+                                       0, CTDB_CONTROL_RELEASE_IP, 0,
+                                       data, async_data,
+                                       &timeout, NULL);
+                       if (state == NULL) {
+                               DEBUG(0,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
                                talloc_free(tmp_ctx);
                                return -1;
                        }
+               
+                       ctdb_client_async_add(async_data, state);
                }
        }
+       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+               DEBUG(0,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       talloc_free(async_data);
 
 
        /* tell all nodes to get their own IPs */
+       async_data = talloc_zero(tmp_ctx, struct client_async_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        /* this IP won't be taken over */
@@ -854,16 +872,25 @@ try_again:
                ip.sin.sin_family = AF_INET;
                ip.sin.sin_addr = tmp_ip->sin.sin_addr;
 
-               ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(), 
-                                           tmp_ip->pnn, 
-                                           &ip);
-               if (ret != 0) {
-                       DEBUG(0,("Failed asking vnn %u to take over IP %s\n",
-                                tmp_ip->pnn, 
-                                inet_ntoa(tmp_ip->sin.sin_addr)));
+               timeout = TAKEOVER_TIMEOUT();
+               data.dsize = sizeof(ip);
+               data.dptr  = (uint8_t *)&ip;
+               state = ctdb_control_send(ctdb, tmp_ip->pnn,
+                               0, CTDB_CONTROL_TAKEOVER_IP, 0,
+                               data, async_data,
+                               &timeout, NULL);
+               if (state == NULL) {
+                       DEBUG(0,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
                        talloc_free(tmp_ctx);
                        return -1;
                }
+               
+               ctdb_client_async_add(async_data, state);
+       }
+       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+               DEBUG(0,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
        }
 
        talloc_free(tmp_ctx);
index 026cf6cba77f39ea7602a3dbcb9662185f453d94..c5933be36afaca7a57d074e5a8865c380c48fc2a 100755 (executable)
@@ -10,6 +10,12 @@ case $cmd in
        echo "monitor event stderr" >&2
        exit 0
        ;;
+
+     startrecovery)
+       echo "ctdb startrecovery event"
+       exit 0; 
+       ;;
+
      startup)
        echo "ctdb startup event"
        exit 0; 
index 2bc9908c7ee1603e880a03a8fb46fadd57dd97a4..f412c0430386f792ce7f9c89b319d531c1b8f362 100644 (file)
 /* should be tunable */
 #define TIMELIMIT() timeval_current_ofs(10, 0)
 
-struct async_data {
-       uint32_t count;
-       uint32_t fail_count;
-};
-
-static void async_callback(struct ctdb_client_control_state *state)
-{
-       struct async_data *data = talloc_get_type(state->async.private_data, struct async_data);
-       int ret;
-       int32_t res;
-
-       /* one more node has responded with recmode data */
-       data->count--;
-
-       /* if we failed to push the db, then return an error and let
-          the main loop try again.
-       */
-       if (state->state != CTDB_CONTROL_DONE) {
-               data->fail_count++;
-               return;
-       }
-       
-       state->async.fn = NULL;
-
-       ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
-       if ((ret != 0) || (res != 0)) {
-               data->fail_count++;
-       }
-}
-
-static void async_add(struct async_data *data, struct ctdb_client_control_state *state)
-{
-       /* set up the callback functions */
-       state->async.fn = async_callback;
-       state->async.private_data = data;
-       
-       /* one more control to wait for to complete */
-       data->count++;
-}
-
-
-/* wait for up to the maximum number of seconds allowed
-   or until all nodes we expect a response from has replied
-*/
-static int async_wait(struct ctdb_context *ctdb, struct async_data *data)
-{
-       while (data->count > 0) {
-               event_loop_once(ctdb->ev);
-       }
-       if (data->fail_count != 0) {
-               return -1;
-       }
-       return 0;
-}
-
-/* 
-   perform a simple control on nodes in the vnn map except ourselves.
-   The control cannot return data
- */
-static int async_control_on_vnnmap(struct ctdb_context *ctdb, enum ctdb_controls opcode,
-                                  TDB_DATA data)
-{
-       struct async_data *async_data;
-       struct ctdb_client_control_state *state;
-       int j;
-       struct timeval timeout = TIMELIMIT();
-       
-       async_data = talloc_zero(ctdb, struct async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
-       /* loop over all active nodes and send an async control to each of them */
-       for (j=0; j<ctdb->vnn_map->size; j++) {
-               uint32_t pnn = ctdb->vnn_map->map[j];
-               if (pnn == ctdb->pnn) {
-                       continue;
-               }
-               state = ctdb_control_send(ctdb, pnn, 0, opcode, 
-                                         0, data, async_data, &timeout, NULL);
-               if (state == NULL) {
-                       DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
-                       talloc_free(async_data);
-                       return -1;
-               }
-               
-               async_add(async_data, state);
-       }
-
-       if (async_wait(ctdb, async_data) != 0) {
-               talloc_free(async_data);
-               return -1;
-       }
-
-       talloc_free(async_data);
-       return 0;
-}
-
-
 /*
   vacuum one record
  */
@@ -172,7 +75,9 @@ static int ctdb_vacuum_one(struct ctdb_context *ctdb, TDB_DATA key,
        data.dptr = (void *)rec;
        data.dsize = rec->length;
 
-       if (async_control_on_vnnmap(ctdb, CTDB_CONTROL_DELETE_RECORD, data) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_DELETE_RECORD,
+                       list_of_vnnmap_nodes(ctdb, ctdb->vnn_map, rec, false),
+                       TIMELIMIT(), true, data) != 0) {
                /* one or more nodes failed to delete a record - no problem! */
                talloc_free(rec);
                return 0;