merge async recovery changes from Ronnie

author Andrew Tridgell <tridge@samba.org>

Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)

committer Andrew Tridgell <tridge@samba.org>

Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
author Andrew Tridgell <tridge@samba.org>
Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
committer Andrew Tridgell <tridge@samba.org>
Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c

index 677e02da192169875a2a092ebdaf0a56480c645d..fdd2b99f80a4c250999e751024fd6f0ddcff904f 100644 (file)
--- a/ctdb/client/ctdb_client.c
+++ b/ctdb/client/ctdb_client.c
@@ -2486,3 +2486,194 @@ int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct time
         return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
  }
  
+/*
+  send CTDB_CONTROL_END_RECOVERY to destnode (its handler runs the "recovered" event script); returns 0 on success, -1 if the call or the returned status is non-zero
+ */
+int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+       int ret;
+       int32_t status;
+       /* status is only read below if ctdb_control() itself returned 0 */
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_END_RECOVERY, 0, tdb_null, 
+                          NULL, NULL, &status, &timeout, NULL);
+       if (ret != 0 || status != 0) {
+               DEBUG(0,(__location__ " ctdb_control for end_recovery failed\n"));
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+  completion callback installed by ctdb_client_async_add() on each control
+  sent in parallel: drops the outstanding count and records any failure.
+*/
+static void async_callback(struct ctdb_client_control_state *state)
+{
+       struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
+       int ret;
+       int32_t res;
+
+       /* this control has completed (in whatever state): one less to wait for */
+       data->count--;
+
+       /* the control did not reach CTDB_CONTROL_DONE: count it as a failure
+          so that ctdb_client_async_wait() returns an error to the caller.
+       */
+       if (state->state != CTDB_CONTROL_DONE) {
+               if ( !data->dont_log_errors) {
+                       DEBUG(0,("Async operation failed with state %d\n", state->state));
+               }
+               data->fail_count++;
+               return;
+       }
+       /* NOTE(review): presumably cleared so that ctdb_control_recv() cannot re-enter this callback - confirm */
+       state->async.fn = NULL;
+
+       ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
+       if ((ret != 0) || (res != 0)) {
+               if ( !data->dont_log_errors) {
+                       DEBUG(0,("Async operation failed with ret=%d res=%d\n", ret, (int)res));
+               }
+               data->fail_count++;
+       }
+}
+
+/* attach 'state' (e.g. the result of ctdb_control_send()) to the batch 'data' so that async_callback() runs when it completes */
+void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
+{
+       /* route completion of this control to async_callback, with 'data' as its context */
+       state->async.fn = async_callback;
+       state->async.private_data = data;
+       
+       /* one more control that ctdb_client_async_wait() has to wait for */
+       data->count++;
+}
+
+
+/* run the event loop until every control added to 'data' has completed
+   (this loop has no timeout of its own); returns -1 if any failed, else 0
+*/
+int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
+{
+       while (data->count > 0) {
+               event_loop_once(ctdb->ev);
+       }
+       if (data->fail_count != 0) {
+               if (!data->dont_log_errors) {
+                       DEBUG(0,("Async wait failed - fail_count=%u\n", 
+                                data->fail_count));
+               }
+               return -1;
+       }
+       return 0;
+}
+
+
+/*
+   send 'opcode' with payload 'data' to every pnn listed in 'nodes' and wait
+   for all of them. The control cannot return data. Returns 0 only if all succeed
+ */
+int ctdb_client_async_control(struct ctdb_context *ctdb,
+                               enum ctdb_controls opcode,
+                               uint32_t *nodes,
+                               struct timeval timeout,
+                               bool dont_log_errors,
+                               TDB_DATA data)
+{
+       struct client_async_data *async_data;
+       struct ctdb_client_control_state *state;
+       int j, num_nodes;
+       
+       async_data = talloc_zero(ctdb, struct client_async_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+       async_data->dont_log_errors = dont_log_errors;
+       /* 'nodes' must be a talloc allocation: the element count is derived from its talloc size */
+       num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
+
+       /* loop over all nodes and send an async control to each of them */
+       for (j=0; j<num_nodes; j++) {
+               uint32_t pnn = nodes[j];
+
+               state = ctdb_control_send(ctdb, pnn, 0, opcode, 
+                                         0, data, async_data, &timeout, NULL);
+               if (state == NULL) {
+                       DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
+                       talloc_free(async_data);
+                       return -1;
+               }
+               
+               ctdb_client_async_add(async_data, state);
+       }
+       /* block until every control sent above has completed */
+       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+               talloc_free(async_data);
+               return -1;
+       }
+
+       talloc_free(async_data);
+       return 0;
+}
+/* build a talloc array (child of mem_ctx) of the pnns in vnn_map->map, leaving out our own pnn unless include_self is set */
+uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_vnn_map *vnn_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self)
+{
+       int i, j, num_nodes;
+       uint32_t *nodes;
+       /* first pass: count the matching entries so the array can be sized exactly */
+       for (i=num_nodes=0;i<vnn_map->size;i++) {
+               if (vnn_map->map[i] == ctdb->pnn && !include_self) {
+                       continue;
+               }
+               num_nodes++;
+       } 
+
+       nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
+       CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+       /* second pass: same filter as the first, so exactly num_nodes entries are written */
+       for (i=j=0;i<vnn_map->size;i++) {
+               if (vnn_map->map[i] == ctdb->pnn && !include_self) {
+                       continue;
+               }
+               nodes[j++] = vnn_map->map[i];
+       } 
+
+       return nodes;
+}
+/* build a talloc array (child of mem_ctx) of the pnns of all nodes without NODE_FLAGS_INACTIVE, leaving out our own pnn unless include_self is set */
+uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_node_map *node_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self)
+{
+       int i, j, num_nodes;
+       uint32_t *nodes;
+       /* first pass: count the matching nodes so the array can be sized exactly */
+       for (i=num_nodes=0;i<node_map->num;i++) {
+               if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+                       continue;
+               }
+               if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
+                       continue;
+               }
+               num_nodes++;
+       } 
+
+       nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
+       CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+       /* second pass: same filters as the first, so exactly num_nodes entries are written */
+       for (i=j=0;i<node_map->num;i++) {
+               if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+                       continue;
+               }
+               if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
+                       continue;
+               }
+               nodes[j++] = node_map->nodes[i].pnn;
+       } 
+
+       return nodes;
+}
diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig

index f236cda6e72a0ea6e675b82179aca8ad4e53ee2b..9306884b642bb542b447689ccb2955101ce8a2da 100644 (file)
--- a/ctdb/config/ctdb.sysconfig
+++ b/ctdb/config/ctdb.sysconfig
@@ -42,10 +42,6 @@
  # default is to not manage Samba
  # CTDB_MANAGES_SAMBA=yes
  
-# should ctdb manage starting/stopping the http service for you?
-# default is to not manage http 
-# CTDB_MANAGES_HTTPD=yes
-
  # should ctdb manage starting/stopping Winbind service?
  # if left comented out then it will be autodetected based on smb.conf
  # CTDB_MANAGES_WINBIND=yes
diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h

index eee698341727ce44bb5a1e6be45de75477045d44..b779b94dcdcddb024028bb440d272ffc99262379 100644 (file)
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@@ -499,4 +499,15 @@ struct ctdb_client_control_state *ctdb_ctrl_uptime_send(struct ctdb_context *ctd
  
  int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime);
  
+int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
+
+uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_node_map *node_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self);
+uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
+                               struct ctdb_vnn_map *vnn_map,
+                               TALLOC_CTX *mem_ctx,
+                               bool include_self);
+
  #endif
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h

index 57501fc68a8e7aba33bc3de6974841d0e2ed7d21..ab875924fa16614f0c7acd0a88d6bb18d8ed6896 100644 (file)
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -488,6 +488,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
                     CTDB_CONTROL_WIPE_DATABASE           = 67,
                     CTDB_CONTROL_DELETE_RECORD           = 68,
                     CTDB_CONTROL_UPTIME                  = 69,
+                   CTDB_CONTROL_START_RECOVERY          = 70,
+                   CTDB_CONTROL_END_RECOVERY            = 71,
  };     
  
  /*
@@ -1082,6 +1084,12 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
                                  struct ctdb_req_control *c,
                                  TDB_DATA indata, 
                                  bool *async_reply);
+int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                bool *async_reply);
+int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                bool *async_reply);
  
  struct ctdb_public_ip {
         uint32_t pnn;
@@ -1221,4 +1229,20 @@ void ctdb_unblock_signal(int signum);
  int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb);
  int ctdb_set_child_logging(struct ctdb_context *ctdb);
  
+/* shared bookkeeping for a batch of controls sent in parallel; see ctdb_client_async_add() and ctdb_client_async_wait() */
+struct client_async_data {
+       bool dont_log_errors;   /* when set, failures are counted but not logged */
+       uint32_t count;         /* controls added to the batch that have not completed yet */
+       uint32_t fail_count;    /* controls that completed unsuccessfully */
+};
+void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state);
+int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data);
+int ctdb_client_async_control(struct ctdb_context *ctdb,
+                               enum ctdb_controls opcode,
+                               uint32_t *nodes,
+                               struct timeval timeout,
+                               bool dont_log_errors,
+                               TDB_DATA data);
+
+
  #endif
diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c

index 01a77fe887cb796a591e8d6acb5a1dedd181b46f..884ed69177f0a74e75eba76adae40c5a30d5c99b 100644 (file)
--- a/ctdb/server/ctdb_control.c
+++ b/ctdb/server/ctdb_control.c
@@ -355,6 +355,11 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
         case CTDB_CONTROL_UPTIME:
                 return ctdb_control_uptime(ctdb, outdata);
  
+       case CTDB_CONTROL_START_RECOVERY:
+               return ctdb_control_start_recovery(ctdb, c, async_reply);
+
+       case CTDB_CONTROL_END_RECOVERY:
+               return ctdb_control_end_recovery(ctdb, c, async_reply);
         default:
                 DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
                 return -1;
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c

index 65ad47156476f77d2045d0a681e147535231686e..b239554a02a8baeef312004f27635912493eaef8 100644 (file)
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -396,27 +396,6 @@ struct ctdb_set_recmode_state {
         pid_t child;
  };
  
-/*
-  called when the 'recovered' event script has finished
- */
-static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void *p)
-{
-       struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state);
-
-       ctdb_enable_monitoring(state->ctdb);
-
-       if (status == 0) {
-               ctdb->recovery_mode = state->recmode;
-       } else {
-               DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status));
-       }
-
-       ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
-       talloc_free(state);
-
-       gettimeofday(&ctdb->last_recovery_time, NULL);
-}
-
  /*
    called if our set_recmode child times out. this would happen if
    ctdb_recovery_lock() would block.
@@ -473,23 +452,11 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde,
                 return;
         }
  
+       state->ctdb->recovery_mode = state->recmode;
  
-       ctdb_disable_monitoring(state->ctdb);
-
-       /* call the events script to tell all subsystems that we have recovered */
-       ret = ctdb_event_script_callback(state->ctdb, 
-                                        timeval_current_ofs(state->ctdb->tunable.script_timeout, 0),
-                                        state, 
-                                        ctdb_recovered_callback, 
-                                        state, "recovered");
-
-       if (ret != 0) {
-               ctdb_enable_monitoring(state->ctdb);
-
-               ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "failed to run eventscript from set_recmode");
-               talloc_free(state);
-               return;
-       }
+       ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
+       talloc_free(state);
+       return;
  }
  
  /*
@@ -742,3 +709,122 @@ int32_t ctdb_control_delete_record(struct ctdb_context *ctdb, TDB_DATA indata)
         free(data.dptr);
         return 0;       
  }
+
+/* state handed to the start/end recovery event script callbacks: the control request to reply to once the script has finished */
+struct recovery_callback_state {
+       struct ctdb_req_control *c;
+};
+
+
+/*
+  called when the 'recovered' event script has finished: re-enables monitoring, replies to the pending control with the script status and records the time
+ */
+static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+       struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state);
+       /* monitoring was disabled by ctdb_control_end_recovery() before the script was launched */
+       ctdb_enable_monitoring(ctdb);
+
+       if (status != 0) {
+               DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status));
+       }
+       /* the script status is handed to ctdb_request_control_reply() for the request saved in state->c */
+       ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+       talloc_free(state);
+       /* state is gone from here on; only ctdb is touched below */
+       gettimeofday(&ctdb->last_recovery_time, NULL);
+}
+
+/*
+  handler for CTDB_CONTROL_END_RECOVERY: runs the "recovered" event script; the control is answered later from ctdb_end_recovery_callback()
+ */
+int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, 
+                               struct ctdb_req_control *c,
+                               bool *async_reply)
+{
+       int ret;
+       struct recovery_callback_state *state;
+
+       DEBUG(0,("Recovery has finished\n"));
+
+       state = talloc(ctdb, struct recovery_callback_state);
+       CTDB_NO_MEMORY(ctdb, state);
+       /* make the request a talloc child of state; the callback replies to it and then frees state */
+       state->c    = talloc_steal(state, c);
+       /* monitoring stays off while the script runs; the callback (or the error path below) turns it back on */
+       ctdb_disable_monitoring(ctdb);
+
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, 
+                                        ctdb_end_recovery_callback, 
+                                        state, "recovered");
+
+       if (ret != 0) {
+               ctdb_enable_monitoring(ctdb);
+               /* NOTE(review): freeing state also frees the stolen request c - confirm the caller does not use c after a -1 return */
+               DEBUG(0,(__location__ " Failed to end recovery\n"));
+               talloc_free(state);
+               return -1;
+       }
+
+       /* tell the control dispatcher that we will reply asynchronously */
+       *async_reply = true;
+       return 0;
+}
+
+/*
+  called when the 'startrecovery' event script has finished: re-enables monitoring and replies to the pending control with the script status
+ */
+static void ctdb_start_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+       struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state);
+       /* monitoring was disabled by ctdb_control_start_recovery() before the script was launched */
+       ctdb_enable_monitoring(ctdb);
+
+       if (status != 0) {
+               DEBUG(0,(__location__ " startrecovery event script failed (status %d)\n", status));
+       }
+       /* the script status is handed to ctdb_request_control_reply() for the request saved in state->c */
+       ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+       talloc_free(state);
+}
+
+/*
+  handler for CTDB_CONTROL_START_RECOVERY: runs the "startrecovery" event script; the control is answered later from ctdb_start_recovery_callback()
+ */
+int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, 
+                               struct ctdb_req_control *c,
+                               bool *async_reply)
+{
+       int ret;
+       struct recovery_callback_state *state;
+
+       DEBUG(0,("Recovery has started\n"));
+
+       state = talloc(ctdb, struct recovery_callback_state);
+       CTDB_NO_MEMORY(ctdb, state);
+       /* make the request a talloc child of state; the callback replies to it and then frees state */
+       state->c    = talloc_steal(state, c);
+       /* monitoring stays off while the script runs; the callback (or the error path below) turns it back on */
+       ctdb_disable_monitoring(ctdb);
+
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, 
+                                        ctdb_start_recovery_callback, 
+                                        state, "startrecovery");
+
+       if (ret != 0) {
+               ctdb_enable_monitoring(ctdb);
+               /* NOTE(review): freeing state also frees the stolen request c - confirm the caller does not use c after a -1 return */
+               DEBUG(0,(__location__ " Failed to start recovery\n"));
+               talloc_free(state);
+               return -1;
+       }
+
+       /* tell the control dispatcher that we will reply asynchronously */
+       *async_reply = true;
+       return 0;
+}
+
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c

index c13136e848a737c0be9838ff2dcad64e2cc57a7e..8595706cc0f7fdb0ab2fac61a9e1c01ae91e38d3 100644 (file)
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -58,66 +58,6 @@ struct ctdb_recoverd {
  #define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
  
  
-struct async_data {
-       uint32_t count;
-       uint32_t fail_count;
-};
-
-static void async_callback(struct ctdb_client_control_state *state)
-{
-       struct async_data *data = talloc_get_type(state->async.private_data, struct async_data);
-       int ret;
-       int32_t res;
-
-       /* one more node has responded with recmode data */
-       data->count--;
-
-       /* if we failed to push the db, then return an error and let
-          the main loop try again.
-       */
-       if (state->state != CTDB_CONTROL_DONE) {
-               DEBUG(0,("Async operation failed with state %d\n", state->state));
-               data->fail_count++;
-               return;
-       }
-       
-       state->async.fn = NULL;
-
-       ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
-       if ((ret != 0) || (res != 0)) {
-               DEBUG(0,("Async operation failed with ret=%d res=%d\n", ret, (int)res));
-               data->fail_count++;
-       }
-}
-
-
-static void async_add(struct async_data *data, struct ctdb_client_control_state *state)
-{
-       /* set up the callback functions */
-       state->async.fn = async_callback;
-       state->async.private_data = data;
-       
-       /* one more control to wait for to complete */
-       data->count++;
-}
-
-
-/* wait for up to the maximum number of seconds allowed
-   or until all nodes we expect a response from has replied
-*/
-static int async_wait(struct ctdb_context *ctdb, struct async_data *data)
-{
-       while (data->count > 0) {
-               event_loop_once(ctdb->ev);
-       }
-       if (data->fail_count != 0) {
-               DEBUG(0,("Async wait failed - fail_count=%u\n", data->fail_count));
-               return -1;
-       }
-       return 0;
-}
-
-
  /*
    unban a node
   */
@@ -255,50 +195,49 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_
  enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
  
  
-/* 
-   perform a simple control on all active nodes. The control cannot return data
+/*
+  run the "recovered" eventscript on all active nodes (including this one)
   */
-static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_controls opcode,
-                                        struct ctdb_node_map *nodemap, TDB_DATA data, bool include_self)
+static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
  {
-       struct async_data *async_data;
-       struct ctdb_client_control_state *state;
-       int j;
-       struct timeval timeout = CONTROL_TIMEOUT();
-       
-       async_data = talloc_zero(ctdb, struct async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+       TALLOC_CTX *tmp_ctx;
  
-       /* loop over all active nodes and send an async control to each of them */
-       for (j=0; j<nodemap->num; j++) {
-               if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-                       continue;
-               }
-               if (nodemap->nodes[j].pnn == ctdb->pnn && !include_self) {
-                       continue;
-               }
-               state = ctdb_control_send(ctdb, nodemap->nodes[j].pnn, 0, opcode, 
-                                         0, data, async_data, &timeout, NULL);
-               if (state == NULL) {
-                       DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
-                       talloc_free(async_data);
-                       return -1;
-               }
-               
-               async_add(async_data, state);
-       }
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
  
-       if (async_wait(ctdb, async_data) != 0) {
-               DEBUG(0,(__location__ " Failed async control %u\n", (unsigned)opcode));
-               talloc_free(async_data);
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_END_RECOVERY,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, tdb_null) != 0) {
+               DEBUG(0, (__location__ " Unable to run the 'recovered' event. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
                 return -1;
         }
  
-       talloc_free(async_data);
+       talloc_free(tmp_ctx);
         return 0;
  }
  
+/*
+  run the "startrecovery" eventscript on all active nodes (including this one); a non-zero result from ctdb_client_async_control() is logged and returned as -1
+ */
+static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
+{
+       TALLOC_CTX *tmp_ctx;
  
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
+       /* tmp_ctx only holds the node list built below; it is freed on both exits */
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_START_RECOVERY,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, tdb_null) != 0) {
+               DEBUG(0, (__location__ " Unable to run the 'startrecovery' event. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+
+       talloc_free(tmp_ctx);
+       return 0;
+}
  
  /*
    change recovery mode on all nodes
@@ -306,12 +245,21 @@ static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_co
  static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
  {
         TDB_DATA data;
+       uint32_t *nodes;
+       TALLOC_CTX *tmp_ctx;
+
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
+
+       nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
  
         /* freeze all nodes */
         if (rec_mode == CTDB_RECOVERY_ACTIVE) {
-               if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_FREEZE, 
-                                                 nodemap, tdb_null, true) != 0) {
+               if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE,
+                                               nodes, CONTROL_TIMEOUT(),
+                                               false, tdb_null) != 0) {
                         DEBUG(0, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
+                       talloc_free(tmp_ctx);
                         return -1;
                 }
         }
@@ -320,20 +268,25 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
         data.dsize = sizeof(uint32_t);
         data.dptr = (unsigned char *)&rec_mode;
  
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMODE, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMODE,
+                                       nodes, CONTROL_TIMEOUT(),
+                                       false, data) != 0) {
                 DEBUG(0, (__location__ " Unable to set recovery mode. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
                 return -1;
         }
  
         if (rec_mode == CTDB_RECOVERY_NORMAL) {
-               if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_THAW, 
-                                                 nodemap, tdb_null, true) != 0) {
+               if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW,
+                                               nodes, CONTROL_TIMEOUT(),
+                                               false, tdb_null) != 0) {
                         DEBUG(0, (__location__ " Unable to thaw nodes. Recovery failed.\n"));
+                       talloc_free(tmp_ctx);
                         return -1;
                 }
         }
  
+       talloc_free(tmp_ctx);
         return 0;
  }
  
@@ -343,16 +296,23 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
  static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t pnn)
  {
         TDB_DATA data;
+       TALLOC_CTX *tmp_ctx;
+
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
  
         data.dsize = sizeof(uint32_t);
         data.dptr = (unsigned char *)&pnn;
  
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMASTER, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMASTER,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                 DEBUG(0, (__location__ " Unable to set recmaster. Recovery failed.\n"));
+               talloc_free(tmp_ctx);
                 return -1;
         }
  
+       talloc_free(tmp_ctx);
         return 0;
  }
  
@@ -1141,6 +1101,10 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
         struct recdb_data params;
         struct ctdb_control_pulldb_reply *recdata;
         TDB_DATA outdata;
+       TALLOC_CTX *tmp_ctx;
+
+       tmp_ctx = talloc_new(ctdb);
+       CTDB_NO_MEMORY(ctdb, tmp_ctx);
  
         recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply);
         CTDB_NO_MEMORY(ctdb, recdata);
@@ -1155,12 +1119,14 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
         if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) {
                 DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
                 talloc_free(params.recdata);
+               talloc_free(tmp_ctx);
                 return -1;
         }
  
         if (params.failed) {
                 DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
                 talloc_free(params.recdata);
+               talloc_free(tmp_ctx);
                 return -1;              
         }
  
@@ -1169,9 +1135,12 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
         outdata.dptr = (void *)recdata;
         outdata.dsize = params.len;
  
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_PUSH_DB, nodemap, outdata, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB,
+                       list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
+                       CONTROL_TIMEOUT(), false, outdata) != 0) {
                 DEBUG(0,(__location__ " Failed to push recdb records to nodes for db 0x%x\n", dbid));
                 talloc_free(recdata);
+               talloc_free(tmp_ctx);
                 return -1;
         }
  
@@ -1179,6 +1148,7 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
                   dbid, recdata->count));
  
         talloc_free(recdata);
+       talloc_free(tmp_ctx);
  
         return 0;
  }
@@ -1221,9 +1191,11 @@ static int recover_database(struct ctdb_recoverd *rec,
         data.dptr = (void *)&w;
         data.dsize = sizeof(w);
  
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_WIPE_DATABASE, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE,
+                       list_of_active_nodes(ctdb, nodemap, recdb, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                 DEBUG(0, (__location__ " Unable to wipe database. Recovery failed.\n"));
+               talloc_free(recdb);
                 return -1;
         }
         
@@ -1304,6 +1276,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
  
         DEBUG(0, (__location__ " Recovery - created remote databases\n"));
  
+
         /* set recovery mode to active on all nodes */
         ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
         if (ret!=0) {
@@ -1311,6 +1284,13 @@ static int do_recovery(struct ctdb_recoverd *rec,
                 return -1;
         }
  
+       /* execute the "startrecovery" event script on all nodes */
+       ret = run_startrecovery_eventscript(ctdb, nodemap);
+       if (ret!=0) {
+               DEBUG(0, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
+               return -1;
+       }
+
         /* pick a new generation number */
         generation = new_generation();
  
@@ -1334,8 +1314,9 @@ static int do_recovery(struct ctdb_recoverd *rec,
         data.dptr = (void *)&generation;
         data.dsize = sizeof(uint32_t);
  
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_START, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START,
+                       list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                 DEBUG(0, (__location__ " Unable to start transactions. Recovery failed.\n"));
                 return -1;
         }
@@ -1352,8 +1333,9 @@ static int do_recovery(struct ctdb_recoverd *rec,
         DEBUG(0, (__location__ " Recovery - starting database commits\n"));
  
         /* commit all the changes */
-       if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT, 
-                                         nodemap, data, true) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT,
+                       list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
+                       CONTROL_TIMEOUT(), false, data) != 0) {
                 DEBUG(0, (__location__ " Unable to commit recovery changes. Recovery failed.\n"));
                 return -1;
         }
@@ -1417,6 +1399,13 @@ static int do_recovery(struct ctdb_recoverd *rec,
                 DEBUG(1, (__location__ " Recovery - done takeover\n"));
         }
  
+       /* execute the "recovered" event script on all nodes */
+       ret = run_recovered_eventscript(ctdb, nodemap);
+       if (ret!=0) {
+               DEBUG(0, (__location__ " Unable to run the 'recovered' event on cluster\n"));
+               return -1;
+       }
+
         /* disable recovery mode */
         ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
         if (ret!=0) {
@@ -2445,12 +2434,29 @@ again:
         /* we might need to change who has what IP assigned */
         if (rec->need_takeover_run) {
                 rec->need_takeover_run = false;
+
+               /* execute the "startrecovery" event script on all nodes */
+               ret = run_startrecovery_eventscript(ctdb, nodemap);
+               if (ret!=0) {
+                       DEBUG(0, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
+                       do_recovery(rec, mem_ctx, pnn, num_active, nodemap, 
+                                   vnnmap, ctdb->pnn);
+               }
+
                 ret = ctdb_takeover_run(ctdb, nodemap);
                 if (ret != 0) {
                         DEBUG(0, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
                         do_recovery(rec, mem_ctx, pnn, num_active, nodemap, 
                                     vnnmap, ctdb->pnn);
                 }
+
+               /* execute the "recovered" event script on all nodes */
+               ret = run_recovered_eventscript(ctdb, nodemap);
+               if (ret!=0) {
+                       DEBUG(0, (__location__ " Unable to run the 'recovered' event on cluster\n"));
+                       do_recovery(rec, mem_ctx, pnn, num_active, nodemap, 
+                                   vnnmap, ctdb->pnn);
+               }
         }
  
         goto again;
diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c

index b63b88f4c25e9bf3fc9efd1836c663e866b56459..cea3f95e34e3c4e3ff05ecd646034a8f1c60d3d5 100644 (file)
--- a/ctdb/server/ctdb_takeover.c
+++ b/ctdb/server/ctdb_takeover.c
@@ -641,11 +641,14 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
  int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
  {
         int i, num_healthy, retries;
-       int ret;
         struct ctdb_public_ip ip;
         uint32_t mask;
         struct ctdb_public_ip_list *all_ips, *tmp_ip;
         int maxnode, maxnum=0, minnode, minnum=0, num;
+       TDB_DATA data;
+       struct timeval timeout;
+       struct client_async_data *async_data;
+       struct ctdb_client_control_state *state;
         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
  
  
@@ -813,6 +816,9 @@ try_again:
         /* now tell all nodes to delete any alias that they should not
            have.  This will be a NOOP on nodes that don't currently
            hold the given alias */
+       async_data = talloc_zero(tmp_ctx, struct client_async_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+
         for (i=0;i<nodemap->num;i++) {
                 /* don't talk to unconnected nodes, but do talk to banned nodes */
                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
@@ -830,21 +836,33 @@ try_again:
                         ip.sin.sin_family = AF_INET;
                         ip.sin.sin_addr   = tmp_ip->sin.sin_addr;
  
-                       ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(),
-                                                  nodemap->nodes[i].pnn, 
-                                                  &ip);
-                       if (ret != 0) {
-                               DEBUG(0,("Failed to tell vnn %u to release IP %s\n",
-                                        nodemap->nodes[i].pnn,
-                                        inet_ntoa(tmp_ip->sin.sin_addr)));
+                       timeout = TAKEOVER_TIMEOUT();
+                       data.dsize = sizeof(ip);
+                       data.dptr  = (uint8_t *)&ip;
+                       state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
+                                       0, CTDB_CONTROL_RELEASE_IP, 0,
+                                       data, async_data,
+                                       &timeout, NULL);
+                       if (state == NULL) {
+                               DEBUG(0,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
                                 talloc_free(tmp_ctx);
                                 return -1;
                         }
+               
+                       ctdb_client_async_add(async_data, state);
                 }
         }
+       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+               DEBUG(0,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       talloc_free(async_data);
  
  
         /* tell all nodes to get their own IPs */
+       async_data = talloc_zero(tmp_ctx, struct client_async_data);
+       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                 if (tmp_ip->pnn == -1) {
                         /* this IP won't be taken over */
@@ -854,16 +872,25 @@ try_again:
                 ip.sin.sin_family = AF_INET;
                 ip.sin.sin_addr = tmp_ip->sin.sin_addr;
  
-               ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(), 
-                                           tmp_ip->pnn, 
-                                           &ip);
-               if (ret != 0) {
-                       DEBUG(0,("Failed asking vnn %u to take over IP %s\n",
-                                tmp_ip->pnn, 
-                                inet_ntoa(tmp_ip->sin.sin_addr)));
+               timeout = TAKEOVER_TIMEOUT();
+               data.dsize = sizeof(ip);
+               data.dptr  = (uint8_t *)&ip;
+               state = ctdb_control_send(ctdb, tmp_ip->pnn,
+                               0, CTDB_CONTROL_TAKEOVER_IP, 0,
+                               data, async_data,
+                               &timeout, NULL);
+               if (state == NULL) {
+                       DEBUG(0,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
                         talloc_free(tmp_ctx);
                         return -1;
                 }
+               
+               ctdb_client_async_add(async_data, state);
+       }
+       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
+               DEBUG(0,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
         }
  
         talloc_free(tmp_ctx);
diff --git a/ctdb/tests/events.d/00.test b/ctdb/tests/events.d/00.test

index 026cf6cba77f39ea7602a3dbcb9662185f453d94..c5933be36afaca7a57d074e5a8865c380c48fc2a 100755 (executable)
--- a/ctdb/tests/events.d/00.test
+++ b/ctdb/tests/events.d/00.test
@@ -10,6 +10,12 @@ case $cmd in
         echo "monitor event stderr" >&2
         exit 0
         ;;
+
+     startrecovery)
+       echo "ctdb startrecovery event"
+       exit 0; 
+       ;;
+
       startup)
         echo "ctdb startup event"
         exit 0; 
diff --git a/ctdb/tools/ctdb_vacuum.c b/ctdb/tools/ctdb_vacuum.c

index 2bc9908c7ee1603e880a03a8fb46fadd57dd97a4..f412c0430386f792ce7f9c89b319d531c1b8f362 100644 (file)
--- a/ctdb/tools/ctdb_vacuum.c
+++ b/ctdb/tools/ctdb_vacuum.c
@@ -28,103 +28,6 @@
  /* should be tunable */
  #define TIMELIMIT() timeval_current_ofs(10, 0)
  
-struct async_data {
-       uint32_t count;
-       uint32_t fail_count;
-};
-
-static void async_callback(struct ctdb_client_control_state *state)
-{
-       struct async_data *data = talloc_get_type(state->async.private_data, struct async_data);
-       int ret;
-       int32_t res;
-
-       /* one more node has responded with recmode data */
-       data->count--;
-
-       /* if we failed to push the db, then return an error and let
-          the main loop try again.
-       */
-       if (state->state != CTDB_CONTROL_DONE) {
-               data->fail_count++;
-               return;
-       }
-       
-       state->async.fn = NULL;
-
-       ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
-       if ((ret != 0) || (res != 0)) {
-               data->fail_count++;
-       }
-}
-
-static void async_add(struct async_data *data, struct ctdb_client_control_state *state)
-{
-       /* set up the callback functions */
-       state->async.fn = async_callback;
-       state->async.private_data = data;
-       
-       /* one more control to wait for to complete */
-       data->count++;
-}
-
-
-/* wait for up to the maximum number of seconds allowed
-   or until all nodes we expect a response from has replied
-*/
-static int async_wait(struct ctdb_context *ctdb, struct async_data *data)
-{
-       while (data->count > 0) {
-               event_loop_once(ctdb->ev);
-       }
-       if (data->fail_count != 0) {
-               return -1;
-       }
-       return 0;
-}
-
-/* 
-   perform a simple control on nodes in the vnn map except ourselves.
-   The control cannot return data
- */
-static int async_control_on_vnnmap(struct ctdb_context *ctdb, enum ctdb_controls opcode,
-                                  TDB_DATA data)
-{
-       struct async_data *async_data;
-       struct ctdb_client_control_state *state;
-       int j;
-       struct timeval timeout = TIMELIMIT();
-       
-       async_data = talloc_zero(ctdb, struct async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
-       /* loop over all active nodes and send an async control to each of them */
-       for (j=0; j<ctdb->vnn_map->size; j++) {
-               uint32_t pnn = ctdb->vnn_map->map[j];
-               if (pnn == ctdb->pnn) {
-                       continue;
-               }
-               state = ctdb_control_send(ctdb, pnn, 0, opcode, 
-                                         0, data, async_data, &timeout, NULL);
-               if (state == NULL) {
-                       DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
-                       talloc_free(async_data);
-                       return -1;
-               }
-               
-               async_add(async_data, state);
-       }
-
-       if (async_wait(ctdb, async_data) != 0) {
-               talloc_free(async_data);
-               return -1;
-       }
-
-       talloc_free(async_data);
-       return 0;
-}
-
-
  /*
    vacuum one record
   */
@@ -172,7 +75,9 @@ static int ctdb_vacuum_one(struct ctdb_context *ctdb, TDB_DATA key,
         data.dptr = (void *)rec;
         data.dsize = rec->length;
  
-       if (async_control_on_vnnmap(ctdb, CTDB_CONTROL_DELETE_RECORD, data) != 0) {
+       if (ctdb_client_async_control(ctdb, CTDB_CONTROL_DELETE_RECORD,
+                       list_of_vnnmap_nodes(ctdb, ctdb->vnn_map, rec, false),
+                       TIMELIMIT(), true, data) != 0) {
                 /* one or more nodes failed to delete a record - no problem! */
                 talloc_free(rec);
                 return 0;
author	Andrew Tridgell <tridge@samba.org>
	Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
committer	Andrew Tridgell <tridge@samba.org>
	Tue, 29 Jan 2008 02:59:28 +0000 (13:59 +1100)
ctdb/client/ctdb_client.c		patch \| blob \| history
ctdb/config/ctdb.sysconfig		patch \| blob \| history
ctdb/include/ctdb.h		patch \| blob \| history
ctdb/include/ctdb_private.h		patch \| blob \| history
ctdb/server/ctdb_control.c		patch \| blob \| history
ctdb/server/ctdb_recover.c		patch \| blob \| history
ctdb/server/ctdb_recoverd.c		patch \| blob \| history
ctdb/server/ctdb_takeover.c		patch \| blob \| history
ctdb/tests/events.d/00.test		patch \| blob \| history
ctdb/tools/ctdb_vacuum.c		patch \| blob \| history