ctdb-recoverd: Add fail callback to assign banning credits
author Martin Schwenke <martin@meltin.net>
Thu, 18 Jan 2018 08:58:15 +0000 (19:58 +1100)
committer Stefan Metzmacher <metze@samba.org>
Thu, 27 Aug 2020 10:48:08 +0000 (10:48 +0000)
Also drop error handling in main_loop() that is replaced by this
change.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14466
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
(cherry picked from commit 10ce0dbf1c11eaaab7b28b6bbd014235a36d1962)

ctdb/server/ctdb_recoverd.c

index 406f870afc8fc0becde5e9ad30adc2e4b1b83e24..e50facfe99205b3dc7338600a87ad27763582036 100644 (file)
@@ -2225,6 +2225,7 @@ done:
 
 struct remote_nodemaps_state {
        struct ctdb_node_map_old **remote_nodemaps;
+       struct ctdb_recoverd *rec;
 };
 
 static void async_getnodemap_callback(struct ctdb_context *ctdb,
@@ -2247,6 +2248,20 @@ static void async_getnodemap_callback(struct ctdb_context *ctdb,
 
 }
 
+static void async_getnodemap_error(struct ctdb_context *ctdb,
+                                  uint32_t node_pnn,
+                                  int32_t res,
+                                  TDB_DATA outdata,
+                                  void *callback_data)
+{
+       struct remote_nodemaps_state *state =
+               (struct remote_nodemaps_state *)callback_data;
+       struct ctdb_recoverd *rec = state->rec;
+
+       DBG_ERR("Failed to retrieve nodemap from node %u\n", node_pnn);
+       ctdb_set_culprit(rec, node_pnn);
+}
+
 static int get_remote_nodemaps(struct ctdb_recoverd *rec,
                               TALLOC_CTX *mem_ctx,
                               struct ctdb_node_map_old ***remote_nodemaps)
@@ -2268,6 +2283,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec,
        nodes = list_of_active_nodes(ctdb, rec->nodemap, mem_ctx, true);
 
        state.remote_nodemaps = t;
+       state.rec = rec;
 
        ret = ctdb_client_async_control(ctdb,
                                        CTDB_CONTROL_GET_NODEMAP,
@@ -2277,7 +2293,7 @@ static int get_remote_nodemaps(struct ctdb_recoverd *rec,
                                        false,
                                        tdb_null,
                                        async_getnodemap_callback,
-                                       NULL,
+                                       async_getnodemap_error,
                                        &state);
        talloc_free(nodes);
 
@@ -2622,13 +2638,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                        continue;
                }
 
-               if (remote_nodemaps[j] == NULL) {
-                       DEBUG(DEBUG_ERR,(__location__ " Did not get a remote nodemap for node %d, restarting monitoring\n", j));
-                       ctdb_set_culprit(rec, j);
-
-                       return;
-               }
-
                /* if the nodes disagree on how many nodes there are
                   then this is a good reason to try recovery
                 */