ctdb-recoverd: Use race for cluster lock as election when lock is enabled

author Martin Schwenke <martin@meltin.net>

Wed, 18 Mar 2020 04:14:39 +0000 (15:14 +1100)

committer Martin Schwenke <martins@samba.org>

Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
author Martin Schwenke <martin@meltin.net>
Wed, 18 Mar 2020 04:14:39 +0000 (15:14 +1100)
committer Martin Schwenke <martins@samba.org>
Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c

index 51c4693c97439894e749b95a08f9bced08ed5a1f..c2a48a07b4ef00f16727368939935fe78ffd436a 100644 (file)
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -1831,6 +1831,37 @@ static void election_handler(uint64_t srvid, TDB_DATA data, void *private_data)
         return;
  }
  
+static void cluster_lock_election(struct ctdb_recoverd *rec)
+{
+       bool ok;
+       /*
+        * Election by racing for the cluster lock: this node becomes
+        * leader only if it manages to take the lock.
+        *
+        * A node that can not be leader does not take part, and
+        * gives up the lock if it currently holds it.
+        */
+       if (!this_node_can_be_leader(rec)) {
+               if (cluster_lock_held(rec)) {
+                       cluster_lock_release(rec);
+               }
+               return;
+       }
+
+       /*
+        * Don't need to unconditionally release the lock and then
+        * attempt to retake it.  This provides stability.
+        *
+        * Returning here leaves rec->leader and
+        * rec->election_in_progress untouched.
+        */
+       if (cluster_lock_held(rec)) {
+               return;
+       }
+       /* Forget any current leader and flag the election as running */
+       rec->leader = CTDB_UNKNOWN_PNN;
+       rec->election_in_progress = true;
+       /* If the attempt fails, rec->leader stays CTDB_UNKNOWN_PNN */
+       ok = cluster_lock_take(rec);
+       if (ok) {
+               rec->leader = rec->pnn;
+               D_WARNING("Took cluster lock, leader=%"PRIu32"\n", rec->leader);
+       }
+       /* The election is over whether or not the lock was taken */
+       rec->election_in_progress = false;
+}
+
  /*
    force the start of the election process
   */
@@ -1848,6 +1879,11 @@ static void force_election(struct ctdb_recoverd *rec)
                 return;
         }
  
+       if (cluster_lock_enabled(rec)) {
+               cluster_lock_election(rec);
+               return;
+       }
+
         talloc_free(rec->election_timeout);
         rec->election_in_progress = true;
         rec->election_timeout = tevent_add_timer(
@@ -2007,12 +2043,23 @@ static void leader_handler(uint64_t srvid, TDB_DATA data, void *private_data)
         }
  
         if (pnn == CTDB_UNKNOWN_PNN) {
+               bool was_election_in_progress = rec->election_in_progress;
+
                 /*
                  * Leader broadcast timeout was cancelled above - stop
                  * main loop from restarting it until election is
                  * complete
                  */
                 rec->election_in_progress = true;
+
+               /*
+                * This is the only notification for a cluster lock
+                * election, so handle it here...
+                */
+               if (cluster_lock_enabled(rec) && !was_election_in_progress) {
+                       cluster_lock_election(rec);
+               }
+
                 return;
         }
  
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh

index 36b72818f2402b4c5b9052dc0ca9d3db381804cd..35363d11f1d1ec398afba928fdd90b08cce8670f 100755 (executable)
--- a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
+++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
@@ -82,8 +82,7 @@ leader_old="$leader"
  leader_get "$test_node"
  
  if [ "$leader" != "$leader_old" ] ; then
-       ctdb_test_fail \
-               "BAD: Leader has changed to node ${leader}"
+       echo "OK: Leader has changed to node ${leader_new}"
  fi
  echo "GOOD: Leader is still node ${leader}"
  echo
author	Martin Schwenke <martin@meltin.net>
	Wed, 18 Mar 2020 04:14:39 +0000 (15:14 +1100)
committer	Martin Schwenke <martins@samba.org>
	Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
ctdb/server/ctdb_recoverd.c		patch \| blob \| history
ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh		patch \| blob \| history