ctdb-recoverd: Use race for cluster lock as election when lock is enabled
author: Martin Schwenke <martin@meltin.net>
Wed, 18 Mar 2020 04:14:39 +0000 (15:14 +1100)
committer: Martin Schwenke <martins@samba.org>
Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
If the cluster is partitioned then nodes in one partition cannot take
the lock anyway, so an election is pointless.  It just introduces
unnecessary corner cases.

Instead just race for the lock.

When a node notices that there is no leader, it notifies the other
nodes of an election via an unknown leader broadcast.  The cluster
lock election is hooked into the handling of this broadcast.

The test needs to be updated because losing the cluster lock can now
result in a leadership change.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/server/ctdb_recoverd.c
ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh

index 51c4693c97439894e749b95a08f9bced08ed5a1f..c2a48a07b4ef00f16727368939935fe78ffd436a 100644 (file)
@@ -1831,6 +1831,37 @@ static void election_handler(uint64_t srvid, TDB_DATA data, void *private_data)
        return;
 }
 
+/*
+ * Run an "election" by racing for the cluster lock.
+ *
+ * Called from force_election() and from leader_handler() on an
+ * unknown leader broadcast, in both cases only when the cluster lock
+ * is enabled.
+ *
+ * - If this node can not be leader, any cluster lock it holds is
+ *   released and no attempt is made to take the lock.
+ * - If this node already holds the cluster lock, nothing changes.
+ * - Otherwise the leader is reset to unknown and an attempt is made
+ *   to take the lock; on success this node becomes leader.
+ *
+ * leader_handler() sets rec->election_in_progress before calling
+ * this function, so every exit path must clear the flag.  Returning
+ * early without doing so would leave the election marked as in
+ * progress even though this function has finished.
+ */
+static void cluster_lock_election(struct ctdb_recoverd *rec)
+{
+       bool ok;
+
+       if (!this_node_can_be_leader(rec)) {
+               if (cluster_lock_held(rec)) {
+                       cluster_lock_release(rec);
+               }
+               goto done;
+       }
+
+       /*
+        * Don't need to unconditionally release the lock and then
+        * attempt to retake it.  This provides stability.
+        */
+       if (cluster_lock_held(rec)) {
+               goto done;
+       }
+
+       rec->leader = CTDB_UNKNOWN_PNN;
+       /* Not every caller sets this, so mark the attempt explicitly */
+       rec->election_in_progress = true;
+
+       ok = cluster_lock_take(rec);
+       if (ok) {
+               rec->leader = rec->pnn;
+               D_WARNING("Took cluster lock, leader=%"PRIu32"\n", rec->leader);
+       }
+
+done:
+       /* The election is over for this node, whatever the outcome */
+       rec->election_in_progress = false;
+}
+
 /*
   force the start of the election process
  */
@@ -1848,6 +1879,11 @@ static void force_election(struct ctdb_recoverd *rec)
                return;
        }
 
+       if (cluster_lock_enabled(rec)) {
+               cluster_lock_election(rec);
+               return;
+       }
+
        talloc_free(rec->election_timeout);
        rec->election_in_progress = true;
        rec->election_timeout = tevent_add_timer(
@@ -2007,12 +2043,23 @@ static void leader_handler(uint64_t srvid, TDB_DATA data, void *private_data)
        }
 
        if (pnn == CTDB_UNKNOWN_PNN) {
+               bool was_election_in_progress = rec->election_in_progress;
+
                /*
                 * Leader broadcast timeout was cancelled above - stop
                 * main loop from restarting it until election is
                 * complete
                 */
                rec->election_in_progress = true;
+
+               /*
+                * This is the only notification for a cluster lock
+                * election, so handle it here...
+                */
+               if (cluster_lock_enabled(rec) && !was_election_in_progress) {
+                       cluster_lock_election(rec);
+               }
+
                return;
        }
 
index 36b72818f2402b4c5b9052dc0ca9d3db381804cd..35363d11f1d1ec398afba928fdd90b08cce8670f 100755 (executable)
@@ -82,8 +82,7 @@ leader_old="$leader"
 leader_get "$test_node"
 
 if [ "$leader" != "$leader_old" ] ; then
-       ctdb_test_fail \
-               "BAD: Leader has changed to node ${leader}"
+       echo "OK: Leader has changed to node ${leader_new}"
 fi
 echo "GOOD: Leader is still node ${leader}"
 echo