ctdb-tests: Add recovery record resurrection test for volatile databases
author: Martin Schwenke <martin@meltin.net>
Mon, 24 Sep 2018 06:17:19 +0000 (16:17 +1000)
committer: Amitay Isaacs <amitay@samba.org>
Mon, 8 Oct 2018 00:46:20 +0000 (02:46 +0200)
Ensure that deleted records and vacuumed records are not resurrected
from recently inactive nodes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/tests/simple/69_recovery_resurrect_deleted.sh [new file with mode: 0755]

diff --git a/ctdb/tests/simple/69_recovery_resurrect_deleted.sh b/ctdb/tests/simple/69_recovery_resurrect_deleted.sh
new file mode 100755 (executable)
index 0000000..95e79fd
--- /dev/null
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Print the one-line description of this test case on stdout.
+test_info()
+{
+    printf '%s\n' \
+        "Ensure recovery doesn't resurrect deleted records from recently inactive nodes"
+}
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+ctdb_test_init "$@"
+
+set -e
+
+cluster_is_healthy
+
+# Reset configuration
+ctdb_restart_when_done
+
+testdb="rec_test.tdb"
+
+echo "Getting list of nodes..."
+try_command_on_node -v any "onnode -pq all ctdb pnn | grep '^[0-9][0-9]*$'"
+
+first=$(echo "$out" | sed -n -e '1p')
+second=$(echo "$out" | sed -n -e '2p')
+notfirst=$(echo "$out" | tail -n +2)
+
+echo "Create/wipe test database ${testdb}"
+try_command_on_node $first $CTDB attach "$testdb"
+try_command_on_node $first $CTDB wipedb "$testdb"
+
+echo "store key(test1) data(value1)"
+try_command_on_node $first $CTDB writekey "$testdb" test1 value1
+
+echo "Migrate key(test1) to all nodes"
+try_command_on_node all $CTDB readkey "$testdb" test1
+
+echo "Stop node ${first}"
+try_command_on_node $first $CTDB stop
+wait_until_node_has_status $first stopped
+
+echo "Delete key(test1)"
+try_command_on_node $second $CTDB deletekey "$testdb" test1
+
+database_has_zero_records ()
+{
+       local n
+       for n in $notfirst ; do
+               try_command_on_node $n $CTDB cattdb "$testdb"
+               if echo "$out" | grep -q '^key(' ; then
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
+echo "Get vacuum interval"
+try_command_on_node -v $second $CTDB getvar VacuumInterval
+vacuum_interval="${out#* = }"
+
+echo "Wait until vacuuming deletes the record on active nodes"
+# Why 4?  Steps are:
+# 1. Original node processes delete queue, asks lmaster to fetch
+# 2. lmaster recoverd fetches
+# 3. lmaster processes delete queue
+# If vacuuming is just missed then need an extra interval.
+wait_until $((vacuum_interval * 4)) database_has_zero_records
+
+echo "Continue node ${first}"
+try_command_on_node $first $CTDB continue
+wait_until_node_has_status $first notstopped
+
+echo "Get database contents"
+try_command_on_node -v $first $CTDB catdb "$testdb"
+
+if echo "$out" | grep -q '^key(' ; then
+       echo "BAD: Deleted record has been resurrected"
+       exit 1
+fi
+
+echo "GOOD: Deleted record is still gone"