ctdb-tests: Wait to allow database attach/detach to take effect
author Martin Schwenke <martin@meltin.net>
Sat, 27 Apr 2019 04:54:09 +0000 (14:54 +1000)
committer Amitay Isaacs <amitay@samba.org>
Tue, 7 May 2019 05:45:35 +0000 (05:45 +0000)
Sometimes the detach test fails:

  Check detaching single test database detach_test1.tdb
  BAD: database detach_test1.tdb is still attached
  Number of databases:4
  dbid:0x5ae995ee name:detach_test4.tdb path:tests/var/simple/node.0/db/volatile/detach_test4.tdb.0
  dbid:0xd84cc13c name:detach_test3.tdb path:tests/var/simple/node.0/db/volatile/detach_test3.tdb.0
  dbid:0x8e8e8cef name:detach_test2.tdb path:tests/var/simple/node.0/db/volatile/detach_test2.tdb.0
  dbid:0xc62491f4 name:detach_test1.tdb path:tests/var/simple/node.0/db/volatile/detach_test1.tdb.0
  Number of databases:3
  dbid:0x5ae995ee name:detach_test4.tdb path:tests/var/simple/node.1/db/volatile/detach_test4.tdb.1
  dbid:0xd84cc13c name:detach_test3.tdb path:tests/var/simple/node.1/db/volatile/detach_test3.tdb.1
  dbid:0x8e8e8cef name:detach_test2.tdb path:tests/var/simple/node.1/db/volatile/detach_test2.tdb.1
  Number of databases:4
  dbid:0x5ae995ee name:detach_test4.tdb path:tests/var/simple/node.2/db/volatile/detach_test4.tdb.2
  dbid:0xd84cc13c name:detach_test3.tdb path:tests/var/simple/node.2/db/volatile/detach_test3.tdb.2
  dbid:0x8e8e8cef name:detach_test2.tdb path:tests/var/simple/node.2/db/volatile/detach_test2.tdb.2
  dbid:0xc62491f4 name:detach_test1.tdb path:tests/var/simple/node.2/db/volatile/detach_test1.tdb.2
  *** TEST COMPLETED (RC=1) AT 2019-04-27 03:35:40, CLEANING UP...

When issued from a client, the detach control re-broadcasts itself
asynchronously to all nodes and then returns success.  The controls to
some nodes to do the actual detach may still be in flight when success
is returned to the client.  Therefore, the test should wait for a few
seconds to allow the asynchronous controls to complete.

The same is true for the attach control, so work around the problem in
the attach test too.

An alternative is to make the attach and detach controls synchronous
by avoiding the broadcast and waiting for the results of the
individual controls sent to the nodes.  However, a simple
implementation would involve adding new nested event loops.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13924

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/tests/simple/21_ctdb_attach.sh
ctdb/tests/simple/27_ctdb_detach.sh

index 7965ced6052375114d897adee35d38b32018696c..b9ae9481d91bcbea1b0ac497504518102ad6267b 100755 (executable)
@@ -42,27 +42,39 @@ lastnode=$(( numnodes - 1 ))
 
 ######################################################################
 
-# Confirm that the database is attached
+# Confirm that the database is attached with appropriate flags
+check_db_once ()
+{
+       local pnn="$1"
+       local db="$2"
+
+       try_command_on_node "$pnn" $CTDB getdbmap
+       if grep -qF "name:${db}" "$outfile" >/dev/null ; then
+               return 0
+       else
+               return 1
+       fi
+}
+
 check_db ()
 {
-    pnn="$1"
-    db="$2"
-    flag="$3"
-    try_command_on_node $pnn "$CTDB getdbmap | grep $db"
-    if [ -z "$out" ] ; then
-       echo "BAD: database $db is not attached on node $node"
-       echo "$out"
-       exit 1
-    else
-       local flags=$(awk '{print $4}' "$outfile") || true
+       local pnn="$1"
+       local db="$2"
+       local flag="$3"
+
+       local flags
+
+       echo "Waiting until database ${db} is attached on node ${pnn}"
+       wait_until 10 check_db_once "$pnn" "$db"
+
+       flags=$(awk -v db="$db" '$2 == "name:" db {print $4}' "$outfile")
        if [ "$flags" = "$flag" ]; then
-           echo "GOOD: database $db is attached on node $node with flag $flag"
+               echo "GOOD: db ${db} attached on node ${pnn} with flag $flag"
        else
-           echo "BAD: database $db is attached on node $node with wrong flag"
-           echo "$out"
-           exit 1
+               echo "BAD: db ${db} attached on node ${pnn} with wrong flag"
+               cat "$outfile"
+               exit 1
        fi
-    fi
 }
 
 ######################################################################
index f7f7a7e8c4072ff171ba4892ec9c750095e895f1..5d1e12328c66190eab2ef7464482bfd4da3ff621 100755 (executable)
@@ -38,33 +38,51 @@ numnodes="$out"
 ######################################################################
 
 # Confirm that the database is attached
+check_db_once ()
+{
+       local db="$1"
+
+       local num_db
+
+       try_command_on_node all "$CTDB getdbmap"
+       num_db=$(grep -cF "name:${db}" "$outfile") || true
+       if [ "$num_db" -eq "$numnodes" ]; then
+               return 0
+       else
+               return 1
+       fi
+}
+
 check_db ()
 {
-    db="$1"
-    try_command_on_node all $CTDB getdbmap
-    local num_db=$(grep -cF "$db" "$outfile") || true
-    if [ $num_db -eq $numnodes ]; then
-       echo "GOOD: database $db is attached on all nodes"
-    else
-       echo "BAD: database $db is not attached on all nodes"
-       cat "$outfile"
-       exit 1
-    fi
+       local db="$1"
+
+       echo "Waiting until database ${db} is attached on all nodes"
+       wait_until 10 check_db_once "$db"
 }
 
 # Confirm that no nodes have databases attached
+check_no_db_once ()
+{
+       local db="$1"
+
+       local num_db
+
+       try_command_on_node all "$CTDB getdbmap"
+       num_db=$(grep -cF "name:${db}" "$outfile") || true
+       if [ "$num_db" -eq 0 ]; then
+               return 0
+       else
+               return 1
+       fi
+}
+
 check_no_db ()
 {
-    db="$1"
-    try_command_on_node all $CTDB getdbmap
-    local num_db=$(grep -cF "$db" "$outfile") || true
-    if [ $num_db -eq 0 ]; then
-       echo "GOOD: database $db is not attached any more"
-    else
-       echo "BAD: database $db is still attached"
-       cat "$outfile"
-       exit 1
-    fi
+       local db="$1"
+
+       echo "Waiting until database ${db} is detached on all nodes"
+       wait_until 10 check_no_db_once "$db"
 }
 
 ######################################################################