Add some simple tests that can be run from within the tree.
authorMartin Schwenke <martin@meltin.net>
Thu, 20 Nov 2008 09:40:01 +0000 (20:40 +1100)
committerMartin Schwenke <martin@meltin.net>
Thu, 20 Nov 2008 09:40:01 +0000 (20:40 +1100)
Signed-off-by: Martin Schwenke <martin@meltin.net>
(This used to be ctdb commit eacb2ef82ea4809d874158756db973dd1e3fc8fc)

22 files changed:
ctdb/.gitignore
ctdb/tests/nodes.txt
ctdb/tests/scripts/ctdb_test_functions.bash [new file with mode: 0644]
ctdb/tests/scripts/run_tests [new file with mode: 0755]
ctdb/tests/scripts/test_wrap [new file with mode: 0755]
ctdb/tests/simple/00_ctdb_init.sh [new file with mode: 0755]
ctdb/tests/simple/00_ctdb_onnode.sh [new file with mode: 0755]
ctdb/tests/simple/01_ctdb_version.sh.disabled [new file with mode: 0755]
ctdb/tests/simple/02_ctdb_listvars.sh [new file with mode: 0755]
ctdb/tests/simple/03_ctdb_getvar.sh [new file with mode: 0755]
ctdb/tests/simple/04_ctdb_setvar.sh [new file with mode: 0755]
ctdb/tests/simple/05_ctdb_listnodes.sh [new file with mode: 0755]
ctdb/tests/simple/06_ctdb_getpid.sh [new file with mode: 0755]
ctdb/tests/simple/07_ctdb_process_exists.sh [new file with mode: 0755]
ctdb/tests/simple/08_ctdb_isnotrecmaster.sh [new file with mode: 0755]
ctdb/tests/simple/11_ctdb_ip.sh [new file with mode: 0755]
ctdb/tests/simple/31_ctdb_disable_simple.sh [new file with mode: 0755]
ctdb/tests/simple/32_ctdb_enable_simple.sh [new file with mode: 0755]
ctdb/tests/simple/41_ctdb_ban_simple.sh [new file with mode: 0755]
ctdb/tests/simple/42_ctdb_unban_simple.sh [new file with mode: 0755]
ctdb/tests/start_daemons.sh
ctdb/tools/onnode

index 37b1a749d88b30891ceef2b9ef216e1a404ec84c..91124db066a746351008c8ac789ef11251c6d506 100644 (file)
@@ -15,5 +15,7 @@ utils/smnotify/gen_smnotify.c
 utils/smnotify/gen_xdr.c
 utils/smnotify/smnotify.h
 nodes.txt
+public_addresses.txt
 rec.lock
 test.db
+var
index 99b07328b383bd468885a27f4b84aa7ed35e331d..b910649a18c3f6d2b5d9fc146d389ce6dd1278a9 100644 (file)
@@ -1,4 +1,3 @@
 127.0.0.1
 127.0.0.2
 127.0.0.3
-127.0.0.4
diff --git a/ctdb/tests/scripts/ctdb_test_functions.bash b/ctdb/tests/scripts/ctdb_test_functions.bash
new file mode 100644 (file)
index 0000000..d1886b8
--- /dev/null
@@ -0,0 +1,333 @@
+# Hey Emacs, this is a -*- shell-script -*- !!!  :-)
+
+numnodes=3
+
+export CTDB_NODES_SOCKETS=""
+for i in $(seq 1 $numnodes) ; do
+    CTDB_NODES_SOCKETS="${CTDB_NODES_SOCKETS}${CTDB_NODES_SOCKETS:+ }${PWD}/sock.${i}"
+done
+
+
+######################################################################
+
+# Print the given message (all arguments joined into one string) on
+# stdout and terminate the calling shell with exit status 1.
+fail ()
+{
+    echo "$*"
+    exit 1
+}
+
+######################################################################
+
+#. /root/SOFS/autosofs/scripts/tester.bash
+
+test_begin ()
+{
+    local name="$1"
+
+    teststarttime=$(date '+%s')
+    testduration=0
+
+    echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
+    echo "Running test $name ($(date '+%T'))"
+    echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
+}
+
+test_end ()
+{
+    local name="$1" ; shift
+    local status="$1" ; shift
+    # "$@" is command-line
+
+    local interp="SKIPPED"
+    local statstr=" (reason $*)"
+    if [ -n "$status" ] ; then
+       if [ $status -eq 0 ] ; then
+           interp="PASSED"
+           statstr=""
+           echo "ALL OK: $*"
+       else
+           interp="FAILED"
+           statstr=" (status $status)"
+           testfailures=$(($testfailures+1))
+       fi
+    fi
+
+    testduration=$(($(date +%s)-$teststarttime))
+
+    echo "=========================================================================="
+    echo "TEST ${interp}: ${name}${statstr}, duration: $testduration sec."
+    echo "=========================================================================="
+
+}
+
+
+# Exit the shell using the number of failed tests as the exit status.
+# The "+0" makes an unset or empty $testfailures evaluate to 0.
+test_exit() {
+    exit $(($testfailures+0))
+}
+
+# Run one test, bracketed by test_begin and test_end.
+# $1    - test name
+# $2... - command to run; if $2 is missing or empty, the test name
+#         itself is run as the command
+# Returns the exit status of the command.
+test_run ()
+{
+    local name="$1" ; shift
+    
+    [ -n "$1" ] || set -- "$name"
+
+    test_begin "$name"
+
+    # Capture the status without letting a failure abort the caller.
+    local status=0
+    "$@" || status=$?
+
+    test_end "$name" "$status" "$*"
+    
+    return $status
+}
+
+########################################
+
+# Sets: $out
+try_command_on_node ()
+{
+    local nodespec="$1" ; shift
+    local cmd="$*"
+
+    out=$(onnode -q "$nodespec" "$cmd" 2>&1) || {
+
+       echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
+       echo "$out"
+       exit 1
+    }
+}
+
+sanity_check_output ()
+{
+    local min_lines="$1"
+    local regexp="$2" # Should be anchored to match whole lines.
+    local output="$3"
+
+    local ret=0
+
+    local num_lines=$(echo "$output" | wc -l)
+    echo "There are $num_lines lines of output"
+    if [ $num_lines -lt $min_lines ] ; then
+       echo "BAD: that's less than the required number (${min_lines})"
+       ret=1
+    fi
+
+    local status=0
+    local unexpected # local doesn't pass through status of command on RHS.
+    unexpected=$(echo "$output" | egrep -v "$regexp") || status=$?
+
+    # Note that this is reversed.
+    if [ $status -eq 0 ] ; then
+       echo "BAD: unexpected lines in output:"
+       echo "$unexpected"
+       ret=1
+    else
+       echo "Output lines look OK"
+    fi
+
+    return $ret
+}
+
+#######################################
+
+# Wait until either timeout expires or command succeeds.  The command
+# will be tried once per second.
+# $1    - timeout: the maximum number of attempts (one per second)
+# $2... - the command to run, with its arguments
+# Prints "|<timeout>|" followed by a "." for every failed attempt.
+# Returns 0 as soon as the command succeeds, 1 if every attempt fails.
+wait_until ()
+{
+    local timeout="$1" ; shift # "$@" is the command...
+
+    echo -n "|${timeout}|"
+    while [ $timeout -gt 0 ] ; do
+       if "$@" ; then
+           echo '|'
+           echo "OK"
+           return 0
+       fi
+       echo -n .
+       timeout=$(($timeout - 1))
+       sleep 1
+    done
+    
+    echo "*TIMEOUT*"
+    
+    return 1
+}
+
+# Sleep for a number of seconds, printing "|<seconds>|", then one dot
+# per second as a progress indicator and a closing "|".
+# $1 - number of seconds to sleep
+sleep_for ()
+{
+    # Keep the loop counter local so that a caller's $i is not clobbered.
+    local i
+
+    echo -n "|${1}|"
+    for i in $(seq 1 $1) ; do
+       echo -n '.'
+       sleep 1
+    done
+    echo '|'
+}
+
+# Inspect the output of "ctdb -Y status".  Returns 0 only if the
+# command succeeds, at least one line follows the first line, and
+# every such line consists of two colon-delimited fields followed by
+# "0:0:0:0:".  Returns non-zero otherwise.
+_cluster_is_healthy ()
+{
+    local out x count line
+
+    out=$(ctdb -Y status 2>&1) || return 1
+
+    {
+        # Discard the first line (presumably a header - TODO confirm).
+        read x
+       count=0
+        while read line ; do
+           count=$(($count + 1))
+           # Strip the shortest leading ":<field>:<field>:"; what is
+           # left must be exactly four zero fields.
+           [ "${line#:*:*:}" != "0:0:0:0:" ] && return 1
+        done
+       # With no lines at all the test fails and its non-zero status
+       # becomes the status of the function.
+       [ $count -gt 0 ] && return $?
+    } <<<"$out" # Yay bash!
+}
+
+# Print whether the cluster is healthy according to
+# _cluster_is_healthy and EXIT the shell with status 0 (healthy) or 1
+# (unhealthy).  Because it exits rather than returns, it is meant to
+# be run as a command of its own, e.g. via $TEST_WRAP.
+cluster_is_healthy ()
+{
+    if _cluster_is_healthy ; then
+       echo "Cluster is HEALTHY"
+       exit 0
+    else
+       echo "Cluster is UNHEALTHY"
+       exit 1
+    fi
+}
+
+wait_until_healthy ()
+{
+    local timeout="${1:-120}"
+
+    echo "Waiting for cluster to become healthy..."
+
+    wait_until 120 _cluster_is_healthy
+}
+
+# Incomplete! Do not use!
+#
+# Check whether a node has the given status according to
+# "ctdb -Y status".
+# $1 - node number (pnn)
+# $2 - one of: banned, unbanned, disabled, enabled
+# Returns 0 if a line for the node matches the status, 1 if not, if
+# the status name is unknown or if "ctdb -Y status" fails.
+node_has_status ()
+{
+    local pnn="$1"
+    local status="$2"
+
+    # Glob for the four trailing flag fields: "?" matches any single
+    # character, so only the flag of interest is pinned to 0 or 1.
+    local bits
+    case "$status" in
+       banned)
+           bits="?:1:?:?"
+           ;;
+       unbanned)
+           bits="?:0:?:?"
+           ;;
+       disabled)
+           bits="?:?:1:?"
+           ;;
+       enabled)
+           bits="?:?:0:?"
+           ;;
+       *)
+           echo "node_has_status: unknown status \"$status\""
+           return 1
+    esac
+
+    local out x line
+
+    out=$(ctdb -Y status 2>&1) || return 1
+
+    {
+        # Discard the first line (presumably a header - TODO confirm).
+        read x
+        while read line ; do
+           # Removing a prefix that matches the whole line leaves an
+           # empty string, i.e. the line is ":<pnn>:<any>:<bits>:".
+           [ "${line#:${pnn}:*:${bits}:}" = "" ] && return 0
+        done
+       return 1
+    } <<<"$out" # Yay bash!
+}
+
+# Wait until node_has_status succeeds for the given node and status.
+# $1 - node number (pnn)
+# $2 - status name understood by node_has_status
+# $3 - timeout in seconds (optional, default 30)
+# Returns 0 on success, 1 on timeout.
+wait_until_node_has_status ()
+{
+    local pnn="$1"
+    local status="$2"
+    local timeout="${3:-30}"
+
+    echo "Waiting until node $pnn has status \"$status\"..."
+
+    wait_until $timeout node_has_status "$pnn" "$status"
+}
+
+# Useful for superficially testing IP failover.
+# IPs must be on nodes matching nodeglob.
+ips_are_on_nodeglob ()
+{
+    local nodeglob="$1" ; shift
+    local ips="$*"
+
+    local out
+
+    try_command_on_node 1 ctdb ip
+
+    while read ip pnn ; do
+       for check in $ips ; do
+           if [ "$check" = "$ip" ] ; then
+               case "$pnn" in
+                   ($nodeglob) : ;;
+                   (*) return 1  ;;
+               esac
+               ips="${ips/${ip}}" # Remove from list
+           fi
+       done
+    done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+
+    ips="${ips// }" # Remove any spaces.
+    [ -z "$ips" ]
+}
+
+# Wait up to 60 seconds for ips_are_on_nodeglob to succeed.
+# Arguments are passed through unchanged: a node glob followed by
+# the IP addresses.  Returns 0 on success, 1 on timeout.
+wait_until_ips_are_on_nodeglob ()
+{
+    echo "Waiting for IPs to fail over..."
+
+    wait_until 60 ips_are_on_nodeglob "$@"
+}
+
+
+# Start $numnodes ctdb daemons using the in-tree start_daemons.sh
+# script, sending its stdout to $CTDB_DIR/var/daemons.log.
+# Returns the exit status of start_daemons.sh, or 1 if the log
+# directory cannot be created.
+start_daemons ()
+{
+    # The redirection below fails if the log directory is missing.
+    mkdir -p "${CTDB_DIR}/var" || return 1
+
+    "${CTDB_DIR}/tests/start_daemons.sh" "$numnodes" \
+       >"${CTDB_DIR}/var/daemons.log"
+}
+
+# Restart the ctdb service on the local machine: via "service" when
+# /etc/redhat-release exists, otherwise via the init script directly.
+_restart_ctdb ()
+{
+    if [ -e /etc/redhat-release ] ; then
+       service ctdb restart
+    else
+       /etc/init.d/ctdb restart
+    fi
+}
+
+# Restart ctdb on all nodes and wait for the cluster to be healthy.
+# With $CTDB_NODES_SOCKETS set, the daemons are shut down and started
+# again with start_daemons; otherwise _restart_ctdb is run on every
+# node.  Returns 1 if the restart or the health wait fails.
+restart_ctdb ()
+{
+    # Only the status of the last command of the branch taken is
+    # checked, so a failing "ctdb shutdown" is not an error as long as
+    # start_daemons succeeds.
+    if [ -n "$CTDB_NODES_SOCKETS" ] ; then
+       onnode all ctdb shutdown
+       start_daemons
+    else
+       onnode -pq all $TEST_WRAP _restart_ctdb 
+    fi || return 1
+       
+    onnode -q 1  $TEST_WRAP wait_until_healthy || return 1
+
+    # The status of this setvar is not checked.
+    echo "Setting RerecoveryTimeout to 1"
+    onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+
+    #echo "Sleeping to allow ctdb to settle..."
+    #sleep_for 10
+
+    echo "ctdb is ready"
+}
+
+# Finish a test script: if the cluster is no longer healthy, restart
+# ctdb everywhere to get back to a known state, then exit via
+# test_exit (exit status is the value of $testfailures).
+ctdb_test_exit ()
+{
+    if ! onnode 0 $TEST_WRAP cluster_is_healthy ; then
+       echo "Restarting ctdb on all nodes to get back into known state..."
+       restart_ctdb
+    fi
+
+    test_exit
+}
+
+########################################
+
+export PATH=/usr/local/autocluster:$PATH
diff --git a/ctdb/tests/scripts/run_tests b/ctdb/tests/scripts/run_tests
new file mode 100755 (executable)
index 0000000..eab28f0
--- /dev/null
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+export CTDB_DIR=$(cd $(dirname $(dirname $(dirname $0))) ; pwd)
+
+ctdb_bin_dir="${CTDB_DIR}/bin"
+ctdb_tools_dir="${CTDB_DIR}/tools"
+ctdb_test_scripts_dir=$(cd $(dirname $0) ; pwd)
+
+PATH="${ctdb_test_scripts_dir}:${ctdb_bin_dir}:${ctdb_tools_dir}:${PATH}"
+
+export TEST_WRAP="${ctdb_test_scripts_dir}/test_wrap"
+
+. ctdb_test_functions.bash
+
+usage() {
+    cat <<EOF
+Usage: run_tests [OPTIONS] [TESTS]
+
+EOF
+    exit 1
+}
+
+######################################################################
+
+temp=$(getopt -n "$prog" -o "xh" -l help -- "$@")
+
+[ $? != 0 ] && usage
+
+eval set -- "$temp"
+
+while true ; do
+    case "$1" in
+       -x) set -x; shift ;;
+       --) shift ; break ;;
+       -h|--help|*) usage ;; # * shouldn't happen, so this is reasonable.
+    esac
+done
+
+######################################################################
+
+for f; do
+    [ -x $f ] || fail "test $f is not executable"
+    test_run "$f"
+done
+
+test_exit
diff --git a/ctdb/tests/scripts/test_wrap b/ctdb/tests/scripts/test_wrap
new file mode 100755 (executable)
index 0000000..35ad418
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Execute the given command.  The intention is that it is a function
+# from ctdb_test_functions.bash.
+
+# Quote $0 so that a path containing whitespace still works.
+PATH="$(dirname "$0"):${PATH}"
+
+. ctdb_test_functions.bash
+
+"$@"
diff --git a/ctdb/tests/simple/00_ctdb_init.sh b/ctdb/tests/simple/00_ctdb_init.sh
new file mode 100755 (executable)
index 0000000..13b4303
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Bring ctdb into a known state by restarting it on all nodes.
+
+. ctdb_test_functions.bash
+
+set -e
+
+echo "Restarting ctdb on all nodes..."
+restart_ctdb
diff --git a/ctdb/tests/simple/00_ctdb_onnode.sh b/ctdb/tests/simple/00_ctdb_onnode.sh
new file mode 100755 (executable)
index 0000000..bd269d7
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Do a recursive "onnode all" to make sure all the nodes can connect
+# to each other.  On a cluster this ensures that SSH keys are known
+# between all hosts, which will stop output being corrupted with
+# messages about nodes being added to the list of known hosts.
+
+. ctdb_test_functions.bash
+
+echo "Checking connectivity between nodes..."
+onnode all onnode all true
diff --git a/ctdb/tests/simple/01_ctdb_version.sh.disabled b/ctdb/tests/simple/01_ctdb_version.sh.disabled
new file mode 100755 (executable)
index 0000000..36e0368
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. /root/SOFS/autosofs/scripts/functions
+
+set -e
+
+onnode 0 /root/SOFS/autosofs/scripts/cluster_is_healthy.sh
+
+rpm_ver_cmd="rpm -q ctdb"
+ctdb_ver_cmd="ctdb version"
+
+rpm_ver=$($rpm_ver_cmd)
+echo "$rpm_ver_cmd"
+echo "  $rpm_ver"
+
+ctdb_ver=$(onnode 0 $ctdb_ver_cmd)
+echo "$ctdb_ver_cmd"
+echo "  $ctdb_ver"
+
+set -x  
+
+[ "${ctdb_ver#CTDB version: }" = "${rpm_ver#ctdb-}" ]
+
+test_exit
diff --git a/ctdb/tests/simple/02_ctdb_listvars.sh b/ctdb/tests/simple/02_ctdb_listvars.sh
new file mode 100755 (executable)
index 0000000..fd0af39
--- /dev/null
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 0 "ctdb listvars"
+
+echo "Output from \"ctdb listvars\" on node 0:"
+echo "$out"
+
+sanity_check_output \
+    5 \
+    '^[[:alpha:]]+[[:space:]]*=[[:space:]]*[[:digit:]]+$' \
+    "$out"
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/03_ctdb_getvar.sh b/ctdb/tests/simple/03_ctdb_getvar.sh
new file mode 100755 (executable)
index 0000000..f21b29b
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 0 "ctdb listvars"
+
+echo "Veryifying all variable values using \"ctdb getvar\"..."
+
+echo "$out" |
+while read var x val ; do
+    try_command_on_node 0 "ctdb getvar $var"
+
+    val2=$(echo $out | sed -e 's@.*[[:space:]]@@')
+
+    if [ "$val" != "$val2" ] ; then
+       echo "MISMATCH on $var: $val != $val2"
+       exit 1
+    fi
+done
+
+testfailures=$?
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/04_ctdb_setvar.sh b/ctdb/tests/simple/04_ctdb_setvar.sh
new file mode 100755 (executable)
index 0000000..d3b543d
--- /dev/null
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Doesn't strictly follow the procedure, since it doesn't pick a
+# variable from the output of "ctdb listvars".
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+var="RecoverTimeout"
+
+cmd="ctdb getvar $var"
+try_command_on_node 0 $cmd
+
+val=$(echo "$out" | sed -e 's@.*[[:space:]]@@')
+
+echo "$out"
+
+echo "Going to try incrementing it..."
+
+incr=$(($val + 1))
+
+cmd="ctdb setvar $var $incr"
+try_command_on_node 0 $cmd
+
+echo "That seemed to work, let's check the value..."
+
+cmd="ctdb getvar $var"
+try_command_on_node 0 $cmd
+
+newval=$(echo "$out" | sed -e 's@.*[[:space:]]@@')
+
+echo "$out"
+
+if [ "$incr" != "$newval" ] ; then
+    echo "Nope, that didn't work..."
+    exit 1
+fi
+
+echo "Look's good!  Now verifying with \"ctdb listvars\""
+cmd="ctdb listvars"
+try_command_on_node 0 $cmd
+
+line=$(echo "$out" | grep "^$var")
+echo "$line"
+
+check=$(echo "$line" | sed -e 's@.*[[:space:]]@@')
+
+if [ "$incr" != "$check" ] ; then
+    echo "Nope, that didn't work..."
+    exit 1
+fi
+
+echo "Look's good!  Putting the old value back..."
+cmd="ctdb setvar $var $val"
+try_command_on_node 0 $cmd
+
+echo "All done..."
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/05_ctdb_listnodes.sh b/ctdb/tests/simple/05_ctdb_listnodes.sh
new file mode 100755 (executable)
index 0000000..9337a40
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 0 "ctdb listnodes"
+
+num_nodes=$(echo "$out" | wc -l)
+
+echo "Output for \"ctdb listnodes\" on node 0 (${num_nodes} nodes listed):"
+echo "$out"
+
+# Each line should look like an IP address.
+sanity_check_output \
+    2 \
+    '^[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+$' \
+    "$out"
+
+out_0="$out"
+
+echo "Checking other nodes..."
+
+n=1
+while [ $n -lt $num_nodes ] ; do
+    echo -n "Node ${n}: "
+    try_command_on_node $n "ctdb listnodes"
+    if [ "$out_0" = "$out" ] ; then
+       echo "OK"
+    else
+       echo "DIFFERs from node 0:"
+       echo "$out"
+       testfailures=1
+    fi
+    n=$(($n + 1))
+done
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/06_ctdb_getpid.sh b/ctdb/tests/simple/06_ctdb_getpid.sh
new file mode 100755 (executable)
index 0000000..4a3290c
--- /dev/null
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+# This is an attempt at being independent of the number of nodes
+# reported by "ctdb getpid -n all".
+try_command_on_node 0 "ctdb listnodes | wc -l"
+
+num_nodes="$out"
+
+echo "There are $num_nodes nodes..."
+
+# Call getpid a few different ways and make sure the answer is always the same.
+
+cmd="onnode -q all ctdb getpid"
+try_command_on_node 1 "$cmd"
+pids_onnode="$out"
+echo "Results from \"$cmd\":"
+echo "$pids_onnode"
+
+cmd="onnode -q 1 ctdb getpid -n all"
+try_command_on_node 1 "$cmd"
+pids_getpid_all="$out"
+echo "Results from \"$cmd\":"
+echo "$pids_getpid_all"
+
+cmd=""
+n=0
+while [ $n -lt $num_nodes ] ; do
+    cmd="${cmd}${cmd:+; }ctdb getpid -n $n"
+    n=$(($n + 1))
+done
+try_command_on_node 1 "$cmd"
+pids_getpid_n="$out"
+echo "Results from \"$cmd\":"
+echo "$pids_getpid_n"
+
+if [ "$pids_onnode" = "$pids_getpid_all" -a \
+    "$pids_getpid_all" = "$pids_getpid_n" ] ; then
+    echo "They're the same... cool!"
+else
+    echo "Error: they differ."
+    testfailures=1
+fi
+
+echo "Checking each PID for validity"
+
+n=0
+while [ $n -lt $num_nodes ] ; do
+    read line
+    pid=${line#Pid:}
+    try_command_on_node $n "ls -l /proc/${pid}/exe | sed -e 's@.*/@@'"
+    echo -n "Node ${n}, PID ${pid} looks to be running \"$out\" - "
+    if [ "$out" = "ctdbd" ] ; then
+       echo "GOOD!"
+    else
+       echo "BAD!"
+       testfailures=1
+    fi
+    n=$(($n + 1))
+done <<<"$pids_onnode"
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/07_ctdb_process_exists.sh b/ctdb/tests/simple/07_ctdb_process_exists.sh
new file mode 100755 (executable)
index 0000000..17f1231
--- /dev/null
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+# Create a background process on node 2 that will last for 60 seconds.
+try_command_on_node 2 'sleep 60 >/dev/null 2>&1 & echo $!'
+pid="$out"
+
+echo "Checking for PID $pid on node 2"
+# set -e is good, but avoid it here
+status=0
+onnode 1 "ctdb process-exists 2:$pid" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+    echo "OK"
+else
+    echo "BAD"
+    testfailures=1
+fi
+
+# Now just echo the PID of the shell from the onnode process on node
+# 2.  This PID will disappear and PIDs shouldn't roll around fast
+# enough to trick the test...  but there is a chance that will happen.
+try_command_on_node 2 'echo $$'
+pid="$out"
+
+echo "Checking for PID $pid on node 2"
+# set -e is good, but avoid it here
+status=0
+onnode 1 "ctdb process-exists 2:$pid" || status=$?
+echo "$out"
+
+if [ $status -ne 0 ] ; then
+    echo "OK"
+else
+    echo "BAD"
+    testfailures=1
+fi
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/08_ctdb_isnotrecmaster.sh b/ctdb/tests/simple/08_ctdb_isnotrecmaster.sh
new file mode 100755 (executable)
index 0000000..b0aac1c
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+cmd='ctdb isnotrecmaster || true'
+try_command_on_node all "$cmd"
+echo "Output of \"$cmd\":"
+echo "$out"
+
+num_all_lines=$(echo "$out" |  wc -l)
+num_rm_lines=$(echo "$out" | fgrep -c 'this node is the recmaster') || true
+num_not_rm_lines=$(echo "$out" | fgrep -c 'this node is not the recmaster') || true
+
+if [ $num_rm_lines -eq 1 ] ; then
+    echo "OK, there is only 1 recmaster"
+else
+    echo "BAD, there are ${num_rm_lines} nodes claiming to be the recmaster"
+    testfailures=1
+fi
+
+if [ $(($num_all_lines - $num_not_rm_lines)) -eq 1 ] ; then
+    echo "OK, all the other nodes claim not to be the recmaster"
+else
+    echo "BAD, there are only ${num_not_rm_lines} nodes claiming not to be the recmaster"
+    testfailures=1
+fi
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/11_ctdb_ip.sh b/ctdb/tests/simple/11_ctdb_ip.sh
new file mode 100755 (executable)
index 0000000..de1b4a8
--- /dev/null
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+echo "Getting list of public IPs..."
+try_command_on_node 1 ctdb ip -n all
+ips=$(echo "$out" | sed -e '1d')
+colons=$(echo "$ips" | sed -e 's@^@:@' -e 's@$@:@' -e 's@ @:@')
+
+while read ip pnn ; do
+    try_command_on_node $pnn "ip addr show"
+    if [ "${out/inet ${ip}\/}" != "$out" ] ; then
+       echo "GOOD: node $pnn appears to have $ip assigned"
+    else
+       echo "BAD:  node $pnn does not appear to have $ip assigned"
+       testfailures=1
+    fi
+done <<<"$ips" # bashism to avoid problem setting variable in pipeline.
+
+[ "$testfailures" != 1 ] && echo "Looks good!"
+
+cmd="ctdb -Y ip -n all | sed -e '1d'"
+echo "Checking that \"$cmd\" produces expected output..."
+
+try_command_on_node 1 "$cmd"
+if [ "$out" = "$colons" ] ; then
+    echo "Yep, looks good!"
+else
+    echo "Nope, it looks like this:"
+    echo "$out"
+    testfailures=1
+fi
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/31_ctdb_disable_simple.sh b/ctdb/tests/simple/31_ctdb_disable_simple.sh
new file mode 100755 (executable)
index 0000000..fdb7dae
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# From node 1, disable node 2.  Make sure that according to "ctdb ip"
+# the public addresses are taken over and according to "ctdb status"
+# the node appears to be disabled.  Don't actually check if the
+# address has been correctly taken over.
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 1 ctdb ip -n all
+
+# Collect, space-separated, the IPs currently listed against node 2.
+ips=""
+while read ip pnn ; do
+    if [ "$pnn" = "2" ] ; then
+       ips="${ips}${ips:+ }${ip}"
+    fi
+done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+
+echo "Node 2 has IPs: $ips"
+
+echo "Disabling node 2"
+
+try_command_on_node 1 ctdb disable -n 2
+
+# Avoid a potential race condition...
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 disabled
+
+# '[!2]' matches any single-character node number other than 2.
+if wait_until_ips_are_on_nodeglob '[!2]' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/32_ctdb_enable_simple.sh b/ctdb/tests/simple/32_ctdb_enable_simple.sh
new file mode 100755 (executable)
index 0000000..94b5bee
--- /dev/null
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+# From node 1, disable node 2 and then enable it again.  Make sure
+# that according to "ctdb ip" the public addresses are taken over and
+# then moved back, and that according to "ctdb status" the node
+# appears to be disabled and then enabled.  Don't actually check if
+# the addresses have been correctly taken over.
+
+. ctdb_test_functions.bash
+
+# Note that this doesn't work reliably over NFS!
+# Path of a marker file that is created on node 0.
+ctdb_trigger_recovered_file="/tmp/ctdb-trigger-recovered"
+
+# Create the marker file on node 0.
+# NOTE(review): presumably something outside this script removes the
+# file once a recovery has completed - TODO confirm.
+setup_recovered_trigger ()
+{
+    onnode -q 0 touch "$ctdb_trigger_recovered_file"
+}
+
+# Succeeds once the trigger file no longer exists on node 0.
+recovered_triggered ()
+{
+    # Double quotes, so that the path is expanded here: the variable is
+    # not exported and so is unset in the shell that onnode starts,
+    # where a test against an empty path would always succeed.
+    onnode -q 0 "! [ -e \"$ctdb_trigger_recovered_file\" ]"
+}
+
+# Wait up to 30 seconds for recovered_triggered to succeed.
+# Returns 0 on success, 1 on timeout.
+wait_until_recovered_triggered ()
+{
+    wait_until 30 recovered_triggered
+}
+
+########################################
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 1 ctdb ip -n all
+
+ips=""
+while read ip pnn ; do
+    if [ "$pnn" = "2" ] ; then
+       ips="${ips}${ips:+ }${ip}"
+    fi
+done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+
+echo "Node 2 has IPs: $ips"
+
+setup_recovered_trigger
+
+echo "Disabling node 2"
+try_command_on_node 1 ctdb disable -n 2
+
+# Avoid a potential race condition...
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 disabled
+
+if wait_until_ips_are_on_nodeglob '[!2]' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+#echo "Waiting until cluster has recovered..."
+#wait_until_recovered_triggered
+
+#echo "Sleeping to avoid potential race..."
+#sleep_for 3
+
+echo "Reenabling node 2"
+try_command_on_node 1 ctdb enable -n 2
+
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 enabled
+
+# BUG: this is only guaranteed if DeterministicIPs is 1 and
+#      NoIPFailback is 0.
+if wait_until_ips_are_on_nodeglob '2' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+# Disabling this because for some reason it is completely unreliable.
+# Depend even more on the sleep below...
+echo "Waiting until cluster has recovered..."
+wait_until_recovered_triggered
+
+#echo "Sleeping to avoid potential race..."
+#sleep_for 10
+
+echo "All done!"
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/41_ctdb_ban_simple.sh b/ctdb/tests/simple/41_ctdb_ban_simple.sh
new file mode 100755 (executable)
index 0000000..e5d4bdb
--- /dev/null
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+CATEGORY="CTDB"
+
+test_info()
+{
+cat <<EOF
+Verify the operation of the 'ctdb ban' command.
+
+This is a superficial test of the 'ctdb ban' command.  It trusts
+information from CTDB that indicates that the IP failover has
+happened correctly.  Another test should check that the failover
+has actually happened at the networking level.
+
+Prerequisites:
+
+* An active CTDB cluster with at least 2 active nodes.
+
+Steps:
+
+1. Verify that the status on all of the ctdb nodes is 'OK'.
+2. Ban one of the nodes using the 'ctdb ban <timeout>' command.
+3. Before the ban timeout expires, verify that the status of the
+   node changes to 'banned'.
+4. Verify that the public IP addresses that were being served by
+   the node are failed over to one of the other nodes.
+5. When the ban expires ensure that the status of the node changes
+   back to 'OK' and that the public IP addresses move back to the
+   node.
+
+Expected results:
+
+* The status of the banned nodes changes as expected and IP addresses
+  failover as expected.
+
+EOF
+}
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 1 ctdb ip -n all
+
+ips=""
+while read ip pnn ; do
+    if [ "$pnn" = "2" ] ; then
+       ips="${ips}${ips:+ }${ip}"
+    fi
+done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+
+echo "Node 2 has IPs: $ips"
+
+ban_time=15
+
+echo "Banning node 2 for $ban_time seconds"
+try_command_on_node 1 ctdb ban $ban_time -n 2
+
+# Avoid a potential race condition...
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 banned
+
+if wait_until_ips_are_on_nodeglob '[!2]' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+echo "Sleeping until ban expires..."
+sleep_for $ban_time
+
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 unbanned
+
+# BUG: this is only guaranteed if DeterministicIPs is 1 and
+#      NoIPFailback is 0.
+if wait_until_ips_are_on_nodeglob '2' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+echo "Sleeping to avoid potential race..."
+sleep_for 3
+
+ctdb_test_exit
diff --git a/ctdb/tests/simple/42_ctdb_unban_simple.sh b/ctdb/tests/simple/42_ctdb_unban_simple.sh
new file mode 100755 (executable)
index 0000000..05b363a
--- /dev/null
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+CATEGORY="CTDB"
+
+test_info()
+{
+cat <<EOF
+Verify the operation of the 'ctdb unban' command.
+
+This is a superficial test of the 'ctdb unban' command.  It trusts
+information from CTDB that indicates that the IP failover and failback
+has happened correctly.  Another test should check that the failover
+and failback has actually happened at the networking level.
+
+Prerequisites:
+
+* An active CTDB cluster with at least 2 active nodes.
+
+Steps:
+
+1. Verify that the status on all of the ctdb nodes is 'OK'.
+2. Ban one of the nodes using the 'ctdb ban <timeout>' command.
+3. Before the ban timeout expires, verify that the status of the
+   node changes to 'banned'.
+4. Verify that the public IP addresses that were being served by
+   the node are failed over to one of the other nodes.
+5. Before the ban timeout expires, use 'ctdb unban' to unban the
+   node.
+6. Verify that the status of the node changes back to 'OK' and that
+   the public IP addresses move back to the node.
+
+Expected results:
+
+* The 'ctdb unban' command successfully unbans a banned node.
+
+EOF
+}
+
+. ctdb_test_functions.bash
+
+set -e
+
+onnode 0 $TEST_WRAP cluster_is_healthy
+
+try_command_on_node 1 ctdb ip -n all
+
+ips=""
+while read ip pnn ; do
+    if [ "$pnn" = "2" ] ; then
+       ips="${ips}${ips:+ }${ip}"
+    fi
+done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+
+echo "Node 2 has IPs: $ips"
+
+ban_time=60
+
+echo "Banning node 2 for $ban_time seconds"
+try_command_on_node 1 ctdb ban $ban_time -n 2
+
+# Avoid a potential race condition...
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 banned
+
+if wait_until_ips_are_on_nodeglob '[!2]' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+echo "Sleeping to avoid a potential race condition..."
+sleep_for 3
+
+echo "Unbanning node 2"
+try_command_on_node 1 ctdb unban -n 2
+
+onnode 0 $TEST_WRAP wait_until_node_has_status 2 unbanned
+
+# BUG: this is only guaranteed if DeterministicIPs is 1 and
+#      NoIPFailback is 0.
+if wait_until_ips_are_on_nodeglob '2' $ips ; then
+    echo "All IPs moved."
+else
+    echo "Some IPs didn't move."
+    testfailures=1
+fi
+
+echo "Sleeping to avoid potential race..."
+sleep_for 3
+
+ctdb_test_exit
index cf6b738f4a1d66ae21b6b4279d7f11c47739ecf7..424d8920610b32878e38eba8fdff19000904183e 100755 (executable)
@@ -8,19 +8,25 @@ shift
 
 NODES="./tests/nodes.txt"
 rm -f $NODES
+PUBLIC_ADDRESSES=./tests/public_addresses.txt
+rm -f $PUBLIC_ADDRESSES
 for i in `seq 1 $NUMNODES`; do
   if [ "${CTDB_USE_IPV6}x" != "x" ]; then
     echo ::$i >> $NODES
     ip addr add ::$i/128 dev lo
   else
     echo 127.0.0.$i >> $NODES
+    #echo "127.0.1.$i/24 lo" >> $PUBLIC_ADDRESSES
+    #echo "127.0.1.$(($i + $NUMNODES))/24 lo" >> $PUBLIC_ADDRESSES
+    echo "192.0.2.$i/24 lo" >> $PUBLIC_ADDRESSES
+    echo "192.0.2.$(($i + $NUMNODES))/24 lo" >> $PUBLIC_ADDRESSES
   fi
 done
 
 killall -q ctdbd
 rm -rf test.db/persistent/*
  
-CTDB_OPTIONS="--reclock=rec.lock --nlist $NODES --event-script-dir=tests/events.d --logfile=- -d 0 --dbdir=test.db --dbdir-persistent=test.db/persistent $*"
+CTDB_OPTIONS="--reclock=rec.lock --nlist $NODES --public-addresses $PUBLIC_ADDRESSES --event-script-dir=tests/events.d --logfile=- -d 0 --dbdir=test.db --dbdir-persistent=test.db/persistent $*"
 
 echo "Starting $NUMNODES ctdb daemons"
 for i in `seq 1 $NUMNODES`; do
index 6fb8fbe7bc7418edaae1490722430285fb1e0648..5bb5ebbfb89ac8a5a926c785d9150475d6903da3 100755 (executable)
@@ -148,6 +148,7 @@ get_nodes_with_status ()
     esac
 
     if [ -z "$ctdb_status_output" ] ; then
+       # FIXME: need to do something if $CTDB_NODES_SOCKETS is set.
        ctdb_status_output=$(ctdb -Y status 2>/dev/null)
        if [ $? -ne 0 ] ; then
            echo "${prog}: unable to get status of CTDB nodes" >&2
@@ -177,8 +178,14 @@ get_nodes_with_status ()
 ctdb_recmaster=""
 get_nodes ()
 {
-    [ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes
-    local all_nodes=$(egrep '^[[:alnum:]]' $CTDB_NODES_FILE)
+    local all_nodes
+
+    if [ -n "$CTDB_NODES_SOCKETS" ] ; then 
+       all_nodes="$CTDB_NODES_SOCKETS"
+    else
+       [ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes
+       all_nodes=$(egrep '^[[:alnum:]]' $CTDB_NODES_FILE)
+    fi
 
     local nodes=""
     local n
@@ -210,20 +217,29 @@ get_nodes ()
     done
 }
 
+# Stand-in for ssh: run the command locally with "sh -c" instead of
+# on a remote host.
+# $1 - value placed in CTDB_SOCKET in the environment of the command
+# $2 - command string
+fakessh ()
+{
+    CTDB_SOCKET="$1" sh -c "$2"
+}
+
 ######################################################################
 
 parse_options "$@"
 
 $current && command="cd $PWD && $command"
 
-SSH_OPTS=
-# Could "2>/dev/null || true" but want to see errors from typos in file.
-[ -r /etc/ctdb/onnode.conf ] && . /etc/ctdb/onnode.conf
-[ -n "$SSH" ] || SSH=ssh
-if [ "$SSH" = "ssh" ] ; then
-    ssh_opts="-n"
-else
-    : # rsh? All bets are off!
+ssh_opts=
+if [ -n "$CTDB_NODES_SOCKETS" ] ; then
+    SSH=fakessh
+else 
+    # Could "2>/dev/null || true" but want to see errors from typos in file.
+    [ -r /etc/ctdb/onnode.conf ] && . /etc/ctdb/onnode.conf
+    [ -n "$SSH" ] || SSH=ssh
+    if [ "$SSH" = "ssh" ] ; then
+       ssh_opts="-n"
+    else
+       : # rsh? All bets are off!
+    fi
 fi
 
 ######################################################################