# Hey Emacs, this is a -*- shell-script -*- !!! :-) fail () { echo "$*" exit 1 } ###################################################################### #. /root/SOFS/autosofs/scripts/tester.bash ctdb_test_begin () { local name="$1" teststarttime=$(date '+%s') testduration=0 echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--" echo "Running test $name ($(date '+%T'))" echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--" } ctdb_test_end () { local name="$1" ; shift local status="$1" ; shift # "$@" is command-line local interp="SKIPPED" local statstr=" (reason $*)" if [ -n "$status" ] ; then if [ $status -eq 0 ] ; then interp="PASSED" statstr="" echo "ALL OK: $*" else interp="FAILED" statstr=" (status $status)" testfailures=$(($testfailures+1)) fi fi testduration=$(($(date +%s)-$teststarttime)) echo "==========================================================================" echo "TEST ${interp}: ${name}${statstr} (duration: ${testduration}s)" echo "==========================================================================" } test_exit () { exit $(($testfailures+0)) } ctdb_test_exit () { local status=$? trap - 0 [ $(($testfailures+0)) -eq 0 -a $status -ne 0 ] && testfailures=$status if ! onnode 0 $CTDB_TEST_WRAPPER cluster_is_healthy ; then echo "Restarting ctdb on all nodes to get back into known state..." restart_ctdb fi test_exit } ctdb_test_run () { local name="$1" ; shift [ -n "$1" ] || set -- "$name" ctdb_test_begin "$name" local status=0 "$@" || status=$? ctdb_test_end "$name" "$status" "$*" return $status } ctdb_test_usage() { local status=${1:-2} cat <&1) || { echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\"" echo "$out" exit 1 } if $verbose ; then echo "Output of \"$cmd\":" echo "$out" fi } sanity_check_output () { local min_lines="$1" local regexp="$2" # Should be anchored as necessary. local output="$3" local ret=0 local num_lines=$(echo "$output" | wc -l) echo "There are $num_lines lines of output" if [ $num_lines -lt $min_lines ] ; then echo "BAD: that's less than the required number (${min_lines})" ret=1 fi local status=0 local unexpected # local doesn't pass through status of command on RHS. unexpected=$(echo "$output" | egrep -v "$regexp") || status=$? # Note that this is reversed. if [ $status -eq 0 ] ; then echo "BAD: unexpected lines in output:" echo "$unexpected" | cat -A ret=1 else echo "Output lines look OK" fi return $ret } ####################################### # Wait until either timeout expires or command succeeds. The command # will be tried once per second. wait_until () { local timeout="$1" ; shift # "$@" is the command... echo -n "<${timeout}|" while [ $timeout -gt 0 ] ; do if "$@" ; then echo '|' echo "OK" return 0 fi echo -n . timeout=$(($timeout - 1)) sleep 1 done echo "*TIMEOUT*" return 1 } sleep_for () { echo -n "=${1}|" for i in $(seq 1 $1) ; do echo -n '.' sleep 1 done echo '|' } _cluster_is_healthy () { local out x count line out=$(ctdb -Y status 2>&1) || return 1 { read x count=0 while read line ; do count=$(($count + 1)) [ "${line#:*:*:}" != "0:0:0:0:" ] && return 1 done [ $count -gt 0 ] && return $? } <<<"$out" # Yay bash! } cluster_is_healthy () { if _cluster_is_healthy ; then echo "Cluster is HEALTHY" exit 0 else echo "Cluster is UNHEALTHY" exit 1 fi } wait_until_healthy () { local timeout="${1:-120}" echo "Waiting for cluster to become healthy..." wait_until 120 _cluster_is_healthy } # This function is becoming nicely overloaded. Soon it will collapse! :-) node_has_status () { local pnn="$1" local status="$2" local bits fpat mpat case "$status" in (disconnected) bits="1:?:?:?" ;; (connected) bits="0:?:?:?" ;; (banned) bits="?:1:?:?" ;; (unbanned) bits="?:0:?:?" ;; (disabled) bits="?:?:1:?" ;; (enabled) bits="?:?:0:?" ;; (frozen) fpat='^[[:space:]]+frozen[[:space:]]+1$' ;; (unfrozen) fpat='^[[:space:]]+frozen[[:space:]]+0$' ;; (monon) mpat='^Monitoring mode:ACTIVE \(0\)$' ;; (monoff) mpat='^Monitoring mode:DISABLED \(1\)$' ;; *) echo "node_has_status: unknown status \"$status\"" return 1 esac if [ -n "$bits" ] ; then local out x line out=$(ctdb -Y status 2>&1) || return 1 { read x while read line ; do [ "${line#:${pnn}:*:${bits}:}" = "" ] && return 0 done return 1 } <<<"$out" # Yay bash! elif [ -n "$fpat" ] ; then ctdb statistics -n "$pnn" | egrep -q "$fpat" elif [ -n "$mpat" ] ; then ctdb getmonmode -n "$pnn" | egrep -q "$mpat" else echo 'node_has_status: unknown mode, neither $bits nor $fpat is set' return 1 fi } wait_until_node_has_status () { local pnn="$1" local status="$2" local timeout="${3:-30}" echo "Waiting until node $pnn has status \"$status\"..." wait_until $timeout node_has_status "$pnn" "$status" } # Useful for superficially testing IP failover. # IPs must be on nodes matching nodeglob. ips_are_on_nodeglob () { local nodeglob="$1" ; shift local ips="$*" local out try_command_on_node 1 ctdb ip -n all while read ip pnn ; do for check in $ips ; do if [ "$check" = "$ip" ] ; then case "$pnn" in ($nodeglob) : ;; (*) return 1 ;; esac ips="${ips/${ip}}" # Remove from list fi done done <<<"$out" # bashism to avoid problem setting variable in pipeline. ips="${ips// }" # Remove any spaces. [ -z "$ips" ] } wait_until_ips_are_on_nodeglob () { echo "Waiting for IPs to fail over..." wait_until 60 ips_are_on_nodeglob "$@" } stop_daemons () { echo "Attempting to politely shutdown daemons..." onnode 1 ctdb shutdown -n all || true echo "Sleeping for a while..." sleep_for 1 if pidof $CTDB_DIR/bin/ctdbd >/dev/null ; then echo "Killing remaining daemons..." killall $CTDB_DIR/bin/ctdbd if pidof $CTDB_DIR/bin/ctdbd >/dev/null ; then echo "Once more with feeling.." killall -9 $CTDB_DIR/bin/ctdbd fi fi var_dir=$CTDB_DIR/tests/var rm -rf $var_dir/test.db } start_daemons () { local num_nodes="${1:-2}" # default is 2 nodes shift # "$@" gets passed to ctdbd local var_dir=$CTDB_DIR/tests/var mkdir -p $var_dir/test.db/persistent local nodes=$var_dir/nodes.txt local public_addresses=$var_dir/public_addresses.txt rm -f $nodes $public_addresses # If there are (strictly) greater than 2 nodes then we'll randomly # choose a node to have no public addresses. local no_public_ips=-1 [ $num_nodes -gt 2 ] && no_public_ips=$(($RANDOM % $num_nodes + 1)) local i for i in $(seq 1 $num_nodes) ; do if [ "${CTDB_USE_IPV6}x" != "x" ]; then echo ::$i >> $nodes ip addr add ::$i/128 dev lo else echo 127.0.0.$i >> $nodes # 2 public addresses on most nodes, just to make things interesting. if [ $i -ne $no_public_ips ] ; then echo "192.0.2.$i/24 lo" >> $public_addresses echo "192.0.2.$(($i + $num_nodes))/24 lo" >> $public_addresses fi fi done local ctdb_options="--reclock=$var_dir/rec.lock --nlist $nodes --public-addresses $public_addresses --nopublicipcheck --event-script-dir=tests/events.d --logfile=$var_dir/daemons.log -d 0 --dbdir=$var_dir/test.db --dbdir-persistent=$var_dir/test.db/persistent" echo "Starting $num_nodes ctdb daemons..." for i in $(seq 1 $num_nodes) ; do if [ $(id -u) -eq 0 ]; then ctdb_options="$ctdb_options --public-interface=lo" fi $VALGRIND bin/ctdbd --socket=$var_dir/sock.$i $ctdb_options "$@" || return 1 done if [ -L /tmp/ctdb.socket -o ! -S /tmp/ctdb.socket ] ; then ln -sf $var_dir/sock.1 /tmp/ctdb.socket || return 1 fi } _restart_ctdb () { if [ -e /etc/redhat-release ] ; then service ctdb restart else /etc/init.d/ctdb restart fi } restart_ctdb () { if [ -n "$CTDB_NODES_SOCKETS" ] ; then stop_daemons start_daemons $CTDB_NUM_NODES else onnode -pq all $CTDB_TEST_WRAPPER _restart_ctdb fi || return 1 onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1 echo "Setting RerecoveryTimeout to 1" onnode -pq all "ctdb setvar RerecoveryTimeout 1" #echo "Sleeping to allow ctdb to settle..." #sleep_for 10 echo "ctdb is ready" }