. "${TEST_SCRIPTS_DIR}/common.sh"
-# If we're not running on a real cluster then we need a local copy of
-# ctdb (and other stuff) in $PATH and we will use local daemons.
-if [ -n "$TEST_LOCAL_DAEMONS" ] ; then
- export CTDB_NODES_SOCKETS=""
- for i in $(seq 0 $(($TEST_LOCAL_DAEMONS - 1))) ; do
- CTDB_NODES_SOCKETS="${CTDB_NODES_SOCKETS}${CTDB_NODES_SOCKETS:+ }${TEST_VAR_DIR}/sock.${i}"
- done
-
- # Use in-tree binaries if running against local daemons.
- # Otherwise CTDB need to be installed on all nodes.
- if [ -n "$ctdb_dir" -a -d "${ctdb_dir}/bin" ] ; then
- PATH="${ctdb_dir}/bin:${PATH}"
- export CTDB_LOCK_HELPER="${ctdb_dir}/bin/ctdb_lock_helper"
- fi
-
- export CTDB_NODES="${TEST_VAR_DIR}/nodes.txt"
-fi
-
######################################################################
export CTDB_TIMEOUT=60
if [ -n "$CTDB_TEST_REMOTE_DIR" ] ; then
- CTDB_TEST_WRAPPER="${CTDB_TEST_REMOTE_DIR}/test_wrap"
+ CTDB_TEST_WRAPPER="${CTDB_TEST_REMOTE_DIR}/test_wrap"
else
- _d=$(cd ${TEST_SCRIPTS_DIR}; echo $PWD)
- CTDB_TEST_WRAPPER="$_d/test_wrap"
+ _d=$(cd "$TEST_SCRIPTS_DIR" && echo "$PWD")
+ CTDB_TEST_WRAPPER="$_d/test_wrap"
fi
export CTDB_TEST_WRAPPER
######################################################################
-ctdb_check_time_logs ()
+ctdb_test_on_cluster ()
{
- local threshold=20
-
- local jump=false
- local prev=""
- local ds_prev=""
- local node=""
-
- out=$(onnode all tail -n 20 "${TEST_VAR_DIR}/ctdb.test.time.log" 2>&1)
-
- if [ $? -eq 0 ] ; then
- local line
- while read line ; do
- case "$line" in
- \>\>\ NODE:\ *\ \<\<)
- node="${line#>> NODE: }"
- node=${node% <<*}
- ds_prev=""
- ;;
- *\ *)
- set -- $line
- ds_curr="$1${2:0:1}"
- if [ -n "$ds_prev" ] && \
- [ $(($ds_curr - $ds_prev)) -ge $threshold ] ; then
- echo "Node $node had time jump of $(($ds_curr - $ds_prev))ds between $(date +'%T' -d @${ds_prev%?}) and $(date +'%T' -d @${ds_curr%?})"
- jump=true
- fi
- prev="$line"
- ds_prev="$ds_curr"
- ;;
- esac
- done <<<"$out"
- else
- echo Error getting time logs
- fi
- if $jump ; then
- echo "Check time sync (test client first):"
- date
- onnode -p all date
- echo "Information from test client:"
- hostname
- top -b -n 1
- echo "Information from cluster nodes:"
- onnode all "top -b -n 1 ; echo '/proc/slabinfo' ; cat /proc/slabinfo"
- fi
+ [ -z "$CTDB_TEST_LOCAL_DAEMONS" ]
}
ctdb_test_exit ()
trap - 0
- [ $(($testfailures+0)) -eq 0 -a $status -ne 0 ] && testfailures=$status
- status=$(($testfailures+0))
+ # run_tests.sh pipes stdout into tee. If the tee process is
+ # killed then any attempt to write to stdout (e.g. echo) will
+ # result in SIGPIPE, terminating the caller. Ignore SIGPIPE to
+ # ensure that all clean-up is run.
+ trap '' PIPE
# Avoid making a test fail from this point onwards. The test is
# now complete.
echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
- if [ -z "$TEST_LOCAL_DAEMONS" -a -n "$CTDB_TEST_TIME_LOGGING" -a \
- $status -ne 0 ] ; then
- ctdb_check_time_logs
- fi
-
eval "$ctdb_test_exit_hook" || true
unset ctdb_test_exit_hook
- if $ctdb_test_restart_scheduled || ! cluster_is_healthy ; then
-
- restart_ctdb
- else
- # This could be made unconditional but then we might get
- # duplication from the recovery in restart_ctdb. We want to
- # leave the recovery in restart_ctdb so that future tests that
- # might do a manual restart mid-test will benefit.
- echo "Forcing a recovery..."
- onnode 0 $CTDB recover
- fi
+ echo "Stopping cluster..."
+ ctdb_nodes_stop || ctdb_test_error "Cluster shutdown failed"
exit $status
}
ctdb_test_exit_hook="${ctdb_test_exit_hook}${ctdb_test_exit_hook:+ ; }$*"
}
+# Setting ctdb_test_cleanup_pid to <pid>@<node> will cause <pid> to be
+# killed on <node> when the test completes. To cancel, reset
+# ctdb_test_cleanup_pid to the empty string.
+ctdb_test_cleanup_pid=""
+ctdb_test_cleanup_pid_exit_hook ()
+{
+ if [ -n "$ctdb_test_cleanup_pid" ] ; then
+ local pid="${ctdb_test_cleanup_pid%@*}"
+ local node="${ctdb_test_cleanup_pid#*@}"
+
+ try_command_on_node "$node" "kill ${pid}"
+ fi
+}
+
+ctdb_test_exit_hook_add ctdb_test_cleanup_pid_exit_hook
+
+ctdb_test_cleanup_pid_set ()
+{
+ local node="$1"
+ local pid="$2"
+
+ ctdb_test_cleanup_pid="${pid}@${node}"
+}
+
+ctdb_test_cleanup_pid_clear ()
+{
+ ctdb_test_cleanup_pid=""
+}
+
+# -n option means do not configure/start cluster
ctdb_test_init ()
{
- scriptname=$(basename "$0")
- testfailures=0
- ctdb_test_restart_scheduled=false
+ trap "ctdb_test_exit" 0
+
+ ctdb_nodes_stop >/dev/null 2>&1 || true
+
+ if [ "$1" != "-n" ] ; then
+ echo "Configuring cluster..."
+ setup_ctdb || ctdb_test_error "Cluster configuration failed"
- trap "ctdb_test_exit" 0
+ echo "Starting cluster..."
+ ctdb_init || ctdb_test_error "Cluster startup failed"
+ fi
+
+ echo "*** SETUP COMPLETE AT $(date '+%F %T'), RUNNING TEST..."
+}
+
+ctdb_nodes_start_custom ()
+{
+ if ctdb_test_on_cluster ; then
+ ctdb_test_error "ctdb_nodes_start_custom() on real cluster"
+ fi
+
+ ctdb_nodes_stop >/dev/null 2>&1 || true
+
+ echo "Configuring cluster..."
+ setup_ctdb "$@" || ctdb_test_error "Cluster configuration failed"
+
+ echo "Starting cluster..."
+ ctdb_init || ctdb_test_fail "Cluster startup failed"
+}
+
+ctdb_test_skip_on_cluster ()
+{
+ if ctdb_test_on_cluster ; then
+ ctdb_test_skip \
+ "SKIPPING this test - only runs against local daemons"
+ fi
+}
+
+
+ctdb_nodes_restart ()
+{
+ ctdb_nodes_stop "$@"
+ ctdb_nodes_start "$@"
}
########################################
-# Sets: $out
+# Sets: $out, $outfile
+# * The first 1KB of output is put into $out
+# * Tests should use $outfile for handling large output
+# * $outfile is removed after each test
+out=""
+outfile="${CTDB_TEST_TMP_DIR}/try_command_on_node.out"
+
+outfile_cleanup ()
+{
+ rm -f "$outfile"
+}
+
+ctdb_test_exit_hook_add outfile_cleanup
+
try_command_on_node ()
{
local nodespec="$1" ; shift
local cmd="$*"
- out=$(onnode -q $onnode_opts "$nodespec" "$cmd" 2>&1) || {
+ local status=0
+ # Intentionally unquoted - might be empty
+ # shellcheck disable=SC2086
+ onnode -q $onnode_opts "$nodespec" "$cmd" >"$outfile" 2>&1 || status=$?
+ out=$(dd if="$outfile" bs=1k count=1 2>/dev/null)
+ if [ $status -ne 0 ] ; then
echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
- echo "$out"
- return 1
- }
+ cat "$outfile"
+ return $status
+ fi
if $verbose ; then
echo "Output of \"$cmd\":"
- echo "$out"
+ cat "$outfile" || true
fi
}
+_run_onnode ()
+{
+ local thing="$1"
+ shift
+
+ local options nodespec
+
+ while : ; do
+ case "$1" in
+ -*)
+ options="${options}${options:+ }${1}"
+ shift
+ ;;
+ *)
+ nodespec="$1"
+ shift
+ break
+ esac
+ done
+
+ # shellcheck disable=SC2086
+ # $options can be multi-word
+ try_command_on_node $options "$nodespec" "${thing} $*"
+}
+
+ctdb_onnode ()
+{
+ _run_onnode "$CTDB" "$@"
+}
+
+testprog_onnode ()
+{
+ _run_onnode "${CTDB_TEST_WRAPPER} ${VALGRIND}" "$@"
+}
+
+function_onnode ()
+{
+ _run_onnode "${CTDB_TEST_WRAPPER}" "$@"
+}
+
sanity_check_output ()
{
local min_lines="$1"
local regexp="$2" # Should be anchored as necessary.
- local output="$3"
local ret=0
- local num_lines=$(echo "$output" | wc -l)
+ local num_lines
+ num_lines=$(wc -l <"$outfile" | tr -d '[:space:]')
echo "There are $num_lines lines of output"
- if [ $num_lines -lt $min_lines ] ; then
- echo "BAD: that's less than the required number (${min_lines})"
- ret=1
+ if [ "$num_lines" -lt "$min_lines" ] ; then
+ ctdb_test_fail "BAD: that's less than the required number (${min_lines})"
fi
local status=0
local unexpected # local doesn't pass through status of command on RHS.
- unexpected=$(echo "$output" | egrep -v "$regexp") || status=$?
+ unexpected=$(grep -Ev "$regexp" "$outfile") || status=$?
# Note that this is reversed.
if [ $status -eq 0 ] ; then
return $ret
}
-sanity_check_ips ()
+select_test_node ()
{
- local ips="$1" # list of "ip node" lines
-
- echo "Sanity checking IPs..."
-
- local x ipp prev
- prev=""
- while read x ipp ; do
- [ "$ipp" = "-1" ] && break
- if [ -n "$prev" -a "$ipp" != "$prev" ] ; then
- echo "OK"
- return 0
- fi
- prev="$ipp"
- done <<<"$ips"
+ try_command_on_node any ctdb pnn || return 1
- echo "BAD: a node was -1 or IPs are only assigned to one node"
- echo "Are you running an old version of CTDB?"
- return 1
+ test_node="$out"
+ echo "Selected node ${test_node}"
}
-# This returns a list of "ip node" lines in $out
+# This returns a list of "ip node" lines in $outfile
all_ips_on_node()
{
- local node=$@
- try_command_on_node $node "$CTDB ip -Y -n all | cut -d ':' -f1-3 | sed -e '1d' -e 's@^:@@' -e 's@:@ @g'"
+ local node="$1"
+ try_command_on_node "$node" \
+ "$CTDB ip -X | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
}
_select_test_node_and_ips ()
{
- all_ips_on_node 0
+ try_command_on_node any \
+ "$CTDB ip -X all | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
test_node="" # this matches no PNN
test_node_ips=""
local ip pnn
- while read ip pnn ; do
- if [ -z "$test_node" -a "$pnn" != "-1" ] ; then
+ while read -r ip pnn ; do
+ if [ -z "$test_node" ] && [ "$pnn" != "-1" ] ; then
test_node="$pnn"
fi
if [ "$pnn" = "$test_node" ] ; then
- test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
+ test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
fi
- done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+ done <"$outfile"
echo "Selected node ${test_node} with IPs: ${test_node_ips}."
test_ip="${test_node_ips%% *}"
+ # test_prefix used by caller
+ # shellcheck disable=SC2034
+ case "$test_ip" in
+ *:*) test_prefix="${test_ip}/128" ;;
+ *) test_prefix="${test_ip}/32" ;;
+ esac
+
[ -n "$test_node" ] || return 1
}
while ! _select_test_node_and_ips ; do
echo "Unable to find a test node with IPs assigned"
if [ $timeout -le 0 ] ; then
- echo "BAD: Too many attempts"
+ ctdb_test_error "BAD: Too many attempts"
return 1
fi
sleep_for 1
- timeout=$(($timeout - 1))
+ timeout=$((timeout - 1))
done
return 0
}
-#######################################
-
-# Wait until either timeout expires or command succeeds. The command
-# will be tried once per second.
-wait_until ()
+# Sets: mask, iface
+get_test_ip_mask_and_iface ()
{
- local timeout="$1" ; shift # "$@" is the command...
+ # Find the interface
+ ctdb_onnode "$test_node" "ip -v -X"
+ iface=$(awk -F'|' -v ip="$test_ip" '$2 == ip { print $4 }' "$outfile")
- local negate=false
- if [ "$1" = "!" ] ; then
- negate=true
- shift
+ if ctdb_test_on_cluster ; then
+ # Find the netmask
+ try_command_on_node "$test_node" ip addr show to "$test_ip"
+ mask="${out##*/}"
+ mask="${mask%% *}"
+ else
+ mask="24"
fi
- echo -n "<${timeout}|"
- local t=$timeout
- while [ $t -gt 0 ] ; do
- local rc=0
- "$@" || rc=$?
- if { ! $negate && [ $rc -eq 0 ] ; } || \
- { $negate && [ $rc -ne 0 ] ; } ; then
- echo "|$(($timeout - $t))|"
- echo "OK"
- return 0
- fi
- echo -n .
- t=$(($t - 1))
- sleep 1
- done
+ echo "$test_ip/$mask is on $iface"
+}
- echo "*TIMEOUT*"
+ctdb_get_all_pnns ()
+{
+ try_command_on_node -q all "$CTDB pnn"
+ all_pnns="$out"
+}
- return 1
+# The subtlety is that "ctdb delip" will fail if the IP address isn't
+# configured on a node...
+delete_ip_from_all_nodes ()
+{
+ _ip="$1"
+
+ ctdb_get_all_pnns
+
+ _nodes=""
+
+ for _pnn in $all_pnns ; do
+ all_ips_on_node "$_pnn"
+ while read -r _i _ ; do
+ if [ "$_ip" = "$_i" ] ; then
+ _nodes="${_nodes}${_nodes:+,}${_pnn}"
+ fi
+ done <"$outfile"
+ done
+
+ try_command_on_node -pq "$_nodes" "$CTDB delip $_ip"
}
+#######################################
+
sleep_for ()
{
echo -n "=${1}|"
- for i in $(seq 1 $1) ; do
+ for i in $(seq 1 "$1") ; do
echo -n '.'
sleep 1
done
_cluster_is_healthy ()
{
- $CTDB nodestatus all >/dev/null && \
- node_has_status 0 recovered
+ $CTDB nodestatus all >/dev/null
+}
+
+_cluster_is_recovered ()
+{
+ node_has_status 0 recovered
+}
+
+_cluster_is_ready ()
+{
+ _cluster_is_healthy && _cluster_is_recovered
}
cluster_is_healthy ()
{
- if onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
- echo "Cluster is HEALTHY"
- return 0
- else
+ if onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_healthy ; then
+ echo "Cluster is HEALTHY"
+ if ! onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_recovered ; then
+ echo "WARNING: cluster in recovery mode!"
+ fi
+ return 0
+ fi
+
echo "Cluster is UNHEALTHY"
- if ! ${ctdb_test_restart_scheduled:-false} ; then
- echo "DEBUG AT $(date '+%F %T'):"
- local i
- for i in "onnode -q 0 $CTDB status" "onnode -q 0 onnode all $CTDB scriptstatus" ; do
+
+ echo "DEBUG AT $(date '+%F %T'):"
+ local i
+ for i in "onnode -q 0 $CTDB status" \
+ "onnode -q 0 onnode all $CTDB scriptstatus" ; do
echo "$i"
$i || true
- done
- fi
+ done
+
return 1
- fi
}
-wait_until_healthy ()
+wait_until_ready ()
{
local timeout="${1:-120}"
- echo "Waiting for cluster to become healthy..."
+ echo "Waiting for cluster to become ready..."
- wait_until 120 _cluster_is_healthy
+ wait_until "$timeout" onnode -q any "$CTDB_TEST_WRAPPER" _cluster_is_ready
}
# This function is becoming nicely overloaded. Soon it will collapse! :-)
node_has_status ()
{
- local pnn="$1"
- local status="$2"
-
- local bits fpat mpat rpat
- case "$status" in
- (unhealthy) bits="?:?:?:1:*" ;;
- (healthy) bits="?:?:?:0:*" ;;
- (disconnected) bits="1:*" ;;
- (connected) bits="0:*" ;;
- (banned) bits="?:1:*" ;;
- (unbanned) bits="?:0:*" ;;
- (disabled) bits="?:?:1:*" ;;
- (enabled) bits="?:?:0:*" ;;
- (stopped) bits="?:?:?:?:1:*" ;;
- (notstopped) bits="?:?:?:?:0:*" ;;
- (frozen) fpat='^[[:space:]]+frozen[[:space:]]+1$' ;;
- (unfrozen) fpat='^[[:space:]]+frozen[[:space:]]+0$' ;;
- (monon) mpat='^Monitoring mode:ACTIVE \(0\)$' ;;
- (monoff) mpat='^Monitoring mode:DISABLED \(1\)$' ;;
- (recovered) rpat='^Recovery mode:NORMAL \(0\)$' ;;
+ local pnn="$1"
+ local status="$2"
+
+ case "$status" in
+ recovered)
+ ! $CTDB status -n "$pnn" | \
+ grep -Eq '^Recovery mode:RECOVERY \(1\)$'
+ return
+ ;;
+ notlmaster)
+ ! $CTDB status | grep -Eq "^hash:.* lmaster:${pnn}\$"
+ return
+ ;;
+ esac
+
+ local bits
+ case "$status" in
+ unhealthy) bits="?|?|?|?|1|*" ;;
+ healthy) bits="?|?|?|?|0|*" ;;
+ disconnected) bits="1|*" ;;
+ connected) bits="0|*" ;;
+ banned) bits="?|?|1|*" ;;
+ unbanned) bits="?|?|0|*" ;;
+ disabled) bits="?|?|?|1|*" ;;
+ enabled) bits="?|?|?|0|*" ;;
+ stopped) bits="?|?|?|?|?|1|*" ;;
+ notstopped) bits="?|?|?|?|?|0|*" ;;
*)
- echo "node_has_status: unknown status \"$status\""
- return 1
- esac
+ echo "node_has_status: unknown status \"$status\""
+ return 1
+ esac
+ local out _ line
- if [ -n "$bits" ] ; then
- local out x line
-
- out=$($CTDB -Y status 2>&1) || return 1
+ out=$($CTDB -X status 2>&1) || return 1
{
- read x
- while read line ; do
- # This needs to be done in 2 steps to avoid false matches.
- local line_bits="${line#:${pnn}:*:}"
- [ "$line_bits" = "$line" ] && continue
- [ "${line_bits#${bits}}" != "$line_bits" ] && return 0
- done
- return 1
+ read -r _
+ while read -r line ; do
+ # This needs to be done in 2 steps to
+ # avoid false matches.
+ local line_bits="${line#|"${pnn}"|*|}"
+ [ "$line_bits" = "$line" ] && continue
+ # shellcheck disable=SC2295
+ # This depends on $bits being a pattern
+ [ "${line_bits#${bits}}" != "$line_bits" ] && \
+ return 0
+ done
+ return 1
} <<<"$out" # Yay bash!
- elif [ -n "$fpat" ] ; then
- $CTDB statistics -n "$pnn" | egrep -q "$fpat"
- elif [ -n "$mpat" ] ; then
- $CTDB getmonmode -n "$pnn" | egrep -q "$mpat"
- elif [ -n "$rpat" ] ; then
- $CTDB status -n "$pnn" | egrep -q "$rpat"
- else
- echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
- return 1
- fi
}
wait_until_node_has_status ()
echo "Waiting until node $pnn has status \"$status\"..."
- if ! wait_until $timeout onnode $proxy_pnn $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
+ if ! wait_until "$timeout" onnode "$proxy_pnn" \
+ "$CTDB_TEST_WRAPPER" node_has_status "$pnn" "$status" ; then
+
for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
echo "$i"
$i || true
}
# Useful for superficially testing IP failover.
-# IPs must be on nodes matching nodeglob.
-# If the first argument is '!' then the IPs must not be on nodes
-# matching nodeglob.
-ips_are_on_nodeglob ()
+# IPs must be on the given node.
+# If the first argument is '!' then the IPs must not be on the given node.
+ips_are_on_node ()
{
local negating=false
if [ "$1" = "!" ] ; then
negating=true ; shift
fi
- local nodeglob="$1" ; shift
+ local node="$1" ; shift
local ips="$*"
local out
- all_ips_on_node 1
+ all_ips_on_node "$node"
+ local check
for check in $ips ; do
- while read ip pnn ; do
+ local ip pnn
+ while read -r ip pnn ; do
if [ "$check" = "$ip" ] ; then
- case "$pnn" in
- ($nodeglob) if $negating ; then return 1 ; fi ;;
- (*) if ! $negating ; then return 1 ; fi ;;
- esac
+ if [ "$pnn" = "$node" ] ; then
+ if $negating ; then return 1 ; fi
+ else
+ if ! $negating ; then return 1 ; fi
+ fi
ips="${ips/${ip}}" # Remove from list
break
fi
if $negating ; then
ips="${ips/${check}}"
fi
- done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+ done <"$outfile"
done
ips="${ips// }" # Remove any spaces.
[ -z "$ips" ]
}
-wait_until_ips_are_on_nodeglob ()
+wait_until_ips_are_on_node ()
{
- echo "Waiting for IPs to fail over..."
+ # Go to some trouble to print a useful description of what is happening
+ local not=""
+ if [ "$1" == "!" ] ; then
+ not="no longer "
+ fi
+ local node=""
+ local ips=""
+ local i
+ for i ; do
+ [ "$i" != "!" ] || continue
+ if [ -z "$node" ] ; then
+ node="$i"
+ continue
+ fi
+ ips="${ips}${ips:+, }${i}"
+ done
+ echo "Waiting for ${ips} to ${not}be assigned to node ${node}"
- wait_until 60 ips_are_on_nodeglob "$@"
+ wait_until 60 ips_are_on_node "$@"
}
node_has_some_ips ()
local out
- all_ips_on_node 1
+ all_ips_on_node "$node"
- while read ip pnn ; do
+ while read -r ip pnn ; do
if [ "$node" = "$pnn" ] ; then
return 0
fi
- done <<<"$out" # bashism to avoid problem setting variable in pipeline.
+ done <"$outfile"
return 1
}
wait_until_node_has_some_ips ()
{
- echo "Waiting for node to have some IPs..."
+ echo "Waiting for some IPs to be assigned to node ${test_node}"
wait_until 60 node_has_some_ips "$@"
}
-ip2ipmask ()
+wait_until_node_has_no_ips ()
{
- _ip="$1"
+ echo "Waiting until no IPs are assigned to node ${test_node}"
- ip addr show to "$_ip" | awk '$1 == "inet" { print $2 }'
+ wait_until 60 ! node_has_some_ips "$@"
}
#######################################
-daemons_stop ()
+ctdb_init ()
{
- echo "Attempting to politely shutdown daemons..."
- onnode 1 $CTDB shutdown -n all || true
-
- echo "Sleeping for a while..."
- sleep_for 1
-
- local pat="ctdbd --socket=.* --nlist .* --nopublicipcheck"
- if pgrep -f "$pat" >/dev/null ; then
- echo "Killing remaining daemons..."
- pkill -f "$pat"
-
- if pgrep -f "$pat" >/dev/null ; then
- echo "Once more with feeling.."
- pkill -9 -f "$pat"
+ if ! ctdb_nodes_start ; then
+ echo "Cluster start failed"
+ return 1
fi
- fi
-
- rm -rf "${TEST_VAR_DIR}/test.db"
-}
-
-daemons_setup ()
-{
- mkdir -p "${TEST_VAR_DIR}/test.db/persistent"
-
- local public_addresses_all="${TEST_VAR_DIR}/public_addresses_all"
- local no_public_addresses="${TEST_VAR_DIR}/no_public_addresses.txt"
- rm -f $CTDB_NODES $public_addresses_all $no_public_addresses
-
- # If there are (strictly) greater than 2 nodes then we'll randomly
- # choose a node to have no public addresses.
- local no_public_ips=-1
- [ $TEST_LOCAL_DAEMONS -gt 2 ] && no_public_ips=$(($RANDOM % $TEST_LOCAL_DAEMONS))
- echo "$no_public_ips" >$no_public_addresses
- # When running certain tests we add and remove eventscripts, so we
- # need to be able to modify the events.d/ directory. Therefore,
- # we use a temporary events.d/ directory under $TEST_VAR_DIR. We
- # copy the actual test eventscript(s) in there from the original
- # events.d/ directory that sits alongside $TEST_SCRIPT_DIR.
- local top=$(dirname "$TEST_SCRIPTS_DIR")
- local events_d="${top}/events.d"
- mkdir -p "${TEST_VAR_DIR}/events.d"
- cp -p "${events_d}/"* "${TEST_VAR_DIR}/events.d/"
-
- local i
- for i in $(seq 1 $TEST_LOCAL_DAEMONS) ; do
- if [ "${CTDB_USE_IPV6}x" != "x" ]; then
- echo ::$i >>"$CTDB_NODES"
- ip addr add ::$i/128 dev lo
- else
- echo 127.0.0.$i >>"$CTDB_NODES"
- # 2 public addresses on most nodes, just to make things interesting.
- if [ $(($i - 1)) -ne $no_public_ips ] ; then
- echo "192.168.234.$i/24 lo" >>"$public_addresses_all"
- echo "192.168.234.$(($i + $TEST_LOCAL_DAEMONS))/24 lo" >>"$public_addresses_all"
- fi
+ if ! wait_until_ready 120 ; then
+ echo "Cluster didn't become ready"
+ return 1
fi
- done
-}
-
-daemons_start_1 ()
-{
- local pnn="$1"
- shift # "$@" gets passed to ctdbd
-
- local public_addresses_all="${TEST_VAR_DIR}/public_addresses_all"
- local public_addresses_mine="${TEST_VAR_DIR}/public_addresses.${pnn}"
- local no_public_addresses="${TEST_VAR_DIR}/no_public_addresses.txt"
-
- local no_public_ips=-1
- [ -r $no_public_addresses ] && read no_public_ips <$no_public_addresses
-
- if [ "$no_public_ips" = $pnn ] ; then
- echo "Node $no_public_ips will have no public IPs."
- fi
-
- local node_ip=$(sed -n -e "$(($pnn + 1))p" "$CTDB_NODES")
- local ctdb_options="--sloppy-start --reclock=${TEST_VAR_DIR}/rec.lock --nlist $CTDB_NODES --nopublicipcheck --listen=${node_ip} --event-script-dir=${TEST_VAR_DIR}/events.d --logfile=${TEST_VAR_DIR}/daemon.${pnn}.log -d 3 --log-ringbuf-size=10000 --dbdir=${TEST_VAR_DIR}/test.db --dbdir-persistent=${TEST_VAR_DIR}/test.db/persistent --dbdir-state=${TEST_VAR_DIR}/test.db/state"
-
- if [ $pnn -eq $no_public_ips ] ; then
- ctdb_options="$ctdb_options --public-addresses=/dev/null"
- else
- cp "$public_addresses_all" "$public_addresses_mine"
- ctdb_options="$ctdb_options --public-addresses=$public_addresses_mine"
- fi
-
- # We'll use "pkill -f" to kill the daemons with
- # "--socket=.* --nlist .* --nopublicipcheck" as context.
- $VALGRIND ctdbd --socket="${TEST_VAR_DIR}/sock.$pnn" $ctdb_options "$@" ||return 1
-}
-
-daemons_start ()
-{
- # "$@" gets passed to ctdbd
-
- echo "Starting $TEST_LOCAL_DAEMONS ctdb daemons..."
-
- for i in $(seq 0 $(($TEST_LOCAL_DAEMONS - 1))) ; do
- daemons_start_1 $i "$@"
- done
-}
-
-#######################################
-
-_ctdb_hack_options ()
-{
- local ctdb_options="$*"
-
- case "$ctdb_options" in
- *--start-as-stopped*)
- export CTDB_START_AS_STOPPED="yes"
- esac
-}
-
-_restart_ctdb ()
-{
- _ctdb_hack_options "$@"
-
- if [ -e /etc/redhat-release ] ; then
- service ctdb restart
- else
- /etc/init.d/ctdb restart
- fi
-}
-
-_ctdb_start ()
-{
- _ctdb_hack_options "$@"
-
- /etc/init.d/ctdb start
-}
-
-setup_ctdb ()
-{
- if [ -n "$CTDB_NODES_SOCKETS" ] ; then
- daemons_setup
- fi
-}
-
-# Common things to do after starting one or more nodes.
-_ctdb_start_post ()
-{
- onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || return 1
-
- echo "Setting RerecoveryTimeout to 1"
- onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
-
- # In recent versions of CTDB, forcing a recovery like this blocks
- # until the recovery is complete. Hopefully this will help the
- # cluster to stabilise before a subsequent test.
- echo "Forcing a recovery..."
- onnode -q 0 $CTDB recover
- sleep_for 1
-
- echo "ctdb is ready"
-}
-
-# This assumes that ctdbd is not running on the given node.
-ctdb_start_1 ()
-{
- local pnn="$1"
- shift # "$@" is passed to ctdbd start.
-
- echo -n "Starting CTDB on node ${pnn}..."
-
- if [ -n "$CTDB_NODES_SOCKETS" ] ; then
- daemons_start_1 $pnn "$@"
- else
- onnode $pnn $CTDB_TEST_WRAPPER _ctdb_start "$@"
- fi
-
- # If we're starting only 1 node then we're doing something weird.
- ctdb_restart_when_done
-}
-
-restart_ctdb ()
-{
- # "$@" is passed to ctdbd start.
-
- echo -n "Restarting CTDB"
- if $ctdb_test_restart_scheduled ; then
- echo -n " (scheduled)"
- fi
- echo "..."
-
- local i
- for i in $(seq 1 5) ; do
- if [ -n "$CTDB_NODES_SOCKETS" ] ; then
- daemons_stop
- daemons_start "$@"
- else
- onnode -p all $CTDB_TEST_WRAPPER _restart_ctdb "$@"
- fi || {
- echo "Restart failed. Trying again in a few seconds..."
- sleep_for 5
- continue
- }
-
- onnode -q 1 $CTDB_TEST_WRAPPER wait_until_healthy || {
- echo "Cluster didn't become healthy. Restarting..."
- continue
- }
echo "Setting RerecoveryTimeout to 1"
onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
- # In recent versions of CTDB, forcing a recovery like this
- # blocks until the recovery is complete. Hopefully this will
- # help the cluster to stabilise before a subsequent test.
echo "Forcing a recovery..."
- onnode -q 0 $CTDB recover
- sleep_for 1
+ onnode -q 0 "$CTDB recover"
+ sleep_for 2
+
+ if ! onnode -q all "$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
+ echo "Cluster has gone into recovery again, waiting..."
+ if ! wait_until 30/2 onnode -q all \
+ "$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
+ echo "Cluster did not come out of recovery"
+ return 1
+ fi
+ fi
- # Cluster is still healthy. Good, we're done!
- if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
- echo "Cluster became UNHEALTHY again [$(date)]"
- onnode -p all ctdb status -Y 2>&1
- onnode -p all ctdb scriptstatus 2>&1
- echo "Restarting..."
- continue
+ if ! onnode 0 "$CTDB_TEST_WRAPPER _cluster_is_healthy" ; then
+ echo "Cluster became UNHEALTHY again [$(date)]"
+ return 1
fi
echo "Doing a sync..."
- onnode -q 0 $CTDB sync
+ onnode -q 0 "$CTDB sync"
echo "ctdb is ready"
return 0
- done
-
- echo "Cluster UNHEALTHY... too many attempts..."
- onnode -p all ctdb status -Y 2>&1
- onnode -p all ctdb scriptstatus 2>&1
-
- # Try to make the calling test fail
- status=1
- return 1
-}
-
-ctdb_restart_when_done ()
-{
- ctdb_test_restart_scheduled=true
}
-get_ctdbd_command_line_option ()
+ctdb_base_show ()
{
- local pnn="$1"
- local option="$2"
-
- try_command_on_node "$pnn" "$CTDB getpid" || \
- die "Unable to get PID of ctdbd on node $pnn"
-
- local pid="${out#*:}"
- try_command_on_node "$pnn" "ps -p $pid -o args hww" || \
- die "Unable to get command-line of PID $pid"
-
- # Strip everything up to and including --option
- local t="${out#*--${option}}"
- # Strip leading '=' or space if present
- t="${t#=}"
- t="${t# }"
- # Strip any following options and print
- echo "${t%% -*}"
+ echo "${CTDB_BASE:-${CTDB_SCRIPTS_BASE}}"
}
#######################################
-install_eventscript ()
+# sets: leader
+_leader_get ()
{
- local script_name="$1"
- local script_contents="$2"
+ local node="$1"
- if [ -z "$TEST_LOCAL_DAEMONS" ] ; then
- # The quoting here is *very* fragile. However, we do
- # experience the joy of installing a short script using
- # onnode, and without needing to know the IP addresses of the
- # nodes.
- onnode all "f=\"\${CTDB_BASE:-/etc/ctdb}/events.d/${script_name}\" ; echo \"Installing \$f\" ; echo '${script_contents}' > \"\$f\" ; chmod 755 \"\$f\""
- else
- f="${TEST_VAR_DIR}/events.d/${script_name}"
- echo "$script_contents" >"$f"
- chmod 755 "$f"
- fi
+ ctdb_onnode "$node" leader
+ # shellcheck disable=SC2154
+ # $out set by ctdb_onnode() above
+ leader="$out"
}
-uninstall_eventscript ()
+leader_get ()
{
- local script_name="$1"
+ local node="$1"
- if [ -z "$TEST_LOCAL_DAEMONS" ] ; then
- onnode all "rm -vf \"\${CTDB_BASE:-/etc/ctdb}/events.d/${script_name}\""
- else
- rm -vf "${TEST_VAR_DIR}/events.d/${script_name}"
- fi
+ echo "Get leader"
+ _leader_get "$node"
+ echo "Leader is ${leader}"
+ echo
}
-#######################################
-
-# This section deals with the 99.ctdb_test eventscript.
-
-# Metafunctions: Handle a ctdb-test file on a node.
-# given event.
-ctdb_test_eventscript_file_create ()
+_leader_has_changed ()
{
- local pnn="$1"
- local type="$2"
+ local node="$1"
+ local leader_old="$2"
- try_command_on_node $pnn touch "/tmp/ctdb-test-${type}.${pnn}"
-}
+ _leader_get "$node"
-ctdb_test_eventscript_file_remove ()
-{
- local pnn="$1"
- local type="$2"
-
- try_command_on_node $pnn rm -f "/tmp/ctdb-test-${type}.${pnn}"
+ [ "$leader" != "$leader_old" ]
}
-ctdb_test_eventscript_file_exists ()
+# uses: leader
+wait_until_leader_has_changed ()
{
- local pnn="$1"
- local type="$2"
+ local node="$1"
- try_command_on_node $pnn test -f "/tmp/ctdb-test-${type}.${pnn}" >/dev/null 2>&1
+ echo
+ echo "Wait until leader changes..."
+ wait_until 30 _leader_has_changed "$node" "$leader"
+ echo "Leader changed to ${leader}"
}
+#######################################
-# Handle a flag file on a node that is removed by 99.ctdb_test on the
-# given event.
-ctdb_test_eventscript_flag ()
+# sets: generation
+_generation_get ()
{
- local cmd="$1"
- local pnn="$2"
- local event="$3"
+ local node="$1"
- ctdb_test_eventscript_file_${cmd} "$pnn" "flag-${event}"
+ ctdb_onnode "$node" status
+ # shellcheck disable=SC2154
+ # $outfile set by ctdb_onnode() above
+ generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
}
-
-# Handle a trigger that causes 99.ctdb_test to fail it's monitor
-# event.
-ctdb_test_eventscript_unhealthy_trigger ()
+generation_get ()
{
- local cmd="$1"
- local pnn="$2"
+ local node="$1"
- ctdb_test_eventscript_file_${cmd} "$pnn" "unhealthy-trigger"
+ echo "Get generation"
+ _generation_get "$node"
+ echo "Generation is ${generation}"
+ echo
}
-# Handle the file that 99.ctdb_test created to show that it has marked
-# a node unhealthy because it detected the above trigger.
-ctdb_test_eventscript_unhealthy_detected ()
+_generation_has_changed ()
{
- local cmd="$1"
- local pnn="$2"
+ local node="$1"
+ local generation_old="$2"
- ctdb_test_eventscript_file_${cmd} "$pnn" "unhealthy-detected"
-}
+ _generation_get "$node"
-# Handle a trigger that causes 99.ctdb_test to timeout it's monitor
-# event. This should cause the node to be banned.
-ctdb_test_eventscript_timeout_trigger ()
-{
- local cmd="$1"
- local pnn="$2"
- local event="$3"
-
- ctdb_test_eventscript_file_${cmd} "$pnn" "${event}-timeout"
+ [ "$generation" != "$generation_old" ]
}
-# Note that the eventscript can't use the above functions!
-ctdb_test_eventscript_install ()
+# uses: generation
+wait_until_generation_has_changed ()
{
+ local node="$1"
- local script='#!/bin/sh
-out=$(ctdb pnn)
-pnn="${out#PNN:}"
-
-rm -vf "/tmp/ctdb-test-flag-${1}.${pnn}"
-
-trigger="/tmp/ctdb-test-unhealthy-trigger.${pnn}"
-detected="/tmp/ctdb-test-unhealthy-detected.${pnn}"
-timeout_trigger="/tmp/ctdb-test-${1}-timeout.${pnn}"
-case "$1" in
- monitor)
- if [ -e "$trigger" ] ; then
- echo "${0}: Unhealthy because \"$trigger\" detected"
- touch "$detected"
- exit 1
- elif [ -e "$detected" -a ! -e "$trigger" ] ; then
- echo "${0}: Healthy again, \"$trigger\" no longer detected"
- rm "$detected"
- fi
-
- ;;
- *)
- if [ -e "$timeout_trigger" ] ; then
- echo "${0}: Sleeping for a long time because \"$timeout_trigger\" detected"
- sleep 9999
- fi
- ;;
- *)
-
-esac
-
-exit 0
-'
- install_eventscript "99.ctdb_test" "$script"
+ echo "Wait until generation changes..."
+ wait_until 30 _generation_has_changed "$node" "$generation"
+ echo "Generation changed to ${generation}"
+ echo
}
-ctdb_test_eventscript_uninstall ()
-{
- uninstall_eventscript "99.ctdb_test"
-}
+#######################################
-# Note that this only works if you know all other monitor events will
-# succeed. You also need to install the eventscript before using it.
+# Wait until the "scriptstatus" output of a node changes, which is
+# taken as evidence that an event has run there.
+# $1: PNN of the node to watch
+# Returns non-zero if the initial scriptstatus cannot be fetched or
+# if wait_until gives up.
wait_for_monitor_event ()
{
local pnn="$1"
+ local timeout=120
echo "Waiting for a monitor event on node ${pnn}..."
- ctdb_test_eventscript_flag create $pnn "monitor"
- wait_until 120 ! ctdb_test_eventscript_flag exists $pnn "monitor"
+ ctdb_onnode "$pnn" scriptstatus || {
+ echo "Unable to get scriptstatus from node $pnn"
+ return 1
+ }
+
+ # Keep the baseline output; _ctdb_scriptstatus_changed diffs
+ # each fresh $outfile against it.
+ mv "$outfile" "${outfile}.orig"
+
+ # _ctdb_scriptstatus_changed takes no arguments: it reads $pnn
+ # from this function's scope.
+ wait_until "$timeout" _ctdb_scriptstatus_changed
+}
+
+_ctdb_scriptstatus_changed ()
+{
+ ctdb_onnode "$pnn" scriptstatus || {
+ echo "Unable to get scriptstatus from node $pnn"
+ return 1
+ }
+ ! diff "$outfile" "${outfile}.orig" >/dev/null
}
#######################################
-nfs_test_setup ()
+# If the given IP is hosted then print 2 items: maskbits and iface
+ip_maskbits_iface ()
{
- select_test_node_and_ips
-
- nfs_first_export=$(showmount -e $test_ip | sed -n -e '2s/ .*//p')
+ _addr="$1"
- echo "Creating test subdirectory..."
- try_command_on_node $test_node "mktemp -d --tmpdir=$nfs_first_export"
- nfs_test_dir="$out"
- try_command_on_node $test_node "chmod 777 $nfs_test_dir"
+ case "$_addr" in
+ *:*) _family="inet6" ; _bits=128 ;;
+ *) _family="inet" ; _bits=32 ;;
+ esac
- nfs_mnt_d=$(mktemp -d)
- nfs_local_file="${nfs_mnt_d}/${nfs_test_dir##*/}/TEST_FILE"
- nfs_remote_file="${nfs_test_dir}/TEST_FILE"
+ # Literal backslashes in awk script
+ # shellcheck disable=SC1004
+ ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
+ awk -v family="${_family}" \
+ 'NR == 1 { iface = $2; sub(":$", "", iface) } \
+ $1 ~ /inet/ { mask = $2; sub(".*/", "", mask); \
+ print mask, iface, family }'
+}
- ctdb_test_exit_hook_add nfs_test_cleanup
+# Remove an address from the local interface that currently hosts it.
+# $1: IP address, optionally followed by /maskbits.  Any maskbits in
+#     the argument are discarded; the value reported by
+#     ip_maskbits_iface is used instead.
+# Nothing is done if ip_maskbits_iface prints nothing for the address.
+# Failure of "ip addr del" is deliberately ignored.
+drop_ip ()
+{
+ _addr="${1%/*}" # Remove optional maskbits
- echo "Mounting ${test_ip}:${nfs_first_export} on ${nfs_mnt_d} ..."
- mount -o timeo=1,hard,intr,vers=3 \
- ${test_ip}:${nfs_first_export} ${nfs_mnt_d}
+ # Intentional word splitting
+ # shellcheck disable=SC2046,SC2086
+ set -- $(ip_maskbits_iface $_addr)
+ if [ -n "$1" ] ; then
+ _maskbits="$1"
+ _iface="$2"
+ echo "Removing public address $_addr/$_maskbits from device $_iface"
+ # Use the stripped address here: the raw argument may carry
+ # its own /maskbits, which would yield "addr/bits/bits".
+ ip addr del "$_addr/$_maskbits" dev "$_iface" >/dev/null 2>&1 || true
+ fi
}
-nfs_test_cleanup ()
+drop_ips ()
{
- rm -f "$nfs_local_file"
- umount -f "$nfs_mnt_d"
- rmdir "$nfs_mnt_d"
- onnode -q $test_node rmdir "$nfs_test_dir"
+ for _ip ; do
+ drop_ip "$_ip"
+ done
}
#######################################
# $1: pnn, $2: DB name
db_get_path ()
{
- try_command_on_node -v $1 $CTDB getdbstatus "$2" |
- sed -n -e "s@^path: @@p"
+ ctdb_onnode -v "$1" "getdbstatus $2" | sed -n -e "s@^path: @@p"
}
# $1: pnn, $2: DB name
db_ctdb_cattdb_count_records ()
{
- try_command_on_node -v $1 $CTDB cattdb "$2" |
- grep '^key' | grep -v '__db_sequence_number__' |
- wc -l
+ # Count the number of keys, excluding any that begin with '_'.
+ # This excludes at least the sequence number record in
+ # persistent/replicated databases. The trailing "|| :" forces
+ # the command to succeed when no records are matched.
+ ctdb_onnode "$1" "cattdb $2 | grep -c '^key([0-9][0-9]*) = \"[^_]' || :"
+ echo "$out"
}
# $1: pnn, $2: DB name, $3: key string, $4: value string, $5: RSN (default 7)
db_ctdb_tstore ()
{
- _tdb=$(db_get_path $1 "$2")
+ _tdb=$(db_get_path "$1" "$2")
_rsn="${5:-7}"
- try_command_on_node $1 $CTDB tstore "$_tdb" "$3" "$4" "$_rsn"
+ ctdb_onnode "$1" tstore "$_tdb" "$3" "$4" "$_rsn"
}
# $1: pnn, $2: DB name, $3: dbseqnum (must be < 255!!!!!)
# Construct 8 byte (unit64_t) database sequence number. This
# probably breaks if $3 > 255
- _value=$(printf "0x%02x%014x" $3 0)
+ _value=$(printf "0x%02x%014x" "$3" 0)
- db_ctdb_tstore $1 "$2" "$_key" "$_value"
+ db_ctdb_tstore "$1" "$2" "$_key" "$_value"
}
-#######################################
+########################################
# Make sure that $CTDB is set.
-: ${CTDB:=ctdb}
+if [ -z "$CTDB" ] ; then
+ CTDB="ctdb"
+fi
+
+if ctdb_test_on_cluster ; then
+ . "${TEST_SCRIPTS_DIR}/integration_real_cluster.bash"
+else
+ . "${TEST_SCRIPTS_DIR}/integration_local_daemons.bash"
+fi
+
-local="${TEST_SUBDIR}/scripts/local.bash"
+local="${CTDB_TEST_SUITE_DIR}/scripts/local.bash"
if [ -r "$local" ] ; then
. "$local"
fi