1 # Hey Emacs, this is a -*- shell-script -*- !!! :-)
3 . "${TEST_SCRIPTS_DIR}/common.sh"
5 ######################################################################
9 if [ -n "$CTDB_TEST_REMOTE_DIR" ] ; then
10 CTDB_TEST_WRAPPER="${CTDB_TEST_REMOTE_DIR}/test_wrap"
12 _d=$(cd "$TEST_SCRIPTS_DIR" && echo "$PWD")
13 CTDB_TEST_WRAPPER="$_d/test_wrap"
15 export CTDB_TEST_WRAPPER
17 # If $VALGRIND is set then use it whenever ctdb is called, but only if
18 # $CTDB is not already set.
19 [ -n "$CTDB" ] || export CTDB="${VALGRIND}${VALGRIND:+ }ctdb"
22 PATH="${TEST_SCRIPTS_DIR}:${PATH}"
24 ######################################################################
# Succeed (status 0) when $CTDB_TEST_LOCAL_DAEMONS is empty or unset.
# Later in this file that result selects integration_real_cluster.bash
# rather than integration_local_daemons.bash.
26 ctdb_test_on_cluster ()
28 [ -z "$CTDB_TEST_LOCAL_DAEMONS" ]
37 # run_tests.sh pipes stdout into tee. If the tee process is
38 # killed then any attempt to write to stdout (e.g. echo) will
39 # result in SIGPIPE, terminating the caller. Ignore SIGPIPE to
40 # ensure that all clean-up is run.
43 # Avoid making a test fail from this point onwards. The test is
47 echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
49 eval "$ctdb_test_exit_hook" || true
50 unset ctdb_test_exit_hook
52 echo "Stopping cluster..."
53 ctdb_nodes_stop || ctdb_test_error "Cluster shutdown failed"
# Append a command to $ctdb_test_exit_hook.
# All arguments are joined into one string ($*).  A " ; " separator is
# inserted only when the hook is already non-empty.  The accumulated
# string is run with eval by the clean-up code in this file.
58 ctdb_test_exit_hook_add ()
60 ctdb_test_exit_hook="${ctdb_test_exit_hook}${ctdb_test_exit_hook:+ ; }$*"
63 # Setting ctdb_test_cleanup_pid to <pid>@<node> will cause <pid> to be killed on
64 # <node> when the test completes. To cancel, set ctdb_test_cleanup_pid back to "".
65 ctdb_test_cleanup_pid=""
# Exit hook: kill the process recorded in $ctdb_test_cleanup_pid, if any.
66 ctdb_test_cleanup_pid_exit_hook ()
# Nothing to do unless a <pid>@<node> value has been recorded.
68 if [ -n "$ctdb_test_cleanup_pid" ] ; then
# Everything before the last '@' is the pid...
69 local pid="${ctdb_test_cleanup_pid%@*}"
# ...and everything after the first '@' is the node.
70 local node="${ctdb_test_cleanup_pid#*@}"
# Run "kill <pid>" on that node.
72 try_command_on_node "$node" "kill ${pid}"
76 ctdb_test_exit_hook_add ctdb_test_cleanup_pid_exit_hook
# Record a process for the clean-up exit hook, stored as <pid>@<node>.
# NOTE(review): the assignments of $pid and $node are not visible here;
# presumably they come from the function's arguments - confirm.
78 ctdb_test_cleanup_pid_set ()
83 ctdb_test_cleanup_pid="${pid}@${node}"
# Cancel any pending clean-up kill by emptying $ctdb_test_cleanup_pid;
# the exit hook only acts when that variable is non-empty.
86 ctdb_test_cleanup_pid_clear ()
88 ctdb_test_cleanup_pid=""
93 trap "ctdb_test_exit" 0
95 ctdb_nodes_stop >/dev/null 2>&1 || true
97 echo "Configuring cluster..."
98 setup_ctdb "$@" || ctdb_test_error "Cluster configuration failed"
100 echo "Starting cluster..."
101 ctdb_init || ctdb_test_error "Cluster startup failed"
103 echo "*** SETUP COMPLETE AT $(date '+%F %T'), RUNNING TEST..."
106 ctdb_test_skip_on_cluster ()
108 if ctdb_test_on_cluster ; then
110 "SKIPPING this test - only runs against local daemons"
115 ctdb_nodes_restart ()
118 ctdb_nodes_start "$@"
121 ########################################
123 # Sets: $out, $outfile
124 # * The first 1KB of output is put into $out
125 # * Tests should use $outfile for handling large output
126 # * $outfile is removed after each test
128 outfile="${CTDB_TEST_TMP_DIR}/try_command_on_node.out"
135 ctdb_test_exit_hook_add outfile_cleanup
# Run a command on the given node(s) via onnode.
# Leading arguments that begin with '-' are treated as options; the
# first argument that does not begin with '-' is the nodespec.
# All output (stdout and stderr) is written to $outfile and the first
# 1KB of it is copied into $out.
137 try_command_on_node ()
139 local nodespec="$1" ; shift
# "${nodespec#-}" differs from "$nodespec" only when it starts with '-',
# so this loop consumes option arguments one at a time.
144 while [ "${nodespec#-}" != "$nodespec" ] ; do
# "-v" is tested for separately.
# NOTE(review): the body of this branch is not visible here; presumably
# it enables the "Output of ..." message below - confirm.
145 if [ "$nodespec" = "-v" ] ; then
# Accumulate options as a space-separated list for onnode.
148 onnode_opts="${onnode_opts}${onnode_opts:+ }${nodespec}"
# Move on to the next argument.
150 nodespec="$1" ; shift
156 # Intentionally unquoted - might be empty
157 # shellcheck disable=SC2086
158 onnode -q $onnode_opts "$nodespec" "$cmd" >"$outfile" 2>&1 || status=$?
# dd with bs=1k count=1 reads at most the first 1KB of the output file.
159 out=$(dd if="$outfile" bs=1k count=1 2>/dev/null)
# Report the failing command and the node(s) it was aimed at.
161 if [ $status -ne 0 ] ; then
162 echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
168 echo "Output of \"$cmd\":"
178 local options nodespec
183 options="${options}${options:+ }${1}"
193 # shellcheck disable=SC2086
194 # $options can be multi-word
195 try_command_on_node $options "$nodespec" "${thing} $*"
200 _run_onnode "$CTDB" "$@"
205 _run_onnode "${CTDB_TEST_WRAPPER} ${VALGRIND}" "$@"
210 _run_onnode "${CTDB_TEST_WRAPPER}" "$@"
# Check $outfile: it must contain at least $min_lines lines and every
# line must match the extended regular expression given as $2.
# NOTE(review): the assignment of $min_lines is not visible here;
# presumably it is $1 - confirm.
213 sanity_check_output ()
216 local regexp="$2" # Should be anchored as necessary.
# Count the lines; tr strips any whitespace that wc adds around the count.
221 num_lines=$(wc -l <"$outfile" | tr -d '[:space:]')
222 echo "There are $num_lines lines of output"
223 if [ "$num_lines" -lt "$min_lines" ] ; then
224 ctdb_test_fail "BAD: that's less than the required number (${min_lines})"
228 local unexpected # local doesn't pass through status of command on RHS.
# grep -v selects the lines that do NOT match, so $unexpected holds the
# offending lines and $status records whether any were found.
229 unexpected=$(grep -Ev "$regexp" "$outfile") || status=$?
231 # Note that this is reversed.
# grep exiting with 0 means at least one non-matching line exists.
232 if [ $status -eq 0 ] ; then
233 echo "BAD: unexpected lines in output:"
# cat -A makes non-printing characters and line ends visible.
234 echo "$unexpected" | cat -A
237 echo "Output lines look OK"
245 try_command_on_node any ctdb pnn || return 1
248 echo "Selected node ${test_node}"
251 # This returns a list of "ip node" lines in $outfile
255 try_command_on_node "$node" \
256 "$CTDB ip -X | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
# Pick a test node and gather the IPs listed against it.
# Sets: $test_node, $test_node_ips, $test_ip, $test_prefix
# Returns non-zero when no test node was selected.
#
# The awk script skips the first line of "ctdb ip -X all" output and
# prints fields 2 and 3 of each '|'-separated line; the loop below reads
# those as "ip pnn" pairs.
259 _select_test_node_and_ips ()
261 try_command_on_node any \
262 "$CTDB ip -X all | awk -F'|' 'NR > 1 { print \$2, \$3 }'"
264 test_node="" # this matches no PNN
267 while read -r ip pnn ; do
# True for the first entry whose pnn is not -1 while no node is chosen.
# NOTE(review): the body is not visible here; presumably it sets
# test_node to this pnn - confirm.
268 if [ -z "$test_node" ] && [ "$pnn" != "-1" ] ; then
# Collect every IP listed against the chosen node, space-separated.
271 if [ "$pnn" = "$test_node" ] ; then
272 test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
276 echo "Selected node ${test_node} with IPs: ${test_node_ips}."
# Strip everything from the first space onwards: keep the first IP.
277 test_ip="${test_node_ips%% *}"
279 # test_prefix used by caller
280 # shellcheck disable=SC2034
# A value containing ':' gets a /128 prefix, anything else gets /32.
# NOTE(review): the case selector is not visible; presumably $test_ip.
282 *:*) test_prefix="${test_ip}/128" ;;
283 *) test_prefix="${test_ip}/32" ;;
286 [ -n "$test_node" ] || return 1
# Retry _select_test_node_and_ips until it succeeds.
# $timeout is decremented once per failed attempt and the test is
# aborted via ctdb_test_error once it reaches zero.
# NOTE(review): the initial value of $timeout and any delay between
# attempts are not visible here.
289 select_test_node_and_ips ()
292 while ! _select_test_node_and_ips ; do
293 echo "Unable to find a test node with IPs assigned"
294 if [ $timeout -le 0 ] ; then
295 ctdb_test_error "BAD: Too many attempts"
299 timeout=$((timeout - 1))
306 get_test_ip_mask_and_iface ()
309 ctdb_onnode "$test_node" "ip -v -X"
310 iface=$(awk -F'|' -v ip="$test_ip" '$2 == ip { print $4 }' "$outfile")
312 if ctdb_test_on_cluster ; then
314 try_command_on_node "$test_node" ip addr show to "$test_ip"
321 echo "$test_ip/$mask is on $iface"
326 try_command_on_node -q all "$CTDB pnn"
330 # The subtlety is that "ctdb delip" will fail if the IP address isn't
331 # configured on a node...
332 delete_ip_from_all_nodes ()
340 for _pnn in $all_pnns ; do
341 all_ips_on_node "$_pnn"
342 while read -r _i _ ; do
343 if [ "$_ip" = "$_i" ] ; then
344 _nodes="${_nodes}${_nodes:+,}${_pnn}"
349 try_command_on_node -pq "$_nodes" "$CTDB delip $_ip"
352 #######################################
357 for i in $(seq 1 "$1") ; do
364 _cluster_is_healthy ()
366 $CTDB nodestatus all >/dev/null
369 _cluster_is_recovered ()
371 node_has_status 0 recovered
376 _cluster_is_healthy && _cluster_is_recovered
# Report whether the cluster is healthy, as seen from node 0.
# Both checks run on node 0 through $CTDB_TEST_WRAPPER.
379 cluster_is_healthy ()
381 if onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_healthy ; then
382 echo "Cluster is HEALTHY"
# Healthy but not recovered only produces a warning message.
383 if ! onnode 0 "$CTDB_TEST_WRAPPER" _cluster_is_recovered ; then
384 echo "WARNING: cluster in recovery mode!"
389 echo "Cluster is UNHEALTHY"
391 echo "DEBUG AT $(date '+%F %T'):"
# Each list item is a complete diagnostic command line.
# NOTE(review): the loop body is not visible here; presumably it prints
# and runs each command - confirm.
393 for i in "onnode -q 0 $CTDB status" \
394 "onnode -q 0 onnode all $CTDB scriptstatus" ; do
404 local timeout="${1:-120}"
406 echo "Waiting for cluster to become ready..."
408 wait_until "$timeout" onnode -q any "$CTDB_TEST_WRAPPER" _cluster_is_ready
411 # This function is becoming nicely overloaded. Soon it will collapse! :-)
419 ! $CTDB status -n "$pnn" | \
420 grep -Eq '^Recovery mode:RECOVERY \(1\)$'
424 ! $CTDB status | grep -Eq "^hash:.* lmaster:${pnn}\$"
431 unhealthy) bits="?|?|?|1|*" ;;
432 healthy) bits="?|?|?|0|*" ;;
433 disconnected) bits="1|*" ;;
434 connected) bits="0|*" ;;
435 banned) bits="?|1|*" ;;
436 unbanned) bits="?|0|*" ;;
437 disabled) bits="?|?|1|*" ;;
438 enabled) bits="?|?|0|*" ;;
439 stopped) bits="?|?|?|?|1|*" ;;
440 notstopped) bits="?|?|?|?|0|*" ;;
442 echo "node_has_status: unknown status \"$status\""
447 out=$($CTDB -X status 2>&1) || return 1
451 while read -r line ; do
452 # This needs to be done in 2 steps to
453 # avoid false matches.
454 local line_bits="${line#|${pnn}|*|}"
455 [ "$line_bits" = "$line" ] && continue
456 [ "${line_bits#${bits}}" != "$line_bits" ] && \
460 } <<<"$out" # Yay bash!
# Wait until node $pnn reports the given status.
# $3: timeout passed to wait_until (default 30)
# $4: node on which the check is run (default "any")
# NOTE(review): the assignments of $pnn and $status are not visible
# here; presumably they are $1 and $2 - confirm.
463 wait_until_node_has_status ()
467 local timeout="${3:-30}"
468 local proxy_pnn="${4:-any}"
470 echo "Waiting until node $pnn has status \"$status\"..."
# The check runs on $proxy_pnn via onnode and $CTDB_TEST_WRAPPER.
472 if ! wait_until "$timeout" onnode "$proxy_pnn" \
473 "$CTDB_TEST_WRAPPER" node_has_status "$pnn" "$status" ; then
# On timeout, iterate over diagnostic command lines (loop body not
# visible here).
475 for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
485 # Useful for superficially testing IP failover.
486 # IPs must be on the given node.
487 # If the first argument is '!' then the IPs must not be on the given node.
491 if [ "$1" = "!" ] ; then
492 negating=true ; shift
494 local node="$1" ; shift
499 all_ips_on_node "$node"
502 for check in $ips ; do
504 while read -r ip pnn ; do
505 if [ "$check" = "$ip" ] ; then
506 if [ "$pnn" = "$node" ] ; then
507 if $negating ; then return 1 ; fi
509 if ! $negating ; then return 1 ; fi
511 ips="${ips/${ip}}" # Remove from list
514 # If we're negating and we didn't see the address then it
515 # isn't hosted by anyone!
517 ips="${ips/${check}}"
522 ips="${ips// }" # Remove any spaces.
526 wait_until_ips_are_on_node ()
528 # Go to some trouble to print a useful description of what is happening
530 if [ "$1" == "!" ] ; then
537 [ "$i" != "!" ] || continue
538 if [ -z "$node" ] ; then
542 ips="${ips}${ips:+, }${i}"
544 echo "Waiting for ${ips} to ${not}be assigned to node ${node}"
546 wait_until 60 ips_are_on_node "$@"
555 all_ips_on_node "$node"
557 while read -r ip pnn ; do
558 if [ "$node" = "$pnn" ] ; then
566 wait_until_node_has_some_ips ()
568 echo "Waiting for some IPs to be assigned to node ${test_node}"
570 wait_until 60 node_has_some_ips "$@"
573 wait_until_node_has_no_ips ()
575 echo "Waiting until no IPs are assigned to node ${test_node}"
577 wait_until 60 ! node_has_some_ips "$@"
580 #######################################
584 ctdb_nodes_stop >/dev/null 2>&1 || :
586 ctdb_nodes_start || ctdb_test_error "Cluster start failed"
588 wait_until_ready 120 || ctdb_test_error "Cluster didn't become ready"
590 echo "Setting RerecoveryTimeout to 1"
591 onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
593 echo "Forcing a recovery..."
594 onnode -q 0 "$CTDB recover"
597 if ! onnode -q all "$CTDB_TEST_WRAPPER _cluster_is_recovered" ; then
598 echo "Cluster has gone into recovery again, waiting..."
599 wait_until 30/2 onnode -q all \
600 "$CTDB_TEST_WRAPPER _cluster_is_recovered" || \
601 ctdb_test_error "Cluster did not come out of recovery"
604 if ! onnode 0 "$CTDB_TEST_WRAPPER _cluster_is_healthy" ; then
605 ctdb_test_error "Cluster became UNHEALTHY again [$(date)]"
608 echo "Doing a sync..."
609 onnode -q 0 "$CTDB sync"
617 echo "${CTDB_BASE:-${CTDB_SCRIPTS_BASE}}"
620 #######################################
# Wait for the scriptstatus output of node $pnn to change.
# A baseline is captured first and saved as ${outfile}.orig, then
# _ctdb_scriptstatus_changed is polled via "wait_until 120".
# NOTE(review): the assignment of $pnn is not visible here.
622 wait_for_monitor_event ()
627 echo "Waiting for a monitor event on node ${pnn}..."
# Capture the baseline scriptstatus.
629 ctdb_onnode "$pnn" scriptstatus || {
630 echo "Unable to get scriptstatus from node $pnn"
# Keep the baseline aside so later output can be compared against it.
634 mv "$outfile" "${outfile}.orig"
636 wait_until 120 _ctdb_scriptstatus_changed
# Succeed when the current scriptstatus output for node $pnn differs
# from the baseline saved in ${outfile}.orig.
# $pnn is not set in the visible lines, so it has to be set elsewhere.
639 _ctdb_scriptstatus_changed ()
641 ctdb_onnode "$pnn" scriptstatus || {
642 echo "Unable to get scriptstatus from node $pnn"
# diff exits 0 when the files are identical, so the result is negated.
646 ! diff "$outfile" "${outfile}.orig" >/dev/null
649 #######################################
651 # If the given IP is hosted then print 3 items: maskbits, iface and family
657 *:*) _family="inet6" ; _bits=128 ;;
658 *) _family="inet" ; _bits=32 ;;
661 # Literal backslashes in awk script
662 # shellcheck disable=SC1004
663 ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
664 awk -v family="${_family}" \
665 'NR == 1 { iface = $2; sub(":$", "", iface) } \
666 $1 ~ /inet/ { mask = $2; sub(".*/", "", mask); \
667 print mask, iface, family }'
672 _addr="${1%/*}" # Remove optional maskbits
674 # Intentional word splitting
675 # shellcheck disable=SC2046,SC2086
676 set -- $(ip_maskbits_iface $_addr)
677 if [ -n "$1" ] ; then
680 echo "Removing public address $_addr/$_maskbits from device $_iface"
681 ip addr del "$_ip/$_maskbits" dev "$_iface" >/dev/null 2>&1 || true
692 #######################################
694 # $1: pnn, $2: DB name
697 ctdb_onnode -v "$1" "getdbstatus $2" | sed -n -e "s@^path: @@p"
700 # $1: pnn, $2: DB name
701 db_ctdb_cattdb_count_records ()
703 # Count the number of keys, excluding any that begin with '_'.
704 # This excludes at least the sequence number record in
705 # persistent/replicated databases. The trailing "|| :" forces
706 # the command to succeed when no records are matched.
707 ctdb_onnode "$1" "cattdb $2 | grep -c '^key([0-9][0-9]*) = \"[^_]' || :"
711 # $1: pnn, $2: DB name, $3: key string, $4: value string, $5: RSN (default 7)
714 _tdb=$(db_get_path "$1" "$2")
716 ctdb_onnode "$1" tstore "$_tdb" "$3" "$4" "$_rsn"
# Store a database sequence number record in the given database.
719 # $1: pnn, $2: DB name, $3: dbseqnum (must be < 255!!!!!)
720 db_ctdb_tstore_dbseqnum ()
722 # "__db_sequence_number__" + trailing 0x00
723 _key='0x5f5f64625f73657175656e63655f6e756d6265725f5f00'
725 # Construct 8 byte (uint64_t) database sequence number. This
726 # probably breaks if $3 > 255
# %02x emits $3 as the first byte (2 hex digits); %014x then adds 14
# zero hex digits, i.e. the remaining 7 bytes.
727 _value=$(printf "0x%02x%014x" "$3" 0)
# The RSN argument is omitted here.
729 db_ctdb_tstore "$1" "$2" "$_key" "$_value"
732 ########################################
734 # Make sure that $CTDB is set.
735 if [ -z "$CTDB" ] ; then
739 if ctdb_test_on_cluster ; then
740 . "${TEST_SCRIPTS_DIR}/integration_real_cluster.bash"
742 . "${TEST_SCRIPTS_DIR}/integration_local_daemons.bash"
746 local="${CTDB_TEST_SUITE_DIR}/scripts/local.bash"
747 if [ -r "$local" ] ; then