tests/scripts/ctdb_test_functions.bash

   1 # Hey Emacs, this is a -*- shell-script -*- !!!  :-)
   2
   3 fail ()
   4 {
   5     echo "$*"
   6     exit 1
   7 }
   8
   9 ######################################################################
  10
  11 ctdb_test_begin ()
  12 {
  13     local name="$1"
  14
  15     teststarttime=$(date '+%s')
  16     testduration=0
  17
  18     echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
  19     echo "Running test $name ($(date '+%T'))"
  20     echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
  21 }
  22
  23 ctdb_test_end ()
  24 {
  25     local name="$1" ; shift
  26     local status="$1" ; shift
  27     # "$@" is command-line
  28
  29     local interp="SKIPPED"
  30     local statstr=" (reason $*)"
  31     if [ -n "$status" ] ; then
  32         if [ $status -eq 0 ] ; then
  33             interp="PASSED"
  34             statstr=""
  35             echo "ALL OK: $*"
  36         else
  37             interp="FAILED"
  38             statstr=" (status $status)"
  39             testfailures=$(($testfailures+1))
  40         fi
  41     fi
  42
  43     testduration=$(($(date +%s)-$teststarttime))
  44
  45     echo "=========================================================================="
  46     echo "TEST ${interp}: ${name}${statstr} (duration: ${testduration}s)"
  47     echo "=========================================================================="
  48
  49 }
  50
  51 test_exit ()
  52 {
  53     exit $(($testfailures+0))
  54 }
  55
  56 ctdb_test_exit ()
  57 {
  58     local status=$?
  59
  60     trap - 0
  61
  62     [ $(($testfailures+0)) -eq 0 -a $status -ne 0 ] && testfailures=$status
  63
  64     eval "$ctdb_test_exit_hook"
  65     unset ctdb_test_exit_hook
  66
  67     if ! onnode 0 $CTDB_TEST_WRAPPER cluster_is_healthy ; then
  68         echo "Restarting ctdb on all nodes to get back into known state..."
  69         restart_ctdb
  70     else
  71         # This could be made unconditional but then we might get
  72         # duplication from the recovery in restart_ctdb.  We want to
  73         # leave the recovery in restart_ctdb so that future tests that
  74         # might do a manual restart mid-test will benefit.
  75         echo "Forcing a recovery..."
  76         onnode 0 ctdb recover
  77     fi
  78
  79     test_exit
  80 }
  81
  82 ctdb_test_run ()
  83 {
  84     local name="$1" ; shift
  85
  86     [ -n "$1" ] || set -- "$name"
  87
  88     ctdb_test_begin "$name"
  89
  90     local status=0
  91     "$@" || status=$?
  92
  93     ctdb_test_end "$name" "$status" "$*"
  94
  95     return $status
  96 }
  97
  98 ctdb_test_usage()
  99 {
 100     local status=${1:-2}
 101
 102     cat <<EOF
 103 Usage: $0 [option]
 104
 105 Options:
 106     -h, --help          show this screen.
 107     -v, --version       show test case version.
 108     --category          show the test category (ACL, CTDB, Samba ...).
 109     -d, --description   show test case description.
 110     --summary           show short test case summary.
 111 EOF
 112
 113     exit $status
 114 }
 115
 116 ctdb_test_version ()
 117 {
 118     [ -n "$CTDB_DIR" ] || fail "Can not determine version."
 119
 120     (cd "$CTDB_DIR" && git describe)
 121 }
 122
 123 ctdb_test_cmd_options()
 124 {
 125     [ -n "$1" ] || return 0
 126
 127     case "$1" in
 128         -h|--help)        ctdb_test_usage 0   ;;
 129         -v|--version)     ctdb_test_version   ;;
 130         --category)       echo "CTDB"         ;;
 131         -d|--description) test_info           ;;
 132         --summary)        test_info | head -1 ;;
 133         *)
 134             echo "Error: Unknown parameter = $1"
 135             echo
 136             ctdb_test_usage 2
 137             ;;
 138     esac
 139
 140     exit 0
 141 }
 142
 143 ctdb_test_init ()
 144 {
 145     scriptname=$(basename "$0")
 146     testfailures=0
 147
 148     ctdb_test_cmd_options $@
 149
 150     trap "ctdb_test_exit" 0
 151 }
 152
 153 ########################################
 154
 155 # Sets: $out
 156 try_command_on_node ()
 157 {
 158     local nodespec="$1" ; shift
 159
 160     local verbose=false
 161     local onnode_opts=""
 162
 163     while [ "${nodespec#-}" != "$nodespec" ] ; do
 164         if [ "$nodespec" = "-v" ] ; then
 165             verbose=true
 166         else
 167             onnode_opts="$nodespec"
 168         fi
 169         nodespec="$1" ; shift
 170     done
 171
 172     local cmd="$*"
 173
 174     out=$(onnode -q $onnode_opts "$nodespec" "$cmd" 2>&1) || {
 175
 176         echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
 177         echo "$out"
 178         return 1
 179     }
 180
 181     if $verbose ; then
 182         echo "Output of \"$cmd\":"
 183         echo "$out"
 184     fi
 185 }
 186
 187 sanity_check_output ()
 188 {
 189     local min_lines="$1"
 190     local regexp="$2" # Should be anchored as necessary.
 191     local output="$3"
 192
 193     local ret=0
 194
 195     local num_lines=$(echo "$output" | wc -l)
 196     echo "There are $num_lines lines of output"
 197     if [ $num_lines -lt $min_lines ] ; then
 198         echo "BAD: that's less than the required number (${min_lines})"
 199         ret=1
 200     fi
 201
 202     local status=0
 203     local unexpected # local doesn't pass through status of command on RHS.
 204     unexpected=$(echo "$output" | egrep -v "$regexp") || status=$?
 205
 206     # Note that this is reversed.
 207     if [ $status -eq 0 ] ; then
 208         echo "BAD: unexpected lines in output:"
 209         echo "$unexpected" | cat -A
 210         ret=1
 211     else
 212         echo "Output lines look OK"
 213     fi
 214
 215     return $ret
 216 }
 217
 218 #######################################
 219
 220 # Wait until either timeout expires or command succeeds.  The command
 221 # will be tried once per second.
 222 wait_until ()
 223 {
 224     local timeout="$1" ; shift # "$@" is the command...
 225
 226     echo -n "<${timeout}|"
 227     while [ $timeout -gt 0 ] ; do
 228         if "$@" ; then
 229             echo '|'
 230             echo "OK"
 231             return 0
 232         fi
 233         echo -n .
 234         timeout=$(($timeout - 1))
 235         sleep 1
 236     done
 237
 238     echo "*TIMEOUT*"
 239
 240     return 1
 241 }
 242
 243 sleep_for ()
 244 {
 245     echo -n "=${1}|"
 246     for i in $(seq 1 $1) ; do
 247         echo -n '.'
 248         sleep 1
 249     done
 250     echo '|'
 251 }
 252
 253 _cluster_is_healthy ()
 254 {
 255     local out x count line
 256
 257     out=$(ctdb -Y status 2>&1) || return 1
 258
 259     {
 260         read x
 261         count=0
 262         while read line ; do
 263             count=$(($count + 1))
 264             [ "${line#:*:*:}" != "0:0:0:0:" ] && return 1
 265         done
 266         [ $count -gt 0 ] && return $?
 267     } <<<"$out" # Yay bash!
 268 }
 269
 270 cluster_is_healthy ()
 271 {
 272     if _cluster_is_healthy ; then
 273         echo "Cluster is HEALTHY"
 274         exit 0
 275     else
 276         echo "Cluster is UNHEALTHY"
 277         exit 1
 278     fi
 279 }
 280
 281 wait_until_healthy ()
 282 {
 283     local timeout="${1:-120}"
 284
 285     echo "Waiting for cluster to become healthy..."
 286
 287     wait_until 120 _cluster_is_healthy
 288 }
 289
 290 # This function is becoming nicely overloaded.  Soon it will collapse!  :-)
 291 node_has_status ()
 292 {
 293     local pnn="$1"
 294     local status="$2"
 295
 296     local bits fpat mpat
 297     case "$status" in
 298         (unhealthy)    bits="?:?:?:1" ;;
 299         (healthy)      bits="?:?:?:0" ;;
 300         (disconnected) bits="1:?:?:?" ;;
 301         (connected)    bits="0:?:?:?" ;;
 302         (banned)       bits="?:1:?:?" ;;
 303         (unbanned)     bits="?:0:?:?" ;;
 304         (disabled)     bits="?:?:1:?" ;;
 305         (enabled)      bits="?:?:0:?" ;;
 306         (frozen)       fpat='^[[:space:]]+frozen[[:space:]]+1$' ;;
 307         (unfrozen)     fpat='^[[:space:]]+frozen[[:space:]]+0$' ;;
 308         (monon)        mpat='^Monitoring mode:ACTIVE \(0\)$' ;;
 309         (monoff)       mpat='^Monitoring mode:DISABLED \(1\)$' ;;
 310         *)
 311             echo "node_has_status: unknown status \"$status\""
 312             return 1
 313     esac
 314
 315     if [ -n "$bits" ] ; then
 316         local out x line
 317
 318         out=$(ctdb -Y status 2>&1) || return 1
 319
 320         {
 321             read x
 322             while read line ; do
 323                 [ "${line#:${pnn}:*:${bits}:}" = "" ] && return 0
 324             done
 325             return 1
 326         } <<<"$out" # Yay bash!
 327     elif [ -n "$fpat" ] ; then
 328         ctdb statistics -n "$pnn" | egrep -q "$fpat"
 329     elif [ -n "$mpat" ] ; then
 330         ctdb getmonmode -n "$pnn" | egrep -q "$mpat"
 331     else
 332         echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
 333         return 1
 334     fi
 335 }
 336
 337 wait_until_node_has_status ()
 338 {
 339     local pnn="$1"
 340     local status="$2"
 341     local timeout="${3:-30}"
 342
 343     echo "Waiting until node $pnn has status \"$status\"..."
 344
 345     wait_until $timeout node_has_status "$pnn" "$status"
 346 }
 347
 348 # Useful for superficially testing IP failover.
 349 # IPs must be on nodes matching nodeglob.
 350 ips_are_on_nodeglob ()
 351 {
 352     local nodeglob="$1" ; shift
 353     local ips="$*"
 354
 355     local out
 356
 357     try_command_on_node 1 ctdb ip -n all
 358
 359     while read ip pnn ; do
 360         for check in $ips ; do
 361             if [ "$check" = "$ip" ] ; then
 362                 case "$pnn" in
 363                     ($nodeglob) : ;;
 364                     (*) return 1  ;;
 365                 esac
 366                 ips="${ips/${ip}}" # Remove from list
 367             fi
 368         done
 369     done <<<"$out" # bashism to avoid problem setting variable in pipeline.
 370
 371     ips="${ips// }" # Remove any spaces.
 372     [ -z "$ips" ]
 373 }
 374
 375 wait_until_ips_are_on_nodeglob ()
 376 {
 377     echo "Waiting for IPs to fail over..."
 378
 379     wait_until 60 ips_are_on_nodeglob "$@"
 380 }
 381
 382 #######################################
 383
 384 daemons_stop ()
 385 {
 386     echo "Attempting to politely shutdown daemons..."
 387     onnode 1 ctdb shutdown -n all || true
 388
 389     echo "Sleeping for a while..."
 390     sleep_for 1
 391
 392     if pgrep -f $CTDB_DIR/bin/ctdbd >/dev/null ; then
 393         echo "Killing remaining daemons..."
 394         pkill -f $CTDB_DIR/bin/ctdbd
 395
 396         if pgrep -f $CTDB_DIR/bin/ctdbd >/dev/null ; then
 397             echo "Once more with feeling.."
 398             pkill -9 $CTDB_DIR/bin/ctdbd
 399         fi
 400     fi
 401
 402     local var_dir=$CTDB_DIR/tests/var
 403     rm -rf $var_dir/test.db
 404 }
 405
 406 daemons_setup ()
 407 {
 408     local num_nodes="${1:-2}" # default is 2 nodes
 409
 410     local var_dir=$CTDB_DIR/tests/var
 411
 412     mkdir -p $var_dir/test.db/persistent
 413
 414     local nodes=$var_dir/nodes.txt
 415     local public_addresses=$var_dir/public_addresses.txt
 416     local no_public_addresses=$var_dir/no_public_addresses.txt
 417     rm -f $nodes $public_addresses $no_public_addresses
 418
 419     # If there are (strictly) greater than 2 nodes then we'll randomly
 420     # choose a node to have no public addresses.
 421     local no_public_ips=-1
 422     [ $num_nodes -gt 2 ] && no_public_ips=$(($RANDOM % $num_nodes))
 423     echo "$no_public_ips" >$no_public_addresses
 424
 425     local i
 426     for i in $(seq 1 $num_nodes) ; do
 427         if [ "${CTDB_USE_IPV6}x" != "x" ]; then
 428             echo ::$i >> $nodes
 429             ip addr add ::$i/128 dev lo
 430         else
 431             echo 127.0.0.$i >> $nodes
 432             # 2 public addresses on most nodes, just to make things interesting.
 433             if [ $(($i - 1)) -ne $no_public_ips ] ; then
 434                 echo "192.0.2.$i/24 lo" >> $public_addresses
 435                 echo "192.0.2.$(($i + $num_nodes))/24 lo" >> $public_addresses
 436             fi
 437         fi
 438     done
 439 }
 440
 441 daemons_start ()
 442 {
 443     local num_nodes="${1:-2}" # default is 2 nodes
 444     shift # "$@" gets passed to ctdbd
 445
 446     local var_dir=$CTDB_DIR/tests/var
 447
 448     local nodes=$var_dir/nodes.txt
 449     local public_addresses=$var_dir/public_addresses.txt
 450     local no_public_addresses=$var_dir/no_public_addresses.txt
 451
 452     local no_public_ips=-1
 453     [ -r $no_public_addresses ] && read no_public_ips <$no_public_addresses
 454
 455     local ctdb_options="--reclock=$var_dir/rec.lock --nlist $nodes --nopublicipcheck --event-script-dir=tests/events.d --logfile=$var_dir/daemons.log -d 0 --dbdir=$var_dir/test.db --dbdir-persistent=$var_dir/test.db/persistent"
 456
 457     echo "Starting $num_nodes ctdb daemons..."
 458     if  [ "$no_public_ips" != -1 ] ; then
 459         echo "Node $no_public_ips will have no public IPs."
 460     fi
 461
 462     for i in $(seq 0 $(($num_nodes - 1))) ; do
 463         if [ $(id -u) -eq 0 ]; then
 464             ctdb_options="$ctdb_options --public-interface=lo"
 465         fi
 466
 467         if [ $i -eq $no_public_ips ] ; then
 468             ctdb_options="$ctdb_options --public-addresses=/dev/null"
 469         else
 470             ctdb_options="$ctdb_options --public-addresses=$public_addresses"
 471         fi
 472
 473         # Need the $PWD so we can use "pkill -f" to kill the daemons.
 474         $VALGRIND $PWD/bin/ctdbd --socket=$var_dir/sock.$i $ctdb_options "$@" ||return 1
 475     done
 476
 477     if [ -L /tmp/ctdb.socket -o ! -S /tmp/ctdb.socket ] ; then
 478         ln -sf $var_dir/sock.0 /tmp/ctdb.socket || return 1
 479     fi
 480 }
 481
 482 #######################################
 483
 484 _restart_ctdb ()
 485 {
 486     if [ -e /etc/redhat-release ] ; then
 487         service ctdb restart
 488     else
 489         /etc/init.d/ctdb restart
 490     fi
 491 }
 492
 493 setup_ctdb ()
 494 {
 495     if [ -n "$CTDB_NODES_SOCKETS" ] ; then
 496         daemons_setup $CTDB_TEST_NUM_DAEMONS
 497     fi
 498 }
 499
 500 restart_ctdb ()
 501 {
 502     if [ -n "$CTDB_NODES_SOCKETS" ] ; then
 503         daemons_stop
 504         daemons_start $CTDB_TEST_NUM_DAEMONS
 505     else
 506         onnode -pq all $CTDB_TEST_WRAPPER _restart_ctdb
 507     fi || return 1
 508
 509     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 510
 511     echo "Setting RerecoveryTimeout to 1"
 512     onnode -pq all "ctdb setvar RerecoveryTimeout 1"
 513
 514     # In recent versions of CTDB, forcing a recovery like this blocks
 515     # until the recovery is complete.  Hopefully this will help the
 516     # cluster to stabilise before a subsequent test.
 517     echo "Forcing a recovery..."
 518     onnode -q 0 ctdb recover
 519
 520     #echo "Sleeping to allow ctdb to settle..."
 521     #sleep_for 10
 522
 523     echo "ctdb is ready"
 524 }
 525
 526 #######################################
 527
 528 install_eventscript ()
 529 {
 530     local script_name="$1"
 531     local script_contents="$2"
 532
 533     if [ -n "$CTDB_TEST_REAL_CLUSTER" ] ; then
 534         # The quoting here is *very* fragile.  However, we do
 535         # experience the joy of installing a short script using
 536         # onnode, and without needing to know the IP addresses of the
 537         # nodes.
 538         onnode all "f=\"\${CTDB_BASE:-/etc/ctdb}/events.d/${script_name}\" ; echo \"Installing \$f\" ; echo '${script_contents}' > \"\$f\" ; chmod 755 \"\$f\""
 539     else
 540         f="${CTDB_DIR}/tests/events.d/${script_name}"
 541         echo "$script_contents" >"$f"
 542         chmod 755 "$f"
 543     fi
 544 }
 545
 546 uninstall_eventscript ()
 547 {
 548     local script_name="$1"
 549
 550     if [ -n "$CTDB_TEST_REAL_CLUSTER" ] ; then
 551         onnode all "rm -vf \"\${CTDB_BASE:-/etc/ctdb}/events.d/${script_name}\""
 552     else
 553         rm -vf "${CTDB_DIR}/tests/events.d/${script_name}"
 554     fi
 555 }