ctdb/tests/scripts/ctdb_test_functions.bash

   1 # Hey Emacs, this is a -*- shell-script -*- !!!  :-)
   2
   3 fail ()
   4 {
   5     echo "$*"
   6     exit 1
   7 }
   8
   9 ######################################################################
  10
  11 ctdb_test_begin ()
  12 {
  13     local name="$1"
  14
  15     teststarttime=$(date '+%s')
  16     testduration=0
  17
  18     echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
  19     echo "Running test $name ($(date '+%T'))"
  20     echo "--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--"
  21 }
  22
  23 ctdb_test_end ()
  24 {
  25     local name="$1" ; shift
  26     local status="$1" ; shift
  27     # "$@" is command-line
  28
  29     local interp="SKIPPED"
  30     local statstr=" (reason $*)"
  31     if [ -n "$status" ] ; then
  32         if [ $status -eq 0 ] ; then
  33             interp="PASSED"
  34             statstr=""
  35             echo "ALL OK: $*"
  36         else
  37             interp="FAILED"
  38             statstr=" (status $status)"
  39             testfailures=$(($testfailures+1))
  40         fi
  41     fi
  42
  43     testduration=$(($(date +%s)-$teststarttime))
  44
  45     echo "=========================================================================="
  46     echo "TEST ${interp}: ${name}${statstr} (duration: ${testduration}s)"
  47     echo "=========================================================================="
  48
  49 }
  50
  51 test_exit ()
  52 {
  53     exit $(($testfailures+0))
  54 }
  55
  56 ctdb_test_exit ()
  57 {
  58     local status=$?
  59
  60     trap - 0
  61
  62     [ $(($testfailures+0)) -eq 0 -a $status -ne 0 ] && testfailures=$status
  63     status=$(($testfailures+0))
  64
  65     # Avoid making a test fail from this point onwards.  The test is
  66     # now complete.
  67     set +e
  68
  69     echo "*** TEST COMPLETE (RC=$status), CLEANING UP..."
  70
  71     eval "$ctdb_test_exit_hook" || true
  72     unset ctdb_test_exit_hook
  73
  74     if $ctdb_test_restart_scheduled || ! cluster_is_healthy ; then
  75
  76         restart_ctdb
  77     else
  78         # This could be made unconditional but then we might get
  79         # duplication from the recovery in restart_ctdb.  We want to
  80         # leave the recovery in restart_ctdb so that future tests that
  81         # might do a manual restart mid-test will benefit.
  82         echo "Forcing a recovery..."
  83         onnode 0 ctdb recover
  84     fi
  85
  86     exit $status
  87 }
  88
  89 ctdb_test_exit_hook_add ()
  90 {
  91     ctdb_test_exit_hook="${ctdb_test_exit_hook}${ctdb_test_exit_hook:+ ; }$*"
  92 }
  93
  94 ctdb_test_run ()
  95 {
  96     local name="$1" ; shift
  97
  98     [ -n "$1" ] || set -- "$name"
  99
 100     ctdb_test_begin "$name"
 101
 102     local status=0
 103     "$@" || status=$?
 104
 105     ctdb_test_end "$name" "$status" "$*"
 106
 107     return $status
 108 }
 109
 110 ctdb_test_usage()
 111 {
 112     local status=${1:-2}
 113
 114     cat <<EOF
 115 Usage: $0 [option]
 116
 117 Options:
 118     -h, --help          show this screen.
 119     -v, --version       show test case version.
 120     --category          show the test category (ACL, CTDB, Samba ...).
 121     -d, --description   show test case description.
 122     --summary           show short test case summary.
 123 EOF
 124
 125     exit $status
 126 }
 127
 128 ctdb_test_version ()
 129 {
 130     [ -n "$CTDB_DIR" ] || fail "Can not determine version."
 131
 132     (cd "$CTDB_DIR" && git describe)
 133 }
 134
 135 ctdb_test_cmd_options()
 136 {
 137     [ -n "$1" ] || return 0
 138
 139     case "$1" in
 140         -h|--help)        ctdb_test_usage 0   ;;
 141         -v|--version)     ctdb_test_version   ;;
 142         --category)       echo "CTDB"         ;;
 143         -d|--description) test_info           ;;
 144         --summary)        test_info | head -1 ;;
 145         *)
 146             echo "Error: Unknown parameter = $1"
 147             echo
 148             ctdb_test_usage 2
 149             ;;
 150     esac
 151
 152     exit 0
 153 }
 154
 155 ctdb_test_init ()
 156 {
 157     scriptname=$(basename "$0")
 158     testfailures=0
 159     ctdb_test_restart_scheduled=false
 160
 161     ctdb_test_cmd_options $@
 162
 163     trap "ctdb_test_exit" 0
 164 }
 165
 166 ctdb_test_check_real_cluster ()
 167 {
 168     [ -n "$CTDB_TEST_REAL_CLUSTER" ] && return 0
 169
 170     echo "ERROR: This test must be run on a real/virtual cluster, not local daemons."
 171     return 1
 172 }
 173
 174 ########################################
 175
 176 # Sets: $out
 177 try_command_on_node ()
 178 {
 179     local nodespec="$1" ; shift
 180
 181     local verbose=false
 182     local onnode_opts=""
 183
 184     while [ "${nodespec#-}" != "$nodespec" ] ; do
 185         if [ "$nodespec" = "-v" ] ; then
 186             verbose=true
 187         else
 188             onnode_opts="$nodespec"
 189         fi
 190         nodespec="$1" ; shift
 191     done
 192
 193     local cmd="$*"
 194
 195     out=$(onnode -q $onnode_opts "$nodespec" "$cmd" 2>&1) || {
 196
 197         echo "Failed to execute \"$cmd\" on node(s) \"$nodespec\""
 198         echo "$out"
 199         return 1
 200     }
 201
 202     if $verbose ; then
 203         echo "Output of \"$cmd\":"
 204         echo "$out"
 205     fi
 206 }
 207
 208 sanity_check_output ()
 209 {
 210     local min_lines="$1"
 211     local regexp="$2" # Should be anchored as necessary.
 212     local output="$3"
 213
 214     local ret=0
 215
 216     local num_lines=$(echo "$output" | wc -l)
 217     echo "There are $num_lines lines of output"
 218     if [ $num_lines -lt $min_lines ] ; then
 219         echo "BAD: that's less than the required number (${min_lines})"
 220         ret=1
 221     fi
 222
 223     local status=0
 224     local unexpected # local doesn't pass through status of command on RHS.
 225     unexpected=$(echo "$output" | egrep -v "$regexp") || status=$?
 226
 227     # Note that this is reversed.
 228     if [ $status -eq 0 ] ; then
 229         echo "BAD: unexpected lines in output:"
 230         echo "$unexpected" | cat -A
 231         ret=1
 232     else
 233         echo "Output lines look OK"
 234     fi
 235
 236     return $ret
 237 }
 238
 239 sanity_check_ips ()
 240 {
 241     local ips="$1" # Output of "ctdb ip -n all"
 242
 243     echo "Sanity checking IPs..."
 244
 245     local x ipp prev
 246     prev=""
 247     while read x ipp ; do
 248         [ "$ipp" = "-1" ] && break
 249         if [ -n "$prev" -a "$ipp" != "$prev" ] ; then
 250             echo "OK"
 251             return 0
 252         fi
 253         prev="$ipp"
 254     done <<<"$ips"
 255
 256     echo "BAD: a node was -1 or IPs are only assigned to one node"
 257     echo "Are you running an old version of CTDB?"
 258     return 1
 259 }
 260
 261 select_test_node_and_ips ()
 262 {
 263     try_command_on_node 0 "$CTDB ip -n all | sed -e '1d'"
 264
 265     # When selecting test_node we just want a node that has public
 266     # IPs.  This will work and is economically semi-random.  :-)
 267     local x
 268     read x test_node <<<"$out"
 269
 270     test_node_ips=""
 271     local ip pnn
 272     while read ip pnn ; do
 273         if [ "$pnn" = "$test_node" ] ; then
 274             test_node_ips="${test_node_ips}${test_node_ips:+ }${ip}"
 275         fi
 276     done <<<"$out" # bashism to avoid problem setting variable in pipeline.
 277
 278     echo "Selected node ${test_node} with IPs: ${test_node_ips}."
 279     test_ip="${test_node_ips%% *}"
 280 }
 281
 282 #######################################
 283
 284 # Wait until either timeout expires or command succeeds.  The command
 285 # will be tried once per second.
 286 wait_until ()
 287 {
 288     local timeout="$1" ; shift # "$@" is the command...
 289
 290     echo -n "<${timeout}|"
 291     local t=$timeout
 292     while [ $t -gt 0 ] ; do
 293         if "$@" ; then
 294             echo "|$(($timeout - $t))|"
 295             echo "OK"
 296             return 0
 297         fi
 298         echo -n .
 299         t=$(($t - 1))
 300         sleep 1
 301     done
 302
 303     echo "*TIMEOUT*"
 304
 305     return 1
 306 }
 307
 308 sleep_for ()
 309 {
 310     echo -n "=${1}|"
 311     for i in $(seq 1 $1) ; do
 312         echo -n '.'
 313         sleep 1
 314     done
 315     echo '|'
 316 }
 317
 318 _cluster_is_healthy ()
 319 {
 320     local out x count line
 321
 322     out=$(ctdb -Y status 2>&1) || return 1
 323
 324     {
 325         read x
 326         count=0
 327         while read line ; do
 328             count=$(($count + 1))
 329             [ "${line##:*:*:*1:}" != "$line" ] && return 1
 330         done
 331         [ $count -gt 0 ] && return $?
 332     } <<<"$out" # Yay bash!
 333 }
 334
 335 cluster_is_healthy ()
 336 {
 337     if onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
 338         echo "Cluster is HEALTHY"
 339         return 0
 340     else
 341         echo "Cluster is UNHEALTHY"
 342         if ! ${ctdb_test_restart_scheduled:-false} ; then
 343             echo "DEBUG:"
 344             local i
 345             for i in "onnode -q 0 ctdb status" "onnode -q 0 onnode all ctdb scriptstatus" ; do
 346                 echo "$i"
 347                 $i || true
 348             done
 349         fi
 350         return 1
 351     fi
 352 }
 353
 354 wait_until_healthy ()
 355 {
 356     local timeout="${1:-120}"
 357
 358     echo "Waiting for cluster to become healthy..."
 359
 360     wait_until 120 _cluster_is_healthy
 361 }
 362
 363 # This function is becoming nicely overloaded.  Soon it will collapse!  :-)
 364 node_has_status ()
 365 {
 366     local pnn="$1"
 367     local status="$2"
 368
 369     local bits fpat mpat
 370     case "$status" in
 371         (unhealthy)    bits="?:?:?:1:*" ;;
 372         (healthy)      bits="?:?:?:0:*" ;;
 373         (disconnected) bits="1:*" ;;
 374         (connected)    bits="0:*" ;;
 375         (banned)       bits="?:1:*" ;;
 376         (unbanned)     bits="?:0:*" ;;
 377         (disabled)     bits="?:?:1:*" ;;
 378         (enabled)      bits="?:?:0:*" ;;
 379         (stopped)      bits="?:?:?:?:1:*" ;;
 380         (notstopped)   bits="?:?:?:?:0:*" ;;
 381         (frozen)       fpat='^[[:space:]]+frozen[[:space:]]+1$' ;;
 382         (unfrozen)     fpat='^[[:space:]]+frozen[[:space:]]+0$' ;;
 383         (monon)        mpat='^Monitoring mode:ACTIVE \(0\)$' ;;
 384         (monoff)       mpat='^Monitoring mode:DISABLED \(1\)$' ;;
 385         *)
 386             echo "node_has_status: unknown status \"$status\""
 387             return 1
 388     esac
 389
 390     if [ -n "$bits" ] ; then
 391         local out x line
 392
 393         out=$(ctdb -Y status 2>&1) || return 1
 394
 395         {
 396             read x
 397             while read line ; do
 398                 [ "${line#:${pnn}:*:${bits}}" != "$line" ] && return 0
 399             done
 400             return 1
 401         } <<<"$out" # Yay bash!
 402     elif [ -n "$fpat" ] ; then
 403         ctdb statistics -n "$pnn" | egrep -q "$fpat"
 404     elif [ -n "$mpat" ] ; then
 405         ctdb getmonmode -n "$pnn" | egrep -q "$mpat"
 406     else
 407         echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
 408         return 1
 409     fi
 410 }
 411
 412 wait_until_node_has_status ()
 413 {
 414     local pnn="$1"
 415     local status="$2"
 416     local timeout="${3:-30}"
 417
 418     echo "Waiting until node $pnn has status \"$status\"..."
 419
 420     onnode any $CTDB_TEST_WRAPPER wait_until $timeout node_has_status "$pnn" "$status"
 421 }
 422
 423 # Useful for superficially testing IP failover.
 424 # IPs must be on nodes matching nodeglob.
 425 ips_are_on_nodeglob ()
 426 {
 427     local nodeglob="$1" ; shift
 428     local ips="$*"
 429
 430     local out
 431
 432     try_command_on_node 1 ctdb ip -n all
 433
 434     while read ip pnn ; do
 435         for check in $ips ; do
 436             if [ "$check" = "$ip" ] ; then
 437                 case "$pnn" in
 438                     ($nodeglob) : ;;
 439                     (*) return 1  ;;
 440                 esac
 441                 ips="${ips/${ip}}" # Remove from list
 442             fi
 443         done
 444     done <<<"$out" # bashism to avoid problem setting variable in pipeline.
 445
 446     ips="${ips// }" # Remove any spaces.
 447     [ -z "$ips" ]
 448 }
 449
 450 wait_until_ips_are_on_nodeglob ()
 451 {
 452     echo "Waiting for IPs to fail over..."
 453
 454     wait_until 60 ips_are_on_nodeglob "$@"
 455 }
 456
 457 get_src_socket ()
 458 {
 459     local proto="$1"
 460     local dst_socket="$2"
 461     local pid="$3"
 462     local prog="$4"
 463
 464     local pat="^${proto}[[:space:]]+[[:digit:]]+[[:space:]]+[[:digit:]]+[[:space:]]+[^[:space:]]+[[:space:]]+${dst_socket//./\\.}[[:space:]]+ESTABLISHED[[:space:]]+${pid}/${prog}[[:space:]]*\$"
 465     out=$(netstat -tanp |
 466         egrep "$pat" |
 467         awk '{ print $4 }')
 468
 469     [ -n "$out" ]
 470 }
 471
 472 wait_until_get_src_socket ()
 473 {
 474     local proto="$1"
 475     local dst_socket="$2"
 476     local pid="$3"
 477     local prog="$4"
 478
 479     echo "Waiting for ${prog} to establish connection to ${dst_socket}..."
 480
 481     wait_until 5 get_src_socket "$@"
 482 }
 483
 484 #######################################
 485
 486 # filename will be in $tcpdump_filename, pid in $tcpdump_pid
 487 tcpdump_start ()
 488 {
 489     tcpdump_filter="$1" # global
 490
 491     echo "Running tcpdump..."
 492     tcpdump_filename=$(mktemp)
 493     ctdb_test_exit_hook_add "rm -f $tcpdump_filename"
 494
 495     # The only way of being sure that tcpdump is listening is to send
 496     # some packets that it will see.  So we use dummy pings - the -U
 497     # option to tcpdump ensures that packets are flushed to the file
 498     # as they are captured.
 499     local dummy_addr="127.3.2.1"
 500     local dummy="icmp and dst host ${dummy_addr} and icmp[icmptype] == icmp-echo"
 501     tcpdump -n -p -s 0 -e -U -w $tcpdump_filename -i any "($tcpdump_filter) or ($dummy)" &
 502     ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
 503
 504     echo "Waiting for tcpdump output file to be ready..."
 505     ping -q "$dummy_addr" >/dev/null 2>&1 &
 506     ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
 507
 508     tcpdump_listen_for_dummy ()
 509     {
 510         tcpdump -n -r $tcpdump_filename -c 1 "$dummy" >/dev/null 2>&1
 511     }
 512
 513     wait_until 10 tcpdump_listen_for_dummy
 514 }
 515
 516 # By default, wait for 1 matching packet.
 517 tcpdump_wait ()
 518 {
 519     local count="${1:-1}"
 520     local filter="${2:-${tcpdump_filter}}"
 521
 522     tcpdump_check ()
 523     {
 524         local found=$(tcpdump -n -r $tcpdump_filename "$filter" 2>/dev/null | wc -l)
 525         [ $found -ge $count ]
 526     }
 527
 528     echo "Waiting for tcpdump to capture some packets..."
 529     if ! wait_until 30 tcpdump_check ; then
 530         echo "DEBUG:"
 531         local i
 532         for i in "onnode -q 0 ctdb status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
 533             echo "$i"
 534             $i || true
 535         done
 536         return 1
 537     fi
 538 }
 539
 540 tcpdump_show ()
 541 {
 542     local filter="${1:-${tcpdump_filter}}"
 543
 544     tcpdump -n -r $tcpdump_filename  "$filter" 2>/dev/null
 545 }
 546
 547 tcptickle_sniff_start ()
 548 {
 549     local src="$1"
 550     local dst="$2"
 551
 552     local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
 553     local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
 554     local tickle_ack="${in} and (tcp[tcpflags] & tcp-ack != 0) and (tcp[14] == 4) and (tcp[15] == 210)" # win == 1234
 555     local ack_ack="${out} and (tcp[tcpflags] & tcp-ack != 0)"
 556     tcptickle_reset="${in} and tcp[tcpflags] & tcp-rst != 0"
 557     local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
 558
 559     tcpdump_start "$filter"
 560 }
 561
 562 tcptickle_sniff_wait_show ()
 563 {
 564     tcpdump_wait 1 "$tcptickle_reset"
 565
 566     echo "GOOD: here are some TCP tickle packets:"
 567     tcpdump_show
 568 }
 569
 570 gratarp_sniff_start ()
 571 {
 572     tcpdump_start "arp host ${test_ip}"
 573 }
 574
 575 gratarp_sniff_wait_show ()
 576 {
 577     tcpdump_wait 2
 578
 579     echo "GOOD: this should be the some gratuitous ARPs:"
 580     tcpdump_show
 581 }
 582
 583
 584 #######################################
 585
 586 daemons_stop ()
 587 {
 588     echo "Attempting to politely shutdown daemons..."
 589     onnode 1 ctdb shutdown -n all || true
 590
 591     echo "Sleeping for a while..."
 592     sleep_for 1
 593
 594     if pgrep -f $CTDB_DIR/bin/ctdbd >/dev/null ; then
 595         echo "Killing remaining daemons..."
 596         pkill -f $CTDB_DIR/bin/ctdbd
 597
 598         if pgrep -f $CTDB_DIR/bin/ctdbd >/dev/null ; then
 599             echo "Once more with feeling.."
 600             pkill -9 $CTDB_DIR/bin/ctdbd
 601         fi
 602     fi
 603
 604     local var_dir=$CTDB_DIR/tests/var
 605     rm -rf $var_dir/test.db
 606 }
 607
 608 daemons_setup ()
 609 {
 610     local num_nodes="${CTDB_TEST_NUM_DAEMONS:-2}" # default is 2 nodes
 611
 612     local var_dir=$CTDB_DIR/tests/var
 613
 614     mkdir -p $var_dir/test.db/persistent
 615
 616     local nodes=$var_dir/nodes.txt
 617     local public_addresses=$var_dir/public_addresses.txt
 618     local no_public_addresses=$var_dir/no_public_addresses.txt
 619     rm -f $nodes $public_addresses $no_public_addresses
 620
 621     # If there are (strictly) greater than 2 nodes then we'll randomly
 622     # choose a node to have no public addresses.
 623     local no_public_ips=-1
 624     [ $num_nodes -gt 2 ] && no_public_ips=$(($RANDOM % $num_nodes))
 625     echo "$no_public_ips" >$no_public_addresses
 626
 627     local i
 628     for i in $(seq 1 $num_nodes) ; do
 629         if [ "${CTDB_USE_IPV6}x" != "x" ]; then
 630             echo ::$i >> $nodes
 631             ip addr add ::$i/128 dev lo
 632         else
 633             echo 127.0.0.$i >> $nodes
 634             # 2 public addresses on most nodes, just to make things interesting.
 635             if [ $(($i - 1)) -ne $no_public_ips ] ; then
 636                 echo "192.0.2.$i/24 lo" >> $public_addresses
 637                 echo "192.0.2.$(($i + $num_nodes))/24 lo" >> $public_addresses
 638             fi
 639         fi
 640     done
 641 }
 642
 643 daemons_start_1 ()
 644 {
 645     local pnn="$1"
 646     shift # "$@" gets passed to ctdbd
 647
 648     local var_dir=$CTDB_DIR/tests/var
 649
 650     local nodes=$var_dir/nodes.txt
 651     local public_addresses=$var_dir/public_addresses.txt
 652     local no_public_addresses=$var_dir/no_public_addresses.txt
 653
 654     local no_public_ips=-1
 655     [ -r $no_public_addresses ] && read no_public_ips <$no_public_addresses
 656
 657     if  [ "$no_public_ips" = $pnn ] ; then
 658         echo "Node $no_public_ips will have no public IPs."
 659     fi
 660
 661     local ctdb_options="--reclock=$var_dir/rec.lock --nlist $nodes --nopublicipcheck --event-script-dir=$CTDB_DIR/tests/events.d --logfile=$var_dir/daemons.log -d 0 --dbdir=$var_dir/test.db --dbdir-persistent=$var_dir/test.db/persistent"
 662
 663     if [ $(id -u) -eq 0 ]; then
 664         ctdb_options="$ctdb_options --public-interface=lo"
 665     fi
 666
 667     if [ $pnn -eq $no_public_ips ] ; then
 668         ctdb_options="$ctdb_options --public-addresses=/dev/null"
 669     else
 670         ctdb_options="$ctdb_options --public-addresses=$public_addresses"
 671     fi
 672
 673     # Need full path so we can use "pkill -f" to kill the daemons.
 674     $VALGRIND $CTDB_DIR/bin/ctdbd --socket=$var_dir/sock.$pnn $ctdb_options "$@" ||return 1
 675 }
 676
 677 daemons_start ()
 678 {
 679     # "$@" gets passed to ctdbd
 680
 681     local num_nodes="${CTDB_TEST_NUM_DAEMONS:-2}" # default is 2 nodes
 682
 683     echo "Starting $num_nodes ctdb daemons..."
 684
 685     for i in $(seq 0 $(($num_nodes - 1))) ; do
 686         daemons_start_1 $i "$@"
 687     done
 688
 689     local var_dir=$CTDB_DIR/tests/var
 690
 691     if [ -L /tmp/ctdb.socket -o ! -S /tmp/ctdb.socket ] ; then
 692         ln -sf $var_dir/sock.0 /tmp/ctdb.socket || return 1
 693     fi
 694 }
 695
 696 #######################################
 697
 698 _ctdb_hack_options ()
 699 {
 700     local ctdb_options="$*"
 701
 702     # We really just want to pass CTDB_OPTIONS but on RH
 703     # /etc/sysconfig/ctdb can, and frequently does, set that variable.
 704     # So instead, we hack badly.  We'll add these as we use them.
 705     # Note that these may still be overridden by the above file... but
 706     # we tend to use the exotic options here... so that is unlikely.
 707
 708     case "$ctdb_options" in
 709         *--start-as-stopped*)
 710             export CTDB_START_AS_STOPPED="yes"
 711     esac
 712 }
 713
 714 _restart_ctdb ()
 715 {
 716     _ctdb_hack_options "$@"
 717
 718     if [ -e /etc/redhat-release ] ; then
 719         service ctdb restart
 720     else
 721         /etc/init.d/ctdb restart
 722     fi
 723 }
 724
 725 _ctdb_start ()
 726 {
 727     _ctdb_hack_options "$@"
 728
 729     /etc/init.d/ctdb start
 730 }
 731
 732 setup_ctdb ()
 733 {
 734     if [ -n "$CTDB_NODES_SOCKETS" ] ; then
 735         daemons_setup
 736     fi
 737 }
 738
 739 # Common things to do after starting one or more nodes.
 740 _ctdb_start_post ()
 741 {
 742     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 743
 744     echo "Setting RerecoveryTimeout to 1"
 745     onnode -pq all "ctdb setvar RerecoveryTimeout 1"
 746
 747     # In recent versions of CTDB, forcing a recovery like this blocks
 748     # until the recovery is complete.  Hopefully this will help the
 749     # cluster to stabilise before a subsequent test.
 750     echo "Forcing a recovery..."
 751     onnode -q 0 ctdb recover
 752     sleep_for 1
 753     echo "Forcing a recovery..."
 754     onnode -q 0 ctdb recover
 755
 756     echo "ctdb is ready"
 757 }
 758
 759 # This assumes that ctdbd is not running on the given node.
 760 ctdb_start_1 ()
 761 {
 762     local pnn="$1"
 763     shift # "$@" is passed to ctdbd start.
 764
 765     echo -n "Starting CTDB on node ${pnn}..."
 766
 767     if [ -n "$CTDB_NODES_SOCKETS" ] ; then
 768         daemons_start_1 $pnn "$@"
 769     else
 770         onnode $pnn $CTDB_TEST_WRAPPER _ctdb_start "$@"
 771     fi
 772
 773     # If we're starting only 1 node then we're doing something weird.
 774     ctdb_restart_when_done
 775 }
 776
 777 restart_ctdb ()
 778 {
 779     # "$@" is passed to ctdbd start.
 780
 781     echo -n "Restarting CTDB"
 782     if $ctdb_test_restart_scheduled ; then
 783         echo -n " (scheduled)"
 784     fi
 785     echo "..."
 786
 787     local i=0
 788     while : ; do
 789         if [ -n "$CTDB_NODES_SOCKETS" ] ; then
 790             daemons_stop
 791             daemons_start "$@"
 792         else
 793             onnode -p all $CTDB_TEST_WRAPPER _restart_ctdb "$@"
 794         fi && break
 795
 796         i=$(($i + 1))
 797         [ $i -lt 5 ] || break
 798
 799         echo "That didn't seem to work - sleeping for a while..."
 800         sleep_for 5
 801     done
 802
 803     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 804
 805     echo "Setting RerecoveryTimeout to 1"
 806     onnode -pq all "ctdb setvar RerecoveryTimeout 1"
 807
 808     # In recent versions of CTDB, forcing a recovery like this blocks
 809     # until the recovery is complete.  Hopefully this will help the
 810     # cluster to stabilise before a subsequent test.
 811     echo "Forcing a recovery..."
 812     onnode -q 0 ctdb recover
 813     sleep_for 1
 814     echo "Forcing a recovery..."
 815     onnode -q 0 ctdb recover
 816
 817     echo "ctdb is ready"
 818 }
 819
 820 ctdb_restart_when_done ()
 821 {
 822     ctdb_test_restart_scheduled=true
 823 }
 824
 825 #######################################
 826
 827 install_eventscript ()
 828 {
 829     local script_name="$1"
 830     local script_contents="$2"
 831
 832     if [ -n "$CTDB_TEST_REAL_CLUSTER" ] ; then
 833         # The quoting here is *very* fragile.  However, we do
 834         # experience the joy of installing a short script using
 835         # onnode, and without needing to know the IP addresses of the
 836         # nodes.
 837         onnode all "f=\"\${CTDB_BASE:-/etc/ctdb}/events.d/${script_name}\" ; echo \"Installing \$f\" ; echo '${script_contents}' > \"\$f\" ; chmod 755 \"\$f\""
 838     else
 839         f="${CTDB_DIR}/tests/events.d/${script_name}"
 840         echo "$script_contents" >"$f"
 841         chmod 755 "$f"
 842     fi
 843 }
 844
 845 uninstall_eventscript ()
 846 {
 847     local script_name="$1"
 848
 849     if [ -n "$CTDB_TEST_REAL_CLUSTER" ] ; then
 850         onnode all "rm -vf \"\${CTDB_BASE:-/etc/ctdb}/events.d/${script_name}\""
 851     else
 852         rm -vf "${CTDB_DIR}/tests/events.d/${script_name}"
 853     fi
 854 }