ctdb/config/functions

   1 # Hey Emacs, this is a -*- shell-script -*- !!!
   2
   3 # utility functions for ctdb event scripts
   4
   5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
   6
   7 [ -z "$CTDB_VARDIR" ] && {
   8     if [ -d "/var/lib/ctdb" ] ; then
   9         export CTDB_VARDIR="/var/lib/ctdb"
  10     else
  11         export CTDB_VARDIR="/var/ctdb"
  12     fi
  13 }
  14 [ -z "$CTDB_ETCDIR" ] && {
  15     export CTDB_ETCDIR="/etc"
  16 }
  17
  18 #######################################
  19 # pull in a system config file, if any
  20 _loadconfig() {
  21
  22     if [ -z "$1" ] ; then
  23         foo="${service_config:-${service_name}}"
  24         if [ -n "$foo" ] ; then
  25             loadconfig "$foo"
  26             return
  27         fi
  28     fi
  29
  30     if [ "$1" != "ctdb" ] ; then
  31         loadconfig "ctdb"
  32     fi
  33
  34     if [ -z "$1" ] ; then
  35         return
  36     fi
  37
  38     if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
  39         . $CTDB_ETCDIR/sysconfig/$1
  40     elif [ -f $CTDB_ETCDIR/default/$1 ]; then
  41         . $CTDB_ETCDIR/default/$1
  42     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
  43         . $CTDB_BASE/sysconfig/$1
  44     fi
  45 }
  46
  47 loadconfig () {
  48     _loadconfig "$@"
  49 }
  50
  51 ##############################################################
  52
  53 # CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
  54 # configuration file.
  55 debug ()
  56 {
  57     if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
  58         # If there are arguments then echo them.  Otherwise expect to
  59         # use stdin, which allows us to pass lots of debug using a
  60         # here document.
  61         if [ -n "$1" ] ; then
  62             echo "DEBUG: $*"
  63         elif ! tty -s ; then
  64             sed -e 's@^@DEBUG: @'
  65         fi
  66     fi
  67 }
  68
  69 die ()
  70 {
  71     _msg="$1"
  72     _rc="${2:-1}"
  73
  74     echo "$_msg"
  75     exit $_rc
  76 }
  77
  78 # Log given message or stdin to either syslog or a CTDB log file
  79 # $1 is the tag passed to logger if syslog is in use.
  80 script_log ()
  81 {
  82     _tag="$1" ; shift
  83
  84     _using_syslog=false
  85     if [ "$CTDB_SYSLOG" = "yes" -o -z "$CTDB_LOGFILE" ] ; then
  86         _using_syslog=true
  87     fi
  88     case "$CTDB_OPTIONS" in
  89         *--syslog*) _using_syslog=true ;;
  90     esac
  91
  92     if $_using_syslog ; then
  93         logger -t "ctdbd: ${_tag}" $*
  94     else
  95         {
  96             if [ -n "$*" ] ; then
  97                 echo "$*"
  98             else
  99                 cat
 100             fi
 101         } >>"${CTDB_LOGFILE:-/var/log/log.ctdb}"
 102     fi
 103 }
 104
 105 # When things are run in the background in an eventscript then logging
 106 # output might get lost.  This is the "solution".  :-)
 107 background_with_logging ()
 108 {
 109     (
 110         "$@" 2>&1 </dev/null |
 111         script_log "${script_name}&"
 112     )&
 113
 114     return 0
 115 }
 116
 117 ##############################################################
 118 # check number of args for different events
 119 ctdb_check_args ()
 120 {
 121     case "$1" in
 122         takeip|releaseip)
 123             if [ $# != 4 ]; then
 124                 echo "ERROR: must supply interface, IP and maskbits"
 125                 exit 1
 126             fi
 127             ;;
 128         updateip)
 129             if [ $# != 5 ]; then
 130                 echo "ERROR: must supply old interface, new interface, IP and maskbits"
 131                 exit 1
 132             fi
 133             ;;
 134     esac
 135 }
 136
 137 ##############################################################
 138 # determine on what type of system (init style) we are running
 139 detect_init_style() {
 140     # only do detection if not already set:
 141     test "x$CTDB_INIT_STYLE" != "x" && return
 142
 143     if [ -x /sbin/startproc ]; then
 144         CTDB_INIT_STYLE="suse"
 145     elif [ -x /sbin/start-stop-daemon ]; then
 146         CTDB_INIT_STYLE="debian"
 147     else
 148         CTDB_INIT_STYLE="redhat"
 149     fi
 150 }
 151
 152 ######################################################
 153 # simulate /sbin/service on platforms that don't have it
 154 # _service() makes it easier to hook the service() function for
 155 # testing.
 156 _service ()
 157 {
 158   _service_name="$1"
 159   _op="$2"
 160
 161   # do nothing, when no service was specified
 162   [ -z "$_service_name" ] && return
 163
 164   if [ -x /sbin/service ]; then
 165       $_nice /sbin/service "$_service_name" "$_op"
 166   elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
 167       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
 168   elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
 169       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
 170   fi
 171 }
 172
 173 service()
 174 {
 175     _nice=""
 176     _service "$@"
 177 }
 178
 179 ######################################################
 180 # simulate /sbin/service (niced) on platforms that don't have it
 181 nice_service()
 182 {
 183     _nice="nice"
 184     _service "$@"
 185 }
 186
 187 ######################################################
 188 # wrapper around /proc/ settings to allow them to be hooked
 189 # for testing
 190 # 1st arg is relative path under /proc/, 2nd arg is value to set
 191 set_proc ()
 192 {
 193     echo "$2" >"/proc/$1"
 194 }
 195
 196 ######################################################
 197 # wrapper around getting file contents from /proc/ to allow
 198 # this to be hooked for testing
 199 # 1st arg is relative path under /proc/
 200 get_proc ()
 201 {
 202     cat "/proc/$1"
 203 }
 204
 205 ######################################################
 206 # Check that an RPC service is healthy -
 207 # this includes allowing a certain number of failures
 208 # before marking the NFS service unhealthy.
 209 #
 210 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 211 #
 212 # each triple is a set of 3 arguments: an operator, a
 213 # fail count limit and an action string.
 214 #
 215 # For example:
 216 #
 217 #       nfs_check_rpc_service "lockd" \
 218 #           -ge 15 "verbose restart unhealthy" \
 219 #           -eq 10 "restart:b"
 220 #
 221 # says that if lockd is down for 15 iterations then do
 222 # a verbose restart of lockd and mark the node unhealthy.
 223 # Before this, after 10 iterations of failure, the
 224 # service is restarted silently in the background.
 225 # Order is important: the number of failures need to be
 226 # specified in reverse order because processing stops
 227 # after the first condition that is true.
 228 ######################################################
 229 nfs_check_rpc_service ()
 230 {
 231     _prog_name="$1" ; shift
 232
 233     if _nfs_check_rpc_common "$_prog_name" ; then
 234         return
 235     fi
 236
 237     while [ -n "$3" ] ; do
 238         if _nfs_check_rpc_action "$1" "$2" "$3" ; then
 239             break
 240         fi
 241         shift 3
 242     done
 243 }
 244
 245 # The new way of doing things...
 246 nfs_check_rpc_services ()
 247 {
 248     # Files must end with .check - avoids editor backups, RPM fu, ...
 249     for _f in "${CTDB_BASE}/nfs-rpc-checks.d/"[0-9][0-9].*.check ; do
 250         _t="${_f%.check}"
 251         _prog_name="${_t##*/[0-9][0-9].}"
 252
 253         if _nfs_check_rpc_common "$_prog_name" ; then
 254             # This RPC service is up, check next service...
 255             continue
 256         fi
 257
 258         # Check each line in the file in turn until one of the limit
 259         # checks is hit...
 260         while read _cmp _lim _rest ; do
 261             # Skip comments
 262             case "$_cmp" in
 263                 \#*) continue ;;
 264             esac
 265
 266             if _nfs_check_rpc_action "$_cmp" "$_lim" "$_rest" ; then
 267                 # Limit was hit on this line, no further checking...
 268                 break
 269             fi
 270         done <"$_f"
 271     done
 272 }
 273
 274 _nfs_check_rpc_common ()
 275 {
 276     _prog_name="$1"
 277
 278     # Some platforms don't have separate programs for all services.
 279     case "$_prog_name" in
 280         statd)
 281             which "rpc.${_prog_name}" >/dev/null 2>&1 || return 0
 282     esac
 283
 284     case "$_prog_name" in
 285         nfsd)
 286             _rpc_prog=nfs
 287             _version=3
 288             ;;
 289         mountd)
 290             _rpc_prog=mountd
 291             _version=1
 292             ;;
 293         rquotad)
 294             _rpc_prog=rquotad
 295             _version=1
 296             ;;
 297         lockd)
 298             _rpc_prog=nlockmgr
 299             _version=4
 300             ;;
 301         statd)
 302             _rpc_prog=status
 303             _version=1
 304             ;;
 305         *)
 306             echo "Internal error: unknown RPC program \"$_prog_name\"."
 307             exit 1
 308     esac
 309
 310     _service_name="nfs_${_prog_name}"
 311
 312     if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
 313         ctdb_counter_init "$_service_name"
 314         return 0
 315     fi
 316
 317     ctdb_counter_incr "$_service_name"
 318
 319     return 1
 320 }
 321
 322 _nfs_check_rpc_action ()
 323 {
 324     _cmp="$1"
 325     _limit="$2"
 326     _actions="$3"
 327
 328     if ctdb_check_counter "quiet" "$_cmp" "$_limit" "$_service_name" ; then
 329         return 1
 330     fi
 331
 332     for _action in $_actions ; do
 333         case "$_action" in
 334             verbose)
 335                 echo "$ctdb_check_rpc_out"
 336                 ;;
 337             restart)
 338                 _nfs_restart_rpc_service "$_prog_name"
 339                 ;;
 340             restart:b)
 341                 _nfs_restart_rpc_service "$_prog_name" true
 342                 ;;
 343             unhealthy)
 344                 exit 1
 345                 ;;
 346             *)
 347                 echo "Internal error: unknown action \"$_action\"."
 348                 exit 1
 349         esac
 350     done
 351
 352     return 0
 353 }
 354
 355 _nfs_restart_rpc_service ()
 356 {
 357     _prog_name="$1"
 358     _background="${2:-false}"
 359
 360     if $_background ; then
 361         _maybe_background="background_with_logging"
 362     else
 363         _maybe_background=""
 364     fi
 365
 366     _p="rpc.${_prog_name}"
 367
 368     case "$_prog_name" in
 369         nfsd)
 370             echo "Trying to restart NFS service"
 371             $_maybe_background startstop_nfs restart
 372             ;;
 373         mountd)
 374             echo "Trying to restart $_prog_name [${_p}]"
 375             killall -q -9 "$_p"
 376             $_maybe_background $_p ${MOUNTD_PORT:+-p} $MOUNTD_PORT
 377             ;;
 378         rquotad)
 379             echo "Trying to restart $_prog_name [${_p}]"
 380             killall -q -9 "$_p"
 381             $_maybe_background $_p ${RQUOTAD_PORT:+-p} $RQUOTAD_PORT
 382             ;;
 383         lockd)
 384             echo "Trying to restart lock manager service"
 385             $_maybe_background startstop_nfslock restart
 386             ;;
 387         statd)
 388             echo "Trying to restart $_prog_name [${_p}]"
 389             killall -q -9 "$_p"
 390             $_maybe_background $_p \
 391                 ${STATD_HOSTNAME:+-n} $STATD_HOSTNAME \
 392                 ${STATD_PORT:+-p} $STATD_PORT \
 393                 ${STATD_OUTGOING_PORT:+-o} $STATD_OUTGOING_PORT
 394             ;;
 395         *)
 396             echo "Internal error: unknown RPC program \"$_prog_name\"."
 397             exit 1
 398     esac
 399 }
 400
 401 ######################################################
 402 # check that a rpc server is registered with portmap
 403 # and responding to requests
 404 # usage: ctdb_check_rpc SERVICE_NAME VERSION
 405 ######################################################
 406 ctdb_check_rpc ()
 407 {
 408     progname="$1"
 409     version="$2"
 410
 411     _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
 412
 413     if ! ctdb_check_rpc_out=$(rpcinfo -u $_localhost $progname $version 2>&1) ; then
 414         ctdb_check_rpc_out="ERROR: $progname failed RPC check:
 415 $ctdb_check_rpc_out"
 416         echo "$ctdb_check_rpc_out"
 417         return 1
 418     fi
 419 }
 420
 421 ######################################################
 422 # Ensure $service_name is set
 423 assert_service_name ()
 424 {
 425     [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
 426 }
 427
 428 ######################################################
 429 # check a set of directories is available
 430 # return 1 on a missing directory
 431 # directories are read from stdin
 432 ######################################################
 433 ctdb_check_directories_probe()
 434 {
 435     while IFS="" read d ; do
 436         case "$d" in
 437             *%*)
 438                 continue
 439                 ;;
 440             *)
 441                 [ -d "${d}/." ] || return 1
 442         esac
 443     done
 444 }
 445
 446 ######################################################
 447 # check a set of directories is available
 448 # directories are read from stdin
 449 ######################################################
 450 ctdb_check_directories()
 451 {
 452     ctdb_check_directories_probe || {
 453         echo "ERROR: $service_name directory \"$d\" not available"
 454         exit 1
 455     }
 456 }
 457
 458 ######################################################
 459 # check a set of tcp ports
 460 # usage: ctdb_check_tcp_ports <ports...>
 461 ######################################################
 462
 463 # This flag file is created when a service is initially started.  It
 464 # is deleted the first time TCP port checks for that service succeed.
 465 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 466 # message if a port check fails.
 467 _ctdb_check_tcp_common ()
 468 {
 469     assert_service_name
 470     _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
 471 }
 472
 473 ctdb_check_tcp_init ()
 474 {
 475     _ctdb_check_tcp_common
 476     mkdir -p "${_ctdb_service_started_file%/*}" # dirname
 477     touch "$_ctdb_service_started_file"
 478 }
 479
 480 ctdb_check_tcp_ports()
 481 {
 482     if [ -z "$1" ] ; then
 483         echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
 484         exit 1
 485     fi
 486
 487     # Set default value for CTDB_TCP_PORT_CHECKS if unset.
 488     # If any of these defaults are unsupported then this variable can
 489     # be overridden in /etc/sysconfig/ctdb or via a file in
 490     # /etc/ctdb/rc.local.d/.
 491     : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
 492
 493     for _c in $CTDB_TCP_PORT_CHECKERS ; do
 494         ctdb_check_tcp_ports_$_c "$@"
 495         case "$?" in
 496             0)
 497                 _ctdb_check_tcp_common
 498                 rm -f "$_ctdb_service_started_file"
 499                 return 0
 500                 ;;
 501             1)
 502                 _ctdb_check_tcp_common
 503                 if [ ! -f "$_ctdb_service_started_file" ] ; then
 504                     echo "ERROR: $service_name tcp port $_p is not responding"
 505                     debug <<EOF
 506 $ctdb_check_tcp_ports_debug
 507 EOF
 508                 else
 509                     echo "INFO: $service_name tcp port $_p is not responding"
 510                 fi
 511
 512                 return 1
 513                 ;;
 514             127)
 515                 debug <<EOF
 516 ctdb_check_ports - checker $_c not implemented
 517 output from checker was:
 518 $ctdb_check_tcp_ports_debug
 519 EOF
 520                 ;;
 521             *)
 522
 523         esac
 524     done
 525
 526     echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
 527
 528     return 127
 529 }
 530
 531 ctdb_check_tcp_ports_netstat ()
 532 {
 533     _cmd='netstat -l -t -n'
 534     _ns=$($_cmd 2>&1)
 535     if [ $? -eq 127 ] ; then
 536         # netstat probably not installed - unlikely?
 537         ctdb_check_tcp_ports_debug="$_ns"
 538         return 127
 539     fi
 540
 541     for _p ; do  # process each function argument (port)
 542         for _a in '0\.0\.0\.0' '::' ; do
 543             _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
 544             if echo "$_ns" | grep -E -q "$_pat" ; then
 545                 # We matched the port, so process next port
 546                 continue 2
 547             fi
 548         done
 549
 550         # We didn't match the port, so flag an error.
 551         ctdb_check_tcp_ports_debug="$_cmd shows this output:
 552 $_ns"
 553         return 1
 554     done
 555
 556     return 0
 557 }
 558
 559 ctdb_check_tcp_ports_nmap ()
 560 {
 561     # nmap wants a comma-separated list of ports
 562     _ports=""
 563     for _p ; do
 564         _ports="${_ports}${_ports:+,}${_p}"
 565     done
 566
 567     _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
 568
 569     _nmap_out=$($_cmd 2>&1)
 570     if [ $? -eq 127 ] ; then
 571         # nmap probably not installed
 572         ctdb_check_tcp_ports_debug="$_nmap_out"
 573         return 127
 574     fi
 575
 576     # get the port-related output
 577     _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
 578
 579     for _p ; do
 580         # looking for something like this:
 581         #  445/open/tcp//microsoft-ds///
 582         # possibly followed by a comma
 583         _t="$_p/open/tcp//"
 584         case "$_port_info" in
 585             # The info we're after must be either at the beginning of
 586             # the string or it must follow a space.
 587             $_t*|*\ $_t*) : ;;
 588             *)
 589                 # Nope, flag an error...
 590                 ctdb_check_tcp_ports_debug="$_cmd shows this output:
 591 $_nmap_out"
 592                 return 1
 593         esac
 594     done
 595
 596     return 0
 597 }
 598
 599 # Use the new "ctdb checktcpport" command to check the port.
 600 # This is very cheap.
 601 ctdb_check_tcp_ports_ctdb ()
 602 {
 603     for _p ; do  # process each function argument (port)
 604         _cmd="ctdb checktcpport $_p"
 605         _out=$($_cmd 2>&1)
 606         _ret=$?
 607         case "$_ret" in
 608             0)
 609                 ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
 610                 return 1
 611                 ;;
 612             98)
 613                 # Couldn't bind, something already listening, next port...
 614                 continue
 615                 ;;
 616             *)
 617                 ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
 618 $_out"
 619                 # assume not implemented
 620                 return 127
 621         esac
 622     done
 623
 624     return 0
 625 }
 626
 627 ######################################################
 628 # check a unix socket
 629 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 630 ######################################################
 631 ctdb_check_unix_socket() {
 632     socket_path="$1"
 633     [ -z "$socket_path" ] && return
 634
 635     if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
 636         echo "ERROR: $service_name socket $socket_path not found"
 637         return 1
 638     fi
 639 }
 640
 641 ######################################################
 642 # check a command returns zero status
 643 # usage: ctdb_check_command <command>
 644 ######################################################
 645 ctdb_check_command ()
 646 {
 647     _out=$("$@" 2>&1) || {
 648         echo "ERROR: $* returned error"
 649         echo "$_out" | debug
 650         exit 1
 651     }
 652 }
 653
 654 ################################################
 655 # kill off any TCP connections with the given IP
 656 ################################################
 657 kill_tcp_connections ()
 658 {
 659     _ip="$1"
 660
 661     _oneway=false
 662     if [ "$2" = "oneway" ] ; then
 663         _oneway=true
 664     fi
 665
 666     get_tcp_connections_for_ip "$_ip" | {
 667         _killcount=0
 668         _connections=""
 669         _nl="
 670 "
 671         while read _dst _src; do
 672             _destport="${_dst##*:}"
 673             __oneway=$_oneway
 674             case $_destport in
 675                 # we only do one-way killtcp for CIFS
 676                 139|445) __oneway=true ;;
 677             esac
 678
 679             echo "Killing TCP connection $_src $_dst"
 680             _connections="${_connections}${_nl}${_src} ${_dst}"
 681             if ! $__oneway ; then
 682                 _connections="${_connections}${_nl}${_dst} ${_src}"
 683             fi
 684
 685             _killcount=$(($_killcount + 1))
 686         done
 687
 688         if [ $_killcount -eq 0 ] ; then
 689             return
 690         fi
 691
 692         echo "$_connections" | ctdb killtcp || {
 693             echo "Failed to send killtcp control"
 694             return
 695         }
 696
 697         _count=0
 698         while : ; do
 699             _remaining=$(get_tcp_connections_for_ip $_ip | wc -l)
 700
 701             if [ $_remaining -eq 0 ] ; then
 702                 echo "Killed $_killcount TCP connections to released IP $_ip"
 703                 return
 704             fi
 705
 706             _count=$(($_count + 1))
 707             if [ $_count -gt 3 ] ; then
 708                 echo "Timed out killing tcp connections for IP $_ip"
 709                 return
 710             fi
 711
 712             echo "Waiting for $_remaining connections to be killed for IP $_ip"
 713             sleep 1
 714         done
 715     }
 716 }
 717
 718 ##################################################################
 719 # kill off the local end for any TCP connections with the given IP
 720 ##################################################################
 721 kill_tcp_connections_local_only ()
 722 {
 723     kill_tcp_connections "$1" "oneway"
 724 }
 725
 726 ##################################################################
 727 # tickle any TCP connections with the given IP
 728 ##################################################################
 729 tickle_tcp_connections ()
 730 {
 731     _ip="$1"
 732
 733     get_tcp_connections_for_ip "$_ip" |
 734     {
 735         _failed=false
 736
 737         while read dest src; do
 738             echo "Tickle TCP connection $src $dest"
 739             ctdb tickle $src $dest >/dev/null 2>&1 || _failed=true
 740             echo "Tickle TCP connection $dest $src"
 741             ctdb tickle $dest $src >/dev/null 2>&1 || _failed=true
 742         done
 743
 744         if $_failed ; then
 745             echo "Failed to send tickle control"
 746         fi
 747     }
 748 }
 749
 750 get_tcp_connections_for_ip ()
 751 {
 752     _ip="$1"
 753
 754     netstat -tn | awk -v ip=$_ip \
 755         'index($1, "tcp") == 1 && \
 756          (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) \
 757          && $6 == "ESTABLISHED" \
 758          {print $4" "$5}'
 759 }
 760
 761 ########################################################
 762 # start/stop the Ganesha nfs service
 763 ########################################################
 764 startstop_ganesha()
 765 {
 766     _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
 767     case "$1" in
 768         start)
 769             service "$_service_name" start
 770             ;;
 771         stop)
 772             service "$_service_name" stop
 773             ;;
 774         restart)
 775             service "$_service_name" restart
 776             ;;
 777     esac
 778 }
 779
 780 ########################################################
 781 # start/stop the nfs service on different platforms
 782 ########################################################
 783 startstop_nfs() {
 784         PLATFORM="unknown"
 785         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 786                 PLATFORM="sles"
 787         }
 788         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 789                 PLATFORM="rhel"
 790         }
 791
 792         case $PLATFORM in
 793         sles)
 794                 case $1 in
 795                 start)
 796                         service nfsserver start
 797                         ;;
 798                 stop)
 799                         service nfsserver stop > /dev/null 2>&1
 800                         ;;
 801                 restart)
 802                         set_proc "fs/nfsd/threads" 0
 803                         service nfsserver stop > /dev/null 2>&1
 804                         pkill -9 nfsd
 805                         nfs_dump_some_threads
 806                         service nfsserver start
 807                         ;;
 808                 esac
 809                 ;;
 810         rhel)
 811                 case $1 in
 812                 start)
 813                         service nfslock start
 814                         service nfs start
 815                         ;;
 816                 stop)
 817                         service nfs stop
 818                         service nfslock stop
 819                         ;;
 820                 restart)
 821                         set_proc "fs/nfsd/threads" 0
 822                         service nfs stop > /dev/null 2>&1
 823                         service nfslock stop > /dev/null 2>&1
 824                         pkill -9 nfsd
 825                         nfs_dump_some_threads
 826                         service nfslock start
 827                         service nfs start
 828                         ;;
 829                 esac
 830                 ;;
 831         *)
 832                 echo "Unknown platform. NFS is not supported with ctdb"
 833                 exit 1
 834                 ;;
 835         esac
 836 }
 837
 838 # Dump up to the configured number of nfsd thread backtraces.
 839 nfs_dump_some_threads ()
 840 {
 841     [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0
 842
 843     # Optimisation to avoid running an unnecessary pidof
 844     [ $CTDB_NFS_DUMP_STUCK_THREADS -gt 0 ] || return 0
 845
 846     _count=0
 847     for _pid in $(pidof nfsd) ; do
 848         [ $_count -le $CTDB_NFS_DUMP_STUCK_THREADS ] || break
 849
 850         # Do this first to avoid racing with thread exit
 851         _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
 852         if [ -n "$_stack" ] ; then
 853             echo "Stack trace for stuck nfsd thread [${_pid}]:"
 854             echo "$_stack"
 855             _count=$(($_count + 1))
 856         fi
 857     done
 858 }
 859
 860 ########################################################
 861 # start/stop the nfs lockmanager service on different platforms
 862 ########################################################
 863 startstop_nfslock() {
 864         PLATFORM="unknown"
 865         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 866                 PLATFORM="sles"
 867         }
 868         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 869                 PLATFORM="rhel"
 870         }
 871
 872         case $PLATFORM in
 873         sles)
 874                 # for sles there is no service for lockmanager
 875                 # so we instead just shutdown/restart nfs
 876                 case $1 in
 877                 start)
 878                         service nfsserver start
 879                         ;;
 880                 stop)
 881                         service nfsserver stop > /dev/null 2>&1
 882                         ;;
 883                 restart)
 884                         service nfsserver stop > /dev/null 2>&1
 885                         service nfsserver start
 886                         ;;
 887                 esac
 888                 ;;
 889         rhel)
 890                 case $1 in
 891                 start)
 892                         service nfslock start
 893                         ;;
 894                 stop)
 895                         service nfslock stop > /dev/null 2>&1
 896                         ;;
 897                 restart)
 898                         service nfslock stop > /dev/null 2>&1
 899                         service nfslock start
 900                         ;;
 901                 esac
 902                 ;;
 903         *)
 904                 echo "Unknown platform. NFS locking is not supported with ctdb"
 905                 exit 1
 906                 ;;
 907         esac
 908 }
 909
 910 # Periodically update the statd database
 911 nfs_statd_update ()
 912 {
 913     _update_period="$1"
 914
 915     _statd_update_trigger="$service_state_dir/update-trigger"
 916     [ -f "$_statd_update_trigger" ] || touch "$_statd_update_trigger"
 917
 918     _last_update=$(stat --printf="%Y" "$_statd_update_trigger")
 919     _current_time=$(date +"%s")
 920     if [ $(( $_current_time - $_last_update)) -ge $_update_period ] ; then
 921         touch "$_statd_update_trigger"
 922         $CTDB_BASE/statd-callout updatelocal &
 923         $CTDB_BASE/statd-callout updateremote &
 924     fi
 925 }
 926
 927 add_ip_to_iface()
 928 {
 929     _iface=$1
 930     _ip=$2
 931     _maskbits=$3
 932
 933     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 934     mkdir -p "${_lockfile%/*}" # dirname
 935     [ -f "$_lockfile" ] || touch "$_lockfile"
 936
 937     (
 938         # Note: use of return/exit/die() below only gets us out of the
 939         # sub-shell, which is actually what we want.  That is, the
 940         # function should just return non-zero.
 941
 942         flock --timeout 30 0 || \
 943             die "add_ip_to_iface: unable to get lock for ${_iface}"
 944
 945         # Ensure interface is up
 946         ip link set "$_iface" up || \
 947             die "Failed to bringup interface $_iface"
 948
 949         ip addr add "$_ip/$_maskbits" brd + dev "$_iface" || \
 950             die "Failed to add $_ip/$_maskbits on dev $_iface"
 951     ) <"$_lockfile"
 952
 953     # Do nothing here - return above only gets us out of the subshell
 954     # and doing anything here will affect the return code.
 955 }
 956
 957 delete_ip_from_iface()
 958 {
 959     _iface=$1
 960     _ip=$2
 961     _maskbits=$3
 962
 963     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 964     mkdir -p "${_lockfile%/*}" # dirname
 965     [ -f "$_lockfile" ] || touch "$_lockfile"
 966
 967     (
 968         # Note: use of return/exit/die() below only gets us out of the
 969         # sub-shell, which is actually what we want.  That is, the
 970         # function should just return non-zero.
 971
 972         flock --timeout 30 0 || \
 973             die "delete_ip_from_iface: unable to get lock for ${_iface}"
 974
 975         _im="$_ip/$_maskbits"  # shorthand for readability
 976
 977         # "ip addr del" will delete all secondary IPs if this is the
 978         # primary.  To work around this _very_ annoying behaviour we
 979         # have to keep a record of the secondaries and re-add them
 980         # afterwards.  Yuck!
 981
 982         _secondaries=""
 983         if ip addr list dev "$_iface" primary | grep -Fq "inet $_im " ; then
 984             _secondaries=$(ip addr list dev "$_iface" secondary | \
 985                 awk '$1 == "inet" { print $2 }')
 986         fi
 987
 988         local _rc=0
 989         ip addr del "$_im" dev "$_iface" || {
 990             echo "Failed to del $_ip on dev $_iface"
 991             _rc=1
 992         }
 993
 994         if [ -n "$_secondaries" ] ; then
 995             for _i in $_secondaries; do
 996                 if ip addr list dev "$_iface" | grep -Fq "inet $_i" ; then
 997                     echo "Kept secondary $_i on dev $_iface"
 998                 else
 999                     echo "Re-adding secondary address $_i to dev $_iface"
1000                     ip addr add $_i brd + dev $_iface || {
1001                         echo "Failed to re-add address $_i to dev $_iface"
1002                         _rc=1
1003                     }
1004                 fi
1005             done
1006         fi
1007
1008         return $_rc
1009     ) <"$_lockfile"
1010
1011     # Do nothing here - return above only gets us out of the subshell
1012     # and doing anything here will affect the return code.
1013 }
1014
# If the given IP is hosted then print 2 items: maskbits and iface
#   $1 - IP address (without mask bits)
# Prints nothing if "ip addr show" reports no matching "inet" line.
ip_maskbits_iface ()
{
    _addr="$1"

    # sub() is standard awk; gensub() exists only in gawk.  Stripping
    # everything up to the "/" in field 2 leaves the mask bits, and the
    # last field of the "inet" line is printed as the interface.
    ip addr show to "${_addr}/32" 2>/dev/null | \
        awk '$1 == "inet" { sub(".*/", "", $2) ; print $2, $NF }'
}
1023
# Remove a public address from whichever interface currently hosts it.
#   $1 - IP address, optionally followed by /maskbits (which is ignored)
# Does nothing, successfully, if the address is not hosted.
drop_ip ()
{
    _addr="${1%/*}"  # Remove optional maskbits

    # Positional parameters become: maskbits iface (or nothing at all)
    set -- $(ip_maskbits_iface $_addr)
    [ -n "$1" ] || return 0

    _maskbits="$1"
    _iface="$2"
    echo "Removing public address $_addr/$_maskbits from device $_iface"
    delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
}
1036
# Call drop_ip for the first field of every line of the file named by
# $CTDB_PUBLIC_ADDRESSES.  With the variable unset or empty, /dev/null
# is read instead, so nothing is dropped.
drop_all_public_ips ()
{
    _public_addresses="${CTDB_PUBLIC_ADDRESSES:-/dev/null}"

    # _x soaks up any remaining fields on the line
    while read _ip _x ; do
        drop_ip "$_ip"
    done <"$_public_addresses"
}
1043
1044 ########################################################
1045 # Simple counters
# Shared setup for the counter functions.
#   $1 - optional service name; falls back to $service_name and then
#        $script_name
# Sets $_service_name and $_counter_file and creates the directory
# that will hold the counter file.
_ctdb_counter_common () {
    _service_name="$1"
    if [ -z "$_service_name" ] ; then
        _service_name="${service_name:-${script_name}}"
    fi
    _counter_file="${ctdb_fail_dir}/${_service_name}"
    mkdir -p "${_counter_file%/*}" # dirname
}
# Reset the counter for a service by truncating (or creating) its
# counter file.
#   $1 - optional service name, passed to _ctdb_counter_common
ctdb_counter_init () {
    _ctdb_counter_common "$1"

    # Empty file == counter value of zero
    : >"$_counter_file"
}
# Increment the counter for a service.
#   $1 - optional service name, passed to _ctdb_counter_common
ctdb_counter_incr () {
    _ctdb_counter_common "$1"

    # Unary counting: the counter value is the file size in bytes, so
    # exactly one byte must be appended.  printf is used because
    # "echo -n" is not portable and may write "-n 1" plus a newline.
    printf '1' >> "$_counter_file"
}
# Compare the counter for a service against a limit.
#   $1 - "error" (default) to print a message and exit 1 when the
#        check hits; anything else makes a hit silently return 1
#   $2 - integer operator supported by test (default -ge), or "%" to
#        hit when the counter is a multiple of the limit
#   $3 - limit (default $service_fail_limit)
#   $4 - optional service name, passed to _ctdb_counter_common
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"

    # "shift 3" with fewer than 3 arguments is an error: some shells
    # abort, others leave the arguments in place, so that $1 (the
    # message) would then be taken for the service name.  Only shift
    # when possible and otherwise drop what was given.
    if [ $# -ge 3 ] ; then
        shift 3
    else
        set --
    fi
    _ctdb_counter_common "$1"

    # unary counting!  A missing counter file counts as 0.
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    _hit=false
    if [ "$_op" != "%" ] ; then
        if [ $_size $_op $_limit ] ; then
            _hit=true
        fi
    else
        if [ $(($_size $_op $_limit)) -eq 0 ] ; then
            _hit=true
        fi
    fi
    if $_hit ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}
1090
1091 ########################################################
1092
# Directories under $CTDB_VARDIR used by the counter and status helpers
ctdb_fail_dir="$CTDB_VARDIR/failcount"
ctdb_status_dir="$CTDB_VARDIR/status"

# Set $service_state_dir for a service and make sure it exists.
#   $1 - optional service name (default $service_name)
# Exits with status 1 if the directory cannot be created.
ctdb_setup_service_state_dir ()
{
    service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
1104
1105 ########################################################
1106 # Managed status history, for auto-start/stop
1107
# Directory holding one marker file per service
ctdb_managed_dir="$CTDB_VARDIR/managed_history"

# Set $_ctdb_managed_file to the marker file for $service_name.
_ctdb_managed_common ()
{
    _ctdb_managed_file="${ctdb_managed_dir}/${service_name}"
}
1114
# Record that $service_name is managed by creating (or touching) its
# marker file under $ctdb_managed_dir.
ctdb_service_managed ()
{
    _ctdb_managed_common

    # The marker directory may not exist yet
    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}
1121
# Record that $service_name is not managed by removing its marker
# file.  A missing marker file is not an error.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common

    rm -f "$_ctdb_managed_file"
}
1127
# Succeed if the marker file for $service_name exists as a regular
# file.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common

    test -f "$_ctdb_managed_file"
}
1133
1134 ########################################################
1135 # Check and set status
1136
# Print a one-line problem report for this script.
#   $1 - node state to report (e.g. "unhealthy", "banned")
#   $2 - file whose contents describe the problem
log_status_cat ()
{
    # The file name is quoted so that a status directory containing
    # whitespace or glob characters is passed to cat as a single path.
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}
1141
# Report any status recorded for this script under $ctdb_status_dir.
# Returns 1 (after logging) if a readable "unhealthy" file exists,
# otherwise 2 (after logging) if a readable "banned" file exists,
# otherwise 0.
ctdb_checkstatus ()
{
    # "unhealthy" takes precedence over "banned"
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    fi

    if [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    fi

    return 0
}
1154
# Record or clear the status for this script.
#   $1 - "unhealthy" or "banned" to record that status; anything else
#        clears both
#   $2 - file whose contents are stored as the status description
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"
    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        # Any other status means healthy: drop both marker files
        for i in "banned" "unhealthy" ; do
            rm -f "$d/$i"
        done
    fi
}
1170
1171 ##################################################################
1172 # Reconfigure a service on demand
1173
# Shared setup for the reconfigure helpers.  Sets $_d to the status
# directory for $service_name (creating it if necessary) and
# $_ctdb_service_reconfigure_flag to the flag file inside it.
_ctdb_service_reconfigure_common ()
{
    _d="${ctdb_status_dir}/${service_name}"
    _ctdb_service_reconfigure_flag="${_d}/reconfigure"
    mkdir -p "$_d"
}
1180
# Succeed if the reconfigure flag file exists for $service_name.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common

    test -e "$_ctdb_service_reconfigure_flag"
}
1186
# Schedule a reconfigure of $service_name by creating (or truncating)
# its reconfigure flag file.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common

    : >"$_ctdb_service_reconfigure_flag"
}
1192
# Cancel any scheduled reconfigure of $service_name by removing its
# reconfigure flag file.  A missing flag file is not an error.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common

    rm -f "$_ctdb_service_reconfigure_flag"
}
1198
# Reconfigure $service_name: clear the reconfigure flag, run
# service_reconfigure and, if that succeeds, reset the failure
# counter.  A failing service_reconfigure status is returned as-is.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure
    if service_reconfigure ; then
        ctdb_counter_init
    else
        # $? here is still the status of service_reconfigure
        return $?
    fi
}
1206
# Default service_reconfigure(): a no-op that always succeeds.
service_reconfigure ()
{
    return 0
}
1212
# Try to claim the reconfigure lock for $service_name.
# The lock file holds the name of the event that claimed it on the
# first line, followed by one PID per line.  Returns 1 if any listed
# PID other than our own is still alive; otherwise records
# $event_name and our PID in the lock file and returns 0.
ctdb_reconfigure_try_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    mkdir -p "${_lock%/*}" # dirname
    touch "$_lock"

    (
        # The lock file is on stdin, so flock serialises access to it
        # and the reads below consume its current contents.
        flock 0

        # Looping over PIDs is overkill for a single holder but keeps
        # working if several PIDs are ever written to the file.
        read _locker_event
        if [ -n "$_locker_event" ] ; then
            while read _pid ; do
                [ -n "$_pid" ] || continue
                [ "$_pid" != $$ ] || continue
                if kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}
1239
# Print the error output that "ctdb scriptstatus -Y" recorded for
# this script's previous monitor event and exit with the recorded
# code.  TIMEDOUT is replayed as exit code 1 and DISABLED as exit
# code 0.  This function never returns.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."

    # Some versions omit the leading colon (':'), hence the ":?".
    _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")

    # A line looks like this:
    # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
    # Splitting on ':' and then re-splitting on whitespace is the
    # cheapest way of getting at the fields in the middle.
    set -- $(IFS=":" ; echo $_out)
    _code="$3"
    _status="$4"

    # The error output is the last field and may itself contain
    # colons, so cut it out by stripping the leading fields rather
    # than by splitting.  The loose match at the start copes with
    # both the broken (no leading ':') and the fixed output.
    _out="${_out%:}"
    _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"

    # OK and ERROR are replayed unchanged.  The other two statuses
    # cannot be reproduced because we can't exit with the correct
    # negative number, so they are recast.
    if [ "$_status" = "TIMEDOUT" ] ; then
        _code=1
        _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
    elif [ "$_status" = "DISABLED" ] ; then
        _code=0
        _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
    fi

    [ -z "$_err_out" ] || echo "$_err_out"
    exit $_code
}
1277
# Run, defer or retry a scheduled reconfigure, depending on
# $event_name and on whether the reconfigure lock can be claimed.
# Only the monitor, ipreallocated and reconfigure events are handled;
# for any other event this returns 0 immediately.  Several paths
# exit the script instead of returning.
ctdb_service_check_reconfigure ()
{
    assert_service_name

    # Events not listed here are of no interest.
    case "$event_name" in
        monitor|ipreallocated|reconfigure) : ;;
        *) return 0 ;;
    esac

    if ! ctdb_reconfigure_try_lock ; then
        # Somebody else is running an event we don't want to collide
        # with, so tread carefully.
        case "$event_name" in
            reconfigure)
                # Tell whoever called us to retry.
                exit 2
                ;;
            ipreallocated)
                # Defer any scheduled reconfigure and let the rest of
                # the ipreallocated event run.  This assumes the event
                # doesn't depend on the scheduled reconfigure.
                return 0
                ;;
            monitor)
                # A reconfigure is most likely in progress, so the
                # service may be unstable.  Defer any scheduled
                # reconfigure and replay the previous monitor status,
                # which is the best information available.
                ctdb_replay_monitor_status
                ;;
        esac
        return
    fi

    # We hold the lock: no other event covered by this function is
    # running, so it is safe to reconfigure.
    case "$event_name" in
        reconfigure)
            (ctdb_service_reconfigure)
            exit $?
            ;;
        ipreallocated)
            if ctdb_service_needs_reconfigure ; then
                ctdb_service_reconfigure
            fi
            ;;
        monitor)
            if ctdb_service_needs_reconfigure ; then
                ctdb_service_reconfigure
                # The service might not be stable yet after the
                # reconfigure, so replay the previous status since
                # that's the best information we have.
                ctdb_replay_monitor_status
            fi
            ;;
    esac
}
1340
1341 ##################################################################
1342 # Does CTDB manage this service? - and associated auto-start/stop
1343
# Backward-compatibility helper: append a service to
# $CTDB_MANAGED_SERVICES when an old-style setting enables it.
#   $1 - value of the old-style setting; only "yes" enables
#   $2 - service name; only appended when it equals $service_name
ctdb_compat_managed_service ()
{
    [ "$1" = "yes" ] || return 0
    [ "$2" = "$service_name" ] || return 0

    CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
}
1350
# Succeed if $service_name is listed in $CTDB_MANAGED_SERVICES,
# either directly or after the old-style CTDB_MANAGES_* settings
# have been folded into it.
is_ctdb_managed_service ()
{
    assert_service_name

    # $t pads the list with spaces so that a whole-word match can be
    # made on "<space>$service_name<space>".
    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
    esac

    # No match: update $CTDB_MANAGED_SERVICES from the old-style
    # settings for backward compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "
    case "$t" in
        *" "${service_name}" "*) return 0 ;;
        *) return 1 ;;
    esac
}
1382
# Handle the service-start/service-stop pseudo-events and, when
# $CTDB_SERVICE_AUTOSTARTSTOP is "yes", automatically start or stop
# the service during the monitor event when its managed state has
# changed.  Returns 0 when there is nothing to do; exits the script
# whenever it starts or stops the service or refuses a pseudo-event.
ctdb_start_stop_service ()
{
    assert_service_name

    # The pseudo-events allow manual start/stop, but only when the
    # service is not managed and auto-start/stop is off.
    case "$event_name" in
        service-start|service-stop)
            if is_ctdb_managed_service ; then
                die "$event_name event not permitted when service is managed"
            fi
            if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
                die "$event_name event not permitted with \$CTDB_SERVICE_AUTOSTARTSTOP = yes"
            fi
            if [ "$event_name" = "service-start" ] ; then
                ctdb_service_start
            else
                ctdb_service_stop
            fi
            exit $?
            ;;
    esac

    # Auto-start/stop only happens when configured, and only during
    # the monitor event.
    if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] ; then
        return 0
    fi
    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    if is_ctdb_managed_service ; then
        # Managed now and already marked managed: nothing to do
        if is_ctdb_previously_managed_service ; then
            return 0
        fi
        echo "Starting service \"$service_name\" - now managed"
        background_with_logging ctdb_service_start
        exit $?
    fi

    # Not managed now and never marked managed: nothing to do
    if ! is_ctdb_previously_managed_service ; then
        return 0
    fi
    echo "Stopping service \"$service_name\" - no longer managed"
    background_with_logging ctdb_service_stop
    exit $?
}
1432
# Start the service.  The service is marked as managed before the
# start is attempted, so it stays marked even if the start fails.
# On success the failure counter and TCP check state are reset; on
# failure the status of service_start is returned.
ctdb_service_start ()
{
    ctdb_service_managed

    if service_start ; then
        ctdb_counter_init
        ctdb_check_tcp_init
    else
        # $? here is still the status of service_start
        return $?
    fi
}
1443
# Stop the service: first remove its managed marker, then run
# service_stop, whose status is returned.
ctdb_service_stop ()
{
    ctdb_service_unmanaged

    service_stop
}
1449
1450 # Default service_start() and service_stop() functions.
1451
1452 # These may be overridden in an eventscript.  When overriding, the
1453 # following convention must be followed.  If these functions are
1454 # called with no arguments then they may use internal logic to
1455 # determine whether the service is managed and, therefore, whether
1456 # they should take any action.  However, if the service name is
1457 # specified as an argument then an attempt must be made to start or
1458 # stop the service.  This is because the auto-start/stop code calls
1459 # them with the service name as an argument.
service_start ()
{
    # Default implementation: hand over to the "service" command
    service "${service_name}" start
}
1464
service_stop ()
{
    # Default implementation: hand over to the "service" command
    service "${service_name}" stop
}
1469
1470 ##################################################################
1471
# Handle the events that are implemented here for every script.
#   $1 - event name; "status" reports the recorded status and
#        "setstatus" records one using the remaining arguments
# Both of those events exit the script with the status of the helper
# they call.  Any other event is left for the caller to handle.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
1486
# iptables doesn't like being re-entered, so flock-wrap it.
# All arguments are passed through to /sbin/iptables, which is run
# under a lock file in $CTDB_VARDIR with a 30 second lock timeout.
iptables()
{
        # The lock path is quoted so that a $CTDB_VARDIR containing
        # whitespace still reaches flock as a single argument.
        flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}
1492
# AIX (and perhaps others?) doesn't have mktemp
# Minimal replacement, only defined when no mktemp command is found.
#   -d  create a directory instead of a file
# Any other arguments are ignored.  Prints the created path, or
# returns 1 without printing anything if creation failed.
# "command -v" is the standard way to look up a command; "which" is
# not available everywhere.
if ! command -v mktemp >/dev/null 2>&1 ; then
    mktemp ()
    {
        _dir=false
        if [ "$1" = "-d" ] ; then
            _dir=true
            shift
        fi
        # Not called $_d: that name is used by other helpers in this
        # file and must not be clobbered here.
        _tmpdir="${TMPDIR:-/tmp}"
        # First 10 hex digits of an MD5 over some random data
        _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
            md5sum | \
            sed -e 's@\(..........\).*@\1@')
        _t="${_tmpdir}/tmp.${_hex10}"
        (
            # Sub-shell so the umask and noclobber changes don't leak
            umask 077
            if $_dir ; then
                mkdir "$_t"
            else
                # noclobber: fail rather than reuse an existing file
                set -C
                : >"$_t"
            fi
        ) || return 1
        echo "$_t"
    }
fi
1518
1519 ########################################################
1520 # tickle handling
1521 ########################################################
1522
# Synchronise the tickles registered with ctdb for a port with the
# TCP connections currently established to this node's public IPs.
#   $1 - TCP port
update_tickles ()
{
        _port="$1"

        tickledir="$CTDB_VARDIR/state/tickles"
        mkdir -p "$tickledir"

        # Our node number, with the "PNN:" prefix stripped
        _pnn=$(ctdb pnn)
        _pnn=${_pnn#PNN:}

        # Public IPs that "ctdb -Y ip" lists against our node number
        _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')

        # The same IPs as a regexp alternation with the dots escaped
        _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

        # Established connections to our public IPs on this port, as
        # sorted "source destination" pairs in a temporary file
        _my_connections="${tickledir}/${_port}.connections"
        rm -f "$_my_connections"
        netstat -tn | \
            awk -v destpat="^${_ipschoice}:${_port}\$" \
                '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' | \
            sort >"$_my_connections"

        # Tickles ctdb currently has for those IPs, in the same format
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_tickles"
        for _i in $_ips ; do
                ctdb -Y gettickles $_i $_port | \
                    awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done | sort >"$_my_tickles"

        # Connections without a tickle: add one
        comm -23 "$_my_connections" "$_my_tickles" | \
            while read _src _dst ; do
                    ctdb addtickle $_src $_dst
            done

        # Tickles without a connection: remove them
        comm -13 "$_my_connections" "$_my_tickles" | \
            while read _src _dst ; do
                    ctdb deltickle $_src $_dst
            done

        rm -f "$_my_connections" "$_my_tickles"
}
1570
1571 ########################################################
1572 # load a site local config file
1573 ########################################################
1574
# An explicitly configured file is sourced first, if it is executable
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
        . "$CTDB_RC_LOCAL"
fi

# Then rc.local from $CTDB_BASE, if it is executable
if [ -x $CTDB_BASE/rc.local ] ; then
        . $CTDB_BASE/rc.local
fi

# Finally every executable entry in $CTDB_BASE/rc.local.d
if [ -d $CTDB_BASE/rc.local.d ] ; then
        for i in $CTDB_BASE/rc.local.d/* ; do
                [ -x "$i" ] && . "$i"
        done
fi
1588
# Globals derived from how the script was invoked: the event name is
# the first argument and the script name is the basename of $0.
event_name="$1"
script_name="${0##*/}"       # basename
# Default failure limit used by ctdb_check_counter
service_fail_limit=1