ctdb/config/functions

   1 # Hey Emacs, this is a -*- shell-script -*- !!!
   2
   3 # utility functions for ctdb event scripts
   4
   5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
   6
   7 [ -z "$CTDB_VARDIR" ] && {
   8     if [ -d "/var/lib/ctdb" ] ; then
   9         export CTDB_VARDIR="/var/lib/ctdb"
  10     else
  11         export CTDB_VARDIR="/var/ctdb"
  12     fi
  13 }
  14 [ -z "$CTDB_ETCDIR" ] && {
  15     export CTDB_ETCDIR="/etc"
  16 }
  17
  18 #######################################
  19 # pull in a system config file, if any
  20 _loadconfig() {
  21
  22     if [ -z "$1" ] ; then
  23         foo="${service_config:-${service_name}}"
  24         if [ -n "$foo" ] ; then
  25             loadconfig "$foo"
  26             return
  27         fi
  28     fi
  29
  30     if [ "$1" != "ctdb" ] ; then
  31         loadconfig "ctdb"
  32     fi
  33
  34     if [ -z "$1" ] ; then
  35         return
  36     fi
  37
  38     if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
  39         . $CTDB_ETCDIR/sysconfig/$1
  40     elif [ -f $CTDB_ETCDIR/default/$1 ]; then
  41         . $CTDB_ETCDIR/default/$1
  42     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
  43         . $CTDB_BASE/sysconfig/$1
  44     fi
  45 }
  46
  47 loadconfig () {
  48     _loadconfig "$@"
  49 }
  50
  51 ##############################################################
  52
  53 # CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
  54 # configuration file.
  55 debug ()
  56 {
  57     if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
  58         # If there are arguments then echo them.  Otherwise expect to
  59         # use stdin, which allows us to pass lots of debug using a
  60         # here document.
  61         if [ -n "$1" ] ; then
  62             echo "DEBUG: $*"
  63         elif ! tty -s ; then
  64             sed -e 's@^@DEBUG: @'
  65         fi
  66     fi
  67 }
  68
  69 die ()
  70 {
  71     _msg="$1"
  72     _rc="${2:-1}"
  73
  74     echo "$_msg"
  75     exit $_rc
  76 }
  77
  78 # Log given message or stdin to either syslog or a CTDB log file
  79 # $1 is the tag passed to logger if syslog is in use.
  80 script_log ()
  81 {
  82     _tag="$1" ; shift
  83
  84     _using_syslog=false
  85     if [ "$CTDB_SYSLOG" = "yes" -o -z "$CTDB_LOGFILE" ] ; then
  86         _using_syslog=true
  87     fi
  88     case "$CTDB_OPTIONS" in
  89         *--syslog*) _using_syslog=true ;;
  90     esac
  91
  92     if $_using_syslog ; then
  93         logger -t "ctdbd: ${_tag}" $*
  94     else
  95         {
  96             if [ -n "$*" ] ; then
  97                 echo "$*"
  98             else
  99                 cat
 100             fi
 101         } >>"${CTDB_LOGFILE:-/var/log/log.ctdb}"
 102     fi
 103 }
 104
 105 # When things are run in the background in an eventscript then logging
 106 # output might get lost.  This is the "solution".  :-)
 107 background_with_logging ()
 108 {
 109     (
 110         "$@" 2>&1 </dev/null |
 111         script_log "${script_name}&"
 112     )&
 113
 114     return 0
 115 }
 116
 117 ##############################################################
 118 # check number of args for different events
 119 ctdb_check_args ()
 120 {
 121     case "$1" in
 122         takeip|releaseip)
 123             if [ $# != 4 ]; then
 124                 echo "ERROR: must supply interface, IP and maskbits"
 125                 exit 1
 126             fi
 127             ;;
 128         updateip)
 129             if [ $# != 5 ]; then
 130                 echo "ERROR: must supply old interface, new interface, IP and maskbits"
 131                 exit 1
 132             fi
 133             ;;
 134     esac
 135 }
 136
 137 ##############################################################
 138 # determine on what type of system (init style) we are running
 139 detect_init_style() {
 140     # only do detection if not already set:
 141     test "x$CTDB_INIT_STYLE" != "x" && return
 142
 143     if [ -x /sbin/startproc ]; then
 144         CTDB_INIT_STYLE="suse"
 145     elif [ -x /sbin/start-stop-daemon ]; then
 146         CTDB_INIT_STYLE="debian"
 147     else
 148         CTDB_INIT_STYLE="redhat"
 149     fi
 150 }
 151
 152 ######################################################
 153 # simulate /sbin/service on platforms that don't have it
 154 # _service() makes it easier to hook the service() function for
 155 # testing.
 156 _service ()
 157 {
 158   _service_name="$1"
 159   _op="$2"
 160
 161   # do nothing, when no service was specified
 162   [ -z "$_service_name" ] && return
 163
 164   if [ -x /sbin/service ]; then
 165       $_nice /sbin/service "$_service_name" "$_op"
 166   elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
 167       $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
 168   elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
 169       $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
 170   fi
 171 }
 172
 173 service()
 174 {
 175     _nice=""
 176     _service "$@"
 177 }
 178
 179 ######################################################
 180 # simulate /sbin/service (niced) on platforms that don't have it
 181 nice_service()
 182 {
 183     _nice="nice"
 184     _service "$@"
 185 }
 186
 187 ######################################################
 188 # wrapper around /proc/ settings to allow them to be hooked
 189 # for testing
 190 # 1st arg is relative path under /proc/, 2nd arg is value to set
 191 set_proc ()
 192 {
 193     echo "$2" >"/proc/$1"
 194 }
 195
 196 ######################################################
 197 # wrapper around getting file contents from /proc/ to allow
 198 # this to be hooked for testing
 199 # 1st arg is relative path under /proc/
 200 get_proc ()
 201 {
 202     cat "/proc/$1"
 203 }
 204
 205 ######################################################
 206 # Check that an RPC service is healthy -
 207 # this includes allowing a certain number of failures
 208 # before marking the NFS service unhealthy.
 209 #
 210 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
 211 #
 212 # each triple is a set of 3 arguments: an operator, a
 213 # fail count limit and an action string.
 214 #
 215 # For example:
 216 #
 217 #       nfs_check_rpc_service "lockd" \
 218 #           -ge 15 "verbose restart unhealthy" \
 219 #           -eq 10 "restart:bs"
 220 #
 221 # says that if lockd is down for 15 iterations then do
 222 # a verbose restart of lockd and mark the node unhealthy.
 223 # Before this, after 10 iterations of failure, the
 224 # service is restarted silently in the background.
 225 # Order is important: the number of failures need to be
 226 # specified in reverse order because processing stops
 227 # after the first condition that is true.
 228 ######################################################
 229 nfs_check_rpc_service ()
 230 {
 231     _prog_name="$1" ; shift
 232
 233     if _nfs_check_rpc_common "$_prog_name" ; then
 234         return
 235     fi
 236
 237     while [ -n "$3" ] ; do
 238         if _nfs_check_rpc_action "$1" "$2" "$3" ; then
 239             break
 240         fi
 241         shift 3
 242     done
 243 }
 244
 245 # The new way of doing things...
 246 nfs_check_rpc_services ()
 247 {
 248     # Files must end with .check - avoids editor backups, RPM fu, ...
 249     for _f in "${CTDB_BASE}/nfs-rpc-checks.d/"[0-9][0-9].*.check ; do
 250         _t="${_f%.check}"
 251         _prog_name="${_t##*/[0-9][0-9].}"
 252
 253         if _nfs_check_rpc_common "$_prog_name" ; then
 254             # This RPC service is up, check next service...
 255             continue
 256         fi
 257
 258         # Check each line in the file in turn until one of the limit
 259         # checks is hit...
 260         while read _cmp _lim _rest ; do
 261             # Skip comments
 262             case "$_cmp" in
 263                 \#*) continue ;;
 264             esac
 265
 266             if _nfs_check_rpc_action "$_cmp" "$_lim" "$_rest" ; then
 267                 # Limit was hit on this line, no further checking...
 268                 break
 269             fi
 270         done <"$_f"
 271     done
 272 }
 273
 274 _nfs_check_rpc_common ()
 275 {
 276     _prog_name="$1"
 277
 278     # Some platforms don't have separate programs for all services.
 279     case "$_prog_name" in
 280         statd)
 281             which "rpc.${_prog_name}" >/dev/null 2>&1 || return 0
 282     esac
 283
 284     _version=1
 285     _rpc_prog="$_prog_name"
 286     _restart=""
 287     _opts=""
 288     case "$_prog_name" in
 289         nfsd)
 290             _rpc_prog=nfs
 291             _version=3
 292             _restart="echo 'Trying to restart NFS service'"
 293             _restart="${_restart}; startstop_nfs restart"
 294             ;;
 295         mountd)
 296             _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
 297             ;;
 298         rquotad)
 299             _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
 300             ;;
 301         lockd)
 302             _rpc_prog=nlockmgr
 303             _version=4
 304             _restart="echo 'Trying to restart lock manager service'"
 305             _restart="${_restart}; startstop_nfslock restart"
 306             ;;
 307         statd)
 308             _rpc_prog=status
 309             _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
 310             _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
 311             _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
 312             ;;
 313         *)
 314             echo "Internal error: unknown RPC program \"$_prog_name\"."
 315             exit 1
 316     esac
 317
 318     _service_name="nfs_${_prog_name}"
 319
 320     if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
 321         ctdb_counter_init "$_service_name"
 322         return 0
 323     fi
 324
 325     ctdb_counter_incr "$_service_name"
 326
 327     return 1
 328 }
 329
 330 _nfs_check_rpc_action ()
 331 {
 332     _cmp="$1"
 333     _limit="$2"
 334     _actions="$3"
 335
 336     if ctdb_check_counter "quiet" "$_cmp" "$_limit" "$_service_name" ; then
 337         return 1
 338     fi
 339
 340     for _action in $_actions ; do
 341         case "$_action" in
 342             verbose)
 343                 echo "$ctdb_check_rpc_out"
 344                 ;;
 345             restart|restart:*)
 346                 # No explicit command specified, construct rpc command.
 347                 if [ -z "$_restart" ] ; then
 348                     _p="rpc.${_prog_name}"
 349                     _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
 350                     _restart="${_restart}; killall -q -9 $_p"
 351                     _restart="${_restart}; $_p $_opts"
 352                 fi
 353
 354                 # Process restart flags...
 355                 _flags="${_action#restart:}"
 356                 # There may not have been a colon...
 357                 [ "$_flags" != "$_action" ] || _flags=""
 358                 # q=quiet - everything to /dev/null
 359                 if [ "${_flags#*q}" != "$_flags" ] ; then
 360                     _restart="{ ${_restart} ; } >/dev/null 2>&1"
 361                 fi
 362                 # s=stealthy - last command to /dev/null
 363                 if [ "${_flags#*s}" != "$_flags" ] ; then
 364                     _restart="${_restart} >/dev/null 2>&1"
 365                 fi
 366                 # b=background - the whole thing, easy and reliable
 367                 if [ "${_flags#*b}" != "$_flags" ] ; then
 368                     _restart="{ ${_restart} ; } &"
 369                 fi
 370
 371                 # Do it!
 372                 eval "${_restart}"
 373                 ;;
 374             unhealthy)
 375                 exit 1
 376                 ;;
 377             *)
 378                 echo "Internal error: unknown action \"$_action\"."
 379                 exit 1
 380         esac
 381     done
 382
 383     return 0
 384 }
 385
 386 ######################################################
 387 # check that a rpc server is registered with portmap
 388 # and responding to requests
 389 # usage: ctdb_check_rpc SERVICE_NAME VERSION
 390 ######################################################
 391 ctdb_check_rpc ()
 392 {
 393     progname="$1"
 394     version="$2"
 395
 396     if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
 397         ctdb_check_rpc_out="ERROR: $progname failed RPC check:
 398 $ctdb_check_rpc_out"
 399         echo "$ctdb_check_rpc_out"
 400         return 1
 401     fi
 402 }
 403
 404 ######################################################
 405 # Ensure $service_name is set
 406 assert_service_name ()
 407 {
 408     [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
 409 }
 410
 411 ######################################################
 412 # check a set of directories is available
 413 # return 1 on a missing directory
 414 # directories are read from stdin
 415 ######################################################
 416 ctdb_check_directories_probe()
 417 {
 418     while IFS="" read d ; do
 419         case "$d" in
 420             *%*)
 421                 continue
 422                 ;;
 423             *)
 424                 [ -d "${d}/." ] || return 1
 425         esac
 426     done
 427 }
 428
 429 ######################################################
 430 # check a set of directories is available
 431 # directories are read from stdin
 432 ######################################################
 433 ctdb_check_directories()
 434 {
 435     ctdb_check_directories_probe || {
 436         echo "ERROR: $service_name directory \"$d\" not available"
 437         exit 1
 438     }
 439 }
 440
 441 ######################################################
 442 # check a set of tcp ports
 443 # usage: ctdb_check_tcp_ports <ports...>
 444 ######################################################
 445
 446 # This flag file is created when a service is initially started.  It
 447 # is deleted the first time TCP port checks for that service succeed.
 448 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
 449 # message if a port check fails.
 450 _ctdb_check_tcp_common ()
 451 {
 452     assert_service_name
 453     _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
 454 }
 455
 456 ctdb_check_tcp_init ()
 457 {
 458     _ctdb_check_tcp_common
 459     mkdir -p "${_ctdb_service_started_file%/*}" # dirname
 460     touch "$_ctdb_service_started_file"
 461 }
 462
 463 ctdb_check_tcp_ports()
 464 {
 465     if [ -z "$1" ] ; then
 466         echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
 467         exit 1
 468     fi
 469
 470     # Set default value for CTDB_TCP_PORT_CHECKS if unset.
 471     # If any of these defaults are unsupported then this variable can
 472     # be overridden in /etc/sysconfig/ctdb or via a file in
 473     # /etc/ctdb/rc.local.d/.
 474     : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
 475
 476     for _c in $CTDB_TCP_PORT_CHECKERS ; do
 477         ctdb_check_tcp_ports_$_c "$@"
 478         case "$?" in
 479             0)
 480                 _ctdb_check_tcp_common
 481                 rm -f "$_ctdb_service_started_file"
 482                 return 0
 483                 ;;
 484             1)
 485                 _ctdb_check_tcp_common
 486                 if [ ! -f "$_ctdb_service_started_file" ] ; then
 487                     echo "ERROR: $service_name tcp port $_p is not responding"
 488                     debug <<EOF
 489 $ctdb_check_tcp_ports_debug
 490 EOF
 491                 else
 492                     echo "INFO: $service_name tcp port $_p is not responding"
 493                 fi
 494
 495                 return 1
 496                 ;;
 497             127)
 498                 debug <<EOF
 499 ctdb_check_ports - checker $_c not implemented
 500 output from checker was:
 501 $ctdb_check_tcp_ports_debug
 502 EOF
 503                 ;;
 504             *)
 505
 506         esac
 507     done
 508
 509     echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
 510
 511     return 127
 512 }
 513
 514 ctdb_check_tcp_ports_netstat ()
 515 {
 516     _cmd='netstat -l -t -n'
 517     _ns=$($_cmd 2>&1)
 518     if [ $? -eq 127 ] ; then
 519         # netstat probably not installed - unlikely?
 520         ctdb_check_tcp_ports_debug="$_ns"
 521         return 127
 522     fi
 523
 524     for _p ; do  # process each function argument (port)
 525         for _a in '0\.0\.0\.0' '::' ; do
 526             _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
 527             if echo "$_ns" | grep -E -q "$_pat" ; then
 528                 # We matched the port, so process next port
 529                 continue 2
 530             fi
 531         done
 532
 533         # We didn't match the port, so flag an error.
 534         ctdb_check_tcp_ports_debug="$_cmd shows this output:
 535 $_ns"
 536         return 1
 537     done
 538
 539     return 0
 540 }
 541
 542 ctdb_check_tcp_ports_nmap ()
 543 {
 544     # nmap wants a comma-separated list of ports
 545     _ports=""
 546     for _p ; do
 547         _ports="${_ports}${_ports:+,}${_p}"
 548     done
 549
 550     _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
 551
 552     _nmap_out=$($_cmd 2>&1)
 553     if [ $? -eq 127 ] ; then
 554         # nmap probably not installed
 555         ctdb_check_tcp_ports_debug="$_nmap_out"
 556         return 127
 557     fi
 558
 559     # get the port-related output
 560     _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
 561
 562     for _p ; do
 563         # looking for something like this:
 564         #  445/open/tcp//microsoft-ds///
 565         # possibly followed by a comma
 566         _t="$_p/open/tcp//"
 567         case "$_port_info" in
 568             # The info we're after must be either at the beginning of
 569             # the string or it must follow a space.
 570             $_t*|*\ $_t*) : ;;
 571             *)
 572                 # Nope, flag an error...
 573                 ctdb_check_tcp_ports_debug="$_cmd shows this output:
 574 $_nmap_out"
 575                 return 1
 576         esac
 577     done
 578
 579     return 0
 580 }
 581
 582 # Use the new "ctdb checktcpport" command to check the port.
 583 # This is very cheap.
 584 ctdb_check_tcp_ports_ctdb ()
 585 {
 586     for _p ; do  # process each function argument (port)
 587         _cmd="ctdb checktcpport $_p"
 588         _out=$($_cmd 2>&1)
 589         _ret=$?
 590         case "$_ret" in
 591             0)
 592                 ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
 593                 return 1
 594                 ;;
 595             98)
 596                 # Couldn't bind, something already listening, next port...
 597                 continue
 598                 ;;
 599             *)
 600                 ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
 601 $_out"
 602                 # assume not implemented
 603                 return 127
 604         esac
 605     done
 606
 607     return 0
 608 }
 609
 610 ######################################################
 611 # check a unix socket
 612 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
 613 ######################################################
 614 ctdb_check_unix_socket() {
 615     socket_path="$1"
 616     [ -z "$socket_path" ] && return
 617
 618     if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
 619         echo "ERROR: $service_name socket $socket_path not found"
 620         return 1
 621     fi
 622 }
 623
 624 ######################################################
 625 # check a command returns zero status
 626 # usage: ctdb_check_command <command>
 627 ######################################################
 628 ctdb_check_command ()
 629 {
 630     _out=$("$@" 2>&1) || {
 631         echo "ERROR: $* returned error"
 632         echo "$_out" | debug
 633         exit 1
 634     }
 635 }
 636
 637 ################################################
 638 # kill off any TCP connections with the given IP
 639 ################################################
 640 kill_tcp_connections ()
 641 {
 642     _ip="$1"
 643
 644     _oneway=false
 645     if [ "$2" = "oneway" ] ; then
 646         _oneway=true
 647     fi
 648
 649     get_tcp_connections_for_ip "$_ip" | {
 650         _killcount=0
 651         _failed=false
 652
 653         while read dest src; do
 654             echo "Killing TCP connection $src $dest"
 655             ctdb killtcp $src $dest >/dev/null 2>&1 || _failed=true
 656             _destport="${dest##*:}"
 657             __oneway=$_oneway
 658             case $_destport in
 659                 # we only do one-way killtcp for CIFS
 660                 139|445) __oneway=true ;;
 661             esac
 662             if ! $__oneway ; then
 663                 ctdb killtcp $dest $src >/dev/null 2>&1 || _failed=true
 664             fi
 665
 666             _killcount=$(($_killcount + 1))
 667         done
 668
 669         if $_failed ; then
 670             echo "Failed to send killtcp control"
 671             return
 672         fi
 673         if [ $_killcount -eq 0 ] ; then
 674             return
 675         fi
 676
 677         _count=0
 678         while : ; do
 679             if [ -z "$(get_tcp_connections_for_ip $_ip)" ] ; then
 680                 echo "Killed $_killcount TCP connections to released IP $_ip"
 681                 return
 682             fi
 683
 684             _count=$(($_count + 1))
 685             if [ $_count -gt 3 ] ; then
 686                 echo "Timed out killing tcp connections for IP $_ip"
 687                 return
 688             fi
 689
 690             sleep 1
 691         done
 692     }
 693 }
 694
 695 ##################################################################
 696 # kill off the local end for any TCP connections with the given IP
 697 ##################################################################
 698 kill_tcp_connections_local_only ()
 699 {
 700     kill_tcp_connections "$1" "oneway"
 701 }
 702
 703 ##################################################################
 704 # tickle any TCP connections with the given IP
 705 ##################################################################
 706 tickle_tcp_connections ()
 707 {
 708     _ip="$1"
 709
 710     get_tcp_connections_for_ip "$_ip" |
 711     {
 712         _failed=false
 713
 714         while read dest src; do
 715             echo "Tickle TCP connection $src $dest"
 716             ctdb tickle $src $dest >/dev/null 2>&1 || _failed=true
 717             echo "Tickle TCP connection $dest $src"
 718             ctdb tickle $dest $src >/dev/null 2>&1 || _failed=true
 719         done
 720
 721         if $_failed ; then
 722             echo "Failed to send tickle control"
 723         fi
 724     }
 725 }
 726
 727 get_tcp_connections_for_ip ()
 728 {
 729     _ip="$1"
 730
 731     netstat -tn | awk -v ip=$_ip \
 732         'index($1, "tcp") == 1 && \
 733          (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) \
 734          && $6 == "ESTABLISHED" \
 735          {print $4" "$5}'
 736 }
 737
 738 ########################################################
 739 # start/stop the Ganesha nfs service
 740 ########################################################
 741 startstop_ganesha()
 742 {
 743     _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
 744     case "$1" in
 745         start)
 746             service "$_service_name" start
 747             ;;
 748         stop)
 749             service "$_service_name" stop
 750             ;;
 751         restart)
 752             service "$_service_name" restart
 753             ;;
 754     esac
 755 }
 756
 757 ########################################################
 758 # start/stop the nfs service on different platforms
 759 ########################################################
 760 startstop_nfs() {
 761         PLATFORM="unknown"
 762         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 763                 PLATFORM="sles"
 764         }
 765         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 766                 PLATFORM="rhel"
 767         }
 768
 769         case $PLATFORM in
 770         sles)
 771                 case $1 in
 772                 start)
 773                         service nfsserver start
 774                         ;;
 775                 stop)
 776                         service nfsserver stop > /dev/null 2>&1
 777                         ;;
 778                 restart)
 779                         set_proc "fs/nfsd/threads" 0
 780                         service nfsserver stop > /dev/null 2>&1
 781                         pkill -9 nfsd
 782                         nfs_dump_some_threads
 783                         service nfsserver start
 784                         ;;
 785                 esac
 786                 ;;
 787         rhel)
 788                 case $1 in
 789                 start)
 790                         service nfslock start
 791                         service nfs start
 792                         ;;
 793                 stop)
 794                         service nfs stop
 795                         service nfslock stop
 796                         ;;
 797                 restart)
 798                         set_proc "fs/nfsd/threads" 0
 799                         service nfs stop > /dev/null 2>&1
 800                         service nfslock stop > /dev/null 2>&1
 801                         pkill -9 nfsd
 802                         nfs_dump_some_threads
 803                         service nfslock start
 804                         service nfs start
 805                         ;;
 806                 esac
 807                 ;;
 808         *)
 809                 echo "Unknown platform. NFS is not supported with ctdb"
 810                 exit 1
 811                 ;;
 812         esac
 813 }
 814
 815 # Dump up to the configured number of nfsd thread backtraces.
 816 nfs_dump_some_threads ()
 817 {
 818     [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0
 819
 820     # Optimisation to avoid running an unnecessary pidof
 821     [ $CTDB_NFS_DUMP_STUCK_THREADS -gt 0 ] || return 0
 822
 823     _count=0
 824     for _pid in $(pidof nfsd) ; do
 825         [ $_count -le $CTDB_NFS_DUMP_STUCK_THREADS ] || break
 826
 827         # Do this first to avoid racing with thread exit
 828         _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
 829         if [ -n "$_stack" ] ; then
 830             echo "Stack trace for stuck nfsd thread [${_pid}]:"
 831             echo "$_stack"
 832             _count=$(($_count + 1))
 833         fi
 834     done
 835 }
 836
 837 ########################################################
 838 # start/stop the nfs lockmanager service on different platforms
 839 ########################################################
 840 startstop_nfslock() {
 841         PLATFORM="unknown"
 842         [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
 843                 PLATFORM="sles"
 844         }
 845         [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
 846                 PLATFORM="rhel"
 847         }
 848
 849         case $PLATFORM in
 850         sles)
 851                 # for sles there is no service for lockmanager
 852                 # so we instead just shutdown/restart nfs
 853                 case $1 in
 854                 start)
 855                         service nfsserver start
 856                         ;;
 857                 stop)
 858                         service nfsserver stop > /dev/null 2>&1
 859                         ;;
 860                 restart)
 861                         service nfsserver stop
 862                         service nfsserver start
 863                         ;;
 864                 esac
 865                 ;;
 866         rhel)
 867                 case $1 in
 868                 start)
 869                         service nfslock start
 870                         ;;
 871                 stop)
 872                         service nfslock stop > /dev/null 2>&1
 873                         ;;
 874                 restart)
 875                         service nfslock stop
 876                         service nfslock start
 877                         ;;
 878                 esac
 879                 ;;
 880         *)
 881                 echo "Unknown platform. NFS locking is not supported with ctdb"
 882                 exit 1
 883                 ;;
 884         esac
 885 }
 886
 887 # Periodically update the statd database
 888 nfs_statd_update ()
 889 {
 890     _update_period="$1"
 891
 892     _statd_update_trigger="$service_state_dir/update-trigger"
 893     [ -f "$_statd_update_trigger" ] || touch "$_statd_update_trigger"
 894
 895     _last_update=$(stat --printf="%Y" "$_statd_update_trigger")
 896     _current_time=$(date +"%s")
 897     if [ $(( $_current_time - $_last_update)) -ge $_update_period ] ; then
 898         touch "$_statd_update_trigger"
 899         $CTDB_BASE/statd-callout updatelocal &
 900         $CTDB_BASE/statd-callout updateremote &
 901     fi
 902 }
 903
 904 add_ip_to_iface()
 905 {
 906     _iface=$1
 907     _ip=$2
 908     _maskbits=$3
 909
 910     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 911     mkdir -p "${_lockfile%/*}" # dirname
 912     [ -f "$_lockfile" ] || touch "$_lockfile"
 913
 914     (
 915         # Note: use of return/exit/die() below only gets us out of the
 916         # sub-shell, which is actually what we want.  That is, the
 917         # function should just return non-zero.
 918
 919         flock --timeout 30 0 || \
 920             die "add_ip_to_iface: unable to get lock for ${_iface}"
 921
 922         # Ensure interface is up
 923         ip link set "$_iface" up || \
 924             die "Failed to bringup interface $_iface"
 925
 926         ip addr add "$_ip/$_maskbits" brd + dev "$_iface" || \
 927             die "Failed to add $_ip/$_maskbits on dev $_iface"
 928     ) <"$_lockfile"
 929
 930     # Do nothing here - return above only gets us out of the subshell
 931     # and doing anything here will affect the return code.
 932 }
 933
 934 delete_ip_from_iface()
 935 {
 936     _iface=$1
 937     _ip=$2
 938     _maskbits=$3
 939
 940     _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
 941     mkdir -p "${_lockfile%/*}" # dirname
 942     [ -f "$_lockfile" ] || touch "$_lockfile"
 943
 944     (
 945         # Note: use of return/exit/die() below only gets us out of the
 946         # sub-shell, which is actually what we want.  That is, the
 947         # function should just return non-zero.
 948
 949         flock --timeout 30 0 || \
 950             die "delete_ip_from_iface: unable to get lock for ${_iface}"
 951
 952         _im="$_ip/$_maskbits"  # shorthand for readability
 953
 954         # "ip addr del" will delete all secondary IPs if this is the
 955         # primary.  To work around this _very_ annoying behaviour we
 956         # have to keep a record of the secondaries and re-add them
 957         # afterwards.  Yuck!
 958
 959         _secondaries=""
 960         if ip addr list dev "$_iface" primary | grep -Fq "inet $_im " ; then
 961             _secondaries=$(ip addr list dev "$_iface" secondary | \
 962                 awk '$1 == "inet" { print $2 }')
 963         fi
 964
 965         local _rc=0
 966         ip addr del "$_im" dev "$_iface" || {
 967             echo "Failed to del $_ip on dev $_iface"
 968             _rc=1
 969         }
 970
 971         if [ -n "$_secondaries" ] ; then
 972             for _i in $_secondaries; do
 973                 if ip addr list dev "$_iface" | grep -Fq "inet $_i" ; then
 974                     echo "Kept secondary $_i on dev $_iface"
 975                 else
 976                     echo "Re-adding secondary address $_i to dev $_iface"
 977                     ip addr add $_i brd + dev $_iface || {
 978                         echo "Failed to re-add address $_i to dev $_iface"
 979                         _rc=1
 980                     }
 981                 fi
 982             done
 983         fi
 984
 985         return $_rc
 986     ) <"$_lockfile"
 987
 988     # Do nothing here - return above only gets us out of the subshell
 989     # and doing anything here will affect the return code.
 990 }
 991
 992 # If the given IP is hosted then print 2 items: maskbits and iface
 993 ip_maskbits_iface ()
 994 {
 995     _addr="$1"
 996
 997     ip addr show to "${_addr}/32" 2>/dev/null | \
 998         awk '$1 == "inet" { print gensub(".*/", "", 1, $2), $NF }'
 999 }
1000
# Remove a public address from whichever interface currently hosts it.
#   $1 - IP address, optionally followed by /maskbits (ignored)
# Does nothing if the address is not hosted.  Output from the actual
# removal is discarded; only the "Removing..." message is printed.
drop_ip ()
{
    _addr="${1%/*}"  # Remove optional maskbits

    # The command substitution is deliberately unquoted so that the
    # "maskbits iface" pair is split into $1 and $2.
    set -- $(ip_maskbits_iface "$_addr")
    if [ -n "$1" ] ; then
        _maskbits="$1"
        _iface="$2"
        echo "Removing public address $_addr/$_maskbits from device $_iface"
        delete_ip_from_iface "$_iface" "$_addr" "$_maskbits" >/dev/null 2>&1
    fi
}
1013
# Drop every address listed in the file named by $CTDB_PUBLIC_ADDRESSES
# (first word of each line).  Does nothing if the variable is unset or
# empty.
drop_all_public_ips ()
{
    # "read" returns non-zero on a final line that lacks a trailing
    # newline even though it has filled in $_ip, so also test $_ip to
    # avoid silently skipping the last address.
    while read _ip _x || [ -n "$_ip" ] ; do
        drop_ip "$_ip"
    done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
}
1020
1021 ########################################################
1022 # some simple logic for counting events - per eventscript
1023 # usage: ctdb_counter_init
1024 #        ctdb_counter_incr
#        ctdb_check_counter [<msg> [<op> [<limit>]]]
# ctdb_check_counter fails when count <op> <limit> (default: count >= <limit>)
1027 ########################################################
# Work out which counter file to use and make sure its directory
# exists.  The counter is named after $1 if given, else $service_name,
# else $script_name.  Sets $_service_name and $_counter_file.
_ctdb_counter_common () {
    if [ -n "$1" ] ; then
        _service_name="$1"
    elif [ -n "$service_name" ] ; then
        _service_name="$service_name"
    else
        _service_name="$script_name"
    fi

    _counter_file="${ctdb_fail_dir}/${_service_name}"

    # Strip the last path component to get the directory.
    mkdir -p "${_counter_file%/*}"
}
# Reset the failure counter (optionally for the service named in $1)
# to zero by truncating its counter file.
ctdb_counter_init () {
    _ctdb_counter_common "$1"
    : >"$_counter_file"
}
# Increment the failure counter (optionally for the service named in
# $1).  The count is kept in unary: the counter file grows by exactly
# one byte per increment and its size is the count.
ctdb_counter_incr () {
    _ctdb_counter_common "$1"

    # unary counting!
    # printf rather than "echo -n": the latter is not portable and on
    # some shells would write "-n 1" plus a newline, breaking the count.
    printf '1' >> "$_counter_file"
}
# Compare the current failure count against a limit.
#   $1 - "error" (default): print a message and exit 1 when the test
#        matches; anything else: silently return 1 when it matches
#   $2 - integer comparison operator understood by test (default -ge)
#   $3 - limit (default $service_fail_limit)
#   $4 - optional service name, passed to _ctdb_counter_common
# Returns 0 when "count <op> limit" does not hold.
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"

    # All three arguments are optional, so only shift what is there.
    # An unconditional "shift 3" aborts the script in some shells when
    # fewer arguments are given, and in others leaves $1 in place so
    # that the message argument would be taken as the service name.
    if [ $# -ge 3 ] ; then
        shift 3
    else
        shift $#
    fi
    _ctdb_counter_common "$1"

    # unary counting!
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    if [ "$_size" "$_op" "$_limit" ] ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}
1062
1063 ########################################################
1064
1065 ctdb_status_dir="$CTDB_VARDIR/status"
1066 ctdb_fail_dir="$CTDB_VARDIR/failcount"
1067
# Set $service_state_dir to the state directory for the service named
# in $1 (default: $service_name) and create it.  Exits the script with
# status 1 if the directory cannot be created.
ctdb_setup_service_state_dir ()
{
    service_state_dir="${CTDB_VARDIR}/state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
1076
1077 ########################################################
1078 # Managed status history, for auto-start/stop
1079
1080 ctdb_managed_dir="$CTDB_VARDIR/managed_history"
1081
# Set $_ctdb_managed_file to the managed-history marker file for
# $service_name.
_ctdb_managed_common ()
{
    _ctdb_managed_file="${ctdb_managed_dir}/${service_name}"
}
1086
# Record that $service_name is managed by creating (or touching) its
# marker file in $ctdb_managed_dir.
ctdb_service_managed ()
{
    mkdir -p "$ctdb_managed_dir"
    _ctdb_managed_common
    touch "$_ctdb_managed_file"
}
1093
# Forget that $service_name was managed by removing its marker file.
# Succeeds even if the marker does not exist.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common

    rm -f -- "$_ctdb_managed_file"
}
1099
# Succeed if the managed-history marker file for $service_name exists
# as a regular file.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common

    test -f "$_ctdb_managed_file"
}
1105
1106 ########################################################
1107 # Check and set status
1108
# Print a one-line report of a recorded status problem.
#   $1 - status name (e.g. "unhealthy", "banned")
#   $2 - file containing the problem description
log_status_cat ()
{
    # Quote the file name so paths containing whitespace or glob
    # characters are read as a single file.
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}
1113
# Report any status recorded for this script.
# Returns 1 (and logs) if an "unhealthy" file is readable, 2 (and
# logs) if a "banned" file is readable, otherwise 0.  "unhealthy"
# takes precedence over "banned".
ctdb_checkstatus ()
{
    _status_base="$ctdb_status_dir/$script_name"

    if [ -r "$_status_base/unhealthy" ] ; then
        log_status_cat "unhealthy" "$_status_base/unhealthy"
        return 1
    fi

    if [ -r "$_status_base/banned" ] ; then
        log_status_cat "banned" "$_status_base/banned"
        return 2
    fi

    return 0
}
1126
# Record or clear the status of this script.
#   $1 - "unhealthy" or "banned": copy the contents of file $2 into
#        the corresponding status file; anything else: remove both
#        status files
#   $2 - file holding the problem description (only read for the two
#        statuses above)
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
        return
    fi

    # Any other status clears whatever was recorded before.
    for i in "banned" "unhealthy" ; do
        rm -f "$d/$i"
    done
}
1142
1143 ##################################################################
1144 # Reconfigure a service on demand
1145
# Set $_d to the status directory for $service_name, create it, and
# set $_ctdb_service_reconfigure_flag to the "reconfigure" flag file
# inside it.
_ctdb_service_reconfigure_common ()
{
    _d="${ctdb_status_dir}/${service_name}"
    mkdir -p "${_d}"
    _ctdb_service_reconfigure_flag="${_d}/reconfigure"
}
1152
# Succeed if the reconfigure flag file for $service_name exists.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common

    test -e "$_ctdb_service_reconfigure_flag"
}
1158
# Schedule a reconfigure of $service_name by creating (and emptying)
# its reconfigure flag file.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common

    : >"$_ctdb_service_reconfigure_flag"
}
1164
# Cancel any scheduled reconfigure of $service_name by removing its
# reconfigure flag file.  Succeeds even if the flag is not set.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common

    rm -f -- "$_ctdb_service_reconfigure_flag"
}
1170
# Reconfigure $service_name: clear the reconfigure flag, run
# service_reconfigure and, only if that succeeds, reset the failure
# counter.  Returns the status of service_reconfigure on failure.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure

    if service_reconfigure ; then
        ctdb_counter_init
    else
        # $? still holds the status of service_reconfigure here.
        return $?
    fi
}
1178
1179 # Default service_reconfigure() function does nothing.
service_reconfigure ()
{
    # Nothing to do by default; always succeeds.
    return 0
}
1184
# Try to take the reconfigure lock for $service_name on behalf of the
# current event.
#
# The lock file ($_d/reconfigure_lock) holds the name of the event
# that last took the lock on its first line, followed by PID lines.
# Fails (status 1) if the file names a PID other than ours that is
# still running.  Otherwise overwrites the file with $event_name and
# our PID and succeeds (status 0).
ctdb_reconfigure_try_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    mkdir -p "${_lock%/*}" # dirname
    touch "$_lock"

    (
        # Serialise access to the lock file itself: stdin (fd 0) is
        # the lock file, see the redirection below.
        flock 0
        # This is overkill but will work if we need to extend this to
        # allow certain events to run multiple times in parallel
        # (e.g. takeip) and write multiple PIDs to the file.
        read _locker_event
        if [ -n "$_locker_event" ] ; then
            while read _pid ; do
                # A recorded PID that is not us and is still alive
                # means somebody else holds the lock.
                if [ -n "$_pid" -a "$_pid" != $$ ] && \
                    kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        # No live holder: record ourselves as the holder.
        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}
1211
# Re-emit the result that "ctdb scriptstatus -Y" reports for the
# previous monitor run of this script ($script_name): print its error
# output and exit with its return code.  This function never returns
# to the caller.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."
    # Leading colon (':') is missing in some versions...
    _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
    # Output looks like this:
    # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
    # This is the cheapest way of getting fields in the middle.
    # (The colon-separated fields are re-split on whitespace by
    # "set --", so $3 is the return code and $4 the status word.)
    set -- $(IFS=":" ; echo $_out)
    _code="$3"
    _status="$4"
    # The error output field can include colons so we'll try to
    # preserve them.  The weak checking at the beginning tries to make
    # this work for both broken (no leading ':') and fixed output.
    _out="${_out%:}"
    _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
    case "$_status" in
        OK) : ;;  # Do nothing special.
        TIMEDOUT)
            # Recast this as an error, since we can't exit with the
            # correct negative number.
            _code=1
            _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
            ;;
        DISABLED)
            # Recast this as an OK, since we can't exit with the
            # correct negative number.
            _code=0
            _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
            ;;
        *) : ;;  # Must be ERROR, do nothing special.
    esac
    echo "$_err_out"
    exit $_code
}
1247
# Handle scheduled reconfigures for the "monitor", "ipreallocated" and
# "reconfigure" events; returns 0 immediately for any other event.
#
# If the reconfigure lock can be taken:
#   reconfigure   - reconfigure in a sub-shell and exit with its status
#   ipreallocated - reconfigure if one is scheduled, then return
#   monitor       - if a reconfigure is scheduled, run it and then
#                   replay the previous monitor status (which exits)
# If the lock is held by somebody else:
#   reconfigure   - exit 2 so the caller can retry
#   ipreallocated - return 0, leaving any scheduled reconfigure pending
#   monitor       - replay the previous monitor status (which exits)
ctdb_service_check_reconfigure ()
{
    # NOTE(review): assert_service_name is defined elsewhere in this
    # file; presumably it aborts if $service_name is unset.
    assert_service_name

    # We only care about some events in this function.  For others we
    # return now.
    case "$event_name" in
        monitor|ipreallocated|reconfigure) : ;;
        *) return 0 ;;
    esac

    if ctdb_reconfigure_try_lock ; then
        # No events covered by this function are running, so proceed
        # freely.
        case "$event_name" in
            reconfigure)
                (ctdb_service_reconfigure)
                exit $?
                ;;
            ipreallocated)
                if ctdb_service_needs_reconfigure ; then
                    ctdb_service_reconfigure
                fi
                ;;
            monitor)
                if ctdb_service_needs_reconfigure ; then
                    ctdb_service_reconfigure
                    # Given that the reconfigure might not have
                    # resulted in the service being stable yet, we
                    # replay the previous status since that's the best
                    # information we have.
                    ctdb_replay_monitor_status
                fi
                ;;
        esac
    else
        # Somebody else is running an event we don't want to collide
        # with.  We proceed with caution.
        case "$event_name" in
            reconfigure)
                # Tell whoever called us to retry.
                exit 2
                ;;
            ipreallocated)
                # Defer any scheduled reconfigure and just run the
                # rest of the ipreallocated event, as per the
                # eventscript.  There's an assumption here that the
                # event doesn't depend on any scheduled reconfigure.
                # This is true in the current code.
                return 0
                ;;
            monitor)
                # There is most likely a reconfigure in progress so
                # the service is possibly unstable.  As above, we
                # defer any scheduled reconfigure.  We also replay
                # the previous monitor status since that's the best
                # information we have.
                ctdb_replay_monitor_status
                ;;
        esac
    fi
}
1310
1311 ##################################################################
1312 # Does CTDB manage this service? - and associated auto-start/stop
1313
# Backward-compatibility helper: if $1 is "yes" and $2 is the current
# $service_name, append $2 to $CTDB_MANAGED_SERVICES.
ctdb_compat_managed_service ()
{
    if [ "$1" = "yes" ] && [ "$2" = "$service_name" ] ; then
        CTDB_MANAGED_SERVICES="${CTDB_MANAGED_SERVICES} $2"
    fi
}
1320
# Succeed if $service_name is listed in $CTDB_MANAGED_SERVICES, either
# directly or after folding in the legacy CTDB_MANAGES_* settings.
is_ctdb_managed_service ()
{
    assert_service_name

    # Pad the list with spaces so the name can be matched as a whole
    # word: stripping the shortest prefix ending in " <name> " changes
    # the string only if " <name> " is present.
    t=" $CTDB_MANAGED_SERVICES "
    [ "${t#* ${service_name} }" = "${t}" ] || return 0

    # Not listed.  Update $CTDB_MANAGED_SERVICES from the old-style
    # per-service variables for backward compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "
    if [ "${t#* ${service_name} }" = "${t}" ] ; then
        return 1
    fi
    return 0
}
1352
# Start or stop $service_name on demand.
#
# For the service-start/service-stop pseudo-events: refuse (via die)
# if the service is managed or auto-start/stop is enabled, otherwise
# start/stop the service and exit with the resulting status.
#
# For "monitor" events with $CTDB_SERVICE_AUTOSTARTSTOP = yes: start
# a service that has just become managed, or stop one that is no
# longer managed, in the background and exit with the resulting
# status.  In every other case return 0.
ctdb_start_stop_service ()
{
    assert_service_name

    # Allow service-start/service-stop pseudo-events to start/stop
    # services when we're not auto-starting/stopping and we're not
    # monitoring.
    case "$event_name" in
        service-start)
            is_ctdb_managed_service && \
                die 'service-start event not permitted when service is managed'
            [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] || \
                die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            ctdb_service_start
            exit $?
            ;;
        service-stop)
            is_ctdb_managed_service && \
                die 'service-stop event not permitted when service is managed'
            [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] || \
                die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            ctdb_service_stop
            exit $?
            ;;
    esac

    # Auto-start/stop only happens when configured, and only on
    # monitor events.
    if [ "$CTDB_SERVICE_AUTOSTARTSTOP" != "yes" ] ; then
        return 0
    fi
    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    if is_ctdb_managed_service ; then
        # Managed now and before: nothing to do.
        if is_ctdb_previously_managed_service ; then
            return 0
        fi
        echo "Starting service \"$service_name\" - now managed"
        background_with_logging ctdb_service_start
        exit $?
    fi

    if is_ctdb_previously_managed_service ; then
        echo "Stopping service \"$service_name\" - no longer managed"
        background_with_logging ctdb_service_stop
        exit $?
    fi
}
1402
# Start $service_name via service_start.  On success the failure
# counter is reset and ctdb_check_tcp_init is run; on failure the
# status of service_start is returned.
ctdb_service_start ()
{
    # The service is marked managed if we've ever tried to start it,
    # whether or not the start succeeds.
    ctdb_service_managed

    if service_start ; then
        ctdb_counter_init
        ctdb_check_tcp_init
    else
        # $? still holds the status of service_start here.
        return $?
    fi
}
1413
# Stop $service_name: remove its managed-history marker, then run
# service_stop.  Returns the status of service_stop.
ctdb_service_stop ()
{
    ctdb_service_unmanaged
    service_stop
}
1419
1420 # Default service_start() and service_stop() functions.
1421
1422 # These may be overridden in an eventscript.  When overriding, the
1423 # following convention must be followed.  If these functions are
1424 # called with no arguments then they may use internal logic to
1425 # determine whether the service is managed and, therefore, whether
1426 # they should take any action.  However, if the service name is
1427 # specified as an argument then an attempt must be made to start or
1428 # stop the service.  This is because the auto-start/stop code calls
1429 # them with the service name as an argument.
service_start ()
{
    # Default implementation: start $service_name via the "service"
    # command.
    service "$service_name" start
}
1434
service_stop ()
{
    # Default implementation: stop $service_name via the "service"
    # command.
    service "$service_name" stop
}
1439
1440 ##################################################################
1441
# Handle the events common to all event scripts.
#   status    - report the recorded status and exit with the result of
#               ctdb_checkstatus
#   setstatus - pass the remaining arguments to ctdb_setstatus and
#               exit with its result
# Any other event is left to the caller: the function just returns.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
1456
1457 # iptables doesn't like being re-entered, so flock-wrap it.
iptables()
{
        # Run /sbin/iptables with the given arguments while holding
        # $CTDB_VARDIR/iptables-ctdb.flock, waiting up to 30 seconds
        # for the lock.  The lock path is quoted so that a
        # $CTDB_VARDIR containing whitespace stays a single argument.
        flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}
1462
1463 # AIX (and perhaps others?) doesn't have mktemp
if ! which mktemp >/dev/null 2>&1 ; then
    # Minimal replacement for mktemp(1).
    #   mktemp      - create an empty file and print its name
    #   mktemp -d   - create a directory and print its name
    # The name is ${TMPDIR:-/tmp}/tmp.<10 hex digits> and the object is
    # created with umask 077.  Returns non-zero, printing nothing, if
    # the file or directory could not be created.
    mktemp ()
    {
        _dir=false
        if [ "$1" = "-d" ] ; then
            _dir=true
            shift
        fi
        _d="${TMPDIR:-/tmp}"
        # First 10 hex digits of an MD5 over some random data.
        _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
            md5sum | \
            sed -e 's@\(..........\).*@\1@')
        _t="${_d}/tmp.${_hex10}"
        (
            umask 077
            if $_dir ; then
                mkdir "$_t"
            else
                # noclobber: fail rather than truncate or follow
                # something that already exists under this name.
                set -C
                : >"$_t"
            fi
        ) || return 1
        echo "$_t"
    }
fi
1488
1489 ########################################################
1490 # tickle handling
1491 ########################################################
1492
# Bring the tickle list held by ctdb into line with the established
# TCP connections to this node's public IPs on the given port.
#   $1 - local TCP port to consider
# Connections without a tickle get one added ("ctdb addtickle");
# tickles without a matching connection are removed ("ctdb deltickle").
update_tickles ()
{
        _port="$1"

        tickledir="$CTDB_VARDIR/state/tickles"
        mkdir -p "$tickledir"

        # Who am I?
        # (strip the "PNN:" prefix from the output of "ctdb pnn")
        _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}

        # What public IPs do I hold?
        # (field 2 of every "ctdb -Y ip" line whose field 3 is our PNN)
        _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')

        # IPs as a regexp choice
        # (joined with '|' and with the dots escaped)
        _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

        # Record connections to our public IPs in a temporary file
        # as sorted "<remote addr:port> <local addr:port>" lines
        _my_connections="${tickledir}/${_port}.connections"
        rm -f "$_my_connections"
        netstat -tn |
        awk -v destpat="^${_ipschoice}:${_port}\$" \
          '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_my_connections"

        # Record our current tickles in a temporary file
        # in the same sorted two-column format (the first line of each
        # "ctdb -Y gettickles" output is skipped)
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_tickles"
        for _i in $_ips ; do
                ctdb -Y gettickles $_i $_port |
                awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done |
        sort >"$_my_tickles"

        # Add tickles for connections that we haven't already got tickles for
        # (comm -23: lines found only in the connections file)
        comm -23 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb addtickle $_src $_dst
        done

        # Remove tickles for connections that are no longer there
        # (comm -13: lines found only in the tickles file)
        comm -13 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb deltickle $_src $_dst
        done

        rm -f "$_my_connections" "$_my_tickles"
}
1540
1541 ########################################################
1542 # load a site local config file
1543 ########################################################
1544
# Source, in this order and each only if executable:
#   1. the file named by $CTDB_RC_LOCAL (if set)
#   2. $CTDB_BASE/rc.local
#   3. every file in $CTDB_BASE/rc.local.d
# $CTDB_BASE is quoted throughout so that a path containing whitespace
# or glob characters is handled as a single path.
[ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
        . "$CTDB_RC_LOCAL"
}

[ -x "$CTDB_BASE/rc.local" ] && {
        . "$CTDB_BASE/rc.local"
}

[ -d "$CTDB_BASE/rc.local.d" ] && {
        for i in "$CTDB_BASE"/rc.local.d/* ; do
                [ -x "$i" ] && . "$i"
        done
}
1558
1559 script_name="${0##*/}"       # basename
1560 service_fail_limit=1
1561 event_name="$1"