apache's service name is not always httpd
[ctdb.git] / config / functions
index 2c78b396f88f102c687cdb9188e63375bc77ccdb..c1891ba17ecbed08d31b648155dac6b089e01c47 100755 (executable)
@@ -1,10 +1,19 @@
+# Hey Emacs, this is a -*- shell-script -*- !!!
+
 # utility functions for ctdb event scripts
 
 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
 
+[ -z "$CTDB_VARDIR" ] && {
+    export CTDB_VARDIR="/var/ctdb"
+}
+[ -z "$CTDB_ETCDIR" ] && {
+    export CTDB_ETCDIR="/etc"
+}
+
 #######################################
 # pull in a system config file, if any
-loadconfig() {
+_loadconfig() {
 
     if [ -z "$1" ] ; then
        foo="${service_config:-${service_name}}"
@@ -15,16 +24,84 @@ loadconfig() {
        loadconfig "ctdb"
     fi
 
-
-    if [ -f /etc/sysconfig/$1 ]; then
-       . /etc/sysconfig/$1
-    elif [ -f /etc/default/$1 ]; then
-       . /etc/default/$1
+    if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
+       . $CTDB_ETCDIR/sysconfig/$1
+    elif [ -f $CTDB_ETCDIR/default/$1 ]; then
+       . $CTDB_ETCDIR/default/$1
     elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
        . $CTDB_BASE/sysconfig/$1
     fi
 }
 
+# Public entry point for loading configuration: forwards all of its
+# arguments, unchanged, to _loadconfig().
+loadconfig () {
+    _loadconfig "$@"
+}
+
+##############################################################
+# make sure CTDB_CURRENT_DEBUGLEVEL is set to the desired debug level
+# (integer)
+#
+# If it is already set then do nothing, since it might have been set
+# via a file in rc.local.d/.  If it is not set then set it by sourcing
+# $CTDB_VARDIR/eventscript_debuglevel. If this file does not exist then
+# create it using output from "ctdb getdebug".  If the optional 1st arg
+# is "create" then don't source an existing file but create a new one
+# instead - this is useful for creating the file just once in each
+# event run in 00.ctdb.  If there's a problem getting the debug level
+# from ctdb then it is silently set to 0 - no use spamming logs if our
+# debug code is broken...
+ctdb_set_current_debuglevel ()
+{
+    # Nothing to do if a level is already set in the environment.
+    [ -z "$CTDB_CURRENT_DEBUGLEVEL" ] || return 0
+
+    _f="$CTDB_VARDIR/eventscript_debuglevel"
+
+    # (Re)generate the file when asked to ("create") or when it is
+    # not readable.
+    if [ "$1" = "create" -o ! -r "$_f" ] ; then
+       _t=$(ctdb getdebug -Y 2>/dev/null)
+       # get last field of output: strip one trailing ':' and then
+       # everything up to and including the last remaining ':'
+       _t="${_t%:}"
+       _t="${_t##*:}"
+       # Defaults to 0
+       echo "export CTDB_CURRENT_DEBUGLEVEL=\"${_t:-0}\"" >"$_f"
+    fi
+
+    # Source the file so that the export happens in the current shell.
+    . "$_f"
+}
+
+# Print debug output, prefixed with "DEBUG: ", when
+# CTDB_CURRENT_DEBUGLEVEL is 4 or higher.  An unset or empty level is
+# treated as 0, so nothing is printed.
+#
+# usage: debug [ARGS...]
+#   With arguments: echo them on a single line.
+#   Without arguments: prefix every line read from stdin, unless stdin
+#   is a terminal.
+debug ()
+{
+    # Quote and default the level: with the bare, unquoted variable an
+    # unset level turned the test into "[ -ge 4 ]", which makes test(1)
+    # print an error on every call.
+    if [ "${CTDB_CURRENT_DEBUGLEVEL:-0}" -ge 4 ] ; then
+       # If there are arguments then echo them.  Otherwise expect to
+       # use stdin, which allows us to pass lots of debug using a
+       # here document.
+       if [ -n "$1" ] ; then
+           echo "DEBUG: $*"
+       elif ! tty -s ; then
+           sed -e 's@^@DEBUG: @'
+       fi
+    fi
+}
+
+##############################################################
+# check number of args for different events
+# usage: ctdb_check_args EVENT [EVENT-ARGS...]
+# Called with the complete argument list of an event script.  Prints
+# an error and exits with status 1 when the argument count is wrong
+# for EVENT; events that are not listed here are not checked.
+ctdb_check_args ()
+{
+    case "$1" in
+       takeip|releaseip)
+           # event name + interface + IP + maskbits
+           [ $# -eq 4 ] && return 0
+           echo "ERROR: must supply interface, IP and maskbits"
+           ;;
+       updateip)
+           # event name + old interface + new interface + IP + maskbits
+           [ $# -eq 5 ] && return 0
+           echo "ERROR: must supply old interface, new interface, IP and maskbits"
+           ;;
+       *)
+           return 0
+           ;;
+    esac
+
+    # Only reached after one of the error messages above.
+    exit 1
+}
+
 ##############################################################
 # determine on what type of system (init style) we are running
 detect_init_style() {
@@ -42,7 +119,10 @@ detect_init_style() {
 
 ######################################################
 # simulate /sbin/service on platforms that don't have it
-service() { 
+# _service() makes it easier to hook the service() function for
+# testing.
+_service ()
+{
   _service_name="$1"
   _op="$2"
 
@@ -50,104 +130,185 @@ service() {
   [ -z "$_service_name" ] && return
 
   if [ -x /sbin/service ]; then
-      /sbin/service "$_service_name" "$_op"
-  elif [ -x /etc/init.d/$_service_name ]; then
-      /etc/init.d/$_service_name "$_op"
-  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
-      /etc/rc.d/init.d/$_service_name "$_op"
+      $_nice /sbin/service "$_service_name" "$_op"
+  elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
+      $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
+  elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
+      $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
   fi
 }
 
+# Run a service operation without a "nice" prefix: clears $_nice,
+# which _service() puts in front of the command it runs, and passes
+# all arguments through.
+service()
+{
+    _nice=""
+    _service "$@"
+}
+
 ######################################################
 # simulate /sbin/service (niced) on platforms that don't have it
-nice_service() { 
-  _service_name="$1"
-  _op="$2"
-
-  # do nothing, when no service was specified
-  [ -z "$_service_name" ] && return
-
-  if [ -x /sbin/service ]; then
-      nice /sbin/service "$_service_name" "$_op"
-  elif [ -x /etc/init.d/$_service_name ]; then
-      nice /etc/init.d/$_service_name "$_op"
-  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
-      nice /etc/rc.d/init.d/$_service_name "$_op"
-  fi
+nice_service()
+{
+    # _service() puts $_nice in front of the command it runs, so this
+    # runs the same operation under nice(1).
+    _nice="nice"
+    _service "$@"
+}
 
 ######################################################
-# wait for a command to return a zero exit status
-# usage: ctdb_wait_command SERVICE_NAME <command>
-######################################################
-ctdb_wait_command() {
-  service_name="$1"
-  wait_cmd="$2"
-  [ -z "$wait_cmd" ] && return;
-  all_ok=0
-  echo "Waiting for service $service_name to start"
-  while [ $all_ok -eq 0 ]; do
-         $wait_cmd > /dev/null 2>&1 && all_ok=1
-         ctdb status > /dev/null 2>&1 || {
-               echo "ctdb daemon has died. Exiting wait for $service_name"
-               exit 1
-         }
-         [ $all_ok -eq 1 ] || sleep 1
-  done
-  echo "Local service $service_name is up"
+# wrapper around /proc/ settings to allow them to be hooked
+# for testing
+# 1st arg is relative path under /proc/, 2nd arg is value to set
+set_proc ()
+{
+    # $1 is appended to /proc/ as-is; $2 is written with echo, so it
+    # is followed by a newline.
+    echo "$2" >"/proc/$1"
+}
 
+######################################################
+# wrapper around getting file contents from /proc/ to allow
+# this to be hooked for testing
+# 1st arg is relative path under /proc/
+get_proc ()
+{
+    # Prints the file's contents on stdout; the exit status is cat's.
+    cat "/proc/$1"
+}
 
 ######################################################
-# wait for a set of tcp ports
-# usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
+# Check that an RPC service is healthy -
+# this includes allowing a certain number of failures
+# before marking the NFS service unhealthy.
+#
+# usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
+#
+# each triple is a set of 3 arguments: an operator, a 
+# fail count limit and an action string.
+#
+# For example:
+#
+#      nfs_check_rpc_service "lockd" \
+#          -ge 15 "verbose restart unhealthy" \
+#          -eq 10 "restart:bs"
+#
+# says that if lockd is down for 15 iterations then do
+# a verbose restart of lockd and mark the node unhealthy.
+# Before this, after 10 iterations of failure, the
+# service is restarted silently in the background.
+# Order is important: the number of failures need to be
+# specified in reverse order because processing stops
+# after the first condition that is true.
 ######################################################
-ctdb_wait_tcp_ports() {
-  service_name="$1"
-  shift
-  wait_ports="$*"
-  [ -z "$wait_ports" ] && return;
-  all_ok=0
-  echo "Waiting for tcp service $service_name to start"
-  while [ $all_ok -eq 0 ]; do
-         all_ok=1
-         for p in $wait_ports; do
-             if [ -x /usr/bin/netcat ]; then
-                 /usr/bin/netcat -z 127.0.0.1 $p > /dev/null || all_ok=0
-             elif [ -x /usr/bin/nc ]; then
-                 /usr/bin/nc -z 127.0.0.1 $p > /dev/null || all_ok=0
-             elif [ -x /usr/bin/netstat ]; then
-                 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
-             elif [ -x /bin/netstat ]; then
-                 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
-             else 
-                 echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
-                 return 127
-             fi
-         done
-         [ $all_ok -eq 1 ] || sleep 1
-         ctdb status > /dev/null 2>&1 || {
-               echo "ctdb daemon has died. Exiting tcp wait $service_name"
-               return 1
-         }
-  done
-  echo "Local tcp services for $service_name are up"
-}
+nfs_check_rpc_service ()
+{
+    # 1st arg is the program name; what remains in "$@" after the
+    # shift is the list of (operator, limit, actions) triples.
+    _prog_name="$1" ; shift
+
+    # Defaults: RPC version 1, RPC program named like the service, no
+    # dedicated restart command and no extra daemon options.
+    _version=1
+    _rpc_prog="$_prog_name"
+    _restart=""
+    _opts=""
+    case "$_prog_name" in
+       knfsd)
+           _rpc_prog=nfs
+           _version=3
+           _restart="echo 'Trying to restart NFS service'"
+           _restart="${_restart}; startstop_nfs restart"
+           ;;
+       mountd)
+           # " -p PORT" only when MOUNTD_PORT is set and non-empty
+           _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+           ;;
+       rquotad)
+           _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
+           ;;
+       lockd)
+           _rpc_prog=nlockmgr
+           _version=4
+           _restart="echo 'Trying to restart lock manager service'"
+           _restart="${_restart}; startstop_nfslock restart"
+           ;;
+       statd)
+           _rpc_prog=status
+           _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
+           _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
+           _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
+           ;;
+       *)
+           echo "Internal error: unknown RPC program \"$_prog_name\"."
+           exit 1
+    esac
+
+    # Name under which the failure counter for this program is kept.
+    _service_name="nfs_${_prog_name}"
+
+    # Healthy: reset the failure counter and stop here.  The output of
+    # the check is discarded, but ctdb_check_rpc also leaves it in
+    # $ctdb_check_rpc_out for the "verbose" action below.
+    if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
+       ctdb_counter_init "$_service_name"
+       return 0
+    fi
 
+    ctdb_counter_incr "$_service_name"
+
+    # Walk the triples: $1 is the operator, $2 the limit and $3 the
+    # action string.  In "quiet" mode ctdb_check_counter returns
+    # non-zero when "<count> <operator> <limit>" holds, which is what
+    # triggers the actions.
+    while [ -n "$3" ] ; do
+       ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
+           for _action in $3 ; do
+               case "$_action" in
+                   verbose)
+                       echo "$ctdb_check_rpc_out"
+                       ;;
+                   restart|restart:*)
+                       # No explicit command specified, construct rpc command.
+                       if [ -z "$_restart" ] ; then
+                           _p="rpc.${_prog_name}"
+                           _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
+                           _restart="${_restart}; killall -q -9 $_p"
+                           _restart="${_restart}; $_p $_opts"
+                       fi
+
+                       # Process restart flags...
+                       _flags="${_action#restart:}"
+                       # There may not have been a colon...
+                       [ "$_flags" != "$_action" ] || _flags=""
+                       # q=quiet - everything to /dev/null
+                       if [ "${_flags#*q}" != "$_flags" ] ; then
+                           _restart="{ ${_restart} ; } >/dev/null 2>&1"
+                       fi
+                       # s=stealthy - last command to /dev/null
+                       if [ "${_flags#*s}" != "$_flags" ] ; then
+                           _restart="${_restart} >/dev/null 2>&1"
+                       fi
+                       # b=background - the whole thing, easy and reliable
+                       if [ "${_flags#*b}" != "$_flags" ] ; then
+                           _restart="{ ${_restart} ; } &"
+                       fi
+
+                       # Do it!
+                       eval "${_restart}"
+                       ;;
+                   unhealthy)
+                       exit 1
+                       ;;
+                   *)
+                       echo "Internal error: unknown action \"$_action\"."
+                       exit 1
+               esac
+           done
+
+           # Only process the first action group.
+           break
+       }
+       shift 3
+    done
+}
 
 ######################################################
 # check that a rpc server is registered with portmap
 # and responding to requests
-# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
+# usage: ctdb_check_rpc SERVICE_NAME VERSION
 ######################################################
-ctdb_check_rpc() {
+ctdb_check_rpc ()
+{
     progname="$1"
-    prognum="$2"
-    version="$3"
-    rpcinfo -u localhost $prognum $version > /dev/null || {
-           echo "ERROR: $progname not responding to rpc requests"
-           exit 1
-    }
+    version="$2"
+
+    if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
+       ctdb_check_rpc_out="ERROR: $progname failed RPC check:
+$ctdb_check_rpc_out"
+       echo "$ctdb_check_rpc_out"
+       return 1
+    fi
 }
 
 ######################################################
@@ -162,7 +323,7 @@ ctdb_check_directories_probe() {
                continue
                ;;
            *)
-               [ -d "$d" ] || return 1
+               [ -d "${d}/." ] || return 1
        esac
     done
 }
@@ -183,16 +344,168 @@ ctdb_check_directories() {
 # check a set of tcp ports
 # usage: ctdb_check_tcp_ports <ports...>
 ######################################################
-ctdb_check_tcp_ports() {
 
-    for p ; do
-       if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
-            if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
-               echo "ERROR: $service_name tcp port $p is not responding"
+# This flag file is created when a service is initially started.  It
+# is deleted the first time TCP port checks for that service succeed.
+# Until then ctdb_check_tcp_ports() prints a more subtle "error"
+# message if a port check fails.
+_ctdb_check_tcp_common ()
+{
+    # Path of the flag file for the current $service_name, kept under
+    # $ctdb_fail_dir.
+    _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
+}
+
+# Create the "started" flag file for $service_name, creating its
+# directory first if necessary.
+ctdb_check_tcp_init ()
+{
+    _ctdb_check_tcp_common
+    mkdir -p "${_ctdb_service_started_file%/*}" # dirname
+    touch "$_ctdb_service_started_file"
+}
+
+# Try each checker named in $CTDB_TCP_PORT_CHECKERS in turn, passing
+# it all of the ports.  Returns 0 as soon as a checker reports all
+# ports listening, 1 as soon as a checker reports a port as not
+# listening, and 127 if no checker produced either result.  Exits with
+# status 1 if called without any ports.
+ctdb_check_tcp_ports()
+{
+    if [ -z "$1" ] ; then
+       echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
+       exit 1
+    fi
+
+    # Set default value for CTDB_TCP_PORT_CHECKERS if unset.
+    # If any of these defaults are unsupported then this variable can
+    # be overridden in /etc/sysconfig/ctdb or via a file in
+    # /etc/ctdb/rc.local.d/.
+    : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
+
+    for _c in $CTDB_TCP_PORT_CHECKERS ; do
+       # Each checker is a function named ctdb_check_tcp_ports_<name>.
+       ctdb_check_tcp_ports_$_c "$@"
+       case "$?" in
+           0)
+               # All ports are listening: the service is up, so the
+               # "started" flag file is no longer needed.
+               _ctdb_check_tcp_common
+               rm -f "$_ctdb_service_started_file"
+               return 0
+               ;;
+           1)
+               # A port is not listening; $_p was left set to that
+               # port by the checker.  While the "started" flag file
+               # still exists only an INFO message is printed.
+               _ctdb_check_tcp_common
+               if [ ! -f "$_ctdb_service_started_file" ] ; then
+                   echo "ERROR: $service_name tcp port $_p is not responding"
+                   debug <<EOF
+$ctdb_check_tcp_ports_debug
+EOF
+               else
+                   echo "INFO: $service_name tcp port $_p is not responding"
+               fi
+
                return 1
-            fi
-       fi
+               ;;
+           127)
+               # Checker unavailable: log it at debug level and move
+               # on to the next checker.
+               debug <<EOF
+ctdb_check_ports - checker $_c not implemented
+output from checker was:
+$ctdb_check_tcp_ports_debug
+EOF
+               ;;
+           *)
+               # Any other exit status is ignored: fall through and
+               # try the next checker.
+               
+       esac
     done
+
+    echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
+
+    return 127
+}
+
+# Checker based on "netstat -l -t -n".
+# Returns 0 if every port given as an argument is shown as listening
+# on 0.0.0.0 or ::, 1 at the first port that is not (leaving $_p set
+# to that port), and 127 if running netstat itself exited with 127.
+# On 1 and 127 details are left in $ctdb_check_tcp_ports_debug.
+ctdb_check_tcp_ports_netstat ()
+{
+    _cmd='netstat -l -t -n'
+    # Run netstat once and match every port against the saved output.
+    _ns=$($_cmd 2>&1)
+    if [ $? -eq 127 ] ; then
+       # netstat probably not installed - unlikely?
+       ctdb_check_tcp_ports_debug="$_ns"
+       return 127
+    fi
+
+    for _p ; do  # process each function argument (port)
+       for _a in '0\.0\.0\.0' '::' ; do
+           # <space>ADDR:PORT<spaces>one non-blank field<spaces>LISTEN
+           _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
+           if echo "$_ns" | grep -E -q "$_pat" ; then
+               # We matched the port, so process next port
+               continue 2
+           fi
+       done
+
+       # We didn't match the port, so flag an error.
+       ctdb_check_tcp_ports_debug="$_cmd shows this output:
+$_ns"
+       return 1
+    done
+
+    return 0
+}
+
+# Checker based on nmap's grepable output for 127.0.0.1.
+# Returns 0 if every port given as an argument is reported as
+# "<port>/open/tcp//", 1 at the first port that is not (leaving $_p
+# set to that port), and 127 if running nmap itself exited with 127.
+# On 1 and 127 details are left in $ctdb_check_tcp_ports_debug.
+ctdb_check_tcp_ports_nmap ()
+{
+    # nmap wants a comma-separated list of ports
+    _ports=""
+    for _p ; do
+       _ports="${_ports}${_ports:+,}${_p}"
+    done
+
+    _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
+
+    _nmap_out=$($_cmd 2>&1)
+    if [ $? -eq 127 ] ; then
+       # nmap probably not installed
+       ctdb_check_tcp_ports_debug="$_nmap_out"
+       return 127
+    fi
+
+    # get the port-related output: keep only what follows "Ports:" and
+    # one whitespace character, on the lines that contain it
+    _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
+
+    for _p ; do
+       # looking for something like this:
+       #  445/open/tcp//microsoft-ds///
+       # possibly followed by a comma
+       _t="$_p/open/tcp//"
+       case "$_port_info" in
+           # The info we're after must be either at the beginning of
+           # the string or it must follow a space.
+            $_t*|*\ $_t*) : ;;
+           *)
+               # Nope, flag an error...
+               ctdb_check_tcp_ports_debug="$_cmd shows this output:
+$_nmap_out"
+               return 1
+       esac
+    done
+
+    return 0
+}
+
+# Use the new "ctdb checktcpport" command to check the port.
+# This is very cheap.
+# Returns 0 if "ctdb checktcpport" exits with 98 for every port given,
+# 1 at the first port for which it exits with 0, and 127 at the first
+# port for which it exits with anything else.  On 1 and 127 the port
+# is left in $_p and details in $ctdb_check_tcp_ports_debug.
+ctdb_check_tcp_ports_ctdb ()
+{
+    for _p in "$@" ; do
+       _cmd="ctdb checktcpport $_p"
+       _out=$($_cmd 2>&1)
+       _ret=$?
+
+       # Couldn't bind, something already listening, next port...
+       [ "$_ret" -eq 98 ] && continue
+
+       if [ "$_ret" -eq 0 ] ; then
+           # The bind worked, so nothing is listening on this port.
+           ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
+           return 1
+       fi
+
+       # Anything else: assume not implemented
+       ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
+$_out"
+       return 127
+    done
+
+    return 0
+}
 
 ######################################################
@@ -231,7 +544,7 @@ kill_tcp_connections() {
     _failed=0
 
     _killcount=0
-    connfile="$CTDB_BASE/state/connections.$_IP"
+    connfile="$CTDB_VARDIR/state/connections.$_IP"
     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 
@@ -252,7 +565,7 @@ kill_tcp_connections() {
        esac
        _killcount=`expr $_killcount + 1`
      done < $connfile
-    /bin/rm -f $connfile
+    rm -f $connfile
 
     [ $_failed = 0 ] || {
        echo "Failed to send killtcp control"
@@ -281,7 +594,7 @@ kill_tcp_connections_local_only() {
     _failed=0
 
     _killcount=0
-    connfile="$CTDB_BASE/state/connections.$_IP"
+    connfile="$CTDB_VARDIR/state/connections.$_IP"
     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 
@@ -294,7 +607,7 @@ kill_tcp_connections_local_only() {
        ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
        _killcount=`expr $_killcount + 1`
      done < $connfile
-    /bin/rm -f $connfile
+    rm -f $connfile
 
     [ $_failed = 0 ] || {
        echo "Failed to send killtcp control"
@@ -323,7 +636,7 @@ tickle_tcp_connections() {
     _failed=0
 
     _killcount=0
-    connfile="$CTDB_BASE/state/connections.$_IP"
+    connfile="$CTDB_VARDIR/state/connections.$_IP"
     netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
     netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
 
@@ -337,7 +650,7 @@ tickle_tcp_connections() {
        echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
        ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
      done < $connfile
-    /bin/rm -f $connfile
+    rm -f $connfile
 
     [ $_failed = 0 ] || {
        echo "Failed to send tickle control"
@@ -350,10 +663,10 @@ tickle_tcp_connections() {
 ########################################################
 startstop_nfs() {
        PLATFORM="unknown"
-       [ -x /etc/init.d/nfsserver ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
                PLATFORM="sles"
        }
-       [ -x /etc/init.d/nfslock ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
                PLATFORM="rhel"
        }
 
@@ -366,6 +679,12 @@ startstop_nfs() {
                stop)
                        service nfsserver stop > /dev/null 2>&1
                        ;;
+               restart)
+                       set_proc "fs/nfsd/threads" 0
+                       service nfsserver stop > /dev/null 2>&1
+                       pkill -9 nfsd
+                       service nfsserver start
+                       ;;
                esac
                ;;
        rhel)
@@ -375,8 +694,16 @@ startstop_nfs() {
                        service nfs start
                        ;;
                stop)
+                       service nfs stop
+                       service nfslock stop
+                       ;;
+               restart)
+                       set_proc "fs/nfsd/threads" 0
                        service nfs stop > /dev/null 2>&1
                        service nfslock stop > /dev/null 2>&1
+                       pkill -9 nfsd
+                       service nfslock start
+                       service nfs start
                        ;;
                esac
                ;;
@@ -392,10 +719,10 @@ startstop_nfs() {
 ########################################################
 startstop_nfslock() {
        PLATFORM="unknown"
-       [ -x /etc/init.d/nfsserver ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
                PLATFORM="sles"
        }
-       [ -x /etc/init.d/nfslock ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
                PLATFORM="rhel"
        }
 
@@ -410,6 +737,10 @@ startstop_nfslock() {
                stop)
                        service nfsserver stop > /dev/null 2>&1
                        ;;
+               restart)
+                       service nfsserver stop
+                       service nfsserver start
+                       ;;
                esac
                ;;
        rhel)
@@ -420,6 +751,10 @@ startstop_nfslock() {
                stop)
                        service nfslock stop > /dev/null 2>&1
                        ;;
+               restart)
+                       service nfslock stop
+                       service nfslock start
+                       ;;
                esac
                ;;
        *)
@@ -429,24 +764,12 @@ startstop_nfslock() {
        esac
 }
 
-# better use delete_ip_from_iface() together with add_ip_to_iface
-# remove_ip should be removed in future
-remove_ip() {
-       local _ip_maskbits=$1
-       local _iface=$2
-       local _ip=`echo "$_ip_maskbits" | cut -d '/' -f1`
-       local _maskbits=`echo "$_ip_maskbits" | cut -d '/' -f2`
-
-       delete_ip_from_iface "$_iface" "$_ip" "$_maskbits"
-       return $?
-}
-
 add_ip_to_iface()
 {
        local _iface=$1
        local _ip=$2
        local _maskbits=$3
-       local _state_dir="$CTDB_BASE/state/interface_modify"
+       local _state_dir="$CTDB_VARDIR/state/interface_modify"
        local _lockfile="$_state_dir/$_iface.flock"
        local _readd_base="$_state_dir/$_iface.readd.d"
 
@@ -469,7 +792,7 @@ delete_ip_from_iface()
        local _iface=$1
        local _ip=$2
        local _maskbits=$3
-       local _state_dir="$CTDB_BASE/state/interface_modify"
+       local _state_dir="$CTDB_VARDIR/state/interface_modify"
        local _lockfile="$_state_dir/$_iface.flock"
        local _readd_base="$_state_dir/$_iface.readd.d"
 
@@ -493,7 +816,7 @@ setup_iface_ip_readd_script()
        local _ip=$2
        local _maskbits=$3
        local _readd_script=$4
-       local _state_dir="$CTDB_BASE/state/interface_modify"
+       local _state_dir="$CTDB_VARDIR/state/interface_modify"
        local _lockfile="$_state_dir/$_iface.flock"
        local _readd_base="$_state_dir/$_iface.readd.d"
 
@@ -519,16 +842,17 @@ setup_iface_ip_readd_script()
 # ctdb_check_counter_limit succeeds when count >= <limit>
 ########################################################
 _ctdb_counter_common () {
-    _counter_file="$ctdb_fail_dir/$service_name"
+    _service_name="${1:-${service_name}}"
+    _counter_file="$ctdb_fail_dir/$_service_name"
     mkdir -p "${_counter_file%/*}" # dirname
 }
 ctdb_counter_init () {
-    _ctdb_counter_common
+    _ctdb_counter_common "$1"
 
     >"$_counter_file"
 }
 ctdb_counter_incr () {
-    _ctdb_counter_common
+    _ctdb_counter_common "$1"
 
     # unary counting!
     echo -n 1 >> "$_counter_file"
@@ -548,12 +872,83 @@ ctdb_check_counter_limit () {
        echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
     fi
 }
+# usage: ctdb_check_counter_equal <limit>
+# Returns 1 when the failure count for $service_name is exactly
+# <limit>, 0 otherwise.
+ctdb_check_counter_equal () {
+    _ctdb_counter_common
+    _limit=$1
+
+    # unary counting: the size of the counter file is the count, and a
+    # file that can't be stat'ed counts as 0
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+
+    ! [ $_size -eq $_limit ]
+}
+# usage: ctdb_check_counter [MSG [OP [LIMIT [SERVICE_NAME]]]]
+#
+# Compare the failure count of a service with LIMIT using the test(1)
+# integer operator OP.  All arguments are optional:
+#   MSG          - "error" (default): print a message and exit 1 when
+#                  the comparison holds; anything else: return 1
+#                  silently instead
+#   OP           - defaults to -ge
+#   LIMIT        - defaults to $service_fail_limit
+#   SERVICE_NAME - passed to _ctdb_counter_common
+# Returns 0 when the comparison does not hold.
+ctdb_check_counter () {
+    _msg="${1:-error}"  # "error"  - anything else is silent on fail
+    _op="${2:--ge}"  # an integer operator supported by test
+    _limit="${3:-${service_fail_limit}}"
+    # Drop the (at most three) arguments consumed above.  A plain
+    # "shift 3" fails when fewer were given: some shells abort, others
+    # shift nothing and so leave MSG in $1 to be taken as the service
+    # name below.
+    if [ $# -ge 3 ] ; then
+       shift 3
+    else
+       shift $#
+    fi
+    _ctdb_counter_common "$1"
+
+    # unary counting!
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+    if [ "$_size" "$_op" "$_limit" ] ; then
+       if [ "$_msg" = "error" ] ; then
+           echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
+           exit 1
+       else
+           return 1
+       fi
+    fi
+}
+
 ########################################################
 
-ctdb_spool_dir="/var/spool/ctdb"
-ctdb_status_dir="$ctdb_spool_dir/status"
-ctdb_fail_dir="$ctdb_spool_dir/failcount"
-ctdb_active_dir="$ctdb_spool_dir/active"
+ctdb_status_dir="$CTDB_VARDIR/status"
+ctdb_fail_dir="$CTDB_VARDIR/failcount"
+
+# Set $service_state_dir to the per-service directory under
+# $CTDB_VARDIR/state and make sure that it exists.
+# Optional 1st arg is the service name, defaulting to $service_name.
+# Exits with status 1 if the directory can not be created.
+ctdb_setup_service_state_dir ()
+{
+    service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
+
+    if ! mkdir -p "$service_state_dir" ; then
+       echo "Error creating state dir \"$service_state_dir\""
+       exit 1
+    fi
+}
+
+########################################################
+# Managed status history, for auto-start/stop
+
+ctdb_managed_dir="$CTDB_VARDIR/managed_history"
+
+# Set $_service_name (1st arg, defaulting to $service_name) and
+# $_ctdb_managed_file (the file of that name in $ctdb_managed_dir).
+_ctdb_managed_common ()
+{
+    _service_name="${1:-${service_name}}"
+    _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
+}
+
+# Create the managed-history file for a service, creating
+# $ctdb_managed_dir first if necessary.
+# Optional 1st arg is the service name, defaulting to $service_name.
+ctdb_service_managed ()
+{
+    _ctdb_managed_common "$@"
+    mkdir -p "$ctdb_managed_dir"
+    touch "$_ctdb_managed_file"
+}
+
+# Remove the managed-history file for a service, if there is one.
+# Optional 1st arg is the service name, defaulting to $service_name.
+ctdb_service_unmanaged ()
+{
+    _ctdb_managed_common "$@"
+    rm -f "$_ctdb_managed_file"
+}
+
+# Succeed if the managed-history file for a service exists.
+# Optional 1st arg is the service name, defaulting to $service_name.
+is_ctdb_previously_managed_service ()
+{
+    _ctdb_managed_common "$@"
+    [ -f "$_ctdb_managed_file" ]
+}
+
+########################################################
+# Check and set status
 
 log_status_cat ()
 {
@@ -589,99 +984,281 @@ ctdb_setstatus ()
     esac
 }
 
+##################################################################
+# Reconfigure a service on demand
+
+_ctdb_service_reconfigure_common ()
+{
+    _d="$ctdb_status_dir/${1:-${service_name}}"
+    mkdir -p "$_d"
+    _ctdb_service_reconfigure_flag="$_d/reconfigure"
+}
+
+# Succeed if the reconfigure flag file of a service exists.
+# $1 - service name (optional, defaults to $service_name)
 ctdb_service_needs_reconfigure ()
 {
-    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
+    _ctdb_service_reconfigure_common "$@"
+    [ -e "$_ctdb_service_reconfigure_flag" ]
 }
 
+# Schedule a reconfigure of a service by creating (or truncating) its
+# reconfigure flag file.
+# $1 - service name (optional, defaults to $service_name)
 ctdb_service_set_reconfigure ()
 {
-    d="$ctdb_status_dir/$service_name"
-    mkdir -p "$d"
-    >"$d/reconfigure"
+    _ctdb_service_reconfigure_common "$@"
+    >"$_ctdb_service_reconfigure_flag"
 }
 
+# Cancel a scheduled reconfigure of a service by removing its
+# reconfigure flag file.
+# $1 - service name (optional, defaults to $service_name)
 ctdb_service_unset_reconfigure ()
 {
-    rm -f "$ctdb_status_dir/$service_name/reconfigure"
+    _ctdb_service_reconfigure_common "$@"
+    rm -f "$_ctdb_service_reconfigure_flag"
 }
 
+# Reconfigure a service now: clear its reconfigure flag, run
+# service_reconfigure and, if that succeeds, call ctdb_counter_init
+# for the service.  Returns the status of service_reconfigure when it
+# fails.  All arguments are passed through to those functions.
 ctdb_service_reconfigure ()
 {
-    if [ -n "$service_reconfigure" ] ; then
-       eval $service_reconfigure
+    echo "Reconfiguring service \"$@\"..."
+    ctdb_service_unset_reconfigure "$@"
+    service_reconfigure "$@" || return $?
+    ctdb_counter_init "$@"
+}
+
+# Default service_reconfigure() function.
+service_reconfigure ()
+{
+    service "${1:-$service_name}" restart
+}
+
+ctdb_reconfigure_try_lock ()
+{
+    
+    _ctdb_service_reconfigure_common "$@"
+    _lock="${_d}/reconfigure_lock"
+    touch "$_lock"
+
+    (
+       flock 0
+       # This is overkill but will work if we need to extend this to
+       # allow certain events to run multiple times in parallel
+       # (e.g. takeip) and write multiple PIDs to the file.
+       read _locker_event 
+       if [ -n "$_locker_event" ] ; then
+           while read _pid ; do
+               if [ -n "$_pid" -a "$_pid" != $$ ] && \
+                   kill -0 "$_pid" 2>/dev/null ; then
+                   exit 1
+               fi
+           done
+       fi
+
+       printf "%s\n%s\n" "$event_name" $$ >"$_lock"
+       exit 0
+    ) <"$_lock"
+}
+
+ctdb_replay_monitor_status ()
+{
+    echo "Replaying previous status for this script due to reconfigure..."
+    # Leading colon (':') is missing in some versions...
+    _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
+    # Output looks like this:
+    # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
+    # This is the cheapest way of getting fields in the middle.
+    set -- $(IFS=":" ; echo $_out)
+    _code="$3"
+    _status="$4"
+    # The error output field can include colons so we'll try to
+    # preserve them.  The weak checking at the beginning tries to make
+    # this work for both broken (no leading ':') and fixed output.
+    _out="${_out%:}"
+    _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
+    case "$_status" in
+       OK) : ;;  # Do nothing special.
+       TIMEDOUT)
+           # Recast this as an error, since we can't exit with the
+           # correct negative number.
+           _code=1
+           _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
+           ;;
+       DISABLED)
+           # Recast this as an OK, since we can't exit with the
+           # correct negative number.
+           _code=0
+           _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
+           ;;
+       *) : ;;  # Must be ERROR, do nothing special.
+    esac
+    echo "$_err_out"
+    exit $_code
+}
+
+# Handle scheduled reconfigures of a service for the monitor,
+# ipreallocated and reconfigure events; a no-op for any other event.
+# $1 - service name (optional, defaults to $service_name)
+# Depending on the event and on whether the reconfigure lock could be
+# taken this either returns 0 or exits the calling script.
+ctdb_service_check_reconfigure ()
+{
+    [ -n "$1" ] || set -- "$service_name"
+
+    # We only care about some events in this function.  For others we
+    # return now.
+    case "$event_name" in
+       monitor|ipreallocated|reconfigure) : ;;
+       *) return 0 ;;
+    esac
+
+    if ctdb_reconfigure_try_lock "$@" ; then
+       # No events covered by this function are running, so proceed
+       # with gay abandon.
+       case "$event_name" in
+           reconfigure)
+               (ctdb_service_reconfigure "$@")
+               exit $?
+               ;;
+           ipreallocated)
+               if ctdb_service_needs_reconfigure "$@" ; then
+                   ctdb_service_reconfigure "$@"
+               fi
+               ;;
+           monitor)
+               if ctdb_service_needs_reconfigure "$@" ; then
+                   ctdb_service_reconfigure "$@"
+                   # Given that the reconfigure might not have
+                   # resulted in the service being stable yet, we
+                   # replay the previous status since that's the best
+                   # information we have.
+                   ctdb_replay_monitor_status
+               fi
+               ;;
+       esac
     else
-       service "$service_name" restart
+       # Somebody else is running an event we don't want to collide
+       # with.  We proceed with caution.
+       case "$event_name" in
+           reconfigure)
+               # Tell whoever called us to retry.
+               exit 2
+               ;;
+           ipreallocated)
+               # Defer any scheduled reconfigure and just run the
+               # rest of the ipreallocated event, as per the
+               # eventscript.  There's an assumption here that the
+               # event doesn't depend on any scheduled reconfigure.
+               # This is true in the current code.
+               return 0
+               ;;
+           monitor)
+               # There is most likely a reconfigure in progress so
+               # the service is possibly unstable.  As above, we
+               # defer any scheduled reconfigure.  We also replay
+               # the previous monitor status since that's the best
+               # information we have.
+               ctdb_replay_monitor_status
+               ;;
+       esac
     fi
-    ctdb_service_unset_reconfigure
-    ctdb_counter_init
 }
 
+##################################################################
+# Does CTDB manage this service? - and associated auto-start/stop
+
+# Backward compatibility helper for is_ctdb_managed_service.
+# $1 - value of a legacy CTDB_MANAGES_* setting
+# $2 - service name that setting stands for
+# Appends $2 to $CTDB_MANAGED_SERVICES when $1 is "yes" and $2 is the
+# service being looked up ($_service_name, set by the caller).
 ctdb_compat_managed_service ()
 {
-    if [ "$1" = "yes" ] ; then
-       t="$t $2 "
+    if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then
+       CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
     fi
 }
 
+# Succeed if a service is listed in $CTDB_MANAGED_SERVICES, either
+# directly or via one of the legacy CTDB_MANAGES_* settings.
+# $1 - service name (optional, defaults to $service_name)
 is_ctdb_managed_service ()
 {
+    _service_name="${1:-${service_name}}"
+
+    # $t is used just for readability and, by adding leading/trailing
+    # spaces, to allow whole-word matching of service names
     t=" $CTDB_MANAGED_SERVICES "
 
+    # Return 0 if "<space>$_service_name<space>" appears in $t
+    if [ "${t#* ${_service_name} }" != "${t}" ] ; then
+       return 0
+    fi
+
+    # If above didn't match then update $CTDB_MANAGED_SERVICES for
+    # backward compatibility and try again.
     ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
     ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
     ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
-    ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
     ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
     ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
     ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
     ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
+    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"
 
-    # Returns 0 if "<space>$service_name<space>" appears in $t
-    [ "${t#* ${service_name} }" != "${t}" ]
+    t=" $CTDB_MANAGED_SERVICES "
+
+    # Return 0 if "<space>$_service_name<space>" appears in $t
+    [ "${t#* ${_service_name} }" != "${t}" ]
 }
 
+# Automatically start a service that has become managed, or stop one
+# that is no longer managed.  Only acts during the monitor event and
+# only when CTDB_SERVICE_AUTOSTARTSTOP is "yes".
+# $1 - service name (optional, defaults to $service_name)
+# Exits the calling script with the status of the start/stop when it
+# starts or stops the service; otherwise returns 0.
 ctdb_start_stop_service ()
 {
-    _active="$ctdb_active_dir/$service_name"
+    # Do nothing unless configured to...
+    [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
+
+    _service_name="${1:-${service_name}}"
+
+    [ "$event_name" = "monitor" ] || return 0
 
-    if is_ctdb_managed_service ; then
-       if ! [ -e "$_active" ] ; then
-           echo "Starting service $service_name"
-           ctdb_service_start || exit $?
-           mkdir -p "$ctdb_active_dir"
-           touch "$_active"
-           exit 0
+    if is_ctdb_managed_service "$_service_name" ; then
+       if ! is_ctdb_previously_managed_service "$_service_name" ; then
+           echo "Starting service \"$_service_name\" - now managed"
+           ctdb_service_start "$_service_name"
+           exit $?
        fi
-    elif ! is_ctdb_managed_service ; then
-       if [ -e "$_active" ] ; then
-           echo "Stopping service $service_name"
-           ctdb_service_stop || exit $?
-           rm -f "$_active"
+    else
+       if is_ctdb_previously_managed_service "$_service_name" ; then
+           echo "Stopping service \"$_service_name\" - no longer managed"
+           ctdb_service_stop "$_service_name"
+           exit $?
        fi
-       exit 0
     fi
 }
 
+# Start a service: mark it managed, run service_start and, if that
+# succeeds, run ctdb_counter_init and ctdb_check_tcp_init.
+# Returns the status of service_start when it fails.
+# $1 - service name (optional)
 ctdb_service_start ()
 {
-    if [ -n "$service_start" ] ; then
-       eval $service_start
-    else
-       service "$service_name" start
-    fi
-    ctdb_counter_init
+    # The service is marked managed if we've ever tried to start it.
+    ctdb_service_managed "$@"
+
+    # Pass our arguments through unchanged instead of filling in a
+    # default name here: service_start needs to know whether or not it
+    # was given a service name.
+    service_start "$@" || return $?
+
+    ctdb_counter_init "$@"
+    ctdb_check_tcp_init
 }
 
+# Stop a service: remove its managed marker, then run service_stop
+# with the same arguments.  Returns the status of service_stop.
+# $1 - service name (optional)
 ctdb_service_stop ()
 {
-    if [ -n "$service_stop" ] ; then
-       eval $service_stop
-    else
-       service "$service_name" stop
-    fi
+    ctdb_service_unmanaged "$@"
+    service_stop "$@"
 }
 
+# Default service_start() and service_stop() functions.
+# These may be overridden in an eventscript.  When overriding, the
+# following convention must be followed.  If these functions are
+# called with no arguments then they may use internal logic to
+# determine whether the service is managed and, therefore, whether
+# they should take any action.  However, if the service name is
+# specified as an argument then an attempt must be made to start or
+# stop the service.  This is because the auto-start/stop code calls
+# them with the service name as an argument.
+service_start ()
+{
+    # Use the given service name, or $service_name if there is none.
+    set -- "${1:-${service_name}}"
+    service "$1" start
+}
+
+service_stop ()
+{
+    # Use the given service name, or $service_name if there is none.
+    set -- "${1:-${service_name}}"
+    service "$1" stop
+}
+
+##################################################################
+
 ctdb_standard_event_handler ()
 {
     case "$1" in
@@ -761,10 +1338,75 @@ ipv4_is_valid_addr()
        return 0;
 }
 
+# iptables doesn't like being re-entered, so flock-wrap it.
+# Every call in the event scripts goes through this function, which
+# runs /sbin/iptables with the given arguments while holding a lock
+# on $CTDB_VARDIR/iptables-ctdb.flock (waiting up to 30 seconds for
+# it).  The lock file path is quoted so that a $CTDB_VARDIR
+# containing whitespace or glob characters is still passed to flock
+# as a single argument.
+iptables()
+{
+       flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
+}
+
+########################################################
+# tickle handling
+########################################################
+
+# Temporary directory for tickles.
+tickledir="$CTDB_VARDIR/state/tickles"
+mkdir -p "$tickledir"
+
+update_tickles ()
+{
+       _port="$1"
+
+       mkdir -p "$tickledir" # Just in case
+
+       # Who am I?
+       _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
+
+       # What public IPs do I hold?
+       _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
+
+       # IPs as a regexp choice
+       _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
+
+       # Record connections to our public IPs in a temporary file
+       _my_connections="${tickledir}/${_port}.connections"
+       rm -f "$_my_connections"
+       netstat -tn |
+       awk -v destpat="^${_ipschoice}:${_port}\$" \
+         '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
+       sort >"$_my_connections"
+
+       # Record our current tickles in a temporary file
+       _my_tickles="${tickledir}/${_port}.tickles"
+       rm -f "$_my_tickles"
+       for _i in $_ips ; do
+               ctdb -Y gettickles $_i $_port | 
+               awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+       done |
+       sort >"$_my_tickles"
+
+       # Add tickles for connections that we haven't already got tickles for
+       comm -23 "$_my_connections" "$_my_tickles" |
+       while read _src _dst ; do
+               ctdb addtickle $_src $_dst
+       done
+
+       # Remove tickles for connections that are no longer there
+       comm -13 "$_my_connections" "$_my_tickles" |
+       while read _src _dst ; do
+               ctdb deltickle $_src $_dst
+       done
+
+       rm -f "$_my_connections" "$_my_tickles" 
+}
+
 ########################################################
 # load a site local config file
 ########################################################
 
+[ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
+       . "$CTDB_RC_LOCAL"
+}
+
 [ -x $CTDB_BASE/rc.local ] && {
        . $CTDB_BASE/rc.local
 }
@@ -775,6 +1417,12 @@ ipv4_is_valid_addr()
        done
 }
 
+# We'll call this here to ensure $CTDB_CURRENT_DEBUGLEVEL is set.
+# This gives us a chance to override the debug level using a file in
+# $CTDB_BASE/rc.local.d/.
+ctdb_set_current_debuglevel
+
 script_name="${0##*/}"       # basename
 service_name="$script_name"  # default is just the script name
 service_fail_limit=1
+event_name="$1"