Eventscripts: In 60.nfs don't restart NFS when restarting rpc.lockd.

[ctdb.git] / config / functions
diff --git a/config/functions b/config/functions

old mode 100644 (file)

new mode 100755 (executable)

index 87026ac..b049652
--- a/config/functions
+++ b/config/functions
@@ -1,31 +1,42 @@
+# Hey Emacs, this is a -*- shell-script -*- !!!
+
  # utility functions for ctdb event scripts
  
  PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
  
+[ -z "$CTDB_VARDIR" ] && {
+    export CTDB_VARDIR="/var/ctdb"
+}
+[ -z "$CTDB_ETCDIR" ] && {
+    export CTDB_ETCDIR="/etc"
+}
+
  #######################################
  # pull in a system config file, if any
-loadconfig() {
-
-    if [ "$1" != "ctdb" ] ; then
-       loadconfig "ctdb"
-    fi
+_loadconfig() {
  
      if [ -z "$1" ] ; then
         foo="${service_config:-${service_name}}"
         if [ -n "$foo" ] ; then
             loadconfig "$foo"
         fi
+    elif [ "$1" != "ctdb" ] ; then
+       loadconfig "ctdb"
      fi
  
-    if [ -f /etc/sysconfig/$1 ]; then
-       . /etc/sysconfig/$1
-    elif [ -f /etc/default/$1 ]; then
-       . /etc/default/$1
+    if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
+       . $CTDB_ETCDIR/sysconfig/$1
+    elif [ -f $CTDB_ETCDIR/default/$1 ]; then
+       . $CTDB_ETCDIR/default/$1
      elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
         . $CTDB_BASE/sysconfig/$1
      fi
  }
  
+loadconfig () {
+    _loadconfig "$@"
+}
+
  ##############################################################
  # determine on what type of system (init style) we are running
  detect_init_style() {
@@ -43,7 +54,10 @@ detect_init_style() {
  
  ######################################################
  # simulate /sbin/service on platforms that don't have it
-service() { 
+# _service() makes it easier to hook the service() function for
+# testing.
+_service ()
+{
    _service_name="$1"
    _op="$2"
  
@@ -51,95 +65,185 @@ service() {
    [ -z "$_service_name" ] && return
  
    if [ -x /sbin/service ]; then
-      /sbin/service "$_service_name" "$_op"
-  elif [ -x /etc/init.d/$_service_name ]; then
-      /etc/init.d/$_service_name "$_op"
-  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
-      /etc/rc.d/init.d/$_service_name "$_op"
+      $_nice /sbin/service "$_service_name" "$_op"
+  elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
+      $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
+  elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
+      $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
    fi
  }
  
+service()
+{
+    _nice=""
+    _service "$@"
+}
+
  ######################################################
  # simulate /sbin/service (niced) on platforms that don't have it
-nice_service() { 
-  # do nothing, when no service was specified
-  [ -z "$1" ] && return
-
-    nice service "$@"
+nice_service()
+{
+    _nice="nice"
+    _service "$@"
  }
  
  ######################################################
-# wait for a command to return a zero exit status
-# usage: ctdb_wait_command SERVICE_NAME <command>
-######################################################
-ctdb_wait_command() {
-  service_name="$1"
-  wait_cmd="$2"
-  [ -z "$wait_cmd" ] && return;
-  all_ok=0
-  echo "Waiting for service $service_name to start"
-  while [ $all_ok -eq 0 ]; do
-         $wait_cmd > /dev/null 2>&1 && all_ok=1
-         ctdb status > /dev/null 2>&1 || {
-               echo "ctdb daemon has died. Exiting wait for $service_name"
-               exit 1
-         }
-         [ $all_ok -eq 1 ] || sleep 1
-  done
-  echo "Local service $service_name is up"
+# wrapper around /proc/ settings to allow them to be hooked
+# for testing
+# 1st arg is relative path under /proc/, 2nd arg is value to set
+set_proc ()
+{
+    echo "$2" >"/proc/$1"
  }
  
+######################################################
+# wrapper around getting file contents from /proc/ to allow
+# this to be hooked for testing
+# 1st arg is relative path under /proc/
+get_proc ()
+{
+    cat "/proc/$1"
+}
  
  ######################################################
-# wait for a set of tcp ports
-# usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
+# Check that an RPC service is healthy -
+# this includes allowing a certain number of failures
+# before marking the NFS service unhealthy.
+#
+# usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
+#
+# each triple is a set of 3 arguments: an operator, a 
+# fail count limit and an action string.
+#
+# For example:
+#
+#      nfs_check_rpc_service "lockd" \
+#          -ge 15 "verbose restart unhealthy" \
+#          -eq 10 "restart:bs"
+#
+# says that if lockd is down for 15 iterations then do
+# a verbose restart of lockd and mark the node unhealthy.
+# Before this, after 10 iterations of failure, the
+# service is restarted silently in the background.
+# Order is important: the failure counts need to be
+# specified in descending order because processing stops
+# after the first condition that is true.
  ######################################################
-ctdb_wait_tcp_ports() {
-  service_name="$1"
-  shift
-  wait_ports="$*"
-  [ -z "$wait_ports" ] && return;
-  all_ok=0
-  echo "Waiting for tcp service $service_name to start"
-  while [ $all_ok -eq 0 ]; do
-         all_ok=1
-         for p in $wait_ports; do
-             if [ -x /usr/bin/netcat ]; then
-                 /usr/bin/netcat -z 127.0.0.1 $p > /dev/null || all_ok=0
-             elif [ -x /usr/bin/nc ]; then
-                 /usr/bin/nc -z 127.0.0.1 $p > /dev/null || all_ok=0
-             elif [ -x /usr/bin/netstat ]; then
-                 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
-             elif [ -x /bin/netstat ]; then
-                 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
-             else 
-                 echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
-                 return 127
-             fi
-         done
-         [ $all_ok -eq 1 ] || sleep 1
-         ctdb status > /dev/null 2>&1 || {
-               echo "ctdb daemon has died. Exiting tcp wait $service_name"
-               return 1
-         }
-  done
-  echo "Local tcp services for $service_name are up"
-}
+nfs_check_rpc_service ()
+{
+    _prog_name="$1" ; shift
  
+    _version=1
+    _rpc_prog="$_prog_name"
+    _restart=""
+    _opts=""
+    case "$_prog_name" in
+       knfsd)
+           _rpc_prog=nfs
+           _version=3
+           _restart="echo 'Trying to restart NFS service'"
+           _restart="${_restart}; startstop_nfs restart"
+           ;;
+       mountd)
+           _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+           ;;
+       rquotad)
+           _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
+           ;;
+       lockd)
+           _rpc_prog=nlockmgr
+           _version=4
+           _restart="echo 'Trying to restart lock manager service'"
+           _restart="${_restart}; startstop_nfslock restart"
+           ;;
+       statd)
+           _rpc_prog=status
+           _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
+           _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
+           _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
+           ;;
+       *)
+           echo "Internal error: unknown RPC program \"$_prog_name\"."
+           exit 1
+    esac
+
+    _service_name="nfs_${_prog_name}"
+
+    if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
+       ctdb_counter_init "$_service_name"
+       return 0
+    fi
+
+    ctdb_counter_incr "$_service_name"
+
+    while [ -n "$3" ] ; do
+       ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
+           for _action in $3 ; do
+               case "$_action" in
+                   verbose)
+                       echo "$ctdb_check_rpc_out"
+                       ;;
+                   restart|restart:*)
+                       # No explicit command specified, construct rpc command.
+                       if [ -z "$_restart" ] ; then
+                           _p="rpc.${_prog_name}"
+                           _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
+                           _restart="${_restart}; killall -q -9 $_p"
+                           _restart="${_restart}; $_p $_opts"
+                       fi
+
+                       # Process restart flags...
+                       _flags="${_action#restart:}"
+                       # There may not have been a colon...
+                       [ "$_flags" != "$_action" ] || _flags=""
+                       # q=quiet - everything to /dev/null
+                       if [ "${_flags#*q}" != "$_flags" ] ; then
+                           _restart="{ ${_restart} ; } >/dev/null 2>&1"
+                       fi
+                       # s=stealthy - last command to /dev/null
+                       if [ "${_flags#*s}" != "$_flags" ] ; then
+                           _restart="${_restart} >/dev/null 2>&1"
+                       fi
+                       # b=background - the whole thing, easy and reliable
+                       if [ "${_flags#*b}" != "$_flags" ] ; then
+                           _restart="{ ${_restart} ; } &"
+                       fi
+
+                       # Do it!
+                       eval "${_restart}"
+                       ;;
+                   unhealthy)
+                       exit 1
+                       ;;
+                   *)
+                       echo "Internal error: unknown action \"$_action\"."
+                       exit 1
+               esac
+           done
+
+           # Only process the first action group.
+           break
+       }
+       shift 3
+    done
+}
  
  ######################################################
  # check that a rpc server is registered with portmap
  # and responding to requests
-# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
+# usage: ctdb_check_rpc SERVICE_NAME VERSION
  ######################################################
-ctdb_check_rpc() {
+ctdb_check_rpc ()
+{
      progname="$1"
-    prognum="$2"
-    version="$3"
-    rpcinfo -u localhost $prognum $version > /dev/null || {
-           echo "ERROR: $progname not responding to rpc requests"
-           exit 1
-    }
+    version="$2"
+
+    if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
+       ctdb_check_rpc_out="ERROR: $progname failed RPC check:
+$ctdb_check_rpc_out"
+       echo "$ctdb_check_rpc_out"
+       return 1
+    fi
  }
  
  ######################################################
@@ -154,7 +258,7 @@ ctdb_check_directories_probe() {
                 continue
                 ;;
             *)
-               [ -d "$d" ] || return 1
+               [ -d "${d}/." ] || return 1
         esac
      done
  }
@@ -223,7 +327,7 @@ kill_tcp_connections() {
      _failed=0
  
      _killcount=0
-    connfile="$CTDB_BASE/state/connections.$_IP"
+    connfile="$CTDB_VARDIR/state/connections.$_IP"
      netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
      netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
  
@@ -244,7 +348,7 @@ kill_tcp_connections() {
         esac
         _killcount=`expr $_killcount + 1`
       done < $connfile
-    /bin/rm -f $connfile
+    rm -f $connfile
  
      [ $_failed = 0 ] || {
         echo "Failed to send killtcp control"
@@ -273,7 +377,7 @@ kill_tcp_connections_local_only() {
      _failed=0
  
      _killcount=0
-    connfile="$CTDB_BASE/state/connections.$_IP"
+    connfile="$CTDB_VARDIR/state/connections.$_IP"
      netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
      netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
  
@@ -286,7 +390,7 @@ kill_tcp_connections_local_only() {
         ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
         _killcount=`expr $_killcount + 1`
       done < $connfile
-    /bin/rm -f $connfile
+    rm -f $connfile
  
      [ $_failed = 0 ] || {
         echo "Failed to send killtcp control"
@@ -307,15 +411,45 @@ kill_tcp_connections_local_only() {
      echo "killed $_killcount TCP connections to released IP $_IP"
  }
  
+##################################################################
+# tickle any TCP connections with the given IP
+##################################################################
+tickle_tcp_connections() {
+    _IP="$1"
+    _failed=0
+
+    _killcount=0
+    connfile="$CTDB_VARDIR/state/connections.$_IP"
+    netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
+    netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
+
+    while read dest src; do
+       srcip=`echo $src | sed -e "s/:[^:]*$//"`
+       srcport=`echo $src | sed -e "s/^.*://"`
+       destip=`echo $dest | sed -e "s/:[^:]*$//"`
+       destport=`echo $dest | sed -e "s/^.*://"`
+       echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
+       ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
+       echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
+       ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
+     done < $connfile
+    rm -f $connfile
+
+    [ $_failed = 0 ] || {
+       echo "Failed to send tickle control"
+       return;
+    }
+}
+
  ########################################################
  # start/stop the nfs service on different platforms
  ########################################################
  startstop_nfs() {
         PLATFORM="unknown"
-       [ -x /etc/init.d/nfsserver ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
                 PLATFORM="sles"
         }
-       [ -x /etc/init.d/nfslock ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
                 PLATFORM="rhel"
         }
  
@@ -328,6 +462,12 @@ startstop_nfs() {
                 stop)
                         service nfsserver stop > /dev/null 2>&1
                         ;;
+               restart)
+                       set_proc "fs/nfsd/threads" 0
+                       service nfsserver stop > /dev/null 2>&1
+                       pkill -9 nfsd
+                       service nfsserver start
+                       ;;
                 esac
                 ;;
         rhel)
@@ -337,8 +477,16 @@ startstop_nfs() {
                         service nfs start
                         ;;
                 stop)
+                       service nfs stop
+                       service nfslock stop
+                       ;;
+               restart)
+                       set_proc "fs/nfsd/threads" 0
                         service nfs stop > /dev/null 2>&1
                         service nfslock stop > /dev/null 2>&1
+                       pkill -9 nfsd
+                       service nfslock start
+                       service nfs start
                         ;;
                 esac
                 ;;
@@ -354,10 +502,10 @@ startstop_nfs() {
  ########################################################
  startstop_nfslock() {
         PLATFORM="unknown"
-       [ -x /etc/init.d/nfsserver ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
                 PLATFORM="sles"
         }
-       [ -x /etc/init.d/nfslock ] && {
+       [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
                 PLATFORM="rhel"
         }
  
@@ -372,6 +520,10 @@ startstop_nfslock() {
                 stop)
                         service nfsserver stop > /dev/null 2>&1
                         ;;
+               restart)
+                       service nfsserver stop
+                       service nfsserver start
+                       ;;
                 esac
                 ;;
         rhel)
@@ -382,6 +534,10 @@ startstop_nfslock() {
                 stop)
                         service nfslock stop > /dev/null 2>&1
                         ;;
+               restart)
+                       service nfslock stop
+                       service nfslock start
+                       ;;
                 esac
                 ;;
         *)
@@ -391,28 +547,74 @@ startstop_nfslock() {
         esac
  }
  
-########################################################
-# remove an ip address from an interface
-########################################################
-remove_ip() {
-       # the ip tool will delete all secondary IPs if this is the primary.
-       # To work around this _very_ annoying behaviour we have to keep a
-       # record of the secondaries and re-add them afterwards. yuck
-       secondaries=""
-       if ip addr list dev $2 primary | grep -q "inet $1 " ; then
-           secondaries=`ip addr list dev $2 secondary | grep " inet " | awk '{print $2}'`
-       fi
-       ip addr del $1 dev $2 >/dev/null 2>/dev/null || failed=1
-       [ -z "$secondaries" ] || {
-           for i in $secondaries; do
-               if ip addr list dev $2 | grep -q "inet $i" ; then
-                   echo "kept secondary $i on dev $2"
-               else 
-                   echo "re-adding secondary address $i to dev $2"
-                   ip addr add $i dev $2 || failed=1           
-               fi
-           done
+add_ip_to_iface()
+{
+       local _iface=$1
+       local _ip=$2
+       local _maskbits=$3
+       local _state_dir="$CTDB_VARDIR/state/interface_modify"
+       local _lockfile="$_state_dir/$_iface.flock"
+       local _readd_base="$_state_dir/$_iface.readd.d"
+
+       mkdir -p $_state_dir || {
+               ret=$?
+               echo "Failed to mkdir -p $_state_dir - $ret"
+               return $ret
+       }
+
+       test -f $_lockfile || {
+               touch $_lockfile
+       }
+
+       flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
+       return $?
+}
+
+delete_ip_from_iface()
+{
+       local _iface=$1
+       local _ip=$2
+       local _maskbits=$3
+       local _state_dir="$CTDB_VARDIR/state/interface_modify"
+       local _lockfile="$_state_dir/$_iface.flock"
+       local _readd_base="$_state_dir/$_iface.readd.d"
+
+       mkdir -p $_state_dir || {
+               ret=$?
+               echo "Failed to mkdir -p $_state_dir - $ret"
+               return $ret
+       }
+
+       test -f $_lockfile || {
+               touch $_lockfile
         }
+
+       flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
+       return $?
+}
+
+setup_iface_ip_readd_script()
+{
+       local _iface=$1
+       local _ip=$2
+       local _maskbits=$3
+       local _readd_script=$4
+       local _state_dir="$CTDB_VARDIR/state/interface_modify"
+       local _lockfile="$_state_dir/$_iface.flock"
+       local _readd_base="$_state_dir/$_iface.readd.d"
+
+       mkdir -p $_state_dir || {
+               ret=$?
+               echo "Failed to mkdir -p $_state_dir - $ret"
+               return $ret
+       }
+
+       test -f $_lockfile || {
+               touch $_lockfile
+       }
+
+       flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
+       return $?
  }
  
  ########################################################
@@ -423,16 +625,17 @@ remove_ip() {
  # ctdb_check_counter_limit succeeds when count >= <limit>
  ########################################################
  _ctdb_counter_common () {
-    _counter_file="$ctdb_fail_dir/$service_name"
+    _service_name="${1:-${service_name}}"
+    _counter_file="$ctdb_fail_dir/$_service_name"
      mkdir -p "${_counter_file%/*}" # dirname
  }
  ctdb_counter_init () {
-    _ctdb_counter_common
+    _ctdb_counter_common "$1"
  
      >"$_counter_file"
  }
  ctdb_counter_incr () {
-    _ctdb_counter_common
+    _ctdb_counter_common "$1"
  
      # unary counting!
      echo -n 1 >> "$_counter_file"
@@ -452,16 +655,87 @@ ctdb_check_counter_limit () {
         echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
      fi
  }
+ctdb_check_counter_equal () {
+    _ctdb_counter_common
+
+    _limit=$1
+
+    # unary counting!
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+    if [ $_size -eq $_limit ] ; then
+       return 1
+    fi
+    return 0
+}
+ctdb_check_counter () {
+    _msg="${1:-error}"  # "error"  - anything else is silent on fail
+    _op="${2:--ge}"  # an integer operator supported by test
+    _limit="${3:-${service_fail_limit}}"
+    shift 3
+    _ctdb_counter_common "$1"
+
+    # unary counting!
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+    if [ $_size $_op $_limit ] ; then
+       if [ "$_msg" = "error" ] ; then
+           echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
+           exit 1              
+       else
+           return 1
+       fi
+    fi
+}
+
  ########################################################
  
-ctdb_spool_dir="/var/spool/ctdb"
-ctdb_status_dir="$ctdb_spool_dir/status"
-ctdb_fail_dir="$ctdb_spool_dir/failcount"
-ctdb_active_dir="$ctdb_spool_dir/active"
+ctdb_status_dir="$CTDB_VARDIR/status"
+ctdb_fail_dir="$CTDB_VARDIR/failcount"
+
+ctdb_setup_service_state_dir ()
+{
+    service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
+    mkdir -p "$service_state_dir" || {
+       echo "Error creating state dir \"$service_state_dir\""
+       exit 1
+    }
+}
+
+########################################################
+# Managed status history, for auto-start/stop
+
+ctdb_managed_dir="$CTDB_VARDIR/managed_history"
+
+_ctdb_managed_common ()
+{
+    _service_name="${1:-${service_name}}"
+    _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
+}
+
+ctdb_service_managed ()
+{
+    _ctdb_managed_common "$@"
+    mkdir -p "$ctdb_managed_dir"
+    touch "$_ctdb_managed_file"
+}
+
+ctdb_service_unmanaged ()
+{
+    _ctdb_managed_common "$@"
+    rm -f "$_ctdb_managed_file"
+}
+
+is_ctdb_previously_managed_service ()
+{
+    _ctdb_managed_common "$@"
+    [ -f "$_ctdb_managed_file" ]
+}
+
+########################################################
+# Check and set status
  
  log_status_cat ()
  {
-    echo "node is \"$1\", problem with \"${script_name}\": $(cat $2)"
+    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
  }
  
  ctdb_checkstatus ()
@@ -493,99 +767,175 @@ ctdb_setstatus ()
      esac
  }
  
+##################################################################
+# Reconfigure a service on demand
+
+_ctdb_service_reconfigure_common ()
+{
+    _d="$ctdb_status_dir/${1:-${service_name}}"
+    mkdir -p "$_d"
+    _ctdb_service_reconfigure_flag="$_d/reconfigure"
+}
+
  ctdb_service_needs_reconfigure ()
  {
-    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
+    _ctdb_service_reconfigure_common "$@"
+    [ -e "$_ctdb_service_reconfigure_flag" ]
  }
  
  ctdb_service_set_reconfigure ()
  {
-    d="$ctdb_status_dir/$service_name"
-    mkdir -p "$d"
-    >"$d/reconfigure"
+    _ctdb_service_reconfigure_common "$@"
+    >"$_ctdb_service_reconfigure_flag"
  }
  
  ctdb_service_unset_reconfigure ()
  {
-    rm -f "$ctdb_status_dir/$service_name/reconfigure"
+    _ctdb_service_reconfigure_common "$@"
+    rm -f "$_ctdb_service_reconfigure_flag"
  }
  
  ctdb_service_reconfigure ()
  {
-    if [ -n "$service_reconfigure" ] ; then
-       eval $service_reconfigure
-    else
-       service "$service_name" restart
+    echo "Reconfiguring service \"$service_name\"..."
+    ctdb_service_unset_reconfigure "$@"
+    service_reconfigure "$@" || return $?
+    ctdb_counter_init "$@"
+}
+
+# Default service_reconfigure() function.
+service_reconfigure ()
+{
+    service "${1:-$service_name}" restart
+}
+
+ctdb_service_check_reconfigure ()
+{
+    # Only do this for certain events.
+    case "$event_name" in
+       monitor|ipreallocated) : ;;
+       *) return 0
+    esac
+
+    if ctdb_service_needs_reconfigure "$@" ; then
+       ctdb_service_reconfigure "$@"
+
+       # For non-monitor events, return so the caller carries on.
+       [ "$event_name" = "monitor" ] || return 0
+
+       # We don't want to proceed with the rest of the monitor event
+       # here, so we exit.  However, if we exit 0 then, if the
+       # service was previously broken, we might return a false
+       # positive.  So we simply retrieve the status of this script
+       # from the previous monitor loop and exit with that status.
+       ctdb scriptstatus | \
+           grep -q -E "^${script_name}[[:space:]]+Status:OK[[:space:]]"
+       exit $?
      fi
-    ctdb_service_unset_reconfigure
-    ctdb_counter_init
  }
  
+##################################################################
+# Does CTDB manage this service? - and associated auto-start/stop
+
  ctdb_compat_managed_service ()
  {
-    if [ "$1" = "yes" ] ; then
-       t="$t $2 "
+    if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then
+       CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
      fi
  }
  
  is_ctdb_managed_service ()
  {
+    _service_name="${1:-${service_name}}"
+
+    # $t is used just for readability and to allow more accurate
+    # matching via leading/trailing spaces
      t=" $CTDB_MANAGED_SERVICES "
  
+    # Return 0 if "<space>$_service_name<space>" appears in $t
+    if [ "${t#* ${_service_name} }" != "${t}" ] ; then
+       return 0
+    fi
+
+    # If above didn't match then update $CTDB_MANAGED_SERVICES for
+    # backward compatibility and try again.
      ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
      ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
      ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
-    ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
      ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
      ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
      ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
      ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
+    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"
+
+    t=" $CTDB_MANAGED_SERVICES "
  
-    # Returns 0 if "<space>$service_name<space>" appears in $t
-    [ "${t#* ${service_name} }" != "${t}" ]
+    # Return 0 if "<space>$_service_name<space>" appears in $t
+    [ "${t#* ${_service_name} }" != "${t}" ]
  }
  
  ctdb_start_stop_service ()
  {
-    _active="$ctdb_active_dir/$service_name"
+    _service_name="${1:-${service_name}}"
  
-    if is_ctdb_managed_service ; then
-       if ! [ -e "$_active" ] ; then
-           echo "Starting service $service_name"
-           ctdb_service_start || exit $?
-           mkdir -p "$ctdb_active_dir"
-           touch "$_active"
-           exit 0
+    [ "$event_name" = "monitor" ] || return 0
+
+    if is_ctdb_managed_service "$_service_name" ; then
+       if ! is_ctdb_previously_managed_service "$_service_name" ; then
+           echo "Starting service \"$_service_name\" - now managed"
+           ctdb_service_start "$_service_name"
+           exit $?
         fi
-    elif ! is_ctdb_managed_service ; then
-       if [ -e "$_active" ] ; then
-           echo "Stopping service $service_name"
-           ctdb_service_stop || exit $?
-           rm -f "$_active"
+    else
+       if is_ctdb_previously_managed_service "$_service_name" ; then
+           echo "Stopping service \"$_service_name\" - no longer managed"
+           ctdb_service_stop "$_service_name"
+           exit $?
         fi
-       exit 0
      fi
  }
  
  ctdb_service_start ()
  {
-    if [ -n "$service_start" ] ; then
-       eval $service_start
-    else
-       service "$service_name" start
-    fi
-    ctdb_counter_init
+    # The service is marked managed if we've ever tried to start it.
+    ctdb_service_managed "$@"
+
+    # Here we only want $1.  If no argument is passed then
+    # service_start needs to know.
+    service_start "$@" || return $?
+
+    ctdb_counter_init "$@"
  }
  
  ctdb_service_stop ()
  {
-    if [ -n "$service_stop" ] ; then
-       eval $service_stop
-    else
-       service "$service_name" stop
-    fi
+    ctdb_service_unmanaged "$@"
+    service_stop "$@"
+}
+
+# Default service_start() and service_stop() functions.
+ 
+# These may be overridden in an eventscript.  When overriding, the
+# following convention must be followed.  If these functions are
+# called with no arguments then they may use internal logic to
+# determine whether the service is managed and, therefore, whether
+# they should take any action.  However, if the service name is
+# specified as an argument then an attempt must be made to start or
+# stop the service.  This is because the auto-start/stop code calls
+# them with the service name as an argument.
+service_start ()
+{
+    service "${1:-${service_name}}" start
  }
  
+service_stop ()
+{
+    service "${1:-${service_name}}" stop
+}
+
+##################################################################
+
  ctdb_standard_event_handler ()
  {
      case "$1" in
@@ -594,16 +944,146 @@ ctdb_standard_event_handler ()
             exit
             ;;
         setstatus)
+            shift
             ctdb_setstatus "$@"
             exit
             ;;
      esac
  }
  
+ipv4_host_addr_to_net_addr()
+{
+       local HOST=$1
+       local MASKBITS=$2
+
+       local HOST0=$(echo $HOST | awk -F . '{print $4}')
+       local HOST1=$(echo $HOST | awk -F . '{print $3}')
+       local HOST2=$(echo $HOST | awk -F . '{print $2}')
+       local HOST3=$(echo $HOST | awk -F . '{print $1}')
+
+       local HOST_NUM=$(( $HOST0 + $HOST1 * 256 + $HOST2 * (256 ** 2) + $HOST3 * (256 ** 3) ))
+
+       local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
+
+       local NET_NUM=$(( $HOST_NUM & $MASK_NUM))
+
+       local NET0=$(( $NET_NUM & 255 ))
+       local NET1=$(( ($NET_NUM & (255 * 256)) / 256 ))
+       local NET2=$(( ($NET_NUM & (255 * 256**2)) / 256**2 ))
+       local NET3=$(( ($NET_NUM & (255 * 256**3)) / 256**3 ))
+
+       echo "$NET3.$NET2.$NET1.$NET0"
+}
+
+ipv4_maskbits_to_net_mask()
+{
+       local MASKBITS=$1
+
+       local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
+
+       local MASK0=$(( $MASK_NUM & 255 ))
+       local MASK1=$(( ($MASK_NUM & (255 * 256)) / 256 ))
+       local MASK2=$(( ($MASK_NUM & (255 * 256**2)) / 256**2 ))
+       local MASK3=$(( ($MASK_NUM & (255 * 256**3)) / 256**3 ))
+
+       echo "$MASK3.$MASK2.$MASK1.$MASK0"
+}
+
+ipv4_is_valid_addr()
+{
+       local ADDR=$1
+       local fail=0
+
+       local N=`echo $ADDR | sed -e 's/[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*//'`
+       test -n "$N" && fail=1
+
+       local ADDR0=$(echo $ADDR | awk -F . '{print $4}')
+       local ADDR1=$(echo $ADDR | awk -F . '{print $3}')
+       local ADDR2=$(echo $ADDR | awk -F . '{print $2}')
+       local ADDR3=$(echo $ADDR | awk -F . '{print $1}')
+
+       test "$ADDR0" -gt 255 && fail=1
+       test "$ADDR1" -gt 255 && fail=1
+       test "$ADDR2" -gt 255 && fail=1
+       test "$ADDR3" -gt 255 && fail=1
+
+       test x"$fail" != x"0" && {
+               #echo "IPv4: '$ADDR' is not a valid address"
+               return 1;
+       }
+
+       return 0;
+}
+
+# iptables doesn't like being re-entered, so flock-wrap it.
+iptables()
+{
+       flock -w 30 $CTDB_VARDIR/iptables-ctdb.flock /sbin/iptables "$@"
+}
+
+########################################################
+# tickle handling
+########################################################
+
+# Temporary directory for tickles.
+tickledir="$CTDB_VARDIR/state/tickles"
+mkdir -p "$tickledir"
+
+update_tickles ()
+{
+       _port="$1"
+
+       mkdir -p "$tickledir" # Just in case
+
+       # Who am I?
+       _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
+
+       # What public IPs do I hold?
+       _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
+
+       # IPs as a regexp choice
+       _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
+
+       # Record connections to our public IPs in a temporary file
+       _my_connections="${tickledir}/${_port}.connections"
+       rm -f "$_my_connections"
+       netstat -tn |
+       awk -v destpat="^${_ipschoice}:${_port}\$" \
+         '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
+       sort >"$_my_connections"
+
+       # Record our current tickles in a temporary file
+       _my_tickles="${tickledir}/${_port}.tickles"
+       rm -f "$_my_tickles"
+       for _i in $_ips ; do
+               ctdb -Y gettickles $_i $_port | 
+               awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+       done |
+       sort >"$_my_tickles"
+
+       # Add tickles for connections that we haven't already got tickles for
+       comm -23 "$_my_connections" "$_my_tickles" |
+       while read _src _dst ; do
+               ctdb addtickle $_src $_dst
+       done
+
+       # Remove tickles for connections that are no longer there
+       comm -13 "$_my_connections" "$_my_tickles" |
+       while read _src _dst ; do
+               ctdb deltickle $_src $_dst
+       done
+
+       rm -f "$_my_connections" "$_my_tickles" 
+}
+
  ########################################################
  # load a site local config file
  ########################################################
  
+[ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
+       . "$CTDB_RC_LOCAL"
+}
+
  [ -x $CTDB_BASE/rc.local ] && {
         . $CTDB_BASE/rc.local
  }
@@ -617,3 +1097,4 @@ ctdb_standard_event_handler ()
  script_name="${0##*/}"       # basename
  service_name="$script_name"  # default is just the script name
  service_fail_limit=1
+event_name="$1"