ctdb/config/events.d/60.nfs

   1 #!/bin/sh
   2 # script to manage nfs in a clustered environment
   3
   4 [ -n "$CTDB_BASE" ] || \
   5     CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" ; dirname "$PWD")
   6
   7 . "${CTDB_BASE}/functions"
   8
   9 service_name="nfs"
  10 loadconfig
  11 ctdb_setup_service_state_dir
  12
  13 ######################################################################
  14
  15 service_reconfigure ()
  16 {
  17     # Restart lock manager, notify clients
  18     if [ -x "${CTDB_BASE}/statd-callout" ] ; then
  19         "${CTDB_BASE}/statd-callout" notify &
  20     fi >/dev/null 2>&1
  21 }
  22
  23 ######################################################################
  24
  25 ######################################################
  26 # Check the health of NFS services
  27 #
  28 # Use .check files in $CTDB_NFS_CHECKS_DIR.
  29 # Default is "${CTDB_BASE}/nfs-checks.d/"
  30 ######################################################
  31 nfs_check_services ()
  32 {
  33     _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"
  34
  35     # Files must end with .check - avoids editor backups, RPM fu, ...
  36     for _f in "$_dir"/[0-9][0-9].*.check ; do
  37         _t="${_f%.check}"
  38         _progname="${_t##*/[0-9][0-9].}"
  39
  40         nfs_check_service "$_progname" <"$_f"
  41     done
  42 }
  43
  44 ######################################################
  45 # Check the health of an NFS service
  46 #
  47 # $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
  48 #
  49 # Reads variables from stdin
  50 #
  51 # Variables are:
  52 #
  53 # * family             - "tcp" or "udp" or space separated list
  54 #                        default: tcp, not used with "service_check_cmd"
  55 # * version            - optional, RPC service version number
  56 #                        default is to omit to check for any version,
  57 #                        not used with "service_check_cmd"
  58 # * unhealthy_after    - number of check fails before unhealthy
  59 #                        default: 1
  60 # * restart_every      - number of check fails before restart
  61 #                        default: 0, meaning no restart
  62 # * service_stop_cmd   - command to stop service
  63 #                        default: no default, must be provided if
  64 #                                 restart_every > 0
  65 # * service_start_cmd  - command to start service
  66 #                        default: no default, must be provided if
  67 #                                 restart_every > 0
  68 # * service_check_cmd  - command to check health of service
  69 #                        default is to check RPC service using rpcinfo
  70 # * service_debug_cmd  - command to debug a service after trying to stop it;
  71 #                        for example, it can be useful to print stack
  72 #                        traces of threads that have not exited, since
  73 #                        they may be stuck doing I/O;
  74 #                        no default, see also function program_stack_traces()
  75 #
  76 # Quoting in values is not preserved
  77 #
  78 ######################################################
  79 nfs_check_service ()
  80 {
  81     _progname="$1"
  82
  83     (
  84         # Subshell to restrict scope variables...
  85
  86         # Defaults
  87         family="tcp"
  88         version=""
  89         unhealthy_after=1
  90         restart_every=0
  91         service_stop_cmd=""
  92         service_start_cmd=""
  93         service_check_cmd=""
  94         service_debug_cmd=""
  95
  96         # Eval line-by-line.  Expands variable references in values.
  97         # Also allows variable name checking, which seems useful.
  98         while read _line ; do
  99             case "$_line" in
 100                 \#*|"") : ;; # Ignore comments, blank lines
 101
 102                 family=*|version=*|\
 103                 unhealthy_after=*|restart_every=*|\
 104                 service_stop_cmd=*|service_start_cmd=*|\
 105                 service_check_cmd=*|service_debug_cmd=*)
 106
 107                     eval "$_line"
 108                     ;;
 109                 *)
 110                     echo "ERROR: Unknown variable for ${_progname}: ${_line}"
 111                     exit 1
 112             esac
 113         done
 114
 115         _service_name="nfs_${_progname}"
 116
 117         _ok=false
 118         if [ -n "$service_check_cmd" ] ; then
 119             # Using eval means variables can contain semicolon separated commands
 120             if eval "$service_check_cmd" ; then
 121                 _ok=true
 122             else
 123                 _err="monitoring service \"${_progname}\" failed"
 124             fi
 125         else
 126             if nfs_check_rpcinfo \
 127                    "$_progname" "$version" "$family" >/dev/null ; then
 128                 _ok=true
 129             else
 130                 _err="$ctdb_check_rpc_out"
 131             fi
 132         fi
 133
 134         if $_ok ; then
 135             if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
 136                 ctdb_counter_init "$_service_name"
 137             fi
 138             exit 0
 139         fi
 140
 141         ctdb_counter_incr "$_service_name"
 142         _failcount=$(ctdb_counter_get "$_service_name")
 143
 144         _unhealthy=false
 145         if [ $unhealthy_after -gt 0 ] ; then
 146             if [ $_failcount -ge $unhealthy_after ] ; then
 147                 _unhealthy=true
 148                 echo "ERROR: $_err"
 149             fi
 150         fi
 151
 152         if [ $restart_every -gt 0 ] ; then
 153             if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
 154                 if ! $_unhealthy ; then
 155                     echo "WARNING: $_err"
 156                 fi
 157                 nfs_restart_service
 158             fi
 159         fi
 160
 161         if $_unhealthy ; then
 162             exit 1
 163         fi
 164
 165         return 0
 166     ) || exit 1
 167 }
 168
 169 # Uses: stop_service, start_service, debug_stuck_threads
 170 nfs_restart_service ()
 171 {
 172     if [ -z "$service_stop_cmd" -o -z "$service_start_cmd" ] ; then
 173         die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
 174     fi
 175
 176     echo "Trying to restart service \"${_progname}\"..."
 177     # Using eval means variables can contain semicolon separated commands
 178     eval "$service_stop_cmd"
 179     if [ -n "$service_debug_cmd" ] ; then
 180         eval "$service_debug_cmd"
 181     fi
 182     background_with_logging eval "$service_start_cmd"
 183 }
 184
 185 ######################################################
 186 # Check an RPC service with rpcinfo
 187 ######################################################
 188 ctdb_check_rpc ()
 189 {
 190     _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
 191     _version="$2"         # optional, not passed if empty/unset
 192     _family="${3:-tcp}"   # optional, default is "tcp"
 193
 194     case "$_family" in
 195         tcp6|udp6)
 196             _localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}"
 197             ;;
 198         *)
 199             _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
 200     esac
 201
 202     if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
 203                                       "$_progname" $_version 2>&1) ; then
 204         ctdb_check_rpc_out="$_progname failed RPC check:
 205 $ctdb_check_rpc_out"
 206         echo "$ctdb_check_rpc_out"
 207         return 1
 208     fi
 209 }
 210
 211 nfs_check_rpcinfo ()
 212 {
 213     _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
 214     _versions="$2"        # optional, space separated, not passed if empty/unset
 215     _families="${3:-tcp}" # optional, space separated, default is "tcp"
 216
 217     for _family in $_families ; do
 218         if [ -n "$_versions" ] ; then
 219             for _version in $_versions ; do
 220                 ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
 221             done
 222         else
 223             ctdb_check_rpc "$_progname" "" "$_family" || return $?
 224         fi
 225     done
 226 }
 227
 228 ##################################################################
 229 # use statd-callout to update NFS lock info
 230 ##################################################################
 231 nfs_update_lock_info ()
 232 {
 233     if [ -x "$CTDB_BASE/statd-callout" ] ; then
 234         "$CTDB_BASE/statd-callout" update
 235     fi
 236 }
 237
 238 ######################################################################
 239
 240 nfs_callout_init
 241
 242 ctdb_start_stop_service
 243
 244 is_ctdb_managed_service || exit 0
 245
 246 ctdb_service_check_reconfigure
 247
 248 case "$1" in
 249     startup)
 250         nfs_callout "$@"
 251         ;;
 252
 253     shutdown)
 254          nfs_callout "$@"
 255         ;;
 256
 257     takeip)
 258         nfs_callout "$@"
 259         ctdb_service_set_reconfigure
 260         ;;
 261
 262     releaseip)
 263         nfs_callout "$@"
 264         ctdb_service_set_reconfigure
 265         ;;
 266
 267     monitor)
 268         nfs_callout "monitor-pre" || exit $?
 269
 270         # Check that directories for shares actually exist
 271         if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
 272             nfs_callout "monitor-list-shares" | ctdb_check_directories || \
 273                 exit $?
 274         fi
 275
 276         update_tickles 2049
 277         nfs_update_lock_info
 278
 279         nfs_check_services
 280
 281         nfs_callout "monitor-post" || exit $?
 282         ;;
 283
 284     *)
 285         ctdb_standard_event_handler "$@"
 286         ;;
 287 esac
 288
 289 exit 0