2 # script to manage nfs in a clustered environment
# Default CTDB_BASE to the parent of the directory containing this script
# when the caller has not already provided a non-empty value.
if [ -z "$CTDB_BASE" ] ; then
    # The inner dirname is quoted so a script path containing whitespace is
    # not word-split, and dirname "$PWD" only runs if the cd succeeded.
    # Assignment and export are separate so the substitution's exit status
    # is not hidden by export.
    CTDB_BASE=$(cd -P "$(dirname "$0")" && dirname "$PWD")
    export CTDB_BASE
fi
# Use the nfs-linux-kernel-callout path under CTDB_BASE when no callout has
# been configured (CTDB_NFS_CALLOUT unset or empty).
if [ -z "$CTDB_NFS_CALLOUT" ] ; then
    CTDB_NFS_CALLOUT="${CTDB_BASE}/nfs-linux-kernel-callout"
fi
# Always export, for statd callout
export CTDB_NFS_CALLOUT
# Run the configured NFS callout.
#
# $@ - arguments appended to $CTDB_NFS_CALLOUT (for example "monitor-pre")
#
# Returns the exit status of the evaluated command.
#
# eval re-parses the callout setting together with the arguments as shell
# command text, so the setting may itself contain a command with arguments.
nfs_callout ()
{
    eval "$CTDB_NFS_CALLOUT" "$@"
}
# Reconfigure hook: ask statd-callout to notify clients.
#
# Takes no arguments.  Does nothing (and succeeds) when
# ${CTDB_BASE}/statd-callout is not an executable file.
service_reconfigure ()
{
    # Restart lock manager, notify clients
    if [ -x "${CTDB_BASE}/statd-callout" ] ; then
        # Started in the background, so its exit status is not checked here.
        # NOTE(review): the job inherits this script's stdout/stderr -
        # confirm whether the caller expects those to be closed promptly.
        "${CTDB_BASE}/statd-callout" notify &
    fi
}
30 ######################################################################
32 ######################################################
33 # Check the health of NFS services
35 # Use .check files in given directory.
36 # Default is "${CTDB_BASE}/nfs-checks.d/"
37 ######################################################
# Directory holding the check files: the first argument if given, otherwise
# $CTDB_NFS_CHECKS_DIR, otherwise nfs-checks.d under $CTDB_BASE.
40 _dir="${1:-${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}}"
42 # Files must end with .check - avoids editor backups, RPM fu, ...
# Only names of the form NN.<something>.check are visited.
# NOTE(review): if nothing matches, the pattern is left unexpanded and the
# redirection below would target a non-existent file - confirm that is
# acceptable.
43 for _f in "$_dir"/[0-9][0-9].*.check ; do
# Strip everything up to and including the leading "NN." to obtain the name
# handed to nfs_check_service.
# NOTE(review): $_t is not assigned in the lines visible here; presumably it
# is $_f with the ".check" suffix removed - confirm.
45 _progname="${_t##*/[0-9][0-9].}"
# The check file itself is supplied to nfs_check_service on stdin.
47 nfs_check_service "$_progname" <"$_f"
51 ######################################################
52 # Check the health of an NFS service
54 # $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
56 # Reads variables from stdin
60 # * family - "tcp" or "udp" or space separated list
61 # default: tcp, not used with "service_check_cmd"
62 # * version - optional, RPC service version number
63 # default is to omit to check for any version,
64 # not used with "service_check_cmd"
65 # * unhealthy_after - number of check fails before unhealthy
67 # * restart_every - number of check fails before restart
68 # default: 0, meaning no restart
69 # * service_stop_cmd - command to stop service
70 # default: no default, must be provided if restart_every > 0
72 # * service_start_cmd - command to start service
73 # default: no default, must be provided if restart_every > 0
75 # * service_check_cmd - command to check health of service
76 # default is to check RPC service using rpcinfo
77 # * service_debug_cmd - command to debug a service after trying to stop it;
78 # for example, it can be useful to print stack
79 # traces of threads that have not exited, since
80 # they may be stuck doing I/O;
81 # no default, see also function program_stack_traces()
83 # Quoting in values is not preserved
85 ######################################################
# Per-service health check: parse settings from stdin, run the check, then
# count failures and decide whether to report unhealthy and/or restart.
# NOTE(review): several control structures opened below are not closed in the
# visible lines, and the assignments of _unhealthy are not visible either -
# read the notes below with that in mind.
91 # Subshell to restrict scope variables...
103 # Eval line-by-line. Expands variable references in values.
104 # Also allows variable name checking, which seems useful.
# read is used without -r, so backslashes in the input are interpreted by
# read before the line is examined.
105 while read _line ; do
107 \#*|"") : ;; # Ignore comments, blank lines
# Lines assigning one of the recognised variable names.
110 unhealthy_after=*|restart_every=*|\
111 service_stop_cmd=*|service_start_cmd=*|\
112 service_check_cmd=*|service_debug_cmd=*)
# Anything else is reported together with the offending line.
117 echo "ERROR: Unknown variable for ${_progname}: ${_line}"
# Name under which this service's failures are counted by the ctdb_counter_*
# helpers used below.
122 _service_name="nfs_${_progname}"
# A non-empty service_check_cmd is evaluated as the health check.
125 if [ -n "$service_check_cmd" ] ; then
126 # Using eval means variables can contain semicolon separated commands
127 if eval "$service_check_cmd" ; then
# rpcinfo-based check; its standard output is discarded here.
131 if nfs_check_rpcinfo \
132 "$_progname" "$version" "$family" >/dev/null ; then
# The failure counter is only initialised when counting matters, i.e. unless
# unhealthy_after is 1 and restart_every is 0.
138 if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
139 ctdb_counter_init "$_service_name"
# Record this failure, then fetch the current failure count.
144 ctdb_counter_incr "$_service_name"
145 _failcount=$(ctdb_counter_get "$_service_name")
# An unhealthy_after of 0 or less disables the unhealthy threshold.
148 if [ $unhealthy_after -gt 0 ] ; then
149 if [ $_failcount -ge $unhealthy_after ] ; then
# ctdb_check_rpc_out is assigned by ctdb_check_rpc.
# NOTE(review): nothing visible here sets it when service_check_cmd is used -
# confirm the message is still meaningful in that case.
151 echo "ERROR: $ctdb_check_rpc_out"
# Restart handling applies only when restart_every is positive, and triggers
# each time the failure count is a multiple of restart_every.
155 if [ $restart_every -gt 0 ] ; then
156 if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
# $_unhealthy is executed as a command, so it is expected to hold "true" or
# "false"; the warning is printed only when it evaluates false.
157 if ! $_unhealthy ; then
158 echo "WARNING: $ctdb_check_rpc_out"
164 if $_unhealthy ; then
# Restart the service currently being checked: run the stop command, then the
# optional debug command, then hand the start command to
# background_with_logging.
#
# Takes no arguments.  Reads these variables from the caller's scope:
#   _progname          - service name, used in messages
#   service_stop_cmd   - required
#   service_start_cmd  - required
#   service_debug_cmd  - optional, evaluated after the stop command
#
# Calls die if either required command is empty.
nfs_restart_service ()
{
    # Two separate tests joined by || rather than the obsolescent "-o".
    if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
        die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
    fi

    echo "Trying to restart service \"${_progname}\"..."
    # Using eval means variables can contain semicolon separated commands
    eval "$service_stop_cmd"
    if [ -n "$service_debug_cmd" ] ; then
        eval "$service_debug_cmd"
    fi
    background_with_logging eval "$service_start_cmd"
}
188 ######################################################
189 # Check an RPC service with rpcinfo
190 ######################################################
# $1 - program name, $2 - version (optional), $3 - transport family (optional)
#
# Sets ctdb_check_rpc_out to the combined stdout/stderr of rpcinfo.  On
# failure that text is prefixed with a "<progname> failed RPC check:" line,
# printed to stdout, and 1 is returned.  On success nothing is printed and
# the status is 0.
ctdb_check_rpc ()
{
    _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
    _version="$2"         # optional, not passed if empty/unset
    _family="${3:-tcp}"   # optional, default is "tcp"

    _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"

    # ${_version:+...} expands to nothing when the version is empty, so no
    # empty argument reaches rpcinfo; every other operand is quoted.
    if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
                                      "$_progname" \
                                      ${_version:+"$_version"} 2>&1) ; then
        ctdb_check_rpc_out="$_progname failed RPC check:
$ctdb_check_rpc_out"
        echo "$ctdb_check_rpc_out"
        return 1
    fi
}
# Check an RPC program with ctdb_check_rpc for every requested combination
# of transport family and version.
#
# $1 - program name
# $2 - versions, space separated (optional)
# $3 - families, space separated (optional)
#
# Returns the status of the first failing ctdb_check_rpc call, stopping
# there; returns 0 if every check succeeds.
nfs_check_rpcinfo ()
{
    _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
    _versions="$2"        # optional, space separated, not passed if empty/unset
    _families="${3:-tcp}" # optional, space separated, default is "tcp"

    # The two lists are deliberately unquoted so they split on whitespace;
    # the individual items are quoted when passed on.
    for _family in $_families ; do
        if [ -n "$_versions" ] ; then
            for _version in $_versions ; do
                ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
            done
        else
            ctdb_check_rpc "$_progname" "" "$_family" || return $?
        fi
    done
}
225 ##################################################################
226 # use statd-callout to update NFS lock info
227 ##################################################################
# Takes no arguments.  Runs "${CTDB_BASE}/statd-callout update" in the
# foreground when that file is executable and returns its exit status;
# otherwise does nothing and succeeds.
nfs_update_lock_info ()
{
    if [ -x "${CTDB_BASE}/statd-callout" ] ; then
        "${CTDB_BASE}/statd-callout" update
    fi
}
235 ######################################################################
# Main flow of the script.
# NOTE(review): the ctdb_* helpers and is_ctdb_managed_service are not defined
# in the visible lines; presumably they come from a sourced functions file -
# confirm.
239 ctdb_setup_service_state_dir
241 ctdb_start_stop_service
# Nothing further is done when is_ctdb_managed_service reports failure.
243 is_ctdb_managed_service || exit 0
245 ctdb_service_check_reconfigure
# NOTE(review): the statements from here on look like bodies of per-event
# branches whose selecting construct is not visible - confirm which event
# each one belongs to.
258 ctdb_service_set_reconfigure
263 ctdb_service_set_reconfigure
# A failing callout stage ends the script with that stage's exit status.
267 nfs_callout "monitor-pre" || exit $?
269 # Check that directories for shares actually exist
# The check is skipped only when CTDB_NFS_SKIP_SHARE_CHECK is exactly "yes".
270 if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
# The share list printed by the callout is piped into ctdb_check_directories.
# NOTE(review): the next line ends with a continuation whose intended target
# is not visible; as written it joins onto the following line - confirm.
271 nfs_callout "monitor-list-shares" | ctdb_check_directories || \
280 nfs_callout "monitor-post" || exit $?
# All script arguments are passed to ctdb_standard_event_handler.
284 ctdb_standard_event_handler "$@"