2 # script to manage nfs in a clustered environment
4 [ -n "$CTDB_BASE" ] || \
5 CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" ; dirname "$PWD")
7 . "${CTDB_BASE}/functions"
11 ctdb_setup_service_state_dir
13 ######################################################################
15 service_reconfigure ()
17 # Restart lock manager, notify clients
18 if [ -x "${CTDB_BASE}/statd-callout" ] ; then
19 "${CTDB_BASE}/statd-callout" notify &
23 ######################################################################
25 ######################################################
26 # Check the health of NFS services
28 # Use .check files in $CTDB_NFS_CHECKS_DIR.
29 # Default is "${CTDB_BASE}/nfs-checks.d/"
30 ######################################################
33 _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"
35 # Files must end with .check - avoids editor backups, RPM fu, ...
36 for _f in "$_dir"/[0-9][0-9].*.check ; do
38 _progname="${_t##*/[0-9][0-9].}"
40 nfs_check_service "$_progname" <"$_f"
44 ######################################################
45 # Check the health of an NFS service
47 # $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
49 # Reads variables from stdin
53 # * family - "tcp" or "udp" or space separated list
54 # default: tcp, not used with "service_check_cmd"
55 # * version - optional, RPC service version number or space separated list
56 # default is to omit it, which checks for any version,
57 # not used with "service_check_cmd"
58 # * unhealthy_after - number of check fails before unhealthy
60 # * restart_every - number of check fails before restart
61 # default: 0, meaning no restart
62 # * service_stop_cmd - command to stop service
63 # default: no default, must be provided if
65 # * service_start_cmd - command to start service
66 # default: no default, must be provided if
68 # * service_check_cmd - command to check health of service
69 # default is to check RPC service using rpcinfo
70 # * service_debug_cmd - command to debug a service after trying to stop it;
71 # for example, it can be useful to print stack
72 # traces of threads that have not exited, since
73 # they may be stuck doing I/O;
74 # no default, see also function program_stack_traces()
76 # Quoting in values is not preserved
78 ######################################################
84 # Subshell to restrict the scope of variables...
96 # Eval line-by-line. Expands variable references in values.
97 # Also allows variable name checking, which seems useful.
100 \#*|"") : ;; # Ignore comments, blank lines
103 unhealthy_after=*|restart_every=*|\
104 service_stop_cmd=*|service_start_cmd=*|\
105 service_check_cmd=*|service_debug_cmd=*)
110 echo "ERROR: Unknown variable for ${_progname}: ${_line}"
115 _service_name="nfs_${_progname}"
118 if [ -n "$service_check_cmd" ] ; then
119 # Using eval means variables can contain semicolon separated commands
120 if eval "$service_check_cmd" ; then
123 _err="monitoring service \"${_progname}\" failed"
126 if nfs_check_rpcinfo \
127 "$_progname" "$version" "$family" >/dev/null ; then
130 _err="$ctdb_check_rpc_out"
135 if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
136 ctdb_counter_init "$_service_name"
141 ctdb_counter_incr "$_service_name"
142 _failcount=$(ctdb_counter_get "$_service_name")
145 if [ $unhealthy_after -gt 0 ] ; then
146 if [ $_failcount -ge $unhealthy_after ] ; then
152 if [ $restart_every -gt 0 ] ; then
153 if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
154 if ! $_unhealthy ; then
155 echo "WARNING: $_err"
161 if $_unhealthy ; then
169 # Uses: _progname, service_stop_cmd, service_start_cmd, service_debug_cmd
170 nfs_restart_service ()
172 if [ -z "$service_stop_cmd" -o -z "$service_start_cmd" ] ; then
173 die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
176 echo "Trying to restart service \"${_progname}\"..."
177 # Using eval means variables can contain semicolon separated commands
178 eval "$service_stop_cmd"
179 if [ -n "$service_debug_cmd" ] ; then
180 eval "$service_debug_cmd"
182 background_with_logging eval "$service_start_cmd"
185 ######################################################
186 # Check an RPC service with rpcinfo
187 ######################################################
190 _progname="$1" # passed to rpcinfo (looked up in /etc/rpc)
191 _version="$2" # optional, not passed if empty/unset
192 _family="${3:-tcp}" # optional, default is "tcp"
196 _localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}"
199 _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
202 if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
203 "$_progname" $_version 2>&1) ; then
204 ctdb_check_rpc_out="$_progname failed RPC check:
206 echo "$ctdb_check_rpc_out"
213 _progname="$1" # passed to rpcinfo (looked up in /etc/rpc)
214 _versions="$2" # optional, space separated, not passed if empty/unset
215 _families="${3:-tcp}" # optional, space separated, default is "tcp"
217 for _family in $_families ; do
218 if [ -n "$_versions" ] ; then
219 for _version in $_versions ; do
220 ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
223 ctdb_check_rpc "$_progname" "" "$_family" || return $?
228 ##################################################################
229 # use statd-callout to update NFS lock info
230 ##################################################################
231 nfs_update_lock_info ()
233 if [ -x "$CTDB_BASE/statd-callout" ] ; then
234 "$CTDB_BASE/statd-callout" update
238 ######################################################################
242 ctdb_start_stop_service
244 is_ctdb_managed_service || exit 0
246 ctdb_service_check_reconfigure
259 ctdb_service_set_reconfigure
264 ctdb_service_set_reconfigure
268 nfs_callout "monitor-pre" || exit $?
270 # Check that directories for shares actually exist
271 if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
272 nfs_callout "monitor-list-shares" | ctdb_check_directories || \
281 nfs_callout "monitor-post" || exit $?
285 ctdb_standard_event_handler "$@"