ctdb-scripts: Move NFS support functions to 60.nfs
[samba.git] / ctdb / config / events.d / 60.nfs
#!/bin/sh
# script to manage nfs in a clustered environment

# Unless the caller already provided CTDB_BASE, derive it from the
# location of this script: it is the parent of the directory that
# contains the script.  The steps are chained with && so that a failed
# dirname/cd produces an empty value instead of a path computed from
# an unrelated $PWD.  Assignment and export are separate statements so
# the value is never subject to word splitting.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(_d=$(dirname "$0") && cd -P "$_d" && dirname "$PWD")
    export CTDB_BASE
fi

# Quoted so that a base directory containing whitespace still works
. "${CTDB_BASE}/functions"

service_name="nfs"

# Fall back to the default callout when none has been configured
if [ -z "$CTDB_NFS_CALLOUT" ] ; then
    CTDB_NFS_CALLOUT="${CTDB_BASE}/nfs-linux-kernel-callout"
fi
# Always export, for statd callout
export CTDB_NFS_CALLOUT
16
# Run the configured NFS callout, passing on all of the given arguments.
#
# $CTDB_NFS_CALLOUT is evaluated by the shell, so it may consist of a
# command followed by arguments of its own.  The caller's arguments
# are referenced as "$@" inside the evaluated string rather than being
# pasted into it, so each one reaches the callout as exactly one word
# and is never re-parsed as shell code.
#
# Returns the exit status of the callout.
nfs_callout ()
{
    eval "$CTDB_NFS_CALLOUT" '"$@"'
}
21
# Reconfigure hook: restart lock manager, notify clients.
#
# When ${CTDB_BASE}/statd-callout is executable it is started in the
# background with the single argument "notify" and all of its output
# is discarded.  Otherwise nothing happens.
service_reconfigure ()
{
    [ -x "${CTDB_BASE}/statd-callout" ] || return 0

    "${CTDB_BASE}/statd-callout" notify >/dev/null 2>&1 &
}
29
######################################################################

######################################################
# Check the health of NFS services
#
# $1 - optional directory containing .check files
#
# Use .check files in given directory.
# Default is "${CTDB_NFS_CHECKS_DIR}" if set, otherwise
# "${CTDB_BASE}/nfs-checks.d/"
#
# Each file named NN.<progname>.check is passed on stdin to
# nfs_check_service, with <progname> as its argument.
######################################################
nfs_check_services ()
{
    _dir="${1:-${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}}"

    # Files must end with .check - avoids editor backups, RPM fu, ...
    for _f in "$_dir"/[0-9][0-9].*.check ; do
        # When nothing matches, the pattern itself is left in $_f.
        # Skip that (and anything unreadable) rather than letting the
        # input redirection below fail.
        [ -r "$_f" ] || continue

        _t="${_f%.check}"
        _progname="${_t##*/[0-9][0-9].}"

        nfs_check_service "$_progname" <"$_f"
    done
}
50
######################################################
# Check the health of an NFS service
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
#
# Reads variables from stdin
#
# Variables are:
#
# * family             - "tcp" or "udp" or space separated list
#                        default: tcp, not used with "service_check_cmd"
# * version            - optional, RPC service version number
#                        default is to omit to check for any version,
#                        not used with "service_check_cmd"
# * unhealthy_after    - number of check fails before unhealthy
#                        default: 1
# * restart_every      - number of check fails before restart
#                        default: 0, meaning no restart
# * service_stop_cmd   - command to stop service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_start_cmd  - command to start service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_check_cmd  - command to check health of service
#                        default is to check RPC service using rpcinfo
# * service_debug_cmd  - command to debug a service after trying to stop it;
#                        for example, it can be useful to print stack
#                        traces of threads that have not exited, since
#                        they may be stuck doing I/O;
#                        no default, see also function program_stack_traces()
#
# Quoting in values is not preserved
#
# Lines starting with "#" and blank lines are ignored.  Any other
# line that does not assign one of the variables above is an error.
#
# Returns 0 when the service is healthy, or has failed but is not yet
# considered unhealthy.  Exits the calling shell with status 1 when
# the service is unhealthy or the input is invalid.
######################################################
nfs_check_service ()
{
    _progname="$1"

    (
        # Subshell to restrict scope of variables...

        # Defaults
        family="tcp"
        version=""
        unhealthy_after=1
        restart_every=0
        service_stop_cmd=""
        service_start_cmd=""
        service_check_cmd=""
        service_debug_cmd=""

        # Eval line-by-line.  Expands variable references in values.
        # Also allows variable name checking, which seems useful.
        while read _line ; do
            case "$_line" in
                \#*|"") : ;; # Ignore comments, blank lines

                family=*|version=*|\
                unhealthy_after=*|restart_every=*|\
                service_stop_cmd=*|service_start_cmd=*|\
                service_check_cmd=*|service_debug_cmd=*)

                    eval "$_line"
                    ;;
                *)
                    echo "ERROR: Unknown variable for ${_progname}: ${_line}"
                    exit 1
                    ;;
            esac
        done

        _service_name="nfs_${_progname}"

        # Run the check and remember why it failed.  Only the rpcinfo
        # path fills in $ctdb_check_rpc_out, so a custom check command
        # gets a message of its own instead of an empty one.
        _ok=false
        _err=""
        if [ -n "$service_check_cmd" ] ; then
            # Using eval means variables can contain semicolon separated commands
            if eval "$service_check_cmd" ; then
                _ok=true
            else
                _err="monitoring service \"${_progname}\" failed"
            fi
        else
            if nfs_check_rpcinfo \
                   "$_progname" "$version" "$family" >/dev/null ; then
                _ok=true
            else
                _err="$ctdb_check_rpc_out"
            fi
        fi

        if $_ok ; then
            # A fail counter is only relevant with non-default
            # thresholds; reset it now that the check has passed
            if [ "$unhealthy_after" -ne 1 ] || [ "$restart_every" -ne 0 ] ; then
                ctdb_counter_init "$_service_name"
            fi
            exit 0
        fi

        ctdb_counter_incr "$_service_name"
        _failcount=$(ctdb_counter_get "$_service_name")

        _unhealthy=false
        if [ "$unhealthy_after" -gt 0 ] ; then
            if [ "$_failcount" -ge "$unhealthy_after" ] ; then
                _unhealthy=true
                echo "ERROR: $_err"
            fi
        fi

        if [ "$restart_every" -gt 0 ] ; then
            # Restart on every restart_every'th consecutive failure
            if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
                # Avoid reporting the same failure twice
                if ! $_unhealthy ; then
                    echo "WARNING: $_err"
                fi
                nfs_restart_service
            fi
        fi

        if $_unhealthy ; then
            exit 1
        fi

        # Leave the subshell explicitly; this is not function scope
        exit 0
    ) || exit 1
}
171
# Restart the service that is currently being checked.
#
# Uses variables: _progname, service_stop_cmd, service_start_cmd,
#                 service_debug_cmd
#
# Calls die if either the stop or the start command is missing.
# Otherwise runs the stop command, then the optional debug command,
# and finally hands the start command to background_with_logging.
nfs_restart_service ()
{
    # Two separate tests: the -o operator of [ is obsolescent and can
    # misparse values that look like operators
    if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
        die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
    fi

    echo "Trying to restart service \"${_progname}\"..."
    # Using eval means variables can contain semicolon separated commands
    eval "$service_stop_cmd"
    if [ -n "$service_debug_cmd" ] ; then
        eval "$service_debug_cmd"
    fi
    background_with_logging eval "$service_start_cmd"
}
187
######################################################
# Check an RPC service with rpcinfo
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - version, optional, not passed if empty/unset
# $3 - family, optional, default is "tcp"
#
# rpcinfo is pointed at $CTDB_RPCINFO_LOCALHOST (default 127.0.0.1)
# and its combined stdout/stderr is stored in $ctdb_check_rpc_out.
# On failure that variable is prefixed with a "failed RPC check"
# line and printed, and 1 is returned.
######################################################
ctdb_check_rpc ()
{
    _progname="$1"
    _version="$2"
    _family="${3:-tcp}"

    _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"

    # Expansions are left unquoted so that an empty version simply
    # disappears from the command line
    ctdb_check_rpc_out=$(rpcinfo -T $_family $_localhost \
                                 $_progname $_version 2>&1) && return 0

    ctdb_check_rpc_out="$_progname failed RPC check:
$ctdb_check_rpc_out"
    echo "$ctdb_check_rpc_out"
    return 1
}
207
# Check an RPC service for every requested family/version combination
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - versions, optional, space separated; when empty a single
#      check with no version is made per family
# $3 - families, optional, space separated, default is "tcp"
#
# Stops at the first failing ctdb_check_rpc call and returns its status.
nfs_check_rpcinfo ()
{
    _progname="$1"
    _versions="$2"
    _families="${3:-tcp}"

    for _family in $_families ; do
        # No versions requested: one version-less check for this family
        if [ -z "$_versions" ] ; then
            ctdb_check_rpc $_progname "" $_family || return $?
            continue
        fi

        for _version in $_versions ; do
            ctdb_check_rpc $_progname $_version $_family || return $?
        done
    done
}
224
##################################################################
# use statd-callout to update NFS lock info
#
# Runs "$CTDB_BASE/statd-callout update" and returns its exit status.
# Does nothing, successfully, when the callout is not executable.
##################################################################
nfs_update_lock_info ()
{
    [ -x "$CTDB_BASE/statd-callout" ] || return 0

    "$CTDB_BASE/statd-callout" update
}
234
######################################################################

# Common event script setup, using helpers from the sourced functions file

loadconfig

ctdb_setup_service_state_dir

ctdb_start_stop_service

# Nothing more to do unless this service is managed by CTDB
if ! is_ctdb_managed_service ; then
    exit 0
fi

ctdb_service_check_reconfigure

# Dispatch on the event name
case "$1" in
    startup|shutdown)
        # Handled entirely by the callout
        nfs_callout "$@"
        ;;

    takeip|releaseip)
        # Let the callout see the event, then flag a reconfigure
        nfs_callout "$@"
        ctdb_service_set_reconfigure
        ;;

    monitor)
        nfs_callout "monitor-pre" || exit $?

        # Check that directories for shares actually exist
        case "$CTDB_NFS_SKIP_SHARE_CHECK" in
            yes)
                : # Share check explicitly disabled
                ;;
            *)
                nfs_callout "monitor-list-shares" | ctdb_check_directories || \
                    exit $?
                ;;
        esac

        update_tickles 2049
        nfs_update_lock_info

        nfs_check_services

        nfs_callout "monitor-post" || exit $?
        ;;

    *)
        ctdb_standard_event_handler "$@"
        ;;
esac

exit 0