ctdb-scripts: Move event scripts to events/legacy/ directory
[samba.git] / ctdb / config / events / legacy / 60.nfs.script
#!/bin/sh
# script to manage nfs in a clustered environment

# Honour CTDB_BASE if it is already set.  Otherwise derive it from the
# location of this script: the script is kept in <base>/events/legacy/,
# so the base directory (the one containing "functions") is two levels
# above the directory holding the script.  The steps are chained with
# && so that a failure to resolve the directory leaves CTDB_BASE empty
# instead of silently pointing at an unrelated directory.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d/../.." && pwd)
fi

# Helper functions used throughout this script are expected to come
# from this file.
. "${CTDB_BASE}/functions"

service_name="nfs"

load_system_config "nfs"

load_script_options

# Sets script_state_dir, which is used further down
ctdb_setup_state_dir "service" "$service_name"
16
17 ######################################################################
18
service_reconfigure ()
{
    # Restart lock manager, notify clients.  There is nothing to do
    # when the statd-callout helper is missing or not executable.
    [ -x "${CTDB_BASE}/statd-callout" ] || return 0

    # Run the notification in the background, discarding all output
    "${CTDB_BASE}/statd-callout" notify >/dev/null 2>&1 &
}
26
27 ######################################################################
28
######################################################
# Run the health check for every configured NFS service
#
# Check definitions are the readable NN.<progname>.check files found
# in $CTDB_NFS_CHECKS_DIR (default: "${CTDB_BASE}/nfs-checks.d").
# Each file is passed to nfs_check_service() on stdin, with
# <progname> as the only argument.
######################################################
nfs_check_services ()
{
    _checks_dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"

    # Files must end with .check - avoids editor backups, RPM fu, ...
    for _check_file in "$_checks_dir"/[0-9][0-9].*.check ; do
        # The readability test also skips the literal pattern that is
        # left in place when the glob matches nothing
        if [ -r "$_check_file" ] ; then
            _check_name="${_check_file##*/}"           # drop directory
            _check_name="${_check_name%.check}"        # drop suffix
            _check_name="${_check_name#[0-9][0-9].}"   # drop NN. prefix

            nfs_check_service "$_check_name" <"$_check_file"
        fi
    done
}
49
######################################################
# Check the health of an NFS service
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
#
# Reads variables from stdin
#
# Variables are:
#
# * family             - "tcp" or "udp" or space separated list
#                        default: tcp, not used with "service_check_cmd"
# * version            - optional, RPC service version number
#                        default is to omit to check for any version,
#                        not used with "service_check_cmd"
# * unhealthy_after    - number of check fails before unhealthy
#                        default: 1
# * restart_every      - number of check fails before restart
#                        default: 0, meaning no restart
# * service_stop_cmd   - command to stop service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_start_cmd  - command to start service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_check_cmd  - command to check health of service
#                        default is to check RPC service using rpcinfo
# * service_debug_cmd  - command to debug a service after trying to stop it;
#                        for example, it can be useful to print stack
#                        traces of threads that have not exited, since
#                        they may be stuck doing I/O;
#                        no default, see also function program_stack_traces()
#
# Quoting in values is not preserved
#
# Result: returns 0 when the service is healthy, or has failed but has
# not yet reached "unhealthy_after" failures.  When the service is
# unhealthy, or the input contains an unknown variable, an ERROR line
# is printed and the calling shell is exited with status 1.
#
######################################################
nfs_check_service ()
{
    _progname="$1"

    # This sub-shell is created to intentionally limit the scope of
    # variable values read from the .check files.
    # shellcheck disable=SC2030
    (
        # Defaults
        family="tcp"
        version=""
        unhealthy_after=1
        restart_every=0
        service_stop_cmd=""
        service_start_cmd=""
        service_check_cmd=""
        service_debug_cmd=""

        # Eval line-by-line.  Expands variable references in values.
        # Also allows variable name checking, which seems useful.
        # "read" is deliberately used without -r so that existing
        # .check files continue to be parsed exactly as before.
        # shellcheck disable=SC2162
        while read _line ; do
            case "$_line" in
                \#*|"") : ;; # Ignore comments, blank lines

                family=*|version=*|\
                unhealthy_after=*|restart_every=*|\
                service_stop_cmd=*|service_start_cmd=*|\
                service_check_cmd=*|service_debug_cmd=*)

                    eval "$_line"
                    ;;
                *)
                    echo "ERROR: Unknown variable for ${_progname}: ${_line}"
                    exit 1
                    ;;
            esac
        done

        _ok=false
        if [ -n "$service_check_cmd" ] ; then
            # Using eval means variables can contain semicolon separated commands
            if eval "$service_check_cmd" ; then
                _ok=true
            else
                _err="monitoring service \"${_progname}\" failed"
            fi
        else
            # Output is discarded here; the failure text is picked up
            # from ctdb_check_rpc_out instead
            if nfs_check_rpcinfo \
                   "$_progname" "$version" "$family" >/dev/null ; then
                _ok=true
            else
                _err="$ctdb_check_rpc_out"
            fi
        fi

        if $_ok ; then
            # The failure counter is only reset when a non-default
            # threshold is in use.  Operands are quoted and the 2
            # conditions are separate tests, instead of the
            # obsolescent "-o", so that odd values can not be
            # re-parsed as extra test operators.
            if [ "$unhealthy_after" -ne 1 ] || \
                   [ "$restart_every" -ne 0 ] ; then
                ctdb_counter_init "$_progname"
            fi
            exit 0
        fi

        ctdb_counter_incr "$_progname"
        _failcount=$(ctdb_counter_get "$_progname")

        # unhealthy_after=0 means failures never mark the service unhealthy
        _unhealthy=false
        if [ "$unhealthy_after" -gt 0 ] ; then
            if [ "$_failcount" -ge "$unhealthy_after" ] ; then
                _unhealthy=true
                echo "ERROR: $_err"
            fi
        fi

        # Attempt a restart on every restart_every'th consecutive failure
        if [ "$restart_every" -gt 0 ] ; then
            if [ $((_failcount % restart_every)) -eq 0 ] ; then
                # Avoid reporting the same failure twice
                if ! $_unhealthy ; then
                    echo "WARNING: $_err"
                fi
                nfs_restart_service
            fi
        fi

        if $_unhealthy ; then
            exit 1
        fi

        # Leave the sub-shell explicitly: this is the end of the
        # sub-shell, not a return from the function
        exit 0
    ) || exit 1
}
175
# Restart the service currently being checked: run the stop command,
# then the optional debug command, then hand the start command to
# background_with_logging().
#
# Uses: _progname, service_stop_cmd, service_start_cmd, service_debug_cmd
#
# Calls die() with an error message if either the stop or the start
# command is not configured.
#
# This function is called within the sub-shell that shellcheck thinks
# loses the above variable values.
# shellcheck disable=SC2031
nfs_restart_service ()
{
    # 2 separate tests joined with || instead of the obsolescent and
    # potentially ambiguous "-o" operator
    if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
        die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
    fi

    echo "Trying to restart service \"${_progname}\"..."
    # Using eval means variables can contain semicolon separated commands
    eval "$service_stop_cmd"
    if [ -n "$service_debug_cmd" ] ; then
        eval "$service_debug_cmd"
    fi
    background_with_logging eval "$service_start_cmd"
}
194
######################################################
# Check an RPC service with rpcinfo
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - version, optional, not passed to rpcinfo if empty/unset
# $3 - transport family, optional, default is "tcp"
#
# The combined stdout/stderr of rpcinfo is left in ctdb_check_rpc_out.
# On failure that text is prefixed with a "<progname> failed RPC
# check:" line and also printed, and 1 is returned.
######################################################
ctdb_check_rpc ()
{
    _rpc_prog="$1"
    _rpc_ver="$2"
    _rpc_fam="${3:-tcp}"

    # IPv6 transports are checked via the IPv6 loopback address
    if [ "$_rpc_fam" = "tcp6" ] || [ "$_rpc_fam" = "udp6" ] ; then
        _rpc_host="${CTDB_RPCINFO_LOCALHOST6:-::1}"
    else
        _rpc_host="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
    fi

    # $_rpc_ver is not quoted because it is optional
    # shellcheck disable=SC2086
    if ctdb_check_rpc_out=$(rpcinfo -T "$_rpc_fam" "$_rpc_host" \
                                    "$_rpc_prog" $_rpc_ver 2>&1) ; then
        return 0
    fi

    ctdb_check_rpc_out="$_rpc_prog failed RPC check:
$ctdb_check_rpc_out"
    echo "$ctdb_check_rpc_out"
    return 1
}
222
# Check an RPC program for every combination of the given transport
# families and versions, using ctdb_check_rpc().
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - versions, optional, space separated, not passed if empty/unset
# $3 - families, optional, space separated, default is "tcp"
#
# Stops at the first failing check and returns its status.
nfs_check_rpcinfo ()
{
    _check_prog="$1"
    _check_versions="$2"
    _check_families="${3:-tcp}"

    for _one_family in $_check_families ; do
        # Without any versions do a single version-less check
        if [ -z "$_check_versions" ] ; then
            ctdb_check_rpc "$_check_prog" "" "$_one_family" || return $?
            continue
        fi

        for _one_version in $_check_versions ; do
            ctdb_check_rpc "$_check_prog" "$_one_version" "$_one_family" || \
                return $?
        done
    done
}
239
##################################################################
# use statd-callout to update NFS lock info
#
# Does nothing, successfully, when statd-callout is missing or not
# executable.  Otherwise returns the status of "statd-callout update".
##################################################################
nfs_update_lock_info ()
{
    [ -x "$CTDB_BASE/statd-callout" ] || return 0

    "$CTDB_BASE/statd-callout" update
}
249
250 ######################################################################
251
# script_state_dir set by ctdb_setup_state_dir()
# shellcheck disable=SC2154
nfs_callout_init "$script_state_dir"

# Nothing more to do unless NFS is configured to be managed
if [ "$CTDB_MANAGES_NFS" != "yes" ] ; then
        exit 0
fi

# Dispatch on the event name.  Events not listed here are ignored.
case "$1" in
startup|shutdown)
        # Pass the event straight to the NFS callout
        nfs_callout "$@" || exit $?
        ;;

takeip|releaseip)
        # Pass the event to the callout, then flag that a reconfigure
        # is needed; it is acted on in "ipreallocated"
        nfs_callout "$@" || exit $?
        ctdb_service_set_reconfigure
        ;;

ipreallocated)
        if ctdb_service_needs_reconfigure ; then
                ctdb_service_reconfigure
        fi
        ;;

monitor)
        nfs_callout "monitor-pre" || exit $?

        # Check that directories for shares actually exist
        if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
            nfs_callout "monitor-list-shares" | ctdb_check_directories || \
                exit $?
        fi

        update_tickles 2049
        nfs_update_lock_info

        # Exits this script with status 1 if a service is unhealthy
        nfs_check_services

        nfs_callout "monitor-post" || exit $?
        ;;
esac

exit 0