ctdb-scripts: Extend NFS .check files with service_check_cmd variable
author: Martin Schwenke <martin@meltin.net>
Mon, 13 Jul 2015 01:30:51 +0000 (11:30 +1000)
committer: Amitay Isaacs <amitay@samba.org>
Tue, 14 Jul 2015 07:57:18 +0000 (09:57 +0200)
$service_check_cmd specifies a command to run instead of the regular
rpcinfo-based check.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/config/functions
ctdb/config/nfs-checks.d/README
ctdb/tests/eventscripts/scripts/local.sh

index 5079fdf4d797ef1f81ff29890db349f8556a2385..ddfe5f9d0a0f6a7f9ce1ac52679891293171fd81 100755 (executable)
@@ -287,9 +287,10 @@ nfs_check_services ()
 # Variables are:
 #
 # * family             - "tcp" or "udp" or space separated list
-#                        default: tcp
+#                        default: tcp, not used with "service_check_cmd"
 # * version            - optional, RPC service version number
-#                        default is to omit to check for any version
+#                        default is to omit to check for any version,
+#                        not used with "service_check_cmd"
 # * unhealthy_after    - number of check fails before unhealthy
 #                        default: 1
 # * restart_every      - number of check fails before restart
@@ -300,6 +301,8 @@ nfs_check_services ()
 # * service_start_cmd  - command to start service
 #                        default: no default, must be provided if
 #                                 restart_every > 0
+# * service_check_cmd  - command to check health of service
+#                        default is to check RPC service using rpcinfo
 # * service_debug_cmd  - command to debug a service after trying to stop it;
 #                        for example, it can be useful to print stack
 #                        traces of threads that have not exited, since
@@ -323,6 +326,7 @@ nfs_check_service ()
        restart_every=0
        service_stop_cmd=""
        service_start_cmd=""
+       service_check_cmd=""
        service_debug_cmd=""
 
        # Eval line-by-line.  Expands variable references in values.
@@ -334,7 +338,7 @@ nfs_check_service ()
                family=*|version=*|\
                unhealthy_after=*|restart_every=*|\
                service_stop_cmd=*|service_start_cmd=*|\
-               service_debug_cmd=*)
+               service_check_cmd=*|service_debug_cmd=*)
 
                    eval "$_line"
                    ;;
@@ -346,8 +350,20 @@ nfs_check_service ()
 
        _service_name="nfs_${_progname}"
 
-       if nfs_check_rpcinfo \
-              "$_progname" "$version" "$family" >/dev/null ; then
+       _ok=false
+       if [ -n "$service_check_cmd" ] ; then
+           # Using eval means variables can contain semicolon separated commands
+           if eval "$service_check_cmd" ; then
+               _ok=true
+           fi
+       else
+           if nfs_check_rpcinfo \
+                  "$_progname" "$version" "$family" >/dev/null ; then
+               _ok=true
+           fi
+       fi
+
+       if $_ok ; then
            if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
                ctdb_counter_init "$_service_name"
            fi
index 51ba54b7373a772fdbc39294e8633d91d7c9381d..044067a527377c0e5ab9c8089431bf0a97394d34 100644 (file)
@@ -6,9 +6,10 @@ are ignored.
 Supported variables are:
 
 * family             - "tcp" or "udp" or space separated list
-                       default: tcp
+                       default: tcp, not used with "service_check_cmd"
 * version            - optional, RPC service version number
-                       default is to omit to check for any version
+                       default is to omit to check for any version,
+                       not used with "service_check_cmd"
 * unhealthy_after    - number of check fails before unhealthy
                        default: 1
 * restart_every      - number of check fails before restart
@@ -19,6 +20,8 @@ Supported variables are:
 * service_start_cmd  - command to start service
                        default: no default, must be provided if
                                 restart_every > 0
+* service_check_cmd  - command to check health of service
+                       default is to check RPC service using rpcinfo
 * service_debug_cmd  - command to debug a service after trying to stop it;
                        for example, it can be useful to print stack
                        traces of threads that have not exited, since
index 6bcf08278ae53a8041924b5b482800bfc2e431ad..9c92f8468298a034b0180606c7ef852a85f472e1 100644 (file)
@@ -973,6 +973,7 @@ rpc_set_service_failure_response ()
        restart_every=0
        service_stop_cmd=""
        service_start_cmd=""
+       service_check_cmd=""
        service_debug_cmd=""
 
        # Don't bother syntax checking, eventscript does that...
@@ -1184,8 +1185,8 @@ simple_test_command ()
 #
 # - 2nd argument is the NFS/RPC service being tested
 #
-#   rpcinfo is used on each iteration to test the availability of the
-#   service
+#   rpcinfo (or $service_check_cmd) is used on each iteration to test
+#   the availability of the service
 #
 #   If this is not set or null then no RPC service is checked and the
 #   required output is not reset on each iteration.  This is useful in
@@ -1224,7 +1225,18 @@ nfs_iterate_test ()
            shift 2
        fi
        if [ -n "$_rpc_service" ] ; then
-           if rpcinfo -T tcp localhost "$_rpc_service" >/dev/null 2>&1 ; then
+           _ok=false
+           if [ -n "$service_check_cmd" ] ; then
+               if eval "$service_check_cmd" ; then
+                   _ok=true
+               fi
+           else
+               if rpcinfo -T tcp localhost "$_rpc_service" >/dev/null 2>&1 ; then
+                   _ok=true
+               fi
+           fi
+
+           if $_ok ; then
                _iterate_failcount=0
            else
                _iterate_failcount=$(($_iterate_failcount + 1))