ctdb-scripts: Try to restart statd after every 10 failures
author Martin Schwenke <martin@meltin.net>
Fri, 14 Nov 2014 05:42:01 +0000 (16:42 +1100)
committer Amitay Isaacs <amitay@samba.org>
Tue, 18 Nov 2014 03:17:10 +0000 (04:17 +0100)
Also add and update tests for statd stack dumps.  Update the existing
60.ganesha statd test to do more iterations.  Duplicate the result as
a new test for 60.nfs.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/config/events.d/60.ganesha
ctdb/config/nfs-rpc-checks.d/10.statd.check
ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
ctdb/tests/eventscripts/60.nfs.monitor.144.sh [new file with mode: 0755]

diff --git a/ctdb/config/events.d/60.ganesha b/ctdb/config/events.d/60.ganesha
index df0912d52677050ab30a05e3e78d3236e6649094..150be1f15942f33bd070a6788dd8331ed05b7a25 100755 (executable)
@@ -230,6 +230,7 @@ case "$1" in
        p="rpc.statd"
        which $p >/dev/null 2>/dev/null && \
            nfs_check_rpc_service "statd" \
+               %  10 "verbose restart:b unhealthy" \
                -ge 6 "verbose unhealthy" \
                -eq 4 "verbose restart" \
                -eq 2 "restart:b"
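diff --git a/ctdb/config/nfs-rpc-checks.d/10.statd.check b/ctdb/config/nfs-rpc-checks.d/10.statd.check
index d738a3245e5e2575b85b8d3a99d22495607fc29a..526e238bcccd1a14feb7847a502bd1fbb3730239 100644 (file)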
@@ -1,3 +1,4 @@
+%  10 verbose restart:b unhealthy
 -ge 6 verbose unhealthy
 -eq 4 verbose restart
 -eq 2 restart:b
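diff --git a/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh b/ctdb/tests/eventscripts/60.ganesha.monitor.141.sh
index 9cd82f84cc2f2d3951061c24f35b6b7e9ab3cac5..c9a5ab7658a96636cbe96073f382affb5ce2823c 100755 (executable)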
@@ -2,7 +2,7 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "statd down, 6 iterations"
+define_test "statd down, 10 iterations"
 
 # statd fails and attempts to restart it fail.
 
@@ -37,3 +37,19 @@ rpcinfo: RPC: Program not registered
 program status version 1 is not available
 EOF
 simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+
+CTDB_NFS_DUMP_STUCK_THREADS=3
+FAKE_RPC_THREAD_PIDS=1234
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+Stack trace for rpc.statd[1234]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_1234/stack+0x0/0xff
+EOF
+simple_test || exit $?
diff --git a/ctdb/tests/eventscripts/60.nfs.monitor.144.sh b/ctdb/tests/eventscripts/60.nfs.monitor.144.sh
new file mode 100755 (executable)
index 0000000..0a3beb7
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 10 iterations"
+
+# statd fails and attempts to restart it fail.
+
+setup_nfs
+rpc_services_down "status"
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+Trying to restart statd [rpc.statd]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+EOF
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+simple_test || exit $?
+
+CTDB_NFS_DUMP_STUCK_THREADS=3
+FAKE_RPC_THREAD_PIDS=1234
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd]
+Stack trace for rpc.statd[1234]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_1234/stack+0x0/0xff
+EOF
+simple_test || exit $?