Reduce the number of failures required before attempting a restart:
a restart is now attempted at failure counts of 2 and 4.
However, once there have been 6 or more failures we mark the cluster
unhealthy and no longer try to restart. If the previous 2 restart
attempts didn't work then there isn't any use in bogging the system
down with another attempted restart on every monitor event.
Signed-off-by: Martin Schwenke <martin@meltin.net>
(This used to be ctdb commit
f654739080b40b7ac1b7f998cacc689d3d4e3193)
p="rpc.statd"
which $p >/dev/null 2>/dev/null && \
nfs_check_rpc_service "statd" \
- -ge 10 "verbose restart"
+ -ge 6 "verbose unhealthy" \
+ -eq 4 "verbose restart" \
+ -eq 2 "restart:bs"
# check that NFS responds to rpc requests
if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
nfs_check_rpc_service "knfsd" \
- -ge 15 "verbose restart unhealthy" \
- -eq 10 "restart:bs"
+ -ge 6 "verbose unhealthy" \
+ -eq 4 "verbose restart" \
+ -eq 2 "restart:bs"
fi
# check that lockd responds to rpc requests