ctdb-scripts: Only run unhealthy call-out when passing threshold
author Martin Schwenke <martin@meltin.net>
Fri, 10 Jun 2022 00:32:01 +0000 (10:32 +1000)
committer Amitay Isaacs <amitay@samba.org>
Fri, 22 Jul 2022 07:32:54 +0000 (07:32 +0000)
For memory usage, there is no need to dump all of this data on every
failed monitor event.  The first call-out will be enough to diagnose
the problem.  The node will then go unhealthy and drop clients, after
which memory usage should fall.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Autobuild-User(master): Amitay Isaacs <amitay@samba.org>
Autobuild-Date(master): Fri Jul 22 07:32:54 UTC 2022 on sn-devel-184

ctdb/config/events/legacy/05.system.script

index 70e4678f45b2561e5bfdd25f4683048d9aee3b57..56a07c7cc77f0e84455431eb76a1bf39bc1369ae 100755 (executable)
@@ -44,13 +44,23 @@ check_thresholds()
        # script_state_dir set by ctdb_setup_state_dir()
        # shellcheck disable=SC2154
        _cache="${script_state_dir}/cache_${_t}"
+       if [ -r "$_cache" ]; then
+               read -r _prev <"$_cache"
+       else
+               _prev=0
+       fi
        if validate_percentage "$_unhealthy_threshold" "$_thing"; then
                if [ "$_usage" -ge "$_unhealthy_threshold" ]; then
                        printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \
                               "$_thing" \
                               "$_usage" \
                               "$_unhealthy_threshold"
-                       eval "$_unhealthy_callout"
+                       # Only run unhealthy callout if passing the
+                       # unhealthy threshold.  That is, if the
+                       # previous usage was below the threshold.
+                       if [ "$_prev" -lt "$_unhealthy_threshold" ]; then
+                               eval "$_unhealthy_callout"
+                       fi
                        echo "$_usage" >"$_cache"
                        exit 1
                fi
@@ -58,11 +68,6 @@ check_thresholds()
 
        if validate_percentage "$_warn_threshold" "$_thing"; then
                if [ "$_usage" -ge "$_warn_threshold" ]; then
-                       if [ -r "$_cache" ]; then
-                               read -r _prev <"$_cache"
-                       else
-                               _prev=""
-                       fi
                        if [ "$_usage" = "$_prev" ]; then
                                return
                        fi