New variables CTDB_MONITOR_MEMORY_USAGE and CTDB_MONITOR_SWAP_USAGE.
Both take a pair of <warn_threshold>:<unhealthy_threshold> where each
threshold is specified as a percentage.
This adds a callout to check_thresholds() that is run when the
unhealthy threshold is reached.
Add some combination tests.
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
_thing="$1"
_thresholds="$2"
_usage="$3"
+ _unhealthy_callout="$4"
case "$_thresholds" in
*:*)
if validate_percentage "$_unhealthy_threshold" "$_thing" ; then
if [ "$_usage" -ge "$_unhealthy_threshold" ] ; then
- die "ERROR: ${_thing} utilization ${_usage}% >= threshold ${_unhealthy_threshold}%"
+ echo "ERROR: ${_thing} utilization ${_usage}% >= threshold ${_unhealthy_threshold}%"
+ eval "$_unhealthy_callout"
+ exit 1
fi
fi
done
}
+dump_memory_info ()
+{
+ echo "CRITICAL: Shutting down CTDB!!!"
+ get_proc "meminfo"
+ ps auxfww
+ set_proc "sysrq-trigger" "m"
+ ctdb disable
+ sleep 3
+ ctdb shutdown
+}
+
monitor_memory_usage ()
{
- if [ -z "$CTDB_MONITOR_FREE_MEMORY_WARN" -a \
- -z "$CTDB_MONITOR_FREE_MEMORY" -a \
- "$CTDB_CHECK_SWAP_IS_NOT_USED" != "yes" ] ; then
+ if [ -z "$CTDB_MONITOR_MEMORY_USAGE" -a \
+ -z "$CTDB_MONITOR_SWAP_USAGE" ] ; then
return
fi
_mem_usage="$1"
_swap_usage="$2"
- # Shutdown CTDB when memory is below the configured limit
- if [ -n "$CTDB_MONITOR_FREE_MEMORY" ] ; then
- if [ $_mem_usage -ge $CTDB_MONITOR_FREE_MEMORY ] ; then
- echo "CRITICAL: OOM - ${_mem_usage}% usage >= ${CTDB_MONITOR_FREE_MEMORY}% (CTDB threshold)"
- echo "CRITICAL: Shutting down CTDB!!!"
- echo "$_meminfo"
- ps auxfww
- set_proc "sysrq-trigger" "m"
- ctdb disable
- sleep 3
- ctdb shutdown
- fi
- fi
+ check_thresholds "System memory" \
+ "$CTDB_MONITOR_MEMORY_USAGE" \
+ "$_mem_usage" \
+ dump_memory_info
- # Warn when low on memory
- if [ -n "$CTDB_MONITOR_FREE_MEMORY_WARN" ] ; then
- if [ $_mem_usage -ge $CTDB_MONITOR_FREE_MEMORY_WARN ] ; then
- echo "WARNING: memory usage is excessive - ${_mem_usage}% >= ${CTDB_MONITOR_FREE_MEMORY_WARN}% (CTDB threshold)"
- fi
- fi
-
- # We should never enter swap, so SwapTotal == SwapFree.
- if [ "$CTDB_CHECK_SWAP_IS_NOT_USED" = "yes" ] ; then
- if [ $_swap_usage -gt 0 ] ; then
- echo We are swapping:
- echo "$_meminfo"
- ps auxfww
- fi
- fi
+ check_thresholds "System swap" \
+ "$CTDB_MONITOR_SWAP_USAGE" \
+ "$_swap_usage" \
+ dump_memory_info
}
</varlistentry>
<varlistentry>
- <term>CTDB_CHECK_SWAP_IS_NOT_USED=yes|no</term>
+ <term>CTDB_MONITOR_MEMORY_USAGE=<parameter>MEM-LIMITS</parameter></term>
<listitem>
<para>
- Should a warning be logged if swap space is in use.
- </para>
- <para>
- Default is no.
- </para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
- <term>CTDB_MONITOR_FREE_MEMORY=<parameter>NUM</parameter></term>
- <listitem>
- <para>
- NUM is threshold of acceptable memory usage, expressed
- as a percentage. If this is set and memory usage
- reaches this limit then some debug information will be
- logged, the node will be disabled and then CTDB will be
- shut down.
+ MEM-LIMITS takes the form
+ <parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
+ indicating that warnings should be logged if memory
+ usage reaches WARN_LIMIT%. If usage reaches
+ UNHEALTHY_LIMIT% then the node should be flagged
+ unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
+ left blank, meaning that check will be omitted.
</para>
<para>
No default.
</varlistentry>
<varlistentry>
- <term>CTDB_MONITOR_FREE_MEMORY_WARN=<parameter>NUM</parameter></term>
+ <term>CTDB_MONITOR_SWAP_USAGE=<parameter>SWAP-LIMITS</parameter></term>
<listitem>
<para>
- NUM is threshold of acceptable memory usage, expressed
- as a percentage. If this is set and memory usage
- reaches this limit then a warning will be logged.
+ SWAP-LIMITS takes the form
+ <parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
+ indicating that warnings should be logged if
+ swap usage reaches WARN_LIMIT%. If usage reaches
+ UNHEALTHY_LIMIT% then the node should be flagged
+ unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
+ left blank, meaning that check will be omitted.
</para>
<para>
No default.
setup_memcheck 100 100
-CTDB_MONITOR_FREE_MEMORY=""
-CTDB_MONITOR_FREE_MEMORY_WARN=""
-CTDB_CHECK_SWAP_IS_NOT_USED="no"
+CTDB_MONITOR_MEMORY_USAGE=""
+CTDB_MONITOR_SWAP_USAGE=""
ok_null
setup_memcheck
-CTDB_MONITOR_FREE_MEMORY="90"
-CTDB_MONITOR_FREE_MEMORY_WARN="80"
-CTDB_CHECK_SWAP_IS_NOT_USED="yes"
+CTDB_MONITOR_MEMORY_USAGE="80:90"
+CTDB_MONITOR_SWAP_USAGE="1:50"
ok_null
define_test "Memory check, bad situation, only swap check"
-setup_memcheck 100 10
+setup_memcheck 100 90
-CTDB_MONITOR_FREE_MEMORY=""
-CTDB_MONITOR_FREE_MEMORY_WARN=""
-CTDB_CHECK_SWAP_IS_NOT_USED="yes"
+CTDB_MONITOR_MEMORY_USAGE=""
+CTDB_MONITOR_SWAP_USAGE=":50"
-ok <<EOF
-We are swapping:
+required_result 1 <<EOF
+ERROR: System swap utilization 90% >= threshold 50%
+CRITICAL: Shutting down CTDB!!!
$FAKE_PROC_MEMINFO
$(ps foobar)
+CTDB says BYE!
EOF
simple_test
setup_memcheck 90 10
-CTDB_MONITOR_FREE_MEMORY=""
-CTDB_MONITOR_FREE_MEMORY_WARN="85"
-CTDB_CHECK_SWAP_IS_NOT_USED="no"
+CTDB_MONITOR_MEMORY_USAGE="85:"
+CTDB_MONITOR_SWAP_USAGE=""
ok <<EOF
-WARNING: memory usage is excessive - 90% >= 85% (CTDB threshold)
+WARNING: System memory utilization 90% >= threshold 85%
EOF
simple_test
setup_memcheck 90 0
-CTDB_MONITOR_FREE_MEMORY="85"
-CTDB_MONITOR_FREE_MEMORY_WARN=""
-CTDB_CHECK_SWAP_IS_NOT_USED="no"
+CTDB_MONITOR_MEMORY_USAGE=":85"
+CTDB_MONITOR_SWAP_USAGE=""
-ok <<EOF
-CRITICAL: OOM - 90% usage >= 85% (CTDB threshold)
+required_result 1 <<EOF
+ERROR: System memory utilization 90% >= threshold 85%
CRITICAL: Shutting down CTDB!!!
$FAKE_PROC_MEMINFO
$(ps foobar)
--- /dev/null
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check, bad situation, both memory checks, causes warning"
+
+setup_memcheck 87 0
+
+CTDB_MONITOR_MEMORY_USAGE="80:90"
+CTDB_MONITOR_SWAP_USAGE=""
+
+ok <<EOF
+WARNING: System memory utilization 87% >= threshold 80%
+EOF
+
+simple_test
--- /dev/null
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Memory check, bad situation, both memory checks, causes unhealthy"
+
+setup_memcheck 87 0
+
+CTDB_MONITOR_MEMORY_USAGE="70:80"
+CTDB_MONITOR_SWAP_USAGE=""
+
+required_result 1 <<EOF
+ERROR: System memory utilization 87% >= threshold 80%
+CRITICAL: Shutting down CTDB!!!
+MemTotal: 3940712 kB
+MemFree: 225268 kB
+Buffers: 146120 kB
+Cached: 140904 kB
+SwapCached: 56016 kB
+Active: 2422104 kB
+Inactive: 1019928 kB
+Active(anon): 1917580 kB
+Inactive(anon): 523080 kB
+Active(file): 504524 kB
+Inactive(file): 496848 kB
+Unevictable: 4844 kB
+Mlocked: 4844 kB
+SwapTotal: 5857276 kB
+SwapFree: 5857276 kB
+...
+USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
+root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
+root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
+...
+root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
+root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
+...
+[MORE FAKE ps OUTPUT]
+CTDB says BYE!
+EOF
+
+simple_test
SwapFree: ${_swap_free} kB
..."
- export CTDB_MONITOR_FREE_MEMORY
- export CTDB_MONITOR_FREE_MEMORY_WARN
- export CTDB_CHECK_SWAP_IS_NOT_USED
+ export CTDB_MONITOR_MEMORY_USAGE
+ export CTDB_MONITOR_SWAP_USAGE
}
setup_fscheck ()