ctdb-scripts: Factor out new function check_thresholds()
[sfrench/samba-autobuild/.git] / ctdb / config / events.d / 05.system
1 #!/bin/sh
2 # ctdb event script for checking local file system utilization
3
# Derive CTDB_BASE from this script's own location when the caller has
# not already set it: change (physically, via cd -P) into the directory
# containing the script and take the parent of the resulting $PWD.
# All expansions are quoted so that an installation path containing
# whitespace or glob characters is handled correctly.
[ -n "$CTDB_BASE" ] || \
    export CTDB_BASE="$(cd -P "$(dirname "$0")" ; dirname "$PWD")"

# Pull in the shared helper functions, then load the configuration.
# NOTE(review): loadconfig is not defined in this file - presumably it
# is provided by the functions file sourced here.
. "$CTDB_BASE/functions"
loadconfig
9
# Decide whether $1 is an acceptable percentage.
#
# Arguments:
#   $1 - candidate value
#   $2 - optional name of the check; when non-empty it is quoted in
#        the warning text
#
# Returns 0 when $1 is one or two decimal digits or exactly "100".
# Returns 1 otherwise.  An empty $1 fails silently; any other invalid
# value also prints a WARNING line on stdout.
validate_percentage ()
{
    # Nothing to validate: a failure that doesn't need a warning
    if [ -z "$1" ] ; then
        return 1
    fi

    case "$1" in
        100|[0-9][0-9]|[0-9])
            return 0
            ;;
    esac

    # Anything that reaches this point is non-empty and malformed
    echo "WARNING: ${1} is an invalid percentage${2:+ in \"}${2}${2:+\"} check"
    return 1
}
19
# Compare a utilization figure against warning/unhealthy thresholds.
#
# Arguments:
#   $1 - description of the thing being checked, used in all messages
#   $2 - thresholds in the form <warn>[:<unhealthy>]
#   $3 - current utilization, as an integer percentage
#
# Each threshold is checked with validate_percentage(); one that is
# empty or invalid is skipped.  When utilization >= the unhealthy
# threshold, die() is called with an ERROR message.  When utilization
# >= the warn threshold, a WARNING line is printed on stdout.
# NOTE(review): die() is not defined in this file - presumably it
# terminates the script, so the warning check is then never reached.
check_thresholds ()
{
    _thing="$1"
    _thresholds="$2"
    _usage="$3"

    # Split "<warn>:<unhealthy>"; without a ":" there is only a
    # warning threshold
    case "$_thresholds" in
        *:*)
            _warn_threshold="${_thresholds%:*}"
            _unhealthy_threshold="${_thresholds#*:}"
            ;;
        *)
            _warn_threshold="$_thresholds"
            _unhealthy_threshold=""
            ;;
    esac

    if validate_percentage "$_unhealthy_threshold" "$_thing" ; then
        if [ "$_usage" -ge "$_unhealthy_threshold" ] ; then
            die "ERROR: ${_thing} utilization ${_usage}% >= threshold ${_unhealthy_threshold}%"
        fi
    fi

    # Pass the description here too, so that a warning about an
    # invalid warn threshold names the check it belongs to
    if validate_percentage "$_warn_threshold" "$_thing" ; then
        if [ "$_usage" -ge "$_warn_threshold" ] ; then
            echo "WARNING: ${_thing} utilization ${_usage}% >= threshold ${_warn_threshold}%"
        fi
    fi
}
48
# Check utilization of every filesystem listed in
# $CTDB_MONITOR_FILESYSTEM_USAGE.
#
# Each whitespace-separated entry has the form
#   <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold]
#
# An entry whose mount point is not a directory, or for which no
# utilization figure can be extracted from the df output, produces a
# WARNING on stdout and is skipped.  Every other entry is handed to
# check_thresholds().
monitor_filesystem_usage ()
{
    for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE ; do
        # Mount point is everything before the first ":", the
        # thresholds are everything after it
        _fs_thresholds="${_fs#*:}"
        _fs_mount="${_fs%%:*}"

        [ -d "$_fs_mount" ] || {
            echo "WARNING: Directory ${_fs_mount} does not exist"
            continue
        }

        # Current utilization: the digits directly in front of "%"
        # in the df output
        _fs_usage=$(df -kP "$_fs_mount" | \
                        sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p')

        [ -n "$_fs_usage" ] || {
            echo "WARNING: Unable to get FS utilization for ${_fs_mount}"
            continue
        }

        check_thresholds "Filesystem ${_fs_mount}" \
                         "$_fs_thresholds" \
                         "$_fs_usage"
    done
}
75
# Monitor memory and swap utilization, based on the "meminfo" data
# returned by get_proc().
#
# Configuration variables (if none is set, nothing is done):
#   CTDB_MONITOR_FREE_MEMORY      - memory usage percentage at or above
#                                   which diagnostics are printed and
#                                   CTDB is disabled and shut down
#   CTDB_MONITOR_FREE_MEMORY_WARN - memory usage percentage at or above
#                                   which a WARNING is printed
#   CTDB_CHECK_SWAP_IS_NOT_USED   - "yes" to print diagnostics whenever
#                                   any swap is in use
monitor_memory_usage ()
{
    if [ -z "$CTDB_MONITOR_FREE_MEMORY_WARN" ] && \
       [ -z "$CTDB_MONITOR_FREE_MEMORY" ] && \
       [ "$CTDB_CHECK_SWAP_IS_NOT_USED" != "yes" ] ; then
        return
    fi

    _meminfo=$(get_proc "meminfo")
    # awk prints two integers: memory usage % and swap usage %.  The
    # unquoted substitution is deliberate - word splitting turns the
    # two numbers into $1 and $2.
    set -- $(echo "$_meminfo" | awk '
$1 == "MemAvailable:" { memavail += $2 }
$1 == "MemFree:"      { memfree  += $2 }
$1 == "Cached:"       { memfree  += $2 }
$1 == "Buffers:"      { memfree  += $2 }
$1 == "MemTotal:"     { memtotal  = $2 }
$1 == "SwapFree:"     { swapfree  = $2 }
$1 == "SwapTotal:"    { swaptotal = $2 }
END {
    # Prefer MemAvailable over the MemFree+Cached+Buffers estimate
    if (memavail != 0) { memfree = memavail ; }

    # Guard both divisions: a host with no swap reports SwapTotal as 0,
    # and dividing by it would abort awk or print a non-numeric value
    if (memtotal != 0) {
        mem_pct = int((memtotal - memfree) / memtotal * 100)
    } else {
        mem_pct = 0
    }
    if (swaptotal != 0) {
        swap_pct = int((swaptotal - swapfree) / swaptotal * 100)
    } else {
        swap_pct = 0
    }
    print mem_pct, swap_pct
}')
    _mem_usage="$1"
    _swap_usage="$2"

    # Shut down CTDB when memory usage reaches the configured limit
    if [ -n "$CTDB_MONITOR_FREE_MEMORY" ] ; then
        if [ "$_mem_usage" -ge "$CTDB_MONITOR_FREE_MEMORY" ] ; then
            echo "CRITICAL: OOM - ${_mem_usage}% usage >= ${CTDB_MONITOR_FREE_MEMORY}% (CTDB threshold)"
            echo "CRITICAL: Shutting down CTDB!!!"
            echo "$_meminfo"
            ps auxfww
            set_proc "sysrq-trigger" "m"
            ctdb disable
            sleep 3
            ctdb shutdown
        fi
    fi

    # Warn when memory usage reaches the warning limit
    if [ -n "$CTDB_MONITOR_FREE_MEMORY_WARN" ] ; then
        if [ "$_mem_usage" -ge "$CTDB_MONITOR_FREE_MEMORY_WARN" ] ; then
            echo "WARNING: memory usage is excessive - ${_mem_usage}% >=  ${CTDB_MONITOR_FREE_MEMORY_WARN}% (CTDB threshold)"
        fi
    fi

    # We should never enter swap, so SwapTotal == SwapFree.
    if [ "$CTDB_CHECK_SWAP_IS_NOT_USED" = "yes" ] ; then
        if [ "$_swap_usage" -gt 0 ] ; then
            echo We are swapping:
            echo "$_meminfo"
            ps auxfww
        fi
    fi
}
131
132
# Event dispatch: the "monitor" event runs the filesystem and memory
# checks; every other event is passed, with all of the script's
# arguments, to ctdb_standard_event_handler.
if [ "$1" = "monitor" ] ; then
    monitor_filesystem_usage
    monitor_memory_usage
else
    ctdb_standard_event_handler "$@"
fi

# Reaching this point always yields a successful exit status
exit 0