1 # Hey Emacs, this is a -*- shell-script -*- !!!
3 # utility functions for ctdb event scripts
# Put the standard system binary directories at the front of the search path.
5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
# Default CTDB_VARDIR to /var/ctdb when it is unset or empty, and export it.
7 [ -z "$CTDB_VARDIR" ] && {
8 export CTDB_VARDIR="/var/ctdb"
# Default CTDB_ETCDIR to /etc when it is unset or empty, and export it.
10 [ -z "$CTDB_ETCDIR" ] && {
11 export CTDB_ETCDIR="/etc"
14 #######################################
15 # pull in a system config file, if any
# NOTE(review): the enclosing function header, the statements inside the
# first two branches and the closing "fi" lines are not visible in this
# excerpt; the comments describe only the statements shown.
# Candidate config name: $service_config when set and non-empty,
# otherwise $service_name.
19 foo="${service_config:-${service_name}}"
20 if [ -n "$foo" ] ; then
23 elif [ "$1" != "ctdb" ] ; then
# Source the first file named $1 that exists, searching in this order:
# $CTDB_ETCDIR/sysconfig, $CTDB_ETCDIR/default, $CTDB_BASE/sysconfig.
27 if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
28 . $CTDB_ETCDIR/sysconfig/$1
29 elif [ -f $CTDB_ETCDIR/default/$1 ]; then
30 . $CTDB_ETCDIR/default/$1
31 elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
32 . $CTDB_BASE/sysconfig/$1
40 ##############################################################
41 # determine on what type of system (init style) we are running
43 # only do detection if not already set:
# Keep a CTDB_INIT_STYLE that is already set (non-empty) and return early.
44 test "x$CTDB_INIT_STYLE" != "x" && return
# Otherwise guess from the helper binary that is installed:
# /sbin/startproc selects "suse", /sbin/start-stop-daemon selects "debian".
46 if [ -x /sbin/startproc ]; then
47 CTDB_INIT_STYLE="suse"
48 elif [ -x /sbin/start-stop-daemon ]; then
49 CTDB_INIT_STYLE="debian"
# NOTE(review): presumably the fallback branch when neither binary exists;
# the "else" line is not visible in this excerpt.
51 CTDB_INIT_STYLE="redhat"
55 ######################################################
56 # simulate /sbin/service on platforms that don't have it
57 # _service() makes it easier to hook the service() function for
64 # do nothing, when no service was specified
65 [ -z "$_service_name" ] && return
67 if [ -x /sbin/service ]; then
68 $_nice /sbin/service "$_service_name" "$_op"
69 elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
70 $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
71 elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
72 $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
82 ######################################################
83 # simulate /sbin/service (niced) on platforms that don't have it
90 ######################################################
91 # wrapper around /proc/ settings to allow them to be hooked
93 # 1st arg is relative path under /proc/, 2nd arg is value to set
99 ######################################################
100 # wrapper around getting file contents from /proc/ to allow
101 # this to be hooked for testing
102 # 1st arg is relative path under /proc/
108 ######################################################
109 # check that an RPC server is registered with portmap
110 # and responding to requests
111 # usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
112 ######################################################
# Query program $prognum, version $version, on localhost with "rpcinfo -u",
# capturing stdout and stderr together.  When rpcinfo exits non-zero the
# captured text is replaced by an error message that names $progname, and
# the result is printed.
# NOTE(review): the function header, the assignments of progname, prognum
# and version, and the remainder of the multi-line error string are not
# visible in this excerpt.
118 ctdb_check_rpc_out=$(rpcinfo -u localhost $prognum $version 2>&1)
119 if [ $? -ne 0 ] ; then
120 ctdb_check_rpc_out="ERROR: $progname failed RPC check:
122 echo "$ctdb_check_rpc_out"
127 ######################################################
128 # check a set of directories is available
129 # return 1 on a missing directory
130 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
131 ######################################################
132 ctdb_check_directories_probe() {
# Directory names are read from standard input, one per line, into $d.
# IFS is emptied for the read so leading/trailing whitespace is preserved.
133 while IFS="" read d ; do
# NOTE(review): some statements between the read and this test are not
# visible in this excerpt.
# Return 1 at the first name that does not resolve to a directory; $d is
# left set to that name for the caller.
139 [ -d "${d}/." ] || return 1
144 ######################################################
145 # check a set of directories is available
146 # usage: ctdb_check_directories SERVICE_NAME <directories...>
147 ######################################################
148 ctdb_check_directories() {
# Name used in the error message: first argument, else $service_name.
149 n="${1:-${service_name}}"
# The probe reads the directory list from this function's standard input.
# On failure, $d still holds the directory the probe's read loop stopped on.
# NOTE(review): what follows the echo in the failure group is not visible.
150 ctdb_check_directories_probe || {
151 echo "ERROR: $n directory \"$d\" not available"
156 ######################################################
157 # check a set of tcp ports
158 # usage: ctdb_check_tcp_ports <ports...>
159 ######################################################
160 ctdb_check_tcp_ports() {
# NOTE(review): the loop that assigns each port to $p is not visible here.
# A port passes when netstat shows a LISTEN socket bound to the IPv4
# wildcard address (0.0.0.0:$p) or, failing that, to the IPv6 wildcard
# (:::$p).  When neither is found an error naming $service_name is printed.
163 if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
164 if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
165 echo "ERROR: $service_name tcp port $p is not responding"
172 ######################################################
173 # check a unix socket
174 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
175 ######################################################
176 ctdb_check_unix_socket() {
# NOTE(review): the assignment of socket_path is not visible in this excerpt.
# Nothing to check when no socket path is set.
178 [ -z "$socket_path" ] && return
# Look for a netstat line that starts with "unix", contains LISTEN and ends
# with the socket path.  The path is interpolated into the grep pattern
# unescaped, so regex metacharacters in it are interpreted by grep.
180 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
181 echo "ERROR: $service_name socket $socket_path not found"
186 ######################################################
187 # check a command returns zero status
188 # usage: ctdb_check_command SERVICE_NAME <command>
189 ######################################################
190 ctdb_check_command() {
# NOTE(review): the assignments of service_name and wait_cmd are not
# visible in this excerpt.
# Nothing to do when no command was given.
193 [ -z "$wait_cmd" ] && return;
# $wait_cmd is expanded unquoted, so it is word-split into a command and
# its arguments.  Its output is discarded; only the exit status matters.
194 $wait_cmd > /dev/null 2>&1 || {
195 echo "ERROR: $service_name - $wait_cmd returned error"
200 ################################################
201 # kill off any TCP connections with the given IP
202 ################################################
# Kill established TCP connections that involve address $_IP, sending
# "ctdb killtcp" for each one, then wait briefly for them to disappear.
# NOTE(review): the assignments of _IP, _failed, _killcount and _count, the
# loop terminators, the condition guarding the reverse-direction kill and
# the return statements are not visible in this excerpt.
203 kill_tcp_connections() {
# Snapshot matching connections into a per-IP state file: first the plain
# IPv4 form, then the IPv4-mapped IPv6 ("::ffff:") form.  Only columns 4
# and 5 of the netstat output are kept.
208 connfile="$CTDB_VARDIR/state/connections.$_IP"
209 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
210 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
# Each saved pair is read back as "dest src".
212 while read dest src; do
# Split each endpoint at its last colon: address before, port after.
213 srcip=`echo $src | sed -e "s/:[^:]*$//"`
214 srcport=`echo $src | sed -e "s/^.*://"`
215 destip=`echo $dest | sed -e "s/:[^:]*$//"`
216 destport=`echo $dest | sed -e "s/^.*://"`
217 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
218 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
220 # we only do one-way killtcp for CIFS
222 # for all others we do 2-way
224 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
227 _killcount=`expr $_killcount + 1`
# Report when any "ctdb killtcp" invocation failed.
231 [ $_failed = 0 ] || {
232 echo "Failed to send killtcp control"
235 [ $_killcount -gt 0 ] || {
# Poll netstat until no established connection for $_IP remains (only the
# plain IPv4 form is checked here); report a timeout once _count exceeds 3.
239 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
241 _count=`expr $_count + 1`
242 [ $_count -gt 3 ] && {
243 echo "Timed out killing tcp connections for IP $_IP"
247 echo "killed $_killcount TCP connections to released IP $_IP"
250 ##################################################################
251 # kill off the local end for any TCP connections with the given IP
252 ##################################################################
# Same procedure as kill_tcp_connections, except that "ctdb killtcp" is sent
# in one direction only (src -> dest) for every connection.
# NOTE(review): the assignments of _IP, _failed, _killcount and _count, the
# loop terminators and the return statements are not visible in this excerpt.
253 kill_tcp_connections_local_only() {
# Snapshot matching connections (plain IPv4 form, then "::ffff:" mapped
# form) into a per-IP state file, keeping netstat columns 4 and 5.
258 connfile="$CTDB_VARDIR/state/connections.$_IP"
259 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
260 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
# Each saved pair is read back as "dest src".
262 while read dest src; do
# Split each endpoint at its last colon: address before, port after.
263 srcip=`echo $src | sed -e "s/:[^:]*$//"`
264 srcport=`echo $src | sed -e "s/^.*://"`
265 destip=`echo $dest | sed -e "s/:[^:]*$//"`
266 destport=`echo $dest | sed -e "s/^.*://"`
267 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
268 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
269 _killcount=`expr $_killcount + 1`
# Report when any "ctdb killtcp" invocation failed.
273 [ $_failed = 0 ] || {
274 echo "Failed to send killtcp control"
277 [ $_killcount -gt 0 ] || {
# Poll netstat until no established connection for $_IP remains (plain
# IPv4 form only); report a timeout once _count exceeds 3.
281 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
283 _count=`expr $_count + 1`
284 [ $_count -gt 3 ] && {
285 echo "Timed out killing tcp connections for IP $_IP"
289 echo "killed $_killcount TCP connections to released IP $_IP"
292 ##################################################################
293 # tickle any TCP connections with the given IP
294 ##################################################################
# Send "ctdb tickle" in both directions for every established TCP
# connection that involves address $_IP.
# NOTE(review): the assignments of _IP and _failed, the loop terminator and
# the return statements are not visible in this excerpt.
295 tickle_tcp_connections() {
# Snapshot matching connections (plain IPv4 form, then "::ffff:" mapped
# form) into a per-IP state file, keeping netstat columns 4 and 5.
300 connfile="$CTDB_VARDIR/state/connections.$_IP"
301 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
302 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
# Each saved pair is read back as "dest src".
304 while read dest src; do
# Split each endpoint at its last colon: address before, port after.
305 srcip=`echo $src | sed -e "s/:[^:]*$//"`
306 srcport=`echo $src | sed -e "s/^.*://"`
307 destip=`echo $dest | sed -e "s/:[^:]*$//"`
308 destport=`echo $dest | sed -e "s/^.*://"`
# Tickle src -> dest, then dest -> src; any failure sets _failed.
309 echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
310 ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
311 echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
312 ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
316 [ $_failed = 0 ] || {
317 echo "Failed to send tickle control"
322 ########################################################
323 # start/stop the nfs service on different platforms
324 ########################################################
# NOTE(review): the function header, the bodies of the two probe groups, the
# case/branch labels and their terminators are not visible in this excerpt,
# so which action each command below belongs to cannot be confirmed here.
# Probe which NFS init scripts are installed under $CTDB_ETCDIR/init.d.
327 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
330 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
# Commands for a platform whose NFS service is called "nfsserver".
338 service nfsserver start
341 service nfsserver stop > /dev/null 2>&1
# Sets fs/nfsd/threads to 0 through set_proc before stopping and starting
# the service again.
344 set_proc "fs/nfsd/threads" 0
345 service nfsserver stop > /dev/null 2>&1
347 service nfsserver start
# Commands for a platform with separate "nfs" and "nfslock" services.
354 service nfslock start
362 set_proc "fs/nfsd/threads" 0
363 service nfs stop > /dev/null 2>&1
364 service nfslock stop > /dev/null 2>&1
366 service nfslock start
# Printed when the platform was not recognised.
372 echo "Unknown platform. NFS is not supported with ctdb"
378 ########################################################
379 # start/stop the nfs lockmanager service on different platforms
380 ########################################################
# NOTE(review): the bodies of the two probe groups, the case/branch labels
# and their terminators are not visible in this excerpt, so which action
# each command below belongs to cannot be confirmed here.
381 startstop_nfslock() {
# Probe which NFS init scripts are installed under $CTDB_ETCDIR/init.d.
383 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
386 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
392 # on SLES there is no separate service for the lock manager,
393 # so we shut down / restart the whole nfsserver service instead
396 service nfsserver start
399 service nfsserver stop > /dev/null 2>&1
402 service nfsserver stop
403 service nfsserver start
# Commands for a platform that has a dedicated "nfslock" service.
410 service nfslock start
413 service nfslock stop > /dev/null 2>&1
417 service nfslock start
# Printed when the platform was not recognised.
422 echo "Unknown platform. NFS locking is not supported with ctdb"
# NOTE(review): the function header and the assignments of _iface, _ip,
# _maskbits and ret are not visible in this excerpt.
# Per-interface paths under the interface_modify state directory: a lock
# file and a base directory passed to the helper as its last argument.
433 local _state_dir="$CTDB_VARDIR/state/interface_modify"
434 local _lockfile="$_state_dir/$_iface.flock"
435 local _readd_base="$_state_dir/$_iface.readd.d"
# Ensure the state directory exists; report the failure otherwise.
437 mkdir -p $_state_dir || {
439 echo "Failed to mkdir -p $_state_dir - $ret"
# Handle a missing lock file (the body of this group is not visible).
443 test -f $_lockfile || {
# Run the "add" action of interface_modify.sh while holding the
# per-interface lock, waiting at most 30 seconds for it.
447 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
# NOTE(review): the assignments of _iface, _ip, _maskbits and ret are not
# visible in this excerpt.
451 delete_ip_from_iface()
# Per-interface paths under the interface_modify state directory: a lock
# file and a base directory passed to the helper as its last argument.
456 local _state_dir="$CTDB_VARDIR/state/interface_modify"
457 local _lockfile="$_state_dir/$_iface.flock"
458 local _readd_base="$_state_dir/$_iface.readd.d"
# Ensure the state directory exists; report the failure otherwise.
460 mkdir -p $_state_dir || {
462 echo "Failed to mkdir -p $_state_dir - $ret"
# Handle a missing lock file (the body of this group is not visible).
466 test -f $_lockfile || {
# Run the "delete" action of interface_modify.sh while holding the
# per-interface lock, waiting at most 30 seconds for it.
470 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
# NOTE(review): the assignments of _iface, _ip, _maskbits and ret are not
# visible in this excerpt.
474 setup_iface_ip_readd_script()
# The fourth argument is the script handed to the helper's readd_script action.
479 local _readd_script=$4
# Per-interface paths under the interface_modify state directory.
480 local _state_dir="$CTDB_VARDIR/state/interface_modify"
481 local _lockfile="$_state_dir/$_iface.flock"
482 local _readd_base="$_state_dir/$_iface.readd.d"
# Ensure the state directory exists; report the failure otherwise.
484 mkdir -p $_state_dir || {
486 echo "Failed to mkdir -p $_state_dir - $ret"
# Handle a missing lock file (the body of this group is not visible).
490 test -f $_lockfile || {
# Run the "readd_script" action of interface_modify.sh while holding the
# per-interface lock, waiting at most 30 seconds for it.
494 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
498 ########################################################
499 # some simple logic for counting events - per eventscript
500 # usage: ctdb_counter_init
502 # ctdb_check_counter_limit <limit>
503 # ctdb_check_counter_limit succeeds when count >= <limit>
504 ########################################################
# Shared setup for the counter helpers: sets the globals _service_name
# (first argument, else $service_name) and _counter_file (a file named after
# the service under $ctdb_fail_dir), and creates the file's parent directory.
505 _ctdb_counter_common () {
506 _service_name="${1:-${service_name}}"
507 _counter_file="$ctdb_fail_dir/$_service_name"
508 mkdir -p "${_counter_file%/*}" # dirname
# Both helpers start by resolving _counter_file through _ctdb_counter_common.
# NOTE(review): the statement that ctdb_counter_init runs after that call is
# not visible in this excerpt.
510 ctdb_counter_init () {
511 _ctdb_counter_common "$1"
515 ctdb_counter_incr () {
516 _ctdb_counter_common "$1"
# Each increment appends a single character, so the size of the counter
# file in bytes equals the current count.
519 echo -n 1 >> "$_counter_file"
# NOTE(review): the lines that resolve _counter_file and set _quiet, and the
# statements following each message, are not visible in this excerpt.
521 ctdb_check_counter_limit () {
# Limit: first argument, else $service_fail_limit.
524 _limit="${1:-${service_fail_limit}}"
# Current count is the counter file's size in bytes; 0 when stat fails
# (for example because the file does not exist).
528 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
# NOTE(review): the messages say "more than" / "less than" while the tests
# are ">= limit" and "> 0"; confirm which wording is intended.
529 if [ $_size -ge $_limit ] ; then
530 echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
# The warning is suppressed when _quiet is non-empty.
532 elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
533 echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
# NOTE(review): the lines that resolve _counter_file and set _limit, and the
# branch bodies, are not visible in this excerpt.
536 ctdb_check_counter_equal () {
# Current count is the counter file's size in bytes; 0 when stat fails.
542 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
# Exact-match test of the count against $_limit.
543 if [ $_size -eq $_limit ] ; then
549 ########################################################
# Base directories, both under $CTDB_VARDIR: per-script status flags and
# per-service failure counters.
551 ctdb_status_dir="$CTDB_VARDIR/status"
552 ctdb_fail_dir="$CTDB_VARDIR/failcount"
# Set the global service_state_dir to the state directory of the service
# named by the first argument (else $service_name) and create it.
# NOTE(review): what follows the error message in the failure group is not
# visible in this excerpt.
554 ctdb_setup_service_state_dir ()
556 service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
557 mkdir -p "$service_state_dir" || {
558 echo "Error creating state dir \"$service_state_dir\""
563 ########################################################
564 # Managed status history, for auto-start/stop
# Directory holding one marker file per service that has been managed.
566 ctdb_managed_dir="$CTDB_VARDIR/managed_history"
# Shared setup: sets the globals _service_name (first argument, else
# $service_name) and _ctdb_managed_file (that service's marker file).
568 _ctdb_managed_common ()
570 _service_name="${1:-${service_name}}"
571 _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
# Record the service as managed by creating/touching its marker file.
574 ctdb_service_managed ()
576 _ctdb_managed_common "$@"
577 mkdir -p "$ctdb_managed_dir"
578 touch "$_ctdb_managed_file"
# Forget the service by removing its marker file (no error if absent).
581 ctdb_service_unmanaged ()
583 _ctdb_managed_common "$@"
584 rm -f "$_ctdb_managed_file"
# The last command tests whether the marker file exists as a regular file.
587 is_ctdb_previously_managed_service ()
589 _ctdb_managed_common "$@"
590 [ -f "$_ctdb_managed_file" ]
593 ########################################################
594 # Check and set status
# NOTE(review): the function headers, the statements after each
# log_status_cat call and the body of the "for" loop are not visible in this
# excerpt.
# Print a one-line problem report: $1 is the node state to mention and $2 a
# file whose contents are appended to the message.
598 echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
# Check this script's status directory for a readable "unhealthy" flag
# file first, then a "banned" one, and log the contents of the one found.
603 if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
604 log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
606 elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
607 log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
# Per-script status directory, and an iteration over the two flag names.
616 d="$ctdb_status_dir/$script_name"
623 for i in "banned" "unhealthy" ; do
630 ##################################################################
631 # Reconfigure a service on demand
# Shared setup: sets the global _ctdb_service_reconfigure_flag to the
# "reconfigure" flag file in the status directory of the service named by
# the first argument (else $service_name).
# NOTE(review): one statement between the two assignments is not visible in
# this excerpt.
633 _ctdb_service_reconfigure_common ()
635 _d="$ctdb_status_dir/${1:-${service_name}}"
637 _ctdb_service_reconfigure_flag="$_d/reconfigure"
# The last command tests whether the flag file exists.
640 ctdb_service_needs_reconfigure ()
642 _ctdb_service_reconfigure_common "$@"
643 [ -e "$_ctdb_service_reconfigure_flag" ]
# Request a reconfigure by creating (or truncating) the flag file.
646 ctdb_service_set_reconfigure ()
648 _ctdb_service_reconfigure_common "$@"
649 >"$_ctdb_service_reconfigure_flag"
# Clear a pending request by removing the flag file (no error if absent).
652 ctdb_service_unset_reconfigure ()
654 _ctdb_service_reconfigure_common "$@"
655 rm -f "$_ctdb_service_reconfigure_flag"
# Perform a pending reconfigure: clear the flag first, then call
# service_reconfigure.  If that fails its status is returned at once;
# otherwise ctdb_counter_init is called for the service.
658 ctdb_service_reconfigure ()
660 echo "Reconfiguring service \"$service_name\"..."
661 ctdb_service_unset_reconfigure "$@"
662 service_reconfigure "$@" || return $?
663 ctdb_counter_init "$@"
666 # Default service_reconfigure() function: restart the service named by
# the first argument, else $service_name.
667 service_reconfigure ()
669 service "${1:-$service_name}" restart
# Reconfigure on demand; a no-op (status 0) for any event but "monitor".
# NOTE(review): what follows the ctdb_service_reconfigure call inside the
# "if" is not visible in this excerpt.
672 ctdb_service_check_reconfigure ()
674 [ "$event_name" = "monitor" ] || return 0
676 if ctdb_service_needs_reconfigure "$@" ; then
677 ctdb_service_reconfigure "$@"
682 ##################################################################
683 # Does CTDB manage this service? - and associated auto-start/stop
# Backward-compatibility helper for the legacy CTDB_MANAGES_* variables:
# append service $2 to $CTDB_MANAGED_SERVICES when the legacy flag $1 is
# "yes" and $2 is the service currently being queried.
# Globals:   _service_name (read), CTDB_MANAGED_SERVICES (appended to)
# Arguments: $1 - value of a legacy CTDB_MANAGES_* variable (may be empty)
#            $2 - name of the service that variable stands for
# Returns:   0 in every case; nothing happens when the condition is false
ctdb_compat_managed_service ()
{
    # Two separate tests joined by && instead of test's "-a" operator,
    # which POSIX marks obsolescent and which can be mis-parsed when an
    # operand looks like an operator (e.g. "!" or "(").
    if [ "$1" = "yes" ] && [ "$2" = "$_service_name" ] ; then
	CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
    fi
}
# Decide whether the service named by the first argument (else
# $service_name) is listed in $CTDB_MANAGED_SERVICES, either directly or via
# one of the legacy CTDB_MANAGES_* variables.  The status of the final test
# is the function's result: 0 when the service is listed.
# NOTE(review): the statement inside the first "if" is not visible in this
# excerpt; the comment above it indicates that it returns 0.
692 is_ctdb_managed_service ()
694 _service_name="${1:-${service_name}}"
696 # $t is used just for readability and to allow more accurate
697 # matching via leading/trailing spaces
698 t=" $CTDB_MANAGED_SERVICES "
700 # Return 0 if "<space>$_service_name<space>" appears in $t
# (stripping a prefix that ends in " name " changes $t only on a match)
701 if [ "${t#* ${_service_name} }" != "${t}" ] ; then
705 # If above didn't match then update $CTDB_MANAGED_SERVICES for
706 # backward compatibility and try again.
# Each call appends its service only when the legacy variable is "yes" and
# the name equals $_service_name.
707 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
708 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
709 ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
710 ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
711 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
712 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
713 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
714 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
715 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs"
717 t=" $CTDB_MANAGED_SERVICES "
719 # Return 0 if "<space>$_service_name<space>" appears in $t
720 [ "${t#* ${_service_name} }" != "${t}" ]
# Automatically start a service that has become managed, or stop one that is
# no longer managed.  Acts only during the "monitor" event.
# NOTE(review): the "else"/"fi" lines and any statements following the
# start/stop calls are not visible in this excerpt.
723 ctdb_start_stop_service ()
725 _service_name="${1:-${service_name}}"
727 [ "$event_name" = "monitor" ] || return 0
# Managed now but no managed-history marker yet: start it.
729 if is_ctdb_managed_service "$_service_name" ; then
730 if ! is_ctdb_previously_managed_service "$_service_name" ; then
731 echo "Starting service \"$_service_name\" - now managed"
732 ctdb_service_start "$_service_name"
# Marker present although the service is not managed any more: stop it.
736 if is_ctdb_previously_managed_service "$_service_name" ; then
737 echo "Stopping service \"$_service_name\" - no longer managed"
738 ctdb_service_stop "$_service_name"
# Start a service: record it as managed, call service_start with the caller's
# arguments, and on success call ctdb_counter_init for it.  A failing
# service_start status is returned unchanged.
744 ctdb_service_start ()
746 # The service is marked managed if we've ever tried to start it.
747 ctdb_service_managed "$@"
749 # Here we only want $1. If no argument is passed then
750 # service_start needs to know.
751 service_start "$@" || return $?
753 ctdb_counter_init "$@"
# NOTE(review): this line belongs to a following function whose header and
# remaining statements are not visible in this excerpt; it removes the
# managed-history marker for the service.
758 ctdb_service_unmanaged "$@"
762 # Default service_start() and service_stop() functions.
764 # These may be overridden in an eventscript. When overriding, the
765 # following convention must be followed. If these functions are
766 # called with no arguments then they may use internal logic to
767 # determine whether the service is managed and, therefore, whether
768 # they should take any action. However, if the service name is
769 # specified as an argument then an attempt must be made to start or
770 # stop the service. This is because the auto-start/stop code calls
771 # them with the service name as an argument.
# NOTE(review): the headers of the two default functions are not visible in
# this excerpt.
# Default start action: "start" for the service named by $1, else $service_name.
774 service "${1:-${service_name}}" start
# Default stop action: "stop" for the service named by $1, else $service_name.
779 service "${1:-${service_name}}" stop
782 ##################################################################
784 ctdb_standard_event_handler ()
# Print the dotted-quad network address obtained by masking host address
# $HOST with a prefix length of $MASKBITS bits.
# NOTE(review): the assignments of HOST and MASKBITS are not visible in this
# excerpt — presumably taken from the positional arguments; confirm.
799 ipv4_host_addr_to_net_addr()
# Split the address on dots: HOST3 is the first (most significant) octet,
# HOST0 the last.
804 local HOST0=$(echo $HOST | awk -F . '{print $4}')
805 local HOST1=$(echo $HOST | awk -F . '{print $3}')
806 local HOST2=$(echo $HOST | awk -F . '{print $2}')
807 local HOST3=$(echo $HOST | awk -F . '{print $1}')
# Combine the four octets into one 32-bit number.
809 local HOST_NUM=$(( $HOST0 + $HOST1 * 256 + $HOST2 * (256 ** 2) + $HOST3 * (256 ** 3) ))
# Netmask: 32 one-bits multiplied by 2^(32 - MASKBITS) (a left shift), then
# truncated to 32 bits, leaving MASKBITS leading one-bits.
811 local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
813 local NET_NUM=$(( $HOST_NUM & $MASK_NUM))
# Split the network number back into octets, least significant first.
815 local NET0=$(( $NET_NUM & 255 ))
816 local NET1=$(( ($NET_NUM & (255 * 256)) / 256 ))
817 local NET2=$(( ($NET_NUM & (255 * 256**2)) / 256**2 ))
818 local NET3=$(( ($NET_NUM & (255 * 256**3)) / 256**3 ))
820 echo "$NET3.$NET2.$NET1.$NET0"
# Print the dotted-quad netmask for a prefix length of $MASKBITS bits.
# NOTE(review): the assignment of MASKBITS is not visible in this excerpt —
# presumably taken from the first positional argument; confirm.
823 ipv4_maskbits_to_net_mask()
# 32 one-bits multiplied by 2^(32 - MASKBITS) (a left shift), then truncated
# to 32 bits, leaving MASKBITS leading one-bits.
827 local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
# Split the mask into octets, least significant first.
829 local MASK0=$(( $MASK_NUM & 255 ))
830 local MASK1=$(( ($MASK_NUM & (255 * 256)) / 256 ))
831 local MASK2=$(( ($MASK_NUM & (255 * 256**2)) / 256**2 ))
832 local MASK3=$(( ($MASK_NUM & (255 * 256**3)) / 256**3 ))
834 echo "$MASK3.$MASK2.$MASK1.$MASK0"
# NOTE(review): the function header, the assignments of ADDR and the initial
# value of fail, and the return statements are not visible in this excerpt.
# Remove the first dotted-quad of decimal digits from $ADDR; anything left
# over means the string is not purely such a quad, so the check fails.
842 local N=`echo $ADDR | sed -e 's/[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*//'`
843 test -n "$N" && fail=1
# Split on dots: ADDR3 is the first octet, ADDR0 the last.
845 local ADDR0=$(echo $ADDR | awk -F . '{print $4}')
846 local ADDR1=$(echo $ADDR | awk -F . '{print $3}')
847 local ADDR2=$(echo $ADDR | awk -F . '{print $2}')
848 local ADDR3=$(echo $ADDR | awk -F . '{print $1}')
# Any octet above 255 also fails the check.
850 test "$ADDR0" -gt 255 && fail=1
851 test "$ADDR1" -gt 255 && fail=1
852 test "$ADDR2" -gt 255 && fail=1
853 test "$ADDR3" -gt 255 && fail=1
# Failure path, taken when fail is anything other than "0".
855 test x"$fail" != x"0" && {
856 #echo "IPv4: '$ADDR' is not a valid address"
863 # iptables doesn't like being re-entered, so flock-wrap it.
# Run /sbin/iptables with the caller's arguments while holding a lock file
# under $CTDB_VARDIR, waiting at most 30 seconds for the lock.
# NOTE(review): the header of the enclosing function is not visible here.
866 flock -w 30 $CTDB_VARDIR/iptables-ctdb.flock /sbin/iptables "$@"
869 ########################################################
871 ########################################################
873 # Temporary directory for tickles, created (with parents) if missing.
874 tickledir="$CTDB_VARDIR/state/tickles"
875 mkdir -p "$tickledir"
# Synchronise the registered tickles for port $_port with the TCP
# connections currently established to this node's public IPs.
# NOTE(review): the function header, the assignment of _port, the command
# feeding the first awk filter, the loop over $_i that produces the tickle
# list and the loop terminators are not visible in this excerpt.
881 mkdir -p "$tickledir" # Just in case
# This node's number: the output of "ctdb pnn" with a leading "PNN:" removed.
884 _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
886 # What public IPs do I hold?
# (second colon-separated field of each "ctdb -Y ip" line whose third field
# equals this node's number)
887 _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
889 # IPs as a regexp choice: joined with "|", dots escaped, wrapped in ( )
890 _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
892 # Record connections to our public IPs in a temporary file
# (sorted "field5 field4" pairs of ESTABLISHED tcp lines whose fourth field
# is one of our IPs on $_port; sorted because comm needs sorted input)
893 _my_connections="${tickledir}/${_port}.connections"
894 rm -f "$_my_connections"
896 awk -v destpat="^${_ipschoice}:${_port}\$" \
897 '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
898 sort >"$_my_connections"
900 # Record our current tickles in a temporary file
# (each "ctdb -Y gettickles" line after the first is reformatted into two
# "addr:port" pairs, the same layout as the connections file)
901 _my_tickles="${tickledir}/${_port}.tickles"
904 ctdb -Y gettickles $_i $_port |
905 awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
909 # Add tickles for connections that we haven't already got tickles for
# (comm -23 keeps lines found only in the connections file)
910 comm -23 "$_my_connections" "$_my_tickles" |
911 while read _src _dst ; do
912 ctdb addtickle $_src $_dst
915 # Remove tickles for connections that are no longer there
# (comm -13 keeps lines found only in the tickles file)
916 comm -13 "$_my_connections" "$_my_tickles" |
917 while read _src _dst ; do
918 ctdb deltickle $_src $_dst
# Remove both temporary files.
921 rm -f "$_my_connections" "$_my_tickles"
924 ########################################################
925 # load a site local config file
926 ########################################################
# Site-local configuration is looked for in three places.  A candidate must
# be executable to be considered.
# NOTE(review): the body of the first group and the closing lines of the
# groups and the loop are not visible in this excerpt.
# 1. the file named by $CTDB_RC_LOCAL, when set and executable
928 [ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
# 2. $CTDB_BASE/rc.local, sourced into the current shell
932 [ -x $CTDB_BASE/rc.local ] && {
933 . $CTDB_BASE/rc.local
# 3. every executable entry in $CTDB_BASE/rc.local.d, sourced in glob order
936 [ -d $CTDB_BASE/rc.local.d ] && {
937 for i in $CTDB_BASE/rc.local.d/* ; do
938 [ -x "$i" ] && . "$i"
# script_name is $0 with everything up to the last "/" removed.
942 script_name="${0##*/}" # basename
# service_name starts out equal to script_name.
943 service_name="$script_name" # default is just the script name