1 # utility functions for ctdb event scripts
3 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
5 [ -z "$CTDB_VARDIR" ] && {
6 export CTDB_VARDIR="/var/ctdb"
8 [ -z "$CTDB_ETCDIR" ] && {
9 export CTDB_ETCDIR="/etc"
12 #######################################
13 # pull in a system config file, if any
17 foo="${service_config:-${service_name}}"
18 if [ -n "$foo" ] ; then
21 elif [ "$1" != "ctdb" ] ; then
25 if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
26 . $CTDB_ETCDIR/sysconfig/$1
27 elif [ -f $CTDB_ETCDIR/default/$1 ]; then
28 . $CTDB_ETCDIR/default/$1
29 elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
30 . $CTDB_BASE/sysconfig/$1
38 ##############################################################
39 # determine on what type of system (init style) we are running
41 # only do detection if not already set:
42 test "x$CTDB_INIT_STYLE" != "x" && return
44 if [ -x /sbin/startproc ]; then
45 CTDB_INIT_STYLE="suse"
46 elif [ -x /sbin/start-stop-daemon ]; then
47 CTDB_INIT_STYLE="debian"
49 CTDB_INIT_STYLE="redhat"
53 ######################################################
54 # simulate /sbin/service on platforms that don't have it
55 # _service() makes it easier to hook the service() function for
62 # do nothing, when no service was specified
63 [ -z "$_service_name" ] && return
65 if [ -x /sbin/service ]; then
66 $_nice /sbin/service "$_service_name" "$_op"
67 elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
68 $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
69 elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
70 $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
80 ######################################################
81 # simulate /sbin/service (niced) on platforms that don't have it
88 ######################################################
89 # wait for a command to return a zero exit status
90 # usage: ctdb_wait_command SERVICE_NAME <command>
91 ######################################################
95 [ -z "$wait_cmd" ] && return;
97 echo "Waiting for service $service_name to start"
98 while [ $all_ok -eq 0 ]; do
99 $wait_cmd > /dev/null 2>&1 && all_ok=1
100 ctdb status > /dev/null 2>&1 || {
101 echo "ctdb daemon has died. Exiting wait for $service_name"
104 [ $all_ok -eq 1 ] || sleep 1
106 echo "Local service $service_name is up"
110 ######################################################
111 # wait for a set of tcp ports
112 # usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
113 ######################################################
114 ctdb_wait_tcp_ports() {
118 [ -z "$wait_ports" ] && return;
120 echo "Waiting for tcp service $service_name to start"
121 while [ $all_ok -eq 0 ]; do
123 for p in $wait_ports; do
124 if [ -x /usr/bin/netcat ]; then
125 /usr/bin/netcat -z 127.0.0.1 $p > /dev/null || all_ok=0
126 elif [ -x /usr/bin/nc ]; then
127 /usr/bin/nc -z 127.0.0.1 $p > /dev/null || all_ok=0
128 elif [ -x /usr/bin/netstat ]; then
129 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
130 elif [ -x /bin/netstat ]; then
131 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
133 echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
137 [ $all_ok -eq 1 ] || sleep 1
138 ctdb status > /dev/null 2>&1 || {
139 echo "ctdb daemon has died. Exiting tcp wait $service_name"
143 echo "Local tcp services for $service_name are up"
147 ######################################################
148 # check that a rpc server is registered with portmap
149 # and responding to requests
150 # usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
151 ######################################################
157 ctdb_check_rpc_out=$(rpcinfo -u localhost $prognum $version 2>&1)
158 if [ $? -ne 0 ] ; then
159 ctdb_check_rpc_out="ERROR: $progname failed RPC check:
161 echo "$ctdb_check_rpc_out"
166 ######################################################
167 # check a set of directories is available
168 # return 1 on a missing directory
169 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
170 ######################################################
171 ctdb_check_directories_probe() {
172 while IFS="" read d ; do
178 [ -d "${d}/." ] || return 1
183 ######################################################
184 # check a set of directories is available
185 # usage: ctdb_check_directories SERVICE_NAME <directories...>
186 ######################################################
187 ctdb_check_directories() {
188 n="${1:-${service_name}}"
189 ctdb_check_directories_probe || {
190 echo "ERROR: $n directory \"$d\" not available"
195 ######################################################
196 # check a set of tcp ports
197 # usage: ctdb_check_tcp_ports <ports...>
198 ######################################################
199 ctdb_check_tcp_ports() {
202 if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
203 if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
204 echo "ERROR: $service_name tcp port $p is not responding"
211 ######################################################
212 # check a unix socket
213 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
214 ######################################################
215 ctdb_check_unix_socket() {
217 [ -z "$socket_path" ] && return
219 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
220 echo "ERROR: $service_name socket $socket_path not found"
225 ######################################################
226 # check a command returns zero status
227 # usage: ctdb_check_command SERVICE_NAME <command>
228 ######################################################
229 ctdb_check_command() {
232 [ -z "$wait_cmd" ] && return;
233 $wait_cmd > /dev/null 2>&1 || {
234 echo "ERROR: $service_name - $wait_cmd returned error"
239 ################################################
240 # kill off any TCP connections with the given IP
241 ################################################
242 kill_tcp_connections() {
247 connfile="$CTDB_VARDIR/state/connections.$_IP"
248 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
249 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
251 while read dest src; do
252 srcip=`echo $src | sed -e "s/:[^:]*$//"`
253 srcport=`echo $src | sed -e "s/^.*://"`
254 destip=`echo $dest | sed -e "s/:[^:]*$//"`
255 destport=`echo $dest | sed -e "s/^.*://"`
256 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
257 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
259 # we only do one-way killtcp for CIFS
261 # for all others we do 2-way
263 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
266 _killcount=`expr $_killcount + 1`
270 [ $_failed = 0 ] || {
271 echo "Failed to send killtcp control"
274 [ $_killcount -gt 0 ] || {
278 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
280 _count=`expr $_count + 1`
281 [ $_count -gt 3 ] && {
282 echo "Timed out killing tcp connections for IP $_IP"
286 echo "killed $_killcount TCP connections to released IP $_IP"
289 ##################################################################
290 # kill off the local end for any TCP connections with the given IP
291 ##################################################################
292 kill_tcp_connections_local_only() {
297 connfile="$CTDB_VARDIR/state/connections.$_IP"
298 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
299 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
301 while read dest src; do
302 srcip=`echo $src | sed -e "s/:[^:]*$//"`
303 srcport=`echo $src | sed -e "s/^.*://"`
304 destip=`echo $dest | sed -e "s/:[^:]*$//"`
305 destport=`echo $dest | sed -e "s/^.*://"`
306 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
307 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
308 _killcount=`expr $_killcount + 1`
312 [ $_failed = 0 ] || {
313 echo "Failed to send killtcp control"
316 [ $_killcount -gt 0 ] || {
320 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
322 _count=`expr $_count + 1`
323 [ $_count -gt 3 ] && {
324 echo "Timed out killing tcp connections for IP $_IP"
328 echo "killed $_killcount TCP connections to released IP $_IP"
331 ##################################################################
332 # tickle any TCP connections with the given IP
333 ##################################################################
334 tickle_tcp_connections() {
339 connfile="$CTDB_VARDIR/state/connections.$_IP"
340 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
341 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
343 while read dest src; do
344 srcip=`echo $src | sed -e "s/:[^:]*$//"`
345 srcport=`echo $src | sed -e "s/^.*://"`
346 destip=`echo $dest | sed -e "s/:[^:]*$//"`
347 destport=`echo $dest | sed -e "s/^.*://"`
348 echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
349 ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
350 echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
351 ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
355 [ $_failed = 0 ] || {
356 echo "Failed to send tickle control"
361 ########################################################
362 # start/stop the nfs service on different platforms
363 ########################################################
366 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
369 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
377 service nfsserver start
380 service nfsserver stop > /dev/null 2>&1
383 echo 0 >/proc/fs/nfsd/threads
384 service nfsserver stop > /dev/null 2>&1
386 service nfsserver start
393 service nfslock start
397 service nfs stop > /dev/null 2>&1
398 service nfslock stop > /dev/null 2>&1
401 echo 0 >/proc/fs/nfsd/threads
402 service nfs stop > /dev/null 2>&1
403 service nfslock stop > /dev/null 2>&1
405 service nfslock start
411 echo "Unknown platform. NFS is not supported with ctdb"
417 ########################################################
418 # start/stop the nfs lockmanager service on different platforms
419 ########################################################
420 startstop_nfslock() {
422 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
425 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
431 # for sles there is no service for lockmanager
432 # so we instead just shutdown/restart nfs
435 service nfsserver start
438 service nfsserver stop > /dev/null 2>&1
441 service nfsserver stop
442 service nfsserver start
449 service nfslock start
452 service nfslock stop > /dev/null 2>&1
456 service nfslock start
461 echo "Unknown platform. NFS locking is not supported with ctdb"
467 # better use delete_ip_from_iface() together with add_ip_to_iface
468 # remove_ip should be removed in future
470 local _ip_maskbits=$1
472 local _ip=`echo "$_ip_maskbits" | cut -d '/' -f1`
473 local _maskbits=`echo "$_ip_maskbits" | cut -d '/' -f2`
475 delete_ip_from_iface "$_iface" "$_ip" "$_maskbits"
484 local _state_dir="$CTDB_VARDIR/state/interface_modify"
485 local _lockfile="$_state_dir/$_iface.flock"
486 local _readd_base="$_state_dir/$_iface.readd.d"
488 mkdir -p $_state_dir || {
490 echo "Failed to mkdir -p $_state_dir - $ret"
494 test -f $_lockfile || {
498 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
502 delete_ip_from_iface()
507 local _state_dir="$CTDB_VARDIR/state/interface_modify"
508 local _lockfile="$_state_dir/$_iface.flock"
509 local _readd_base="$_state_dir/$_iface.readd.d"
511 mkdir -p $_state_dir || {
513 echo "Failed to mkdir -p $_state_dir - $ret"
517 test -f $_lockfile || {
521 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
525 setup_iface_ip_readd_script()
530 local _readd_script=$4
531 local _state_dir="$CTDB_VARDIR/state/interface_modify"
532 local _lockfile="$_state_dir/$_iface.flock"
533 local _readd_base="$_state_dir/$_iface.readd.d"
535 mkdir -p $_state_dir || {
537 echo "Failed to mkdir -p $_state_dir - $ret"
541 test -f $_lockfile || {
545 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
549 ########################################################
550 # some simple logic for counting events - per eventscript
551 # usage: ctdb_counter_init
553 # ctdb_check_counter_limit <limit>
554 # ctdb_check_counter_limit succeeds when count >= <limit>
555 ########################################################
556 _ctdb_counter_common () {
557 _counter_file="$ctdb_fail_dir/$service_name"
558 mkdir -p "${_counter_file%/*}" # dirname
560 ctdb_counter_init () {
565 ctdb_counter_incr () {
569 echo -n 1 >> "$_counter_file"
571 ctdb_check_counter_limit () {
574 _limit="${1:-${service_fail_limit}}"
578 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
579 if [ $_size -ge $_limit ] ; then
580 echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
582 elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
583 echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
586 ctdb_check_counter_equal () {
592 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
593 if [ $_size -eq $_limit ] ; then
599 ########################################################
601 ctdb_spool_dir="/var/spool/ctdb"
602 ctdb_status_dir="$ctdb_spool_dir/status"
603 ctdb_fail_dir="$ctdb_spool_dir/failcount"
604 ctdb_active_dir="$ctdb_spool_dir/active"
608 echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
613 if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
614 log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
616 elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
617 log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
626 d="$ctdb_status_dir/$script_name"
633 for i in "banned" "unhealthy" ; do
640 ctdb_service_needs_reconfigure ()
642 [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
645 ctdb_service_set_reconfigure ()
647 d="$ctdb_status_dir/$service_name"
652 ctdb_service_unset_reconfigure ()
654 rm -f "$ctdb_status_dir/$service_name/reconfigure"
######################################################
# Reconfigure the current service and clear its "reconfigure" flag.
# usage: ctdb_service_reconfigure
#
# Globals read:
#   service_name         name reported in the log line and passed to service
#   service_reconfigure  optional shell snippet; when non-empty it is
#                        eval'ed instead of restarting the service
# The function's exit status is that of ctdb_service_unset_reconfigure.
######################################################
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"$service_name\"..."
    if [ -n "$service_reconfigure" ] ; then
	# Quote the snippet: an unquoted expansion is word-split and
	# glob-expanded before eval re-joins it, which collapses
	# whitespace inside quoted arguments of the hook command.
	eval "$service_reconfigure"
    else
	service "$service_name" restart
    fi
    ctdb_service_unset_reconfigure
}
669 ctdb_compat_managed_service ()
671 if [ "$1" = "yes" ] ; then
######################################################
# Test whether a service is managed by ctdb.
# usage: is_ctdb_managed_service [SERVICE_NAME]
#
# SERVICE_NAME defaults to $service_name.  The service counts as managed
# when its name is a whole word of $CTDB_MANAGED_SERVICES, or when
# ctdb_compat_managed_service adds it to the list for one of the legacy
# CTDB_MANAGES_* variables.
# Returns 0 if managed, non-zero otherwise.
######################################################
is_ctdb_managed_service ()
{
    # Scratch variables stay local so they do not leak into the sourcing
    # script.  ctdb_compat_managed_service still sees and updates "t"
    # because shell locals are dynamically scoped.
    local _service_name="${1:-${service_name}}"
    local t=" $CTDB_MANAGED_SERVICES "

    # Backward compatibility with the per-service CTDB_MANAGES_* variables.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    # Returns 0 if "<space>$_service_name<space>" appears in $t.
    # The name is quoted inside the pattern so that glob characters in
    # it are matched literally rather than as wildcards.
    case "$t" in
	*" ${_service_name} "*) return 0 ;;
	*) return 1 ;;
    esac
}
696 ctdb_start_stop_service ()
698 _service_name="${1:-${service_name}}"
700 [ "$event_name" = "monitor" ] || return 0
702 _active="$ctdb_active_dir/$_service_name"
703 if is_ctdb_managed_service "$_service_name"; then
704 if ! [ -e "$_active" ] ; then
705 echo "Starting service $_service_name"
706 ctdb_service_start || exit $?
707 mkdir -p "$ctdb_active_dir"
712 if [ -e "$_active" ] ; then
713 echo "Stopping service $_service_name"
714 ctdb_service_stop || exit $?
721 ctdb_service_start ()
723 if [ -n "$service_start" ] ; then
724 eval $service_start || return $?
726 service "$service_name" start || return $?
733 if [ -n "$service_stop" ] ; then
736 service "$service_name" stop
740 ctdb_standard_event_handler ()
######################################################
# Compute the network address of an IPv4 host address.
# usage: ipv4_host_addr_to_net_addr HOST MASKBITS
#   HOST      dotted-quad IPv4 address, e.g. 192.168.1.77
#   MASKBITS  prefix length, 0..32
# Prints the dotted-quad network address on stdout.
# Returns 1 (message on stderr) if either argument is malformed.
######################################################
ipv4_host_addr_to_net_addr()
{
	local HOST=$1
	local MASKBITS=$2

	# Split the address in the shell instead of forking awk four times.
	local HOST3 HOST2 HOST1 HOST0 _rest _octet
	IFS=. read -r HOST3 HOST2 HOST1 HOST0 _rest <<EOF
$HOST
EOF

	if [ -n "$_rest" ] ; then
		echo "ipv4_host_addr_to_net_addr: invalid address '$HOST'" >&2
		return 1
	fi
	for _octet in "$HOST3" "$HOST2" "$HOST1" "$HOST0" ; do
		case "$_octet" in
		[0-9]|[0-9][0-9]|[0-9][0-9][0-9]) ;;
		*)
			echo "ipv4_host_addr_to_net_addr: invalid address '$HOST'" >&2
			return 1
			;;
		esac
		# 10# forces base 10: a leading zero would otherwise make
		# the arithmetic parse the octet as octal ("08" is an error).
		if [ "$(( 10#$_octet ))" -gt 255 ] ; then
			echo "ipv4_host_addr_to_net_addr: invalid address '$HOST'" >&2
			return 1
		fi
	done

	case "$MASKBITS" in
	[0-9]|[0-9][0-9]) ;;
	*)
		echo "ipv4_host_addr_to_net_addr: invalid mask length '$MASKBITS'" >&2
		return 1
		;;
	esac
	MASKBITS=$(( 10#$MASKBITS ))
	if [ "$MASKBITS" -gt 32 ] ; then
		echo "ipv4_host_addr_to_net_addr: invalid mask length '$MASKBITS'" >&2
		return 1
	fi

	local HOST_NUM=$(( (10#$HOST3 << 24) | (10#$HOST2 << 16) | (10#$HOST1 << 8) | 10#$HOST0 ))

	# Shift the all-ones word left and keep the low 32 bits; a mask
	# length of 0 shifts everything out and yields 0.
	local MASK_NUM=$(( (0xFFFFFFFF << (32 - MASKBITS)) & 0xFFFFFFFF ))

	local NET_NUM=$(( HOST_NUM & MASK_NUM ))

	local NET0=$(( NET_NUM & 255 ))
	local NET1=$(( (NET_NUM >> 8) & 255 ))
	local NET2=$(( (NET_NUM >> 16) & 255 ))
	local NET3=$(( (NET_NUM >> 24) & 255 ))

	echo "$NET3.$NET2.$NET1.$NET0"
}
######################################################
# Convert an IPv4 prefix length to a dotted-quad netmask.
# usage: ipv4_maskbits_to_net_mask MASKBITS
#   MASKBITS  prefix length, 0..32
# Prints the netmask on stdout, e.g. 24 -> 255.255.255.0.
# Returns 1 (message on stderr) if MASKBITS is not a number in 0..32.
######################################################
ipv4_maskbits_to_net_mask()
{
	local MASKBITS=$1

	# Reject empty, non-numeric and over-long input up front; the
	# arithmetic below would otherwise fail with a shell error.
	case "$MASKBITS" in
	[0-9]|[0-9][0-9]) ;;
	*)
		echo "ipv4_maskbits_to_net_mask: invalid mask length '$MASKBITS'" >&2
		return 1
		;;
	esac
	# 10# forces base 10 so that "08" is not parsed as octal.
	MASKBITS=$(( 10#$MASKBITS ))
	if [ "$MASKBITS" -gt 32 ] ; then
		echo "ipv4_maskbits_to_net_mask: invalid mask length '$MASKBITS'" >&2
		return 1
	fi

	# Shift the all-ones word left and keep the low 32 bits; a mask
	# length of 0 shifts everything out and yields 0.
	local MASK_NUM=$(( (0xFFFFFFFF << (32 - MASKBITS)) & 0xFFFFFFFF ))

	local MASK0=$(( MASK_NUM & 255 ))
	local MASK1=$(( (MASK_NUM >> 8) & 255 ))
	local MASK2=$(( (MASK_NUM >> 16) & 255 ))
	local MASK3=$(( (MASK_NUM >> 24) & 255 ))

	echo "$MASK3.$MASK2.$MASK1.$MASK0"
}
798 local N=`echo $ADDR | sed -e 's/[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*//'`
799 test -n "$N" && fail=1
801 local ADDR0=$(echo $ADDR | awk -F . '{print $4}')
802 local ADDR1=$(echo $ADDR | awk -F . '{print $3}')
803 local ADDR2=$(echo $ADDR | awk -F . '{print $2}')
804 local ADDR3=$(echo $ADDR | awk -F . '{print $1}')
806 test "$ADDR0" -gt 255 && fail=1
807 test "$ADDR1" -gt 255 && fail=1
808 test "$ADDR2" -gt 255 && fail=1
809 test "$ADDR3" -gt 255 && fail=1
811 test x"$fail" != x"0" && {
812 #echo "IPv4: '$ADDR' is not a valid address"
819 # iptables doesn't like being re-entered, so flock-wrap it.
822 flock -w 30 /var/ctdb/iptables-ctdb.flock /sbin/iptables "$@"
825 ########################################################
827 ########################################################
829 # Temporary directory for tickles.
830 tickledir="$CTDB_VARDIR/state/tickles"
831 mkdir -p "$tickledir"
837 mkdir -p "$tickledir" # Just in case
840 _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
842 # What public IPs do I hold?
843 _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
845 # IPs as a regexp choice
846 _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
848 # Record connections to our public IPs in a temporary file
849 _my_connections="${tickledir}/${_port}.connections"
850 rm -f "$_my_connections"
852 awk -v destpat="^${_ipschoice}:${_port}\$" \
853 '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
854 sort >"$_my_connections"
856 # Record our current tickles in a temporary file
857 _my_tickles="${tickledir}/${_port}.tickles"
860 ctdb -Y gettickles $_i $_port |
861 awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
865 # Add tickles for connections that we haven't already got tickles for
866 comm -23 "$_my_connections" "$_my_tickles" |
867 while read _src _dst ; do
868 ctdb addtickle $_src $_dst
871 # Remove tickles for connections that are no longer there
872 comm -13 "$_my_connections" "$_my_tickles" |
873 while read _src _dst ; do
874 ctdb deltickle $_src $_dst
877 rm -f "$_my_connections" "$_my_tickles"
880 ########################################################
881 # load a site local config file
882 ########################################################
# Source the optional site-local hook $CTDB_BASE/rc.local, then every
# executable file in $CTDB_BASE/rc.local.d/ in glob (sorted) order.
# All paths are quoted so that a CTDB_BASE containing whitespace or glob
# characters is handled as a single path.
if [ -x "$CTDB_BASE/rc.local" ] ; then
	. "$CTDB_BASE/rc.local"
fi

if [ -d "$CTDB_BASE/rc.local.d" ] ; then
	# If the directory is empty the glob stays literal; the -x test
	# then fails and nothing is sourced.
	for i in "$CTDB_BASE"/rc.local.d/* ; do
		[ -x "$i" ] && . "$i"
	done
fi

script_name="${0##*/}"       # basename
service_name="$script_name"  # default is just the script name