Eventscripts: clean up 60.nfs monitor event.
[ctdb.git] / config / functions
# Hey Emacs, this is a -*- shell-script -*- !!!

# utility functions for ctdb event scripts

# Search the standard system directories before anything inherited in PATH.
PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH

# Provide (and export) default locations when the caller has not set them.
if [ -z "$CTDB_VARDIR" ] ; then
    export CTDB_VARDIR="/var/ctdb"
fi
if [ -z "$CTDB_ETCDIR" ] ; then
    export CTDB_ETCDIR="/etc"
fi
13
#######################################
# pull in a system config file, if any
#
# usage: _loadconfig [NAME]
#
# With no NAME, the name is taken from $service_config or, failing
# that, $service_name, and loading is delegated to loadconfig().
# With a NAME other than "ctdb", the "ctdb" configuration is loaded
# first.  The file is then sourced from the first of these that exists:
#   $CTDB_ETCDIR/sysconfig/NAME
#   $CTDB_ETCDIR/default/NAME
#   $CTDB_BASE/sysconfig/NAME
# Returns 0 when nothing is sourced, otherwise the status of the
# sourced file.
_loadconfig() {
    local _cfg

    if [ -z "$1" ] ; then
        _cfg="${service_config:-${service_name}}"
        if [ -n "$_cfg" ] ; then
            # Go through loadconfig() so that a hooked version is honoured.
            loadconfig "$_cfg"
        fi
        # There is no file to source for an empty name.
        return 0
    elif [ "$1" != "ctdb" ] ; then
        loadconfig "ctdb"
    fi

    # Paths are quoted so that directories containing whitespace work.
    if [ -f "$CTDB_ETCDIR/sysconfig/$1" ]; then
        . "$CTDB_ETCDIR/sysconfig/$1"
    elif [ -f "$CTDB_ETCDIR/default/$1" ]; then
        . "$CTDB_ETCDIR/default/$1"
    elif [ -f "$CTDB_BASE/sysconfig/$1" ]; then
        . "$CTDB_BASE/sysconfig/$1"
    fi
}
35
# Public entry point for configuration loading.  All arguments are
# handed to _loadconfig() unchanged.
loadconfig ()
{
    _loadconfig "$@"
}
39
##############################################################
# determine on what type of system (init style) we are running
#
# Sets CTDB_INIT_STYLE to "suse", "debian" or "redhat" unless it
# already has a non-empty value.
detect_init_style() {
    # A style that has already been chosen is left alone.
    if [ -n "$CTDB_INIT_STYLE" ] ; then
        return 0
    fi

    # "redhat" is the fallback when neither helper binary is present.
    CTDB_INIT_STYLE="redhat"
    if [ -x /sbin/startproc ] ; then
        CTDB_INIT_STYLE="suse"
    elif [ -x /sbin/start-stop-daemon ] ; then
        CTDB_INIT_STYLE="debian"
    fi
}
54
######################################################
# simulate /sbin/service on platforms that don't have it
# _service() makes it easier to hook the service() function for
# testing.
#
# usage: _service SERVICE_NAME OPERATION
#
# Uses /sbin/service when it is executable, otherwise the init script
# under $CTDB_ETCDIR/init.d or $CTDB_ETCDIR/rc.d/init.d.  The command
# is prefixed with the contents of $_nice, which callers set.
_service ()
{
  _service_name="$1"
  _op="$2"

  # do nothing, when no service was specified
  [ -z "$_service_name" ] && return

  # $_nice is deliberately unquoted: when empty it must vanish.
  if [ -x /sbin/service ]; then
      $_nice /sbin/service "$_service_name" "$_op"
  elif [ -x "$CTDB_ETCDIR/init.d/$_service_name" ]; then
      $_nice "$CTDB_ETCDIR/init.d/$_service_name" "$_op"
  elif [ -x "$CTDB_ETCDIR/rc.d/init.d/$_service_name" ]; then
      $_nice "$CTDB_ETCDIR/rc.d/init.d/$_service_name" "$_op"
  fi
}
75
# Run a service operation at normal priority: clears $_nice and
# forwards all arguments to _service().
service ()
{
    _nice=""

    _service "$@"
}
81
######################################################
# simulate /sbin/service (niced) on platforms that don't have it
#
# Same as service(), except that $_nice is set to "nice" so that
# _service() prefixes the command with it.
nice_service ()
{
    _nice="nice"

    _service "$@"
}
89
######################################################
# wrapper around /proc/ settings to allow them to be hooked
# for testing
#
# usage: set_proc RELATIVE_PATH VALUE
#   RELATIVE_PATH is relative to /proc/, VALUE is written to it
set_proc ()
{
    echo "$2" > "/proc/$1"
}
98
######################################################
# wrapper around getting file contents from /proc/ to allow
# this to be hooked for testing
#
# usage: get_proc RELATIVE_PATH
#   RELATIVE_PATH is relative to /proc/; the contents go to stdout
get_proc ()
{
    cat "/proc/$1"
}
107
######################################################
# Check that an RPC service is healthy -
# this includes allowing a certain number of failures
# before marking the NFS service unhealthy.
#
# usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
#
# Each triple is a set of 3 arguments: an operator, a
# fail count limit and an action string.
#
# For example:
#
#       nfs_check_rpc_service "lockd" \
#           -ge 15 "verbose restart unhealthy" \
#           -eq 10 "restart:bs"
#
# says that if lockd is down for 15 iterations then do
# a verbose restart of lockd and mark the node unhealthy.
# Before this, after 10 iterations of failure, the
# service is restarted silently in the background.
# Order is important: the failure counts need to be
# specified in decreasing order because processing stops
# after the first condition that is true.
#
# Known actions: "verbose" (print the RPC check output),
# "restart" with optional ":flags" (q, s, b - see below) and
# "unhealthy" (exit 1).
######################################################
nfs_check_rpc_service ()
{
    _prog_name="$1"
    shift

    # Defaults; the per-program section below overrides them.
    _rpc_prog="$_prog_name"
    _version=1
    _opts=""
    _restart=""

    case "$_prog_name" in
        knfsd)
            _rpc_prog=nfs
            _version=3
            _restart="echo 'Trying to restart NFS service'; startstop_nfs restart"
            ;;
        mountd)
            _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
            ;;
        rquotad)
            _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
            ;;
        lockd)
            _rpc_prog=nlockmgr
            _version=4
            _restart="echo 'Trying to restart lock manager service'; startstop_nfs restart; startstop_nfslock restart"
            ;;
        statd)
            _rpc_prog=status
            _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}${STATD_PORT:+ -p }${STATD_PORT}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
            ;;
        *)
            echo "Internal error: unknown RPC program \"$_prog_name\"."
            exit 1
            ;;
    esac

    _service_name="nfs_${_prog_name}"

    # Healthy: reset the failure counter and we are done.
    if ctdb_check_rpc "$_rpc_prog" "$_version" >/dev/null ; then
        ctdb_counter_init "$_service_name"
        return 0
    fi

    ctdb_counter_incr "$_service_name"

    # Walk the (operator, limit, actions) triples.
    while [ -n "$3" ] ; do
        if ctdb_check_counter "quiet" "$1" "$2" "$_service_name" ; then
            # Condition not met, try the next triple.
            shift 3
            continue
        fi

        for _action in $3 ; do
            case "$_action" in
                verbose)
                    echo "$ctdb_check_rpc_out"
                    ;;
                restart|restart:*)
                    # No explicit command specified, construct rpc command.
                    if [ -z "$_restart" ] ; then
                        _p="rpc.${_prog_name}"
                        _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'; killall -q -9 $_p; $_p $_opts"
                    fi

                    # Restart flags are whatever follows the colon, if any.
                    case "$_action" in
                        restart:*) _flags="${_action#restart:}" ;;
                        *)         _flags="" ;;
                    esac

                    # q=quiet - everything to /dev/null
                    case "$_flags" in
                        *q*) _restart="{ ${_restart} ; } >/dev/null 2>&1" ;;
                    esac
                    # s=stealthy - last command to /dev/null
                    case "$_flags" in
                        *s*) _restart="${_restart} >/dev/null 2>&1" ;;
                    esac
                    # b=background - the whole thing, easy and reliable
                    case "$_flags" in
                        *b*) _restart="{ ${_restart} ; } &" ;;
                    esac

                    # Do it!
                    eval "${_restart}"
                    ;;
                unhealthy)
                    exit 1
                    ;;
                *)
                    echo "Internal error: unknown action \"$_action\"."
                    exit 1
                    ;;
            esac
        done

        # Only process the first action group.
        break
    done
}
231
######################################################
# check that a rpc server is registered with portmap
# and responding to requests
# usage: ctdb_check_rpc SERVICE_NAME VERSION
#
# The output of rpcinfo is left in $ctdb_check_rpc_out.  On failure
# that output is prefixed with an error line, printed, and 1 is
# returned.
######################################################
ctdb_check_rpc ()
{
    progname="$1"
    version="$2"

    # Success: nothing more to do.
    ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) && return 0

    ctdb_check_rpc_out="ERROR: $progname failed RPC check:
$ctdb_check_rpc_out"
    echo "$ctdb_check_rpc_out"
    return 1
}
249
######################################################
# check a set of directories is available
# return 1 on a missing directory
# usage: ctdb_check_directories_probe
#
# Directory names are read from stdin, one per line.  Names that
# contain a '%' are skipped.  The name being checked is left in $d.
######################################################
ctdb_check_directories_probe() {
    while IFS="" read d ; do
        # Skip names containing '%'.
        if [ "${d#*%}" != "$d" ] ; then
            continue
        fi

        if [ ! -d "${d}/." ] ; then
            return 1
        fi
    done
}
266
######################################################
# check a set of directories is available
# usage: ctdb_check_directories [SERVICE_NAME]
#
# Directory names are read from stdin by
# ctdb_check_directories_probe().  If one is missing an error
# naming the service and the directory is printed and the script
# exits with status 1.  SERVICE_NAME defaults to $service_name.
######################################################
ctdb_check_directories() {
    n="${1:-${service_name}}"

    if ! ctdb_check_directories_probe ; then
        echo "ERROR: $n directory \"$d\" not available"
        exit 1
    fi
}
278
######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
#
# Each port must show up in netstat as listening on either
# 0.0.0.0 or ::.  Returns 1, after printing an error, at the
# first port that does not.
######################################################
ctdb_check_tcp_ports() {

    for p ; do
        # The second lookup only runs when the first one finds nothing.
        if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" &&
           ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
            echo "ERROR: $service_name tcp port $p is not responding"
            return 1
        fi
    done
}
294
######################################################
# check a unix socket
# usage: ctdb_check_unix_socket <socket_path>
#
# Succeeds when no path is given or when netstat lists a listening
# unix socket whose line ends in <socket_path>; otherwise prints an
# error and returns 1.
######################################################
ctdb_check_unix_socket() {
    socket_path="$1"

    if [ -z "$socket_path" ] ; then
        return 0
    fi

    if netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$" ; then
        return 0
    fi

    echo "ERROR: $service_name socket $socket_path not found"
    return 1
}
308
######################################################
# check a command returns zero status
# usage: ctdb_check_command SERVICE_NAME <command>
#
# Note that this sets $service_name.  An empty command is treated
# as success.  If the command fails an error is printed and the
# script exits with status 1.
######################################################
ctdb_check_command() {
  service_name="$1"
  wait_cmd="$2"

  if [ -z "$wait_cmd" ] ; then
      return 0
  fi

  # $wait_cmd is unquoted on purpose: it may carry arguments.
  if ! $wait_cmd > /dev/null 2>&1 ; then
      echo "ERROR: $service_name - $wait_cmd returned error"
      exit 1
  fi
}
322
################################################
# kill off any TCP connections with the given IP
#
# usage: kill_tcp_connections IP
#
# Established connections involving IP (plain or ::ffff:-mapped) are
# collected from netstat into $CTDB_VARDIR/state/connections.IP and
# a "ctdb killtcp" is sent for each.  Connections whose port on the
# first netstat address column is 139 or 445 (CIFS) are only killed
# in one direction, all others in both.  Afterwards we wait for up
# to about 4 seconds for the connections to disappear.
################################################
kill_tcp_connections() {
    _IP="$1"
    _failed=0
    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"

    # Fields 4 and 5 of the netstat output are the two endpoints.
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" \
        | awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" \
        | awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src; do
        # Split "address:port" at the last colon.
        srcip="${src%:*}"
        srcport="${src##*:}"
        destip="${dest%:*}"
        destport="${dest##*:}"
        echo "Killing TCP connection $srcip:$srcport $destip:$destport"
        ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
        case "$destport" in
          # we only do one-way killtcp for CIFS
          139|445) : ;;
          # for all others we do 2-way
          *)
                ctdb killtcp "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
                ;;
        esac
        _killcount=$((_killcount + 1))
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
        echo "Failed to send killtcp control"
        return;
    }
    # Nothing was found, so there is nothing to wait for.
    [ "$_killcount" -gt 0 ] || {
        return;
    }
    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
        sleep 1
        _count=$((_count + 1))
        [ "$_count" -gt 3 ] && {
            echo "Timed out killing tcp connections for IP $_IP"
            return;
        }
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}
372
##################################################################
# kill off the local end for any TCP connections with the given IP
#
# usage: kill_tcp_connections_local_only IP
#
# Like a one-way kill: a single "ctdb killtcp" is sent per
# established connection found in netstat for IP (plain or
# ::ffff:-mapped).  Afterwards we wait for up to about 4 seconds for
# the connections to disappear.
##################################################################
kill_tcp_connections_local_only() {
    _IP="$1"
    _failed=0
    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"

    # Fields 4 and 5 of the netstat output are the two endpoints.
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" \
        | awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" \
        | awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src; do
        # Split "address:port" at the last colon.
        srcip="${src%:*}"
        srcport="${src##*:}"
        destip="${dest%:*}"
        destport="${dest##*:}"
        echo "Killing TCP connection $srcip:$srcport $destip:$destport"
        ctdb killtcp "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
        _killcount=$((_killcount + 1))
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
        echo "Failed to send killtcp control"
        return;
    }
    # Nothing was found, so there is nothing to wait for.
    [ "$_killcount" -gt 0 ] || {
        return;
    }
    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
        sleep 1
        _count=$((_count + 1))
        [ "$_count" -gt 3 ] && {
            echo "Timed out killing tcp connections for IP $_IP"
            return;
        }
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}
414
##################################################################
# tickle any TCP connections with the given IP
#
# usage: tickle_tcp_connections IP
#
# For every established connection found in netstat for IP (plain
# or ::ffff:-mapped) a "ctdb tickle" is sent in both directions.
##################################################################
tickle_tcp_connections() {
    _IP="$1"
    _failed=0
    _killcount=0
    connfile="$CTDB_VARDIR/state/connections.$_IP"

    # Fields 4 and 5 of the netstat output are the two endpoints.
    netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" \
        | awk '{print $4" "$5}' > "$connfile"
    netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" \
        | awk '{print $4" "$5}' >> "$connfile"

    while read -r dest src; do
        # Split "address:port" at the last colon.
        srcip="${src%:*}"
        srcport="${src##*:}"
        destip="${dest%:*}"
        destport="${dest##*:}"
        echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
        ctdb tickle "$srcip:$srcport" "$destip:$destport" >/dev/null 2>&1 || _failed=1
        echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
        ctdb tickle "$destip:$destport" "$srcip:$srcport" >/dev/null 2>&1 || _failed=1
    done < "$connfile"
    rm -f "$connfile"

    [ "$_failed" = 0 ] || {
        echo "Failed to send tickle control"
        return;
    }
}
444
########################################################
# start/stop the nfs service on different platforms
#
# usage: startstop_nfs start|stop|restart
#
# The platform is "sles" when $CTDB_ETCDIR/init.d/nfsserver is
# executable and "rhel" when $CTDB_ETCDIR/init.d/nfslock is; the
# latter wins if both exist.  On any other platform an error is
# printed and the script exits with status 1.  An unrecognised
# operation does nothing.
########################################################
startstop_nfs() {
        PLATFORM="unknown"
        if [ -x "$CTDB_ETCDIR/init.d/nfsserver" ] ; then
                PLATFORM="sles"
        fi
        if [ -x "$CTDB_ETCDIR/init.d/nfslock" ] ; then
                PLATFORM="rhel"
        fi

        case "$PLATFORM" in
        sles)
                case "$1" in
                start)
                        service nfsserver start
                        ;;
                stop)
                        service nfsserver stop > /dev/null 2>&1
                        ;;
                restart)
                        # Zero the nfsd thread count, then stop, kill
                        # any remaining nfsd and start again.
                        set_proc "fs/nfsd/threads" 0
                        service nfsserver stop > /dev/null 2>&1
                        pkill -9 nfsd
                        service nfsserver start
                        ;;
                esac
                ;;
        rhel)
                case "$1" in
                start)
                        service nfslock start
                        service nfs start
                        ;;
                stop)
                        service nfs stop
                        service nfslock stop
                        ;;
                restart)
                        # Zero the nfsd thread count, then stop, kill
                        # any remaining nfsd and start again.
                        set_proc "fs/nfsd/threads" 0
                        service nfs stop > /dev/null 2>&1
                        service nfslock stop > /dev/null 2>&1
                        pkill -9 nfsd
                        service nfslock start
                        service nfs start
                        ;;
                esac
                ;;
        *)
                echo "Unknown platform. NFS is not supported with ctdb"
                exit 1
                ;;
        esac
}
500
########################################################
# start/stop the nfs lockmanager service on different platforms
#
# usage: startstop_nfslock start|stop|restart
#
# The platform is "sles" when $CTDB_ETCDIR/init.d/nfsserver is
# executable and "rhel" when $CTDB_ETCDIR/init.d/nfslock is; the
# latter wins if both exist.  On any other platform an error is
# printed and the script exits with status 1.  An unrecognised
# operation does nothing.
########################################################
startstop_nfslock() {
        PLATFORM="unknown"
        if [ -x "$CTDB_ETCDIR/init.d/nfsserver" ] ; then
                PLATFORM="sles"
        fi
        if [ -x "$CTDB_ETCDIR/init.d/nfslock" ] ; then
                PLATFORM="rhel"
        fi

        case "$PLATFORM" in
        sles)
                # for sles there is no service for lockmanager
                # so we instead just shutdown/restart nfs
                case "$1" in
                start)
                        service nfsserver start
                        ;;
                stop)
                        service nfsserver stop > /dev/null 2>&1
                        ;;
                restart)
                        service nfsserver stop
                        service nfsserver start
                        ;;
                esac
                ;;
        rhel)
                case "$1" in
                start)
                        service nfslock start
                        ;;
                stop)
                        service nfslock stop > /dev/null 2>&1
                        ;;
                restart)
                        service nfslock stop
                        service nfslock start
                        ;;
                esac
                ;;
        *)
                echo "Unknown platform. NFS locking is not supported with ctdb"
                exit 1
                ;;
        esac
}
550
# Add an IP address to an interface while holding the per-interface
# lock.
#
# usage: add_ip_to_iface IFACE IP MASKBITS
#
# The work is done by $CTDB_BASE/interface_modify.sh, run under
# flock (30 second timeout) on a lock file below
# $CTDB_VARDIR/state/interface_modify.  Returns the status of flock,
# or of mkdir if the state directory cannot be created.
add_ip_to_iface()
{
        local _iface="$1"
        local _ip="$2"
        local _maskbits="$3"
        local _state_dir="$CTDB_VARDIR/state/interface_modify"
        local _lockfile="$_state_dir/$_iface.flock"
        local _readd_base="$_state_dir/$_iface.readd.d"
        local _ret

        mkdir -p "$_state_dir" || {
                _ret=$?
                echo "Failed to mkdir -p $_state_dir - $_ret"
                return $_ret
        }

        # flock needs an existing file to lock on.
        test -f "$_lockfile" || {
                touch "$_lockfile"
        }

        flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
        return $?
}
573
# Remove an IP address from an interface while holding the
# per-interface lock.
#
# usage: delete_ip_from_iface IFACE IP MASKBITS
#
# The work is done by $CTDB_BASE/interface_modify.sh, run under
# flock (30 second timeout) on a lock file below
# $CTDB_VARDIR/state/interface_modify.  Returns the status of flock,
# or of mkdir if the state directory cannot be created.
delete_ip_from_iface()
{
        local _iface="$1"
        local _ip="$2"
        local _maskbits="$3"
        local _state_dir="$CTDB_VARDIR/state/interface_modify"
        local _lockfile="$_state_dir/$_iface.flock"
        local _readd_base="$_state_dir/$_iface.readd.d"
        local _ret

        mkdir -p "$_state_dir" || {
                _ret=$?
                echo "Failed to mkdir -p $_state_dir - $_ret"
                return $_ret
        }

        # flock needs an existing file to lock on.
        test -f "$_lockfile" || {
                touch "$_lockfile"
        }

        flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
        return $?
}
596
# Register a re-add script for an IP address on an interface while
# holding the per-interface lock.
#
# usage: setup_iface_ip_readd_script IFACE IP MASKBITS READD_SCRIPT
#
# The work is done by the "readd_script" operation of
# $CTDB_BASE/interface_modify.sh, run under flock (30 second
# timeout) on a lock file below $CTDB_VARDIR/state/interface_modify.
# Returns the status of flock, or of mkdir if the state directory
# cannot be created.
setup_iface_ip_readd_script()
{
        local _iface="$1"
        local _ip="$2"
        local _maskbits="$3"
        local _readd_script="$4"
        local _state_dir="$CTDB_VARDIR/state/interface_modify"
        local _lockfile="$_state_dir/$_iface.flock"
        local _readd_base="$_state_dir/$_iface.readd.d"
        local _ret

        mkdir -p "$_state_dir" || {
                _ret=$?
                echo "Failed to mkdir -p $_state_dir - $_ret"
                return $_ret
        }

        # flock needs an existing file to lock on.
        test -f "$_lockfile" || {
                touch "$_lockfile"
        }

        flock --timeout 30 "$_lockfile" "$CTDB_BASE/interface_modify.sh" readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
        return $?
}
620
########################################################
# some simple logic for counting events - per eventscript
# usage: ctdb_counter_init
#        ctdb_counter_incr
#        ctdb_check_counter_limit <limit>
# ctdb_check_counter_limit exits with status 1 when count >= <limit>
########################################################

# Shared setup for the counter functions: works out the service name
# (argument 1, else $service_name) and the matching counter file
# below $ctdb_fail_dir, and makes sure its directory exists.
_ctdb_counter_common () {
    if [ -n "$1" ] ; then
        _service_name="$1"
    else
        _service_name="$service_name"
    fi
    _counter_file="${ctdb_fail_dir}/${_service_name}"

    # Everything up to the last '/' is the directory.
    mkdir -p "${_counter_file%/*}"
}
# Reset the failure counter of a service (argument 1, else
# $service_name) by truncating its counter file.
ctdb_counter_init ()
{
    _ctdb_counter_common "$1"

    : > "$_counter_file"
}
# Add one to the failure counter of a service (argument 1, else
# $service_name).
ctdb_counter_incr () {
    _ctdb_counter_common "$1"

    # unary counting!  The count is the size of the file, so exactly
    # one byte must be appended.  printf is used because "echo -n" is
    # not portable: some shells print "-n" and a newline instead.
    printf '1' >> "$_counter_file"
}
# Compare the failure counter of $service_name against a limit.
#
# usage: ctdb_check_counter_limit [LIMIT [QUIET]]
#
# LIMIT defaults to $service_fail_limit.  When the count has reached
# the limit an error is printed and the script exits with status 1.
# Below the limit a warning is printed for a non-zero count, unless
# QUIET is non-empty.
ctdb_check_counter_limit ()
{
    _ctdb_counter_common

    _limit="${1:-${service_fail_limit}}"
    _quiet="$2"

    # unary counting!  A missing counter file counts as 0.
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)

    if [ "$_size" -ge "$_limit" ] ; then
        echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
        exit 1
    fi

    if [ "$_size" -gt 0 ] && [ -z "$_quiet" ] ; then
        echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
    fi
}
# Test whether the failure counter of $service_name is exactly LIMIT.
#
# usage: ctdb_check_counter_equal LIMIT
#
# Returns 1 when the count equals LIMIT and 0 otherwise.
ctdb_check_counter_equal ()
{
    _ctdb_counter_common

    _limit=$1

    # unary counting!  A missing counter file counts as 0.
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)

    [ "$_size" -eq "$_limit" ] && return 1
    return 0
}
# Compare the failure counter of a service against a limit using an
# arbitrary integer operator.
#
# usage: ctdb_check_counter [MSG [OP [LIMIT [SERVICE_NAME]]]]
#
#   MSG          "error" (default) prints an error and exits with
#                status 1 when the condition holds; anything else
#                makes the function return 1 silently instead
#   OP           integer operator understood by test, default -ge
#   LIMIT        default $service_fail_limit
#   SERVICE_NAME default $service_name
#
# Returns 0 when "count OP LIMIT" does not hold.
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"
    # The service name is read directly from $4.  Using "shift 3"
    # here would fail when fewer than 3 arguments are given, leaving
    # MSG in $1 to be mistaken for the service name.
    _ctdb_counter_common "$4"

    # unary counting!  A missing counter file counts as 0.
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
    if [ "$_size" "$_op" "$_limit" ] ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}
689
690 ########################################################
691
# Base directories for per-script status files and for the failure
# counters.
ctdb_status_dir="${CTDB_VARDIR}/status"
ctdb_fail_dir="${CTDB_VARDIR}/failcount"
694
# Set $service_state_dir to the state directory of a service
# (argument 1, else $service_name) below $CTDB_VARDIR/state and
# create it.  Exits with status 1 if it cannot be created.
ctdb_setup_service_state_dir ()
{
    service_state_dir="${CTDB_VARDIR}/state/${1:-${service_name}}"

    if ! mkdir -p "$service_state_dir" ; then
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    fi
}
703
########################################################
# Managed status history, for auto-start/stop

# Directory holding one marker file per service that has been
# marked as managed.
ctdb_managed_dir="${CTDB_VARDIR}/managed_history"
708
# Shared setup for the managed-history functions: works out the
# service name (argument 1, else $service_name) and the path of its
# marker file below $ctdb_managed_dir.
_ctdb_managed_common ()
{
    if [ -n "$1" ] ; then
        _service_name="$1"
    else
        _service_name="$service_name"
    fi
    _ctdb_managed_file="${ctdb_managed_dir}/${_service_name}"
}
714
# Record that a service (argument 1, else $service_name) is managed
# by creating its marker file.
ctdb_service_managed ()
{
    _ctdb_managed_common "$@"

    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}
721
# Record that a service (argument 1, else $service_name) is no
# longer managed by removing its marker file.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common "$@"

    rm -f "$_ctdb_managed_file"
}
727
# Succeed when the marker file of a service (argument 1, else
# $service_name) exists, i.e. the service was marked as managed.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common "$@"

    test -f "$_ctdb_managed_file"
}
733
734 ########################################################
735 # Check and set status
736
# Print a one-line report for a recorded status.
#
# usage: log_status_cat STATUS FILE
#
# STATUS is the node state to mention, FILE holds the problem
# description.  FILE is quoted so that paths containing whitespace
# are read correctly.
log_status_cat ()
{
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}
741
# Report the status recorded for $script_name below $ctdb_status_dir.
#
# Returns 1 (after logging) when an "unhealthy" file is readable,
# 2 (after logging) when a "banned" file is readable, and 0 otherwise.
# "unhealthy" takes precedence over "banned".
ctdb_checkstatus ()
{
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    fi

    if [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    fi

    return 0
}
754
# Record or clear the status of $script_name below $ctdb_status_dir.
#
# usage: ctdb_setstatus STATUS [FILE]
#
# For STATUS "unhealthy" or "banned" the contents of FILE are copied
# to a file of that name.  Any other STATUS removes both files.
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        rm -f "$d/banned"
        rm -f "$d/unhealthy"
    fi
}
770
771 ##################################################################
772 # Reconfigure a service on demand
773
# Shared setup for the reconfigure functions: creates the status
# directory of a service (argument 1, else $service_name) below
# $ctdb_status_dir and sets $_ctdb_service_reconfigure_flag to the
# "reconfigure" flag file inside it.
_ctdb_service_reconfigure_common ()
{
    _d="${ctdb_status_dir}/${1:-${service_name}}"

    mkdir -p "$_d"

    _ctdb_service_reconfigure_flag="${_d}/reconfigure"
}
780
# Succeed when the reconfigure flag of a service (argument 1, else
# $service_name) is set, i.e. its flag file exists.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common "$@"

    test -e "$_ctdb_service_reconfigure_flag"
}
786
# Set the reconfigure flag of a service (argument 1, else
# $service_name) by creating/truncating its flag file.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common "$@"

    : > "$_ctdb_service_reconfigure_flag"
}
792
# Clear the reconfigure flag of a service (argument 1, else
# $service_name) by removing its flag file.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common "$@"

    rm -f "$_ctdb_service_reconfigure_flag"
}
798
# Reconfigure a service: clear its reconfigure flag, run
# service_reconfigure() and, if that succeeds, reset the failure
# counter.  A failure status of service_reconfigure() is returned
# unchanged.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"$service_name\"..."
    ctdb_service_unset_reconfigure "$@"

    if service_reconfigure "$@" ; then
        ctdb_counter_init "$@"
    else
        # $? still holds the status of service_reconfigure here.
        return $?
    fi
}
806
# Default service_reconfigure() function: restart the service
# (argument 1, else $service_name) via service().
service_reconfigure ()
{
    service "${1:-${service_name}}" restart
}
812
# Reconfigure a service if its reconfigure flag is set.
#
# Only acts for the "monitor" and "ipreallocated" events; any other
# $event_name returns 0 straight away.  After a reconfigure during
# "monitor" this function does not return - see below.
ctdb_service_check_reconfigure ()
{
    # Only do this for certain events.
    if [ "$event_name" != "monitor" ] && [ "$event_name" != "ipreallocated" ] ; then
        return 0
    fi

    # Nothing to do unless a reconfigure was requested.
    ctdb_service_needs_reconfigure "$@" || return 0

    ctdb_service_reconfigure "$@"

    # Fall through to non-monitor events.
    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    # We don't want to proceed with the rest of the monitor event
    # here, so we exit.  However, if we exit 0 then, if the
    # service was previously broken, we might return a false
    # positive.  So we simply retrieve the status of this script
    # from the previous monitor loop and exit with that status.
    ctdb scriptstatus | \
        grep -q -E "^${script_name}[[:space:]]+Status:OK[[:space:]]"
    exit $?
}
837
838 ##################################################################
839 # Does CTDB manage this service? - and associated auto-start/stop
840
# Backward compatibility helper for is_ctdb_managed_service().
#
# usage: ctdb_compat_managed_service FLAG SERVICE
#
# Appends SERVICE to $CTDB_MANAGED_SERVICES when FLAG is "yes" and
# SERVICE is the service currently being looked up ($_service_name).
ctdb_compat_managed_service ()
{
    if [ "$1" = "yes" ] && [ "$2" = "$_service_name" ] ; then
        CTDB_MANAGED_SERVICES="${CTDB_MANAGED_SERVICES} $2"
    fi
}
847
# Succeed when a service (argument 1, else $service_name) is listed
# in $CTDB_MANAGED_SERVICES, either directly or after the list has
# been extended from the old CTDB_MANAGES_* variables.
is_ctdb_managed_service ()
{
    _service_name="${1:-${service_name}}"

    # $t is used just for readability and to allow more accurate
    # matching via leading/trailing spaces
    t=" $CTDB_MANAGED_SERVICES "

    # Return 0 if "<space>$_service_name<space>" appears in $t
    case "$t" in
        *\ ${_service_name}\ *) return 0 ;;
    esac

    # If above didn't match then update $CTDB_MANAGED_SERVICES for
    # backward compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "

    # The final status is that of the second lookup.
    case "$t" in
        *\ ${_service_name}\ *) return 0 ;;
        *) return 1 ;;
    esac
}
878
# Auto-start/stop a service (argument 1, else $service_name) when its
# managed state has changed.  Only acts during the "monitor" event.
#
# If the service is managed now but was not before, it is started; if
# it was managed before but is not now, it is stopped.  In both cases
# the script then exits with the status of the start/stop.  Otherwise
# the function returns 0.
ctdb_start_stop_service ()
{
    _service_name="${1:-${service_name}}"

    if [ "$event_name" != "monitor" ] ; then
        return 0
    fi

    if is_ctdb_managed_service "$_service_name" ; then
        # Managed and already known to be: nothing to do.
        is_ctdb_previously_managed_service "$_service_name" && return 0

        echo "Starting service \"$_service_name\" - now managed"
        ctdb_service_start "$_service_name"
        exit $?
    fi

    # Not managed and never was: nothing to do.
    is_ctdb_previously_managed_service "$_service_name" || return 0

    echo "Stopping service \"$_service_name\" - no longer managed"
    ctdb_service_stop "$_service_name"
    exit $?
}
899
# Start a service: mark it managed, run service_start() and, if that
# succeeds, reset the failure counter.  A failure status of
# service_start() is returned unchanged.
ctdb_service_start ()
{
    # The service is marked managed if we've ever tried to start it.
    ctdb_service_managed "$@"

    # Here we only want $1.  If no argument is passed then
    # service_start needs to know.
    if service_start "$@" ; then
        ctdb_counter_init "$@"
    else
        # $? still holds the status of service_start here.
        return $?
    fi
}
911
# Stop a service: remove its managed marker, then run service_stop().
# Returns the status of service_stop().
ctdb_service_stop ()
{
    ctdb_service_unmanaged "$@"

    service_stop "$@"
}
917
# Default service_start() and service_stop() functions.

# These may be overridden in an eventscript.  When overriding, the
# following convention must be followed.  If these functions are
# called with no arguments then they may use internal logic to
# determine whether the service is managed and, therefore, whether
# they should take any action.  However, if the service name is
# specified as an argument then an attempt must be made to start or
# stop the service.  This is because the auto-start/stop code calls
# them with the service name as an argument.

# Default: start the service (argument 1, else $service_name) via
# service().
service_start ()
{
    service "${1:-$service_name}" start
}
932
# Default: stop the service (argument 1, else $service_name) via
# service().  May be overridden in an eventscript.
service_stop ()
{
    service "${1:-$service_name}" stop
}
937
938 ##################################################################
939
# Handle the events that are treated the same way in every eventscript.
#
# $1 is the event name:
#   status    - run ctdb_checkstatus and exit with its status
#   setstatus - run ctdb_setstatus with the remaining arguments and
#               exit with its status
# For any other event the function returns 0 so that the calling
# script can carry on with its own handling.
ctdb_standard_event_handler ()
{
    if [ "$1" = "status" ] ; then
        ctdb_checkstatus
        exit
    elif [ "$1" = "setstatus" ] ; then
        # Drop the event name; the rest belongs to ctdb_setstatus.
        shift
        ctdb_setstatus "$@"
        exit
    fi
}
954
# Print the network address that a host address belongs to.
#
# $1 - IPv4 host address in dotted-quad form
# $2 - number of mask bits (prefix length)
#
# Example: "192.168.1.77 24" prints "192.168.1.0".
ipv4_host_addr_to_net_addr()
{
        local addr=$1
        local bits=$2
        local oct_a oct_b oct_c oct_d

        # Dotted-quad fields, most significant first.
        oct_a=$(echo $addr | awk -F . '{print $1}')
        oct_b=$(echo $addr | awk -F . '{print $2}')
        oct_c=$(echo $addr | awk -F . '{print $3}')
        oct_d=$(echo $addr | awk -F . '{print $4}')

        # The address as a single number (256**3 = 16777216, 256**2 = 65536).
        local addr_num=$(( $oct_a * 16777216 + $oct_b * 65536 + $oct_c * 256 + $oct_d ))

        # 32 one-bits pushed left by the number of host bits, then cut
        # back to 32 bits (2**32 - 1 = 4294967295).
        local mask_num=$(( (4294967295 * 2**(32 - $bits)) & 4294967295 ))

        local net_num=$(( $addr_num & $mask_num ))

        # Emit the four octets, most significant first.
        printf '%d.%d.%d.%d\n' \
                $(( ($net_num >> 24) & 255 )) \
                $(( ($net_num >> 16) & 255 )) \
                $(( ($net_num >> 8) & 255 )) \
                $(( $net_num & 255 ))
}
978
# Print the dotted-quad netmask for a given number of mask bits.
#
# $1 - number of mask bits (prefix length)
#
# Example: "24" prints "255.255.255.0".
ipv4_maskbits_to_net_mask()
{
        local bits=$1

        # 32 one-bits pushed left by the number of host bits, then cut
        # back to 32 bits (2**32 - 1 = 4294967295).
        local mask=$(( (4294967295 * 2**(32 - $bits)) & 4294967295 ))

        # Emit the four octets, most significant first.
        printf '%d.%d.%d.%d\n' \
                $(( ($mask >> 24) & 255 )) \
                $(( ($mask >> 16) & 255 )) \
                $(( ($mask >> 8) & 255 )) \
                $(( $mask & 255 ))
}
992
# Check whether a string is a well-formed IPv4 address.
#
# $1 - candidate address
#
# Returns 0 if $1 consists of exactly four dot-separated groups of
# decimal digits, each with a value between 0 and 255 (leading zeros
# are tolerated), and 1 otherwise.  The whole string has to match, so
# an empty string or one with surrounding whitespace is rejected.
# Nothing is printed, not even for malformed input.
ipv4_is_valid_addr()
{
        local ADDR=$1
        local OCTET ZEROS

        # Shape check: only digits and dots, no empty group, and
        # exactly three dots.
        case "$ADDR" in
                "" | *[!0-9.]* | .* | *. | *..*)
                        return 1 ;;
                *.*.*.*.*)
                        return 1 ;;
                *.*.*.*)
                        ;;
                *)
                        return 1 ;;
        esac

        # Range check.  $ADDR holds nothing but digits and dots at
        # this point, so splitting it unquoted on "." is safe.
        local IFS=.
        for OCTET in $ADDR ; do
                # Strip leading zeros so that the length test below
                # catches values too large for shell integers.
                ZEROS=${OCTET%%[!0]*}
                OCTET=${OCTET#"$ZEROS"}

                [ "${#OCTET}" -le 3 ] || return 1
                [ "${OCTET:-0}" -le 255 ] || return 1
        done

        return 0
}
1018
# iptables doesn't like being re-entered, so every call is run under a
# lock file in $CTDB_VARDIR.  flock waits up to 30 seconds for the
# lock.  All arguments are passed through to /sbin/iptables unchanged.
iptables()
{
        # The lock path is quoted so that a $CTDB_VARDIR containing
        # whitespace still names a single file.
        flock -w 30 "$CTDB_VARDIR/iptables-ctdb.flock" /sbin/iptables "$@"
}
1024
1025 ########################################################
1026 # tickle handling
1027 ########################################################
1028
# Scratch directory for tickle bookkeeping, created when this file is
# sourced.
tickledir="${CTDB_VARDIR}/state/tickles"
[ -d "$tickledir" ] || mkdir -p "$tickledir"
1032
# Bring the tickles registered with ctdb for a port into line with the
# TCP connections that are currently established to this node's public
# addresses on that port.
#
# $1 - TCP port number
#
# A tickle is added for every established connection that does not
# have one yet, and removed for every tickle whose connection has gone
# away.  Two scratch files under $tickledir are used and removed again
# afterwards.
update_tickles ()
{
        _port="$1"

        # The scratch directory may have gone away since this file
        # was sourced.
        mkdir -p "$tickledir"

        # Number of this node, with any leading "PNN:" stripped.
        _pnn=$(ctdb pnn)
        _pnn=${_pnn#PNN:}

        # Public addresses currently assigned to this node.
        _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')

        # The same addresses as a regexp alternation.  The dots are
        # escaped with a doubled backslash because awk -v processes
        # escape sequences once before the value is used as a regexp.
        _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

        # Scratch files, keyed by port.
        _my_connections="${tickledir}/${_port}.connections"
        _my_tickles="${tickledir}/${_port}.tickles"
        rm -f "$_my_connections" "$_my_tickles"

        # Established connections to our public addresses on this
        # port, one "source destination" pair per line.
        netstat -tn |
        awk -v destpat="^${_ipschoice}:${_port}\$" \
          '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
        sort >"$_my_connections"

        # Tickles ctdb already knows about, in the same format.  The
        # first line of each gettickles listing is skipped.
        for _i in $_ips ; do
                ctdb -Y gettickles $_i $_port |
                awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
        done |
        sort >"$_my_tickles"

        # Connections without a tickle: add one.
        comm -23 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb addtickle $_src $_dst
        done

        # Tickles without a connection: drop them.
        comm -13 "$_my_connections" "$_my_tickles" |
        while read _src _dst ; do
                ctdb deltickle $_src $_dst
        done

        rm -f "$_my_connections" "$_my_tickles"
}
1079
1080 ########################################################
1081 # load a site local config file
1082 ########################################################
1083
# Source optional site-local customisations, in this order:
#   1. the file named by $CTDB_RC_LOCAL, if set and executable
#   2. $CTDB_BASE/rc.local, if executable
#   3. every executable file in $CTDB_BASE/rc.local.d
# All paths are quoted so that directories containing whitespace work,
# and the obsolescent "test -a" is replaced by two separate tests.
if [ -n "$CTDB_RC_LOCAL" ] && [ -x "$CTDB_RC_LOCAL" ] ; then
        . "$CTDB_RC_LOCAL"
fi

if [ -x "$CTDB_BASE/rc.local" ] ; then
        . "$CTDB_BASE/rc.local"
fi

if [ -d "$CTDB_BASE/rc.local.d" ] ; then
        # With no matching files the pattern stays literal and fails
        # the -x test, so the loop body is skipped.
        for i in "$CTDB_BASE"/rc.local.d/* ; do
                if [ -x "$i" ] ; then
                        . "$i"
                fi
        done
fi
1097
# Per-script defaults, derived from how the eventscript was invoked.
script_name="${0##*/}"          # $0 with any leading directories removed
service_name="${script_name}"   # scripts may override this
service_fail_limit=1
event_name="${1}"               # first argument of the eventscript