Now vaguely tested initscript updates.
[vlendec/samba-autobuild/.git] / ctdb / config / functions
# utility functions for ctdb event scripts

# Search the standard system binary directories before anything the
# caller already had in PATH.
PATH="/bin:/usr/bin:/usr/sbin:/sbin:${PATH}"
4
#######################################
# pull in a system config file, if any
# usage: loadconfig [NAME]
#
# Unless NAME is "ctdb", the "ctdb" configuration is read first.  When
# NAME is empty the name is taken from $service_config, falling back to
# $service_name.  For each name the first existing file out of
# /etc/sysconfig/NAME, /etc/default/NAME and $CTDB_BASE/sysconfig/NAME
# is sourced into the current shell.
loadconfig() {
    # Work from "$1" rather than a shared variable: nothing in this
    # file is function-local, so a global holding the name would be
    # overwritten with "ctdb" by the time the requested file is looked
    # up, and that file would never be read.
    if [ "$1" != "ctdb" ] ; then
        _loadconfig_file "ctdb"
    fi

    if [ -n "$1" ] ; then
        _loadconfig_file "$1"
        return
    fi

    foo="${service_config:-${service_name}}"
    # "ctdb" has just been read above, do not source it a second time.
    if [ -n "$foo" ] && [ "$foo" != "ctdb" ] ; then
        _loadconfig_file "$foo"
    fi
}

# source the first config file that exists for the name given in $1
_loadconfig_file() {
    if [ -f "/etc/sysconfig/$1" ]; then
        . "/etc/sysconfig/$1"
    elif [ -f "/etc/default/$1" ]; then
        . "/etc/default/$1"
    elif [ -f "$CTDB_BASE/sysconfig/$1" ]; then
        . "$CTDB_BASE/sysconfig/$1"
    fi
}
28
##############################################################
# work out which init style this system uses and store it in
# $CTDB_INIT_STYLE: "suse" when /sbin/startproc is executable,
# "debian" when /sbin/start-stop-daemon is, otherwise "redhat"
detect_init_style() {
    # a value that is already set is left alone
    if [ -n "$CTDB_INIT_STYLE" ] ; then
        return 0
    fi

    if [ -x /sbin/startproc ]; then
        CTDB_INIT_STYLE="suse"
        return 0
    fi

    if [ -x /sbin/start-stop-daemon ]; then
        CTDB_INIT_STYLE="debian"
        return 0
    fi

    CTDB_INIT_STYLE="redhat"
}
43
######################################################
# simulate /sbin/service on platforms that don't have it
# usage: service SERVICE_NAME OPERATION
#
# Uses /sbin/service when it is executable, otherwise runs the init
# script from /etc/init.d or /etc/rc.d/init.d.  Nothing is run when no
# such script exists.
service() {
    _service_name="$1"
    _op="$2"

    # do nothing, when no service was specified
    if [ -z "$_service_name" ] ; then
        return 0
    fi

    if [ -x /sbin/service ]; then
        /sbin/service "$_service_name" "$_op"
        return
    fi

    for _service_dir in /etc/init.d /etc/rc.d/init.d ; do
        if [ -x "$_service_dir/$_service_name" ]; then
            "$_service_dir/$_service_name" "$_op"
            return
        fi
    done
}
61
######################################################
# simulate /sbin/service (niced) on platforms that don't have it
# usage: nice_service SERVICE_NAME OPERATION
#
# nice(1) can only execute programs found on disk, never the service()
# shell function, so the lookup of the real program is repeated here
# and nice is applied to that program directly.
nice_service() {
    _service_name="$1"
    _op="$2"

    # do nothing, when no service was specified
    if [ -z "$_service_name" ] ; then
        return 0
    fi

    if [ -x /sbin/service ]; then
        nice /sbin/service "$_service_name" "$_op"
    elif [ -x "/etc/init.d/$_service_name" ]; then
        nice "/etc/init.d/$_service_name" "$_op"
    elif [ -x "/etc/rc.d/init.d/$_service_name" ]; then
        nice "/etc/rc.d/init.d/$_service_name" "$_op"
    fi
}
67
######################################################
# wait for a command to return a zero exit status
# usage: ctdb_wait_command SERVICE_NAME <command>
#
# Retries <command> once a second until it succeeds.  Exits the
# calling script with status 1 as soon as "ctdb status" fails.
######################################################
ctdb_wait_command() {
    service_name="$1"
    wait_cmd="$2"
    if [ -z "$wait_cmd" ] ; then
        return 0
    fi

    echo "Waiting for service $service_name to start"
    all_ok=0
    until [ $all_ok -eq 1 ] ; do
        # $wait_cmd is deliberately unquoted: it holds a command line
        if $wait_cmd > /dev/null 2>&1 ; then
            all_ok=1
        fi
        # the daemon is checked on every pass, even a successful one
        if ! ctdb status > /dev/null 2>&1 ; then
            echo "ctdb daemon has died. Exiting wait for $service_name"
            exit 1
        fi
        if [ $all_ok -ne 1 ] ; then
            sleep 1
        fi
    done
    echo "Local service $service_name is up"
}
88
89
######################################################
# wait for a set of tcp ports
# usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
#
# Polls once a second until every port answers.  Returns 127 when no
# probing tool is installed and 1 when "ctdb status" fails.
######################################################
ctdb_wait_tcp_ports() {
    service_name="$1"
    shift
    wait_ports="$*"
    if [ -z "$wait_ports" ] ; then
        return 0
    fi

    echo "Waiting for tcp service $service_name to start"
    all_ok=0
    until [ $all_ok -eq 1 ] ; do
        all_ok=1
        for p in $wait_ports ; do
            # netcat/nc probe 127.0.0.1 directly; without them fall
            # back to looking for a listener in the netstat output
            if [ -x /usr/bin/netcat ] ; then
                /usr/bin/netcat -z 127.0.0.1 $p > /dev/null || all_ok=0
            elif [ -x /usr/bin/nc ] ; then
                /usr/bin/nc -z 127.0.0.1 $p > /dev/null || all_ok=0
            elif [ -x /usr/bin/netstat ] || [ -x /bin/netstat ] ; then
                netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null || all_ok=0
            else
                echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
                return 127
            fi
        done
        if [ $all_ok -ne 1 ] ; then
            sleep 1
        fi
        if ! ctdb status > /dev/null 2>&1 ; then
            echo "ctdb daemon has died. Exiting tcp wait $service_name"
            return 1
        fi
    done
    echo "Local tcp services for $service_name are up"
}
125
126
127
######################################################
# wait for a set of directories
# usage: ctdb_wait_directories SERVICE_NAME <directories...>
#
# Polls once a second until every directory exists.  Exits the calling
# script with status 1 as soon as "ctdb status" fails.
######################################################
ctdb_wait_directories() {
    service_name="$1"
    shift
    wait_dirs="$*"
    if [ -z "$wait_dirs" ] ; then
        return 0
    fi

    echo "Waiting for local directories for $service_name"
    all_ok=0
    until [ $all_ok -eq 1 ] ; do
        all_ok=1
        for d in $wait_dirs ; do
            if [ ! -d "$d" ] ; then
                all_ok=0
            fi
        done
        if [ $all_ok -ne 1 ] ; then
            sleep 1
        fi
        if ! ctdb status > /dev/null 2>&1 ; then
            echo "ctdb daemon has died. Exiting directory wait for $service_name"
            exit 1
        fi
    done
    echo "Local directories for $service_name are available"
}
152
153
######################################################
# check that a rpc server is registered with portmap
# and responding to requests
# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
#
# Exits the calling script with status 1 when the rpcinfo query fails.
######################################################
ctdb_check_rpc() {
    service_name="$1"
    prognum="$2"
    version="$3"

    if ! rpcinfo -u localhost $prognum $version > /dev/null ; then
        echo "ERROR: $service_name not responding to rpc requests"
        exit 1
    fi
}
168
######################################################
# check a set of directories is available
# return 1 on a missing directory
# usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
#
# Leaves $service_name set to SERVICE_NAME and, on failure, $d set to
# the first directory that is missing.
######################################################
ctdb_check_directories_probe() {
    service_name="$1"
    shift
    for d in "$@" ; do
        # entries containing a "%" are skipped without being tested
        if [ "${d#*%}" != "$d" ] ; then
            continue
        fi
        if [ ! -d "$d" ] ; then
            return 1
        fi
    done
    return 0
}
188
######################################################
# check a set of directories is available
# usage: ctdb_check_directories SERVICE_NAME <directories...>
#
# Exits the calling script with status 1 on the first missing
# directory.
######################################################
ctdb_check_directories() {
    # Note: ctdb_check_directories_probe sets both $service_name and $d,
    # which is what the error message below reports.
    if ctdb_check_directories_probe "$@" ; then
        return 0
    fi

    echo "ERROR: $service_name directory $d not available"
    exit 1
}
200
######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports <ports...>
#
# Returns 1, after printing an error, for the first port that is not
# listening or when no probing tool is installed.
#
# NOTE(review): every positional parameter is treated as a port and
# $service_name is only read here, never set - confirm that callers do
# not pass a service name as the first argument.
######################################################
ctdb_check_tcp_ports() {
    if [ -z "$1" ] ; then
        return 0
    fi

    # pick the probing tool: netstat is preferred, netcat/nc is only
    # looked for when no netstat is installed
    NETCAT=""
    NETSTAT=""
    for _tool in /usr/bin/netstat /bin/netstat ; do
        if [ -x "$_tool" ] ; then
            NETSTAT="$_tool"
            break
        fi
    done
    if [ -z "$NETSTAT" ] ; then
        for _tool in /usr/bin/netcat /bin/netcat /usr/bin/nc /bin/nc ; do
            if [ -x "$_tool" ] ; then
                NETCAT="$_tool"
                break
            fi
        done
    fi

    for p in "$@" ; do
        all_ok=1

        if [ -n "$NETCAT" ] ; then
            ${NETCAT} -z 127.0.0.1 $p > /dev/null || all_ok=0
        elif [ -n "$NETSTAT" ] ; then
            # accept a listener on the IPv4 or the IPv6 wildcard address
            ${NETSTAT} -a -n | egrep "0.0.0.0:$p .*LISTEN" > /dev/null ||
                ${NETSTAT} -a -n | egrep ":::$p .*LISTEN" > /dev/null ||
                all_ok=0
        else
            echo "ERROR: neither netcat (or nc) nor netstat found!"
            echo "ERROR: can't monitor ${service_name} tcp port ${p}"
            all_ok=0
        fi

        if [ $all_ok -ne 1 ] ; then
            echo "ERROR: $service_name tcp port $p is not responding"
            return 1
        fi
    done
}
248
######################################################
# check a unix socket
# usage: ctdb_check_unix_socket <socket_path>
#
# Returns 1, after printing an error, when the socket is not found.
# With netstat available the socket must show up as a listening unix
# socket; without it the path only has to be a socket file.
#
# NOTE(review): the first argument is used as the socket path and
# $service_name is only read for the error message - confirm that
# callers do not pass a service name first.
######################################################
ctdb_check_unix_socket() {
    socket_path="$1"
    if [ -z "$socket_path" ] ; then
        return 0
    fi

    # Look netstat up via PATH first, then in the usual locations.  The
    # result of the lookup is checked for being non-empty, because a
    # bare "[ -x ]" is true and would hide a failed lookup.
    NETSTAT=$(command -v netstat 2>/dev/null)
    if [ -z "$NETSTAT" ] || [ ! -x "$NETSTAT" ] ; then
        if [ -x /usr/bin/netstat ]; then
            NETSTAT=/usr/bin/netstat
        elif [ -x /bin/netstat ]; then
            NETSTAT=/bin/netstat
        else
            NETSTAT=""
        fi
    fi

    all_ok=1
    if [ -n "$NETSTAT" ]; then
        "$NETSTAT" -l -a -n | grep -qE "^unix.*LISTEN.*${socket_path}$" || all_ok=0
    else
        [ -S "$socket_path" ] || all_ok=0
    fi

    if [ $all_ok -ne 1 ] ; then
        echo "ERROR: $service_name socket $socket_path not found"
        return 1
    fi
}
283
######################################################
# check a command returns zero status
# usage: ctdb_check_command SERVICE_NAME <command>
#
# Exits the calling script with status 1 when <command> fails.
######################################################
ctdb_check_command() {
    service_name="$1"
    wait_cmd="$2"
    if [ -z "$wait_cmd" ] ; then
        return 0
    fi

    # $wait_cmd is deliberately unquoted: it holds a command line
    if ! $wait_cmd > /dev/null 2>&1 ; then
        echo "ERROR: $service_name - $wait_cmd returned error"
        exit 1
    fi
}
297
################################################
# kill off any TCP connections with the given IP
# usage: kill_tcp_connections IP
#
# Sends "ctdb killtcp" for every established connection found for IP,
# then waits for the connections to disappear.
################################################
kill_tcp_connections() {
    _IP="$1"
    _failed=0
    _killcount=0
    connfile="$CTDB_BASE/state/connections.$_IP"

    # Save fields 4 and 5 of every established connection netstat
    # lists for the address, in plain and in ::ffff: mapped notation.
    {
        netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}'
        netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}'
    } > "$connfile"

    while read dest src ; do
        # split each endpoint at its last colon into address and port
        srcip="${src%:*}"
        srcport="${src##*:}"
        destip="${dest%:*}"
        destport="${dest##*:}"
        echo "Killing TCP connection $srcip:$srcport $destip:$destport"
        ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
        # we only do one-way killtcp for CIFS (ports 139 and 445),
        # for all others we do 2-way
        if [ "$destport" != "139" ] && [ "$destport" != "445" ] ; then
            ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
        fi
        _killcount=$((_killcount + 1))
    done < "$connfile"
    /bin/rm -f "$connfile"

    if [ "$_failed" != 0 ] ; then
        echo "Failed to send killtcp control"
        return
    fi
    # nothing was found, so there is nothing to wait for
    [ $_killcount -gt 0 ] || return

    # allow the connections a few seconds to go away
    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null ; do
        sleep 1
        _count=$((_count + 1))
        if [ $_count -gt 3 ] ; then
            echo "Timed out killing tcp connections for IP $_IP"
            return
        fi
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}
347
##################################################################
# kill off the local end for any TCP connections with the given IP
# usage: kill_tcp_connections_local_only IP
#
# Sends a single "ctdb killtcp" for every established connection found
# for IP, then waits for the connections to disappear.
##################################################################
kill_tcp_connections_local_only() {
    _IP="$1"
    _failed=0
    _killcount=0
    connfile="$CTDB_BASE/state/connections.$_IP"

    # Save fields 4 and 5 of every established connection netstat
    # lists for the address, in plain and in ::ffff: mapped notation.
    {
        netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}'
        netstat -tn | grep -E "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}'
    } > "$connfile"

    while read dest src ; do
        # split each endpoint at its last colon into address and port
        srcip="${src%:*}"
        srcport="${src##*:}"
        destip="${dest%:*}"
        destport="${dest##*:}"
        echo "Killing TCP connection $srcip:$srcport $destip:$destport"
        ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
        _killcount=$((_killcount + 1))
    done < "$connfile"
    /bin/rm -f "$connfile"

    if [ "$_failed" != 0 ] ; then
        echo "Failed to send killtcp control"
        return
    fi
    # nothing was found, so there is nothing to wait for
    [ $_killcount -gt 0 ] || return

    # allow the connections a few seconds to go away
    _count=0
    while netstat -tn | grep -E "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null ; do
        sleep 1
        _count=$((_count + 1))
        if [ $_count -gt 3 ] ; then
            echo "Timed out killing tcp connections for IP $_IP"
            return
        fi
    done
    echo "killed $_killcount TCP connections to released IP $_IP"
}
389
########################################################
# start/stop the nfs service on different platforms
# usage: startstop_nfs start|stop
#
# The platform is "rhel" when /etc/init.d/nfslock is executable,
# otherwise "sles" when /etc/init.d/nfsserver is.  On any other system
# the calling script is exited with status 1.
########################################################
startstop_nfs() {
    PLATFORM="unknown"
    if [ -x /etc/init.d/nfsserver ] ; then
        PLATFORM="sles"
    fi
    # checked last so that it wins when both scripts exist
    if [ -x /etc/init.d/nfslock ] ; then
        PLATFORM="rhel"
    fi

    case "$PLATFORM:$1" in
    sles:start)
        service nfsserver start
        ;;
    sles:stop)
        service nfsserver stop > /dev/null 2>&1
        ;;
    rhel:start)
        service nfslock start
        service nfs start
        ;;
    rhel:stop)
        service nfs stop > /dev/null 2>&1
        service nfslock stop > /dev/null 2>&1
        ;;
    sles:*|rhel:*)
        # any other operation is silently ignored
        ;;
    *)
        echo "Unknown platform. NFS is not supported with ctdb"
        exit 1
        ;;
    esac
}
431
########################################################
# start/stop the nfs lockmanager service on different platforms
# usage: startstop_nfslock start|stop
#
# The platform is "rhel" when /etc/init.d/nfslock is executable,
# otherwise "sles" when /etc/init.d/nfsserver is.  On any other system
# the calling script is exited with status 1.
########################################################
startstop_nfslock() {
    PLATFORM="unknown"
    if [ -x /etc/init.d/nfsserver ] ; then
        PLATFORM="sles"
    fi
    # checked last so that it wins when both scripts exist
    if [ -x /etc/init.d/nfslock ] ; then
        PLATFORM="rhel"
    fi

    case "$PLATFORM:$1" in
    # for sles there is no service for lockmanager
    # so we instead just shutdown/restart nfs
    sles:start)
        service nfsserver start
        ;;
    sles:stop)
        service nfsserver stop > /dev/null 2>&1
        ;;
    rhel:start)
        service nfslock start
        ;;
    rhel:stop)
        service nfslock stop > /dev/null 2>&1
        ;;
    sles:*|rhel:*)
        # any other operation is silently ignored
        ;;
    *)
        echo "Unknown platform. NFS locking is not supported with ctdb"
        exit 1
        ;;
    esac
}
473
########################################################
# remove an ip address from an interface
# usage: remove_ip ADDRESS INTERFACE
#
# Sets $failed to 1 when deleting or re-adding an address fails.
########################################################
remove_ip() {
    # Deleting a primary address makes the ip tool drop every secondary
    # address as well.  To work around this _very_ annoying behaviour
    # the secondaries are recorded first and re-added afterwards.
    secondaries=""
    if ip addr list dev $2 primary | grep -q "inet $1 " ; then
        secondaries=$(ip addr list dev $2 secondary | awk '/ inet /{print $2}')
    fi

    ip addr del $1 dev $2 > /dev/null 2>&1 || failed=1

    if [ -n "$secondaries" ] ; then
        for i in $secondaries ; do
            if ip addr list dev $2 | grep -q "inet $i" ; then
                echo "kept secondary $i on dev $2"
            else
                echo "re-adding secondary address $i to dev $2"
                ip addr add $i dev $2 || failed=1
            fi
        done
    fi
}
497
498 ########################################################
499 # some simple logic for counting events - per eventscript
500 # usage: ctdb_counter_init
501 #        ctdb_counter_incr
502 #        ctdb_check_counter_limit <limit>
503 # ctdb_check_counter_limit succeeds when count >= <limit>
504 ########################################################
# Set $_counter_file to the failure counter of the current service
# ($ctdb_fail_dir/$service_name) and create the directory holding it.
_ctdb_counter_common () {
    _counter_file="$ctdb_fail_dir/$service_name"
    # everything up to the last "/" is the directory part
    _counter_dir="${_counter_file%/*}"
    mkdir -p "$_counter_dir"
}
# Reset the failure counter of the current service to zero by
# truncating (or creating) its counter file.
ctdb_counter_init () {
    _ctdb_counter_common

    : > "$_counter_file"
}
# Add one to the failure counter of the current service.
ctdb_counter_incr () {
    _ctdb_counter_common

    # unary counting: the count is the size of the file, so append
    # exactly one character without a newline
    printf 1 >> "$_counter_file"
}
# usage: ctdb_check_counter_limit [<limit> [<quiet>]]
# Exits the calling script with status 1 once the failure counter of
# the current service has reached <limit> (default:
# $service_fail_limit).  Below the limit a warning is printed for a
# non-zero count, unless <quiet> is non-empty.
ctdb_check_counter_limit () {
    _ctdb_counter_common

    _limit="${1:-${service_fail_limit}}"
    _quiet="$2"

    # unary counting: the count is the size of the counter file, a
    # file that cannot be examined counts as zero
    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)

    if [ $_size -ge $_limit ] ; then
        echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
        exit 1
    fi

    if [ $_size -gt 0 ] && [ -z "$_quiet" ] ; then
        echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
    fi
}
535 ########################################################
536
# State kept by the functions in this file lives below one spool
# directory: status files, failure counters and "active" markers.
ctdb_spool_dir="/var/spool/ctdb"
ctdb_status_dir="${ctdb_spool_dir}/status"
ctdb_fail_dir="${ctdb_spool_dir}/failcount"
ctdb_active_dir="${ctdb_spool_dir}/active"
541
# Report the recorded status of the current service.
# Returns 1 when a readable "unhealthy" status file exists, 2 when a
# readable "banned" one exists (unhealthy takes precedence) and 0
# otherwise.  The status file found is passed to log_status_cat.
ctdb_checkstatus ()
{
    _status_base="$ctdb_status_dir/$service_name"

    if [ -r "$_status_base/unhealthy" ] ; then
        log_status_cat "unhealthy" "$_status_base/unhealthy"
        return 1
    fi

    if [ -r "$_status_base/banned" ] ; then
        log_status_cat "banned" "$_status_base/banned"
        return 2
    fi

    return 0
}
554
# usage: ctdb_setstatus unhealthy|banned <file>
#        ctdb_setstatus <anything else>
# Records the given status for the current service by copying <file>
# into its status directory.  Any other status removes both the
# "banned" and the "unhealthy" record.
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$service_name"

    if [ "$1" = "unhealthy" ] || [ "$1" = "banned" ] ; then
        mkdir -p "$d"
        cat "$2" >"$d/$1"
    else
        rm -f "$d/banned" "$d/unhealthy"
    fi
}
570
# Succeeds when a "reconfigure" flag exists for the current service.
ctdb_service_needs_reconfigure ()
{
    test -e "${ctdb_status_dir}/${service_name}/reconfigure"
}
575
# Flag the current service as needing reconfiguration by creating an
# empty "reconfigure" file in its status directory.
ctdb_service_set_reconfigure ()
{
    d="${ctdb_status_dir}/${service_name}"
    mkdir -p "$d"
    : > "$d/reconfigure"
}
582
# Clear the "reconfigure" flag of the current service, it is not an
# error if the flag was not set.
ctdb_service_unset_reconfigure ()
{
    _reconfigure_flag="${ctdb_status_dir}/${service_name}/reconfigure"
    rm -f "$_reconfigure_flag"
}
587
# Reconfigure the current service: evaluate $service_reconfigure when
# it is set, otherwise restart the service.  Afterwards the
# "reconfigure" flag is cleared and the failure counter is reset.
ctdb_service_reconfigure ()
{
    if [ -n "$service_reconfigure" ] ; then
        # Quoted so that eval gets the command text unchanged.  An
        # unquoted expansion is word-split and glob-expanded first,
        # which collapses whitespace and expands wildcards even inside
        # quoted arguments of the command.
        eval "$service_reconfigure"
    else
        service "$service_name" restart
    fi
    ctdb_service_unset_reconfigure
    ctdb_counter_init
}
598
# usage: ctdb_compat_managed_service <value> <service>
# Appends <service>, surrounded by spaces, to the list in $t when
# <value> is exactly "yes".
ctdb_compat_managed_service ()
{
    case "$1" in
        yes)
            t="$t $2 "
            ;;
    esac
}
605
# Succeeds when the current service ($service_name) is managed by
# ctdb: it is either listed in $CTDB_MANAGED_SERVICES or enabled
# through one of the per-service CTDB_MANAGES_* variables.
is_ctdb_managed_service ()
{
    # build a space separated list with a space at either end
    t=" $CTDB_MANAGED_SERVICES "

    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"

    # Returns 0 if "<space>$service_name<space>" appears in $t
    case "$t" in
        *" "${service_name}" "*)
            return 0
            ;;
        *)
            return 1
            ;;
    esac
}
621
# Bring the current service in line with whether ctdb manages it,
# using a marker file in $ctdb_active_dir to remember that it was
# started here.
#  - managed, not yet marked: start it, mark it, exit 0
#  - managed, already marked: return to the caller
#  - not managed:             stop it if marked, then exit 0
# A failing start or stop exits the script with that failure status.
ctdb_start_stop_service ()
{
    _active="$ctdb_active_dir/$service_name"

    if is_ctdb_managed_service ; then
        if [ -e "$_active" ] ; then
            return 0
        fi
        echo "Starting service $service_name"
        ctdb_service_start || exit $?
        mkdir -p "$ctdb_active_dir"
        touch "$_active"
        exit 0
    fi

    if [ -e "$_active" ] ; then
        echo "Stopping service $service_name"
        ctdb_service_stop || exit $?
        rm -f "$_active"
    fi
    exit 0
}
643
# Start the current service: evaluate $service_start when it is set,
# otherwise run the service's "start" operation.  The failure counter
# is reset afterwards.
ctdb_service_start ()
{
    if [ -n "$service_start" ] ; then
        # Quoted so that eval gets the command text unchanged.  An
        # unquoted expansion is word-split and glob-expanded first,
        # which collapses whitespace and expands wildcards even inside
        # quoted arguments of the command.
        eval "$service_start"
    else
        service "$service_name" start
    fi
    ctdb_counter_init
}
653
# Stop the current service: evaluate $service_stop when it is set,
# otherwise run the service's "stop" operation.  The status of that
# command is the status of this function.
ctdb_service_stop ()
{
    if [ -n "$service_stop" ] ; then
        # Quoted so that eval gets the command text unchanged.  An
        # unquoted expansion is word-split and glob-expanded first,
        # which collapses whitespace and expands wildcards even inside
        # quoted arguments of the command.
        eval "$service_stop"
    else
        service "$service_name" stop
    fi
}
662
########################################################
# load a site local config file
#
# $CTDB_BASE/rc.local is sourced when it is executable, followed by
# every executable file found in $CTDB_BASE/rc.local.d
########################################################

if [ -x "$CTDB_BASE/rc.local" ] ; then
    . "$CTDB_BASE/rc.local"
fi

if [ -d "$CTDB_BASE/rc.local.d" ] ; then
    for i in "$CTDB_BASE"/rc.local.d/* ; do
        if [ -x "$i" ] ; then
            . "$i"
        fi
    done
fi
676
# A reasonable default for the service name is the basename of the
# eventscript, obtained by dropping everything up to the last "/" of $0.
service_name="${0##*/}"
# used by ctdb_check_counter_limit when it is called without a limit
service_fail_limit=1

# The first argument is the event name.  It is shifted away so that the
# positional parameters start with the second argument.
ctdb_event="$1"
shift
cmd="$ctdb_event"