1 # utility functions for ctdb event scripts
# Search the standard system binary directories before anything already on the
# inherited PATH.
3 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
5 #######################################
6 # pull in a system config file, if any
# NOTE(review): lines are missing from this excerpt (no function header, no
# closing fi/}, and several branch bodies are absent), so only the statements
# that are actually visible are described here.
10 if [ "$name" != "ctdb" ] ; then
13 if [ -z "$name" ] ; then
# No explicit name: use $service_config, falling back to $service_name.
14 foo="${service_config:-${service_name}}"
15 if [ -n "$foo" ] ; then
# Probe for a per-name config file in /etc/sysconfig, then /etc/default, then
# $CTDB_BASE/sysconfig. The visible branch bodies source (".") the file found;
# the body of the /etc/default branch is not visible in this excerpt.
20 if [ -f /etc/sysconfig/$name ]; then
21 . /etc/sysconfig/$name
22 elif [ -f /etc/default/$name ]; then
24 elif [ -f $CTDB_BASE/sysconfig/$name ]; then
25 . $CTDB_BASE/sysconfig/$name
29 ##############################################################
30 # determine on what type of system (init style) we are running
32 # only do detection if not already set:
# An already non-empty CTDB_INIT_STYLE is kept as-is (early return).
33 test "x$CTDB_INIT_STYLE" != "x" && return
# Classify the platform by which init helper is executable:
#   /sbin/startproc          -> "suse"
#   /sbin/start-stop-daemon  -> "debian"
35 if [ -x /sbin/startproc ]; then
36 CTDB_INIT_STYLE="suse"
37 elif [ -x /sbin/start-stop-daemon ]; then
38 CTDB_INIT_STYLE="debian"
# NOTE(review): the line introducing this branch is missing from the excerpt;
# presumably "redhat" is the fallback when neither helper exists - confirm.
40 CTDB_INIT_STYLE="redhat"
44 ######################################################
45 # simulate /sbin/service on platforms that don't have it
# $_service_name and $_op are assigned on lines not shown in this excerpt.
50 # do nothing, when no service was specified
51 test "x$_service_name" = "x" && return
# Dispatch order: /sbin/service if it is executable, otherwise the service's
# init script under /etc/init.d, otherwise the one under /etc/rc.d/init.d.
# In every case the operation ($_op) is passed through unchanged.
53 if [ -x /sbin/service ]; then
54 /sbin/service "$_service_name" "$_op"
55 elif [ -x /etc/init.d/$_service_name ]; then
56 /etc/init.d/$_service_name "$_op"
57 elif [ -x /etc/rc.d/init.d/$_service_name ]; then
58 /etc/rc.d/init.d/$_service_name "$_op"
62 ######################################################
63 # simulate /sbin/service (niced) on platforms that don't have it
68 ######################################################
69 # wait for a command to return a zero exit status
70 # usage: ctdb_wait_command SERVICE_NAME <command>
71 ######################################################
# NOTE(review): the function header and the assignments of $service_name,
# $wait_cmd and $all_ok are on lines missing from this excerpt.
# Nothing to wait for when no command was supplied.
75 [ -z "$wait_cmd" ] && return;
77 echo "Waiting for service $service_name to start"
# Poll until the command succeeds.
78 while [ $all_ok -eq 0 ]; do
# $wait_cmd is expanded unquoted, so it is word-split into command + arguments;
# its output is discarded and only the exit status matters.
79 $wait_cmd > /dev/null 2>&1 && all_ok=1
# On every iteration also check that "ctdb status" still succeeds; the failure
# branch reports that the daemon died (the rest of that branch is not visible).
80 ctdb status > /dev/null 2>&1 || {
81 echo "ctdb daemon has died. Exiting wait for $service_name"
# Pause one second between unsuccessful attempts.
84 [ $all_ok -eq 1 ] || sleep 1
86 echo "Local service $service_name is up"
90 ######################################################
91 # wait for a set of tcp ports
92 # usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
93 ######################################################
94 ctdb_wait_tcp_ports() {
# NOTE(review): the assignments of $service_name, $wait_ports and $all_ok, and
# the closing done/fi/} lines, are missing from this excerpt.
# Nothing to wait for when no ports were supplied.
98 [ -z "$wait_ports" ] && return;
100 echo "Waiting for tcp service $service_name to start"
101 while [ $all_ok -eq 0 ]; do
# Any single port that fails its probe clears all_ok for this pass.
103 for p in $wait_ports; do
# Probe tool preference: /usr/bin/netcat, then /usr/bin/nc (both connect to
# 127.0.0.1 with -z), then netstat if /usr/bin/netstat or /bin/netstat exists.
104 if [ -x /usr/bin/netcat ]; then
105 /usr/bin/netcat -z 127.0.0.1 $p > /dev/null || all_ok=0
106 elif [ -x /usr/bin/nc ]; then
107 /usr/bin/nc -z 127.0.0.1 $p > /dev/null || all_ok=0
# The netstat branches invoke "netstat" via PATH and only match a listener
# whose local address contains "0.0.0.0:<port>".
# NOTE(review): the "." characters in the pattern are unescaped regex
# wildcards, and a listener bound only to an IPv6 wildcard address would not
# match this pattern - confirm whether that is intended.
108 elif [ -x /usr/bin/netstat ]; then
109 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
110 elif [ -x /bin/netstat ]; then
111 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
# NOTE(review): "availabe" in the message below is a typo for "available"; it
# is a runtime string, so it is left untouched here.
113 echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
# Pause one second between unsuccessful passes.
117 [ $all_ok -eq 1 ] || sleep 1
# Check on every pass that "ctdb status" still succeeds; the failure branch
# reports that the daemon died (the rest of that branch is not visible).
118 ctdb status > /dev/null 2>&1 || {
119 echo "ctdb daemon has died. Exiting tcp wait $service_name"
123 echo "Local tcp services for $service_name are up"
128 ######################################################
129 # wait for a set of directories
130 # usage: ctdb_wait_directories SERVICE_NAME <directories...>
131 ######################################################
132 ctdb_wait_directories() {
# NOTE(review): the assignments of $service_name, $wait_dirs and $all_ok, and
# the closing done/} lines, are missing from this excerpt.
# Nothing to wait for when no directories were supplied.
136 [ -z "$wait_dirs" ] && return;
138 echo "Waiting for local directories for $service_name"
139 while [ $all_ok -eq 0 ]; do
# $wait_dirs is split on whitespace; any entry that is not a directory clears
# all_ok for this pass.
141 for d in $wait_dirs; do
142 [ -d $d ] || all_ok=0
# Pause one second between unsuccessful passes.
144 [ $all_ok -eq 1 ] || sleep 1
# Check on every pass that "ctdb status" still succeeds; the failure branch
# reports that the daemon died (the rest of that branch is not visible).
145 ctdb status > /dev/null 2>&1 || {
146 echo "ctdb daemon has died. Exiting directory wait for $service_name"
150 echo "Local directories for $service_name are available"
154 ######################################################
155 # check that an RPC server is registered with portmap
156 # and responding to requests
157 # usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
158 ######################################################
# NOTE(review): the function header and the assignments of $service_name,
# $prognum and $version are on lines missing from this excerpt.
# Query the given program/version on localhost with "rpcinfo -u" (standard
# output discarded); a non-zero exit status enters the error branch below,
# whose remaining lines are not visible.
163 rpcinfo -u localhost $prognum $version > /dev/null || {
164 echo "ERROR: $service_name not responding to rpc requests"
169 ######################################################
170 # check a set of directories is available
171 # return 1 on a missing directory
172 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
173 ######################################################
174 ctdb_check_directories_probe() {
# NOTE(review): the lines that assign $service_name and iterate over the
# directory arguments are missing from this excerpt.
# Return 1 as soon as $d is not an existing directory; $d is left set to the
# offending path for the caller.
183 [ -d "$d" ] || return 1
189 ######################################################
190 # check a set of directories is available
191 # usage: ctdb_check_directories SERVICE_NAME <directories...>
192 ######################################################
193 ctdb_check_directories() {
# Forwards all arguments to the probe and reports the first missing directory.
# The remainder of the failure branch is not visible in this excerpt.
194 # Note: ctdb_check_directories_probe sets both $service_name and $d.
195 ctdb_check_directories_probe "$@" || {
196 echo "ERROR: $service_name directory $d not available"
201 ######################################################
202 # check a set of tcp ports
203 # usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
204 ######################################################
205 ctdb_check_tcp_ports() {
# NOTE(review): several lines are missing from this excerpt (variable setup,
# some branch bodies, the port loop header and the closing fi/done/}).
# No arguments at all: nothing to check.
206 [ -z "$1" ] && return;
208 # check availability of netcat or netstat first
# A single if/elif chain: netstat locations are tried first, then netcat, then
# nc, so the first tool found decides which of NETSTAT / NETCAT is set by the
# visible branches.
211 if [ -x /usr/bin/netstat ]; then
212 NETSTAT=/usr/bin/netstat
213 elif [ -x /bin/netstat ]; then
215 elif [ -x /usr/bin/netcat ]; then
216 NETCAT=/usr/bin/netcat
217 elif [ -x /bin/netcat ]; then
219 elif [ -x /usr/bin/nc ]; then
221 elif [ -x /bin/nc ]; then
# Probe port $p: with netcat, try to connect to 127.0.0.1 (-z); with netstat,
# look for a LISTEN entry on "0.0.0.0:<port>" and, if that is absent, on
# ":::<port>".
228 if [ "x${NETCAT}" != "x" ]; then
229 ${NETCAT} -z 127.0.0.1 $p > /dev/null || all_ok=0
230 elif [ "x${NETSTAT}" != "x" ]; then
231 if ! ${NETSTAT} -a -n | egrep "0.0.0.0:$p .*LISTEN" > /dev/null ; then
232 if ! ${NETSTAT} -a -n | egrep ":::$p .*LISTEN" > /dev/null ; then
# Neither tool was found: the port cannot be monitored at all.
237 echo "ERROR: neither netcat (or nc) nor netstat found!"
238 echo "ERROR: can't monitor ${service_name} tcp port ${p}"
# Report the failing port; the rest of this branch is not visible.
242 [ $all_ok -eq 1 ] || {
243 echo "ERROR: $service_name tcp port $p is not responding"
249 ######################################################
250 # check a unix socket
251 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
252 ######################################################
253 ctdb_check_unix_socket() {
# NOTE(review): the assignments of $service_name, $socket_path and $all_ok and
# several branch/closing lines are missing from this excerpt.
# No socket path supplied: nothing to check.
255 [ -z "$socket_path" ] && return;
257 # check availability of netstat first
# Locate netstat: first via "type -p", then the two fixed locations.
# NOTE(review): when "type -p netstat" prints nothing, the unquoted
# substitution vanishes and "[ -x ]" is a one-argument test, which is true; in
# that case NETSTAT would be set to the empty string and the elif fallbacks
# never consulted - confirm and consider quoting. "type -p" is also not POSIX.
259 if [ -x $(type -p netstat) ]; then
260 NETSTAT=$(type -p netstat)
261 elif [ -x /usr/bin/netstat ]; then
262 NETSTAT=/usr/bin/netstat
263 elif [ -x /bin/netstat ]; then
# With netstat available, look for a line that starts with "unix", contains
# LISTEN and ends with the socket path.
268 if [ "x$NETSTAT" != "x" ]; then
269 if $NETSTAT -l -a -n | grep -qE "^unix.*LISTEN.*${socket_path}$"; then
# Alternative check: the path exists and is a socket (-S). Presumably this is
# the branch taken when netstat is unavailable - the enclosing else is not
# visible here.
275 [ -S ${socket_path} ] && all_ok=1 || all_ok=0
# Report failure; the rest of this branch is not visible.
278 [ $all_ok -eq 1 ] || {
279 echo "ERROR: $service_name socket $socket_path not found"
284 ######################################################
285 # check a command returns zero status
286 # usage: ctdb_check_command SERVICE_NAME <command>
287 ######################################################
288 ctdb_check_command() {
# NOTE(review): the assignments of $service_name and $wait_cmd and the closing
# lines are missing from this excerpt.
# No command supplied: nothing to check.
291 [ -z "$wait_cmd" ] && return;
# Run the command once (word-split, all output discarded); a non-zero exit
# status enters the error branch, whose remaining lines are not visible.
292 $wait_cmd > /dev/null 2>&1 || {
293 echo "ERROR: $service_name - $wait_cmd returned error"
298 ################################################
299 # kill off any TCP connections with the given IP
300 ################################################
301 kill_tcp_connections() {
# NOTE(review): the assignments of $_IP, $_failed, $_killcount and $_count,
# the CIFS condition, the loop terminators and the closing braces are on lines
# missing from this excerpt.
# Snapshot the ESTABLISHED TCP connections that involve $_IP, both in plain
# form and in "::ffff:" mapped form, into a per-IP file under $CTDB_BASE/state.
# Columns 4 and 5 of the netstat output are stored (read back as dest and src).
306 connfile="$CTDB_BASE/state/connections.$_IP"
307 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
308 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
310 while read dest src; do
# Split each endpoint at its last ":" into address and port, so addresses that
# themselves contain colons stay intact.
311 srcip=`echo $src | sed -e "s/:[^:]*$//"`
312 srcport=`echo $src | sed -e "s/^.*://"`
313 destip=`echo $dest | sed -e "s/:[^:]*$//"`
314 destport=`echo $dest | sed -e "s/^.*://"`
315 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
# Send the kill for the src -> dest direction; any failure is remembered.
316 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
318 # we only do one-way killtcp for CIFS
320 # for all others we do 2-way
# Reverse direction (dest -> src).
322 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
325 _killcount=`expr $_killcount + 1`
# Report if any killtcp call failed; the rest of this branch is not visible.
329 [ $_failed = 0 ] || {
330 echo "Failed to send killtcp control"
# Branch taken when no connection was processed (body not visible).
333 [ $_killcount -gt 0 ] || {
# Poll until no ESTABLISHED connection for $_IP remains, giving up once
# $_count exceeds 3. Only the plain address form is checked here, not the
# "::ffff:" mapped form.
337 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
339 _count=`expr $_count + 1`
340 [ $_count -gt 3 ] && {
341 echo "Timed out killing tcp connections for IP $_IP"
345 echo "killed $_killcount TCP connections to released IP $_IP"
348 ##################################################################
349 # kill off the local end for any TCP connections with the given IP
350 ##################################################################
351 kill_tcp_connections_local_only() {
# NOTE(review): the assignments of $_IP, $_failed, $_killcount and $_count,
# the loop terminators and the closing braces are on lines missing from this
# excerpt.
# Snapshot the ESTABLISHED TCP connections that involve $_IP, both in plain
# form and in "::ffff:" mapped form, into a per-IP file under $CTDB_BASE/state.
# Columns 4 and 5 of the netstat output are stored (read back as dest and src).
356 connfile="$CTDB_BASE/state/connections.$_IP"
357 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
358 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
360 while read dest src; do
# Split each endpoint at its last ":" into address and port.
361 srcip=`echo $src | sed -e "s/:[^:]*$//"`
362 srcport=`echo $src | sed -e "s/^.*://"`
363 destip=`echo $dest | sed -e "s/:[^:]*$//"`
364 destport=`echo $dest | sed -e "s/^.*://"`
365 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
# Only the src -> dest direction is sent in the visible lines of this
# function; any failure is remembered.
366 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
367 _killcount=`expr $_killcount + 1`
# Report if any killtcp call failed; the rest of this branch is not visible.
371 [ $_failed = 0 ] || {
372 echo "Failed to send killtcp control"
# Branch taken when no connection was processed (body not visible).
375 [ $_killcount -gt 0 ] || {
# Poll until no ESTABLISHED connection for $_IP remains, giving up once
# $_count exceeds 3.
379 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
381 _count=`expr $_count + 1`
382 [ $_count -gt 3 ] && {
383 echo "Timed out killing tcp connections for IP $_IP"
387 echo "killed $_killcount TCP connections to released IP $_IP"
390 ########################################################
391 # start/stop the nfs service on different platforms
392 ########################################################
# NOTE(review): the function header, the bodies of the two platform probes and
# the surrounding case/branch structure are missing from this excerpt; the
# grouping of the service calls below is therefore not shown.
# Platform probes: which NFS init script is executable.
395 [ -x /etc/init.d/nfsserver ] && {
398 [ -x /etc/init.d/nfslock ] && {
# Calls using the "nfsserver" service name.
406 service nfsserver start
409 service nfsserver stop > /dev/null 2>&1
# Calls using the "nfs" / "nfslock" service names; output of the stop calls is
# discarded.
416 service nfslock start
420 service nfs stop > /dev/null 2>&1
421 service nfslock stop > /dev/null 2>&1
426 echo "Unknown platform. NFS is not supported with ctdb"
432 ########################################################
433 # start/stop the nfs lockmanager service on different platforms
434 ########################################################
435 startstop_nfslock() {
# NOTE(review): the bodies of the two platform probes and the surrounding
# case/branch structure are missing from this excerpt.
# Platform probes: which NFS init script is executable.
437 [ -x /etc/init.d/nfsserver ] && {
440 [ -x /etc/init.d/nfslock ] && {
446 # for sles there is no service for lockmanager
447 # so we instead just shutdown/restart nfs
450 service nfsserver start
453 service nfsserver stop > /dev/null 2>&1
# Calls using the dedicated "nfslock" service name.
460 service nfslock start
463 service nfslock stop > /dev/null 2>&1
468 echo "Unknown platform. NFS locking is not supported with ctdb"
474 ########################################################
475 # remove an ip address from an interface
476 ########################################################
478 # the ip tool will delete all secondary IPs if this is the primary.
479 # To work around this _very_ annoying behaviour we have to keep a
480 # record of the secondaries and re-add them afterwards. yuck
# NOTE(review): the function header, the else/fi/done lines and the final
# return are missing from this excerpt. As used below, $1 is the address to
# delete and $2 is the interface.
# If $1 is a primary address on $2, record the interface's secondary addresses
# (second field of each " inet " line) before deleting.
482 if ip addr list dev $2 primary | grep -q "inet $1 " ; then
483 secondaries=`ip addr list dev $2 secondary | grep " inet " | awk '{print $2}'`
# Delete the address; a failure is remembered in $failed.
485 ip addr del $1 dev $2 >/dev/null 2>/dev/null || failed=1
# For each recorded secondary: report it as kept if it is still listed on the
# interface; the re-add lines below are presumably the alternative branch (the
# else line is not visible).
486 [ -z "$secondaries" ] || {
487 for i in $secondaries; do
488 if ip addr list dev $2 | grep -q "inet $i" ; then
489 echo "kept secondary $i on dev $2"
491 echo "re-adding secondary address $i to dev $2"
492 ip addr add $i dev $2 || failed=1
498 ########################################################
499 # some simple logic for counting events - per eventscript
500 # usage: ctdb_counter_init
502 # ctdb_check_counter_limit <limit>
503 # ctdb_check_counter_limit succeeds when count >= <limit>
504 ########################################################
505 _ctdb_counter_common () {
# Sets $_counter_file to the per-service counter path under $ctdb_fail_dir and
# makes sure its parent directory exists.
506 _counter_file="$ctdb_fail_dir/$service_name"
507 mkdir -p "${_counter_file%/*}" # dirname
# NOTE(review): the body of ctdb_counter_init and the first lines of
# ctdb_counter_incr are missing from this excerpt.
509 ctdb_counter_init () {
514 ctdb_counter_incr () {
# Append a single character (no newline) to the counter file, so the file
# grows by one byte per call.
518 echo -n 1 >> "$_counter_file"
520 ctdb_check_counter_limit () {
# NOTE(review): the lines that set $_counter_file and $_quiet, and the
# return/exit statements of both branches, are missing from this excerpt.
# The limit is the first argument, defaulting to $service_fail_limit.
523 _limit="${1:-${service_fail_limit}}"
# The counter value is the size of the counter file in bytes; if stat fails
# (for example when the file does not exist) 0 is used.
527 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
# At or above the limit: report an error. Below the limit but non-zero: print
# a warning unless $_quiet is non-empty.
528 if [ $_size -ge $_limit ] ; then
529 echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
531 elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
532 echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
535 ########################################################
# Locations of per-service state kept under the ctdb spool directory:
# status flags, failure counters and "active" markers.
537 ctdb_spool_dir="/var/spool/ctdb"
538 ctdb_status_dir="$ctdb_spool_dir/status"
539 ctdb_fail_dir="$ctdb_spool_dir/failcount"
540 ctdb_active_dir="$ctdb_spool_dir/active"
# NOTE(review): the function headers, return statements and loop bodies for
# these status helpers are missing from this excerpt.
# A readable "unhealthy" file is checked before a readable "banned" file; the
# matching file is passed to log_status_cat together with its status name.
544 if [ -r "$ctdb_status_dir/$service_name/unhealthy" ] ; then
545 log_status_cat "unhealthy" "$ctdb_status_dir/$service_name/unhealthy"
547 elif [ -r "$ctdb_status_dir/$service_name/banned" ] ; then
548 log_status_cat "banned" "$ctdb_status_dir/$service_name/banned"
# Per-service status directory.
557 d="$ctdb_status_dir/$service_name"
# Iterates over both status names; the loop body is not visible here.
564 for i in "banned" "unhealthy" ; do
# Succeeds when the "reconfigure" flag file exists in the service's status
# directory.
571 ctdb_service_needs_reconfigure ()
573 [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
# NOTE(review): only the status-directory assignment is visible; the lines
# that presumably create the "reconfigure" flag are missing from this excerpt.
576 ctdb_service_set_reconfigure ()
578 d="$ctdb_status_dir/$service_name"
# Removes the service's "reconfigure" flag file; -f makes a missing file a
# non-error.
583 ctdb_service_unset_reconfigure ()
585 rm -f "$ctdb_status_dir/$service_name/reconfigure"
# Reconfigures the service and then clears its "reconfigure" flag.
588 ctdb_service_reconfigure ()
# A non-empty $service_reconfigure is evaluated as shell code (unquoted, so it
# is word-split before eval re-joins it).
590 if [ -n "$service_reconfigure" ] ; then
591 eval $service_reconfigure
# Presumably the fallback branch (the else line is not visible): restart the
# service.
593 service "$service_name" restart
595 ctdb_service_unset_reconfigure
# NOTE(review): only the test on the first argument is visible; what happens
# when it equals "yes" is on lines missing from this excerpt.
599 ctdb_compat_managed_service ()
601 if [ "$1" = "yes" ] ; then
# Succeeds when $service_name appears as a whole word in the managed-service
# list held in $t.
606 is_ctdb_managed_service ()
# Pad the list with a space on each side so that every entry, including the
# first and last, is surrounded by spaces.
608 t=" $CTDB_MANAGED_SERVICES "
# Feed each legacy CTDB_MANAGES_* setting, paired with its service name, to
# the compat helper. NOTE(review): presumably the helper appends the name to
# $t when the setting is "yes" - its body is not visible here.
610 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
611 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
612 ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
613 ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
614 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
615 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
616 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
# Stripping the shortest prefix ending in " <name> " changes $t only when that
# pattern occurs, so inequality with the original means the name is present.
618 # Returns 0 if "<space>$service_name<space>" appears in $t
619 [ "${t#* ${service_name} }" != "${t}" ]
# Starts or stops the service depending on whether it is ctdb-managed and
# whether its "active" marker exists.
# NOTE(review): the lines that create/remove the marker and the closing fi/}
# are missing from this excerpt.
622 ctdb_start_stop_service ()
# Marker path recording that the service is considered active.
624 _active="$ctdb_active_dir/$service_name"
# Managed but not yet marked active: start it; a failed start exits the script
# with that status.
626 if is_ctdb_managed_service ; then
627 if ! [ -e "$_active" ] ; then
628 echo "Starting service $service_name"
629 ctdb_service_start || exit $?
630 mkdir -p "$ctdb_active_dir"
# Not managed but still marked active: stop it.
# NOTE(review): this re-runs the same predicate that just failed, so it looks
# equivalent to a plain else - confirm.
634 elif ! is_ctdb_managed_service ; then
635 if [ -e "$_active" ] ; then
636 echo "Stopping service $service_name"
637 ctdb_service_stop || exit $?
# NOTE(review): the body of the "$service_start is set" branch and the else
# line are missing from this excerpt; presumably the "service ... start" call
# is the fallback when $service_start is empty.
644 ctdb_service_start ()
646 if [ -n "$service_start" ] ; then
649 service "$service_name" start
# NOTE(review): the function header, the body of the "$service_stop is set"
# branch and the else line are missing from this excerpt; presumably the
# "service ... stop" call is the fallback when $service_stop is empty.
656 if [ -n "$service_stop" ] ; then
659 service "$service_name" stop
663 ########################################################
664 # load a site local config file
665 ########################################################
# Source $CTDB_BASE/rc.local into the current shell, but only when it is
# marked executable. The closing lines of both groups are not visible here.
667 [ -x $CTDB_BASE/rc.local ] && {
668 . $CTDB_BASE/rc.local
# Likewise source every executable entry in $CTDB_BASE/rc.local.d, in glob
# order; non-executable entries are skipped.
671 [ -d $CTDB_BASE/rc.local.d ] && {
672 for i in $CTDB_BASE/rc.local.d/* ; do
673 [ -x "$i" ] && . "$i"
677 # A reasonable default is the basename of the eventscript.
# Strip everything up to the last "/" from $0.
678 service_name="${0##*/}" # basename
# The first positional argument is taken as the event name and then shifted
# off, leaving the remaining arguments in "$@".
681 ctdb_event="$1" ; shift