1 # utility functions for ctdb event scripts
# Put the standard system binary directories ahead of whatever PATH was
# inherited from the caller.
3 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
5 #######################################
6 # pull in a system config file, if any
9 if [ "$1" != "ctdb" ] ; then
14 foo="${service_config:-${service_name}}"
15 if [ -n "$foo" ] ; then
20 if [ -f /etc/sysconfig/$1 ]; then
22 elif [ -f /etc/default/$1 ]; then
24 elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
25 . $CTDB_BASE/sysconfig/$1
29 ##############################################################
30 # determine on what type of system (init style) we are running
32 # only do detection if not already set:
33 test "x$CTDB_INIT_STYLE" != "x" && return
35 if [ -x /sbin/startproc ]; then
36 CTDB_INIT_STYLE="suse"
37 elif [ -x /sbin/start-stop-daemon ]; then
38 CTDB_INIT_STYLE="debian"
40 CTDB_INIT_STYLE="redhat"
44 ######################################################
45 # simulate /sbin/service on platforms that don't have it
50 # do nothing when no service was specified
51 [ -z "$_service_name" ] && return
53 if [ -x /sbin/service ]; then
54 /sbin/service "$_service_name" "$_op"
55 elif [ -x /etc/init.d/$_service_name ]; then
56 /etc/init.d/$_service_name "$_op"
57 elif [ -x /etc/rc.d/init.d/$_service_name ]; then
58 /etc/rc.d/init.d/$_service_name "$_op"
62 ######################################################
63 # simulate /sbin/service (niced) on platforms that don't have it
65 # do nothing when no service was specified
71 ######################################################
72 # wait for a command to return a zero exit status
73 # usage: ctdb_wait_command SERVICE_NAME <command>
74 ######################################################
78 [ -z "$wait_cmd" ] && return;
80 echo "Waiting for service $service_name to start"
81 while [ $all_ok -eq 0 ]; do
82 $wait_cmd > /dev/null 2>&1 && all_ok=1
83 ctdb status > /dev/null 2>&1 || {
84 echo "ctdb daemon has died. Exiting wait for $service_name"
87 [ $all_ok -eq 1 ] || sleep 1
89 echo "Local service $service_name is up"
93 ######################################################
94 # wait for a set of tcp ports
95 # usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
96 ######################################################
97 ctdb_wait_tcp_ports() {
101 [ -z "$wait_ports" ] && return;
103 echo "Waiting for tcp service $service_name to start"
104 while [ $all_ok -eq 0 ]; do
106 for p in $wait_ports; do
107 if [ -x /usr/bin/netcat ]; then
108 /usr/bin/netcat -z 127.0.0.1 $p > /dev/null || all_ok=0
109 elif [ -x /usr/bin/nc ]; then
110 /usr/bin/nc -z 127.0.0.1 $p > /dev/null || all_ok=0
111 elif [ -x /usr/bin/netstat ]; then
112 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
113 elif [ -x /bin/netstat ]; then
114 (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
116 echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
120 [ $all_ok -eq 1 ] || sleep 1
121 ctdb status > /dev/null 2>&1 || {
122 echo "ctdb daemon has died. Exiting tcp wait $service_name"
126 echo "Local tcp services for $service_name are up"
130 ######################################################
131 # check that an RPC server is registered with portmap
132 # and is responding to requests
133 # usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
134 ######################################################
139 rpcinfo -u localhost $prognum $version > /dev/null || {
140 echo "ERROR: $progname not responding to rpc requests"
145 ######################################################
146 # check that a set of directories is available
147 # return 1 on a missing directory
148 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
149 ######################################################
150 ctdb_check_directories_probe() {
151 while IFS="" read d ; do
157 [ -d "$d" ] || return 1
162 ######################################################
163 # check that a set of directories is available
164 # usage: ctdb_check_directories SERVICE_NAME <directories...>
165 ######################################################
166 ctdb_check_directories() {
167 n="${1:-${service_name}}"
168 ctdb_check_directories_probe || {
169 echo "ERROR: $n directory \"$d\" not available"
174 ######################################################
175 # check a set of tcp ports
176 # usage: ctdb_check_tcp_ports <ports...>
177 ######################################################
178 ctdb_check_tcp_ports() {
181 if ! netstat -a -t -n | grep -q "0\.0\.0\.0:$p .*LISTEN" ; then
182 if ! netstat -a -t -n | grep -q ":::$p .*LISTEN" ; then
183 echo "ERROR: $service_name tcp port $p is not responding"
190 ######################################################
191 # check a unix socket
192 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
193 ######################################################
194 ctdb_check_unix_socket() {
196 [ -z "$socket_path" ] && return
198 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
199 echo "ERROR: $service_name socket $socket_path not found"
204 ######################################################
205 # check that a command returns zero status
206 # usage: ctdb_check_command SERVICE_NAME <command>
207 ######################################################
208 ctdb_check_command() {
211 [ -z "$wait_cmd" ] && return;
212 $wait_cmd > /dev/null 2>&1 || {
213 echo "ERROR: $service_name - $wait_cmd returned error"
218 ################################################
219 # kill off any TCP connections with the given IP
220 ################################################
221 kill_tcp_connections() {
226 connfile="$CTDB_BASE/state/connections.$_IP"
227 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
228 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
230 while read dest src; do
231 srcip=`echo $src | sed -e "s/:[^:]*$//"`
232 srcport=`echo $src | sed -e "s/^.*://"`
233 destip=`echo $dest | sed -e "s/:[^:]*$//"`
234 destport=`echo $dest | sed -e "s/^.*://"`
235 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
236 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
238 # we only do one-way killtcp for CIFS
240 # for all others we do 2-way
242 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
245 _killcount=`expr $_killcount + 1`
249 [ $_failed = 0 ] || {
250 echo "Failed to send killtcp control"
253 [ $_killcount -gt 0 ] || {
257 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
259 _count=`expr $_count + 1`
260 [ $_count -gt 3 ] && {
261 echo "Timed out killing tcp connections for IP $_IP"
265 echo "killed $_killcount TCP connections to released IP $_IP"
268 ##################################################################
269 # kill off the local end for any TCP connections with the given IP
270 ##################################################################
271 kill_tcp_connections_local_only() {
276 connfile="$CTDB_BASE/state/connections.$_IP"
277 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
278 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
280 while read dest src; do
281 srcip=`echo $src | sed -e "s/:[^:]*$//"`
282 srcport=`echo $src | sed -e "s/^.*://"`
283 destip=`echo $dest | sed -e "s/:[^:]*$//"`
284 destport=`echo $dest | sed -e "s/^.*://"`
285 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
286 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
287 _killcount=`expr $_killcount + 1`
291 [ $_failed = 0 ] || {
292 echo "Failed to send killtcp control"
295 [ $_killcount -gt 0 ] || {
299 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
301 _count=`expr $_count + 1`
302 [ $_count -gt 3 ] && {
303 echo "Timed out killing tcp connections for IP $_IP"
307 echo "killed $_killcount TCP connections to released IP $_IP"
310 ########################################################
311 # start/stop the nfs service on different platforms
312 ########################################################
315 [ -x /etc/init.d/nfsserver ] && {
318 [ -x /etc/init.d/nfslock ] && {
326 service nfsserver start
329 service nfsserver stop > /dev/null 2>&1
336 service nfslock start
340 service nfs stop > /dev/null 2>&1
341 service nfslock stop > /dev/null 2>&1
346 echo "Unknown platform. NFS is not supported with ctdb"
352 ########################################################
353 # start/stop the nfs lockmanager service on different platforms
354 ########################################################
355 startstop_nfslock() {
357 [ -x /etc/init.d/nfsserver ] && {
360 [ -x /etc/init.d/nfslock ] && {
366 # for SLES there is no service for the lock manager,
367 # so we instead just shut down/restart nfs
370 service nfsserver start
373 service nfsserver stop > /dev/null 2>&1
380 service nfslock start
383 service nfslock stop > /dev/null 2>&1
388 echo "Unknown platform. NFS locking is not supported with ctdb"
394 ########################################################
395 # remove an ip address from an interface
396 ########################################################
398 # the ip tool will delete all secondary IPs if this is the primary.
399 # To work around this _very_ annoying behaviour we have to keep a
400 # record of the secondaries and re-add them afterwards. yuck
402 if ip addr list dev $2 primary | grep -q "inet $1 " ; then
403 secondaries=`ip addr list dev $2 secondary | grep " inet " | awk '{print $2}'`
405 ip addr del $1 dev $2 >/dev/null 2>/dev/null || failed=1
406 [ -z "$secondaries" ] || {
407 for i in $secondaries; do
408 if ip addr list dev $2 | grep -q "inet $i" ; then
409 echo "kept secondary $i on dev $2"
411 echo "re-adding secondary address $i to dev $2"
412 ip addr add $i dev $2 || failed=1
418 ########################################################
419 # some simple logic for counting events - per eventscript
420 # usage: ctdb_counter_init
422 # ctdb_check_counter_limit <limit>
423 # ctdb_check_counter_limit succeeds when count >= <limit>
424 ########################################################
425 _ctdb_counter_common () {
426 _counter_file="$ctdb_fail_dir/$service_name"
427 mkdir -p "${_counter_file%/*}" # dirname
429 ctdb_counter_init () {
434 ctdb_counter_incr () {
438 echo -n 1 >> "$_counter_file"
440 ctdb_check_counter_limit () {
443 _limit="${1:-${service_fail_limit}}"
447 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
448 if [ $_size -ge $_limit ] ; then
449 echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
451 elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
452 echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
455 ########################################################
# Base spool directory and the sub-directories the helpers in this file
# build paths from:
#   ctdb_status_dir - status files (e.g. <name>/unhealthy, <name>/banned,
#                     <name>/reconfigure)
#   ctdb_fail_dir   - per-service failure counter files
#   ctdb_active_dir - per-service "active" marker entries
457 ctdb_spool_dir="/var/spool/ctdb"
458 ctdb_status_dir="$ctdb_spool_dir/status"
459 ctdb_fail_dir="$ctdb_spool_dir/failcount"
460 ctdb_active_dir="$ctdb_spool_dir/active"
464 echo "node is \"$1\", problem with \"${script_name}\": $(cat $2)"
469 if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
470 log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
472 elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
473 log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
482 d="$ctdb_status_dir/$script_name"
489 for i in "banned" "unhealthy" ; do
496 ctdb_service_needs_reconfigure ()
498 [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
501 ctdb_service_set_reconfigure ()
503 d="$ctdb_status_dir/$service_name"
508 ctdb_service_unset_reconfigure ()
510 rm -f "$ctdb_status_dir/$service_name/reconfigure"
513 ctdb_service_reconfigure ()
515 if [ -n "$service_reconfigure" ] ; then
516 eval $service_reconfigure
518 service "$service_name" restart
520 ctdb_service_unset_reconfigure
524 ctdb_compat_managed_service ()
526 if [ "$1" = "yes" ] ; then
531 is_ctdb_managed_service ()
533 t=" $CTDB_MANAGED_SERVICES "
535 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
536 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
537 ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
538 ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
539 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
540 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
541 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
542 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
544 # Returns 0 if "<space>$service_name<space>" appears in $t
545 [ "${t#* ${service_name} }" != "${t}" ]
548 ctdb_start_stop_service ()
550 _active="$ctdb_active_dir/$service_name"
552 if is_ctdb_managed_service ; then
553 if ! [ -e "$_active" ] ; then
554 echo "Starting service $service_name"
555 ctdb_service_start || exit $?
556 mkdir -p "$ctdb_active_dir"
560 elif ! is_ctdb_managed_service ; then
561 if [ -e "$_active" ] ; then
562 echo "Stopping service $service_name"
563 ctdb_service_stop || exit $?
570 ctdb_service_start ()
572 if [ -n "$service_start" ] ; then
575 service "$service_name" start
582 if [ -n "$service_stop" ] ; then
585 service "$service_name" stop
589 ########################################################
590 # load a site local config file
591 ########################################################
593 [ -x $CTDB_BASE/rc.local ] && {
594 . $CTDB_BASE/rc.local
597 [ -d $CTDB_BASE/rc.local.d ] && {
598 for i in $CTDB_BASE/rc.local.d/* ; do
599 [ -x "$i" ] && . "$i"
603 # A reasonable default is the basename of the eventscript.
# script_name is $0 with everything up to the last "/" stripped;
# service_name starts out equal to script_name.
604 script_name="${0##*/}" # basename
605 service_name="$script_name"
608 ctdb_event="$1" ; shift
611 case "$ctdb_event" in