1 # Hey Emacs, this is a -*- shell-script -*- !!!
3 # utility functions for ctdb event scripts
5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
7 [ -z "$CTDB_VARDIR" ] && {
8 if [ -d "/var/lib/ctdb" ] ; then
9 export CTDB_VARDIR="/var/lib/ctdb"
11 export CTDB_VARDIR="/var/ctdb"
14 [ -z "$CTDB_ETCDIR" ] && {
15 export CTDB_ETCDIR="/etc"
18 #######################################
19 # pull in a system config file, if any
23 foo="${service_config:-${service_name}}"
24 if [ -n "$foo" ] ; then
27 elif [ "$1" != "ctdb" ] ; then
31 if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
32 . $CTDB_ETCDIR/sysconfig/$1
33 elif [ -f $CTDB_ETCDIR/default/$1 ]; then
34 . $CTDB_ETCDIR/default/$1
35 elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
36 . $CTDB_BASE/sysconfig/$1
44 ##############################################################
46 # CTDB_SCRIPT_DEBUGLEVEL can be overridden by setting it in a
50 if [ ${CTDB_SCRIPT_DEBUGLEVEL:-2} -ge 4 ] ; then
51 # If there are arguments then echo them. Otherwise expect to
52 # use stdin, which allows us to pass lots of debug using a
71 # Log given message or stdin to either syslog or a CTDB log file
72 # $1 is the tag passed to logger if syslog is in use.
78 if [ "$CTDB_SYSLOG" = "yes" -o -z "$CTDB_LOGFILE" ] ; then
81 case "$CTDB_OPTIONS" in
82 *--syslog*) _using_syslog=true ;;
85 if $_using_syslog ; then
86 logger -t "ctdbd: ${_tag}" $*
94 } >>"${CTDB_LOGFILE:-/var/log/log.ctdb}"
98 # When things are run in the background in an eventscript then logging
99 # output might get lost. This is the "solution". :-)
# Runs the command given as arguments with its stdin taken from
# /dev/null and with both stdout and stderr piped to script_log, which
# is given the script name followed by "&" as its tag.
100 background_with_logging ()
103 "$@" 2>&1 </dev/null |
104 script_log "${script_name}&"
110 ##############################################################
111 # check number of args for different events
117 echo "ERROR: must supply interface, IP and maskbits"
123 echo "ERROR: must supply old interface, new interface, IP and maskbits"
130 ##############################################################
131 # determine on what type of system (init style) we are running
# Sets CTDB_INIT_STYLE unless it already has a non-empty value.
# An executable /sbin/startproc selects "suse" and an executable
# /sbin/start-stop-daemon selects "debian"; "redhat" is the remaining
# value assigned here (presumably the fallback when neither exists -
# TODO confirm).
132 detect_init_style() {
133 # only do detection if not already set:
134 test "x$CTDB_INIT_STYLE" != "x" && return
136 if [ -x /sbin/startproc ]; then
137 CTDB_INIT_STYLE="suse"
138 elif [ -x /sbin/start-stop-daemon ]; then
139 CTDB_INIT_STYLE="debian"
141 CTDB_INIT_STYLE="redhat"
145 ######################################################
146 # simulate /sbin/service on platforms that don't have it
147 # _service() makes it easier to hook the service() function for
154 # do nothing, when no service was specified
155 [ -z "$_service_name" ] && return
157 if [ -x /sbin/service ]; then
158 $_nice /sbin/service "$_service_name" "$_op"
159 elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
160 $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
161 elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
162 $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
172 ######################################################
173 # simulate /sbin/service (niced) on platforms that don't have it
180 ######################################################
181 # wrapper around /proc/ settings to allow them to be hooked
183 # 1st arg is relative path under /proc/, 2nd arg is value to set
186 echo "$2" >"/proc/$1"
189 ######################################################
190 # wrapper around getting file contents from /proc/ to allow
191 # this to be hooked for testing
192 # 1st arg is relative path under /proc/
198 ######################################################
199 # Check that an RPC service is healthy -
200 # this includes allowing a certain number of failures
201 # before marking the NFS service unhealthy.
203 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
205 # each triple is a set of 3 arguments: an operator, a
206 # fail count limit and an action string.
210 # nfs_check_rpc_service "lockd" \
211 # -ge 15 "verbose restart unhealthy" \
212 # -eq 10 "restart:bs"
214 # says that if lockd is down for 15 iterations then do
215 # a verbose restart of lockd and mark the node unhealthy.
216 # Before this, after 10 iterations of failure, the
217 # service is restarted silently in the background.
218 # Order is important: the failure counts need to be
219 # specified in descending order because processing stops
220 # after the first condition that is true.
221 ######################################################
222 nfs_check_rpc_service ()
224 _prog_name="$1" ; shift
229 *) _v="$1" ; shift ;;
233 _rpc_prog="$_prog_name"
236 case "$_prog_name" in
240 _restart="echo 'Trying to restart NFS service'"
241 _restart="${_restart}; startstop_nfs restart"
246 _restart="echo 'Trying to restart Ganesha NFS service'"
247 _restart="${_restart}; startstop_ganesha restart"
250 _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
253 _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
258 _restart="echo 'Trying to restart lock manager service'"
259 _restart="${_restart}; startstop_nfslock restart"
263 _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
264 _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
265 _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
268 echo "Internal error: unknown RPC program \"$_prog_name\"."
272 _service_name="nfs_${_prog_name}"
274 if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
275 ctdb_counter_init "$_service_name"
279 ctdb_counter_incr "$_service_name"
281 while [ -n "$3" ] ; do
282 ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
283 for _action in $3 ; do
286 echo "$ctdb_check_rpc_out"
289 # No explicit command specified, construct rpc command.
290 if [ -z "$_restart" ] ; then
291 _p="rpc.${_prog_name}"
292 _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
293 _restart="${_restart}; killall -q -9 $_p"
294 _restart="${_restart}; $_p $_opts"
297 # Process restart flags...
298 _flags="${_action#restart:}"
299 # There may not have been a colon...
300 [ "$_flags" != "$_action" ] || _flags=""
301 # q=quiet - everything to /dev/null
302 if [ "${_flags#*q}" != "$_flags" ] ; then
303 _restart="{ ${_restart} ; } >/dev/null 2>&1"
305 # s=stealthy - last command to /dev/null
306 if [ "${_flags#*s}" != "$_flags" ] ; then
307 _restart="${_restart} >/dev/null 2>&1"
309 # b=background - the whole thing, easy and reliable
310 if [ "${_flags#*b}" != "$_flags" ] ; then
311 _restart="{ ${_restart} ; } &"
321 echo "Internal error: unknown action \"$_action\"."
326 # Only process the first action group.
333 ######################################################
334 # check that a rpc server is registered with portmap
335 # and responding to requests
336 # usage: ctdb_check_rpc SERVICE_NAME VERSION
337 ######################################################
343 if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
344 ctdb_check_rpc_out="ERROR: $progname failed RPC check:
346 echo "$ctdb_check_rpc_out"
351 ######################################################
352 # check a set of directories is available
353 # return 1 on a missing directory
354 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
355 ######################################################
# Reads names with "read", one per line, into the variable d (IFS is
# emptied for the read so surrounding whitespace is kept) and returns 1
# when a tested name is not a directory.
# d is not local: ctdb_check_directories prints it in its error message.
356 ctdb_check_directories_probe() {
357 while IFS="" read d ; do
363 [ -d "${d}/." ] || return 1
368 ######################################################
369 # check a set of directories is available
370 # usage: ctdb_check_directories SERVICE_NAME <directories...>
371 ######################################################
# Runs ctdb_check_directories_probe and, if it fails, prints an error
# naming the service ($1, defaulting to $service_name) and the
# directory left in $d by the probe.
372 ctdb_check_directories() {
373 n="${1:-${service_name}}"
374 ctdb_check_directories_probe || {
375 echo "ERROR: $n directory \"$d\" not available"
380 ######################################################
381 # check a set of tcp ports
382 # usage: ctdb_check_tcp_ports <ports...>
383 ######################################################
385 # This flag file is created when a service is initially started. It
386 # is deleted the first time TCP port checks for that service succeed.
387 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
388 # message if a port check fails.
# Sets _ctdb_service_started_file to the flag file path for the current
# service: "$ctdb_fail_dir/$service_name.started".
389 _ctdb_check_tcp_common ()
391 _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
# Creates the "started" flag file for the current service, making its
# parent directory first if necessary.
394 ctdb_check_tcp_init ()
396 _ctdb_check_tcp_common
397 mkdir -p "${_ctdb_service_started_file%/*}" # dirname
398 touch "$_ctdb_service_started_file"
401 ctdb_check_tcp_ports()
403 if [ -z "$1" ] ; then
404 echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
408 # Set default value for CTDB_TCP_PORT_CHECKERS if unset.
409 # If any of these defaults are unsupported then this variable can
410 # be overridden in /etc/sysconfig/ctdb or via a file in
411 # /etc/ctdb/rc.local.d/.
412 : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
414 for _c in $CTDB_TCP_PORT_CHECKERS ; do
415 ctdb_check_tcp_ports_$_c "$@"
418 _ctdb_check_tcp_common
419 rm -f "$_ctdb_service_started_file"
423 _ctdb_check_tcp_common
424 if [ ! -f "$_ctdb_service_started_file" ] ; then
425 echo "ERROR: $service_name tcp port $_p is not responding"
427 $ctdb_check_tcp_ports_debug
430 echo "INFO: $service_name tcp port $_p is not responding"
437 ctdb_check_ports - checker $_c not implemented
438 output from checker was:
439 $ctdb_check_tcp_ports_debug
447 echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
452 ctdb_check_tcp_ports_netstat ()
454 _cmd='netstat -l -t -n'
456 if [ $? -eq 127 ] ; then
457 # netstat probably not installed - unlikely?
458 ctdb_check_tcp_ports_debug="$_ns"
462 for _p ; do # process each function argument (port)
463 for _a in '0\.0\.0\.0' '::' ; do
464 _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
465 if echo "$_ns" | grep -E -q "$_pat" ; then
466 # We matched the port, so process next port
471 # We didn't match the port, so flag an error.
472 ctdb_check_tcp_ports_debug="$_cmd shows this output:
480 ctdb_check_tcp_ports_nmap ()
482 # nmap wants a comma-separated list of ports
485 _ports="${_ports}${_ports:+,}${_p}"
488 _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
490 _nmap_out=$($_cmd 2>&1)
491 if [ $? -eq 127 ] ; then
492 # nmap probably not installed
493 ctdb_check_tcp_ports_debug="$_nmap_out"
497 # get the port-related output
498 _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
501 # looking for something like this:
502 # 445/open/tcp//microsoft-ds///
503 # possibly followed by a comma
505 case "$_port_info" in
506 # The info we're after must be either at the beginning of
507 # the string or it must follow a space.
510 # Nope, flag an error...
511 ctdb_check_tcp_ports_debug="$_cmd shows this output:
520 # Use the new "ctdb checktcpport" command to check the port.
521 # This is very cheap.
522 ctdb_check_tcp_ports_ctdb ()
524 for _p ; do # process each function argument (port)
525 _cmd="ctdb checktcpport $_p"
530 ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
534 # Couldn't bind, something already listening, next port...
538 ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
540 # assume not implemented
548 ######################################################
549 # check a unix socket
550 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
551 ######################################################
# Does nothing when $socket_path is empty.  Otherwise prints an error
# unless "netstat --unix -a -n" shows a line that starts with "unix",
# contains "LISTEN" and ends with $socket_path.
# NOTE(review): $socket_path is placed in the grep pattern unescaped, so
# regex metacharacters in the path (e.g. ".") match more than themselves.
552 ctdb_check_unix_socket() {
554 [ -z "$socket_path" ] && return
556 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
557 echo "ERROR: $service_name socket $socket_path not found"
562 ######################################################
563 # check a command returns zero status
564 # usage: ctdb_check_command SERVICE_NAME <command>
565 ######################################################
# Does nothing when $wait_cmd is empty.  Otherwise runs it with all
# output discarded and prints an error if it exits non-zero.
# $wait_cmd is expanded unquoted, so it is word-split into a command
# name and its arguments.
566 ctdb_check_command() {
569 [ -z "$wait_cmd" ] && return;
570 $wait_cmd > /dev/null 2>&1 || {
571 echo "ERROR: $service_name - $wait_cmd returned error"
576 ################################################
577 # kill off any TCP connections with the given IP
578 ################################################
579 kill_tcp_connections() {
584 connfile="$CTDB_VARDIR/state/connections.$_IP"
585 mkdir -p "${connfile%/*}" # dirname
586 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
587 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
589 while read dest src; do
590 srcip=`echo $src | sed -e "s/:[^:]*$//"`
591 srcport=`echo $src | sed -e "s/^.*://"`
592 destip=`echo $dest | sed -e "s/:[^:]*$//"`
593 destport=`echo $dest | sed -e "s/^.*://"`
594 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
595 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
597 # we only do one-way killtcp for CIFS
599 # for all others we do 2-way
601 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
604 _killcount=`expr $_killcount + 1`
608 [ $_failed = 0 ] || {
609 echo "Failed to send killtcp control"
612 [ $_killcount -gt 0 ] || {
616 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
618 _count=`expr $_count + 1`
619 [ $_count -gt 3 ] && {
620 echo "Timed out killing tcp connections for IP $_IP"
624 echo "killed $_killcount TCP connections to released IP $_IP"
627 ##################################################################
628 # kill off the local end for any TCP connections with the given IP
629 ##################################################################
630 kill_tcp_connections_local_only() {
635 connfile="$CTDB_VARDIR/state/connections.$_IP"
636 mkdir -p "${connfile%/*}" # dirname
637 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
638 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
640 while read dest src; do
641 srcip=`echo $src | sed -e "s/:[^:]*$//"`
642 srcport=`echo $src | sed -e "s/^.*://"`
643 destip=`echo $dest | sed -e "s/:[^:]*$//"`
644 destport=`echo $dest | sed -e "s/^.*://"`
645 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
646 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
647 _killcount=`expr $_killcount + 1`
651 [ $_failed = 0 ] || {
652 echo "Failed to send killtcp control"
655 [ $_killcount -gt 0 ] || {
659 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
661 _count=`expr $_count + 1`
662 [ $_count -gt 3 ] && {
663 echo "Timed out killing tcp connections for IP $_IP"
667 echo "killed $_killcount TCP connections to released IP $_IP"
670 ##################################################################
671 # tickle any TCP connections with the given IP
672 ##################################################################
673 tickle_tcp_connections() {
678 connfile="$CTDB_VARDIR/state/connections.$_IP"
679 mkdir -p "${connfile%/*}" # dirname
680 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
681 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
683 while read dest src; do
684 srcip=`echo $src | sed -e "s/:[^:]*$//"`
685 srcport=`echo $src | sed -e "s/^.*://"`
686 destip=`echo $dest | sed -e "s/:[^:]*$//"`
687 destport=`echo $dest | sed -e "s/^.*://"`
688 echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
689 ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
690 echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
691 ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
695 [ $_failed = 0 ] || {
696 echo "Failed to send tickle control"
701 ########################################################
702 # start/stop the Ganesha nfs service
703 ########################################################
706 _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
709 service "$_service_name" start
712 service "$_service_name" stop
715 service "$_service_name" restart
720 ########################################################
721 # start/stop the nfs service on different platforms
722 ########################################################
725 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
728 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
736 service nfsserver start
739 service nfsserver stop > /dev/null 2>&1
742 set_proc "fs/nfsd/threads" 0
743 service nfsserver stop > /dev/null 2>&1
745 service nfsserver start
752 service nfslock start
760 set_proc "fs/nfsd/threads" 0
761 service nfs stop > /dev/null 2>&1
762 service nfslock stop > /dev/null 2>&1
764 service nfslock start
770 echo "Unknown platform. NFS is not supported with ctdb"
776 ########################################################
777 # start/stop the nfs lockmanager service on different platforms
778 ########################################################
779 startstop_nfslock() {
781 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
784 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
790 # for sles there is no service for lockmanager
791 # so we instead just shutdown/restart nfs
794 service nfsserver start
797 service nfsserver stop > /dev/null 2>&1
800 service nfsserver stop
801 service nfsserver start
808 service nfslock start
811 service nfslock stop > /dev/null 2>&1
815 service nfslock start
820 echo "Unknown platform. NFS locking is not supported with ctdb"
832 _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
833 mkdir -p "${_lockfile%/*}" # dirname
834 [ -f "$_lockfile" ] || touch "$_lockfile"
837 # Note: use of return/exit/die() below only gets us out of the
838 # sub-shell, which is actually what we want. That is, the
839 # function should just return non-zero.
841 flock --timeout 30 0 || \
842 die "add_ip_to_iface: unable to get lock for ${_iface}"
844 # Ensure interface is up
845 ip link set "$_iface" up || \
846 die "Failed to bringup interface $_iface"
848 ip addr add "$_ip/$_maskbits" brd + dev "$_iface" || \
849 die "Failed to add $_ip/$_maskbits on dev $_iface"
852 # Do nothing here - return above only gets us out of the subshell
853 # and doing anything here will affect the return code.
856 delete_ip_from_iface()
862 _lockfile="${CTDB_VARDIR}/state/interface_modify_${_iface}.flock"
863 mkdir -p "${_lockfile%/*}" # dirname
864 [ -f "$_lockfile" ] || touch "$_lockfile"
867 # Note: use of return/exit/die() below only gets us out of the
868 # sub-shell, which is actually what we want. That is, the
869 # function should just return non-zero.
871 flock --timeout 30 0 || \
872 die "delete_ip_from_iface: unable to get lock for ${_iface}"
874 _im="$_ip/$_maskbits" # shorthand for readability
876 # "ip addr del" will delete all secondary IPs if this is the
877 # primary. To work around this _very_ annoying behaviour we
878 # have to keep a record of the secondaries and re-add them
882 if ip addr list dev "$_iface" primary | grep -Fq "inet $_im " ; then
883 _secondaries=$(ip addr list dev "$_iface" secondary | \
884 awk '$1 == "inet" { print $2 }')
888 ip addr del "$_im" dev "$_iface" || {
889 echo "Failed to del $_ip on dev $_iface"
893 if [ -n "$_secondaries" ] ; then
894 for _i in $_secondaries; do
895 if ip addr list dev "$_iface" | grep -Fq "inet $_i" ; then
896 echo "Kept secondary $_i on dev $_iface"
898 echo "Re-adding secondary address $_i to dev $_iface"
899 ip addr add $_i brd + dev $_iface || {
900 echo "Failed to re-add address $_i to dev $_iface"
910 # Do nothing here - return above only gets us out of the subshell
911 # and doing anything here will affect the return code.
914 # If the given IP is hosted then print 2 items: maskbits and iface
919 ip addr show to "${_addr}/32" 2>/dev/null | \
920 awk '$1 == "inet" { print gensub(".*/", "", 1, $2), $NF }'
925 _addr="${1%/*}" # Remove optional maskbits
928 set -- $(ip_maskbits_iface $_addr)
929 if [ -n "$1" ] ; then
932 if [ -n "$_log_tag" ] ; then
933 script_log "$_log_tag" \
934 "Removing public address $_addr/$_maskbits from device $_iface"
936 ip addr del $_addr/$_maskbits dev $_iface >/dev/null 2>&1
# Calls drop_ip, passing $_log_tag, for the first field of every line in
# the file named by $CTDB_PUBLIC_ADDRESSES; the rest of each line is
# ignored.  When that variable is unset or empty, /dev/null is read
# instead, so nothing is dropped.
940 drop_all_public_ips ()
944 while read _ip _x ; do
945 drop_ip "$_ip" "$_log_tag"
946 done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
949 ########################################################
950 # some simple logic for counting events - per eventscript
951 # usage: ctdb_counter_init
953 # ctdb_check_counter_limit <limit>
954 # ctdb_check_counter_limit fails when count >= <limit>
955 ########################################################
# Shared setup for the counter helpers.  Sets _service_name from $1
# (defaulting to $service_name) and _counter_file to
# "$ctdb_fail_dir/$_service_name", then creates the directory that
# contains the counter file.
956 _ctdb_counter_common () {
957 _service_name="${1:-${service_name}}"
958 _counter_file="$ctdb_fail_dir/$_service_name"
959 mkdir -p "${_counter_file%/*}" # dirname
961 ctdb_counter_init () {
962 _ctdb_counter_common "$1"
# Appends a single "1" character to the counter file of the service
# named by $1 (default $service_name).  The counter checks in this file
# read the count back as the file's size in bytes.
966 ctdb_counter_incr () {
967 _ctdb_counter_common "$1"
970 echo -n 1 >> "$_counter_file"
972 ctdb_check_counter_limit () {
975 _limit="${1:-${service_fail_limit}}"
979 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
980 if [ $_size -ge $_limit ] ; then
981 echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
983 elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
984 echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
987 ctdb_check_counter () {
988 _msg="${1:-error}" # "error" - anything else is silent on fail
989 _op="${2:--ge}" # an integer operator supported by test
990 _limit="${3:-${service_fail_limit}}"
992 _ctdb_counter_common "$1"
995 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
996 if [ $_size $_op $_limit ] ; then
997 if [ "$_msg" = "error" ] ; then
998 echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
1006 ########################################################
1008 ctdb_status_dir="$CTDB_VARDIR/status"
1009 ctdb_fail_dir="$CTDB_VARDIR/failcount"
1011 ctdb_setup_service_state_dir ()
1013 service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
1014 mkdir -p "$service_state_dir" || {
1015 echo "Error creating state dir \"$service_state_dir\""
1020 ########################################################
1021 # Managed status history, for auto-start/stop
1023 ctdb_managed_dir="$CTDB_VARDIR/managed_history"
# Shared setup for the managed-history helpers.  Sets _service_name from
# $1 (defaulting to $service_name) and _ctdb_managed_file to
# "$ctdb_managed_dir/$_service_name".
1025 _ctdb_managed_common ()
1027 _service_name="${1:-${service_name}}"
1028 _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
# Records the service as managed by creating its file under
# $ctdb_managed_dir (the directory is created if needed).
1031 ctdb_service_managed ()
1033 _ctdb_managed_common "$@"
1034 mkdir -p "$ctdb_managed_dir"
1035 touch "$_ctdb_managed_file"
# Removes the service's file from $ctdb_managed_dir; a file that is
# already absent is not an error (rm -f).
1038 ctdb_service_unmanaged ()
1040 _ctdb_managed_common "$@"
1041 rm -f "$_ctdb_managed_file"
# Tests whether the service's file exists under $ctdb_managed_dir, i.e.
# whether ctdb_service_managed has recorded it and it has not since
# been removed.
1044 is_ctdb_previously_managed_service ()
1046 _ctdb_managed_common "$@"
1047 [ -f "$_ctdb_managed_file" ]
1050 ########################################################
1051 # Check and set status
1055 echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
1060 if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
1061 log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
1063 elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
1064 log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
1073 d="$ctdb_status_dir/$script_name"
1080 for i in "banned" "unhealthy" ; do
1087 ##################################################################
1088 # Reconfigure a service on demand
# Shared setup for the reconfigure helpers.  Sets _d to
# "$ctdb_status_dir/${service_name}" and _ctdb_service_reconfigure_flag
# to the "reconfigure" file inside that directory.
1090 _ctdb_service_reconfigure_common ()
1092 _d="$ctdb_status_dir/${service_name}"
1094 _ctdb_service_reconfigure_flag="$_d/reconfigure"
# Tests whether the reconfigure flag file exists for the current
# service.
1097 ctdb_service_needs_reconfigure ()
1099 _ctdb_service_reconfigure_common
1100 [ -e "$_ctdb_service_reconfigure_flag" ]
# Schedules a reconfigure for the current service by creating (or
# truncating) its reconfigure flag file.
1103 ctdb_service_set_reconfigure ()
1105 _ctdb_service_reconfigure_common
1106 >"$_ctdb_service_reconfigure_flag"
# Cancels any scheduled reconfigure for the current service by removing
# its reconfigure flag file; an absent file is not an error (rm -f).
1109 ctdb_service_unset_reconfigure ()
1111 _ctdb_service_reconfigure_common
1112 rm -f "$_ctdb_service_reconfigure_flag"
# Announces the reconfigure, clears the reconfigure flag and then runs
# service_reconfigure, returning its exit status if it fails.  The flag
# is cleared before service_reconfigure runs, not after.
1115 ctdb_service_reconfigure ()
1117 echo "Reconfiguring service \"${service_name}\"..."
1118 ctdb_service_unset_reconfigure
1119 service_reconfigure || return $?
1123 # Default service_reconfigure() function does nothing.
1124 service_reconfigure ()
1129 ctdb_reconfigure_try_lock ()
1131 _ctdb_service_reconfigure_common
1132 _lock="${_d}/reconfigure_lock"
1133 mkdir -p "${_lock%/*}" # dirname
1138 # This is overkill but will work if we need to extend this to
1139 # allow certain events to run multiple times in parallel
1140 # (e.g. takeip) and write multiple PIDs to the file.
1142 if [ -n "$_locker_event" ] ; then
1143 while read _pid ; do
1144 if [ -n "$_pid" -a "$_pid" != $$ ] && \
1145 kill -0 "$_pid" 2>/dev/null ; then
1151 printf "%s\n%s\n" "$event_name" $$ >"$_lock"
1156 ctdb_replay_monitor_status ()
1158 echo "Replaying previous status for this script due to reconfigure..."
1159 # Leading colon (':') is missing in some versions...
1160 _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
1161 # Output looks like this:
1162 # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
1163 # This is the cheapest way of getting fields in the middle.
1164 set -- $(IFS=":" ; echo $_out)
1167 # The error output field can include colons so we'll try to
1168 # preserve them. The weak checking at the beginning tries to make
1169 # this work for both broken (no leading ':') and fixed output.
1171 _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
1173 OK) : ;; # Do nothing special.
1175 # Recast this as an error, since we can't exit with the
1176 # correct negative number.
1178 _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
1181 # Recast this as an OK, since we can't exit with the
1182 # correct negative number.
1184 _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
1186 *) : ;; # Must be ERROR, do nothing special.
1192 ctdb_service_check_reconfigure ()
1194 # We only care about some events in this function. For others we
1196 case "$event_name" in
1197 monitor|ipreallocated|reconfigure) : ;;
1201 if ctdb_reconfigure_try_lock ; then
1202 # No events covered by this function are running, so proceed
1204 case "$event_name" in
1206 (ctdb_service_reconfigure)
1210 if ctdb_service_needs_reconfigure ; then
1211 ctdb_service_reconfigure
1215 if ctdb_service_needs_reconfigure ; then
1216 ctdb_service_reconfigure
1217 # Given that the reconfigure might not have
1218 # resulted in the service being stable yet, we
1219 # replay the previous status since that's the best
1220 # information we have.
1221 ctdb_replay_monitor_status
1226 # Somebody else is running an event we don't want to collide
1227 # with. We proceed with caution.
1228 case "$event_name" in
1230 # Tell whoever called us to retry.
1234 # Defer any scheduled reconfigure and just run the
1235 # rest of the ipreallocated event, as per the
1236 # eventscript. There's an assumption here that the
1237 # event doesn't depend on any scheduled reconfigure.
1238 # This is true in the current code.
1242 # There is most likely a reconfigure in progress so
1243 # the service is possibly unstable. As above, we
1244 # defer any scheduled reconfigure. We also replay
1245 # the previous monitor status since that's the best
1246 # information we have.
1247 ctdb_replay_monitor_status
1253 ##################################################################
1254 # Does CTDB manage this service? - and associated auto-start/stop
# When $1 is "yes" and $2 equals $_service_name, appends $2 to
# $CTDB_MANAGED_SERVICES.  $_service_name is not set here; it is read
# from the caller's environment.
1256 ctdb_compat_managed_service ()
1258 if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then
1259 CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
1263 is_ctdb_managed_service ()
1265 _service_name="${1:-${service_name}}"
1267 # $t is used just for readability and to allow more accurate
1268 # matching via leading/trailing spaces
1269 t=" $CTDB_MANAGED_SERVICES "
1271 # Return 0 if "<space>$_service_name<space>" appears in $t
1272 if [ "${t#* ${_service_name} }" != "${t}" ] ; then
1276 # If above didn't match then update $CTDB_MANAGED_SERVICES for
1277 # backward compatibility and try again.
1278 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
1279 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
1280 ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
1281 ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
1282 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "apache2"
1283 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
1284 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
1285 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
1286 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
1287 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs"
1289 t=" $CTDB_MANAGED_SERVICES "
1291 # Return 0 if "<space>$_service_name<space>" appears in $t
1292 [ "${t#* ${_service_name} }" != "${t}" ]
1295 ctdb_start_stop_service ()
1297 _service_name="${1:-${service_name}}"
1299 # Allow service-start/service-stop pseudo-events to start/stop
1300 # services when we're not auto-starting/stopping and we're not
1302 case "$event_name" in
1304 if is_ctdb_managed_service "$_service_name" ; then
1305 die 'service-start event not permitted when service is managed'
1307 if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
1308 die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
1310 ctdb_service_start "$_service_name"
1314 if is_ctdb_managed_service "$_service_name" ; then
1315 die 'service-stop event not permitted when service is managed'
1317 if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
1318 die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
1320 ctdb_service_stop "$_service_name"
1325 # Do nothing unless configured to...
1326 [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
1328 [ "$event_name" = "monitor" ] || return 0
1330 if is_ctdb_managed_service "$_service_name" ; then
1331 if ! is_ctdb_previously_managed_service "$_service_name" ; then
1332 echo "Starting service \"$_service_name\" - now managed"
1333 background_with_logging ctdb_service_start "$_service_name"
1337 if is_ctdb_previously_managed_service "$_service_name" ; then
1338 echo "Stopping service \"$_service_name\" - no longer managed"
1339 background_with_logging ctdb_service_stop "$_service_name"
# Starts a service: records it as managed, runs service_start with the
# same arguments (returning its exit status if it fails) and then calls
# ctdb_counter_init with those arguments.
1345 ctdb_service_start ()
1347 # The service is marked managed if we've ever tried to start it.
1348 ctdb_service_managed "$@"
1350 # Here we only want $1. If no argument is passed then
1351 # service_start needs to know.
1352 service_start "$@" || return $?
1354 ctdb_counter_init "$@"
1358 ctdb_service_stop ()
1360 ctdb_service_unmanaged "$@"
1364 # Default service_start() and service_stop() functions.
1366 # These may be overridden in an eventscript. When overriding, the
1367 # following convention must be followed. If these functions are
1368 # called with no arguments then they may use internal logic to
1369 # determine whether the service is managed and, therefore, whether
1370 # they should take any action. However, if the service name is
1371 # specified as an argument then an attempt must be made to start or
1372 # stop the service. This is because the auto-start/stop code calls
1373 # them with the service name as an argument.
1376 service "${1:-${service_name}}" start
1381 service "${1:-${service_name}}" stop
1384 ##################################################################
1386 ctdb_standard_event_handler ()
1401 # iptables doesn't like being re-entered, so flock-wrap it.
1404 flock -w 30 $CTDB_VARDIR/iptables-ctdb.flock /sbin/iptables "$@"
1407 ########################################################
1409 ########################################################
1415 tickledir="$CTDB_VARDIR/state/tickles"
1416 mkdir -p "$tickledir"
1419 _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
1421 # What public IPs do I hold?
1422 _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
1424 # IPs as a regexp choice
1425 _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
1427 # Record connections to our public IPs in a temporary file
1428 _my_connections="${tickledir}/${_port}.connections"
1429 rm -f "$_my_connections"
1431 awk -v destpat="^${_ipschoice}:${_port}\$" \
1432 '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
1433 sort >"$_my_connections"
1435 # Record our current tickles in a temporary file
1436 _my_tickles="${tickledir}/${_port}.tickles"
1437 rm -f "$_my_tickles"
1438 for _i in $_ips ; do
1439 ctdb -Y gettickles $_i $_port |
1440 awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
1442 sort >"$_my_tickles"
1444 # Add tickles for connections that we haven't already got tickles for
1445 comm -23 "$_my_connections" "$_my_tickles" |
1446 while read _src _dst ; do
1447 ctdb addtickle $_src $_dst
1450 # Remove tickles for connections that are no longer there
1451 comm -13 "$_my_connections" "$_my_tickles" |
1452 while read _src _dst ; do
1453 ctdb deltickle $_src $_dst
1456 rm -f "$_my_connections" "$_my_tickles"
1459 ########################################################
1460 # load a site local config file
1461 ########################################################
1463 [ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
1467 [ -x $CTDB_BASE/rc.local ] && {
1468 . $CTDB_BASE/rc.local
1471 [ -d $CTDB_BASE/rc.local.d ] && {
1472 for i in $CTDB_BASE/rc.local.d/* ; do
1473 [ -x "$i" ] && . "$i"
1477 script_name="${0##*/}" # basename
1478 service_name="$script_name" # default is just the script name
1479 service_fail_limit=1