1 # Hey Emacs, this is a -*- shell-script -*- !!!
3 # utility functions for ctdb event scripts
# Put the standard system binary directories at the front of the search path.
5 PATH=/bin:/usr/bin:/usr/sbin:/sbin:$PATH
# Default CTDB_VARDIR to /var/ctdb when it is unset or empty.
7 [ -z "$CTDB_VARDIR" ] && {
8 export CTDB_VARDIR="/var/ctdb"
# Default CTDB_ETCDIR to /etc when it is unset or empty.
10 [ -z "$CTDB_ETCDIR" ] && {
11 export CTDB_ETCDIR="/etc"
14 #######################################
15 # pull in a system config file, if any
19 foo="${service_config:-${service_name}}"
20 if [ -n "$foo" ] ; then
23 elif [ "$1" != "ctdb" ] ; then
27 if [ -f $CTDB_ETCDIR/sysconfig/$1 ]; then
28 . $CTDB_ETCDIR/sysconfig/$1
29 elif [ -f $CTDB_ETCDIR/default/$1 ]; then
30 . $CTDB_ETCDIR/default/$1
31 elif [ -f $CTDB_BASE/sysconfig/$1 ]; then
32 . $CTDB_BASE/sysconfig/$1
40 ##############################################################
41 # make sure CTDB_CURRENT_DEBUGLEVEL is set to the desired debug level
44 # If it is already set then do nothing, since it might have been set
45 # via a file in rc.local.d/. If it is not set then set it by sourcing
46 # /var/ctdb/eventscript_debuglevel. If this file does not exist then
47 # create it using output from "ctdb getdebug". If the optional 1st arg
48 # is "create" then don't source an existing file but create a new one
49 # instead - this is useful for creating the file just once in each
50 # event run in 00.ctdb. If there's a problem getting the debug level
51 # from ctdb then it is silently set to 0 - no use spamming logs if our
52 # debug code is broken...
53 ctdb_set_current_debuglevel ()
# An already-set level wins; return success without touching the cache file.
55 [ -z "$CTDB_CURRENT_DEBUGLEVEL" ] || return 0
# Cache file that holds the "export CTDB_CURRENT_DEBUGLEVEL=..." line.
57 _f="$CTDB_VARDIR/eventscript_debuglevel"
# (Re)generate the cache file when asked to ("create") or when it is not readable.
59 if [ "$1" = "create" -o ! -r "$_f" ] ; then
60 _t=$(ctdb getdebug -Y 2>/dev/null)
61 # get last field of output
# An empty $_t is written out as level 0.
65 echo "export CTDB_CURRENT_DEBUGLEVEL=\"${_t:-0}\"" >"$_f"
73 if [ $CTDB_CURRENT_DEBUGLEVEL -ge 4 ] ; then
74 # If there are arguments then echo them. Otherwise expect to
75 # use stdin, which allows us to pass lots of debug using a
85 ##############################################################
86 # check number of args for different events
92 echo "ERROR: must supply interface, IP and maskbits"
98 echo "ERROR: must supply old interface, new interface, IP and maskbits"
105 ##############################################################
106 # determine on what type of system (init style) we are running
# Result is stored in CTDB_INIT_STYLE. Values assigned here are "suse"
# (when /sbin/startproc is executable), "debian" (when
# /sbin/start-stop-daemon is executable) and "redhat".
107 detect_init_style() {
108 # only do detection if not already set:
109 test "x$CTDB_INIT_STYLE" != "x" && return
111 if [ -x /sbin/startproc ]; then
112 CTDB_INIT_STYLE="suse"
113 elif [ -x /sbin/start-stop-daemon ]; then
114 CTDB_INIT_STYLE="debian"
116 CTDB_INIT_STYLE="redhat"
120 ######################################################
121 # simulate /sbin/service on platforms that don't have it
122 # _service() makes it easier to hook the service() function for
129 # do nothing, when no service was specified
130 [ -z "$_service_name" ] && return
132 if [ -x /sbin/service ]; then
133 $_nice /sbin/service "$_service_name" "$_op"
134 elif [ -x $CTDB_ETCDIR/init.d/$_service_name ]; then
135 $_nice $CTDB_ETCDIR/init.d/$_service_name "$_op"
136 elif [ -x $CTDB_ETCDIR/rc.d/init.d/$_service_name ]; then
137 $_nice $CTDB_ETCDIR/rc.d/init.d/$_service_name "$_op"
147 ######################################################
148 # simulate /sbin/service (niced) on platforms that don't have it
155 ######################################################
156 # wrapper around /proc/ settings to allow them to be hooked
158 # 1st arg is relative path under /proc/, 2nd arg is value to set
161 echo "$2" >"/proc/$1"
164 ######################################################
165 # wrapper around getting file contents from /proc/ to allow
166 # this to be hooked for testing
167 # 1st arg is relative path under /proc/
173 ######################################################
174 # Check that an RPC service is healthy -
175 # this includes allowing a certain number of failures
176 # before marking the NFS service unhealthy.
178 # usage: nfs_check_rpc_service SERVICE_NAME [ triple ...]
180 # each triple is a set of 3 arguments: an operator, a
181 # fail count limit and an action string.
185 # nfs_check_rpc_service "lockd" \
186 # -ge 15 "verbose restart unhealthy" \
187 # -eq 10 "restart:bs"
189 # says that if lockd is down for 15 iterations then do
190 # a verbose restart of lockd and mark the node unhealthy.
191 # Before this, after 10 iterations of failure, the
192 # service is restarted silently in the background.
193 # Order is important: the failure counts need to be
194 # specified in descending order because processing stops
195 # after the first condition that is true.
196 ######################################################
197 nfs_check_rpc_service ()
# The first argument is the RPC program name; after the shift the
# positional parameters hold only the triples.
199 _prog_name="$1" ; shift
202 _rpc_prog="$_prog_name"
# Per-program setup: some programs get a ready-made $_restart command
# string, others only get daemon options collected in $_opts.
205 case "$_prog_name" in
209 _restart="echo 'Trying to restart NFS service'"
210 _restart="${_restart}; startstop_nfs restart"
# Each "${VAR:+ -x }${VAR}" expands to " -x value" only when VAR is set and non-empty.
213 _opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
216 _opts="${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
221 _restart="echo 'Trying to restart lock manager service'"
222 _restart="${_restart}; startstop_nfslock restart"
226 _opts="${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
227 _opts="${_opts}${STATD_PORT:+ -p }${STATD_PORT}"
228 _opts="${_opts}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
231 echo "Internal error: unknown RPC program \"$_prog_name\"."
# Name under which the failure counter for this program is kept.
235 _service_name="nfs_${_prog_name}"
237 if ctdb_check_rpc "$_rpc_prog" $_version >/dev/null ; then
238 ctdb_counter_init "$_service_name"
242 ctdb_counter_incr "$_service_name"
# Walk the triples: $1 is the operator, $2 the limit, $3 the action string.
244 while [ -n "$3" ] ; do
245 ctdb_check_counter "quiet" "$1" "$2" "$_service_name" || {
# The action string is split on whitespace into individual actions.
246 for _action in $3 ; do
249 echo "$ctdb_check_rpc_out"
252 # No explicit command specified, construct rpc command.
253 if [ -z "$_restart" ] ; then
254 _p="rpc.${_prog_name}"
255 _restart="echo 'Trying to restart $_prog_name [${_p}${_opts}]'"
256 _restart="${_restart}; killall -q -9 $_p"
257 _restart="${_restart}; $_p $_opts"
260 # Process restart flags...
261 _flags="${_action#restart:}"
262 # There may not have been a colon...
263 [ "$_flags" != "$_action" ] || _flags=""
264 # q=quiet - everything to /dev/null
265 if [ "${_flags#*q}" != "$_flags" ] ; then
266 _restart="{ ${_restart} ; } >/dev/null 2>&1"
268 # s=stealthy - last command to /dev/null
269 if [ "${_flags#*s}" != "$_flags" ] ; then
270 _restart="${_restart} >/dev/null 2>&1"
272 # b=background - the whole thing, easy and reliable
273 if [ "${_flags#*b}" != "$_flags" ] ; then
274 _restart="{ ${_restart} ; } &"
284 echo "Internal error: unknown action \"$_action\"."
289 # Only process the first action group.
296 ######################################################
297 # check that an RPC server is registered with portmap
298 # and responding to requests
299 # usage: ctdb_check_rpc SERVICE_NAME VERSION
300 ######################################################
306 if ! ctdb_check_rpc_out=$(rpcinfo -u localhost $progname $version 2>&1) ; then
307 ctdb_check_rpc_out="ERROR: $progname failed RPC check:
309 echo "$ctdb_check_rpc_out"
314 ######################################################
315 # check a set of directories is available
316 # return 1 on a missing directory
317 # usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
# NOTE(review): the loop below takes the directory names from standard
# input (one per line, whitespace preserved via IFS=""), not from the
# arguments - confirm the usage line above against the callers.
318 ######################################################
319 ctdb_check_directories_probe() {
320 while IFS="" read d ; do
# Testing "$d/." means the name must refer to a directory.
326 [ -d "${d}/." ] || return 1
331 ######################################################
332 # check a set of directories is available
333 # usage: ctdb_check_directories SERVICE_NAME <directories...>
334 ######################################################
335 ctdb_check_directories() {
# Service name used in the error message: $1, else $service_name.
336 n="${1:-${service_name}}"
# When the probe fails, $d still holds the name it last read, because
# the probe's read loop assigns it as a non-local variable.
337 ctdb_check_directories_probe || {
338 echo "ERROR: $n directory \"$d\" not available"
343 ######################################################
344 # check a set of tcp ports
345 # usage: ctdb_check_tcp_ports <ports...>
346 ######################################################
348 # This flag file is created when a service is initially started. It
349 # is deleted the first time TCP port checks for that service succeed.
350 # Until then ctdb_check_tcp_ports() prints a more subtle "error"
351 # message if a port check fails.
# Sets _ctdb_service_started_file to the flag file path for $service_name
# under $ctdb_fail_dir.
352 _ctdb_check_tcp_common ()
354 _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
# Create the "service started" flag file for $service_name, creating its
# parent directory first if necessary.
357 ctdb_check_tcp_init ()
359 _ctdb_check_tcp_common
360 mkdir -p "${_ctdb_service_started_file%/*}" # dirname
361 touch "$_ctdb_service_started_file"
# Check the TCP ports given as arguments using the checkers listed in
# CTDB_TCP_PORT_CHECKERS; each checker NAME is invoked as
# ctdb_check_tcp_ports_NAME with all the ports.
364 ctdb_check_tcp_ports()
# At least one port argument is required.
366 if [ -z "$1" ] ; then
367 echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
371 # Set default value for CTDB_TCP_PORT_CHECKERS if unset.
372 # If any of these defaults are unsupported then this variable can
373 # be overridden in /etc/sysconfig/ctdb or via a file in
374 # /etc/ctdb/rc.local.d/.
375 : ${CTDB_TCP_PORT_CHECKERS:=ctdb nmap netstat}
377 for _c in $CTDB_TCP_PORT_CHECKERS ; do
378 ctdb_check_tcp_ports_$_c "$@"
381 _ctdb_check_tcp_common
382 rm -f "$_ctdb_service_started_file"
386 _ctdb_check_tcp_common
387 if [ ! -f "$_ctdb_service_started_file" ] ; then
388 echo "ERROR: $service_name tcp port $_p is not responding"
390 $ctdb_check_tcp_ports_debug
393 echo "INFO: $service_name tcp port $_p is not responding"
400 ctdb_check_ports - checker $_c not implemented
401 output from checker was:
402 $ctdb_check_tcp_ports_debug
410 echo "INTERNAL ERROR: ctdb_check_ports - no working checkers in CTDB_TCP_PORT_CHECKERS=\"$CTDB_TCP_PORT_CHECKERS\""
# Port checker based on "netstat -l -t -n": each port argument must show
# up as LISTEN on either 0.0.0.0 or :: in the netstat output held in $_ns.
# Diagnostic text for the caller is left in ctdb_check_tcp_ports_debug.
415 ctdb_check_tcp_ports_netstat ()
417 _cmd='netstat -l -t -n'
# Exit status 127 is the shell's "command not found" status.
419 if [ $? -eq 127 ] ; then
420 # netstat probably not installed - unlikely?
421 ctdb_check_tcp_ports_debug="$_ns"
425 for _p ; do # process each function argument (port)
426 for _a in '0\.0\.0\.0' '::' ; do
427 _pat="[[:space:]]${_a}:${_p}[[:space:]]+[^[:space:]]+[[:space:]]+LISTEN"
428 if echo "$_ns" | grep -E -q "$_pat" ; then
429 # We matched the port, so process next port
434 # We didn't match the port, so flag an error.
435 ctdb_check_tcp_ports_debug="$_cmd shows this output:
# Port checker based on nmap: scans 127.0.0.1 for the given ports in a
# single run and inspects the "Ports:" part of the grepable (-oG) output.
# Diagnostic text for the caller is left in ctdb_check_tcp_ports_debug.
443 ctdb_check_tcp_ports_nmap ()
445 # nmap wants a comma-separated list of ports
448 _ports="${_ports}${_ports:+,}${_p}"
451 _cmd="nmap -n -oG - -PS 127.0.0.1 -p $_ports"
453 _nmap_out=$($_cmd 2>&1)
# Exit status 127 is the shell's "command not found" status.
454 if [ $? -eq 127 ] ; then
455 # nmap probably not installed
456 ctdb_check_tcp_ports_debug="$_nmap_out"
460 # get the port-related output
461 _port_info=$(echo "$_nmap_out" | sed -n -r -e 's@^.*Ports:[[:space:]]@@p')
464 # looking for something like this:
465 # 445/open/tcp//microsoft-ds///
466 # possibly followed by a comma
468 case "$_port_info" in
469 # The info we're after must be either at the beginning of
470 # the string or it must follow a space.
473 # Nope, flag an error...
474 ctdb_check_tcp_ports_debug="$_cmd shows this output:
483 # Use the new "ctdb checktcpport" command to check the port.
484 # This is very cheap.
# The check is inverted: if the command is able to bind to the port then
# nothing is listening there. Diagnostic text for the caller is left in
# ctdb_check_tcp_ports_debug.
485 ctdb_check_tcp_ports_ctdb ()
487 for _p ; do # process each function argument (port)
488 _cmd="ctdb checktcpport $_p"
493 ctdb_check_tcp_ports_debug="\"$_cmd\" was able to bind to port"
497 # Couldn't bind, something already listening, next port...
501 ctdb_check_tcp_ports_debug="$_cmd (exited with $_ret) with output:
503 # assume not implemented
511 ######################################################
512 # check a unix socket
513 # usage: ctdb_check_unix_socket SERVICE_NAME <socket_path>
514 ######################################################
515 ctdb_check_unix_socket() {
# Nothing to check when no socket path is set.
517 [ -z "$socket_path" ] && return
# Look for a LISTEN line in the unix-socket table that ends with the path.
# The path is used unescaped inside the grep pattern.
519 if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
520 echo "ERROR: $service_name socket $socket_path not found"
525 ######################################################
526 # check a command returns zero status
527 # usage: ctdb_check_command SERVICE_NAME <command>
528 ######################################################
529 ctdb_check_command() {
# Nothing to check when no command is set.
532 [ -z "$wait_cmd" ] && return;
# $wait_cmd is expanded unquoted, so it is word-split into command and
# arguments; its output is discarded and only the exit status matters.
533 $wait_cmd > /dev/null 2>&1 || {
534 echo "ERROR: $service_name - $wait_cmd returned error"
539 ################################################
540 # kill off any TCP connections with the given IP
541 ################################################
542 kill_tcp_connections() {
# Snapshot the ESTABLISHED connections involving $_IP (plain and
# ::ffff:-prefixed form) into $connfile, keeping fields 4 and 5 of each
# matching netstat line.
547 connfile="$CTDB_VARDIR/state/connections.$_IP"
548 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
549 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
# Each record is "dest src"; both are split at their last colon into an
# address part and a port part.
551 while read dest src; do
552 srcip=`echo $src | sed -e "s/:[^:]*$//"`
553 srcport=`echo $src | sed -e "s/^.*://"`
554 destip=`echo $dest | sed -e "s/:[^:]*$//"`
555 destport=`echo $dest | sed -e "s/^.*://"`
556 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
# A failing killtcp is only remembered in $_failed; the loop carries on.
557 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
559 # we only do one-way killtcp for CIFS
561 # for all others we do 2-way
563 ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
566 _killcount=`expr $_killcount + 1`
570 [ $_failed = 0 ] || {
571 echo "Failed to send killtcp control"
574 [ $_killcount -gt 0 ] || {
# Poll while matching ESTABLISHED connections remain; $_count bounds the
# number of polls (timeout message once it exceeds 3).
578 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
580 _count=`expr $_count + 1`
581 [ $_count -gt 3 ] && {
582 echo "Timed out killing tcp connections for IP $_IP"
586 echo "killed $_killcount TCP connections to released IP $_IP"
589 ##################################################################
590 # kill off the local end for any TCP connections with the given IP
591 ##################################################################
592 kill_tcp_connections_local_only() {
# Snapshot the ESTABLISHED connections involving $_IP (plain and
# ::ffff:-prefixed form) into $connfile, keeping fields 4 and 5 of each
# matching netstat line.
597 connfile="$CTDB_VARDIR/state/connections.$_IP"
598 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
599 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
# Each record is "dest src"; both are split at their last colon into an
# address part and a port part.
601 while read dest src; do
602 srcip=`echo $src | sed -e "s/:[^:]*$//"`
603 srcport=`echo $src | sed -e "s/^.*://"`
604 destip=`echo $dest | sed -e "s/:[^:]*$//"`
605 destport=`echo $dest | sed -e "s/^.*://"`
606 echo "Killing TCP connection $srcip:$srcport $destip:$destport"
# Only the src -> dest direction is killed here; a failure is only
# remembered in $_failed.
607 ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
608 _killcount=`expr $_killcount + 1`
612 [ $_failed = 0 ] || {
613 echo "Failed to send killtcp control"
616 [ $_killcount -gt 0 ] || {
# Poll while matching ESTABLISHED connections remain; $_count bounds the
# number of polls (timeout message once it exceeds 3).
620 while netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" > /dev/null; do
622 _count=`expr $_count + 1`
623 [ $_count -gt 3 ] && {
624 echo "Timed out killing tcp connections for IP $_IP"
628 echo "killed $_killcount TCP connections to released IP $_IP"
631 ##################################################################
632 # tickle any TCP connections with the given IP
633 ##################################################################
634 tickle_tcp_connections() {
# Snapshot the ESTABLISHED connections involving $_IP (plain and
# ::ffff:-prefixed form) into $connfile, keeping fields 4 and 5 of each
# matching netstat line.
639 connfile="$CTDB_VARDIR/state/connections.$_IP"
640 netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
641 netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
# Each record is "dest src"; both are split at their last colon into an
# address part and a port part.
643 while read dest src; do
644 srcip=`echo $src | sed -e "s/:[^:]*$//"`
645 srcport=`echo $src | sed -e "s/^.*://"`
646 destip=`echo $dest | sed -e "s/:[^:]*$//"`
647 destport=`echo $dest | sed -e "s/^.*://"`
# Tickle both directions; a failure is only remembered in $_failed.
648 echo "Tickle TCP connection $srcip:$srcport $destip:$destport"
649 ctdb tickle $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
650 echo "Tickle TCP connection $destip:$destport $srcip:$srcport"
651 ctdb tickle $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
655 [ $_failed = 0 ] || {
656 echo "Failed to send tickle control"
661 ########################################################
662 # start/stop the nfs service on different platforms
663 ########################################################
666 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
669 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
677 service nfsserver start
680 service nfsserver stop > /dev/null 2>&1
683 set_proc "fs/nfsd/threads" 0
684 service nfsserver stop > /dev/null 2>&1
686 service nfsserver start
693 service nfslock start
701 set_proc "fs/nfsd/threads" 0
702 service nfs stop > /dev/null 2>&1
703 service nfslock stop > /dev/null 2>&1
705 service nfslock start
711 echo "Unknown platform. NFS is not supported with ctdb"
717 ########################################################
718 # start/stop the nfs lockmanager service on different platforms
719 ########################################################
720 startstop_nfslock() {
# Platform probes: which of the nfsserver / nfslock init scripts is
# executable under $CTDB_ETCDIR/init.d.
722 [ -x $CTDB_ETCDIR/init.d/nfsserver ] && {
725 [ -x $CTDB_ETCDIR/init.d/nfslock ] && {
731 # for sles there is no service for lockmanager
732 # so we instead just shutdown/restart nfs
735 service nfsserver start
738 service nfsserver stop > /dev/null 2>&1
741 service nfsserver stop
742 service nfsserver start
# The commands below drive the separate nfslock service instead.
749 service nfslock start
752 service nfslock stop > /dev/null 2>&1
756 service nfslock start
761 echo "Unknown platform. NFS locking is not supported with ctdb"
772 local _state_dir="$CTDB_VARDIR/state/interface_modify"
773 local _lockfile="$_state_dir/$_iface.flock"
774 local _readd_base="$_state_dir/$_iface.readd.d"
776 mkdir -p $_state_dir || {
778 echo "Failed to mkdir -p $_state_dir - $ret"
782 test -f $_lockfile || {
786 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh add "$_iface" "$_ip" "$_maskbits" "$_readd_base"
# Remove an IP from an interface by running
# "$CTDB_BASE/interface_modify.sh delete" under a per-interface flock.
790 delete_ip_from_iface()
# Per-interface state lives under $CTDB_VARDIR/state/interface_modify.
795 local _state_dir="$CTDB_VARDIR/state/interface_modify"
796 local _lockfile="$_state_dir/$_iface.flock"
797 local _readd_base="$_state_dir/$_iface.readd.d"
799 mkdir -p $_state_dir || {
801 echo "Failed to mkdir -p $_state_dir - $ret"
805 test -f $_lockfile || {
# flock gives up if the lock cannot be obtained within 30 seconds.
809 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh delete "$_iface" "$_ip" "$_maskbits" "$_readd_base"
# Register a re-add script for an interface/IP by running
# "$CTDB_BASE/interface_modify.sh readd_script" under a per-interface flock.
# The 4th argument is the re-add script passed through to that helper.
813 setup_iface_ip_readd_script()
818 local _readd_script=$4
# Per-interface state lives under $CTDB_VARDIR/state/interface_modify.
819 local _state_dir="$CTDB_VARDIR/state/interface_modify"
820 local _lockfile="$_state_dir/$_iface.flock"
821 local _readd_base="$_state_dir/$_iface.readd.d"
823 mkdir -p $_state_dir || {
825 echo "Failed to mkdir -p $_state_dir - $ret"
829 test -f $_lockfile || {
# flock gives up if the lock cannot be obtained within 30 seconds.
833 flock --timeout 30 $_lockfile $CTDB_BASE/interface_modify.sh readd_script "$_iface" "$_ip" "$_maskbits" "$_readd_base" "$_readd_script"
837 ########################################################
838 # some simple logic for counting events - per eventscript
839 # usage: ctdb_counter_init
841 # ctdb_check_counter_limit <limit>
842 # ctdb_check_counter_limit succeeds when count >= <limit>
843 ########################################################
# Shared setup for the counter helpers: sets _service_name ($1, else
# $service_name) and _counter_file ($ctdb_fail_dir/<service name>), and
# makes sure the counter file's parent directory exists.
844 _ctdb_counter_common () {
845 _service_name="${1:-${service_name}}"
846 _counter_file="$ctdb_fail_dir/$_service_name"
847 mkdir -p "${_counter_file%/*}" # dirname
# Initialise the counter for the service named by $1 (default:
# $service_name); the counter file is located via _ctdb_counter_common.
849 ctdb_counter_init () {
850 _ctdb_counter_common "$1"
# Increment the counter for the service named by $1 (default:
# $service_name) by appending one character to its counter file, so the
# file's size is the count.
854 ctdb_counter_incr () {
855 _ctdb_counter_common "$1"
858 echo -n 1 >> "$_counter_file"
# Compare the current count against a limit ($1, else
# $service_fail_limit) and print an ERROR once the count has reached it,
# or a WARNING for a smaller non-zero count unless $_quiet is set.
860 ctdb_check_counter_limit () {
863 _limit="${1:-${service_fail_limit}}"
# The count is the counter file's size in bytes; a failing stat
# (e.g. no file) counts as 0.
867 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
868 if [ $_size -ge $_limit ] ; then
869 echo "ERROR: more than $_limit consecutive failures for $service_name, marking cluster unhealthy"
871 elif [ $_size -gt 0 -a -z "$_quiet" ] ; then
872 echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
# Test whether the current count is exactly equal to $_limit.
875 ctdb_check_counter_equal () {
# The count is the counter file's size in bytes; a failing stat
# (e.g. no file) counts as 0.
881 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
882 if [ $_size -eq $_limit ] ; then
# General counter test: compares the current count with a limit using a
# caller-supplied integer operator and, in "error" mode, prints an ERROR
# line when the comparison holds.
887 ctdb_check_counter () {
888 _msg="${1:-error}" # "error" - anything else is silent on fail
889 _op="${2:--ge}" # an integer operator supported by test
890 _limit="${3:-${service_fail_limit}}"
892 _ctdb_counter_common "$1"
# The count is the counter file's size in bytes; a failing stat
# (e.g. no file) counts as 0.
895 _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
896 if [ $_size $_op $_limit ] ; then
897 if [ "$_msg" = "error" ] ; then
898 echo "ERROR: $_limit consecutive failures for $_service_name, marking node unhealthy"
906 ########################################################
908 ctdb_status_dir="$CTDB_VARDIR/status"
909 ctdb_fail_dir="$CTDB_VARDIR/failcount"
# Set service_state_dir to $CTDB_VARDIR/state/<service>, where <service>
# is $1 or else $service_name, and create that directory; an error
# message is printed when the mkdir fails.
911 ctdb_setup_service_state_dir ()
913 service_state_dir="$CTDB_VARDIR/state/${1:-${service_name}}"
914 mkdir -p "$service_state_dir" || {
915 echo "Error creating state dir \"$service_state_dir\""
920 ########################################################
921 # Managed status history, for auto-start/stop
923 ctdb_managed_dir="$CTDB_VARDIR/managed_history"
# Shared setup for the managed-history helpers: sets _service_name ($1,
# else $service_name) and _ctdb_managed_file, the per-service flag file
# under $ctdb_managed_dir.
925 _ctdb_managed_common ()
927 _service_name="${1:-${service_name}}"
928 _ctdb_managed_file="$ctdb_managed_dir/$_service_name"
# Record a service as managed by creating its flag file (and the
# managed-history directory, if missing).
931 ctdb_service_managed ()
933 _ctdb_managed_common "$@"
934 mkdir -p "$ctdb_managed_dir"
935 touch "$_ctdb_managed_file"
# Record a service as no longer managed by removing its flag file; a
# missing file is not an error (rm -f).
938 ctdb_service_unmanaged ()
940 _ctdb_managed_common "$@"
941 rm -f "$_ctdb_managed_file"
# Test whether the managed flag file exists for the service, i.e. whether
# it has been recorded as managed and not since marked unmanaged.
944 is_ctdb_previously_managed_service ()
946 _ctdb_managed_common "$@"
947 [ -f "$_ctdb_managed_file" ]
950 ########################################################
951 # Check and set status
955 echo "node is \"$1\", \"${script_name}\" reports problem: $(cat $2)"
960 if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
961 log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
963 elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
964 log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
973 d="$ctdb_status_dir/$script_name"
980 for i in "banned" "unhealthy" ; do
987 ##################################################################
988 # Reconfigure a service on demand
# Shared setup for the reconfigure helpers: sets _d to the service's
# directory under $ctdb_status_dir ($1, else $service_name) and
# _ctdb_service_reconfigure_flag to the "reconfigure" flag file inside it.
990 _ctdb_service_reconfigure_common ()
992 _d="$ctdb_status_dir/${1:-${service_name}}"
994 _ctdb_service_reconfigure_flag="$_d/reconfigure"
# Test whether the service's reconfigure flag exists.
997 ctdb_service_needs_reconfigure ()
999 _ctdb_service_reconfigure_common "$@"
1000 [ -e "$_ctdb_service_reconfigure_flag" ]
# Schedule a reconfigure by creating (or truncating) the service's
# reconfigure flag file.
1003 ctdb_service_set_reconfigure ()
1005 _ctdb_service_reconfigure_common "$@"
1006 >"$_ctdb_service_reconfigure_flag"
# Cancel a scheduled reconfigure by removing the service's reconfigure
# flag file; a missing file is not an error (rm -f).
1009 ctdb_service_unset_reconfigure ()
1011 _ctdb_service_reconfigure_common "$@"
1012 rm -f "$_ctdb_service_reconfigure_flag"
# Reconfigure a service now: clear its reconfigure flag, run
# service_reconfigure, and only if that succeeds call ctdb_counter_init
# for the service. A failing service_reconfigure's status is returned.
1015 ctdb_service_reconfigure ()
1017 echo "Reconfiguring service \"$@\"..."
# The flag is cleared before the reconfigure is attempted.
1018 ctdb_service_unset_reconfigure "$@"
1019 service_reconfigure "$@" || return $?
1020 ctdb_counter_init "$@"
1023 # Default service_reconfigure() function.
# Restarts the service named by $1 (else $service_name) via service().
1024 service_reconfigure ()
1026 service "${1:-$service_name}" restart
# Try to take the per-service reconfigure lock, a file named
# "reconfigure_lock" in the service's status directory that records an
# event name followed by a PID.
1029 ctdb_reconfigure_try_lock ()
1032 _ctdb_service_reconfigure_common "$@"
1033 _lock="${_d}/reconfigure_lock"
1038 # This is overkill but will work if we need to extend this to
1039 # allow certain events to run multiple times in parallel
1040 # (e.g. takeip) and write multiple PIDs to the file.
1042 if [ -n "$_locker_event" ] ; then
1043 while read _pid ; do
# A recorded PID matters only if it is non-empty, is not this shell's
# own PID, and still names a process that can be signalled (kill -0).
1044 if [ -n "$_pid" -a "$_pid" != $$ ] && \
1045 kill -0 "$_pid" 2>/dev/null ; then
# Record this event and this shell's PID as the lock holder.
1051 printf "%s\n%s\n" "$event_name" $$ >"$_lock"
# Re-report the status that "ctdb scriptstatus" last recorded for this
# script's monitor event, for use while a reconfigure makes a fresh check
# unreliable.
1056 ctdb_replay_monitor_status ()
1058 echo "Replaying previous status for this script due to reconfigure..."
1059 # Leading colon (':') is missing in some versions...
1060 _out=$(ctdb scriptstatus -Y | grep -E "^:?monitor:${script_name}:")
1061 # Output looks like this:
1062 # :monitor:60.nfs:1:ERROR:1314764004.030861:1314764004.035514:foo bar:
1063 # This is the cheapest way of getting fields in the middle.
# The subshell splits $_out on ":" so the fields become positional parameters.
1064 set -- $(IFS=":" ; echo $_out)
1067 # The error output field can include colons so we'll try to
1068 # preserve them. The weak checking at the beginning tries to make
1069 # this work for both broken (no leading ':') and fixed output.
1071 _err_out="${_out#*monitor:${script_name}:*:*:*:*:}"
1073 OK) : ;; # Do nothing special.
1075 # Recast this as an error, since we can't exit with the
1076 # correct negative number.
1078 _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
1081 # Recast this as an OK, since we can't exit with the
1082 # correct negative number.
1084 _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
1086 *) : ;; # Must be ERROR, do nothing special.
# Decide, based on $event_name and on whether the reconfigure lock can be
# taken, whether to run a scheduled reconfigure for a service now, defer
# it, or replay the previous monitor status.
1092 ctdb_service_check_reconfigure ()
# Default the service argument to $service_name.
1094 [ -n "$1" ] || set -- "$service_name"
1096 # We only care about some events in this function. For others we
1098 case "$event_name" in
1099 monitor|ipreallocated|reconfigure) : ;;
1103 if ctdb_reconfigure_try_lock "$@" ; then
1104 # No events covered by this function are running, so proceed
1106 case "$event_name" in
# Run in a subshell, so variable changes made by the reconfigure do not
# leak into this shell.
1108 (ctdb_service_reconfigure "$@")
1112 if ctdb_service_needs_reconfigure "$@" ; then
1113 ctdb_service_reconfigure "$@"
1117 if ctdb_service_needs_reconfigure "$@" ; then
1118 ctdb_service_reconfigure "$@"
1119 # Given that the reconfigure might not have
1120 # resulted in the service being stable yet, we
1121 # replay the previous status since that's the best
1122 # information we have.
1123 ctdb_replay_monitor_status
1128 # Somebody else is running an event we don't want to collide
1129 # with. We proceed with caution.
1130 case "$event_name" in
1132 # Tell whoever called us to retry.
1136 # Defer any scheduled reconfigure and just run the
1137 # rest of the ipreallocated event, as per the
1138 # eventscript. There's an assumption here that the
1139 # event doesn't depend on any scheduled reconfigure.
1140 # This is true in the current code.
1144 # There is most likely a reconfigure in progress so
1145 # the service is possibly unstable. As above, we
1146 # defer any scheduled reconfigure. We also replay
1147 # the previous monitor status since that's the best
1148 # information we have.
1149 ctdb_replay_monitor_status
1155 ##################################################################
1156 # Does CTDB manage this service? - and associated auto-start/stop
# Backward-compatibility helper: $1 is the value of a legacy
# CTDB_MANAGES_* setting and $2 the service it stands for. When $1 is
# "yes" and $2 is the service currently being queried ($_service_name),
# $2 is appended to CTDB_MANAGED_SERVICES.
1158 ctdb_compat_managed_service ()
1160 if [ "$1" = "yes" -a "$2" = "$_service_name" ] ; then
1161 CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
# Test whether a service ($1, else $service_name) is listed in
# CTDB_MANAGED_SERVICES, also honouring the legacy CTDB_MANAGES_*
# settings. The final test's status is the result.
1165 is_ctdb_managed_service ()
1167 _service_name="${1:-${service_name}}"
1169 # $t is used just for readability and to allow more accurate
1170 # matching via leading/trailing spaces
1171 t=" $CTDB_MANAGED_SERVICES "
1173 # Return 0 if "<space>$_service_name<space>" appears in $t
# (prefix removal changes $t only when the pattern matched)
1174 if [ "${t#* ${_service_name} }" != "${t}" ] ; then
1178 # If above didn't match then update $CTDB_MANAGED_SERVICES for
1179 # backward compatibility and try again.
1180 ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD" "vsftpd"
1181 ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA" "samba"
1182 ctdb_compat_managed_service "$CTDB_MANAGES_SCP" "scp"
1183 ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND" "winbind"
1184 ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD" "httpd"
1185 ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI" "iscsi"
1186 ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD" "clamd"
1187 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs"
1188 ctdb_compat_managed_service "$CTDB_MANAGES_NFS" "nfs-ganesha-gpfs"
1190 t=" $CTDB_MANAGED_SERVICES "
1192 # Return 0 if "<space>$_service_name<space>" appears in $t
1193 [ "${t#* ${_service_name} }" != "${t}" ]
# Auto-start/stop hook: during the monitor event, start a service that
# is managed but was not previously managed, and stop a service that was
# previously managed.
1196 ctdb_start_stop_service ()
1198 # Do nothing unless configured to...
1199 [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0
1201 _service_name="${1:-${service_name}}"
# Only the monitor event triggers auto-start/stop.
1203 [ "$event_name" = "monitor" ] || return 0
1205 if is_ctdb_managed_service "$_service_name" ; then
1206 if ! is_ctdb_previously_managed_service "$_service_name" ; then
1207 echo "Starting service \"$_service_name\" - now managed"
1208 ctdb_service_start "$_service_name"
1212 if is_ctdb_previously_managed_service "$_service_name" ; then
1213 echo "Stopping service \"$_service_name\" - no longer managed"
1214 ctdb_service_stop "$_service_name"
# Start a service: record it as managed, run service_start, and only if
# that succeeds call ctdb_counter_init for the service. A failing
# service_start's status is returned.
1220 ctdb_service_start ()
1222 # The service is marked managed if we've ever tried to start it.
1223 ctdb_service_managed "$@"
1225 # Here we only want $1. If no argument is passed then
1226 # service_start needs to know.
1227 service_start "$@" || return $?
1229 ctdb_counter_init "$@"
# Stop a service; the service is first recorded as no longer managed.
1233 ctdb_service_stop ()
1235 ctdb_service_unmanaged "$@"
1239 # Default service_start() and service_stop() functions.
1241 # These may be overridden in an eventscript. When overriding, the
1242 # following convention must be followed. If these functions are
1243 # called with no arguments then they may use internal logic to
1244 # determine whether the service is managed and, therefore, whether
1245 # they should take any action. However, if the service name is
1246 # specified as an argument then an attempt must be made to start or
1247 # stop the service. This is because the auto-start/stop code calls
1248 # them with the service name as an argument.
1251 service "${1:-${service_name}}" start
1256 service "${1:-${service_name}}" stop
1259 ##################################################################
1261 ctdb_standard_event_handler ()
# Print the dotted-quad network address obtained by masking the IPv4
# address in $HOST with a prefix length of $MASKBITS.
1276 ipv4_host_addr_to_net_addr()
# Split the address into octets; HOST3 is the first (most significant)
# octet and HOST0 the last.
1281 local HOST0=$(echo $HOST | awk -F . '{print $4}')
1282 local HOST1=$(echo $HOST | awk -F . '{print $3}')
1283 local HOST2=$(echo $HOST | awk -F . '{print $2}')
1284 local HOST3=$(echo $HOST | awk -F . '{print $1}')
# Combine the octets into one 32-bit number.
1286 local HOST_NUM=$(( $HOST0 + $HOST1 * 256 + $HOST2 * (256 ** 2) + $HOST3 * (256 ** 3) ))
# All-ones shifted left by (32 - MASKBITS) and truncated to 32 bits.
1288 local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
1290 local NET_NUM=$(( $HOST_NUM & $MASK_NUM))
# Split the masked number back into octets, least significant first.
1292 local NET0=$(( $NET_NUM & 255 ))
1293 local NET1=$(( ($NET_NUM & (255 * 256)) / 256 ))
1294 local NET2=$(( ($NET_NUM & (255 * 256**2)) / 256**2 ))
1295 local NET3=$(( ($NET_NUM & (255 * 256**3)) / 256**3 ))
1297 echo "$NET3.$NET2.$NET1.$NET0"
# Print the dotted-quad netmask that corresponds to a prefix length of
# $MASKBITS.
1300 ipv4_maskbits_to_net_mask()
# All-ones shifted left by (32 - MASKBITS) and truncated to 32 bits.
1304 local MASK_NUM=$(( ( (2**32 - 1) * (2**(32 - $MASKBITS)) ) & (2**32 - 1) ))
# Split the mask into octets, least significant first.
1306 local MASK0=$(( $MASK_NUM & 255 ))
1307 local MASK1=$(( ($MASK_NUM & (255 * 256)) / 256 ))
1308 local MASK2=$(( ($MASK_NUM & (255 * 256**2)) / 256**2 ))
1309 local MASK3=$(( ($MASK_NUM & (255 * 256**3)) / 256**3 ))
1311 echo "$MASK3.$MASK2.$MASK1.$MASK0"
# Validate the dotted-quad IPv4 address in $ADDR: it must consist of
# exactly four dot-separated runs of digits and no octet may exceed 255.
# Problems are recorded by setting $fail to 1.
1314 ipv4_is_valid_addr()
# Remove the first "digits.digits.digits.digits" match; anything left
# over means the address has extra characters.
1319 local N=`echo $ADDR | sed -e 's/[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*//'`
1320 test -n "$N" && fail=1
# Split the address into octets; ADDR3 is the first octet and ADDR0 the last.
1322 local ADDR0=$(echo $ADDR | awk -F . '{print $4}')
1323 local ADDR1=$(echo $ADDR | awk -F . '{print $3}')
1324 local ADDR2=$(echo $ADDR | awk -F . '{print $2}')
1325 local ADDR3=$(echo $ADDR | awk -F . '{print $1}')
1327 test "$ADDR0" -gt 255 && fail=1
1328 test "$ADDR1" -gt 255 && fail=1
1329 test "$ADDR2" -gt 255 && fail=1
1330 test "$ADDR3" -gt 255 && fail=1
1332 test x"$fail" != x"0" && {
1333 #echo "IPv4: '$ADDR' is not a valid address"
1340 # iptables doesn't like being re-entered, so flock-wrap it.
1343 flock -w 30 $CTDB_VARDIR/iptables-ctdb.flock /sbin/iptables "$@"
1346 ########################################################
1348 ########################################################
1350 # Temporary directory for tickles.
1351 tickledir="$CTDB_VARDIR/state/tickles"
1352 mkdir -p "$tickledir"
1358 mkdir -p "$tickledir" # Just in case
1361 _pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
1363 # What public IPs do I hold?
1364 _ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
1366 # IPs as a regexp choice
1367 _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
1369 # Record connections to our public IPs in a temporary file
1370 _my_connections="${tickledir}/${_port}.connections"
1371 rm -f "$_my_connections"
1373 awk -v destpat="^${_ipschoice}:${_port}\$" \
1374 '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
1375 sort >"$_my_connections"
1377 # Record our current tickles in a temporary file
1378 _my_tickles="${tickledir}/${_port}.tickles"
1379 rm -f "$_my_tickles"
1380 for _i in $_ips ; do
1381 ctdb -Y gettickles $_i $_port |
1382 awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
1384 sort >"$_my_tickles"
1386 # Add tickles for connections that we haven't already got tickles for
1387 comm -23 "$_my_connections" "$_my_tickles" |
1388 while read _src _dst ; do
1389 ctdb addtickle $_src $_dst
1392 # Remove tickles for connections that are no longer there
1393 comm -13 "$_my_connections" "$_my_tickles" |
1394 while read _src _dst ; do
1395 ctdb deltickle $_src $_dst
1398 rm -f "$_my_connections" "$_my_tickles"
1401 ########################################################
1402 # load a site local config file
1403 ########################################################
1405 [ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
1409 [ -x $CTDB_BASE/rc.local ] && {
1410 . $CTDB_BASE/rc.local
1413 [ -d $CTDB_BASE/rc.local.d ] && {
1414 for i in $CTDB_BASE/rc.local.d/* ; do
1415 [ -x "$i" ] && . "$i"
1419 # We'll call this here to ensure $CTDB_CURRENT_DEBUGLEVEL is set.
1420 # This gives us a chance to override the debug level using a file in
1421 # $CTDB_BASE/rc.local.d/.
1422 ctdb_set_current_debuglevel
1424 script_name="${0##*/}" # basename
1425 service_name="$script_name" # default is just the script name
1426 service_fail_limit=1