# Hey Emacs, this is a -*- shell-script -*- !!!

# utility functions for ctdb event scripts

# Default the CTDB state directory, preferring /var/lib/ctdb when present.
[ -z "$CTDB_VARDIR" ] && {
    if [ -d "/var/lib/ctdb" ] ; then
        export CTDB_VARDIR="/var/lib/ctdb"
    else
        export CTDB_VARDIR="/var/ctdb"
    fi
}

# Default the system configuration directory.
[ -z "$CTDB_ETCDIR" ] && {
    export CTDB_ETCDIR="/etc"
}

#######################################
# pull in a system config file, if any
#
# $1 - name of the configuration to load.  If empty, the name comes
#      from $service_config, falling back to $service_name.
#
# The "ctdb" configuration is loaded before any other configuration.
# For a given name the first existing file of
# $CTDB_ETCDIR/sysconfig/NAME, $CTDB_ETCDIR/default/NAME and
# $CTDB_BASE/sysconfig/NAME is sourced.  For "ctdb" the file
# $CTDB_BASE/ctdbd.conf is sourced as well when readable.
_loadconfig() {

    if [ -z "$1" ] ; then
        foo="${service_config:-${service_name}}"
        if [ -n "$foo" ] ; then
            loadconfig "$foo"
            return
        fi
    fi

    if [ "$1" != "ctdb" ] ; then
        loadconfig "ctdb"
    fi

    if [ -z "$1" ] ; then
        return
    fi

    if [ -f "$CTDB_ETCDIR/sysconfig/$1" ]; then
        . "$CTDB_ETCDIR/sysconfig/$1"
    elif [ -f "$CTDB_ETCDIR/default/$1" ]; then
        . "$CTDB_ETCDIR/default/$1"
    elif [ -f "$CTDB_BASE/sysconfig/$1" ]; then
        . "$CTDB_BASE/sysconfig/$1"
    fi

    if [ "$1" = "ctdb" ] ; then
        _config="${CTDB_BASE}/ctdbd.conf"
        if [ -r "$_config" ] ; then
            . "$_config"
        fi
    fi
}

# Public entry point; a thin wrapper so that _loadconfig() can be
# replaced independently.
loadconfig () {
    _loadconfig "$@"
}

##############################################################

# CTDB_SCRIPT_DEBUGLEVEL can be overwritten by setting it in a
# configuration file.
#
# Prints "DEBUG: ..." lines when the level is 4 or more, otherwise
# discards the message (draining stdin if that is where it comes from).
debug ()
{
    if [ "${CTDB_SCRIPT_DEBUGLEVEL:-2}" -ge 4 ] ; then
        # If there are arguments then echo them.  Otherwise expect to
        # use stdin, which allows us to pass lots of debug using a
        # here document.
        if [ -n "$1" ] ; then
            echo "DEBUG: $*"
        else
            sed -e 's@^@DEBUG: @'
        fi
    else
        if [ -z "$1" ] ; then
            cat >/dev/null
        fi
    fi
}

# Print a message and exit.
# $1 - message, $2 - exit code (default 1)
die ()
{
    _msg="$1"
    _rc="${2:-1}"

    echo "$_msg"
    exit $_rc
}

# Log given message or stdin to either syslog or a CTDB log file
# $1 is the tag passed to logger if syslog is in use.
# Remaining arguments form the message; with none, stdin is logged.
script_log ()
{
    _tag="$1" ; shift

    case "$CTDB_LOGGING" in
        file:*|"")
            if [ -n "$CTDB_LOGGING" ] ; then
                _file="${CTDB_LOGGING#file:}"
            else
                _file="/var/log/log.ctdb"
            fi
            {
                if [ -n "$*" ] ; then
                    echo "$*"
                else
                    cat
                fi
            } >>"$_file"
            ;;
        *)
            # Handle all syslog:* variants here too.  There's no tool to do
            # the lossy things, so just use logger.
            #
            # The message is passed as a single quoted word after "--"
            # so that it is not globbed, re-split or parsed as logger
            # options.  Without a message logger is left to read stdin.
            if [ -n "$*" ] ; then
                logger -t "ctdbd: ${_tag}" -- "$*"
            else
                logger -t "ctdbd: ${_tag}"
            fi
            ;;
    esac
}

# When things are run in the background in an eventscript then logging
# output might get lost.  This is the "solution".  :-)
#
# NOTE(review): in the copy of this file that was reviewed, the text
# from just after '"$@" 2>&1' in this function up to the
# '"$_pnn_file"' redirection inside ctdb_get_pnn() was missing (it looks
# like everything between a '<' and a later '>' was stripped).  The
# remainder of this function and the start of ctdb_get_pnn() are
# reconstructed and must be verified against a pristine copy.  Any
# definitions that lived in between are NOT restored here; note that
# service() is called later in this file but is not defined in the
# visible text.
background_with_logging ()
{
    (
        "$@" 2>&1 </dev/null |
        script_log "${script_name}&"
    )&

    return 0
}

# Set $pnn from the file $_pnn_file, creating that file first if it
# does not exist.
# NOTE(review): everything before the final "read" is reconstructed,
# see the note on background_with_logging() - confirm the cache path
# and the command used to populate it.
ctdb_get_pnn ()
{
    _pnn_file="$CTDB_VARDIR/state/my-pnn"
    if [ ! -f "$_pnn_file" ] ; then
        ctdb pnn | sed -e 's@.*: @@' >"$_pnn_file"
    fi

    read pnn <"$_pnn_file"
}

######################################################
# wrapper around /proc/ settings to allow them to be hooked
# for testing
# 1st arg is relative path under /proc/, 2nd arg is value to set
set_proc ()
{
    echo "$2" >"/proc/$1"
}

# As set_proc() but silently does nothing if the file is not writable.
set_proc_maybe ()
{
    if [ -w "/proc/$1" ] ; then
        set_proc "$1" "$2"
    fi
}

######################################################
# wrapper around getting file contents from /proc/ to allow
# this to be hooked for testing
# 1st arg is relative path under /proc/
get_proc ()
{
    cat "/proc/$1"
}

######################################################
# Print up to $_max kernel stack traces for processes named $_program
# $1 - program name (looked up with pidof), $2 - maximum (default 1)
program_stack_traces ()
{
    _prog="$1"
    _max="${2:-1}"

    _count=1
    for _pid in $(pidof "$_prog") ; do
        [ $_count -le $_max ] || break

        # Do this first to avoid racing with process exit
        _stack=$(get_proc "${_pid}/stack" 2>/dev/null)
        if [ -n "$_stack" ] ; then
            echo "Stack trace for ${_prog}[${_pid}]:"
            echo "$_stack"
            _count=$(($_count + 1))
        fi
    done
}

######################################################
# Check the health of NFS services
#
# Use .check files in given directory.
# Default is "${CTDB_BASE}/nfs-checks.d/"
######################################################
nfs_check_services ()
{
    _dir="${1:-${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}}"

    # Files must end with .check - avoids editor backups, RPM fu, ...
    for _f in "$_dir"/[0-9][0-9].*.check ; do
        _t="${_f%.check}"
        _progname="${_t##*/[0-9][0-9].}"

        nfs_check_service "$_progname" <"$_f"
    done
}

######################################################
# Check the health of an NFS service
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
#
# Reads variables from stdin
#
# Variables are:
#
# * family             - "tcp" or "udp" or space separated list
#                        default: tcp
# * version            - optional, RPC service version number
#                        default is to omit to check for any version
# * unhealthy_after    - number of check fails before unhealthy
#                        default: 1
# * restart_every      - number of check fails before restart
#                        default: 0, meaning no restart
# * service_stop_cmd   - command to stop service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_start_cmd  - command to start service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_debug_cmd  - command to debug a service after trying to stop it;
#                        for example, it can be useful to print stack
#                        traces of threads that have not exited, since
#                        they may be stuck doing I/O;
#                        no default, see also function program_stack_traces()
#
# Quoting in values is not preserved
#
# Exits the calling shell with status 1 if the service is judged
# unhealthy or the input contains an unknown variable.
######################################################
nfs_check_service ()
{
    _progname="$1"

    (
        # Subshell to restrict scope variables...

        # Defaults
        family="tcp"
        version=""
        unhealthy_after=1
        restart_every=0
        service_stop_cmd=""
        service_start_cmd=""
        service_debug_cmd=""

        # Eval line-by-line.  Expands variable references in values.
        # Also allows variable name checking, which seems useful.
        # NOTE(review): eval executes whatever follows the variable
        # name, so the .check files must be trusted configuration.
        while read _line ; do
            case "$_line" in
                \#*|"") : ;;  # Ignore comments, blank lines

                family=*|version=*|\
                unhealthy_after=*|restart_every=*|\
                service_stop_cmd=*|service_start_cmd=*|\
                service_debug_cmd=*)

                    eval "$_line"
                    ;;
                *)
                    echo "ERROR: Unknown variable for ${_progname}: ${_line}"
                    exit 1
            esac
        done

        _service_name="nfs_${_progname}"

        if nfs_check_rpcinfo \
               "$_progname" "$version" "$family" >/dev/null ; then
            # Healthy: reset the failure counter, but only if a
            # counter is actually in use for this service.
            if [ $unhealthy_after -ne 1 -o $restart_every -ne 0 ] ; then
                ctdb_counter_init "$_service_name"
            fi
            exit 0
        fi

        ctdb_counter_incr "$_service_name"
        _failcount=$(ctdb_counter_get "$_service_name")

        _unhealthy=false
        if [ $unhealthy_after -gt 0 ] ; then
            if [ $_failcount -ge $unhealthy_after ] ; then
                _unhealthy=true
                echo "ERROR: $ctdb_check_rpc_out"
            fi
        fi

        if [ $restart_every -gt 0 ] ; then
            if [ $(($_failcount % $restart_every)) -eq 0 ] ; then
                if ! $_unhealthy ; then
                    echo "WARNING: $ctdb_check_rpc_out"
                fi
                nfs_restart_service
            fi
        fi

        if $_unhealthy ; then
            exit 1
        fi

        return 0
    ) || exit 1
}

# Restart the service being checked by nfs_check_service().
# Uses: $service_stop_cmd, $service_start_cmd, $service_debug_cmd and
# $_progname as set by nfs_check_service().
nfs_restart_service ()
{
    if [ -z "$service_stop_cmd" -o -z "$service_start_cmd" ] ; then
        die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
    fi

    echo "Trying to restart service \"${_progname}\"..."
    # Using eval means variables can contain semicolon separated commands
    eval "$service_stop_cmd"
    if [ -n "$service_debug_cmd" ] ; then
        eval "$service_debug_cmd"
    fi
    background_with_logging eval "$service_start_cmd"
}

######################################################
# Check an RPC service with rpcinfo
#
# On failure a description is left in $ctdb_check_rpc_out, printed,
# and 1 is returned.
######################################################
ctdb_check_rpc ()
{
    _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
    _version="$2"         # optional, not passed if empty/unset
    _family="${3:-tcp}"   # optional, default is "tcp"

    _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"

    # $_version is deliberately unquoted so that it vanishes when empty.
    if ! ctdb_check_rpc_out=$(rpcinfo -T $_family $_localhost \
                                      $_progname $_version 2>&1) ; then
        ctdb_check_rpc_out="$_progname failed RPC check:
$ctdb_check_rpc_out"
        echo "$ctdb_check_rpc_out"
        return 1
    fi
}

# Run ctdb_check_rpc() for every combination of the given versions and
# families, returning the status of the first failing check.
nfs_check_rpcinfo ()
{
    _progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
    _versions="$2"        # optional, space separated, not passed if empty/unset
    _families="${3:-tcp}" # optional, space separated, default is "tcp"

    for _family in $_families ; do
        if [ -n "$_versions" ] ; then
            for _version in $_versions ; do
                ctdb_check_rpc $_progname $_version $_family || return $?
            done
        else
            ctdb_check_rpc $_progname "" $_family || return $?
        fi
    done
}

######################################################
# Ensure $service_name is set
assert_service_name ()
{
    [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set"
}

######################################################
# check a set of directories is available
# return 1 on a missing directory
# directories are read from stdin
# Lines containing '%' are skipped.  On failure $d is left holding the
# offending directory.
######################################################
ctdb_check_directories_probe()
{
    while IFS="" read d ; do
        case "$d" in
            *%*)
                continue
                ;;
            *)
                [ -d "${d}/." ] || return 1
        esac
    done
}

######################################################
# check a set of directories is available
# directories are read from stdin
# Prints an error and exits with status 1 on a missing directory.
######################################################
ctdb_check_directories()
{
    ctdb_check_directories_probe || {
        echo "ERROR: $service_name directory \"$d\" not available"
        exit 1
    }
}

######################################################
# check a set of tcp ports
# usage: ctdb_check_tcp_ports PORT...
######################################################

# This flag file is created when a service is initially started.  It
# is deleted the first time TCP port checks for that service succeed.
# Until then ctdb_check_tcp_ports() prints a more subtle "error"
# message if a port check fails.
# Set $_ctdb_service_started_file to the per-service "started" flag file.
_ctdb_check_tcp_common ()
{
    assert_service_name
    _ctdb_service_started_file="$ctdb_fail_dir/$service_name.started"
}

# Create the "started" flag file for $service_name.
ctdb_check_tcp_init ()
{
    _ctdb_check_tcp_common
    mkdir -p "${_ctdb_service_started_file%/*}" # dirname
    touch "$_ctdb_service_started_file"
}

# Check whether something is listening on all of the given TCP ports
# using the "ctdb checktcpport" command.
#
# "ctdb checktcpport" exiting with 0 is treated as "nothing is
# listening" (it was able to bind), 98 as "port in use".
#
# NOTE(review): in the copy of this file that was reviewed, the text
# after "debug <" in the catch-all branch up to the header of
# ctdb_check_unix_socket() was missing (it looks like everything
# between a '<' and a later '>' was stripped).  The here-document, the
# "return $_ret" and the success path that removes the "started" flag
# file are reconstructed - verify against a pristine copy.
ctdb_check_tcp_ports()
{
    if [ -z "$1" ] ; then
        echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified"
        exit 1
    fi

    for _p ; do  # process each function argument (port)
        _cmd="ctdb checktcpport $_p"
        _out=$($_cmd 2>&1)
        _ret=$?
        case "$_ret" in
            0)
                _ctdb_check_tcp_common
                if [ ! -f "$_ctdb_service_started_file" ] ; then
                    echo "ERROR: $service_name tcp port $_p is not responding"
                    debug "\"ctdb checktcpport $_p\" was able to bind to port"
                else
                    echo "INFO: $service_name tcp port $_p is not responding"
                fi

                return 1
                ;;
            98)
                # Couldn't bind, something already listening, next port...
                continue
                ;;
            *)
                echo "ERROR: unexpected error running \"ctdb checktcpport\""
                debug <<EOF
$_cmd (exited with $_ret) with output:
$_out
EOF
                return $_ret
                ;;
        esac
    done

    # All ports listening
    _ctdb_check_tcp_common
    rm -f "$_ctdb_service_started_file"
    return 0
}

######################################################
# check a unix socket
# usage: ctdb_check_unix_socket SOCKET_PATH
#
# Returns 1, with an error message, if netstat shows no listening
# unix socket whose line ends with SOCKET_PATH.  Does nothing if
# SOCKET_PATH is empty.
######################################################
ctdb_check_unix_socket() {
    socket_path="$1"
    [ -z "$socket_path" ] && return

    if ! netstat --unix -a -n | grep -q "^unix.*LISTEN.*${socket_path}$"; then
        echo "ERROR: $service_name socket $socket_path not found"
        return 1
    fi
}

######################################################
# check a command returns zero status
# usage: ctdb_check_command COMMAND [ARG...]
#
# On failure prints an error, passes the command's output to debug()
# and exits with status 1.
######################################################
ctdb_check_command ()
{
    _out=$("$@" 2>&1) || {
        echo "ERROR: $* returned error"
        echo "$_out" | debug
        exit 1
    }
}

################################################
# kill off any TCP connections with the given IP
#
# $1 - IP address
# $2 - "oneway" to kill only one direction of every connection
################################################
kill_tcp_connections ()
{
    _ip="$1"

    _oneway=false
    if [ "$2" = "oneway" ] ; then
        _oneway=true
    fi

    get_tcp_connections_for_ip "$_ip" | {
        _killcount=0
        _connections=""
        # Literal newline: connections are fed to "ctdb killtcp" one
        # per line.
        _nl="
"
        while read _dst _src; do
            _destport="${_dst##*:}"
            __oneway=$_oneway
            case $_destport in
                # we only do one-way killtcp for CIFS
                139|445) __oneway=true ;;
            esac

            echo "Killing TCP connection $_src $_dst"
            _connections="${_connections}${_nl}${_src} ${_dst}"
            if ! $__oneway ; then
                _connections="${_connections}${_nl}${_dst} ${_src}"
            fi

            _killcount=$(($_killcount + 1))
        done

        if [ $_killcount -eq 0 ] ; then
            return
        fi

        echo "$_connections" | ctdb killtcp || {
            echo "Failed to send killtcp control"
            return
        }

        # Poll, up to 4 times with a 1 second sleep in between, for
        # the connections to disappear.
        _count=0
        while : ; do
            _remaining=$(get_tcp_connections_for_ip "$_ip" | wc -l)

            if [ $_remaining -eq 0 ] ; then
                echo "Killed $_killcount TCP connections to released IP $_ip"
                return
            fi

            _count=$(($_count + 1))
            if [ $_count -gt 3 ] ; then
                echo "Timed out killing tcp connections for IP $_ip ($_remaining remaining)"
                return
            fi

            echo "Waiting for $_remaining connections to be killed for IP $_ip"
            sleep 1
        done
    }
}

##################################################################
# kill off the local end for any TCP connections with the given IP
##################################################################
kill_tcp_connections_local_only ()
{
    kill_tcp_connections "$1" "oneway"
}

##################################################################
# tickle any
# TCP connections with the given IP
##################################################################
tickle_tcp_connections ()
{
    _ip="$1"

    get_tcp_connections_for_ip "$_ip" |
    {
        _failed=false

        # Tickle both directions of every connection.
        while read dest src; do
            echo "Tickle TCP connection $src $dest"
            ctdb tickle $src $dest >/dev/null 2>&1 || _failed=true
            echo "Tickle TCP connection $dest $src"
            ctdb tickle $dest $src >/dev/null 2>&1 || _failed=true
        done

        if $_failed ; then
            echo "Failed to send tickle control"
        fi
    }
}

# Print "LOCAL REMOTE" (netstat columns 4 and 5) for every ESTABLISHED
# TCP connection whose local address is the given IP, also matching
# the "::ffff:" prefixed form.
get_tcp_connections_for_ip ()
{
    _ip="$1"

    netstat -tn | awk -v ip="$_ip" \
        'index($1, "tcp") == 1 && \
         (index($4, ip ":") == 1 || index($4, "::ffff:" ip ":") == 1) \
         && $6 == "ESTABLISHED" \
         {print $4" "$5}'
}

##################################################################
# use statd-callout to update NFS lock info
##################################################################
nfs_update_lock_info ()
{
    if [ -x "$CTDB_BASE/statd-callout" ] ; then
        "$CTDB_BASE/statd-callout" update
    fi
}

########################################################
# start/stop the Ganesha nfs service
# $1 - start, stop or restart
########################################################
startstop_ganesha()
{
    _service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"
    case "$1" in
        start)
            service "$_service_name" start
            ;;
        stop)
            service "$_service_name" stop
            ;;
        restart)
            service "$_service_name" stop
            nfs_dump_some_threads "rpc.statd"
            service "$_service_name" start
            ;;
    esac
}

########################################################
# start/stop the nfs service on different platforms
# $1 - start, stop, restart or restart-stop
# Exits with status 1 on an unrecognised platform.
########################################################
startstop_nfs() {
    PLATFORM="unknown"
    [ -x "$CTDB_ETCDIR/init.d/nfsserver" ] && {
        PLATFORM="sles"
    }
    [ -x "$CTDB_ETCDIR/init.d/nfslock" -o \
        -r /usr/lib/systemd/system/nfs-lock.service ] && {
        PLATFORM="rhel"
    }

    case $PLATFORM in
        sles)
            case $1 in
                start)
                    service nfsserver start
                    ;;
                stop)
                    service nfsserver stop > /dev/null 2>&1
                    ;;
                restart)
                    set_proc "fs/nfsd/threads" 0
                    service nfsserver stop > /dev/null 2>&1
                    pkill -9 nfsd
                    nfs_dump_some_threads
                    service nfsserver start
                    ;;
                restart-stop)
                    set_proc "fs/nfsd/threads" 0
                    service nfsserver stop > /dev/null 2>&1
                    pkill -9 nfsd
                    ;;
            esac
            ;;
        rhel)
            case $1 in
                start)
                    service nfslock start
                    service nfs start
                    ;;
                stop)
                    service nfs stop
                    service nfslock stop
                    ;;
                restart)
                    set_proc "fs/nfsd/threads" 0
                    service nfs stop > /dev/null 2>&1
                    service nfslock stop > /dev/null 2>&1
                    pkill -9 nfsd
                    nfs_dump_some_threads
                    service nfslock start
                    service nfs start
                    ;;
                restart-stop)
                    set_proc "fs/nfsd/threads" 0
                    service nfs stop > /dev/null 2>&1
                    service nfslock stop > /dev/null 2>&1
                    pkill -9 nfsd
                    ;;
            esac
            ;;
        *)
            echo "Unknown platform. NFS is not supported with ctdb"
            exit 1
            ;;
    esac
}

# Dump up to the configured number of nfsd thread backtraces.
# $1 - program name (default "nfsd"); the number comes from
# $CTDB_NFS_DUMP_STUCK_THREADS (default 5, 0 disables).
nfs_dump_some_threads ()
{
    _prog="${1:-nfsd}"

    _num="${CTDB_NFS_DUMP_STUCK_THREADS:-5}"
    [ $_num -gt 0 ] || return 0

    program_stack_traces "$_prog" $_num
}

########################################################
# start/stop the nfs lockmanager service on different platforms
# $1 - start, stop or restart
# Exits with status 1 on an unrecognised platform.
########################################################
startstop_nfslock() {
    PLATFORM="unknown"
    [ -x "$CTDB_ETCDIR/init.d/nfsserver" ] && {
        PLATFORM="sles"
    }
    [ -x "$CTDB_ETCDIR/init.d/nfslock" -o \
        -r /usr/lib/systemd/system/nfs-lock.service ] && {
        PLATFORM="rhel"
    }

    case $PLATFORM in
        sles)
            # for sles there is no service for lockmanager
            # so we instead just shutdown/restart nfs
            case $1 in
                start)
                    service nfsserver start
                    ;;
                stop)
                    service nfsserver stop > /dev/null 2>&1
                    ;;
                restart)
                    service nfsserver stop > /dev/null 2>&1
                    service nfsserver start
                    ;;
            esac
            ;;
        rhel)
            case $1 in
                start)
                    service nfslock start
                    ;;
                stop)
                    service nfslock stop > /dev/null 2>&1
                    ;;
                restart)
                    service nfslock stop > /dev/null 2>&1
                    service nfslock start
                    ;;
            esac
            ;;
        *)
            echo "Unknown platform. NFS locking is not supported with ctdb"
            exit 1
            ;;
    esac
}

########################################################

# Add an address to an interface.
# $1 - interface, $2 - IP address, $3 - mask bits
# Returns 1 if the address could not be added.
add_ip_to_iface ()
{
    _iface=$1
    _ip=$2
    _maskbits=$3

    # Ensure interface is up
    ip link set "$_iface" up || \
        die "Failed to bringup interface $_iface"

    # Only need to define broadcast for IPv4
    # (test the function's own $_ip, not whatever a caller may have
    # left in a global $ip)
    case "$_ip" in
        *:*) _bcast=""      ;;
        *)   _bcast="brd +" ;;
    esac

    ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || {
        echo "Failed to add $_ip/$_maskbits on dev $_iface"
        return 1
    }

    # Wait 5 seconds for IPv6 addresses to stop being tentative...
    if [ -z "$_bcast" ] ; then
        for _x in $(seq 1 10) ; do
            ip addr show to "${_ip}/128" | grep -q "tentative" || break
            sleep 0.5
        done

        # If the address was a duplicate then it won't be on the
        # interface so flag an error.
        _t=$(ip addr show to "${_ip}/128")
        case "$_t" in
            "")
                echo "Failed to add $_ip/$_maskbits on dev $_iface"
                return 1
                ;;
            *tentative*|*dadfailed*)
                echo "Failed to add $_ip/$_maskbits on dev $_iface"
                ip addr del "$_ip/$_maskbits" dev "$_iface"
                return 1
                ;;
        esac
    fi
}

# Remove an address from an interface.
# $1 - interface, $2 - IP address, $3 - mask bits
# Returns 1 if the address could not be deleted.
delete_ip_from_iface()
{
    _iface=$1
    _ip=$2
    _maskbits=$3

    # This could be set globally for all interfaces but it is probably
    # better to avoid surprises, so limit it the interfaces where CTDB
    # has public IP addresses.  There isn't anywhere else convenient
    # to do this so just set it each time.  This is much cheaper than
    # remembering and re-adding secondaries.
    set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1

    ip addr del "$_ip/$_maskbits" dev "$_iface" || {
        echo "Failed to del $_ip on dev $_iface"
        return 1
    }
}

# If the given IP is hosted then print 2 items: maskbits and iface
# (the address family, "inet" or "inet6", is printed as a 3rd item)
ip_maskbits_iface ()
{
    _addr="$1"

    case "$_addr" in
        *:*) _family="inet6" ; _bits=128 ;;
        *)   _family="inet"  ; _bits=32  ;;
    esac

    # The interface name is taken from the 1st line of output, minus
    # the trailing ':' and any "@..." suffix.
    ip addr show to "${_addr}/${_bits}" 2>/dev/null | \
        awk -v family="${_family}" \
            'NR == 1 { iface = $2; sub(":$", "", iface) ; \
                       sub("@.*", "", iface) } \
             $1 ~ /inet/ { mask = $2; sub(".*/", "", mask); \
                           print mask, iface, family }'
}

# Remove the given address, if hosted, from whichever interface has it.
# $1 - IP address, optionally followed by /maskbits (which is ignored)
drop_ip ()
{
    _addr="${1%/*}"  # Remove optional maskbits

    set -- $(ip_maskbits_iface $_addr)
    if [ -n "$1" ] ; then
        _maskbits="$1"
        _iface="$2"
        echo "Removing public address $_addr/$_maskbits from device $_iface"
        delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
    fi
}

# Drop every address listed (1st field of each line) in the file named
# by $CTDB_PUBLIC_ADDRESSES.
drop_all_public_ips ()
{
    while read _ip _x ; do
        drop_ip "$_ip"
    done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
}

# Flush the IPv4 and IPv6 route caches where the kernel allows it.
flush_route_cache ()
{
    set_proc_maybe sys/net/ipv4/route/flush 1
    set_proc_maybe sys/net/ipv6/route/flush 1
}

########################################################
# Simple counters
#
# A counter is a file under $ctdb_fail_dir whose size in bytes is the
# count.  The optional 1st argument of each function names the
# counter; the default is $service_name, then $script_name.
_ctdb_counter_common () {
    _service_name="${1:-${service_name:-${script_name}}}"
    _counter_file="$ctdb_fail_dir/$_service_name"
    mkdir -p "${_counter_file%/*}" # dirname
}
ctdb_counter_init () {
    _ctdb_counter_common "$1"

    >"$_counter_file"
}
ctdb_counter_incr () {
    _ctdb_counter_common "$1"

    # unary counting!
    printf '1' >> "$_counter_file"
}
ctdb_counter_get () {
    _ctdb_counter_common "$1"
    # unary counting!
    stat -c "%s" "$_counter_file" 2>/dev/null || echo 0
}

# Compare a counter against a limit.
# $1 - "error" to print a message and exit 1 when the limit is hit,
#      anything else to silently return 1
# $2 - operator: an integer operator of test(1), or "%" meaning
#      "counter is a multiple of the limit"
# $3 - limit (default $service_fail_limit)
# $4 - optional counter name, as for the other counter functions
ctdb_check_counter () {
    _msg="${1:-error}"  # "error"  - anything else is silent on fail
    _op="${2:--ge}"  # an integer operator supported by test
    _limit="${3:-${service_fail_limit}}"
    # Drop at most the 3 arguments consumed above.  A bare "shift 3"
    # fails when fewer were passed, which would leave $1 (the message
    # type) to be mistaken for the counter name below.
    if [ $# -ge 3 ] ; then
        shift 3
    else
        shift $#
    fi

    # Resolve $_service_name in this shell for the message below: the
    # command substitution runs in a subshell, so nothing it sets is
    # visible here.
    _ctdb_counter_common "$1"
    _size=$(ctdb_counter_get "$1")

    _hit=false
    if [ "$_op" != "%" ] ; then
        if [ $_size $_op $_limit ] ; then
            _hit=true
        fi
    else
        if [ $(($_size $_op $_limit)) -eq 0 ] ; then
            _hit=true
        fi
    fi
    if $_hit ; then
        if [ "$_msg" = "error" ] ; then
            echo "ERROR: $_size consecutive failures for $_service_name, marking node unhealthy"
            exit 1
        else
            return 1
        fi
    fi
}

########################################################

ctdb_status_dir="$CTDB_VARDIR/state/service_status"
ctdb_fail_dir="$CTDB_VARDIR/state/failcount"

# Create, and record in $service_state_dir, a state directory for the
# service named by $1 (default $service_name).  Exits 1 on failure.
ctdb_setup_service_state_dir ()
{
    service_state_dir="$CTDB_VARDIR/state/service_state/${1:-${service_name}}"
    mkdir -p "$service_state_dir" || {
        echo "Error creating state dir \"$service_state_dir\""
        exit 1
    }
}

########################################################
# Managed status history, for auto-start/stop

ctdb_managed_dir="$CTDB_VARDIR/state/managed_history"

_ctdb_managed_common ()
{
    _ctdb_managed_file="$ctdb_managed_dir/$service_name"
}

# Record that $service_name is managed.
ctdb_service_managed ()
{
    _ctdb_managed_common
    mkdir -p "$ctdb_managed_dir"
    touch "$_ctdb_managed_file"
}

# Forget that $service_name is managed.
ctdb_service_unmanaged ()
{
    _ctdb_managed_common
    rm -f "$_ctdb_managed_file"
}

# Succeeds if $service_name has been recorded as managed.
is_ctdb_previously_managed_service ()
{
    _ctdb_managed_common
    [ -f "$_ctdb_managed_file" ]
}

########################################################
# Check and set status

# $1 - status name, $2 - file containing the problem description
log_status_cat ()
{
    echo "node is \"$1\", \"${script_name}\" reports problem: $(cat "$2")"
}

# Returns 1 if $script_name is flagged unhealthy, 2 if banned, else 0.
ctdb_checkstatus ()
{
    if [ -r "$ctdb_status_dir/$script_name/unhealthy" ] ; then
        log_status_cat "unhealthy" "$ctdb_status_dir/$script_name/unhealthy"
        return 1
    elif [ -r "$ctdb_status_dir/$script_name/banned" ] ; then
        log_status_cat "banned" "$ctdb_status_dir/$script_name/banned"
        return 2
    else
        return 0
    fi
}

# $1 - "unhealthy" or "banned" to set that status, with the contents
#      of file $2 as the description; anything else clears both.
ctdb_setstatus ()
{
    d="$ctdb_status_dir/$script_name"
    case "$1" in
        unhealthy|banned)
            mkdir -p "$d"
            cat "$2" >"$d/$1"
            ;;
        *)
            for i in "banned" "unhealthy" ; do
                rm -f "$d/$i"
            done
            ;;
    esac
}

##################################################################
# Reconfigure a service on demand

_ctdb_service_reconfigure_common ()
{
    _d="$ctdb_status_dir/${service_name}"
    mkdir -p "$_d"
    _ctdb_service_reconfigure_flag="$_d/reconfigure"
}

# Succeeds if a reconfigure has been scheduled for $service_name.
ctdb_service_needs_reconfigure ()
{
    _ctdb_service_reconfigure_common
    [ -e "$_ctdb_service_reconfigure_flag" ]
}

# Schedule a reconfigure by creating the flag file.
ctdb_service_set_reconfigure ()
{
    _ctdb_service_reconfigure_common
    >"$_ctdb_service_reconfigure_flag"
}

# Cancel a scheduled reconfigure.
ctdb_service_unset_reconfigure ()
{
    _ctdb_service_reconfigure_common
    rm -f "$_ctdb_service_reconfigure_flag"
}

# Run service_reconfigure() and, if it succeeds, reset the fail counter.
ctdb_service_reconfigure ()
{
    echo "Reconfiguring service \"${service_name}\"..."
    ctdb_service_unset_reconfigure
    service_reconfigure || return $?
    ctdb_counter_init
}

# Default service_reconfigure() function does nothing.
service_reconfigure ()
{
    :
}

# Try to take the reconfigure lock for $service_name.  Fails if the
# lock file names another PID that is still alive.
ctdb_reconfigure_take_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"
    mkdir -p "${_lock%/*}" # dirname
    touch "$_lock"

    (
        flock 0
        # This is overkill but will work if we need to extend this to
        # allow certain events to run multiple times in parallel
        # (e.g. takeip) and write multiple PIDs to the file.
        read _locker_event
        if [ -n "$_locker_event" ] ; then
            while read _pid ; do
                if [ -n "$_pid" -a "$_pid" != $$ ] && \
                    kill -0 "$_pid" 2>/dev/null ; then
                    exit 1
                fi
            done
        fi

        printf "%s\n%s\n" "$event_name" $$ >"$_lock"
        exit 0
    ) <"$_lock"
}

ctdb_reconfigure_release_lock ()
{
    _ctdb_service_reconfigure_common
    _lock="${_d}/reconfigure_lock"

    rm -f "$_lock"
}

# Print the error output recorded for this script's last monitor event
# and exit with its recorded status code.
ctdb_replay_monitor_status ()
{
    echo "Replaying previous status for this script due to reconfigure..."
    # Leading separator ('|') is missing in some versions...
    _out=$(ctdb scriptstatus -X | grep -E "^\|?monitor\|${script_name}\|")
    # Output looks like this:
    # |monitor|60.nfs|1|ERROR|1314764004.030861|1314764004.035514|foo bar|
    # This is the cheapest way of getting fields in the middle.
    set -- $(IFS="|" ; echo $_out)
    _code="$3"
    _status="$4"
    # The error output field can include colons so we'll try to
    # preserve them.  The weak checking at the beginning tries to make
    # this work for both broken (no leading '|') and fixed output.
    _out="${_out%|}"
    _err_out="${_out#*monitor|${script_name}|*|*|*|*|}"
    case "$_status" in
        OK) : ;;  # Do nothing special.
        TIMEDOUT)
            # Recast this as an error, since we can't exit with the
            # correct negative number.
            _code=1
            _err_out="[Replay of TIMEDOUT scriptstatus - note incorrect return code.] ${_err_out}"
            ;;
        DISABLED)
            # Recast this as an OK, since we can't exit with the
            # correct negative number.
            _code=0
            _err_out="[Replay of DISABLED scriptstatus - note incorrect return code.] ${_err_out}"
            ;;
        *) : ;;  # Must be ERROR, do nothing special.
    esac
    if [ -n "$_err_out" ] ; then
        echo "$_err_out"
    fi
    exit $_code
}

# Handle reconfiguration for the monitor, ipreallocated and reconfigure
# events; returns immediately for any other event.
ctdb_service_check_reconfigure ()
{
    assert_service_name

    # We only care about some events in this function.  For others we
    # return now.
    case "$event_name" in
        monitor|ipreallocated|reconfigure) : ;;
        *) return 0 ;;
    esac

    if ctdb_reconfigure_take_lock ; then
        # No events covered by this function are running, so proceed
        # without further checks.
        case "$event_name" in
            reconfigure)
                (ctdb_service_reconfigure)
                exit $?
                ;;
            ipreallocated)
                if ctdb_service_needs_reconfigure ; then
                    ctdb_service_reconfigure
                fi
                ;;
        esac

        ctdb_reconfigure_release_lock
    else
        # Somebody else is running an event we don't want to collide
        # with.  We proceed with caution.
        case "$event_name" in
            reconfigure)
                # Tell whoever called us to retry.
                exit 2
                ;;
            ipreallocated)
                # Defer any scheduled reconfigure and just run the
                # rest of the ipreallocated event, as per the
                # eventscript.  There's an assumption here that the
                # event doesn't depend on any scheduled reconfigure.
                # This is true in the current code.
                return 0
                ;;
            monitor)
                # There is most likely a reconfigure in progress so
                # the service is possibly unstable.  As above, we
                # defer any scheduled reconfigured.  We also replay
                # the previous monitor status since that's the best
                # information we have.
                ctdb_replay_monitor_status
                ;;
        esac
    fi
}

##################################################################
# Does CTDB manage this service? - and associated auto-start/stop

# $1 - value of a legacy CTDB_MANAGES_* variable, $2 - service name.
# Appends $2 to $CTDB_MANAGED_SERVICES if $1 is "yes" and $2 is the
# current $service_name.
ctdb_compat_managed_service ()
{
    if [ "$1" = "yes" -a "$2" = "$service_name" ] ; then
        CTDB_MANAGED_SERVICES="$CTDB_MANAGED_SERVICES $2"
    fi
}

# Succeeds if $service_name is listed in $CTDB_MANAGED_SERVICES or
# enabled via one of the legacy CTDB_MANAGES_* variables.
is_ctdb_managed_service ()
{
    assert_service_name

    # $t is used just for readability and to allow better accurate
    # matching via leading/trailing spaces
    t=" $CTDB_MANAGED_SERVICES "

    # Return 0 if "$service_name" appears in $t
    if [ "${t#* ${service_name} }" != "${t}" ] ; then
        return 0
    fi

    # If above didn't match then update $CTDB_MANAGED_SERVICES for
    # backward compatibility and try again.
    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
    ctdb_compat_managed_service "$CTDB_MANAGES_WINBIND"  "winbind"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "apache2"
    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs"
    ctdb_compat_managed_service "$CTDB_MANAGES_NFS"      "nfs-ganesha-gpfs"

    t=" $CTDB_MANAGED_SERVICES "

    # Return 0 if "$service_name" appears in $t
    [ "${t#* ${service_name} }" != "${t}" ]
}

# Start or stop $service_name in response to the service-start and
# service-stop pseudo-events or, during monitor events with
# $CTDB_SERVICE_AUTOSTARTSTOP = yes, when its managed state changed.
ctdb_start_stop_service ()
{
    assert_service_name

    # Allow service-start/service-stop pseudo-events to start/stop
    # services when we're not auto-starting/stopping and we're not
    # monitoring.
    case "$event_name" in
        service-start)
            if is_ctdb_managed_service ; then
                die 'service-start event not permitted when service is managed'
            fi
            if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
                die 'service-start event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            fi
            ctdb_service_start
            exit $?
            ;;
        service-stop)
            if is_ctdb_managed_service ; then
                die 'service-stop event not permitted when service is managed'
            fi
            if [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] ; then
                die 'service-stop event not permitted with $CTDB_SERVICE_AUTOSTARTSTOP = yes'
            fi
            ctdb_service_stop
            exit $?
            ;;
    esac

    # Do nothing unless configured to...
    [ "$CTDB_SERVICE_AUTOSTARTSTOP" = "yes" ] || return 0

    [ "$event_name" = "monitor" ] || return 0

    if is_ctdb_managed_service ; then
        if ! is_ctdb_previously_managed_service ; then
            echo "Starting service \"$service_name\" - now managed"
            background_with_logging ctdb_service_start
            exit $?
        fi
    else
        if is_ctdb_previously_managed_service ; then
            echo "Stopping service \"$service_name\" - no longer managed"
            background_with_logging ctdb_service_stop
            exit $?
        fi
    fi
}

ctdb_service_start ()
{
    # The service is marked managed if we've ever tried to start it.
    ctdb_service_managed

    service_start || return $?

    ctdb_counter_init
    ctdb_check_tcp_init
}

ctdb_service_stop ()
{
    ctdb_service_unmanaged
    service_stop
}

# Default service_start() and service_stop() functions.

# These may be overridden in an eventscript.
service_start ()
{
    service "$service_name" start
}

service_stop ()
{
    service "$service_name" stop
}

##################################################################

# Handle the "status" and "setstatus" events, exiting afterwards.
# Any other event is left to the caller.
ctdb_standard_event_handler ()
{
    case "$1" in
        status)
            ctdb_checkstatus
            exit
            ;;
        setstatus)
            shift
            ctdb_setstatus "$@"
            exit
            ;;
    esac
}

# Run iptables, or ip6tables when $1 is "inet6", with the remaining
# arguments.
iptables_wrapper ()
{
    _family="$1" ; shift
    if [ "$_family" = "inet6" ] ; then
        _iptables_cmd="ip6tables"
    else
        _iptables_cmd="iptables"
    fi

    # iptables doesn't like being re-entered, so flock-wrap it.
    flock -w 30 "${CTDB_VARDIR}/iptables-ctdb.flock" "$_iptables_cmd" "$@"
}

# AIX (and perhaps others?) doesn't have mktemp
# The replacement understands only an optional leading "-d".
if ! type mktemp >/dev/null 2>&1 ; then
    mktemp ()
    {
        _dir=false
        if [ "$1" = "-d" ] ; then
            _dir=true
            shift
        fi
        _d="${TMPDIR:-/tmp}"
        _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \
            md5sum | \
            sed -e 's@\(..........\).*@\1@')
        _t="${_d}/tmp.${_hex10}"
        (
            umask 077
            if $_dir ; then
                mkdir "$_t"
            else
                >"$_t"
            fi
        )
        echo "$_t"
    }
fi

########################################################
# tickle handling
########################################################

# Synchronise the tickle list held by ctdb with the ESTABLISHED TCP
# connections to port $1 on the public IPs held by this node.
update_tickles ()
{
    _port="$1"

    tickledir="$CTDB_VARDIR/state/tickles"
    mkdir -p "$tickledir"

    ctdb_get_pnn

    # What public IPs do I hold?
    _ips=$(ctdb -X ip | awk -F'|' -v pnn="$pnn" '$3 == pnn {print $2}')

    # IPs as a regexp choice
    _ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"

    # Record connections to our public IPs in a temporary file
    _my_connections="${tickledir}/${_port}.connections"
    rm -f "$_my_connections"
    netstat -tn |
    awk -v destpat="^${_ipschoice}:${_port}\$" \
        '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
    sort >"$_my_connections"

    # Record our current tickles in a temporary file
    _my_tickles="${tickledir}/${_port}.tickles"
    rm -f "$_my_tickles"
    for _i in $_ips ; do
        ctdb -X gettickles $_i $_port |
        awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
    done |
    sort >"$_my_tickles"

    # Add tickles for connections that we haven't already got tickles for
    comm -23 "$_my_connections" "$_my_tickles" |
    while read _src _dst ; do
        ctdb addtickle $_src $_dst
    done

    # Remove tickles for connections that are no longer there
    comm -13 "$_my_connections" "$_my_tickles" |
    while read _src _dst ; do
        ctdb deltickle $_src $_dst
    done

    rm -f "$_my_connections" "$_my_tickles"
}

########################################################
# load a site local config file
########################################################

[ -n "$CTDB_RC_LOCAL" -a -x "$CTDB_RC_LOCAL" ] && {
    . "$CTDB_RC_LOCAL"
}

[ -x "$CTDB_BASE/rc.local" ] && {
    . "$CTDB_BASE/rc.local"
}

[ -d "$CTDB_BASE/rc.local.d" ] && {
    for i in "$CTDB_BASE"/rc.local.d/* ; do
        [ -x "$i" ] && . "$i"
    done
}

script_name="${0##*/}"       # basename
service_fail_limit=1
event_name="$1"