3 # Run commands on CTDB nodes.
5 # See http://ctdb.samba.org/ for more information about CTDB.
7 # Copyright (C) Martin Schwenke 2008
9 # Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.
11 # Copyright (C) Andrew Tridgell 2007
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 3 of the License, or
16 # (at your option) any later version.
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, see <http://www.gnu.org/licenses/>.
31 Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
33 -c Run in current working directory on specified nodes.
34 -o <prefix> Save standard output from each node to file <prefix>.<ip>
35 -p Run command in parallel on specified nodes.
36 -q Do not print node addresses (overrides -v).
37 -n Allow nodes to be specified by name.
38 -f Specify nodes file, overrides CTDB_NODES_FILE.
39 -v Print node address even for a single node.
40 <NODES> "all", "any", "ok" (or "healthy"), "con" (or "connected"),
41 "rm" (or "recmaster"), "lvs" (or "lvsmaster"),
42 "natgw" (or "natgwlist"); or
43 a node number (0 base); or
44 a hostname (if -n is specified); or
45 list (comma separated) of <NODES>; or
46 range (hyphen separated) of node numbers.
54 echo "Invalid <nodespec>" >&2 ; echo >&2
66 ctdb_base="${CTDB_BASE:-/etc/ctdb}"
70 # $POSIXLY_CORRECT means that the command passed to onnode can
71 # take options and getopt won't reorder things to make them
74 # Not assigned on the previous ("local") line - local returns 0, hiding getopt's status!
75 temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cf:hno:pqv" -l help -- "$@")
83 -c) current=true ; shift ;;
84 -f) CTDB_NODES_FILE="$2" ; shift 2 ;;
85 -n) names_ok=true ; shift ;;
86 -o) prefix="$2" ; shift 2 ;;
87 -p) parallel=true ; shift ;;
88 -q) quiet=true ; shift ;;
89 -v) verbose=true ; shift ;;
91 -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
108 if [ -n "$node" -a "$node" != "#DEAD" ] ; then
111 echo "${prog}: \"node ${n}\" does not exist" >&2
118 # Subshell avoids hacks to restore $IFS.
123 *-*) seq "${i%-*}" "${i#*-}" 2>/dev/null || invalid_nodespec ;;
124 # Separate lines for readability.
125 all|any|ok|healthy|con|connected) echo "$i" ;;
126 rm|recmaster|lvs|lvsmaster|natgw|natgwlist) echo "$i" ;;
128 [ $i -gt -1 ] 2>/dev/null || $names_ok || invalid_nodespec
# Cache for the output of "ctdb -Y status"; the command is only run
# while this variable is empty.
135 ctdb_status_output="" # cache
# Select nodes based on the per-node flags reported by "ctdb -Y status".
# Called elsewhere in this file as:
#   get_nodes_with_status <all_nodes> healthy|connected
# For each status line that passes the checks, the line's PNN is handed
# to echo_nth together with $all_nodes.  If the ctdb command fails, an
# error and the captured output are written to stderr.
136 get_nodes_with_status ()
# Only query ctdb when nothing has been cached yet.
141 if [ -z "$ctdb_status_output" ] ; then
# stdout and stderr are captured together so that the text can be shown
# if the command fails.
142 ctdb_status_output=$(ctdb -Y status 2>&1)
143 if [ $? -ne 0 ] ; then
144 echo "${prog}: unable to get status of CTDB nodes" >&2
145 echo "$ctdb_status_output" >&2
# Remove everything up to and including the first $nl, i.e. the first
# line of output if $nl holds a newline (it is set outside this view).
150 ctdb_status_output="${ctdb_status_output#*${nl}}"
# An empty IFS makes read return each line without trimming or
# splitting; the fields are separated afterwards.
156 while IFS="" read i ; do
# NOTE(review): the split below relies on IFS containing ":" at this
# point - the line that sets it is not visible here; confirm.
158 set -- $i # split line on colons
159 shift # line starts with : so 1st field is empty
# The first two remaining fields are the node number and its address;
# whatever is left in "$@" after these shifts are the flag fields.
160 local pnn="$1" ; shift
161 local ip="$1" ; shift
165 # If any bit is not 0, don't match this address.
# "continue 2" skips out of an enclosing inner loop (not visible here),
# presumably moving on to the next status line.
168 [ "$s" = "0" ] || continue 2
172 # If disconnected bit is not 0, don't match this address.
173 [ "$1" = "0" ] || continue
179 echo_nth "$pnn" $all_nodes
# The cached status text is fed to the loop with a here-string.
180 done <<<"$ctdb_status_output"
# Cache of property lookups, kept as space-separated ":<prop>:<value>"
# entries.
184 ctdb_props="" # cache
# Look up which node holds a given property.
# Called elsewhere in this file as:
#   get_node_with_property <all_nodes> recmaster|lvsmaster|natgwlist
# Runs "ctdb <prop> -Y" unless the cache already starts with an entry
# for the property.  A non-empty result is handed to echo_nth together
# with $all_nodes; otherwise "No <prop> available" is written to stderr.
185 get_node_with_property ()
# Stripping the ":<prop>:" prefix leaves $ctdb_props unchanged when the
# cache does not start with that prefix; that case is treated as a miss.
# NOTE(review): the pattern contains no wildcard, so only an entry at
# the very start of the cache can match - confirm that this is intended.
191 if [ "${ctdb_props##:${prop}:}" = "$ctdb_props" ] ; then
# stderr from ctdb is discarded; failure is detected via the exit status.
193 prop_node=$(ctdb "$prop" -Y 2>/dev/null)
194 if [ $? -eq 0 ] ; then
195 if [ "$prop" = "natgwlist" ] ; then
196 prop_node="${prop_node%% *}" # 1st word
197 if [ "$prop_node" = "-1" ] ; then
198 # This works around natgwlist returning 0 even
199 # when there's no natgw.
203 # We only want the first line.
206 prop_node="${prop_node%%${nl}*}"
# Remember a non-empty answer; entries are separated by a single space.
212 if [ -n "$prop_node" ] ; then
214 ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
# Read the value back out of the cache: drop the leading ":<prop>:" and
# keep only the first space-delimited word.
218 prop_node="${ctdb_props##:${prop}:}"
219 prop_node="${prop_node%% *}"
222 if [ -n "$prop_node" ] ; then
223 echo_nth "$prop_node" $all_nodes
225 echo "${prog}: No ${prop} available" >&2
# Find a node that answers "ctdb pnn" and hand its PNN to echo_nth
# together with $all_nodes.  The loop over the captured output and any
# early exit are not visible here.
230 get_any_available_node ()
234 # We do a recursive onnode to find which nodes are up and running.
# -p runs the command on all nodes in parallel and -q suppresses the
# node address headers; stdout and stderr are captured together.
# NOTE(review): because the assignment is combined with "local", $?
# afterwards reflects local rather than the recursive onnode run.
235 local out=$($0 -pq all ctdb pnn 2>&1)
# Stripping a leading "PNN:" changes the line only when it starts with
# that prefix, so a difference marks a successful reply.
238 local pnn="${line#PNN:}"
239 if [ "$pnn" != "$line" ] ; then
240 echo_nth "$pnn" $all_nodes
243 # Else must be an error message from a down node.
252 if [ -n "$CTDB_NODES_SOCKETS" ] ; then
253 all_nodes="$CTDB_NODES_SOCKETS"
255 local f="${ctdb_base}/nodes"
256 if [ -n "$CTDB_NODES_FILE" ] ; then
258 if [ ! -e "$f" -a "${f#/}" = "$f" ] ; then
259 # $f is relative, try in $ctdb_base
260 f="${ctdb_base}/${f}"
264 if [ ! -r "$f" ] ; then
265 echo "${prog}: unable to open nodes file \"${f}\"" >&2
269 all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$f")
274 for n in $(parse_nodespec "$1") ; do
275 [ $? != 0 ] && exit 1 # Required to catch exit in above subshell.
278 echo "${all_nodes//#DEAD/}"
281 get_any_available_node "$all_nodes" || exit 1
284 get_nodes_with_status "$all_nodes" "healthy" || exit 1
287 get_nodes_with_status "$all_nodes" "connected" || exit 1
290 get_node_with_property "$all_nodes" "recmaster" || exit 1
293 get_node_with_property "$all_nodes" "lvsmaster" || exit 1
296 get_node_with_property "$all_nodes" "natgwlist" || exit 1
298 [0-9]|[0-9][0-9]|[0-9][0-9][0-9])
299 echo_nth $n $all_nodes
302 $names_ok || invalid_nodespec
310 CTDB_SOCKET="$1" sh -c "$2" 3>/dev/null
315 if [ -n "$prefix" ] ; then
316 cat >"${prefix}.${n//\//_}"
317 elif $verbose && $parallel ; then
326 if $verbose && $parallel ; then
333 ######################################################################
337 $current && command="cd $PWD && $command"
340 if [ -n "$CTDB_NODES_SOCKETS" ] ; then
344 # Could use "2>/dev/null || true" but we want to see errors from typos in the file.
345 [ -r "${ctdb_base}/onnode.conf" ] && . "${ctdb_base}/onnode.conf"
346 [ -n "$SSH" ] || SSH=ssh
347 if [ "$SSH" = "ssh" ] ; then
350 : # rsh? All bets are off!
354 ######################################################################
# Expand the node specification into the list of nodes to run on.
356 nodes=$(get_nodes "$nodespec")
357 [ $? != 0 ] && exit 1 # Required to catch exit in above subshell.
362 # If $nodes contains a space or a newline then assume multiple nodes.
365 [ "$nodes" != "${nodes%[ ${nl}]*}" ] && verbose=true
# On INT or TERM, send TERM to the PIDs recorded in $pids; errors from
# kill are discarded.
369 trap 'kill -TERM $pids 2>/dev/null' INT TERM
370 # There's a small race here where the kill can fail if no processes
371 # have been added to $pids and the script is interrupted. However,
372 # the part of the window where it matters is very small.
# Any error message from enabling pipefail is discarded.
375 set -o pipefail 2>/dev/null
# Background variant.  fd 3 is a copy of the original stdout: the remote
# command's stdout passes through stdout_filter and is written to fd 3,
# while anything the inner group writes to stderr is redirected into the
# pipe feeding stderr_filter.
377 { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
# Node banner, written to stderr and preceded by an empty line.
381 echo >&2 ; echo ">> NODE: $n <<" >&2
# Foreground variant of the same pipeline.
384 { exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
# NOTE(review): when the test fails, the $? on the right-hand side is the
# exit status of "[" itself, not of the preceding command, so retcode
# does not receive the command's own status.  Confirm whether the
# original status was meant to be kept (same pattern on the next
# statement).
385 [ $? = 0 ] || retcode=$?
392 [ $? = 0 ] || retcode=$?