}
-nodes=$(ctdb listnodes -Y | cut -d: -f2)
+nodes=$(ctdb listnodes -X | cut -d'|' -f2)
+bad_nodes=""
diff_opts=
no_ads=false
{
temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@")
- [ $? != 0 ] && usage
+ # No! Checking the exit code afterwards is actually clearer...
+ # shellcheck disable=SC2181
+ [ $? -eq 0 ] || usage
eval set -- "$temp"
parse_options "$@"
-nodes_comma=$(echo $nodes | sed -e 's@[[:space:]]@,@g')
+# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
+case "$EXTRA_SSH_OPTS" in
+ *ConnectTimeout=*) : ;;
+ *)
+ export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
+esac
+
+# Filter nodes. Remove any nodes we can't contact from $nodes and add
+# them to $bad_nodes.
+_nodes=""
+for _i in $nodes ; do
+ if onnode "$_i" true >/dev/null 2>&1 ; then
+ _nodes="${_nodes}${_nodes:+ }${_i}"
+ else
+ bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
+ fi
+done
+nodes="$_nodes"
+
+nodes_comma=$(echo "$nodes" | sed -e 's@[[:space:]]@,@g')
PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
# list of config files that must exist and that we check are the same
# on the nodes
-CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+if [ -d /etc/sysconfig ] ; then
+ CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+else
+ CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/default/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/default/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+fi
# list of config files that may exist and should be checked that they
# are the same on the nodes
-CONFIG_FILES_MAY="/etc/ctdb/public_addresses /etc/ctdb/static-routes"
+CONFIG_FILES_MAY="/usr/local/etc/ctdb/public_addresses /usr/local/etc/ctdb/static-routes"
-2>&1
+exec 2>&1
cat <<EOF
--------------------------------------------------------------------
error() {
msg="$1"
echo "ERROR: $msg"
- NUM_ERRORS=`expr $NUM_ERRORS + 1`
- echo " ERROR[$NUM_ERRORS]: $msg" >> $ERRORS
+ NUM_ERRORS=$((NUM_ERRORS + 1))
+ echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS"
}
show_file() {
fname="$1"
+ _fdetails=$(ls -l "$fname" 2>&1)
echo " ================================"
echo " File: $fname"
- echo " `ls -l $fname 2>&1`"
- cat "$fname" 2>&1 | sed 's/^/ /'
+ echo " $_fdetails"
+ sed 's/^/ /' "$fname" 2>&1
echo " ================================"
}
show_all() {
echo "running $1 on nodes $nodes_comma"
- onnode $nodes_comma "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
+ onnode "$nodes_comma" "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
}
show_and_compare_files () {
fmt="$1" ; shift
for f ; do
-
+ _bf=$(basename "$f")
first=true
for n in $nodes ; do
if $first ; then
- onnode $n [ -r "$f" ] || {
- msg=$(printf "$fmt" "$f" $n)
+ onnode "$n" [ -r "$f" ] || {
+ # This function takes a format string
+ # shellcheck disable=SC2059
+ msg=$(printf "$fmt" "$f" "$n")
error "$msg"
continue 2;
}
- fstf=/tmp/`basename $f`.node$n
- onnode $n cat $f > $fstf 2>&1
+ fstf="${tmpdir}/${_bf}.node${n}"
+ onnode "$n" cat "$f" >"$fstf" 2>&1
+ _fdetails=$(onnode "$n" ls -l "$f" 2>&1)
echo " ================================"
echo " File (on node $n): $f"
- echo " `onnode $n ls -l $f 2>&1`"
- cat "$fstf" | sed 's/^/ /'
+ echo " $_fdetails"
+ sed 's/^/ /' "$fstf"
echo " ================================"
first=false
else
echo "Testing for same config file $f on node $n"
- tmpf=/tmp/`basename $f`.node$n
- onnode $n cat $f > $tmpf 2>&1
- diff $diff_opts $fstf $tmpf >/dev/null 2>&1 || {
+ tmpf="${tmpdir}/${_bf}.node${n}"
+ onnode "$n" cat "$f" >"$tmpf" 2>&1
+ # Intentional multi-word splitting on diff_opts
+ # shellcheck disable=SC2086
+ diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
error "File $f is different on node $n"
- diff -u $diff_opts $fstf $tmpf
+ diff -u $diff_opts "$fstf" "$tmpf"
}
- rm -f $tmpf
+ rm -f "$tmpf"
fi
done
- rm -f $fstf
+ rm -f "$fstf"
done
}
-ERRORS="/tmp/diag_err.$$"
+if ! tmpdir=$(mktemp -d) ; then
+ echo "Unable to create a temporary directory"
+ exit 1
+fi
+ERRORS="${tmpdir}/diag_err"
NUM_ERRORS=0
cat <<EOF
Diagnosis started on these nodes:
$nodes_comma
+EOF
+
+if [ -n "$bad_nodes" ] ; then
+ cat <<EOF
+
+NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
+$bad_nodes
+EOF
+
+fi
+
+cat <<EOF
For reference, here is the nodes file on the current node...
EOF
-show_file /etc/ctdb/nodes
+show_file /usr/local/etc/ctdb/nodes
cat <<EOF
--------------------------------------------------------------------
Comping critical config files on nodes $nodes_comma
EOF
+# Intentional multi-word splitting on CONFIG_FILES_MUST
+# shellcheck disable=SC2086
show_and_compare_files \
"%s is missing on node %d" \
$CONFIG_FILES_MUST
+# Intentional multi-word splitting on CONFIG_FILES_MAY
+# shellcheck disable=SC2086
show_and_compare_files \
"Optional file %s is not present on node %d" \
$CONFIG_FILES_MAY
--------------------------------------------------------------------
Checking for clock drift
EOF
-t=`date +%s`
+t=$(date +%s)
for i in $nodes; do
- t2=`onnode $i date +%s`
- d=`expr $t2 - $t`
- if [ $d -gt 30 -o $d -lt -30 ]; then
+ t2=$(onnode "$i" date +%s)
+ d=$((t2 - t))
+ if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then
error "time on node $i differs by $d seconds"
fi
done
EOF
show_all "uname -a"
[ -x /bin/rpm ] && {
- show_all "rpm -qa | egrep 'samba|ctdb|gpfs'"
+ show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'"
}
[ -x /usr/bin/dpkg-query ] && {
show_all "/usr/bin/dpkg-query --show 'ctdb'"
show_all "ctdb status; ctdb ip"
show_all "ctdb statistics"
show_all "ctdb uptime"
+show_all "ctdb listvars"
+show_all "ctdb getdbmap"
+show_all "ctdb -X getdbmap | awk -F'|' 'NR > 1 {print \$3}' | sort | xargs -n 1 ctdb dbstatistics"
echo "Showing log.ctdb"
-show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"
-
-echo "Showing log.ctdb"
-show_all "test -f /var/log/log.ctdb && tail -100 /var/log/log.ctdb"
+show_all "test -f /usr/local/var/log/log.ctdb && tail -100 /usr/local/var/log/log.ctdb"
show_all "tail -200 /var/log/messages"
-show_all "tail -200 /etc/ctdb/state/vacuum.log"
-show_all "ls -lRs /var/ctdb"
-show_all "ls -lRs /etc/ctdb"
+show_all "ls -lRs /usr/local/var/lib/ctdb"
+show_all "ls -lRs /usr/local/etc/ctdb"
cat <<EOF
show_all "cat /proc/scsi/scsi"
show_all "/sbin/ifconfig -a"
show_all "/sbin/ifconfig -a"
+show_all "cat /proc/net/dev"
show_all "/sbin/ip addr list"
show_all "/sbin/route -n"
-show_all "netstat -s"
+show_all "ss -s"
show_all "free"
show_all "crontab -l"
show_all "sysctl -a"
done
}
-[ -d /usr/lpp/mmfs ] && {
-cat <<EOF
---------------------------------------------------------------------
-Showing GPFS status and recent log entries
-EOF
- show_all "tail -100 /var/adm/ras/mmfs.log.latest"
- show_all "/usr/lpp/mmfs/bin/mmlsconfig"
- show_all "/usr/lpp/mmfs/bin/mmlsfs all"
- show_all "/usr/lpp/mmfs/bin/mmlsnsd"
- show_all "/usr/lpp/mmfs/bin/mmlsnsd -X"
- show_all "/usr/lpp/mmfs/bin/mmfsadm dump version"
- show_all "/usr/lpp/mmfs/bin/mmfsadm dump waiters"
- show_all "/usr/lpp/mmfs/bin/mmlsmount all"
- show_all "/usr/lpp/mmfs/bin/mmlsquota"
- show_all "/usr/lpp/mmfs/bin/mmlscluster"
- show_all "/usr/lpp/mmfs/bin/mmlsmgr"
- devlist=`mmlsfs all|grep ^File.system.attributes | cut -d/ -f3 | cut -d: -f1`
- for d in $devlist; do
- show_all "mmdf $d"
- show_all "mmlsdisk $d"
- show_all "mmlsfileset $d"
- show_all "mmlspolicy $d"
- show_all "mmlssnapshot $d"
- done
- fslist=`mount|grep type.gpfs|awk '{print $1}'`
- for fs in $fslist; do
- show_all "/usr/lpp/mmfs/bin/mmlssnapshot $fs"
- show_all "/usr/lpp/mmfs/bin/mmlsdisk $fs"
- show_all "/usr/lpp/mmfs/bin/mmlsfileset $fs"
- done
-}
-
cat <<EOF
--------------------------------------------------------------------
Showing Samba status
fi
show_all "date"
show_all "smbclient -U% -L 127.0.0.1"
-WORKGROUP=`testparm -s --parameter-name=WORKGROUP 2> /dev/null`
+WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
show_all id "$WORKGROUP/Administrator"
show_all "wbinfo -p"
show_all "wbinfo --online-status"
date
echo "Diagnostics finished with $NUM_ERRORS errors"
-[ -r $ERRORS ] && {
- cat $ERRORS
- rm -f $ERRORS
+[ -r "$ERRORS" ] && {
+ cat "$ERRORS"
+ rm -f "$ERRORS"
}
+
+rm -rf "$tmpdir"
+
exit $NUM_ERRORS