2 # a script to test the basic setup of a CTDB/Samba install
3 # tridge@samba.org September 2007
4 # martin@meltin.net August 2010
9 Usage: ctdb_diagnostics [OPTION] ...
11 -n <nodes> Comma separated list of nodes to operate on
12 -c Ignore comment lines (starting with '#') in file comparisons
13 -w Ignore whitespace in file comparisons
14 --no-ads Do not use commands that assume an Active Directory Server
20 nodes=$(ctdb listnodes -X | cut -d'|' -f2)
27 temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@")
35 -n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;;
36 -c) diff_opts="${diff_opts} -I ^#" ; shift ;; # no glob characters: diff_opts is expanded unquoted later
37 -w) diff_opts="${diff_opts} -w" ; shift ;;
38 --no-ads) no_ads=true ; shift ;;
49 # Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
50 case "$EXTRA_SSH_OPTS" in
51 *ConnectTimeout=*) : ;;
53 export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
56 # Filter nodes. Remove any nodes we can't contact from $nodes and add
60 if onnode "$_i" true >/dev/null 2>&1 ; then
61 _nodes="${_nodes}${_nodes:+ }${_i}"
63 bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
68 nodes_comma=$(echo "$nodes" | sed -e 's@[[:space:]]@,@g')
70 PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
72 # list of config files that must exist and that we check are the same
74 if [ -d /etc/sysconfig ] ; then
75 CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
77 CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/default/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/default/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
80 # list of config files that may exist and should be checked that they
81 # are the same on the nodes
82 CONFIG_FILES_MAY="/usr/local/etc/ctdb/public_addresses /usr/local/etc/ctdb/static-routes"
87 --------------------------------------------------------------------
88 ctdb_diagnostics starting. This script will gather information about
89 your ctdb cluster. You should send the output of this script along
90 with any ctdb or clustered Samba bug reports.
91 --------------------------------------------------------------------
99 NUM_ERRORS=$((NUM_ERRORS + 1))
100 echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS"
105 _fdetails=$(ls -l "$fname" 2>&1)
106 echo " ================================"
109 sed 's/^/ /' "$fname" 2>&1
110 echo " ================================"
114 echo "running $1 on nodes $nodes_comma"
115 onnode "$nodes_comma" "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
118 show_and_compare_files () {
129 onnode "$n" [ -r "$f" ] || {
130 # This function takes a format string
131 # shellcheck disable=SC2059
132 msg=$(printf "$fmt" "$f" "$n")
137 fstf="${tmpdir}/${_bf}.node${n}"
138 onnode "$n" cat "$f" >"$fstf" 2>&1
140 _fdetails=$(onnode "$n" ls -l "$f" 2>&1)
141 echo " ================================"
142 echo " File (on node $n): $f"
145 echo " ================================"
148 echo "Testing for same config file $f on node $n"
149 tmpf="${tmpdir}/${_bf}.node${n}"
150 onnode "$n" cat "$f" >"$tmpf" 2>&1
151 # Intentional multi-word splitting on diff_opts
152 # shellcheck disable=SC2086
153 diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
154 error "File $f is different on node $n"
155 diff -u $diff_opts "$fstf" "$tmpf"
165 if ! tmpdir=$(mktemp -d) ; then
166 echo "Unable to create a temporary directory"
169 ERRORS="${tmpdir}/diag_err"
173 Diagnosis started on these nodes:
177 if [ -n "$bad_nodes" ] ; then
180 NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
188 For reference, here is the nodes file on the current node...
191 show_file /usr/local/etc/ctdb/nodes
194 --------------------------------------------------------------------
195 Comparing critical config files on nodes $nodes_comma
198 # Intentional multi-word splitting on CONFIG_FILES_MUST
199 # shellcheck disable=SC2086
200 show_and_compare_files \
201 "%s is missing on node %d" \
204 # Intentional multi-word splitting on CONFIG_FILES_MAY
205 # shellcheck disable=SC2086
206 show_and_compare_files \
207 "Optional file %s is not present on node %d" \
211 --------------------------------------------------------------------
212 Checking for clock drift
216 t2=$(onnode "$i" date +%s)
218 if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then # drift of more than 30s in either direction
219 error "time on node $i differs by $d seconds"
224 --------------------------------------------------------------------
225 Showing software versions
229 show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'" # grep -E: egrep is obsolescent and may print a warning
231 [ -x /usr/bin/dpkg-query ] && {
232 show_all "/usr/bin/dpkg-query --show 'ctdb'"
233 show_all "/usr/bin/dpkg-query --show 'samba'"
234 #show_all "/usr/bin/dpkg-query --show 'gpfs'"
239 --------------------------------------------------------------------
240 Showing ctdb status and recent log entries
242 show_all "ctdb status; ctdb ip"
243 show_all "ctdb statistics"
244 show_all "ctdb uptime"
245 show_all "ctdb listvars"
246 show_all "ctdb getdbmap"
247 show_all "ctdb -X getdbmap | awk -F'|' 'NR > 1 {print \$3}' | sort | xargs -n 1 ctdb dbstatistics"
249 echo "Showing log.ctdb"
250 show_all "test -f /usr/local/var/log/log.ctdb && tail -100 /usr/local/var/log/log.ctdb" # last 100 lines, if the log exists
255 show_all "tail -200 /var/log/messages"
256 show_all "ls -lRs /usr/local/var/lib/ctdb"
257 show_all "ls -lRs /usr/local/etc/ctdb"
261 --------------------------------------------------------------------
262 Showing system and process status
270 show_all "/sbin/lspci"
272 show_all "cat /proc/partitions"
273 show_all "cat /proc/cpuinfo"
274 show_all "cat /proc/scsi/scsi"
275 show_all "/sbin/ifconfig -a"
277 show_all "/sbin/ip addr list"
278 show_all "/sbin/route -n"
279 show_all "netstat -s"
281 show_all "crontab -l"
283 show_all "iptables -L -n"
284 show_all "iptables -L -n -t nat"
285 show_all "/usr/sbin/rpcinfo -p"
286 show_all "/usr/sbin/showmount -a"
287 show_all "/usr/sbin/showmount -e"
288 show_all "/usr/sbin/nfsstat -v"
289 [ -x /sbin/multipath ] && {
290 show_all "/sbin/multipath -ll"
292 [ -x /sbin/chkconfig ] && {
293 show_all "/sbin/chkconfig --list"
295 [ -x /usr/sbin/getenforce ] && {
296 show_all "/usr/sbin/getenforce"
298 [ -d /proc/net/bonding ] && {
299 for f in /proc/net/bonding/*; do
305 --------------------------------------------------------------------
308 show_all "smbstatus -n -B"
311 echo "Skipping \"net ads testjoin\" as requested"
314 show_all "net ads testjoin"
316 show_all "net conf list"
317 show_all "lsof -n | grep smbd"
318 show_all "lsof -n | grep ctdbd"
319 show_all "netstat -tan"
322 echo "Skipping \"net ads info\" as requested"
325 show_all "net ads info"
328 show_all "smbclient -U% -L 127.0.0.1"
329 WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
330 show_all "id $WORKGROUP/Administrator" # one string: show_all only uses its first argument
332 show_all "wbinfo --online-status"
336 echo "Diagnostics finished with $NUM_ERRORS errors"
338 [ -r "$ERRORS" ] && {