fuzzing: fix fuzz_stable_sort_r_unstable comparison
[vlendec/samba-autobuild/.git] / ctdb / config / debug_locks.sh
index 8c9b46657faf333ebc5d4a26e3ac48743214664b..6c730ee18023cf947ba5d013d54b6f2b81cec15f 100755 (executable)
@@ -1,68 +1,65 @@
 #!/bin/sh
 
-# This script parses /proc/locks and finds the processes that are holding
-# locks on CTDB databases.  For all those processes the script dumps a
-# stack trace.
+# This script attempts to find processes holding locks on a particular
+# CTDB database and dumps a stack trace for each such process.
 #
-# This script can be used only if Samba is configured to use fcntl locks
-# rather than mutex locks.
+# There are 2 cases:
+#
+# * Samba is configured to use fcntl locks
+#
+#   In this case /proc/locks is parsed to find potential lock holders
+#
+# * Samba is configured to use POSIX robust mutexes
+#
+#   In this case the helper program tdb_mutex_check is used to find
+#   potential lock holders.
+#
+#   This helper program uses a private glibc struct field, so is
+#   neither portable nor supported.  If this field is not available
+#   then the helper is not built.  Unexpected changes in internal
+#   glibc structures may cause unexpected results, including crashes.
+#   Bug reports for this helper program are not accepted without an
+#   accompanying patch.
 
 [ -n "$CTDB_BASE" ] || \
-    CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" ; dirname "$PWD")
+       CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD")
 
 . "${CTDB_BASE}/functions"
 
-# Default fallback location for database directories.
-# These can be overwritten from CTDB configuration
-CTDB_DBDIR="${CTDB_VARDIR}"
-CTDB_DBDIR_PERSISTENT="${CTDB_VARDIR}/persistent"
+if [ $# -ne 4 ] ; then
+       die "usage: $0 <pid> { DB | RECORD } <tdb_path> { FCNTL | MUTEX }"
+fi
 
-loadconfig ctdb
+lock_helper_pid="$1"
+# lock_scope is unused for now
+# shellcheck disable=SC2034
+lock_scope="$2"
+tdb_path="$3"
+lock_type="$4"
 
-(
-    flock -n 9 || exit 1
-
-    echo "===== Start of debug locks PID=$$ ====="
-
-    # Create sed expression to convert inodes to names.
-    # Filenames don't contain dashes and we want basenames
-    # shellcheck disable=SC2035
-    sed_cmd=$(cd "$CTDB_DBDIR" &&
-                 stat -c "s#[0-9a-f]*:[0-9a-f]*:%i #%n #" *.tdb.* 2>/dev/null ;
-             cd "$CTDB_DBDIR_PERSISTENT" &&
-                 stat -c "s#[0-9a-f]*:[0-9a-f]*:%i #%n #" *.tdb.* 2>/dev/null)
-
-    # Parse /proc/locks and extract following information
-    #    pid process_name tdb_name offsets [W]
-    out=$( grep -F "POSIX  ADVISORY  WRITE" /proc/locks |
-    awk '{ if($2 == "->") { print $6, $7, $8, $9, "W" } else { print $5, $6, $7, $8 } }' |
-    while read pid rest ; do
-       pname=$(readlink "/proc/${pid}/exe")
-       echo "$pid $pname $rest"
-    done | sed -e "$sed_cmd" | grep "\.tdb" )
-
-    if [ -n "$out" ]; then
-       # Log information about locks
-       echo "$out"
-
-       # Find processes that are waiting for locks
-       dbs=$(echo "$out" | grep "W$" | awk '{print $3}')
-       all_pids=""
-       for db in $dbs ; do
-           pids=$(echo "$out" | grep -v "W$" | grep "$db" | grep -v ctdbd | awk '{print $1}')
-           all_pids="$all_pids $pids"
-       done
-       # Use word splitting to squash whitespace
-       # shellcheck disable=SC2086
-       pids=$(echo $all_pids | tr " " "\n" | sort -u)
-
-       # For each process waiting, log stack trace
-       for pid in $pids ; do
-           echo "----- Stack trace for PID=$pid -----"
-           # x is intentionally ignored
-           # shellcheck disable=SC2034
-           read x x state x <"/proc/${pid}/stat"
-           if [ "$state" = "D" ] ; then
+# type is at least mentioned in POSIX and is more portable than which(1)
+# shellcheck disable=SC2039
+if ! type gstack >/dev/null 2>&1 ; then
+       gstack ()
+       {
+               _pid="$1"
+
+               gdb -batch --quiet -nx "/proc/${_pid}/exe" "$_pid" \
+                   -ex "thread apply all bt" 2>/dev/null |
+                       grep '^\(#\|Thread \)'
+       }
+fi
+
+# Load/cache database options from configuration file
+ctdb_get_db_options
+
+dump_stack ()
+{
+       _pid="$1"
+
+       echo "----- Stack trace for PID=${_pid} -----"
+       _state=$(ps -p "$_pid" -o state= | cut -c 1)
+       if [ "$_state" = "D" ] ; then
                # Don't run gstack on a process in D state since
                # gstack will hang until the process exits D state.
                # Although it is possible for a process to transition
@@ -73,15 +70,149 @@ loadconfig ctdb
                # deadlock... but it will probably give us someone to
                # blame!
                echo "----- Process in D state, printing kernel stack only"
-               cat "/proc/${pid}/stack"
-           else
-               gstack "$pid"
-               # gcore -o /var/log/core-deadlock-ctdb $pid
-           fi
+               get_proc "${_pid}/stack"
+       else
+               gstack "$_pid"
+       fi
+}
+
+# Dump a stack trace, via dump_stack, for each PID in the
+# whitespace-separated list given as $1.  The list is passed through
+# sort -u first, so a PID that appears more than once is dumped once.
+dump_stacks ()
+{
+       _pids="$1"
+
+       # Use word splitting to squash whitespace
+       # shellcheck disable=SC2086
+       _pids=$(echo $_pids | tr ' ' '\n' | sort -u)
+
+       for _pid in $_pids; do
+               dump_stack "$_pid"
        done
-    fi
+}
+
+# Print the ID of $tdb_path as "major:minor:inode", with the device
+# numbers as at least 2 hex digits each and the inode in decimal, for
+# matching against the file ID column of /proc/locks.  Calls die if
+# stat fails on $tdb_path.
+get_tdb_file_id ()
+{
+       if ! _device_inode=$(stat -c "%d:%i" "$tdb_path" 2>/dev/null) ; then
+               die "Unable to stat \"${tdb_path}\""
+       fi
+       _device="${_device_inode%%:*}"
+       # stat's %d is the raw device number.  glibc on Linux stores
+       # the major number in bits 8-19 and 44-63 and the minor number
+       # in bits 0-7 and 20-43, so a plain ">> 8" / "& 0xff" split
+       # gives the wrong answer when the minor number is above 255.
+       _device_major=$(( ((_device >> 8) & 0xfff) |
+                         ((_device >> 32) & 0xfffff000) ))
+       _device_minor=$(( (_device & 0xff) |
+                         ((_device >> 12) & 0xffffff00) ))
+       _inode="${_device_inode#*:}"
+       printf '%02x:%02x:%u\n' "$_device_major" "$_device_minor" "$_inode"
+}
+
+debug_via_proc_locks ()
+{
+       # Get file ID to match relevant column in /proc/locks
+       _file_id=$(get_tdb_file_id)
+
+       # Log information from /proc/locks about the waiting process
+       _tdb=$(basename "$tdb_path")
+       _comm=$(ps -p "$lock_helper_pid" -o comm=)
+       _out=$(get_proc "locks" |
+              awk -v pid="$lock_helper_pid" \
+                  -v file_id="$_file_id" \
+                  -v file="$_tdb" \
+                  -v comm="$_comm" \
+                  '$2 == "->" &&
+                   $3 == "POSIX" &&
+                   $4 == "ADVISORY" &&
+                   $5 == "WRITE" &&
+                   $6 == pid &&
+                   $7 == file_id { print $6, comm, file, $8, $9 }')
+       if [ -n "$_out" ] ; then
+               echo "Waiter:"
+               echo "$_out"
+       fi
+
+       # Parse /proc/locks and find process holding locks on $tdb_path
+       # extract following information
+       #    pid process_name tdb_name offsets
+       _out=$(get_proc "locks" |
+              awk -v pid="$lock_helper_pid" \
+                  -v file_id="$_file_id" \
+                  -v file="$_tdb" \
+                  '$2 == "POSIX" &&
+                   $3 == "ADVISORY" &&
+                   $4 == "WRITE" &&
+                   $5 != pid &&
+                   $6 == file_id { print $5, file, $7, $8 }' |
+              while read -r _pid _rest ; do
+                      _pname=$(ps -p "$_pid" -o comm=)
+                      echo "$_pid $_pname $_rest"
+              done)
+
+       if [ -z "$_out" ]; then
+               return
+       fi
+
+       # Log information about locks
+       echo "Lock holders:"
+       echo "$_out"
+
+       _pids=$(echo "$_out" | awk '{ print $1 }')
+
+       lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}"
+}
+
+# Report mutex holders for $tdb_path using the tdb_mutex_check helper
+# in $CTDB_HELPER_BINDIR.
+#
+# Helper output lines of the form "[<info>] pid=<pid>" are treated as
+# lock holders and printed, under "Lock holders:", as:
+#
+#    pid process_name tdb_name info
+#
+# PIDs other than $lock_helper_pid are appended to $lock_holder_pids.
+# Does nothing if the helper is not executable or exits with failure.
+debug_via_tdb_mutex ()
+{
+       _helper="${CTDB_HELPER_BINDIR}/tdb_mutex_check"
+       if [ ! -x "$_helper" ] ; then
+               # Mutex helper not available - not supported?
+               # Avoid not found error...
+               return
+       fi
+
+       # Helper should always succeed
+       if ! _t=$("$_helper" "$tdb_path") ; then
+               return
+       fi
+
+       # Rewrite "[<info>] pid=<pid>" as "<pid> <info>", dropping any
+       # line that does not have this form
+       _out=$(echo "$_t" | sed -n -e 's#^\[\(.*\)\] pid=\(.*\)#\2 \1#p')
+
+       if [ -z "$_out" ]; then
+               # No holders reported: pass on any "trylock failed"
+               # lines from the helper and stop
+               if [ -n "$_t" ] ; then
+                       echo "$_t" | grep -F 'trylock failed'
+               fi
+               return
+       fi
+
+       # Get process names, append $tdb_path.  The basename is the
+       # same for every line, so it is computed once, before the loop.
+       _tdb=$(basename "$tdb_path")
+       _out=$(echo "$_out" |
+              while read -r _pid _rest ; do
+                      _pname=$(ps -p "$_pid" -o comm=)
+                      echo "${_pid} ${_pname} ${_tdb} ${_rest}"
+              done)
+
+       # Log information about locks
+       echo "Lock holders:"
+       echo "$_out"
+
+       # Get PIDs of processes that are holding locks
+       _pids=$(echo "$_out" |
+               awk -v pid="$lock_helper_pid" '$1 != pid {print $1}')
+
+       lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}"
+}
+
+# Main body.  It runs in a subshell with file descriptor 9 open on
+# debug_locks.lock under $CTDB_SCRIPT_VARDIR, and everything it prints
+# is piped to script_log with the tag "ctdbd-lock".
+(
+       # Take the lock on descriptor 9 without blocking; give up if
+       # it can not be taken
+       flock -n 9 || exit 1
+
+       echo "===== Start of debug locks PID=$$ ====="
+
+       # The debug_via_* functions append lock holder PIDs to this
+       lock_holder_pids=""
+
+       # /proc/locks is checked regardless of the lock type
+       debug_via_proc_locks
+
+       if [ "$lock_type" = "MUTEX" ] ; then
+               debug_via_tdb_mutex
+       fi
+
+       dump_stacks "$lock_holder_pids"
 
-    echo "===== End of debug locks PID=$$ ====="
+       echo "===== End of debug locks PID=$$ ====="
 )9>"${CTDB_SCRIPT_VARDIR}/debug_locks.lock" | script_log "ctdbd-lock"
 
 exit 0