locking: Apply contention tracepoints in the slow path
authorNamhyung Kim <namhyung@kernel.org>
Tue, 22 Mar 2022 18:57:09 +0000 (11:57 -0700)
committerPeter Zijlstra <peterz@infradead.org>
Tue, 5 Apr 2022 08:24:35 +0000 (10:24 +0200)
Adding the lock contention tracepoints in various lock function slow
paths.  Note that each arch can define spinlock differently, so I only
added it to the generic qspinlock for now.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Link: https://lkml.kernel.org/r/20220322185709.141236-3-namhyung@kernel.org
kernel/locking/mutex.c
kernel/locking/percpu-rwsem.c
kernel/locking/qrwlock.c
kernel/locking/qspinlock.c
kernel/locking/rtmutex.c
kernel/locking/rwbase_rt.c
kernel/locking/rwsem.c
kernel/locking/semaphore.c

index ee2fd7614a9352797aa87f93d939e9e2ad89b80e..c88deda77cf2f64fd3c2ac478e942e642aa7f6a8 100644 (file)
@@ -644,6 +644,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
        }
 
        set_current_state(state);
+       trace_contention_begin(lock, 0);
        for (;;) {
                bool first;
 
@@ -710,6 +711,7 @@ acquired:
 skip_wait:
        /* got the lock - cleanup and rejoice! */
        lock_acquired(&lock->dep_map, ip);
+       trace_contention_end(lock, 0);
 
        if (ww_ctx)
                ww_mutex_lock_acquired(ww, ww_ctx);
@@ -721,6 +723,7 @@ skip_wait:
 err:
        __set_current_state(TASK_RUNNING);
        __mutex_remove_waiter(lock, &waiter);
+       trace_contention_end(lock, ret);
 err_early_kill:
        raw_spin_unlock(&lock->wait_lock);
        debug_mutex_free_waiter(&waiter);
index c9fdae94e098ebd92fa612d3ce2cae0bbaf40f34..5fe4c5495ba3c87c8505fe0dde278d0112579985 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/debug.h>
 #include <linux/errno.h>
+#include <trace/events/lock.h>
 
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                        const char *name, struct lock_class_key *key)
@@ -171,9 +172,11 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
        if (try)
                return false;
 
+       trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
        preempt_enable();
        percpu_rwsem_wait(sem, /* .reader = */ true);
        preempt_disable();
+       trace_contention_end(sem, 0);
 
        return true;
 }
@@ -216,6 +219,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
 {
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+       trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
 
        /* Notify readers to take the slow path. */
        rcu_sync_enter(&sem->rss);
@@ -237,6 +241,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
 
        /* Wait for all active readers to complete. */
        rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
+       trace_contention_end(sem, 0);
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
index ec36b73f4733b1b065e63ba2440372bee4f4d799..7f42e52a648f4396b9d45a23d0e88448ed145d79 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/spinlock.h>
+#include <trace/events/lock.h>
 
 /**
  * queued_read_lock_slowpath - acquire read lock of a queue rwlock
@@ -34,6 +35,8 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
        }
        atomic_sub(_QR_BIAS, &lock->cnts);
 
+       trace_contention_begin(lock, LCB_F_SPIN | LCB_F_READ);
+
        /*
         * Put the reader into the wait queue
         */
@@ -51,6 +54,8 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
         * Signal the next one in queue to become queue head
         */
        arch_spin_unlock(&lock->wait_lock);
+
+       trace_contention_end(lock, 0);
 }
 EXPORT_SYMBOL(queued_read_lock_slowpath);
 
@@ -62,6 +67,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 {
        int cnts;
 
+       trace_contention_begin(lock, LCB_F_SPIN | LCB_F_WRITE);
+
        /* Put the writer into the wait queue */
        arch_spin_lock(&lock->wait_lock);
 
@@ -79,5 +86,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
        } while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED));
 unlock:
        arch_spin_unlock(&lock->wait_lock);
+
+       trace_contention_end(lock, 0);
 }
 EXPORT_SYMBOL(queued_write_lock_slowpath);
index cbff6ba53d563634791e27ad8d11e7a683065679..65a9a10caa6f50de9a87d28174c6ce707035b3f2 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/prefetch.h>
 #include <asm/byteorder.h>
 #include <asm/qspinlock.h>
+#include <trace/events/lock.h>
 
 /*
  * Include queued spinlock statistics code
@@ -401,6 +402,8 @@ pv_queue:
        idx = node->count++;
        tail = encode_tail(smp_processor_id(), idx);
 
+       trace_contention_begin(lock, LCB_F_SPIN);
+
        /*
         * 4 nodes are allocated based on the assumption that there will
         * not be nested NMIs taking spinlocks. That may not be true in
@@ -554,6 +557,8 @@ locked:
        pv_kick_node(lock, next);
 
 release:
+       trace_contention_end(lock, 0);
+
        /*
         * release the node
         */
index 8555c4efe97c47ad52f3fc595463da05264d5388..7779ee8abc2a08b4a9830b375ea65fa2a3ca013f 100644 (file)
@@ -24,6 +24,8 @@
 #include <linux/sched/wake_q.h>
 #include <linux/ww_mutex.h>
 
+#include <trace/events/lock.h>
+
 #include "rtmutex_common.h"
 
 #ifndef WW_RT
@@ -1579,6 +1581,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
        set_current_state(state);
 
+       trace_contention_begin(lock, LCB_F_RT);
+
        ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
        if (likely(!ret))
                ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
@@ -1601,6 +1605,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
         * unconditionally. We might have to fix that up.
         */
        fixup_rt_mutex_waiters(lock);
+
+       trace_contention_end(lock, ret);
+
        return ret;
 }
 
@@ -1683,6 +1690,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
        /* Save current state and set state to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();
 
+       trace_contention_begin(lock, LCB_F_RT);
+
        task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
 
        for (;;) {
@@ -1712,6 +1721,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
         */
        fixup_rt_mutex_waiters(lock);
        debug_rt_mutex_free_waiter(&waiter);
+
+       trace_contention_end(lock, 0);
 }
 
 static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
index 6fd3162e4098ffa60795c38d9394c0459512708e..c201aadb9301722357745d632c49ac652e0179d6 100644 (file)
@@ -112,6 +112,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
         * Reader2 to call up_read(), which might be unbound.
         */
 
+       trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);
+
        /*
         * For rwlocks this returns 0 unconditionally, so the below
         * !ret conditionals are optimized out.
@@ -130,6 +132,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
        raw_spin_unlock_irq(&rtm->wait_lock);
        if (!ret)
                rwbase_rtmutex_unlock(rtm);
+
+       trace_contention_end(rwb, ret);
        return ret;
 }
 
@@ -247,11 +251,13 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
                goto out_unlock;
 
        rwbase_set_and_save_current_state(state);
+       trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
        for (;;) {
                /* Optimized out for rwlocks */
                if (rwbase_signal_pending_state(state, current)) {
                        rwbase_restore_current_state();
                        __rwbase_write_unlock(rwb, 0, flags);
+                       trace_contention_end(rwb, -EINTR);
                        return -EINTR;
                }
 
@@ -265,6 +271,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
                set_current_state(state);
        }
        rwbase_restore_current_state();
+       trace_contention_end(rwb, 0);
 
 out_unlock:
        raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
index 16b532bb5b921c33bcfe7edc79e2cd584a8a8a67..9d1db4a54d34e9db4ba2c8b1a4e3d231b52f1b37 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <trace/events/lock.h>
 
 #ifndef CONFIG_PREEMPT_RT
 #include "lock_events.h"
@@ -1056,6 +1057,8 @@ queue:
        if (!wake_q_empty(&wake_q))
                wake_up_q(&wake_q);
 
+       trace_contention_begin(sem, LCB_F_READ);
+
        /* wait to be given the lock */
        for (;;) {
                set_current_state(state);
@@ -1077,12 +1080,14 @@ queue:
 
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock);
+       trace_contention_end(sem, 0);
        return sem;
 
 out_nolock:
        rwsem_del_wake_waiter(sem, &waiter, &wake_q);
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock_fail);
+       trace_contention_end(sem, -EINTR);
        return ERR_PTR(-EINTR);
 }
 
@@ -1132,6 +1137,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 
        /* wait until we successfully acquire the lock */
        set_current_state(state);
+       trace_contention_begin(sem, LCB_F_WRITE);
+
        for (;;) {
                if (rwsem_try_write_lock(sem, &waiter)) {
                        /* rwsem_try_write_lock() implies ACQUIRE on success */
@@ -1171,6 +1178,7 @@ trylock_again:
        __set_current_state(TASK_RUNNING);
        raw_spin_unlock_irq(&sem->wait_lock);
        lockevent_inc(rwsem_wlock);
+       trace_contention_end(sem, 0);
        return sem;
 
 out_nolock:
@@ -1178,6 +1186,7 @@ out_nolock:
        raw_spin_lock_irq(&sem->wait_lock);
        rwsem_del_wake_waiter(sem, &waiter, &wake_q);
        lockevent_inc(rwsem_wlock_fail);
+       trace_contention_end(sem, -EINTR);
        return ERR_PTR(-EINTR);
 }
 
index 9ee381e4d2a4d02f5fc2956233f0c3d6964c1674..f2654d2fe43aa17f56235d4676c4264fb9922fc4 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/semaphore.h>
 #include <linux/spinlock.h>
 #include <linux/ftrace.h>
+#include <trace/events/lock.h>
 
 static noinline void __down(struct semaphore *sem);
 static noinline int __down_interruptible(struct semaphore *sem);
@@ -205,7 +206,7 @@ struct semaphore_waiter {
  * constant, and thus optimised away by the compiler.  Likewise the
  * 'timeout' parameter for the cases without timeouts.
  */
-static inline int __sched __down_common(struct semaphore *sem, long state,
+static inline int __sched ___down_common(struct semaphore *sem, long state,
                                                                long timeout)
 {
        struct semaphore_waiter waiter;
@@ -236,6 +237,18 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
        return -EINTR;
 }
 
+static inline int __sched __down_common(struct semaphore *sem, long state,
+                                       long timeout)
+{
+       int ret;
+
+       trace_contention_begin(sem, 0);
+       ret = ___down_common(sem, state, timeout);
+       trace_contention_end(sem, ret);
+
+       return ret;
+}
+
 static noinline void __sched __down(struct semaphore *sem)
 {
        __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);