debugobjects,locking: Annotate debug_objects_fill_pool() wait type violation
author	Peter Zijlstra <peterz@infradead.org>
Tue, 25 Apr 2023 15:03:13 +0000 (17:03 +0200)
committer	Peter Zijlstra <peterz@infradead.org>
Tue, 2 May 2023 12:48:14 +0000 (14:48 +0200)
There is an explicit wait-type violation in debug_objects_fill_pool()
for PREEMPT_RT=n kernels which allows them to more easily fill the
object pool and reduce the chance of allocation failures.

Lockdep's wait-type checks are designed to enforce the PREEMPT_RT
locking rules even on PREEMPT_RT=n kernels and object to this, so
create a lockdep annotation to allow this to stand.

Specifically, create a 'lock' type that overrides the inner wait-type
while it is held -- allowing one to temporarily raise it, such that
the violation is hidden.

Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Qi Zheng <zhengqi.arch@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Qi Zheng <zhengqi.arch@bytedance.com>
Link: https://lkml.kernel.org/r/20230429100614.GA1489784@hirez.programming.kicks-ass.net
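
For reference, here is a minimal sketch of the usage pattern the patch
introduces (the identifiers below are invented for illustration; the real
user is debug_objects_fill_pool() in the lib/debugobjects.c hunk at the
end). The override map is not a real lock: taking it with
lock_map_acquire_try() merely raises the effective inner wait-type to
LD_WAIT_SLEEP for the code it brackets, and because it is acquired as a
trylock it never enters lockdep's dependency chains.

    #include <linux/lockdep.h>

    /* Illustrative only -- not part of the patch. */
    static DEFINE_WAIT_OVERRIDE_MAP(example_map, LD_WAIT_SLEEP);

    static void example_rt_n_only_path(void)
    {
            lock_map_acquire_try(&example_map);     /* wait-type raised to LD_WAIT_SLEEP */
            /* ... code that is only valid for CONFIG_PREEMPT_RT=n ... */
            lock_map_release(&example_map);
    }
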
include/linux/lockdep.h
include/linux/lockdep_types.h
kernel/locking/lockdep.c
lib/debugobjects.c

index 1023f349af716a1fb7b1cadaff3224ae0b4bbdf7..a3329fb49b339bae30ddbf11b7ce026a11a2a985 100644 (file)
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -339,6 +339,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 #define lockdep_repin_lock(l,c)        lock_repin_lock(&(l)->dep_map, (c))
 #define lockdep_unpin_lock(l,c)        lock_unpin_lock(&(l)->dep_map, (c))
 
+/*
+ * Must use lock_map_acquire_try() with override maps to avoid
+ * lockdep thinking they participate in the block chain.
+ */
+#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)    \
+       struct lockdep_map _name = {                    \
+               .name = #_name "-wait-type-override",   \
+               .wait_type_inner = _wait_type,          \
+               .lock_type = LD_LOCK_WAIT_OVERRIDE, }
+
 #else /* !CONFIG_LOCKDEP */
 
 static inline void lockdep_init_task(struct task_struct *task)
@@ -427,6 +437,9 @@ extern int lockdep_is_held(const void *);
 #define lockdep_repin_lock(l, c)               do { (void)(l); (void)(c); } while (0)
 #define lockdep_unpin_lock(l, c)               do { (void)(l); (void)(c); } while (0)
 
+#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)    \
+       struct lockdep_map __maybe_unused _name = {}
+
 #endif /* !LOCKDEP */
 
 enum xhlock_context_t {
@@ -551,6 +564,7 @@ do {                                                                        \
 #define rwsem_release(l, i)                    lock_release(l, i)
 
 #define lock_map_acquire(l)                    lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
+#define lock_map_acquire_try(l)                        lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_)
 #define lock_map_acquire_read(l)               lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
 #define lock_map_acquire_tryread(l)            lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
 #define lock_map_release(l)                    lock_release(l, _THIS_IP_)
index d22430840b53f99451cff84abdc202588a45e81d..59f4fb1626ea60e76c27d8d4e9a0f0eaf831bac4 100644 (file)
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -33,6 +33,7 @@ enum lockdep_wait_type {
 enum lockdep_lock_type {
        LD_LOCK_NORMAL = 0,     /* normal, catch all */
        LD_LOCK_PERCPU,         /* percpu */
+       LD_LOCK_WAIT_OVERRIDE,  /* annotation */
        LD_LOCK_MAX,
 };
 
index 50d4863974e7a218a3c4b855d3934712c80c614a..62ef295e07e60ee4a627bd41a54a3db9155b32ee 100644 (file)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2253,6 +2253,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask)
 
 static inline bool usage_skip(struct lock_list *entry, void *mask)
 {
+       if (entry->class->lock_type == LD_LOCK_NORMAL)
+               return false;
+
        /*
         * Skip local_lock() for irq inversion detection.
         *
@@ -2279,14 +2282,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
         * As a result, we will skip local_lock(), when we search for irq
         * inversion bugs.
         */
-       if (entry->class->lock_type == LD_LOCK_PERCPU) {
-               if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
-                       return false;
+       if (entry->class->lock_type == LD_LOCK_PERCPU &&
+           DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+               return false;
 
-               return true;
-       }
+       /*
+        * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually
+        * a lock and only used to override the wait_type.
+        */
 
-       return false;
+       return true;
 }
 
 /*
@@ -4752,7 +4757,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
 
        for (; depth < curr->lockdep_depth; depth++) {
                struct held_lock *prev = curr->held_locks + depth;
-               u8 prev_inner = hlock_class(prev)->wait_type_inner;
+               struct lock_class *class = hlock_class(prev);
+               u8 prev_inner = class->wait_type_inner;
 
                if (prev_inner) {
                        /*
@@ -4762,6 +4768,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
                         * Also due to trylocks.
                         */
                        curr_inner = min(curr_inner, prev_inner);
+
+                       /*
+                        * Allow override for annotations -- this is typically
+                        * only valid/needed for code that only exists when
+                        * CONFIG_PREEMPT_RT=n.
+                        */
+                       if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE))
+                               curr_inner = prev_inner;
                }
        }
 
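
As a rough worked example of the check_wait_context() change above (the
lock names are made up; assume a PREEMPT_RT=n kernel with
CONFIG_PROVE_RAW_LOCK_NESTING=y): without the override, taking a
spinlock_t (LD_WAIT_CONFIG) while a raw_spinlock_t (LD_WAIT_SPIN) is held
produces an invalid-wait-context report; with the override map held,
curr_inner is raised back to LD_WAIT_SLEEP and the nested acquisition
passes.

    #include <linux/spinlock.h>
    #include <linux/lockdep.h>

    static DEFINE_RAW_SPINLOCK(demo_raw_lock);  /* wait_type_inner == LD_WAIT_SPIN   */
    static DEFINE_SPINLOCK(demo_lock);          /* wait_type_inner == LD_WAIT_CONFIG */
    static DEFINE_WAIT_OVERRIDE_MAP(demo_map, LD_WAIT_SLEEP);

    static void demo(void)
    {
            raw_spin_lock(&demo_raw_lock);      /* curr_inner becomes LD_WAIT_SPIN */
            lock_map_acquire_try(&demo_map);    /* trylock: not itself checked, overrides curr_inner to LD_WAIT_SLEEP */
            spin_lock(&demo_lock);              /* LD_WAIT_CONFIG <= LD_WAIT_SLEEP: no report */
            spin_unlock(&demo_lock);
            lock_map_release(&demo_map);
            raw_spin_unlock(&demo_raw_lock);
    }
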
index 003edc5ebd6730feede2d4999372b6d339ec7d65..826c617b10a7582069c8f90d3020701344282046 100644 (file)
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -591,10 +591,21 @@ static void debug_objects_fill_pool(void)
 {
        /*
         * On RT enabled kernels the pool refill must happen in preemptible
-        * context:
+        * context -- for !RT kernels we rely on the fact that spinlock_t and
+        * raw_spinlock_t are basically the same type and this lock-type
+        * inversion works just fine.
         */
-       if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+               /*
+                * Annotate away the spinlock_t inside raw_spinlock_t warning
+                * by temporarily raising the wait-type to WAIT_SLEEP, matching
+                * the preemptible() condition above.
+                */
+               static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
+               lock_map_acquire_try(&fill_pool_map);
                fill_pool();
+               lock_map_release(&fill_pool_map);
+       }
 }
 
 static void