x86/entry: Fix NMI vs IRQ state tracking
author Peter Zijlstra <peterz@infradead.org>
Wed, 27 May 2020 13:50:29 +0000 (15:50 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 10 Jul 2020 10:00:01 +0000 (12:00 +0200)
While the nmi_enter() users did
trace_hardirqs_{off_finish,on_prepare}() there were no matching
lockdep_hardirqs_*() calls to complete the picture.

Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state
tracking across the NMIs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20200623083721.216740948@infradead.org
arch/x86/entry/common.c
arch/x86/include/asm/idtentry.h
arch/x86/kernel/nmi.c
arch/x86/kernel/traps.c
include/linux/hardirq.h

index 0521546022cbfba9fe2c6170fd5f24747b5294a7..63c607dd6c5268191221fae0090c46dcb389d9af 100644 (file)
@@ -592,7 +592,7 @@ SYSCALL_DEFINE0(ni_syscall)
  * The return value must be fed into the state argument of
  * idtentry_exit().
  */
-idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
+noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs)
 {
        idtentry_state_t ret = {
                .exit_rcu = false,
@@ -687,7 +687,7 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
  * Counterpart to idtentry_enter(). The return value of the entry
  * function must be fed into the @state argument.
  */
-void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
+noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
 {
        lockdep_assert_irqs_disabled();
 
@@ -731,7 +731,7 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
  * Invokes enter_from_user_mode() to establish the proper context for
  * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
  */
-void noinstr idtentry_enter_user(struct pt_regs *regs)
+noinstr void idtentry_enter_user(struct pt_regs *regs)
 {
        check_user_regs(regs);
        enter_from_user_mode();
@@ -749,13 +749,47 @@ void noinstr idtentry_enter_user(struct pt_regs *regs)
  *
  * Counterpart to idtentry_enter_user().
  */
-void noinstr idtentry_exit_user(struct pt_regs *regs)
+noinstr void idtentry_exit_user(struct pt_regs *regs)
 {
        lockdep_assert_irqs_disabled();
 
        prepare_exit_to_usermode(regs);
 }
 
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+       bool irq_state = lockdep_hardirqs_enabled(current);
+
+       __nmi_enter();
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       lockdep_hardirq_enter();
+       rcu_nmi_enter();
+
+       instrumentation_begin();
+       trace_hardirqs_off_finish();
+       ftrace_nmi_enter();
+       instrumentation_end();
+
+       return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+       instrumentation_begin();
+       ftrace_nmi_exit();
+       if (restore) {
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       }
+       instrumentation_end();
+
+       rcu_nmi_exit();
+       lockdep_hardirq_exit();
+       if (restore)
+               lockdep_hardirqs_on(CALLER_ADDR0);
+       __nmi_exit();
+}
+
 #ifdef CONFIG_XEN_PV
 #ifndef CONFIG_PREEMPTION
 /*
index 7227225cf45df3dfc50d61c2b2c5730f005c2d9e..2b0497486525e7a480f44d9808d0e445f8dd7d32 100644 (file)
@@ -20,6 +20,9 @@ typedef struct idtentry_state {
 idtentry_state_t idtentry_enter(struct pt_regs *regs);
 void idtentry_exit(struct pt_regs *regs, idtentry_state_t state);
 
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+
 /**
  * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
  *                   No error code pushed by hardware
index d7c5e44b26f73ea78eb1d9916f530b32e0ec48e8..4fc9954a956003d42f3c6e4fc3847f7d1f837f30 100644 (file)
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
        __this_cpu_write(last_nmi_rip, regs->ip);
 
        instrumentation_begin();
-       trace_hardirqs_off_finish();
 
        handled = nmi_handle(NMI_LOCAL, regs);
        __this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
                unknown_nmi_error(reason, regs);
 
 out:
-       if (regs->flags & X86_EFLAGS_IF)
-               trace_hardirqs_on_prepare();
        instrumentation_end();
 }
 
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
 
 DEFINE_IDTENTRY_RAW(exc_nmi)
 {
+       bool irq_state;
+
        if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
                return;
 
@@ -491,14 +490,14 @@ nmi_restart:
 
        this_cpu_write(nmi_dr7, local_db_save());
 
-       nmi_enter();
+       irq_state = idtentry_enter_nmi(regs);
 
        inc_irq_stat(__nmi_count);
 
        if (!ignore_nmis)
                default_do_nmi(regs);
 
-       nmi_exit();
+       idtentry_exit_nmi(regs, irq_state);
 
        local_db_restore(this_cpu_read(nmi_dr7));
 
index 4627f826fb5788a0ae2fe8f74a324d307fde7a18..cdd73829e637edc3193a05ad79f88ec273077b72 100644 (file)
@@ -403,7 +403,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
        }
 #endif
 
-       nmi_enter();
+       idtentry_enter_nmi(regs);
        instrumentation_begin();
        notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -649,15 +649,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
                instrumentation_end();
                idtentry_exit_user(regs);
        } else {
-               nmi_enter();
+               bool irq_state = idtentry_enter_nmi(regs);
                instrumentation_begin();
-               trace_hardirqs_off_finish();
                if (!do_int3(regs))
                        die("int3", regs, 0);
-               if (regs->flags & X86_EFLAGS_IF)
-                       trace_hardirqs_on_prepare();
                instrumentation_end();
-               nmi_exit();
+               idtentry_exit_nmi(regs, irq_state);
        }
 }
 
@@ -865,9 +862,8 @@ out:
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
                                             unsigned long dr6)
 {
-       nmi_enter();
+       bool irq_state = idtentry_enter_nmi(regs);
        instrumentation_begin();
-       trace_hardirqs_off_finish();
 
        /*
         * If something gets miswired and we end up here for a user mode
@@ -884,10 +880,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 
        handle_debug(regs, dr6, false);
 
-       if (regs->flags & X86_EFLAGS_IF)
-               trace_hardirqs_on_prepare();
        instrumentation_end();
-       nmi_exit();
+       idtentry_exit_nmi(regs, irq_state);
 }
 
 static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -903,6 +897,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
        instrumentation_begin();
 
        handle_debug(regs, dr6, true);
+
        instrumentation_end();
        idtentry_exit_user(regs);
 }
index 03c9fece7d43af7f9ee170f65507ae3b6e63e003..754f67ac4326a7f9cd33efe117911abc305be978 100644 (file)
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
 /*
  * nmi_enter() can nest up to 15 times; see NMI_BITS.
  */
-#define nmi_enter()                                            \
+#define __nmi_enter()                                          \
        do {                                                    \
+               lockdep_off();                                  \
                arch_nmi_enter();                               \
                printk_nmi_enter();                             \
-               lockdep_off();                                  \
                BUG_ON(in_nmi() == NMI_MASK);                   \
                __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);       \
-               rcu_nmi_enter();                                \
+       } while (0)
+
+#define nmi_enter()                                            \
+       do {                                                    \
+               __nmi_enter();                                  \
                lockdep_hardirq_enter();                        \
+               rcu_nmi_enter();                                \
                instrumentation_begin();                        \
                ftrace_nmi_enter();                             \
                instrumentation_end();                          \
        } while (0)
 
+#define __nmi_exit()                                           \
+       do {                                                    \
+               BUG_ON(!in_nmi());                              \
+               __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);       \
+               printk_nmi_exit();                              \
+               arch_nmi_exit();                                \
+               lockdep_on();                                   \
+       } while (0)
+
 #define nmi_exit()                                             \
        do {                                                    \
                instrumentation_begin();                        \
                ftrace_nmi_exit();                              \
                instrumentation_end();                          \
-               lockdep_hardirq_exit();                         \
                rcu_nmi_exit();                                 \
-               BUG_ON(!in_nmi());                              \
-               __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);       \
-               lockdep_on();                                   \
-               printk_nmi_exit();                              \
-               arch_nmi_exit();                                \
+               lockdep_hardirq_exit();                         \
+               __nmi_exit();                                   \
        } while (0)
 
 #endif /* LINUX_HARDIRQ_H */