powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless perf is in use
author Nicholas Piggin <npiggin@gmail.com>
Wed, 22 Sep 2021 14:54:50 +0000 (00:54 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Thu, 16 Dec 2021 10:31:45 +0000 (21:31 +1100)
Enabling MSR[EE] in interrupt handlers while interrupts are still
soft-masked allows PMIs to profile interrupt handlers to some degree,
beyond what SIAR latching allows.

When perf is not being used, this is almost useless work. It requires an
extra mtmsrd in the irq handler, and it also opens the door to masked
interrupts hitting and requiring replay, which is more expensive than
just taking them directly. This effect can be noticeable in high-IRQ
workloads.

Avoid enabling MSR[EE] unless perf is currently in use. This saves about
60 cycles (or 8%) on a simple decrementer interrupt microbenchmark.
Replayed interrupts drop from 1.4% of all interrupts taken to 0.003%.

This does prevent the soft-NMI interrupt from being taken in these
handlers, but that is not too reliable anyway. The SMP watchdog remains
the reliable way to catch lockups.
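
In caller terms, the change is a mechanical split of the old helper
into a check and an action. A minimal sketch of the pattern the diff
below applies in dbell.c, irq.c and time.c:

        /* Before: one helper both checked and hard-enabled */
        may_hard_irq_enable();

        /*
         * After: the check is separate from the enable, so callers
         * can do work in between (e.g. timer_interrupt bumps the
         * decrementer before hard-enabling).
         */
        if (should_hard_irq_enable())
                do_hard_irq_enable();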

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210922145452.352571-5-npiggin@gmail.com
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/kernel/dbell.c
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/time.c

index 8d6f80101edaddcb660cfc871e977bf1a94a1ca1..a58fb4aa6c81d68b7188cb36a900f59189c7c39c 100644 (file)
@@ -345,17 +345,54 @@ static inline bool lazy_irq_pending_nocheck(void)
 bool power_pmu_wants_prompt_pmi(void);
 
 /*
- * This is called by asynchronous interrupts to conditionally
- * re-enable hard interrupts after having cleared the source
- * of the interrupt. They are kept disabled if there is a different
- * soft-masked interrupt pending that requires hard masking.
+ * This is called by asynchronous interrupts to check whether to
+ * conditionally re-enable hard interrupts after having cleared
+ * the source of the interrupt. They are kept disabled if there
+ * is a different soft-masked interrupt pending that requires hard
+ * masking.
  */
-static inline void may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
 {
-       if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) {
-               get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-               __hard_irq_enable();
-       }
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+       WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+       WARN_ON(mfmsr() & MSR_EE);
+#endif
+#ifdef CONFIG_PERF_EVENTS
+       /*
+        * If the PMU is not running, there is not much reason to enable
+        * MSR[EE] in irq handlers because any interrupts would just be
+        * soft-masked.
+        *
+        * TODO: Add test for 64e
+        */
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi())
+               return false;
+
+       if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
+               return false;
+
+       return true;
+#else
+       return false;
+#endif
+}
+
+/*
+ * Do the hard enabling, only call this if should_hard_irq_enable is true.
+ */
+static inline void do_hard_irq_enable(void)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+       WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+       WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK);
+       WARN_ON(mfmsr() & MSR_EE);
+#endif
+       /*
+        * This allows PMI interrupts (and watchdog soft-NMIs) through.
+        * There is no other reason to enable this way.
+        */
+       get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
+       __hard_irq_enable();
 }
 
 static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
@@ -436,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
        return !(regs->msr & MSR_EE);
 }
 
-static inline bool may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
 {
        return false;
 }
index 5545c9cd17c1c9ae088e379db0760a48506dde78..f55c6fb34a3a03b3d48c4d52a6e1d6492d8fec62 100644 (file)
@@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
 
        ppc_msgsync();
 
-       may_hard_irq_enable();
+       if (should_hard_irq_enable())
+               do_hard_irq_enable();
 
        kvmppc_clear_host_ipi(smp_processor_id());
        __this_cpu_inc(irq_stat.doorbell_irqs);
index 8207f97d51e8c6d7b944ec5a73eac4e6cc0ee6fd..2cf31a97126ce6f6c37809f8826a9b869fe9ef6f 100644 (file)
@@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs)
        irq = ppc_md.get_irq();
 
        /* We can hard enable interrupts now to allow perf interrupts */
-       may_hard_irq_enable();
+       if (should_hard_irq_enable())
+               do_hard_irq_enable();
 
        /* And finally process it */
        if (unlikely(!irq))
index 42df9dd7fb4183a5bffeb0f6f3d0923762d89fa1..62361cc7281cd74f2fe5aab0e6df36e0098cdaba 100644 (file)
@@ -609,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
                return;
        }
 
-       /* Ensure a positive value is written to the decrementer, or else
-        * some CPUs will continue to take decrementer exceptions. When the
-        * PPC_WATCHDOG (decrementer based) is configured, keep this at most
-        * 31 bits, which is about 4 seconds on most systems, which gives
-        * the watchdog a chance of catching timer interrupt hard lockups.
-        */
-       if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
-               set_dec(0x7fffffff);
-       else
-               set_dec(decrementer_max);
-
-       /* Conditionally hard-enable interrupts now that the DEC has been
-        * bumped to its maximum value
-        */
-       may_hard_irq_enable();
+       /* Conditionally hard-enable interrupts. */
+       if (should_hard_irq_enable()) {
+               /*
+                * Ensure a positive value is written to the decrementer, or
+                * else some CPUs will continue to take decrementer exceptions.
+                * When the PPC_WATCHDOG (decrementer based) is configured,
+                * keep this at most 31 bits, which is about 4 seconds on most
+                * systems, which gives the watchdog a chance of catching timer
+                * interrupt hard lockups.
+                */
+               if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+                       set_dec(0x7fffffff);
+               else
+                       set_dec(decrementer_max);
+
+               do_hard_irq_enable();
+       }
 
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
        if (atomic_read(&ppc_n_lost_interrupts) != 0)