powerpc/powernv: call OPAL_QUIESCE before OPAL_SIGNAL_SYSTEM_RESET
author Nicholas Piggin <npiggin@gmail.com>
Thu, 10 May 2018 12:21:48 +0000 (22:21 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Sun, 3 Jun 2018 10:40:30 +0000 (20:40 +1000)
Although it is often possible to recover a CPU that was interrupted
from OPAL with a system reset NMI, it's undesirable to interrupt them
for a few reasons. Firstly because dump/debug code itself needs to
call firmware, so it could hang on a lock or possibly corrupt a
per-cpu data structure if it or another CPU was interrupted from
OPAL. Secondly, the kexec crash dump code will not return from
interrupt to unwind the OPAL call.

Call OPAL_QUIESCE with QUIESCE_HOLD before sending an NMI IPI to
another CPU, which waits for it to leave firmware (or time out) to
avoid this problem in normal conditions. Firmware bugs may still
result in a timeout and interrupting OPAL, but that is the best
option (stops the CPU, and possibly allows firmware to be debugged).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/opal.h
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/smp.c

index f34d173a2ebfe4df34096e3fb601e394901f2ef1..3bab299eda491649979a99d235376ecd096e8f2b 100644 (file)
 #define OPAL_SET_POWER_SHIFT_RATIO             155
 #define OPAL_SENSOR_GROUP_CLEAR                        156
 #define OPAL_PCI_SET_P2P                       157
+#define OPAL_QUIESCE                           158
 #define OPAL_NPU_SPA_SETUP                     159
 #define OPAL_NPU_SPA_CLEAR_CACHE               160
 #define OPAL_NPU_TL_SET                                161
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR           165
 #define OPAL_LAST                              165
 
+#define QUIESCE_HOLD                   1 /* Spin all calls at entry */
+#define QUIESCE_REJECT                 2 /* Fail all calls with OPAL_BUSY */
+#define QUIESCE_LOCK_BREAK             3 /* Set to ignore locks. */
+#define QUIESCE_RESUME                 4 /* Un-quiesce */
+#define QUIESCE_RESUME_FAST_REBOOT     5 /* Un-quiesce, fast reboot */
+
 /* Device tree flags */
 
 /*
index 3960def0e39c49edb17d42c8d1cccbc3b30d2b96..1dbeb6cd68faa77fc5cc3f5db9176e08ff496050 100644 (file)
@@ -294,6 +294,7 @@ int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
 int opal_sensor_group_clear(u32 group_hndl, int token);
 
 s64 opal_signal_system_reset(s32 cpu);
+s64 opal_quiesce(u64 shutdown_type, s32 cpu);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
index 8482df255969c25aa8fe6009325e21485a78a515..a8d9b4089c3157b65417d4af69ee9e4815a958ca 100644 (file)
@@ -320,6 +320,7 @@ OPAL_CALL(opal_set_powercap,                        OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,          OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,          OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear,             OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_quiesce,                                OPAL_QUIESCE);
 OPAL_CALL(opal_npu_spa_setup,                  OPAL_NPU_SPA_SETUP);
 OPAL_CALL(opal_npu_spa_clear_cache,            OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,                     OPAL_NPU_TL_SET);
index 19af6de6b6f00f602a7630770875d8cdfa2fdc24..b80909957792fac09de832c2c6eebb904efb3a83 100644 (file)
@@ -334,7 +334,16 @@ static int pnv_cause_nmi_ipi(int cpu)
        int64_t rc;
 
        if (cpu >= 0) {
-               rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
+               int h = get_hard_smp_processor_id(cpu);
+
+               if (opal_check_token(OPAL_QUIESCE))
+                       opal_quiesce(QUIESCE_HOLD, h);
+
+               rc = opal_signal_system_reset(h);
+
+               if (opal_check_token(OPAL_QUIESCE))
+                       opal_quiesce(QUIESCE_RESUME, h);
+
                if (rc != OPAL_SUCCESS)
                        return 0;
                return 1;
@@ -343,6 +352,8 @@ static int pnv_cause_nmi_ipi(int cpu)
                bool success = true;
                int c;
 
+               if (opal_check_token(OPAL_QUIESCE))
+                       opal_quiesce(QUIESCE_HOLD, -1);
 
                /*
                 * We do not use broadcasts (yet), because it's not clear
@@ -358,6 +369,10 @@ static int pnv_cause_nmi_ipi(int cpu)
                        if (rc != OPAL_SUCCESS)
                                success = false;
                }
+
+               if (opal_check_token(OPAL_QUIESCE))
+                       opal_quiesce(QUIESCE_RESUME, -1);
+
                if (success)
                        return 1;