watchdog/hardlockup/perf: Use atomics to track in-use cpu counter
[sfrench/cifs-2.6.git] / kernel / watchdog_hld.c
index 3a09ea1b1d3d5e6e284d058052403ac1396804ca..a84b205fac9a0be08c6e96570857260efe5b3f41 100644 (file)
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) "NMI watchdog: " fmt
 
 #include <linux/nmi.h>
+#include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/sched/debug.h>
 
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+static DEFINE_PER_CPU(struct perf_event *, dead_event); /* disabled event awaiting release */
+static struct cpumask dead_events_mask; /* CPUs whose dead_event slot was filled */
 
 static unsigned long hardlockup_allcpu_dumped;
+static atomic_t watchdog_cpus = ATOMIC_INIT(0); /* inc on enable, dec on disable */
 
 void arch_touch_nmi_watchdog(void)
 {
@@ -103,15 +107,12 @@ static struct perf_event_attr wd_hw_attr = {
 
 /* Callback function for perf event subsystem */
 static void watchdog_overflow_callback(struct perf_event *event,
-                struct perf_sample_data *data,
-                struct pt_regs *regs)
+                                      struct perf_sample_data *data,
+                                      struct pt_regs *regs)
 {
        /* Ensure the watchdog never gets throttled */
        event->hw.interrupts = 0;
 
-       if (atomic_read(&watchdog_park_in_progress) != 0)
-               return;
-
        if (__this_cpu_read(watchdog_nmi_touch) == true) {
                __this_cpu_write(watchdog_nmi_touch, false);
                return;
@@ -160,104 +161,134 @@ static void watchdog_overflow_callback(struct perf_event *event,
        return;
 }
 
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long firstcpu_err;
-static atomic_t watchdog_cpus;
-
-int watchdog_nmi_enable(unsigned int cpu)
+static int hardlockup_detector_event_create(void)
 {
+       unsigned int cpu = smp_processor_id(); /* event is created for the calling CPU */
        struct perf_event_attr *wd_attr;
-       struct perf_event *event = per_cpu(watchdog_ev, cpu);
-       int firstcpu = 0;
-
-       /* nothing to do if the hard lockup detector is disabled */
-       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-               goto out;
-
-       /* is it already setup and enabled? */
-       if (event && event->state > PERF_EVENT_STATE_OFF)
-               goto out;
-
-       /* it is setup but not enabled */
-       if (event != NULL)
-               goto out_enable;
-
-       if (atomic_inc_return(&watchdog_cpus) == 1)
-               firstcpu = 1;
+       struct perf_event *evt;
 
        wd_attr = &wd_hw_attr;
        wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
 
        /* Try to register using hardware perf events */
-       event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+       evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
+                                              watchdog_overflow_callback, NULL);
+       if (IS_ERR(evt)) {
+               pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
+                       PTR_ERR(evt)); /* NOTE(review): cpu is unsigned int, %u would match */
+               return PTR_ERR(evt); /* hand the encoded error back to the caller */
+       }
+       this_cpu_write(watchdog_ev, evt); /* store the new event in this CPU's slot */
+       return 0;
+}
 
-       /* save the first cpu's error for future comparision */
-       if (firstcpu && IS_ERR(event))
-               firstcpu_err = PTR_ERR(event);
+/**
+ * hardlockup_detector_perf_enable - Enable the local event
+ *
+ * Creates the perf event for the calling CPU through
+ * hardlockup_detector_event_create() and enables it. If creation fails the
+ * function returns early and watchdog_cpus is left untouched. The caller
+ * that raises watchdog_cpus from zero prints the "Enabled" message.
+ */
+void hardlockup_detector_perf_enable(void)
+{
+       if (hardlockup_detector_event_create())
+               return;
 
-       if (!IS_ERR(event)) {
-               /* only print for the first cpu initialized */
-               if (firstcpu || firstcpu_err)
-                       pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
-               goto out_save;
-       }
+       /* fetch_inc returns the old value: zero means first CPU enabled */
+       if (!atomic_fetch_inc(&watchdog_cpus))
+               pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
 
-       /*
-        * Disable the hard lockup detector if _any_ CPU fails to set up
-        * set up the hardware perf event. The watchdog() function checks
-        * the NMI_WATCHDOG_ENABLED bit periodically.
-        *
-        * The barriers are for syncing up watchdog_enabled across all the
-        * cpus, as clear_bit() does not use barriers.
-        */
-       smp_mb__before_atomic();
-       clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
-       smp_mb__after_atomic();
-
-       /* skip displaying the same error again */
-       if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
-               return PTR_ERR(event);
-
-       /* vary the KERN level based on the returned errno */
-       if (PTR_ERR(event) == -EOPNOTSUPP)
-               pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
-       else if (PTR_ERR(event) == -ENOENT)
-               pr_warn("disabled (cpu%i): hardware events not enabled\n",
-                        cpu);
-       else
-               pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
-                       cpu, PTR_ERR(event));
-
-       pr_info("Shutting down hard lockup detector on all cpus\n");
-
-       return PTR_ERR(event);
-
-       /* success path */
-out_save:
-       per_cpu(watchdog_ev, cpu) = event;
-out_enable:
-       perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
-       return 0;
+       perf_event_enable(this_cpu_read(watchdog_ev));
 }
 
-void watchdog_nmi_disable(unsigned int cpu)
+/**
+ * hardlockup_detector_perf_disable - Disable the local event
+ *
+ * If the calling CPU has an event in watchdog_ev, disable it, clear the
+ * watchdog_ev slot, park the event in dead_event, mark the CPU in
+ * dead_events_mask and decrement watchdog_cpus. The event is not released
+ * here. Nothing is done when the CPU has no event.
+ */
+void hardlockup_detector_perf_disable(void)
 {
-       struct perf_event *event = per_cpu(watchdog_ev, cpu);
+       struct perf_event *event = this_cpu_read(watchdog_ev);
 
        if (event) {
                perf_event_disable(event);
-               per_cpu(watchdog_ev, cpu) = NULL;
+               this_cpu_write(watchdog_ev, NULL);
+               this_cpu_write(dead_event, event);
+               cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
+               atomic_dec(&watchdog_cpus);
+       }
+}
+
+/**
+ * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
+ *
+ * Walks the CPUs set in dead_events_mask, releases the event parked in
+ * each CPU's dead_event slot, resets the slot to NULL and finally clears
+ * the whole mask.
+ *
+ * Called from lockup_detector_cleanup(). Serialized by the caller.
+ */
+void hardlockup_detector_perf_cleanup(void)
+{
+       int cpu;
+
+       for_each_cpu(cpu, &dead_events_mask) {
+               struct perf_event *event = per_cpu(dead_event, cpu);
 
-               /* should be in cleanup, but blocks oprofile */
-               perf_event_release_kernel(event);
+               /*
+                * The NULL check is required because for_each_cpu()
+                * unconditionally reports CPU0 as set on UP kernels. Sigh.
+                */
+               if (event)
+                       perf_event_release_kernel(event);
+               per_cpu(dead_event, cpu) = NULL;
+       }
+       cpumask_clear(&dead_events_mask);
+}
+
+/**
+ * hardlockup_detector_perf_stop - Globally stop watchdog events
+ *
+ * Disables the watchdog event of every online CPU that has one. The
+ * events are only disabled; the watchdog_ev slots are left as they are.
+ * The function asserts lockdep_assert_cpus_held() on entry.
+ *
+ * Special interface for x86 to handle the perf HT bug.
+ */
+void __init hardlockup_detector_perf_stop(void)
+{
+       int cpu;
+
+       lockdep_assert_cpus_held();
+
+       for_each_online_cpu(cpu) {
+               struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+               if (event)
+                       perf_event_disable(event);
+       }
+}
+
+/**
+ * hardlockup_detector_perf_restart - Globally restart watchdog events
+ *
+ * Re-enables the watchdog event of every online CPU that has one. Does
+ * nothing when NMI_WATCHDOG_ENABLED is not set in watchdog_enabled.
+ * The function asserts lockdep_assert_cpus_held() on entry.
+ *
+ * Special interface for x86 to handle the perf HT bug.
+ */
+void __init hardlockup_detector_perf_restart(void)
+{
+       int cpu;
+
+       lockdep_assert_cpus_held();
+
+       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+               return;
+
+       for_each_online_cpu(cpu) {
+               struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+               if (event)
+                       perf_event_enable(event);
+       }
+}
+
+/**
+ * hardlockup_detector_perf_init - Probe whether NMI event is available at all
+ *
+ * Creates a perf event on the calling CPU as a probe. On success the
+ * event is released again and this CPU's watchdog_ev slot is reset to
+ * NULL; on failure a "permanently disabled" message is logged.
+ *
+ * Return: 0 if the event could be created, otherwise the error returned
+ * by hardlockup_detector_event_create().
+ */
+int __init hardlockup_detector_perf_init(void)
+{
+       int ret = hardlockup_detector_event_create();
 
-               /* watchdog_nmi_enable() expects this to be zero initially. */
-               if (atomic_dec_and_test(&watchdog_cpus))
-                       firstcpu_err = 0;
+       if (ret) {
+               pr_info("Perf NMI watchdog permanently disabled\n");
+       } else {
+               perf_event_release_kernel(this_cpu_read(watchdog_ev));
+               this_cpu_write(watchdog_ev, NULL);
        }
+       return ret;
 }