Merge branch 'perf-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / kernel / watchdog.c
index a8d6914030fe6ec5aa9e08154ce29b480d2c0745..ff7fd80bef994d07730ac87de9602dc750600934 100644 (file)
 #include <linux/cpu.h>
 #include <linux/nmi.h>
 #include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/lockdep.h>
-#include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
@@ -47,6 +42,7 @@ static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
+static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
@@ -333,8 +329,22 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                        return HRTIMER_RESTART;
 
                /* only warn once */
-               if (__this_cpu_read(soft_watchdog_warn) == true)
+               if (__this_cpu_read(soft_watchdog_warn) == true) {
+                       /*
+                        * When multiple processes are causing softlockups the
+                        * softlockup detector only warns on the first one
+                        * because the code relies on a full quiet cycle to
+                        * re-arm.  The second process prevents the quiet cycle
+                        * and never gets reported.  Use task pointers to detect
+                        * this.
+                        */
+                       if (__this_cpu_read(softlockup_task_ptr_saved) !=
+                           current) {
+                               __this_cpu_write(soft_watchdog_warn, false);
+                               __touch_watchdog();
+                       }
                        return HRTIMER_RESTART;
+               }
 
                if (softlockup_all_cpu_backtrace) {
                        /* Prevent multiple soft-lockup reports if one cpu is already
@@ -350,6 +360,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                        smp_processor_id(), duration,
                        current->comm, task_pid_nr(current));
+               __this_cpu_write(softlockup_task_ptr_saved, current);
                print_modules();
                print_irqtrace_events(current);
                if (regs)
@@ -514,7 +525,10 @@ static void watchdog_nmi_disable(unsigned int cpu)
                /* should be in cleanup, but blocks oprofile */
                perf_event_release_kernel(event);
        }
-       return;
+       if (cpu == 0) {
+               /* watchdog_nmi_enable() expects this to be zero initially. */
+               cpu0_err = 0;
+       }
 }
 #else
 static int watchdog_nmi_enable(unsigned int cpu) { return 0; }