Merge tag 'v3.6-rc6' into x86/mce
author     Ingo Molnar <mingo@kernel.org>
           Wed, 19 Sep 2012 15:01:25 +0000 (17:01 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Wed, 19 Sep 2012 15:01:25 +0000 (17:01 +0200)
Merge Linux v3.6-rc6, to refresh this tree.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/mcheck/mce-inject.c
arch/x86/kernel/cpu/mcheck/mce-internal.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/mce_intel.c

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb3935771eab1b404b3333cdb7842dac7c5c..ddc72f8393321de0ca989d2476f7ccd96eed3639 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
                put_online_cpus();
        } else
 #endif
+       {
+               preempt_disable();
                raise_local();
+               preempt_enable();
+       }
 }
 
 /* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
         * so do it a jiffie or two later everywhere.
         */
        schedule_timeout(2);
+
+       mutex_lock(&mce_inject_mutex);
        raise_mce(&m);
+       mutex_unlock(&mce_inject_mutex);
        return usize;
 }
 
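The mce-inject.c hunks above serialize software error injection: the new mce_inject_mutex ensures only one injection request goes through raise_mce() at a time, and the preempt_disable()/preempt_enable() pair keeps the non-broadcast raise_local() call on the CPU it started on. As a point of comparison only, here is a minimal user-space sketch of the same mutual-exclusion idea using POSIX threads; inject_lock and do_injection() are illustrative stand-ins, not kernel interfaces.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins: inject_lock plays the role of mce_inject_mutex,
 * do_injection() the role of raise_mce(). */
static pthread_mutex_t inject_lock = PTHREAD_MUTEX_INITIALIZER;

static void *do_injection(void *arg)
{
	long id = (long)arg;

	pthread_mutex_lock(&inject_lock);
	/* Critical section: only one injection is in flight at a time. */
	printf("thread %ld: injecting\n", id);
	pthread_mutex_unlock(&inject_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, do_injection, (void *)1L);
	pthread_create(&b, NULL, do_injection, (void *)2L);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Build with "cc -pthread" if trying it out; the kernel code itself uses mutex_lock()/mutex_unlock(), not pthreads.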
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858623b64b313fe17b9e9f9d88a28bc..6a05c1d327a9627819729c83a762f2c3fe9bc923 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
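The mce-internal.h hunk gives the generic mce.c code a single set of hook names whether or not the Intel-specific code is built in: with CONFIG_X86_MCE_INTEL disabled, the CMCI poll and hotplug hooks collapse to empty inlines, and mce_intel_adjust_timer is aliased by a #define to mce_adjust_timer_default (a macro rather than an inline, since that default is a static function local to mce.c). A minimal standalone sketch of the same compile-out pattern, using a hypothetical HAVE_VENDOR_HOOKS switch rather than a Kconfig option:

#include <stdio.h>

/* Toggle at build time, e.g. cc -DHAVE_VENDOR_HOOKS=1 ... */
#ifndef HAVE_VENDOR_HOOKS
#define HAVE_VENDOR_HOOKS 0
#endif

/* Default policy: leave the interval alone (like mce_adjust_timer_default). */
static unsigned long adjust_default(unsigned long interval)
{
	return interval;
}

#if HAVE_VENDOR_HOOKS
/* Vendor-specific policy, only present when the option is enabled. */
static unsigned long vendor_adjust(unsigned long interval)
{
	return adjust_default(interval) / 2;
}
#else
/* Compiled out: alias the hook to the default, in the spirit of
 * "#define mce_intel_adjust_timer mce_adjust_timer_default". */
#define vendor_adjust adjust_default
#endif

int main(void)
{
	printf("next interval: %lu\n", vendor_adjust(100));
	return 0;
}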
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c82d04c5ec0aeab43924d00c669b4..c311122ea838301781d8d5e41723a37cf0e68dde 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1266,6 +1266,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+       return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+       mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
        struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1284,7 @@ static void mce_timer_fn(unsigned long data)
        if (mce_available(__this_cpu_ptr(&cpu_info))) {
                machine_check_poll(MCP_TIMESTAMP,
                                &__get_cpu_var(mce_poll_banks));
+               mce_intel_cmci_poll();
        }
 
        /*
@@ -1283,14 +1292,38 @@ static void mce_timer_fn(unsigned long data)
         * polling interval, otherwise increase the polling interval.
         */
        iv = __this_cpu_read(mce_next_interval);
-       if (mce_notify_irq())
+       if (mce_notify_irq()) {
                iv = max(iv / 2, (unsigned long) HZ/100);
-       else
+       } else {
                iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+               iv = mce_adjust_timer(iv);
+       }
        __this_cpu_write(mce_next_interval, iv);
+       /* Might have become 0 after CMCI storm subsided */
+       if (iv) {
+               t->expires = jiffies + iv;
+               add_timer_on(t, smp_processor_id());
+       }
+}
 
-       t->expires = jiffies + iv;
-       add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+       struct timer_list *t = &__get_cpu_var(mce_timer);
+       unsigned long when = jiffies + interval;
+       unsigned long iv = __this_cpu_read(mce_next_interval);
+
+       if (timer_pending(t)) {
+               if (time_before(when, t->expires))
+                       mod_timer_pinned(t, when);
+       } else {
+               t->expires = round_jiffies(when);
+               add_timer_on(t, smp_processor_id());
+       }
+       if (interval < iv)
+               __this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1618,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
        switch (c->x86_vendor) {
        case X86_VENDOR_INTEL:
                mce_intel_feature_init(c);
+               mce_adjust_timer = mce_intel_adjust_timer;
                break;
        case X86_VENDOR_AMD:
                mce_amd_feature_init(c);
@@ -1594,23 +1628,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
        }
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-       struct timer_list *t = &__get_cpu_var(mce_timer);
-       unsigned long iv = check_interval * HZ;
+       unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
-       setup_timer(t, mce_timer_fn, smp_processor_id());
+       __this_cpu_write(mce_next_interval, iv);
 
-       if (mce_ignore_ce)
+       if (mce_ignore_ce || !iv)
                return;
 
-       __this_cpu_write(mce_next_interval, iv);
-       if (!iv)
-               return;
        t->expires = round_jiffies(jiffies + iv);
        add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+       struct timer_list *t = &__get_cpu_var(mce_timer);
+       unsigned int cpu = smp_processor_id();
+
+       setup_timer(t, mce_timer_fn, cpu);
+       mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -2294,38 +2333,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
        unsigned int cpu = (unsigned long)hcpu;
        struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
                mce_device_create(cpu);
                if (threshold_cpu_callback)
                        threshold_cpu_callback(action, cpu);
                break;
        case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
                if (threshold_cpu_callback)
                        threshold_cpu_callback(action, cpu);
                mce_device_remove(cpu);
+               mce_intel_hcpu_update(cpu);
                break;
        case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               del_timer_sync(t);
                smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+               del_timer_sync(t);
                break;
        case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               if (!mce_ignore_ce && check_interval) {
-                       t->expires = round_jiffies(jiffies +
-                                       per_cpu(mce_next_interval, cpu));
-                       add_timer_on(t, cpu);
-               }
                smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+               mce_start_timer(cpu, t);
                break;
-       case CPU_POST_DEAD:
+       }
+
+       if (action == CPU_POST_DEAD) {
                /* intentionally ignoring frozen here */
                cmci_rediscover(cpu);
-               break;
        }
+
        return NOTIFY_OK;
 }
 
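The mce.c hunks above make the per-CPU polling timer adaptive: when mce_notify_irq() reports pending work the interval is halved (but no lower than HZ/100), otherwise it is doubled (capped at check_interval * HZ) and then filtered through the vendor hook mce_adjust_timer; an interval of 0 leaves the timer parked until mce_timer_kick() re-arms it. A minimal user-space sketch of that halve/double policy follows; HZ and CHECK_MAX here are illustrative constants, not the kernel's values.

#include <stdio.h>

#define HZ		1000UL			/* illustrative tick rate */
#define CHECK_MAX	(5 * 60 * HZ)		/* stands in for check_interval * HZ */

/* Mirror of the interval policy in mce_timer_fn(): shrink quickly when
 * events are seen, back off exponentially while things stay quiet. */
static unsigned long next_interval(unsigned long iv, int saw_event)
{
	if (saw_event)
		return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;
	return iv * 2 < CHECK_MAX ? iv * 2 : CHECK_MAX;
}

int main(void)
{
	unsigned long iv = CHECK_MAX;
	int i;

	for (i = 0; i < 5; i++) {
		iv = next_interval(iv, i < 2);	/* two noisy ticks, then quiet */
		printf("tick %d -> next poll in %lu jiffies\n", i, iv);
	}
	return 0;
}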
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffcc5eba26631a7ac59618b1dafc660..098386fed48e82d239d0061fb704c2ad790b372c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD         1
+#define CMCI_POLL_INTERVAL     (30 * HZ)
+#define CMCI_STORM_INTERVAL    (1 * HZ)
+#define CMCI_STORM_THRESHOLD   15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+       CMCI_STORM_NONE,
+       CMCI_STORM_ACTIVE,
+       CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
 
 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
        return !!(cap & MCG_CMCI_P);
 }
 
+void mce_intel_cmci_poll(void)
+{
+       if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+               return;
+       machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+       if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+               atomic_dec(&cmci_storm_on_cpus);
+
+       per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+       int r;
+
+       if (interval < CMCI_POLL_INTERVAL)
+               return interval;
+
+       switch (__this_cpu_read(cmci_storm_state)) {
+       case CMCI_STORM_ACTIVE:
+               /*
+                * We switch back to interrupt mode once the poll timer has
+                * silenced itself. That means no events recorded and the
+                * timer interval is back to our poll interval.
+                */
+               __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+               r = atomic_sub_return(1, &cmci_storm_on_cpus);
+               if (r == 0)
+                       pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+               /* FALLTHROUGH */
+
+       case CMCI_STORM_SUBSIDED:
+               /*
+                * We wait for all cpus to go back to SUBSIDED
+                * state. When that happens we switch back to
+                * interrupt mode.
+                */
+               if (!atomic_read(&cmci_storm_on_cpus)) {
+                       __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+                       cmci_reenable();
+                       cmci_recheck();
+               }
+               return CMCI_POLL_INTERVAL;
+       default:
+               /*
+                * We have shiny weather. Let the poll do whatever it
+                * thinks.
+                */
+               return interval;
+       }
+}
+
+static bool cmci_storm_detect(void)
+{
+       unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+       unsigned long ts = __this_cpu_read(cmci_time_stamp);
+       unsigned long now = jiffies;
+       int r;
+
+       if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+               return true;
+
+       if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+               cnt++;
+       } else {
+               cnt = 1;
+               __this_cpu_write(cmci_time_stamp, now);
+       }
+       __this_cpu_write(cmci_storm_cnt, cnt);
+
+       if (cnt <= CMCI_STORM_THRESHOLD)
+               return false;
+
+       cmci_clear();
+       __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+       r = atomic_add_return(1, &cmci_storm_on_cpus);
+       mce_timer_kick(CMCI_POLL_INTERVAL);
+
+       if (r == 1)
+               pr_notice("CMCI storm detected: switching to poll mode\n");
+       return true;
+}
+
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
@@ -61,28 +165,21 @@ static int cmci_supported(int *banks)
  */
 static void intel_threshold_interrupt(void)
 {
+       if (cmci_storm_detect())
+               return;
        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
        mce_notify_irq();
 }
 
-static void print_update(char *type, int *hdr, int num)
-{
-       if (*hdr == 0)
-               printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
-       *hdr = 1;
-       printk(KERN_CONT " %s:%d", type, num);
-}
-
 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
        unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
        unsigned long flags;
-       int hdr = 0;
        int i;
 
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
@@ -96,8 +193,7 @@ static void cmci_discover(int banks, int boot)
 
                /* Already owned by someone else? */
                if (val & MCI_CTL2_CMCI_EN) {
-                       if (test_and_clear_bit(i, owned) && !boot)
-                               print_update("SHD", &hdr, i);
+                       clear_bit(i, owned);
                        __clear_bit(i, __get_cpu_var(mce_poll_banks));
                        continue;
                }
@@ -109,16 +205,13 @@ static void cmci_discover(int banks, int boot)
 
                /* Did the enable bit stick? -- the bank supports CMCI */
                if (val & MCI_CTL2_CMCI_EN) {
-                       if (!test_and_set_bit(i, owned) && !boot)
-                               print_update("CMCI", &hdr, i);
+                       set_bit(i, owned);
                        __clear_bit(i, __get_cpu_var(mce_poll_banks));
                } else {
                        WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
                }
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-       if (hdr)
-               printk(KERN_CONT "\n");
 }
 
 /*
@@ -186,7 +279,7 @@ void cmci_rediscover(int dying)
                        continue;
                /* Recheck banks in case CPUs don't all have the same */
                if (cmci_supported(&banks))
-                       cmci_discover(banks, 0);
+                       cmci_discover(banks);
        }
 
        set_cpus_allowed_ptr(current, old);
@@ -200,7 +293,7 @@ void cmci_reenable(void)
 {
        int banks;
        if (cmci_supported(&banks))
-               cmci_discover(banks, 0);
+               cmci_discover(banks);
 }
 
 static void intel_init_cmci(void)
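
The storm handling added earlier in this file counts CMCI interrupts per CPU inside a CMCI_STORM_INTERVAL window; once more than CMCI_STORM_THRESHOLD of them arrive, the CPU disables CMCI, marks itself CMCI_STORM_ACTIVE and falls back to polling via mce_timer_kick(CMCI_POLL_INTERVAL), re-enabling interrupts only after every CPU has left the storm state. A minimal user-space sketch of just the windowed rate check; all names and constants below are illustrative, not the kernel's.

#include <stdio.h>

#define STORM_INTERVAL	100UL	/* window length, in made-up jiffies */
#define STORM_THRESHOLD	15	/* events tolerated per window */

/* Windowed rate check in the spirit of cmci_storm_detect(): returns 1 once
 * more than STORM_THRESHOLD events land inside one STORM_INTERVAL window. */
static int storm_detect(unsigned long now, unsigned long *window_start,
			unsigned int *count)
{
	if (now <= *window_start + STORM_INTERVAL) {
		(*count)++;
	} else {
		*count = 1;
		*window_start = now;
	}
	return *count > STORM_THRESHOLD;
}

int main(void)
{
	unsigned long window_start = 0;
	unsigned int count = 0;
	unsigned long now;

	/* A burst of 20 back-to-back events trips the detector at event 16. */
	for (now = 1; now <= 20; now++)
		if (storm_detect(now, &window_start, &count))
			printf("storm detected at jiffy %lu (count %u)\n",
			       now, count);
	return 0;
}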
@@ -211,7 +304,7 @@ static void intel_init_cmci(void)
                return;
 
        mce_threshold_vector = intel_threshold_interrupt;
-       cmci_discover(banks, 1);
+       cmci_discover(banks);
        /*
         * For CPU #0 this runs with still disabled APIC, but that's
         * ok because only the vector is set up. We still do another