Merge branches 'release', 'APERF', 'ARAT', 'misc', 'kelvin', 'device-lock' and 'bjorn...
author Len Brown <len.brown@intel.com>
Tue, 7 Apr 2009 22:18:42 +0000 (18:18 -0400)
committer Len Brown <len.brown@intel.com>
Tue, 7 Apr 2009 22:18:42 +0000 (18:18 -0400)
45 files changed:
Makefile
arch/x86/include/asm/cpufeature.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/addon_cpuid_features.c
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/x86/kernel/cpu/cpufreq/longhaul.c
arch/x86/kernel/ftrace.c
block/blk-core.c
drivers/acpi/acpica/hwvalid.c
drivers/acpi/battery.c
drivers/acpi/proc.c
drivers/acpi/processor_idle.c
drivers/acpi/scan.c
drivers/acpi/sleep.h
drivers/acpi/thermal.c
drivers/acpi/wakeup.c
drivers/platform/x86/panasonic-laptop.c
fs/nfs/file.c
include/acpi/acpi_bus.h
include/linux/compiler.h
include/linux/ftrace.h
include/linux/hardirq.h
include/linux/init_task.h
include/linux/interrupt.h
include/linux/irq.h
include/linux/irqreturn.h
include/linux/sched.h
kernel/Makefile
kernel/exit.c
kernel/fork.c
kernel/hung_task.c [new file with mode: 0644]
kernel/irq/devres.c
kernel/irq/handle.c
kernel/irq/manage.c
kernel/softlockup.c
kernel/sysctl.c
kernel/trace/blktrace.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_export.c
kernel/trace/trace_output.c
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
lib/Kconfig.debug
scripts/tracing/power.pl [moved from scripts/trace/power.pl with 100% similarity]

index c6307b6d069f8ec9a969ca6ad617245b95486d94..e5ad5fd961771f10f4e9e94ca0fbeb4246f1dad4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
-SUBLEVEL = 29
-EXTRAVERSION =
+SUBLEVEL = 30
+EXTRAVERSION = -rc1
 NAME = Temporary Tasmanian Devil
 
 # *DOCUMENTATION*
index 0beba0d1468db24bceaa1e81cdf8c652eb5180ee..bb83b1c397aad8d05251db9dcd71880b1967b1f4 100644 (file)
  * CPUID levels like 0x6, 0xA etc
  */
 #define X86_FEATURE_IDA                (7*32+ 0) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT       (7*32+ 1) /* Always Running APIC Timer */
 
 /* Virtualization flags: Linux defined */
 #define X86_FEATURE_TPR_SHADOW  (8*32+ 0) /* Intel TPR Shadow */
index 098ec84b8c0054d1f0fcff0e02aa357a02b33dd3..f2870920f246a9f1d7e2075c37b65b05de27dbf1 100644 (file)
@@ -431,6 +431,12 @@ static void __cpuinit setup_APIC_timer(void)
 {
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
+       if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+               /* Make LAPIC timer preferable over percpu HPET */
+               lapic_clockevent.rating = 150;
+       }
+
        memcpy(levt, &lapic_clockevent, sizeof(*levt));
        levt->cpumask = cpumask_of(smp_processor_id());
 
index 8220ae69849d4aa3e5405a412a6bca2b03c4b958..c965e5212714ee66cfe04e847544e213f44a7b2d 100644 (file)
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
        static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
                { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+               { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
                { 0, 0, 0, 0 }
        };
 
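The ARAT ("Always Running APIC Timer") capability is reported in CPUID leaf 0x6, EAX bit 2, which is why it is wired into the scattered-feature table above rather than occupying a whole CPUID word. A minimal user-space sketch (not part of the patch) that reads the same bit, assuming GCC's <cpuid.h> on x86:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0x6 (Thermal and Power Management); EAX bit 2 is ARAT. */
	if (!__get_cpuid(0x06, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("ARAT (always running APIC timer): %s\n",
	       (eax & (1 << 2)) ? "yes" : "no");
	return 0;
}

When the bit is set, setup_APIC_timer() above clears CLOCK_EVT_FEAT_C3STOP, so the local APIC timer is trusted to keep ticking in deep C-states and does not have to be handed over to a broadcast device.
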
index 19f6b9d27e83288fb516e59a490c6e08f55a9a64..9d3af380c6bdfc41a847578ae8eb78a0a151bc38 100644 (file)
@@ -68,6 +68,7 @@ struct acpi_cpufreq_data {
        unsigned int max_freq;
        unsigned int resume;
        unsigned int cpu_feature;
+       u64 saved_aperf, saved_mperf;
 };
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
@@ -241,26 +242,23 @@ static u32 get_cur_val(const struct cpumask *mask)
        return cmd.val;
 }
 
-struct perf_cur {
+struct perf_pair {
        union {
                struct {
                        u32 lo;
                        u32 hi;
                } split;
                u64 whole;
-       } aperf_cur, mperf_cur;
+       } aperf, mperf;
 };
 
 
 static long read_measured_perf_ctrs(void *_cur)
 {
-       struct perf_cur *cur = _cur;
+       struct perf_pair *cur = _cur;
 
-       rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
-       rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
-
-       wrmsr(MSR_IA32_APERF, 0, 0);
-       wrmsr(MSR_IA32_MPERF, 0, 0);
+       rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
+       rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
 
        return 0;
 }
@@ -281,52 +279,57 @@ static long read_measured_perf_ctrs(void *_cur)
 static unsigned int get_measured_perf(struct cpufreq_policy *policy,
                                      unsigned int cpu)
 {
-       struct perf_cur cur;
+       struct perf_pair readin, cur;
        unsigned int perf_percent;
        unsigned int retval;
 
-       if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
+       if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin))
                return 0;
 
+       cur.aperf.whole = readin.aperf.whole -
+                               per_cpu(drv_data, cpu)->saved_aperf;
+       cur.mperf.whole = readin.mperf.whole -
+                               per_cpu(drv_data, cpu)->saved_mperf;
+       per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole;
+       per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole;
+
 #ifdef __i386__
        /*
         * We dont want to do 64 bit divide with 32 bit kernel
         * Get an approximate value. Return failure in case we cannot get
         * an approximate value.
         */
-       if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
+       if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
                int shift_count;
                u32 h;
 
-               h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
+               h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
                shift_count = fls(h);
 
-               cur.aperf_cur.whole >>= shift_count;
-               cur.mperf_cur.whole >>= shift_count;
+               cur.aperf.whole >>= shift_count;
+               cur.mperf.whole >>= shift_count;
        }
 
-       if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
+       if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
                int shift_count = 7;
-               cur.aperf_cur.split.lo >>= shift_count;
-               cur.mperf_cur.split.lo >>= shift_count;
+               cur.aperf.split.lo >>= shift_count;
+               cur.mperf.split.lo >>= shift_count;
        }
 
-       if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
-               perf_percent = (cur.aperf_cur.split.lo * 100) /
-                               cur.mperf_cur.split.lo;
+       if (cur.aperf.split.lo && cur.mperf.split.lo)
+               perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
        else
                perf_percent = 0;
 
 #else
-       if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
+       if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
                int shift_count = 7;
-               cur.aperf_cur.whole >>= shift_count;
-               cur.mperf_cur.whole >>= shift_count;
+               cur.aperf.whole >>= shift_count;
+               cur.mperf.whole >>= shift_count;
        }
 
-       if (cur.aperf_cur.whole && cur.mperf_cur.whole)
-               perf_percent = (cur.aperf_cur.whole * 100) /
-                               cur.mperf_cur.whole;
+       if (cur.aperf.whole && cur.mperf.whole)
+               perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
        else
                perf_percent = 0;
 
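Since the MSRs are no longer reset to zero on every read, APERF and MPERF now accumulate monotonically and the driver works on the delta since the previous sample (saved_aperf/saved_mperf above); as a side effect, the counters are no longer clobbered for anything else that might read them. A minimal sketch of the resulting arithmetic (not part of the patch; the helper name is illustrative, u64 as in the driver's own types):

/* Illustrative only: delta-based APERF/MPERF ratio as used by
 * get_measured_perf() above on the 64-bit path. */
static unsigned int aperf_mperf_khz(u64 aperf, u64 mperf,
				    u64 *saved_aperf, u64 *saved_mperf,
				    unsigned int max_freq_khz)
{
	u64 delta_aperf = aperf - *saved_aperf;
	u64 delta_mperf = mperf - *saved_mperf;
	unsigned int perf_percent;

	*saved_aperf = aperf;
	*saved_mperf = mperf;

	/* APERF ticks at the delivered frequency, MPERF at the reference
	 * (maximum) frequency, so the ratio is the utilization of max_freq. */
	perf_percent = delta_mperf ?
		(unsigned int)(delta_aperf * 100 / delta_mperf) : 0;

	return max_freq_khz * perf_percent / 100;
}
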
index 0bd48e65a0caa3a77335bd1e8d5a53bc8764fae3..ce2ed3e4aad96a200fe0280dea4d66a4b2fb9268 100644 (file)
@@ -33,7 +33,6 @@
 #include <linux/timex.h>
 #include <linux/io.h>
 #include <linux/acpi.h>
-#include <linux/kernel.h>
 
 #include <asm/msr.h>
 #include <acpi/processor.h>
index 61df77532120e1d559b71ea3321d43742750c97a..70a10ca100f68273e0371a58f8236ff5d3075acf 100644 (file)
@@ -20,7 +20,6 @@
 
 #include <asm/cacheflush.h>
 #include <asm/ftrace.h>
-#include <linux/ftrace.h>
 #include <asm/nops.h>
 #include <asm/nmi.h>
 
index 43fdedc524ee1e62ac403dd0190e55da8ebf4f10..07ab75403e1a5d2f753bcf0c904fda2f3bd760ef 100644 (file)
@@ -131,6 +131,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        INIT_HLIST_NODE(&rq->hash);
        RB_CLEAR_NODE(&rq->rb_node);
        rq->cmd = rq->__cmd;
+       rq->cmd_len = BLK_MAX_CDB;
        rq->tag = -1;
        rq->ref_count = 1;
 }
index bd3c937b0ac094aecff5f6fa2bd7a5007805a7d4..7737afb157c35645bb2de73f7f5a4f95e4674ec8 100644 (file)
@@ -90,7 +90,6 @@ static const struct acpi_port_info acpi_protected_ports[] = {
        {"PIT2", 0x0048, 0x004B, ACPI_OSI_WIN_XP},
        {"RTC", 0x0070, 0x0071, ACPI_OSI_WIN_XP},
        {"CMOS", 0x0074, 0x0076, ACPI_OSI_WIN_XP},
-       {"DMA1", 0x0081, 0x0083, ACPI_OSI_WIN_XP},
        {"DMA1L", 0x0087, 0x0087, ACPI_OSI_WIN_XP},
        {"DMA2", 0x0089, 0x008B, ACPI_OSI_WIN_XP},
        {"DMA2L", 0x008F, 0x008F, ACPI_OSI_WIN_XP},
index b0de6312919a82a455f20f8d06dddd8f1b672d80..3c7d8942f23b4dd126d1a8e2145adfa0a1ad499b 100644 (file)
@@ -903,7 +903,7 @@ static struct acpi_driver acpi_battery_driver = {
                },
 };
 
-static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie)
+static void acpi_battery_init_async(void *unused, async_cookie_t cookie)
 {
        if (acpi_disabled)
                return;
index 05dfdc96802e2fa0d4b2967868ea0f3ebdaad425..d0d550d22a6d43a14ccecec6658e9fb2773d6b70 100644 (file)
@@ -343,9 +343,6 @@ acpi_system_write_alarm(struct file *file,
 }
 #endif                         /* HAVE_ACPI_LEGACY_ALARM */
 
-extern struct list_head acpi_wakeup_device_list;
-extern spinlock_t acpi_device_lock;
-
 static int
 acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
 {
@@ -353,7 +350,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
 
        seq_printf(seq, "Device\tS-state\t  Status   Sysfs node\n");
 
-       spin_lock(&acpi_device_lock);
+       mutex_lock(&acpi_device_lock);
        list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                struct acpi_device *dev =
                    container_of(node, struct acpi_device, wakeup_list);
@@ -361,7 +358,6 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
 
                if (!dev->wakeup.flags.valid)
                        continue;
-               spin_unlock(&acpi_device_lock);
 
                ldev = acpi_get_physical_device(dev->handle);
                seq_printf(seq, "%s\t  S%d\t%c%-8s  ",
@@ -376,9 +372,8 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
                seq_printf(seq, "\n");
                put_device(ldev);
 
-               spin_lock(&acpi_device_lock);
        }
-       spin_unlock(&acpi_device_lock);
+       mutex_unlock(&acpi_device_lock);
        return 0;
 }
 
@@ -409,7 +404,7 @@ acpi_system_write_wakeup_device(struct file *file,
        strbuf[len] = '\0';
        sscanf(strbuf, "%s", str);
 
-       spin_lock(&acpi_device_lock);
+       mutex_lock(&acpi_device_lock);
        list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                struct acpi_device *dev =
                    container_of(node, struct acpi_device, wakeup_list);
@@ -446,7 +441,7 @@ acpi_system_write_wakeup_device(struct file *file,
                        }
                }
        }
-       spin_unlock(&acpi_device_lock);
+       mutex_unlock(&acpi_device_lock);
        return count;
 }
 
index 4e6e758bd397f50686b3d951edabb8c93742fd38..6fe121434ffb36ec67a91b45823e3df3d45873c5 100644 (file)
@@ -145,6 +145,9 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr,
        struct acpi_processor_power *pwr = &pr->power;
        u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;
 
+       if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
+               return;
+
        /*
         * Check, if one of the previous states already marked the lapic
         * unstable
index 20c23c04920777259fc426acc7a9154d612a8cd0..8ff510b91d88f4f38e473afab76425a9cf392477 100644 (file)
@@ -24,7 +24,7 @@ extern struct acpi_device *acpi_root;
 
 static LIST_HEAD(acpi_device_list);
 static LIST_HEAD(acpi_bus_id_list);
-DEFINE_SPINLOCK(acpi_device_lock);
+DEFINE_MUTEX(acpi_device_lock);
 LIST_HEAD(acpi_wakeup_device_list);
 
 struct acpi_device_bus_id{
@@ -491,7 +491,6 @@ static int acpi_device_register(struct acpi_device *device,
         */
        INIT_LIST_HEAD(&device->children);
        INIT_LIST_HEAD(&device->node);
-       INIT_LIST_HEAD(&device->g_list);
        INIT_LIST_HEAD(&device->wakeup_list);
 
        new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL);
@@ -500,7 +499,7 @@ static int acpi_device_register(struct acpi_device *device,
                return -ENOMEM;
        }
 
-       spin_lock(&acpi_device_lock);
+       mutex_lock(&acpi_device_lock);
        /*
         * Find suitable bus_id and instance number in acpi_bus_id_list
         * If failed, create one and link it into acpi_bus_id_list
@@ -521,14 +520,12 @@ static int acpi_device_register(struct acpi_device *device,
        }
        dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, acpi_device_bus_id->instance_no);
 
-       if (device->parent) {
+       if (device->parent)
                list_add_tail(&device->node, &device->parent->children);
-               list_add_tail(&device->g_list, &device->parent->g_list);
-       } else
-               list_add_tail(&device->g_list, &acpi_device_list);
+
        if (device->wakeup.flags.valid)
                list_add_tail(&device->wakeup_list, &acpi_wakeup_device_list);
-       spin_unlock(&acpi_device_lock);
+       mutex_unlock(&acpi_device_lock);
 
        if (device->parent)
                device->dev.parent = &parent->dev;
@@ -549,28 +546,22 @@ static int acpi_device_register(struct acpi_device *device,
        device->removal_type = ACPI_BUS_REMOVAL_NORMAL;
        return 0;
   end:
-       spin_lock(&acpi_device_lock);
-       if (device->parent) {
+       mutex_lock(&acpi_device_lock);
+       if (device->parent)
                list_del(&device->node);
-               list_del(&device->g_list);
-       } else
-               list_del(&device->g_list);
        list_del(&device->wakeup_list);
-       spin_unlock(&acpi_device_lock);
+       mutex_unlock(&acpi_device_lock);
        return result;
 }
 
 static void acpi_device_unregister(struct acpi_device *device, int type)
 {
-       spin_lock(&acpi_device_lock);
-       if (device->parent) {
+       mutex_lock(&acpi_device_lock);
+       if (device->parent)
                list_del(&device->node);
-               list_del(&device->g_list);
-       } else
-               list_del(&device->g_list);
 
        list_del(&device->wakeup_list);
-       spin_unlock(&acpi_device_lock);
+       mutex_unlock(&acpi_device_lock);
 
        acpi_detach_data(device->handle, acpi_bus_data_handler);
 
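The switch from a spinlock to a mutex (the 'device-lock' branch) exists because the bodies of these critical sections call functions that may sleep, for example acpi_get_physical_device()/put_device() in proc.c above; the old code had to drop and re-take the spinlock around every such call, which is clumsy and racy with respect to concurrent list modification. With a mutex the whole walk can stay locked. A minimal sketch of the resulting pattern (not part of the patch; the helper is hypothetical):

	mutex_lock(&acpi_device_lock);
	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
		struct acpi_device *dev =
			container_of(node, struct acpi_device, wakeup_list);

		handle_wakeup_device(dev);	/* hypothetical; may sleep */
	}
	mutex_unlock(&acpi_device_lock);
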
index cfaf8f5b0a149b3b7bdcf056ebfcd2bfc7fb9870..8a8f3b3382a672483924ea5a3dd1cdd742e9a11d 100644 (file)
@@ -5,3 +5,6 @@ extern int acpi_suspend (u32 state);
 extern void acpi_enable_wakeup_device_prep(u8 sleep_state);
 extern void acpi_enable_wakeup_device(u8 sleep_state);
 extern void acpi_disable_wakeup_device(u8 sleep_state);
+
+extern struct list_head acpi_wakeup_device_list;
+extern struct mutex acpi_device_lock;
index 0914eaa9a097762a8bca397e29926dc3da71d1de..9cd15e8c893226288958d1b1253169d85eb36620 100644 (file)
@@ -194,6 +194,7 @@ struct acpi_thermal {
        struct acpi_handle_list devices;
        struct thermal_zone_device *thermal_zone;
        int tz_enabled;
+       int kelvin_offset;
        struct mutex lock;
 };
 
@@ -583,7 +584,7 @@ static void acpi_thermal_check(void *data)
 }
 
 /* sys I/F for generic thermal sysfs support */
-#define KELVIN_TO_MILLICELSIUS(t) (t * 100 - 273200)
+#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)
 
 static int thermal_get_temp(struct thermal_zone_device *thermal,
                            unsigned long *temp)
@@ -598,7 +599,7 @@ static int thermal_get_temp(struct thermal_zone_device *thermal,
        if (result)
                return result;
 
-       *temp = KELVIN_TO_MILLICELSIUS(tz->temperature);
+       *temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset);
        return 0;
 }
 
@@ -704,7 +705,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
        if (tz->trips.critical.flags.valid) {
                if (!trip) {
                        *temp = KELVIN_TO_MILLICELSIUS(
-                               tz->trips.critical.temperature);
+                               tz->trips.critical.temperature,
+                               tz->kelvin_offset);
                        return 0;
                }
                trip--;
@@ -713,7 +715,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
        if (tz->trips.hot.flags.valid) {
                if (!trip) {
                        *temp = KELVIN_TO_MILLICELSIUS(
-                               tz->trips.hot.temperature);
+                               tz->trips.hot.temperature,
+                               tz->kelvin_offset);
                        return 0;
                }
                trip--;
@@ -722,7 +725,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
        if (tz->trips.passive.flags.valid) {
                if (!trip) {
                        *temp = KELVIN_TO_MILLICELSIUS(
-                               tz->trips.passive.temperature);
+                               tz->trips.passive.temperature,
+                               tz->kelvin_offset);
                        return 0;
                }
                trip--;
@@ -732,7 +736,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
                tz->trips.active[i].flags.valid; i++) {
                if (!trip) {
                        *temp = KELVIN_TO_MILLICELSIUS(
-                               tz->trips.active[i].temperature);
+                               tz->trips.active[i].temperature,
+                               tz->kelvin_offset);
                        return 0;
                }
                trip--;
@@ -747,7 +752,8 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
 
        if (tz->trips.critical.flags.valid) {
                *temperature = KELVIN_TO_MILLICELSIUS(
-                               tz->trips.critical.temperature);
+                               tz->trips.critical.temperature,
+                               tz->kelvin_offset);
                return 0;
        } else
                return -EINVAL;
@@ -1331,6 +1337,25 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz)
        return 0;
 }
 
+/*
+ * The exact offset between Kelvin and degree Celsius is 273.15. However ACPI
+ * handles temperature values with a single decimal place. As a consequence,
+ * some implementations use an offset of 273.1 and others use an offset of
+ * 273.2. Try to find out which one is being used, to present the most
+ * accurate and visually appealing number.
+ *
+ * The heuristic below should work for all ACPI thermal zones which have a
+ * critical trip point with a value being a multiple of 0.5 degree Celsius.
+ */
+static void acpi_thermal_guess_offset(struct acpi_thermal *tz)
+{
+       if (tz->trips.critical.flags.valid &&
+           (tz->trips.critical.temperature % 5) == 1)
+               tz->kelvin_offset = 2731;
+       else
+               tz->kelvin_offset = 2732;
+}
+
 static int acpi_thermal_add(struct acpi_device *device)
 {
        int result = 0;
@@ -1356,6 +1381,8 @@ static int acpi_thermal_add(struct acpi_device *device)
        if (result)
                goto free_memory;
 
+       acpi_thermal_guess_offset(tz);
+
        result = acpi_thermal_register_thermal_zone(tz);
        if (result)
                goto free_memory;
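
As a worked example of the heuristic (not part of the patch): ACPI reports temperatures in tenths of a kelvin, so a 100.0 degC critical trip point arrives as 3731 from firmware that rounds the offset to 273.1 and as 3732 from firmware that uses 273.2. In the first case 3731 % 5 == 1 selects kelvin_offset = 2731 and KELVIN_TO_MILLICELSIUS(3731, 2731) = (3731 - 2731) * 100 = 100000 millidegrees Celsius; in the second case the offset 2732 yields the same 100000. The old fixed conversion (t * 100 - 273200) would have reported the 273.1-style firmware as 99.9 degC.
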
index 5aee8c26cc9fb93996569ca464987f4451876092..88725dcdf8bc813e42641b6e81cbdf79c8057159 100644 (file)
 #include "internal.h"
 #include "sleep.h"
 
+/*
+ * Note: we do not take acpi_device_lock in this file.  Taking it triggers an
+ * oops during suspend/resume, and it is not really needed because these
+ * routines are called while entering or leaving an S-state, when no device
+ * hotplug can happen.
+ */
 #define _COMPONENT             ACPI_SYSTEM_COMPONENT
 ACPI_MODULE_NAME("wakeup_devices")
 
-extern struct list_head acpi_wakeup_device_list;
-extern spinlock_t acpi_device_lock;
-
 /**
  * acpi_enable_wakeup_device_prep - prepare wakeup devices
  *     @sleep_state:   ACPI state
@@ -29,7 +31,6 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state)
 {
        struct list_head *node, *next;
 
-       spin_lock(&acpi_device_lock);
        list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                struct acpi_device *dev = container_of(node,
                                                       struct acpi_device,
@@ -40,11 +41,8 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state)
                    (sleep_state > (u32) dev->wakeup.sleep_state))
                        continue;
 
-               spin_unlock(&acpi_device_lock);
                acpi_enable_wakeup_device_power(dev, sleep_state);
-               spin_lock(&acpi_device_lock);
        }
-       spin_unlock(&acpi_device_lock);
 }
 
 /**
@@ -60,7 +58,6 @@ void acpi_enable_wakeup_device(u8 sleep_state)
         * Caution: this routine must be invoked when interrupt is disabled 
         * Refer ACPI2.0: P212
         */
-       spin_lock(&acpi_device_lock);
        list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                struct acpi_device *dev =
                        container_of(node, struct acpi_device, wakeup_list);
@@ -74,22 +71,17 @@ void acpi_enable_wakeup_device(u8 sleep_state)
                if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
                    || sleep_state > (u32) dev->wakeup.sleep_state) {
                        if (dev->wakeup.flags.run_wake) {
-                               spin_unlock(&acpi_device_lock);
                                /* set_gpe_type will disable GPE, leave it like that */
                                acpi_set_gpe_type(dev->wakeup.gpe_device,
                                                  dev->wakeup.gpe_number,
                                                  ACPI_GPE_TYPE_RUNTIME);
-                               spin_lock(&acpi_device_lock);
                        }
                        continue;
                }
-               spin_unlock(&acpi_device_lock);
                if (!dev->wakeup.flags.run_wake)
                        acpi_enable_gpe(dev->wakeup.gpe_device,
                                        dev->wakeup.gpe_number);
-               spin_lock(&acpi_device_lock);
        }
-       spin_unlock(&acpi_device_lock);
 }
 
 /**
@@ -101,7 +93,6 @@ void acpi_disable_wakeup_device(u8 sleep_state)
 {
        struct list_head *node, *next;
 
-       spin_lock(&acpi_device_lock);
        list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                struct acpi_device *dev =
                        container_of(node, struct acpi_device, wakeup_list);
@@ -112,19 +103,16 @@ void acpi_disable_wakeup_device(u8 sleep_state)
                if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
                    || sleep_state > (u32) dev->wakeup.sleep_state) {
                        if (dev->wakeup.flags.run_wake) {
-                               spin_unlock(&acpi_device_lock);
                                acpi_set_gpe_type(dev->wakeup.gpe_device,
                                                  dev->wakeup.gpe_number,
                                                  ACPI_GPE_TYPE_WAKE_RUN);
                                /* Re-enable it, since set_gpe_type will disable it */
                                acpi_enable_gpe(dev->wakeup.gpe_device,
                                                dev->wakeup.gpe_number);
-                               spin_lock(&acpi_device_lock);
                        }
                        continue;
                }
 
-               spin_unlock(&acpi_device_lock);
                acpi_disable_wakeup_device_power(dev);
                /* Never disable run-wake GPE */
                if (!dev->wakeup.flags.run_wake) {
@@ -133,16 +121,14 @@ void acpi_disable_wakeup_device(u8 sleep_state)
                        acpi_clear_gpe(dev->wakeup.gpe_device,
                                       dev->wakeup.gpe_number, ACPI_NOT_ISR);
                }
-               spin_lock(&acpi_device_lock);
        }
-       spin_unlock(&acpi_device_lock);
 }
 
 int __init acpi_wakeup_device_init(void)
 {
        struct list_head *node, *next;
 
-       spin_lock(&acpi_device_lock);
+       mutex_lock(&acpi_device_lock);
        list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                struct acpi_device *dev = container_of(node,
                                                       struct acpi_device,
@@ -150,15 +136,13 @@ int __init acpi_wakeup_device_init(void)
                /* In case user doesn't load button driver */
                if (!dev->wakeup.flags.run_wake || dev->wakeup.state.enabled)
                        continue;
-               spin_unlock(&acpi_device_lock);
                acpi_set_gpe_type(dev->wakeup.gpe_device,
                                  dev->wakeup.gpe_number,
                                  ACPI_GPE_TYPE_WAKE_RUN);
                acpi_enable_gpe(dev->wakeup.gpe_device,
                                dev->wakeup.gpe_number);
                dev->wakeup.state.enabled = 1;
-               spin_lock(&acpi_device_lock);
        }
-       spin_unlock(&acpi_device_lock);
+       mutex_unlock(&acpi_device_lock);
        return 0;
 }
index 1a11de0d3e6d7ed5f5eb3e48f39f0b4815e03277..fe7cf0188acc12df1794ebf9c8107de5f1bac7f6 100644 (file)
@@ -273,7 +273,7 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc, u32 *sinf)
        union acpi_object *hkey = NULL;
        int i;
 
-       status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, 0,
+       status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, NULL,
                                      &buffer);
        if (ACPI_FAILURE(status)) {
                ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
index 3523b895eb4b3b598a89399aceca8d77833cb595..5a97bcfe03e5e0d25b78d8122a6695f8d1c66acc 100644 (file)
@@ -516,8 +516,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                goto out_unlock;
 
        ret = nfs_updatepage(filp, page, 0, pagelen);
-       if (ret == 0)
-               ret = pagelen;
 out_unlock:
        unlock_page(page);
        if (ret)
index a2228511d4be9433ca290bae158583148507f04f..c34b11022908a01c521367ddf85249fcacb98ffe 100644 (file)
@@ -270,7 +270,6 @@ struct acpi_device {
        struct list_head children;
        struct list_head node;
        struct list_head wakeup_list;
-       struct list_head g_list;
        struct acpi_device_status status;
        struct acpi_device_flags flags;
        struct acpi_device_pnp pnp;
index cebfdcd3dbddc3e7715b637f58452332586cdfa3..37bcb50a4d7c85b1ac203982cc71f11e75511cde 100644 (file)
@@ -76,7 +76,8 @@ struct ftrace_branch_data {
  * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
  * to disable branch tracing on a per file basis.
  */
-#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
+    && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
 void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 
 #define likely_notrace(x)      __builtin_expect(!!(x), 1)
index 015a3d22cf7434deb500233623ae14b778d05e5b..da5405dce34746aa4c7ab757af6a46e430f9b538 100644 (file)
@@ -356,6 +356,9 @@ struct ftrace_graph_ret {
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
+/* for init task */
+#define INIT_FTRACE_GRAPH              .ret_stack = NULL
+
 /*
  * Stack of return addresses for functions
  * of a thread.
@@ -430,10 +433,11 @@ static inline void unpause_graph_tracing(void)
 {
        atomic_dec(&current->tracing_graph_pause);
 }
-#else
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
 
 #define __notrace_funcgraph
 #define __irq_entry
+#define INIT_FTRACE_GRAPH
 
 static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
@@ -445,7 +449,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
 
 static inline void pause_graph_tracing(void) { }
 static inline void unpause_graph_tracing(void) { }
-#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 #ifdef CONFIG_TRACING
 #include <linux/sched.h>
index faa1cf848bcd38e5c59725c3c0bb3e7bfdf86f63..45257475623cad94c90304d82b4b5cd68bc498d8 100644 (file)
 # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
 #endif
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
 extern void synchronize_irq(unsigned int irq);
 #else
 # define synchronize_irq(irq)  barrier()
index af1de95e711ea3d6667b03b0ed82d701da56f02b..dcfb93337e9a2e902938c561575c9b337320985d 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/irqflags.h>
 #include <linux/utsname.h>
 #include <linux/lockdep.h>
+#include <linux/ftrace.h>
 #include <linux/ipc.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
@@ -185,6 +186,7 @@ extern struct cred init_cred;
        INIT_IDS                                                        \
        INIT_TRACE_IRQFLAGS                                             \
        INIT_LOCKDEP                                                    \
+       INIT_FTRACE_GRAPH                                               \
 }
 
 
index 8a9613d0c67401c2462df62ee9225bed14d00b53..91bb76f44f14e6488e909669ba3e3e94fba1104a 100644 (file)
 #define IRQF_NOBALANCING       0x00000800
 #define IRQF_IRQPOLL           0x00001000
 
+/*
+ * Bits used by threaded handlers:
+ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
+ * IRQTF_DIED      - handler thread died
+ * IRQTF_WARNED    - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ */
+enum {
+       IRQTF_RUNTHREAD,
+       IRQTF_DIED,
+       IRQTF_WARNED,
+};
+
 typedef irqreturn_t (*irq_handler_t)(int, void *);
 
 /**
@@ -71,6 +83,9 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
  * @next:      pointer to the next irqaction for shared interrupts
  * @irq:       interrupt number
  * @dir:       pointer to the proc/irq/NN/name entry
+ * @thread_fn: interrupt handler function for threaded interrupts
+ * @thread:    thread pointer for threaded interrupts
+ * @thread_flags:      flags related to @thread
  */
 struct irqaction {
        irq_handler_t handler;
@@ -81,18 +96,68 @@ struct irqaction {
        struct irqaction *next;
        int irq;
        struct proc_dir_entry *dir;
+       irq_handler_t thread_fn;
+       struct task_struct *thread;
+       unsigned long thread_flags;
 };
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
-extern int __must_check request_irq(unsigned int, irq_handler_t handler,
-                      unsigned long, const char *, void *);
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+extern int __must_check
+request_threaded_irq(unsigned int irq, irq_handler_t handler,
+                    irq_handler_t thread_fn,
+                    unsigned long flags, const char *name, void *dev);
+
+static inline int __must_check
+request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+           const char *name, void *dev)
+{
+       return request_threaded_irq(irq, handler, NULL, flags, name, dev);
+}
+
+extern void exit_irq_thread(void);
+#else
+
+extern int __must_check
+request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+           const char *name, void *dev);
+
+/*
+ * Special function to avoid ifdeffery in kernel/irq/devres.c which
+ * gets magically built by GENERIC_HARDIRQS=n architectures (sparc,
+ * m68k). I really love these $@%#!* obvious Makefile references:
+ * ../../../kernel/irq/devres.o
+ */
+static inline int __must_check
+request_threaded_irq(unsigned int irq, irq_handler_t handler,
+                    irq_handler_t thread_fn,
+                    unsigned long flags, const char *name, void *dev)
+{
+       return request_irq(irq, handler, flags, name, dev);
+}
+
+static inline void exit_irq_thread(void) { }
+#endif
+
 extern void free_irq(unsigned int, void *);
 
 struct device;
 
-extern int __must_check devm_request_irq(struct device *dev, unsigned int irq,
-                           irq_handler_t handler, unsigned long irqflags,
-                           const char *devname, void *dev_id);
+extern int __must_check
+devm_request_threaded_irq(struct device *dev, unsigned int irq,
+                         irq_handler_t handler, irq_handler_t thread_fn,
+                         unsigned long irqflags, const char *devname,
+                         void *dev_id);
+
+static inline int __must_check
+devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler,
+                unsigned long irqflags, const char *devname, void *dev_id)
+{
+       return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags,
+                                        devname, dev_id);
+}
+
 extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
 
 /*
index 974890b3c52fbfe0048df3cf7d736798caf873b6..ca507c9426b00972af3254d180837d7bdb05b471 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/irqnr.h>
 #include <linux/errno.h>
 #include <linux/topology.h>
+#include <linux/wait.h>
 
 #include <asm/irq.h>
 #include <asm/ptrace.h>
@@ -158,6 +159,8 @@ struct irq_2_iommu;
  * @affinity:          IRQ affinity on SMP
  * @cpu:               cpu index useful for balancing
  * @pending_mask:      pending rebalanced interrupts
+ * @threads_active:    number of irqaction threads currently running
+ * @wait_for_threads:  wait queue for sync_irq to wait for threaded handlers
  * @dir:               /proc/irq/ procfs entry
  * @name:              flow handler name for /proc/interrupts output
  */
@@ -189,6 +192,8 @@ struct irq_desc {
        cpumask_var_t           pending_mask;
 #endif
 #endif
+       atomic_t                threads_active;
+       wait_queue_head_t       wait_for_threads;
 #ifdef CONFIG_PROC_FS
        struct proc_dir_entry   *dir;
 #endif
index c5584ca5b8c94e07ecc9340eb14255bd52606915..819acaaac3f5877bc4802bbbc928a5c21686fde2 100644 (file)
@@ -5,10 +5,12 @@
  * enum irqreturn
  * @IRQ_NONE           interrupt was not from this device
  * @IRQ_HANDLED                interrupt was handled by this device
+ * @IRQ_WAKE_THREAD    handler requests to wake the handler thread
  */
 enum irqreturn {
        IRQ_NONE,
        IRQ_HANDLED,
+       IRQ_WAKE_THREAD,
 };
 
 typedef enum irqreturn irqreturn_t;
index b94f3541f67be00802c28f22fa5b56f9e8c91e55..98e1fe51601df0066786500ba78e0648ec896287 100644 (file)
@@ -300,17 +300,11 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
                                    struct file *filp, void __user *buffer,
                                    size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
 extern int softlockup_thresh;
 #else
 static inline void softlockup_tick(void)
 {
 }
-static inline void spawn_softlockup_task(void)
-{
-}
 static inline void touch_softlockup_watchdog(void)
 {
 }
@@ -319,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void)
 }
 #endif
 
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+                                        struct file *filp, void __user *buffer,
+                                        size_t *lenp, loff_t *ppos);
+#endif
 
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched                __attribute__((__section__(".sched.text")))
@@ -1255,9 +1258,8 @@ struct task_struct {
 /* ipc stuff */
        struct sysv_sem sysvsem;
 #endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_DETECT_HUNG_TASK
 /* hung task detection */
-       unsigned long last_switch_timestamp;
        unsigned long last_switch_count;
 #endif
 /* CPU-specific state of this task */
@@ -1294,6 +1296,11 @@ struct task_struct {
 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
        spinlock_t alloc_lock;
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+       /* IRQ handler threads */
+       struct irqaction *irqaction;
+#endif
+
        /* Protection of the PI data structures: */
        spinlock_t pi_lock;
 
index bab1dffe37e94013d19795e33c2754f8ea0222c0..42423665660a3d6e0a1fc6a37dd0da997643b47f 100644 (file)
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
index 32cbf2607cb0dab54f8463196aca522b29a624a5..abf9cf3b95c609f12ccb0c6992cc3a8221bdcf8b 100644 (file)
@@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code)
                schedule();
        }
 
+       exit_irq_thread();
+
        exit_signals(tsk);  /* sets PF_EXITING */
        /*
         * tsk->flags are checked in the futex code to protect against
index 660c2b8765bce0e112c26460fe037714b6ea3ac5..989c7c202b3d831ef1aa9ad024872edd864c08ca 100644 (file)
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
        tsk->min_flt = tsk->maj_flt = 0;
        tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+       tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
        tsk->mm = NULL;
        tsk->active_mm = NULL;
@@ -1032,11 +1035,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-       p->last_switch_count = 0;
-       p->last_switch_timestamp = 0;
-#endif
-
        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);
 
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644 (file)
index 0000000..022a492
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period, so it needs an upper bound.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+
+static int __read_mostly did_panic;
+
+static struct task_struct *watchdog_task;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+                               CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+static int __init hung_task_panic_setup(char *str)
+{
+       sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+
+       return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       did_panic = 1;
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+       .notifier_call = hung_task_panic,
+};
+
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+       unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+       /*
+        * Ensure the task is not frozen.
+        * Also skip a freshly created task that has been scheduled once,
+        * changed its state to TASK_UNINTERRUPTIBLE and has never been
+        * switched out since; it mustn't be checked.
+        */
+       if (unlikely(t->flags & PF_FROZEN || !switch_count))
+               return;
+
+       if (switch_count != t->last_switch_count) {
+               t->last_switch_count = switch_count;
+               return;
+       }
+       if (!sysctl_hung_task_warnings)
+               return;
+       sysctl_hung_task_warnings--;
+
+       /*
+        * Ok, the task did not get scheduled for more than 2 minutes,
+        * complain:
+        */
+       printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+                       "%ld seconds.\n", t->comm, t->pid, timeout);
+       printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+                       " disables this message.\n");
+       sched_show_task(t);
+       __debug_show_held_locks(t);
+
+       touch_nmi_watchdog();
+
+       if (sysctl_hung_task_panic)
+               panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * to exit the grace period. For classic RCU, a reschedule is required.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+       get_task_struct(g);
+       get_task_struct(t);
+       rcu_read_unlock();
+       cond_resched();
+       rcu_read_lock();
+       put_task_struct(t);
+       put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+       int max_count = sysctl_hung_task_check_count;
+       int batch_count = HUNG_TASK_BATCHING;
+       struct task_struct *g, *t;
+
+       /*
+        * If the system crashed already then all bets are off,
+        * do not report extra hung tasks:
+        */
+       if (test_taint(TAINT_DIE) || did_panic)
+               return;
+
+       rcu_read_lock();
+       do_each_thread(g, t) {
+               if (!--max_count)
+                       goto unlock;
+               if (!--batch_count) {
+                       batch_count = HUNG_TASK_BATCHING;
+                       rcu_lock_break(g, t);
+                       /* Exit if t or g was unhashed during refresh. */
+                       if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+                               goto unlock;
+               }
+               /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+               if (t->state == TASK_UNINTERRUPTIBLE)
+                       check_hung_task(t, timeout);
+       } while_each_thread(g, t);
+ unlock:
+       rcu_read_unlock();
+}
+
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+       /* timeout of 0 will disable the watchdog */
+       return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
+
+/*
+ * Process updating of timeout sysctl
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+                                 struct file *filp, void __user *buffer,
+                                 size_t *lenp, loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+       if (ret || !write)
+               goto out;
+
+       wake_up_process(watchdog_task);
+
+ out:
+       return ret;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ */
+static int watchdog(void *dummy)
+{
+       set_user_nice(current, 0);
+
+       for ( ; ; ) {
+               unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+               while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+                       timeout = sysctl_hung_task_timeout_secs;
+
+               check_hung_uninterruptible_tasks(timeout);
+       }
+
+       return 0;
+}
+
+static int __init hung_task_init(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+       watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+
+       return 0;
+}
+
+module_init(hung_task_init);
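
The knobs added above are exported through kernel/sysctl.c (also part of this merge) and show up under /proc/sys/kernel/, as the printk in check_hung_task() already hints. A minimal sketch of adjusting the timeout from user space (not part of the patch; requires root, error handling trimmed):

#include <stdio.h>

int main(void)
{
	/* Lower the hung-task timeout from the default 120s to 60s.
	 * Writing 0 disables the check entirely. */
	FILE *f = fopen("/proc/sys/kernel/hung_task_timeout_secs", "w");

	if (!f)
		return 1;
	fprintf(f, "60\n");
	return fclose(f) ? 1 : 0;
}
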
index 38a25b8d8bff6d15448270d4f56983537335fc2d..d06df9c41cba2cf4aee477737e341ad98d6c1493 100644 (file)
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
 }
 
 /**
- *     devm_request_irq - allocate an interrupt line for a managed device
+ *     devm_request_threaded_irq - allocate an interrupt line for a managed device
  *     @dev: device to request interrupt for
  *     @irq: Interrupt line to allocate
  *     @handler: Function to be called when the IRQ occurs
+ *     @thread_fn: function to be called in a threaded interrupt context. NULL
+ *                 for devices which handle everything in @handler
  *     @irqflags: Interrupt type flags
  *     @devname: An ascii name for the claiming device
  *     @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
  *     If an IRQ allocated with this function needs to be freed
  *     separately, dev_free_irq() must be used.
  */
-int devm_request_irq(struct device *dev, unsigned int irq,
-                    irq_handler_t handler, unsigned long irqflags,
-                    const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+                             irq_handler_t handler, irq_handler_t thread_fn,
+                             unsigned long irqflags, const char *devname,
+                             void *dev_id)
 {
        struct irq_devres *dr;
        int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
        if (!dr)
                return -ENOMEM;
 
-       rc = request_irq(irq, handler, irqflags, devname, dev_id);
+       rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+                                 dev_id);
        if (rc) {
                devres_free(dr);
                return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 
        return 0;
 }
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
 
 /**
  *     devm_free_irq - free an interrupt
index 343acecae629ff37c32013daede12da10b21f7fb..d82142be8dd2c8fddc340058ff9cd73373f6a31b 100644 (file)
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
        return IRQ_NONE;
 }
 
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+       if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+               return;
+
+       printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+              "but no thread function available.", irq, action->name);
+}
+
 DEFINE_TRACE(irq_handler_entry);
 DEFINE_TRACE(irq_handler_exit);
 
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
                trace_irq_handler_entry(irq, action);
                ret = action->handler(irq, action->dev_id);
                trace_irq_handler_exit(irq, action, ret);
-               if (ret == IRQ_HANDLED)
+
+               switch (ret) {
+               case IRQ_WAKE_THREAD:
+                       /*
+                        * Set result to handled so the spurious check
+                        * does not trigger.
+                        */
+                       ret = IRQ_HANDLED;
+
+                       /*
+                        * Catch drivers which return WAKE_THREAD but
+                        * did not set up a thread function
+                        */
+                       if (unlikely(!action->thread_fn)) {
+                               warn_no_thread(irq, action);
+                               break;
+                       }
+
+                       /*
+                        * Wake up the handler thread for this
+                        * action. In case the thread crashed and was
+                        * killed we just pretend that we handled the
+                        * interrupt. The hardirq handler above has
+                        * disabled the device interrupt, so no irq
+                        * storm is lurking.
+                        */
+                       if (likely(!test_bit(IRQTF_DIED,
+                                            &action->thread_flags))) {
+                               set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+                               wake_up_process(action->thread);
+                       }
+
+                       /* Fall through to add to randomness */
+               case IRQ_HANDLED:
                        status |= action->flags;
+                       break;
+
+               default:
+                       break;
+               }
+
                retval |= ret;
                action = action->next;
        } while (action);
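
Putting the pieces together: a driver opts into the new threaded model by registering both a primary and a thread handler. The primary runs in hard-IRQ context, checks and quiesces the device, and returns IRQ_WAKE_THREAD, which the dispatch above turns into a wakeup of the per-action kernel thread. A hedged driver-side sketch (not part of the patch; struct mydev, mydev_check_and_mask() and mydev_process() are hypothetical):

static irqreturn_t mydev_primary(int irq, void *dev_id)
{
	struct mydev *dev = dev_id;

	if (!mydev_check_and_mask(dev))		/* not our interrupt */
		return IRQ_NONE;

	/* Device interrupt source is now masked; defer the real work. */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t mydev_thread(int irq, void *dev_id)
{
	mydev_process(dev_id);			/* may sleep */
	return IRQ_HANDLED;
}

	/* in the probe path: */
	err = request_threaded_irq(irq, mydev_primary, mydev_thread,
				   IRQF_SHARED, "mydev", dev);

The IRQ_WAKE_THREAD return also satisfies the spurious-interrupt accounting, since handle_IRQ_event() above rewrites it to IRQ_HANDLED before recording the result.
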
index 1516ab77355c928bd1e1f5c8b6759e53d3b0c7e3..7e2e7dd4cd2f70e5d619f893161225e230bf487c 100644 (file)
@@ -8,16 +8,15 @@
  */
 
 #include <linux/irq.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 
 #include "internals.h"
 
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
 /**
  *     synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *     @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
 
                /* Oops, that failed? */
        } while (status & IRQ_INPROGRESS);
+
+       /*
+        * We made sure that no hardirq handler is running. Now verify
+        * that no threaded handlers are active.
+        */
+       wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
 }
 EXPORT_SYMBOL(synchronize_irq);
 
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
 /**
  *     irq_can_set_affinity - Check if the affinity of a given irq can be set
  *     @irq:           Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
        return 1;
 }
 
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+       struct irqaction *action = desc->action;
+
+       while (action) {
+               if (action->thread)
+                       set_cpus_allowed_ptr(action->thread, cpumask);
+               action = action->next;
+       }
+}
+
 /**
  *     irq_set_affinity - Set the irq affinity of a given irq
  *     @irq:           Interrupt to set affinity
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
        cpumask_copy(desc->affinity, cpumask);
        desc->chip->set_affinity(irq, cpumask);
 #endif
+       irq_set_thread_affinity(desc, cpumask);
        desc->status |= IRQ_AFFINITY_SET;
        spin_unlock_irqrestore(&desc->lock, flags);
        return 0;
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
 
        spin_lock_irqsave(&desc->lock, flags);
        ret = setup_affinity(irq, desc);
+       if (!ret)
+               irq_set_thread_affinity(desc, desc->affinity);
        spin_unlock_irqrestore(&desc->lock, flags);
 
        return ret;
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
        return ret;
 }
 
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+       while (!kthread_should_stop()) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (test_and_clear_bit(IRQTF_RUNTHREAD,
+                                      &action->thread_flags)) {
+                       __set_current_state(TASK_RUNNING);
+                       return 0;
+               }
+               schedule();
+       }
+       return -1;
+}
+
+/*
+ * Interrupt handler thread
+ */
+static int irq_thread(void *data)
+{
+       struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+       struct irqaction *action = data;
+       struct irq_desc *desc = irq_to_desc(action->irq);
+       int wake;
+
+       sched_setscheduler(current, SCHED_FIFO, &param);
+       current->irqaction = action;
+
+       while (!irq_wait_for_interrupt(action)) {
+
+               atomic_inc(&desc->threads_active);
+
+               spin_lock_irq(&desc->lock);
+               if (unlikely(desc->status & IRQ_DISABLED)) {
+                       /*
+                        * CHECKME: We might need a dedicated
+                        * IRQ_THREAD_PENDING flag here, which
+                        * retriggers the thread in check_irq_resend()
+                        * but AFAICT IRQ_PENDING should be fine as it
+                        * retriggers the interrupt itself --- tglx
+                        */
+                       desc->status |= IRQ_PENDING;
+                       spin_unlock_irq(&desc->lock);
+               } else {
+                       spin_unlock_irq(&desc->lock);
+
+                       action->thread_fn(action->irq, action->dev_id);
+               }
+
+               wake = atomic_dec_and_test(&desc->threads_active);
+
+               if (wake && waitqueue_active(&desc->wait_for_threads))
+                       wake_up(&desc->wait_for_threads);
+       }
+
+       /*
+        * Clear irqaction. Otherwise exit_irq_thread() would make
+        * a fuss about an active irq thread going into nirvana.
+        */
+       current->irqaction = NULL;
+       return 0;
+}
+
+/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(void)
+{
+       struct task_struct *tsk = current;
+
+       if (!tsk->irqaction)
+               return;
+
+       printk(KERN_ERR
+              "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+              tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+       /*
+        * Set the THREAD DIED flag to prevent further wakeups of the
+        * soon to be gone threaded handler.
+        */
+       set_bit(IRQTF_DIED, &tsk->irqaction->thread_flags);
+}
+
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
@@ -436,6 +543,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                rand_initialize_irq(irq);
        }
 
+       /*
+        * Threaded handler ?
+        */
+       if (new->thread_fn) {
+               struct task_struct *t;
+
+               t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+                                  new->name);
+               if (IS_ERR(t))
+                       return PTR_ERR(t);
+               /*
+                * We keep the reference to the task struct even if
+                * the thread dies to avoid that the interrupt code
+                * references an already freed task_struct.
+                */
+               get_task_struct(t);
+               new->thread = t;
+               wake_up_process(t);
+       }
+
        /*
         * The following block of code has to be executed atomically
         */
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
        if (!shared) {
                irq_chip_set_defaults(desc->chip);
 
+               init_waitqueue_head(&desc->wait_for_threads);
+
                /* Setup the type (level, edge polarity) if configured: */
                if (new->flags & IRQF_TRIGGER_MASK) {
                        ret = __irq_set_trigger(desc, irq,
                                        new->flags & IRQF_TRIGGER_MASK);
 
-                       if (ret) {
-                               spin_unlock_irqrestore(&desc->lock, flags);
-                               return ret;
-                       }
+                       if (ret)
+                               goto out_thread;
                } else
                        compat_irq_chip_set_default_handler(desc);
 #if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +676,19 @@ mismatch:
                dump_stack();
        }
 #endif
+       ret = -EBUSY;
+
+out_thread:
        spin_unlock_irqrestore(&desc->lock, flags);
-       return -EBUSY;
+       if (new->thread) {
+               struct task_struct *t = new->thread;
+
+               new->thread = NULL;
+               if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+                       kthread_stop(t);
+               put_task_struct(t);
+       }
+       return ret;
 }
 
 /**
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irqaction *action, **action_ptr;
+       struct task_struct *irqthread;
        unsigned long flags;
 
        WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
                else
                        desc->chip->disable(irq);
        }
+
+       irqthread = action->thread;
+       action->thread = NULL;
+
        spin_unlock_irqrestore(&desc->lock, flags);
 
        unregister_handler_proc(irq, action);
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
        /* Make sure it's not being used on another CPU: */
        synchronize_irq(irq);
 
+       if (irqthread) {
+               if (!test_bit(IRQTF_DIED, &action->thread_flags))
+                       kthread_stop(irqthread);
+               put_task_struct(irqthread);
+       }
+
 #ifdef CONFIG_DEBUG_SHIRQ
        /*
         * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
 EXPORT_SYMBOL(free_irq);
 
 /**
- *     request_irq - allocate an interrupt line
+ *     request_threaded_irq - allocate an interrupt line
  *     @irq: Interrupt line to allocate
- *     @handler: Function to be called when the IRQ occurs
+ *     @handler: Function to be called when the IRQ occurs.
+ *               Primary handler for threaded interrupts
+ *     @thread_fn: Function called from the irq handler thread
+ *                 If NULL, no irq thread is created
  *     @irqflags: Interrupt type flags
  *     @devname: An ascii name for the claiming device
  *     @dev_id: A cookie passed back to the handler function
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
  *     raises, you must take care both to initialise your hardware
  *     and to set up the interrupt handler in the right order.
  *
+ *     If you want to set up a threaded irq handler for your device
+ *     then you need to supply @handler and @thread_fn. @handler is
+ *     still called in hard interrupt context and has to check
+ *     whether the interrupt originates from the device. If yes it
+ *     needs to disable the interrupt on the device and return
+ *     IRQ_WAKE_THREAD which will wake up the handler thread and run
+ *     @thread_fn. This split handler design is necessary to support
+ *     shared interrupts.
+ *
  *     Dev_id must be globally unique. Normally the address of the
  *     device data structure is used as the cookie. Since the handler
  *     receives this value it makes sense to use it.
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
  *     IRQF_TRIGGER_*          Specify active edge(s) or level
  *
  */
-int request_irq(unsigned int irq, irq_handler_t handler,
-               unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+                        irq_handler_t thread_fn, unsigned long irqflags,
+                        const char *devname, void *dev_id)
 {
        struct irqaction *action;
        struct irq_desc *desc;
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
                return -ENOMEM;
 
        action->handler = handler;
+       action->thread_fn = thread_fn;
        action->flags = irqflags;
        action->name = devname;
        action->dev_id = dev_id;
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 #endif
        return retval;
 }
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
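For illustration, a minimal sketch (not part of this patch) of how a driver might use the new request_threaded_irq() API documented above. The device, its register offsets and the structure layout are hypothetical; only request_threaded_irq(), the handler signatures and the IRQ_* return values follow the interface added here.

#include <linux/interrupt.h>
#include <linux/io.h>

/* Hypothetical device: status register at offset 0x00, irq enable at 0x04. */
struct my_dev {
	void __iomem	*regs;
	int		irq;
};

static irqreturn_t my_primary_handler(int irq, void *dev_id)
{
	struct my_dev *dev = dev_id;

	/* Runs in hard interrupt context: only check and silence the device. */
	if (!(readl(dev->regs + 0x00) & 0x1))
		return IRQ_NONE;		/* shared line, not our device */

	writel(0, dev->regs + 0x04);		/* mask the device interrupt */
	return IRQ_WAKE_THREAD;			/* defer the real work to the thread */
}

static irqreturn_t my_thread_fn(int irq, void *dev_id)
{
	struct my_dev *dev = dev_id;

	/* Runs in the "irq/N-name" kernel thread and may sleep. */
	/* ... long-running work, e.g. talking to the device over a bus ... */
	writel(1, dev->regs + 0x04);		/* re-enable when done */
	return IRQ_HANDLED;
}

/* In the driver's probe routine:
 *
 *	ret = request_threaded_irq(dev->irq, my_primary_handler, my_thread_fn,
 *				   IRQF_SHARED, "my_dev", dev);
 */

On a shared line every primary handler is still invoked in hard interrupt context, which is why the check-and-mask step cannot simply be moved into the thread; this is the split-handler design the kernel-doc above refers to.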
index 85d5a2455103397015973321359a45e7d6d1928d..88796c330838dd66c24b8628f8b56ce27d3a3d23 100644 (file)
@@ -165,98 +165,12 @@ void softlockup_tick(void)
                panic("softlockup: hung tasks");
 }
 
-/*
- * Have a reasonable limit on the number of tasks checked:
- */
-unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
-
-/*
- * Zero means infinite timeout - no checking done:
- */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
-
-unsigned long __read_mostly sysctl_hung_task_warnings = 10;
-
-/*
- * Only do the hung-tasks check on one CPU:
- */
-static int check_cpu __read_mostly = -1;
-
-static void check_hung_task(struct task_struct *t, unsigned long now)
-{
-       unsigned long switch_count = t->nvcsw + t->nivcsw;
-
-       if (t->flags & PF_FROZEN)
-               return;
-
-       if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
-               t->last_switch_count = switch_count;
-               t->last_switch_timestamp = now;
-               return;
-       }
-       if ((long)(now - t->last_switch_timestamp) <
-                                       sysctl_hung_task_timeout_secs)
-               return;
-       if (!sysctl_hung_task_warnings)
-               return;
-       sysctl_hung_task_warnings--;
-
-       /*
-        * Ok, the task did not get scheduled for more than 2 minutes,
-        * complain:
-        */
-       printk(KERN_ERR "INFO: task %s:%d blocked for more than "
-                       "%ld seconds.\n", t->comm, t->pid,
-                       sysctl_hung_task_timeout_secs);
-       printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
-                       " disables this message.\n");
-       sched_show_task(t);
-       __debug_show_held_locks(t);
-
-       t->last_switch_timestamp = now;
-       touch_nmi_watchdog();
-
-       if (softlockup_panic)
-               panic("softlockup: blocked tasks");
-}
-
-/*
- * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
- * a really long time (120 seconds). If that happens, print out
- * a warning.
- */
-static void check_hung_uninterruptible_tasks(int this_cpu)
-{
-       int max_count = sysctl_hung_task_check_count;
-       unsigned long now = get_timestamp(this_cpu);
-       struct task_struct *g, *t;
-
-       /*
-        * If the system crashed already then all bets are off,
-        * do not report extra hung tasks:
-        */
-       if (test_taint(TAINT_DIE) || did_panic)
-               return;
-
-       read_lock(&tasklist_lock);
-       do_each_thread(g, t) {
-               if (!--max_count)
-                       goto unlock;
-               /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
-               if (t->state == TASK_UNINTERRUPTIBLE)
-                       check_hung_task(t, now);
-       } while_each_thread(g, t);
- unlock:
-       read_unlock(&tasklist_lock);
-}
-
 /*
  * The watchdog thread - runs every second and touches the timestamp.
  */
 static int watchdog(void *__bind_cpu)
 {
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-       int this_cpu = (long)__bind_cpu;
 
        sched_setscheduler(current, SCHED_FIFO, &param);
 
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
                if (kthread_should_stop())
                        break;
 
-               if (this_cpu == check_cpu) {
-                       if (sysctl_hung_task_timeout_secs)
-                               check_hung_uninterruptible_tasks(this_cpu);
-               }
-
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
-               check_cpu = cpumask_any(cpu_online_mask);
                wake_up_process(per_cpu(watchdog_task, hotcpu));
                break;
 #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               if (hotcpu == check_cpu) {
-                       /* Pick any other online cpu. */
-                       check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
-               }
-               break;
-
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                if (!per_cpu(watchdog_task, hotcpu))
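The per-task test deleted from softlockup.c above is carried over into the new kernel/hung_task.c (listed in this merge). As a reminder of what that detector does, here is a condensed sketch reconstructed from the removed code; the helper name is illustrative and the exact shape of the new file is not shown in this diff.

/* Condensed sketch of the hung-task test removed from softlockup.c above. */
static void check_hung_task_sketch(struct task_struct *t, unsigned long now)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	if (t->flags & PF_FROZEN)
		return;				/* frozen tasks are expected to block */

	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
		/* The task has been scheduled since the last check: re-arm. */
		t->last_switch_count = switch_count;
		t->last_switch_timestamp = now;
		return;
	}

	if ((long)(now - t->last_switch_timestamp) < sysctl_hung_task_timeout_secs)
		return;				/* not stuck for long enough yet */

	/* Still TASK_UNINTERRUPTIBLE with no context switch: report it. */
	sched_show_task(t);
	t->last_switch_timestamp = now;
}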
index 72eb1a41dcabef2a7d46ab7199cae10edcc3aa5c..4286b62b34a0a2d0f9a00f96ae2e8822647238bf 100644 (file)
@@ -814,6 +814,19 @@ static struct ctl_table kern_table[] = {
                .extra1         = &neg_one,
                .extra2         = &sixty,
        },
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "hung_task_panic",
+               .data           = &sysctl_hung_task_panic,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        {
                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "hung_task_check_count",
@@ -829,7 +842,7 @@ static struct ctl_table kern_table[] = {
                .data           = &sysctl_hung_task_timeout_secs,
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = &proc_doulongvec_minmax,
+               .proc_handler   = &proc_dohung_task_timeout_secs,
                .strategy       = &sysctl_intvec,
        },
        {
index 947c5b3f90c4ce42dd511b04bd9b28ded2842ac3..b32ff446c3fb3c1d4e88829ac46a9e0adc705031 100644 (file)
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
        char *msg;
        struct blk_trace *bt;
 
-       if (count > BLK_TN_MAX_MSG)
+       if (count >= BLK_TN_MAX_MSG)
                return -EINVAL;
 
-       msg = kmalloc(count, GFP_KERNEL);
+       msg = kmalloc(count + 1, GFP_KERNEL);
        if (msg == NULL)
                return -ENOMEM;
 
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
                return -EFAULT;
        }
 
+       msg[count] = '\0';
        bt = filp->private_data;
        __trace_note_message(bt, "%s", msg);
        kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
        if (blk_pc_request(rq)) {
                what |= BLK_TC_ACT(BLK_TC_PC);
                __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
-                               sizeof(rq->cmd), rq->cmd);
+                               rq->cmd_len, rq->cmd);
        } else  {
                what |= BLK_TC_ACT(BLK_TC_FS);
                __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
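The first two hunks above fix a missing NUL terminator on the user-supplied message: the size check becomes strict, one extra byte is allocated, and the copy is terminated before being passed to a "%s" format. As a generic, hedged sketch of that pattern (the helper name is hypothetical, not from this patch):

#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>

/* Copy at most max_len - 1 bytes from user space and always NUL-terminate. */
static char *copy_user_msg(const char __user *buffer, size_t count, size_t max_len)
{
	char *msg;

	if (count >= max_len)			/* leave room for the '\0' */
		return ERR_PTR(-EINVAL);

	msg = kmalloc(count + 1, GFP_KERNEL);
	if (!msg)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(msg, buffer, count)) {
		kfree(msg);
		return ERR_PTR(-EFAULT);
	}

	msg[count] = '\0';			/* now safe to print with "%s" */
	return msg;
}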
index a0174a40c563a5141de2d33735ae0f16c791a6ae..9d28476a9851d9f1f9136fe369b4d326e140fe47 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/percpu.h>
 #include <linux/splice.h>
 #include <linux/kdebug.h>
+#include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
 {
        nsec += 500;
        do_div(nsec, 1000);
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
                return;
 
        cpumask_set_cpu(iter->cpu, iter->started);
-       trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+       /* Don't print the "CPU buffer started" banner for the first entry of the trace */
+       if (iter->idx > 1)
+               trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+                               iter->cpu);
 }
 
 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
        if (current_trace)
                *iter->trace = *current_trace;
 
+       if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+               goto fail;
+
+       cpumask_clear(iter->started);
+
        if (current_trace && current_trace->print_max)
                iter->tr = &max_tr;
        else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
                if (iter->buffer_iter[cpu])
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
        }
+       free_cpumask_var(iter->started);
  fail:
        mutex_unlock(&trace_types_lock);
        kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 
        seq_release(inode, file);
        mutex_destroy(&iter->mutex);
+       free_cpumask_var(iter->started);
        kfree(iter->trace);
        kfree(iter);
        return 0;
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
        "# mkdir /debug\n"
        "# mount -t debugfs nodev /debug\n\n"
        "# cat /debug/tracing/available_tracers\n"
-       "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+       "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
        "# cat /debug/tracing/current_tracer\n"
-       "none\n"
+       "nop\n"
        "# echo sched_switch > /debug/tracing/current_tracer\n"
        "# cat /debug/tracing/current_tracer\n"
        "sched_switch\n"
index cbc168f1e43db4e78e41a990cd5b0265cf9a96d4..e685ac2b2ba10f1dcf24fef92180d94b75f76367 100644 (file)
@@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
 extern int
 trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
 extern int
index 4d9952d3df5076edddeefd723b0e4904fdc9c699..07a22c33ebf3c31b4d07b7125a9062118ca90490 100644 (file)
@@ -40,7 +40,7 @@
 
 #undef TRACE_FIELD_ZERO_CHAR
 #define TRACE_FIELD_ZERO_CHAR(item)                                    \
-       ret = trace_seq_printf(s, "\tfield: char " #item ";\t"          \
+       ret = trace_seq_printf(s, "\tfield:char " #item ";\t"           \
                               "offset:%u;\tsize:0;\n",                 \
                               (unsigned int)offsetof(typeof(field), item)); \
        if (!ret)                                                       \
index d72b9a63b2476ca5cf01e43dd06c5ec1b347069e..64b54a59c55b585ff3b979f216ba0451273ee6e4 100644 (file)
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
 
                trace_find_cmdline(entry->pid, comm);
 
-               ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+               ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
                                       " %ld.%03ldms (+%ld.%03ldms): ", comm,
                                       entry->pid, iter->cpu, entry->flags,
                                       entry->preempt_count, iter->idx,
index de35f200abd37321944f1fae494146aee60e3a6f..9117cea6f1ae78f17f2be7fc9894aa9b0abe2075 100644 (file)
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
        pc = preempt_count();
        tracing_record_cmdline(current);
 
+       if (sched_stopped)
+               return;
+
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = ctx_trace->data[cpu];
index 3c5ad6b2ec846256472d83348dbe5747d034c963..5bc00e8f153ebb8682589caa37b553b05800f6b0 100644 (file)
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
        if (unlikely(!tracer_enabled || next != wakeup_task))
                goto out_unlock;
 
-       trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+       trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
        tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
        /*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
        data = wakeup_trace->data[wakeup_cpu];
        data->preempt_timestamp = ftrace_now(cpu);
        tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+
+       /*
+        * We must be careful when using CALLER_ADDR2. But since wake_up
+        * is not called from an assembly function (whereas schedule is),
+        * it should be safe to use it here.
+        */
        trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
index 9638d99644afe4b5ff04d4ac8fcd194d1b67f614..c6e854f215fad96cc02392cbf30d575b366e05ff 100644 (file)
@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
        default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
        default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
 
+config DETECT_HUNG_TASK
+       bool "Detect Hung Tasks"
+       depends on DEBUG_KERNEL
+       default DETECT_SOFTLOCKUP
+       help
+         Say Y here to enable the kernel to detect "hung tasks",
+         which are bugs that cause a task to be stuck in
+         uninterruptible "D" state indefinitely.
+
+         When a hung task is detected, the kernel will print the
+         current stack trace (which you should report), but the
+         task will stay in uninterruptible state. If lockdep is
+         enabled then all held locks will also be reported. This
+         feature has negligible overhead.
+
+config BOOTPARAM_HUNG_TASK_PANIC
+       bool "Panic (Reboot) On Hung Tasks"
+       depends on DETECT_HUNG_TASK
+       help
+         Say Y here to enable the kernel to panic on "hung tasks",
+         which are bugs that cause the kernel to leave a task stuck
+         in uninterruptible "D" state.
+
+         The panic can be used in combination with panic_timeout,
+         to cause the system to reboot automatically after a
+         hung task has been detected. This feature is useful for
+         high-availability systems that have uptime guarantees and
+         where a hung task must be resolved ASAP.
+
+         Say N if unsure.
+
+config BOOTPARAM_HUNG_TASK_PANIC_VALUE
+       int
+       depends on DETECT_HUNG_TASK
+       range 0 1
+       default 0 if !BOOTPARAM_HUNG_TASK_PANIC
+       default 1 if BOOTPARAM_HUNG_TASK_PANIC
+
 config SCHED_DEBUG
        bool "Collect scheduler debugging info"
        depends on DEBUG_KERNEL && PROC_FS
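To see what DETECT_HUNG_TASK catches, a hedged illustration (not part of this patch): any task that blocks in TASK_UNINTERRUPTIBLE and is never woken, for example a test module waiting on a completion that nothing ever completes, is reported once hung_task_timeout_secs expires, and panics the machine if hung_task_panic is set.

#include <linux/completion.h>
#include <linux/module.h>

/* Illustrative only: a deliberately hung task for exercising the detector. */
static DECLARE_COMPLETION(never_done);

static int __init hang_demo_init(void)
{
	/*
	 * wait_for_completion() sleeps in TASK_UNINTERRUPTIBLE.  Nothing
	 * ever calls complete(&never_done), so this task shows up in the
	 * hung-task report after hung_task_timeout_secs.
	 */
	wait_for_completion(&never_done);
	return 0;
}
module_init(hang_demo_init);

MODULE_LICENSE("GPL");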