VERSION = 2
PATCHLEVEL = 6
-SUBLEVEL = 29
-EXTRAVERSION =
+SUBLEVEL = 30
+EXTRAVERSION = -rc1
NAME = Temporary Tasmanian Devil
# *DOCUMENTATION*
* CPUID levels like 0x6, 0xA etc
*/
#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */
/* Virtualization flags: Linux defined */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+ /* Make LAPIC timer preferable over percpu HPET */
+ lapic_clockevent.rating = 150;
+ }
+
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+ { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
{ 0, 0, 0, 0 }
};
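For reference (not part of the patch), a minimal user-space sketch of the same probe the scattered-feature table above performs: CPUID leaf 0x00000006, register EAX, bit 2. It assumes GCC/clang's <cpuid.h> helper is available.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0x6 ("Thermal and Power Management"); ARAT is EAX bit 2,
	 * mirroring { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 } above. */
	if (!__get_cpuid(0x6, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("ARAT (Always Running APIC Timer): %s\n",
	       (eax & (1u << 2)) ? "yes" : "no");
	return 0;
}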
unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
+ u64 saved_aperf, saved_mperf;
};
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
return cmd.val;
}
-struct perf_cur {
+struct perf_pair {
union {
struct {
u32 lo;
u32 hi;
} split;
u64 whole;
- } aperf_cur, mperf_cur;
+ } aperf, mperf;
};
static long read_measured_perf_ctrs(void *_cur)
{
- struct perf_cur *cur = _cur;
+ struct perf_pair *cur = _cur;
- rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
- rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
-
- wrmsr(MSR_IA32_APERF, 0, 0);
- wrmsr(MSR_IA32_MPERF, 0, 0);
+ rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
+ rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
return 0;
}
static unsigned int get_measured_perf(struct cpufreq_policy *policy,
unsigned int cpu)
{
- struct perf_cur cur;
+ struct perf_pair readin, cur;
unsigned int perf_percent;
unsigned int retval;
- if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
+ if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin))
return 0;
+ cur.aperf.whole = readin.aperf.whole -
+ per_cpu(drv_data, cpu)->saved_aperf;
+ cur.mperf.whole = readin.mperf.whole -
+ per_cpu(drv_data, cpu)->saved_mperf;
+ per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole;
+ per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole;
+
#ifdef __i386__
/*
* We don't want to do 64 bit divide with 32 bit kernel
* Get an approximate value. Return failure in case we cannot get
* an approximate value.
*/
- if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
+ if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
int shift_count;
u32 h;
- h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
+ h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
shift_count = fls(h);
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
+ if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
int shift_count = 7;
- cur.aperf_cur.split.lo >>= shift_count;
- cur.mperf_cur.split.lo >>= shift_count;
+ cur.aperf.split.lo >>= shift_count;
+ cur.mperf.split.lo >>= shift_count;
}
- if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
- perf_percent = (cur.aperf_cur.split.lo * 100) /
- cur.mperf_cur.split.lo;
+ if (cur.aperf.split.lo && cur.mperf.split.lo)
+ perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
else
perf_percent = 0;
#else
- if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
+ if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
int shift_count = 7;
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (cur.aperf_cur.whole && cur.mperf_cur.whole)
- perf_percent = (cur.aperf_cur.whole * 100) /
- cur.mperf_cur.whole;
+ if (cur.aperf.whole && cur.mperf.whole)
+ perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
else
perf_percent = 0;
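As a rough illustration (not part of the patch): APERF and MPERF are free-running counters that tick at the actual and the maximum (P0) frequency respectively, so the ratio of their deltas since the last reading gives the average operating frequency as a percentage of maximum; that is why the patch stops resetting the MSRs and instead keeps saved_aperf/saved_mperf and subtracts. A stand-alone sketch with made-up counter values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical snapshots; in the driver these come from rdmsr()
	 * and the per-CPU saved_aperf/saved_mperf fields. */
	uint64_t aperf_prev = 1000000, aperf_now = 1800000;
	uint64_t mperf_prev = 2000000, mperf_now = 3000000;

	uint64_t aperf_delta = aperf_now - aperf_prev;	/* 800000 */
	uint64_t mperf_delta = mperf_now - mperf_prev;	/* 1000000 */

	unsigned int perf_percent = 0;
	if (aperf_delta && mperf_delta)
		perf_percent = (unsigned int)(aperf_delta * 100 / mperf_delta);

	/* The CPU ran at ~80% of its maximum frequency on average. */
	printf("perf_percent = %u\n", perf_percent);
	return 0;
}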
#include <linux/timex.h>
#include <linux/io.h>
#include <linux/acpi.h>
-#include <linux/kernel.h>
#include <asm/msr.h>
#include <acpi/processor.h>
#include <asm/cacheflush.h>
#include <asm/ftrace.h>
-#include <linux/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
rq->cmd = rq->__cmd;
+ rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1;
rq->ref_count = 1;
}
{"PIT2", 0x0048, 0x004B, ACPI_OSI_WIN_XP},
{"RTC", 0x0070, 0x0071, ACPI_OSI_WIN_XP},
{"CMOS", 0x0074, 0x0076, ACPI_OSI_WIN_XP},
- {"DMA1", 0x0081, 0x0083, ACPI_OSI_WIN_XP},
{"DMA1L", 0x0087, 0x0087, ACPI_OSI_WIN_XP},
{"DMA2", 0x0089, 0x008B, ACPI_OSI_WIN_XP},
{"DMA2L", 0x008F, 0x008F, ACPI_OSI_WIN_XP},
},
};
-static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie)
+static void acpi_battery_init_async(void *unused, async_cookie_t cookie)
{
if (acpi_disabled)
return;
}
#endif /* HAVE_ACPI_LEGACY_ALARM */
-extern struct list_head acpi_wakeup_device_list;
-extern spinlock_t acpi_device_lock;
-
static int
acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
{
seq_printf(seq, "Device\tS-state\t Status Sysfs node\n");
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
if (!dev->wakeup.flags.valid)
continue;
- spin_unlock(&acpi_device_lock);
ldev = acpi_get_physical_device(dev->handle);
seq_printf(seq, "%s\t S%d\t%c%-8s ",
seq_printf(seq, "\n");
put_device(ldev);
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return 0;
}
strbuf[len] = '\0';
sscanf(strbuf, "%s", str);
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
}
}
}
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return count;
}
struct acpi_processor_power *pwr = &pr->power;
u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;
+ if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
+ return;
+
/*
* Check, if one of the previous states already marked the lapic
* unstable
static LIST_HEAD(acpi_device_list);
static LIST_HEAD(acpi_bus_id_list);
-DEFINE_SPINLOCK(acpi_device_lock);
+DEFINE_MUTEX(acpi_device_lock);
LIST_HEAD(acpi_wakeup_device_list);
struct acpi_device_bus_id{
*/
INIT_LIST_HEAD(&device->children);
INIT_LIST_HEAD(&device->node);
- INIT_LIST_HEAD(&device->g_list);
INIT_LIST_HEAD(&device->wakeup_list);
new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL);
return -ENOMEM;
}
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
/*
* Find suitable bus_id and instance number in acpi_bus_id_list
* If failed, create one and link it into acpi_bus_id_list
}
dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, acpi_device_bus_id->instance_no);
- if (device->parent) {
+ if (device->parent)
list_add_tail(&device->node, &device->parent->children);
- list_add_tail(&device->g_list, &device->parent->g_list);
- } else
- list_add_tail(&device->g_list, &acpi_device_list);
+
if (device->wakeup.flags.valid)
list_add_tail(&device->wakeup_list, &acpi_wakeup_device_list);
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
if (device->parent)
device->dev.parent = &parent->dev;
device->removal_type = ACPI_BUS_REMOVAL_NORMAL;
return 0;
end:
- spin_lock(&acpi_device_lock);
- if (device->parent) {
+ mutex_lock(&acpi_device_lock);
+ if (device->parent)
list_del(&device->node);
- list_del(&device->g_list);
- } else
- list_del(&device->g_list);
list_del(&device->wakeup_list);
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return result;
}
static void acpi_device_unregister(struct acpi_device *device, int type)
{
- spin_lock(&acpi_device_lock);
- if (device->parent) {
+ mutex_lock(&acpi_device_lock);
+ if (device->parent)
list_del(&device->node);
- list_del(&device->g_list);
- } else
- list_del(&device->g_list);
list_del(&device->wakeup_list);
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
acpi_detach_data(device->handle, acpi_bus_data_handler);
extern void acpi_enable_wakeup_device_prep(u8 sleep_state);
extern void acpi_enable_wakeup_device(u8 sleep_state);
extern void acpi_disable_wakeup_device(u8 sleep_state);
+
+extern struct list_head acpi_wakeup_device_list;
+extern struct mutex acpi_device_lock;
struct acpi_handle_list devices;
struct thermal_zone_device *thermal_zone;
int tz_enabled;
+ int kelvin_offset;
struct mutex lock;
};
}
/* sys I/F for generic thermal sysfs support */
-#define KELVIN_TO_MILLICELSIUS(t) (t * 100 - 273200)
+#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)
static int thermal_get_temp(struct thermal_zone_device *thermal,
unsigned long *temp)
if (result)
return result;
- *temp = KELVIN_TO_MILLICELSIUS(tz->temperature);
+ *temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset);
return 0;
}
if (tz->trips.critical.flags.valid) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.critical.temperature);
+ tz->trips.critical.temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
if (tz->trips.hot.flags.valid) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.hot.temperature);
+ tz->trips.hot.temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
if (tz->trips.passive.flags.valid) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.passive.temperature);
+ tz->trips.passive.temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
tz->trips.active[i].flags.valid; i++) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.active[i].temperature);
+ tz->trips.active[i].temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
if (tz->trips.critical.flags.valid) {
*temperature = KELVIN_TO_MILLICELSIUS(
- tz->trips.critical.temperature);
+ tz->trips.critical.temperature,
+ tz->kelvin_offset);
return 0;
} else
return -EINVAL;
return 0;
}
+/*
+ * The exact offset between Kelvin and degree Celsius is 273.15. However ACPI
+ * handles temperature values with a single decimal place. As a consequence,
+ * some implementations use an offset of 273.1 and others use an offset of
+ * 273.2. Try to find out which one is being used, to present the most
+ * accurate and visually appealing number.
+ *
+ * The heuristic below should work for all ACPI thermal zones which have a
+ * critical trip point with a value being a multiple of 0.5 degree Celsius.
+ */
+static void acpi_thermal_guess_offset(struct acpi_thermal *tz)
+{
+ if (tz->trips.critical.flags.valid &&
+ (tz->trips.critical.temperature % 5) == 1)
+ tz->kelvin_offset = 2731;
+ else
+ tz->kelvin_offset = 2732;
+}
+
static int acpi_thermal_add(struct acpi_device *device)
{
int result = 0;
if (result)
goto free_memory;
+ acpi_thermal_guess_offset(tz);
+
result = acpi_thermal_register_thermal_zone(tz);
if (result)
goto free_memory;
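A stand-alone check of the conversion above (not part of the patch; ACPI reports these temperatures in tenths of a Kelvin, and the sample critical trip value is made up):

#include <stdio.h>

#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)

int main(void)
{
	/* Hypothetical critical trip point of 373.1 K: 3731 % 5 == 1,
	 * so the firmware used a 273.1 offset and the heuristic picks 2731. */
	int crit = 3731;
	int kelvin_offset = (crit % 5 == 1) ? 2731 : 2732;

	/* (3731 - 2731) * 100 = 100000 millidegrees Celsius, i.e. a clean
	 * 100.0 C, where a fixed 2732 offset would have shown 99.9 C. */
	printf("%d millidegrees C\n", KELVIN_TO_MILLICELSIUS(crit, kelvin_offset));
	return 0;
}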
#include "internal.h"
#include "sleep.h"
+/*
+ * acpi_device_lock is not taken in this file: doing so triggers an oops
+ * during suspend/resume, and the locking is not really needed, since these
+ * routines run while entering or leaving sleep states, when no device
+ * hotplug can occur.
+ */
#define _COMPONENT ACPI_SYSTEM_COMPONENT
ACPI_MODULE_NAME("wakeup_devices")
-extern struct list_head acpi_wakeup_device_list;
-extern spinlock_t acpi_device_lock;
-
/**
* acpi_enable_wakeup_device_prep - prepare wakeup devices
* @sleep_state: ACPI state
{
struct list_head *node, *next;
- spin_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev = container_of(node,
struct acpi_device,
(sleep_state > (u32) dev->wakeup.sleep_state))
continue;
- spin_unlock(&acpi_device_lock);
acpi_enable_wakeup_device_power(dev, sleep_state);
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
}
/**
* Caution: this routine must be invoked when interrupt is disabled
* Refer ACPI2.0: P212
*/
- spin_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
|| sleep_state > (u32) dev->wakeup.sleep_state) {
if (dev->wakeup.flags.run_wake) {
- spin_unlock(&acpi_device_lock);
/* set_gpe_type will disable GPE, leave it like that */
acpi_set_gpe_type(dev->wakeup.gpe_device,
dev->wakeup.gpe_number,
ACPI_GPE_TYPE_RUNTIME);
- spin_lock(&acpi_device_lock);
}
continue;
}
- spin_unlock(&acpi_device_lock);
if (!dev->wakeup.flags.run_wake)
acpi_enable_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number);
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
}
/**
{
struct list_head *node, *next;
- spin_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
|| sleep_state > (u32) dev->wakeup.sleep_state) {
if (dev->wakeup.flags.run_wake) {
- spin_unlock(&acpi_device_lock);
acpi_set_gpe_type(dev->wakeup.gpe_device,
dev->wakeup.gpe_number,
ACPI_GPE_TYPE_WAKE_RUN);
/* Re-enable it, since set_gpe_type will disable it */
acpi_enable_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number);
- spin_lock(&acpi_device_lock);
}
continue;
}
- spin_unlock(&acpi_device_lock);
acpi_disable_wakeup_device_power(dev);
/* Never disable run-wake GPE */
if (!dev->wakeup.flags.run_wake) {
acpi_clear_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number, ACPI_NOT_ISR);
}
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
}
int __init acpi_wakeup_device_init(void)
{
struct list_head *node, *next;
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev = container_of(node,
struct acpi_device,
/* In case user doesn't load button driver */
if (!dev->wakeup.flags.run_wake || dev->wakeup.state.enabled)
continue;
- spin_unlock(&acpi_device_lock);
acpi_set_gpe_type(dev->wakeup.gpe_device,
dev->wakeup.gpe_number,
ACPI_GPE_TYPE_WAKE_RUN);
acpi_enable_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number);
dev->wakeup.state.enabled = 1;
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return 0;
}
union acpi_object *hkey = NULL;
int i;
- status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, 0,
+ status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, NULL,
&buffer);
if (ACPI_FAILURE(status)) {
ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
goto out_unlock;
ret = nfs_updatepage(filp, page, 0, pagelen);
- if (ret == 0)
- ret = pagelen;
out_unlock:
unlock_page(page);
if (ret)
struct list_head children;
struct list_head node;
struct list_head wakeup_list;
- struct list_head g_list;
struct acpi_device_status status;
struct acpi_device_flags flags;
struct acpi_device_pnp pnp;
* Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
* to disable branch tracing on a per file basis.
*/
-#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
+ && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
#define likely_notrace(x) __builtin_expect(!!(x), 1)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* for init task */
+#define INIT_FTRACE_GRAPH .ret_stack = NULL
+
/*
* Stack of return addresses for functions
* of a thread.
{
atomic_dec(&current->tracing_graph_pause);
}
-#else
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
#define __notrace_funcgraph
#define __irq_entry
+#define INIT_FTRACE_GRAPH
static inline void ftrace_graph_init_task(struct task_struct *t) { }
static inline void ftrace_graph_exit_task(struct task_struct *t) { }
static inline void pause_graph_tracing(void) { }
static inline void unpause_graph_tracing(void) { }
-#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_TRACING
#include <linux/sched.h>
# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
#endif
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
extern void synchronize_irq(unsigned int irq);
#else
# define synchronize_irq(irq) barrier()
#include <linux/irqflags.h>
#include <linux/utsname.h>
#include <linux/lockdep.h>
+#include <linux/ftrace.h>
#include <linux/ipc.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
INIT_IDS \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
+ INIT_FTRACE_GRAPH \
}
#define IRQF_NOBALANCING 0x00000800
#define IRQF_IRQPOLL 0x00001000
+/*
+ * Bits used by threaded handlers:
+ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
+ * IRQTF_DIED - handler thread died
+ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ */
+enum {
+ IRQTF_RUNTHREAD,
+ IRQTF_DIED,
+ IRQTF_WARNED,
+};
+
typedef irqreturn_t (*irq_handler_t)(int, void *);
/**
* @next: pointer to the next irqaction for shared interrupts
* @irq: interrupt number
* @dir: pointer to the proc/irq/NN/name entry
+ * @thread_fn: interrupt handler function for threaded interrupts
+ * @thread: thread pointer for threaded interrupts
+ * @thread_flags: flags related to @thread
*/
struct irqaction {
irq_handler_t handler;
struct irqaction *next;
int irq;
struct proc_dir_entry *dir;
+ irq_handler_t thread_fn;
+ struct task_struct *thread;
+ unsigned long thread_flags;
};
extern irqreturn_t no_action(int cpl, void *dev_id);
-extern int __must_check request_irq(unsigned int, irq_handler_t handler,
- unsigned long, const char *, void *);
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+extern int __must_check
+request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ unsigned long flags, const char *name, void *dev);
+
+static inline int __must_check
+request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+ const char *name, void *dev)
+{
+ return request_threaded_irq(irq, handler, NULL, flags, name, dev);
+}
+
+extern void exit_irq_thread(void);
+#else
+
+extern int __must_check
+request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+ const char *name, void *dev);
+
+/*
+ * Special function to avoid ifdeffery in kernel/irq/devres.c which
+ * gets magically built by GENERIC_HARDIRQS=n architectures (sparc,
+ * m68k). I really love these $@%#!* obvious Makefile references:
+ * ../../../kernel/irq/devres.o
+ */
+static inline int __must_check
+request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ unsigned long flags, const char *name, void *dev)
+{
+ return request_irq(irq, handler, flags, name, dev);
+}
+
+static inline void exit_irq_thread(void) { }
+#endif
+
extern void free_irq(unsigned int, void *);
struct device;
-extern int __must_check devm_request_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, unsigned long irqflags,
- const char *devname, void *dev_id);
+extern int __must_check
+devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ unsigned long irqflags, const char *devname,
+ void *dev_id);
+
+static inline int __must_check
+devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler,
+ unsigned long irqflags, const char *devname, void *dev_id)
+{
+ return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags,
+ devname, dev_id);
+}
+
extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
/*
#include <linux/irqnr.h>
#include <linux/errno.h>
#include <linux/topology.h>
+#include <linux/wait.h>
#include <asm/irq.h>
#include <asm/ptrace.h>
* @affinity: IRQ affinity on SMP
* @cpu: cpu index useful for balancing
* @pending_mask: pending rebalanced interrupts
+ * @threads_active: number of irqaction threads currently running
+ * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
* @dir: /proc/irq/ procfs entry
* @name: flow handler name for /proc/interrupts output
*/
cpumask_var_t pending_mask;
#endif
#endif
+ atomic_t threads_active;
+ wait_queue_head_t wait_for_threads;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
#endif
* enum irqreturn
* @IRQ_NONE interrupt was not from this device
* @IRQ_HANDLED interrupt was handled by this device
+ * @IRQ_WAKE_THREAD handler requests to wake the handler thread
*/
enum irqreturn {
IRQ_NONE,
IRQ_HANDLED,
+ IRQ_WAKE_THREAD,
};
typedef enum irqreturn irqreturn_t;
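A hedged sketch (not part of the patch) of how a driver would use the split-handler API introduced above; the foo_* device, accessors and register layout are invented for illustration, and only the request_threaded_irq() call and the IRQ_* return values come from the patch:

#include <linux/module.h>
#include <linux/interrupt.h>

struct foo_device {				/* hypothetical driver state */
	void __iomem *regs;
	int irq;
};

/* Placeholders standing in for real MMIO accesses. */
static bool foo_irq_pending(struct foo_device *foo)		{ return true; }
static void foo_mask_device_irq(struct foo_device *foo)	{ }
static void foo_unmask_device_irq(struct foo_device *foo)	{ }
static void foo_process_events(struct foo_device *foo)		{ }

/* Primary handler: hard interrupt context. Check whether the interrupt
 * is ours, silence the device, and ask the core to wake the thread. */
static irqreturn_t foo_quick_check(int irq, void *dev_id)
{
	struct foo_device *foo = dev_id;

	if (!foo_irq_pending(foo))
		return IRQ_NONE;		/* not ours (shared line) */

	foo_mask_device_irq(foo);
	return IRQ_WAKE_THREAD;			/* run foo_thread_fn() */
}

/* Threaded handler: process context, may sleep. */
static irqreturn_t foo_thread_fn(int irq, void *dev_id)
{
	struct foo_device *foo = dev_id;

	foo_process_events(foo);
	foo_unmask_device_irq(foo);
	return IRQ_HANDLED;
}

/* In the probe path: */
static int foo_setup_irq(struct foo_device *foo)
{
	return request_threaded_irq(foo->irq, foo_quick_check, foo_thread_fn,
				    IRQF_SHARED, "foo", foo);
}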
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
extern int softlockup_thresh;
#else
static inline void softlockup_tick(void)
{
}
-static inline void spawn_softlockup_task(void)
-{
-}
static inline void touch_softlockup_watchdog(void)
{
}
}
#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos);
+#endif
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
- unsigned long last_switch_timestamp;
unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
spinlock_t alloc_lock;
+#ifdef CONFIG_GENERIC_HARDIRQS
+ /* IRQ handler threads */
+ struct irqaction *irqaction;
+#endif
+
/* Protection of the PI data structures: */
spinlock_t pi_lock;
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
schedule();
}
+ exit_irq_thread();
+
exit_signals(tsk); /* sets PF_EXITING */
/*
* tsk->flags are checked in the futex code to protect against
tsk->min_flt = tsk->maj_flt = 0;
tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+ tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
tsk->mm = NULL;
tsk->active_mm = NULL;
p->default_timer_slack_ns = current->timer_slack_ns;
-#ifdef CONFIG_DETECT_SOFTLOCKUP
- p->last_switch_count = 0;
- p->last_switch_timestamp = 0;
-#endif
-
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
--- /dev/null
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period. So it needs to be upper-bound.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+
+static int __read_mostly did_panic;
+
+static struct task_struct *watchdog_task;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+ CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+static int __init hung_task_panic_setup(char *str)
+{
+ sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+
+ return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ did_panic = 1;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+ .notifier_call = hung_task_panic,
+};
+
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+ unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+ /*
+ * Ensure the task is not frozen.
+ * Also, when a freshly created task is scheduled once and changes
+ * its state to TASK_UNINTERRUPTIBLE without ever having been
+ * switched out, it mustn't be checked.
+ */
+ if (unlikely(t->flags & PF_FROZEN || !switch_count))
+ return;
+
+ if (switch_count != t->last_switch_count) {
+ t->last_switch_count = switch_count;
+ return;
+ }
+ if (!sysctl_hung_task_warnings)
+ return;
+ sysctl_hung_task_warnings--;
+
+ /*
+ * Ok, the task did not get scheduled for more than 2 minutes,
+ * complain:
+ */
+ printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+ "%ld seconds.\n", t->comm, t->pid, timeout);
+ printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+ " disables this message.\n");
+ sched_show_task(t);
+ __debug_show_held_locks(t);
+
+ touch_nmi_watchdog();
+
+ if (sysctl_hung_task_panic)
+ panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * to exit the grace period. For classic RCU, a reschedule is required.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+ get_task_struct(g);
+ get_task_struct(t);
+ rcu_read_unlock();
+ cond_resched();
+ rcu_read_lock();
+ put_task_struct(t);
+ put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+ int max_count = sysctl_hung_task_check_count;
+ int batch_count = HUNG_TASK_BATCHING;
+ struct task_struct *g, *t;
+
+ /*
+ * If the system crashed already then all bets are off,
+ * do not report extra hung tasks:
+ */
+ if (test_taint(TAINT_DIE) || did_panic)
+ return;
+
+ rcu_read_lock();
+ do_each_thread(g, t) {
+ if (!--max_count)
+ goto unlock;
+ if (!--batch_count) {
+ batch_count = HUNG_TASK_BATCHING;
+ rcu_lock_break(g, t);
+ /* Exit if t or g was unhashed during refresh. */
+ if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+ goto unlock;
+ }
+ /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+ if (t->state == TASK_UNINTERRUPTIBLE)
+ check_hung_task(t, timeout);
+ } while_each_thread(g, t);
+ unlock:
+ rcu_read_unlock();
+}
+
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+ /* timeout of 0 will disable the watchdog */
+ return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
+
+/*
+ * Process updating of timeout sysctl
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+ if (ret || !write)
+ goto out;
+
+ wake_up_process(watchdog_task);
+
+ out:
+ return ret;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ */
+static int watchdog(void *dummy)
+{
+ set_user_nice(current, 0);
+
+ for ( ; ; ) {
+ unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+ while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+ timeout = sysctl_hung_task_timeout_secs;
+
+ check_hung_uninterruptible_tasks(timeout);
+ }
+
+ return 0;
+}
+
+static int __init hung_task_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+ watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+
+ return 0;
+}
+
+module_init(hung_task_init);
}
/**
- * devm_request_irq - allocate an interrupt line for a managed device
+ * devm_request_threaded_irq - allocate an interrupt line for a managed device
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
+ * @thread_fn: function to be called in a threaded interrupt context. NULL
+ * for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
* If an IRQ allocated with this function needs to be freed
* separately, dev_free_irq() must be used.
*/
-int devm_request_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, unsigned long irqflags,
- const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ unsigned long irqflags, const char *devname,
+ void *dev_id)
{
struct irq_devres *dr;
int rc;
if (!dr)
return -ENOMEM;
- rc = request_irq(irq, handler, irqflags, devname, dev_id);
+ rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+ dev_id);
if (rc) {
devres_free(dr);
return rc;
return 0;
}
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
/**
* devm_free_irq - free an interrupt
return IRQ_NONE;
}
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+ if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+ return;
+
+ printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+ "but no thread function available.", irq, action->name);
+}
+
DEFINE_TRACE(irq_handler_entry);
DEFINE_TRACE(irq_handler_exit);
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, ret);
- if (ret == IRQ_HANDLED)
+
+ switch (ret) {
+ case IRQ_WAKE_THREAD:
+ /*
+ * Set result to handled so the spurious check
+ * does not trigger.
+ */
+ ret = IRQ_HANDLED;
+
+ /*
+ * Catch drivers which return WAKE_THREAD but
+ * did not set up a thread function
+ */
+ if (unlikely(!action->thread_fn)) {
+ warn_no_thread(irq, action);
+ break;
+ }
+
+ /*
+ * Wake up the handler thread for this
+ * action. In case the thread crashed and was
+ * killed we just pretend that we handled the
+ * interrupt. The hardirq handler above has
+ * disabled the device interrupt, so no irq
+ * storm is lurking.
+ */
+ if (likely(!test_bit(IRQTF_DIED,
+ &action->thread_flags))) {
+ set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+ wake_up_process(action->thread);
+ }
+
+ /* Fall through to add to randomness */
+ case IRQ_HANDLED:
status |= action->flags;
+ break;
+
+ default:
+ break;
+ }
+
retval |= ret;
action = action->next;
} while (action);
*/
#include <linux/irq.h>
+#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include "internals.h"
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
/* Oops, that failed? */
} while (status & IRQ_INPROGRESS);
+
+ /*
+ * We made sure that no hardirq handler is running. Now verify
+ * that no threaded handlers are active.
+ */
+ wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
}
EXPORT_SYMBOL(synchronize_irq);
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
/**
* irq_can_set_affinity - Check if the affinity of a given irq can be set
* @irq: Interrupt to check
return 1;
}
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+ struct irqaction *action = desc->action;
+
+ while (action) {
+ if (action->thread)
+ set_cpus_allowed_ptr(action->thread, cpumask);
+ action = action->next;
+ }
+}
+
/**
* irq_set_affinity - Set the irq affinity of a given irq
* @irq: Interrupt to set affinity
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
+ irq_set_thread_affinity(desc, cpumask);
desc->status |= IRQ_AFFINITY_SET;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
spin_lock_irqsave(&desc->lock, flags);
ret = setup_affinity(irq, desc);
+ if (!ret)
+ irq_set_thread_affinity(desc, desc->affinity);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
return ret;
}
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (test_and_clear_bit(IRQTF_RUNTHREAD,
+ &action->thread_flags)) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+ schedule();
+ }
+ return -1;
+}
+
+/*
+ * Interrupt handler thread
+ */
+static int irq_thread(void *data)
+{
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+ struct irqaction *action = data;
+ struct irq_desc *desc = irq_to_desc(action->irq);
+ int wake;
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+ current->irqaction = action;
+
+ while (!irq_wait_for_interrupt(action)) {
+
+ atomic_inc(&desc->threads_active);
+
+ spin_lock_irq(&desc->lock);
+ if (unlikely(desc->status & IRQ_DISABLED)) {
+ /*
+ * CHECKME: We might need a dedicated
+ * IRQ_THREAD_PENDING flag here, which
+ * retriggers the thread in check_irq_resend()
+ * but AFAICT IRQ_PENDING should be fine as it
+ * retriggers the interrupt itself --- tglx
+ */
+ desc->status |= IRQ_PENDING;
+ spin_unlock_irq(&desc->lock);
+ } else {
+ spin_unlock_irq(&desc->lock);
+
+ action->thread_fn(action->irq, action->dev_id);
+ }
+
+ wake = atomic_dec_and_test(&desc->threads_active);
+
+ if (wake && waitqueue_active(&desc->wait_for_threads))
+ wake_up(&desc->wait_for_threads);
+ }
+
+ /*
+ * Clear irqaction. Otherwise exit_irq_thread() would make
+ * fuzz about an active irq thread going into nirvana.
+ */
+ current->irqaction = NULL;
+ return 0;
+}
+
+/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(void)
+{
+ struct task_struct *tsk = current;
+
+ if (!tsk->irqaction)
+ return;
+
+ printk(KERN_ERR
+ "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+ tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+ /*
+ * Set the THREAD DIED flag to prevent further wakeups of the
+ * soon to be gone threaded handler.
+ */
+ set_bit(IRQTF_DIED, &tsk->irqaction->thread_flags);
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
rand_initialize_irq(irq);
}
+ /*
+ * Threaded handler ?
+ */
+ if (new->thread_fn) {
+ struct task_struct *t;
+
+ t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+ new->name);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+ /*
+ * We keep the reference to the task struct even if
+ * the thread dies to avoid that the interrupt code
+ * references an already freed task_struct.
+ */
+ get_task_struct(t);
+ new->thread = t;
+ wake_up_process(t);
+ }
+
/*
* The following block of code has to be executed atomically
*/
if (!shared) {
irq_chip_set_defaults(desc->chip);
+ init_waitqueue_head(&desc->wait_for_threads);
+
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc, irq,
new->flags & IRQF_TRIGGER_MASK);
- if (ret) {
- spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
- }
+ if (ret)
+ goto out_thread;
} else
compat_irq_chip_set_default_handler(desc);
#if defined(CONFIG_IRQ_PER_CPU)
dump_stack();
}
#endif
+ ret = -EBUSY;
+
+out_thread:
spin_unlock_irqrestore(&desc->lock, flags);
- return -EBUSY;
+ if (new->thread) {
+ struct task_struct *t = new->thread;
+
+ new->thread = NULL;
+ if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+ kthread_stop(t);
+ put_task_struct(t);
+ }
+ return ret;
}
/**
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
+ struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
else
desc->chip->disable(irq);
}
+
+ irqthread = action->thread;
+ action->thread = NULL;
+
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
+ if (irqthread) {
+ if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ kthread_stop(irqthread);
+ put_task_struct(irqthread);
+ }
+
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
EXPORT_SYMBOL(free_irq);
/**
- * request_irq - allocate an interrupt line
+ * request_threaded_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
+ * @handler: Function to be called when the IRQ occurs.
+ * Primary handler for threaded interrupts
+ * @thread_fn: Function called from the irq handler thread
+ * If NULL, no irq thread is created
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
* raises, you must take care both to initialise your hardware
* and to set up the interrupt handler in the right order.
*
+ * If you want to set up a threaded irq handler for your device
+ * then you need to supply @handler and @thread_fn. @handler is
+ * still called in hard interrupt context and has to check
+ * whether the interrupt originates from the device. If yes, it
+ * needs to disable the interrupt on the device and return
+ * IRQ_WAKE_THREAD, which will wake up the handler thread and run
+ * @thread_fn. This split handler design is necessary to support
+ * shared interrupts.
+ *
* Dev_id must be globally unique. Normally the address of the
* device data structure is used as the cookie. Since the handler
* receives this value it makes sense to use it.
* IRQF_TRIGGER_* Specify active edge(s) or level
*
*/
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn, unsigned long irqflags,
+ const char *devname, void *dev_id)
{
struct irqaction *action;
struct irq_desc *desc;
return -ENOMEM;
action->handler = handler;
+ action->thread_fn = thread_fn;
action->flags = irqflags;
action->name = devname;
action->dev_id = dev_id;
#endif
return retval;
}
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
panic("softlockup: hung tasks");
}
-/*
- * Have a reasonable limit on the number of tasks checked:
- */
-unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
-
-/*
- * Zero means infinite timeout - no checking done:
- */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
-
-unsigned long __read_mostly sysctl_hung_task_warnings = 10;
-
-/*
- * Only do the hung-tasks check on one CPU:
- */
-static int check_cpu __read_mostly = -1;
-
-static void check_hung_task(struct task_struct *t, unsigned long now)
-{
- unsigned long switch_count = t->nvcsw + t->nivcsw;
-
- if (t->flags & PF_FROZEN)
- return;
-
- if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
- t->last_switch_count = switch_count;
- t->last_switch_timestamp = now;
- return;
- }
- if ((long)(now - t->last_switch_timestamp) <
- sysctl_hung_task_timeout_secs)
- return;
- if (!sysctl_hung_task_warnings)
- return;
- sysctl_hung_task_warnings--;
-
- /*
- * Ok, the task did not get scheduled for more than 2 minutes,
- * complain:
- */
- printk(KERN_ERR "INFO: task %s:%d blocked for more than "
- "%ld seconds.\n", t->comm, t->pid,
- sysctl_hung_task_timeout_secs);
- printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
- " disables this message.\n");
- sched_show_task(t);
- __debug_show_held_locks(t);
-
- t->last_switch_timestamp = now;
- touch_nmi_watchdog();
-
- if (softlockup_panic)
- panic("softlockup: blocked tasks");
-}
-
-/*
- * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
- * a really long time (120 seconds). If that happens, print out
- * a warning.
- */
-static void check_hung_uninterruptible_tasks(int this_cpu)
-{
- int max_count = sysctl_hung_task_check_count;
- unsigned long now = get_timestamp(this_cpu);
- struct task_struct *g, *t;
-
- /*
- * If the system crashed already then all bets are off,
- * do not report extra hung tasks:
- */
- if (test_taint(TAINT_DIE) || did_panic)
- return;
-
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
- if (!--max_count)
- goto unlock;
- /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
- if (t->state == TASK_UNINTERRUPTIBLE)
- check_hung_task(t, now);
- } while_each_thread(g, t);
- unlock:
- read_unlock(&tasklist_lock);
-}
-
/*
* The watchdog thread - runs every second and touches the timestamp.
*/
static int watchdog(void *__bind_cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
- int this_cpu = (long)__bind_cpu;
sched_setscheduler(current, SCHED_FIFO, &param);
if (kthread_should_stop())
break;
- if (this_cpu == check_cpu) {
- if (sysctl_hung_task_timeout_secs)
- check_hung_uninterruptible_tasks(this_cpu);
- }
-
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- check_cpu = cpumask_any(cpu_online_mask);
wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- if (hotcpu == check_cpu) {
- /* Pick any other online cpu. */
- check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
- }
- break;
-
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!per_cpu(watchdog_task, hotcpu))
.extra1 = &neg_one,
.extra2 = &sixty,
},
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_check_count",
.data = &sysctl_hung_task_timeout_secs,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_dohung_task_timeout_secs,
.strategy = &sysctl_intvec,
},
{
char *msg;
struct blk_trace *bt;
- if (count > BLK_TN_MAX_MSG)
+ if (count >= BLK_TN_MAX_MSG)
return -EINVAL;
- msg = kmalloc(count, GFP_KERNEL);
+ msg = kmalloc(count + 1, GFP_KERNEL);
if (msg == NULL)
return -ENOMEM;
return -EFAULT;
}
+ msg[count] = '\0';
bt = filp->private_data;
__trace_note_message(bt, "%s", msg);
kfree(msg);
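A stand-alone illustration (not part of the patch) of the off-by-one being fixed above: a message of count bytes needs count + 1 bytes of storage for the terminating NUL, so count itself must stay strictly below the limit. The plain memcpy() stands in for copy_from_user() and the limit is reused only by name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLK_TN_MAX_MSG 128

static char *copy_note(const char *ubuf, size_t count)
{
	char *msg;

	if (count >= BLK_TN_MAX_MSG)	/* '>' alone would let count == 128 through */
		return NULL;

	msg = malloc(count + 1);	/* room for the terminating NUL */
	if (!msg)
		return NULL;

	memcpy(msg, ubuf, count);	/* stands in for copy_from_user() */
	msg[count] = '\0';
	return msg;
}

int main(void)
{
	char *m = copy_note("hello blktrace", 14);

	if (m) {
		printf("%s\n", m);
		free(m);
	}
	return 0;
}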
if (blk_pc_request(rq)) {
what |= BLK_TC_ACT(BLK_TC_PC);
__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
- sizeof(rq->cmd), rq->cmd);
+ rq->cmd_len, rq->cmd);
} else {
what |= BLK_TC_ACT(BLK_TC_FS);
__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
+#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
{
nsec += 500;
do_div(nsec, 1000);
return;
cpumask_set_cpu(iter->cpu, iter->started);
- trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+ /* Don't print started cpu buffer for the first entry of the trace */
+ if (iter->idx > 1)
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+ iter->cpu);
}
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
if (current_trace)
*iter->trace = *current_trace;
+ if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+ goto fail;
+
+ cpumask_clear(iter->started);
+
if (current_trace && current_trace->print_max)
iter->tr = &max_tr;
else
if (iter->buffer_iter[cpu])
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
+ free_cpumask_var(iter->started);
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
seq_release(inode, file);
mutex_destroy(&iter->mutex);
+ free_cpumask_var(iter->started);
kfree(iter->trace);
kfree(iter);
return 0;
"# mkdir /debug\n"
"# mount -t debugfs nodev /debug\n\n"
"# cat /debug/tracing/available_tracers\n"
- "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+ "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
"# cat /debug/tracing/current_tracer\n"
- "none\n"
+ "nop\n"
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
extern int
trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
extern int
#undef TRACE_FIELD_ZERO_CHAR
#define TRACE_FIELD_ZERO_CHAR(item) \
- ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \
+ ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \
"offset:%u;\tsize:0;\n", \
(unsigned int)offsetof(typeof(field), item)); \
if (!ret) \
trace_find_cmdline(entry->pid, comm);
- ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+ ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
" %ld.%03ldms (+%ld.%03ldms): ", comm,
entry->pid, iter->cpu, entry->flags,
entry->preempt_count, iter->idx,
pc = preempt_count();
tracing_record_cmdline(current);
+ if (sched_stopped)
+ return;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = ctx_trace->data[cpu];
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
/*
data = wakeup_trace->data[wakeup_cpu];
data->preempt_timestamp = ftrace_now(cpu);
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+
+ /*
+ * We must be careful in using CALLER_ADDR2. But since wake_up
+ * is not called by an assembly function (where as schedule is)
+ * it should be safe to use it here.
+ */
trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+config DETECT_HUNG_TASK
+ bool "Detect Hung Tasks"
+ depends on DEBUG_KERNEL
+ default DETECT_SOFTLOCKUP
+ help
+ Say Y here to enable the kernel to detect "hung tasks",
+ which are bugs that cause the task to be stuck in
+ uninterruptible "D" state indefinitiley.
+
+ When a hung task is detected, the kernel will print the
+ current stack trace (which you should report), but the
+ task will stay in uninterruptible state. If lockdep is
+ enabled then all held locks will also be reported. This
+ feature has negligible overhead.
+
+config BOOTPARAM_HUNG_TASK_PANIC
+ bool "Panic (Reboot) On Hung Tasks"
+ depends on DETECT_HUNG_TASK
+ help
+ Say Y here to enable the kernel to panic on "hung tasks",
+ which are bugs that cause the kernel to leave a task stuck
+ in uninterruptible "D" state.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ hung task has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a hung task must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_HUNG_TASK_PANIC_VALUE
+ int
+ depends on DETECT_HUNG_TASK
+ range 0 1
+ default 0 if !BOOTPARAM_HUNG_TASK_PANIC
+ default 1 if BOOTPARAM_HUNG_TASK_PANIC
+
config SCHED_DEBUG
bool "Collect scheduler debugging info"
depends on DEBUG_KERNEL && PROC_FS