sched: Introduce the 'trace_sched_waking' tracepoint
author Peter Zijlstra <peterz@infradead.org>
Tue, 9 Jun 2015 09:13:36 +0000 (11:13 +0200)
committer Ingo Molnar <mingo@kernel.org>
Mon, 3 Aug 2015 10:21:22 +0000 (12:21 +0200)
Mathieu reported that since 317f394160e9 ("sched: Move the second half
of ttwu() to the remote cpu") trace_sched_wakeup() can happen out of
context of the waker.

This is a problem when you want to analyse wakeup paths because it is
now very hard to correlate the wakeup event to whoever issued the
wakeup.

OTOH trace_sched_wakeup() is issued at the point where we set
p->state = TASK_RUNNING, which is right where we hand the task off to
the scheduler, so this is an important point when looking at
scheduling behaviour: up to here it's been the wakeup path; everything
hereafter is due to scheduler policy.

To bridge this gap, introduce a second tracepoint: trace_sched_waking.
It is guaranteed to be called in the waker context.

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Francis Giraldeau <francis.giraldeau@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150609091336.GQ3644@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/trace/events/sched.h
kernel/sched/core.c
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c

index d57a575fe31fc5796e9866470e3dd40a881ca12a..539d6bc3216a3784f9ad5b4d1e3ef06e7a4cc223 100644 (file)
@@ -55,9 +55,9 @@ TRACE_EVENT(sched_kthread_stop_ret,
  */
 DECLARE_EVENT_CLASS(sched_wakeup_template,
 
-       TP_PROTO(struct task_struct *p, int success),
+       TP_PROTO(struct task_struct *p),
 
-       TP_ARGS(__perf_task(p), success),
+       TP_ARGS(__perf_task(p)),
 
        TP_STRUCT__entry(
                __array(        char,   comm,   TASK_COMM_LEN   )
@@ -71,25 +71,37 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
                memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
                __entry->pid            = p->pid;
                __entry->prio           = p->prio;
-               __entry->success        = success;
+               __entry->success        = 1; /* rudiment, kill when possible */
                __entry->target_cpu     = task_cpu(p);
        ),
 
-       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+       TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
                  __entry->comm, __entry->pid, __entry->prio,
-                 __entry->success, __entry->target_cpu)
+                 __entry->target_cpu)
 );
 
+/*
+ * Tracepoint called when waking a task; this tracepoint is guaranteed to be
+ * called from the waking context.
+ */
+DEFINE_EVENT(sched_wakeup_template, sched_waking,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
+
+/*
+ * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
+ * It is not always called from the waking context.
+ */
 DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
-            TP_PROTO(struct task_struct *p, int success),
-            TP_ARGS(p, success));
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
 
 /*
  * Tracepoint for waking up a new task:
  */
 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
-            TP_PROTO(struct task_struct *p, int success),
-            TP_ARGS(p, success));
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
 
 #ifdef CREATE_TRACE_POINTS
 static inline long __trace_sched_switch_state(struct task_struct *p)
index 48be7dc3d497d634bdb827921fd55e4eddb5b157..fa5826cc612f4336a8b942d45fd7f379759a128e 100644 (file)
@@ -1654,9 +1654,9 @@ static void
 ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
        check_preempt_curr(rq, p, wake_flags);
-       trace_sched_wakeup(p, true);
-
        p->state = TASK_RUNNING;
+       trace_sched_wakeup(p);
+
 #ifdef CONFIG_SMP
        if (p->sched_class->task_woken) {
                /*
@@ -1874,6 +1874,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        if (!(p->state & state))
                goto out;
 
+       trace_sched_waking(p);
+
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
 
@@ -1949,6 +1951,8 @@ static void try_to_wake_up_local(struct task_struct *p)
        if (!(p->state & TASK_NORMAL))
                goto out;
 
+       trace_sched_waking(p);
+
        if (!task_on_rq_queued(p))
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
@@ -2307,7 +2311,7 @@ void wake_up_new_task(struct task_struct *p)
        rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
        p->on_rq = TASK_ON_RQ_QUEUED;
-       trace_sched_wakeup_new(p, true);
+       trace_sched_wakeup_new(p);
        check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
        if (p->sched_class->task_woken)
index 419ca37e72c954593755ed38c07be1e5a95017e1..f270088e9929aa2e085a15960828929140ae1037 100644 (file)
@@ -26,7 +26,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n
 }
 
 static void
-probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
+probe_sched_wakeup(void *ignore, struct task_struct *wakee)
 {
        if (unlikely(!sched_ref))
                return;
index 9b33dd117f3f5b63cf8380417293e70beb9e0c0b..12cbe77b413620cb80436ab55d3758237e31a5b9 100644 (file)
@@ -514,7 +514,7 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-probe_wakeup(void *ignore, struct task_struct *p, int success)
+probe_wakeup(void *ignore, struct task_struct *p)
 {
        struct trace_array_cpu *data;
        int cpu = smp_processor_id();