sched: Fix cgroup movement of forking process
author: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Thu, 15 Dec 2011 05:36:55 +0000 (14:36 +0900)
committer: Ingo Molnar <mingo@elte.hu>
Wed, 21 Dec 2011 09:34:49 +0000 (10:34 +0100)
There is a small race between task_fork_fair() and sched_move_task()
when the latter is trying to move the parent.

        task_fork_fair()                 sched_move_task()
--------------------------------+---------------------------------
  cfs_rq = task_cfs_rq(current)
    -> cfs_rq is the "old" one.
  curr = cfs_rq->curr
    -> curr is set to the parent.
                                    task_rq_lock()
                                    dequeue_task()
                                      ->parent.se.vruntime -= (old)cfs_rq->min_vruntime
                                    enqueue_task()
                                      ->parent.se.vruntime += (new)cfs_rq->min_vruntime
                                    task_rq_unlock()
  raw_spin_lock_irqsave(rq->lock)
  se->vruntime = curr->vruntime
    -> vruntime of the child is set to that of the parent
       which has already been updated by sched_move_task().
  se->vruntime -= (old)cfs_rq->min_vruntime.
  raw_spin_unlock_irqrestore(rq->lock)

As a result, the vruntime of the child becomes far bigger than expected
if (new)cfs_rq->min_vruntime >> (old)cfs_rq->min_vruntime.

This patch fixes the problem by setting "cfs_rq" and "curr" only after
acquiring rq->lock.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20111215143655.662676b0.nishimura@mxp.nes.nec.co.jp
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched/fair.c

index cea2fa85327486d26c4f79db911cf9006045fff3..525d69e5fb789ab95e17a31c2a69375f5e7f99df 100644 (file)
@@ -5190,8 +5190,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
  */
 static void task_fork_fair(struct task_struct *p)
 {
-       struct cfs_rq *cfs_rq = task_cfs_rq(current);
-       struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+       struct cfs_rq *cfs_rq;
+       struct sched_entity *se = &p->se, *curr;
        int this_cpu = smp_processor_id();
        struct rq *rq = this_rq();
        unsigned long flags;
@@ -5200,6 +5200,9 @@ static void task_fork_fair(struct task_struct *p)
 
        update_rq_clock(rq);
 
+       cfs_rq = task_cfs_rq(current);
+       curr = cfs_rq->curr;
+
        if (unlikely(task_cpu(p) != this_cpu)) {
                rcu_read_lock();
                __set_task_cpu(p, this_cpu);