sched: RT-balance, add new methods to sched_class
author Steven Rostedt <rostedt@goodmis.org>
Fri, 25 Jan 2008 20:08:22 +0000 (21:08 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 25 Jan 2008 20:08:22 +0000 (21:08 +0100)
Dmitry Adamushko found that the current implementation of the RT
balancing code left out changes to the sched_setscheduler and
rt_mutex_setprio.

This patch addresses this issue by adding methods to the schedule classes
to handle being switched out of (switched_from) and being switched into
(switched_to) a sched_class. A method for handling priority changes
(prio_changed) is also added.

This patch also removes some duplicate logic between rt_mutex_setprio and
sched_setscheduler.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/sched.h
kernel/sched.c
kernel/sched_fair.c
kernel/sched_idletask.c
kernel/sched_rt.c

index c67d2c2f0111754e76b824dba2d05df8fda981c1..f2044e70700483827ffdab539d3877410f44262a 100644 (file)
@@ -855,6 +855,13 @@ struct sched_class {
 
        void (*join_domain)(struct rq *rq);
        void (*leave_domain)(struct rq *rq);
+
+       void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+                              int running);
+       void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+                            int running);
+       void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+                            int oldprio, int running);
 };
 
 struct load_weight {
index 2368a0d882e3c9c06034a2a1b26024650dd14f3e..5834c7fb79a5d45529fb2582e91121e5d61034b8 100644 (file)
@@ -1152,6 +1152,18 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 #endif
 }
 
+static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+                                      const struct sched_class *prev_class,
+                                      int oldprio, int running)
+{
+       if (prev_class != p->sched_class) {
+               if (prev_class->switched_from)
+                       prev_class->switched_from(rq, p, running);
+               p->sched_class->switched_to(rq, p, running);
+       } else
+               p->sched_class->prio_changed(rq, p, oldprio, running);
+}
+
 #ifdef CONFIG_SMP
 
 /*
@@ -4017,6 +4029,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        unsigned long flags;
        int oldprio, on_rq, running;
        struct rq *rq;
+       const struct sched_class *prev_class = p->sched_class;
 
        BUG_ON(prio < 0 || prio > MAX_PRIO);
 
@@ -4042,18 +4055,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        if (on_rq) {
                if (running)
                        p->sched_class->set_curr_task(rq);
+
                enqueue_task(rq, p, 0);
-               /*
-                * Reschedule if we are currently running on this runqueue and
-                * our priority decreased, or if we are not currently running on
-                * this runqueue and our priority is higher than the current's
-                */
-               if (running) {
-                       if (p->prio > oldprio)
-                               resched_task(rq->curr);
-               } else {
-                       check_preempt_curr(rq, p);
-               }
+
+               check_class_changed(rq, p, prev_class, oldprio, running);
        }
        task_rq_unlock(rq, &flags);
 }
@@ -4253,6 +4258,7 @@ int sched_setscheduler(struct task_struct *p, int policy,
 {
        int retval, oldprio, oldpolicy = -1, on_rq, running;
        unsigned long flags;
+       const struct sched_class *prev_class = p->sched_class;
        struct rq *rq;
 
        /* may grab non-irq protected spin_locks */
@@ -4346,18 +4352,10 @@ recheck:
        if (on_rq) {
                if (running)
                        p->sched_class->set_curr_task(rq);
+
                activate_task(rq, p, 0);
-               /*
-                * Reschedule if we are currently running on this runqueue and
-                * our priority decreased, or if we are not currently running on
-                * this runqueue and our priority is higher than the current's
-                */
-               if (running) {
-                       if (p->prio > oldprio)
-                               resched_task(rq->curr);
-               } else {
-                       check_preempt_curr(rq, p);
-               }
+
+               check_class_changed(rq, p, prev_class, oldprio, running);
        }
        __task_rq_unlock(rq);
        spin_unlock_irqrestore(&p->pi_lock, flags);
index 10aa6e1ae3ddebdf9915f7fa69d29a11c183b234..dfa18d55561dbfbedaf1e7c3f4bc5cf5ccf2b2b0 100644 (file)
@@ -1280,6 +1280,42 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
        resched_task(rq->curr);
 }
 
+/*
+ * Priority of the task has changed. Check to see if we preempt
+ * the current task.
+ */
+static void prio_changed_fair(struct rq *rq, struct task_struct *p,
+                             int oldprio, int running)
+{
+       /*
+        * Reschedule if we are currently running on this runqueue and
+        * our priority decreased, or if we are not currently running on
+        * this runqueue and our priority is higher than the current's
+        */
+       if (running) {
+               if (p->prio > oldprio)
+                       resched_task(rq->curr);
+       } else
+               check_preempt_curr(rq, p);
+}
+
+/*
+ * We switched to the sched_fair class.
+ */
+static void switched_to_fair(struct rq *rq, struct task_struct *p,
+                            int running)
+{
+       /*
+        * We were most likely switched from sched_rt, so
+        * kick off the schedule if running, otherwise just see
+        * if we can still preempt the current task.
+        */
+       if (running)
+               resched_task(rq->curr);
+       else
+               check_preempt_curr(rq, p);
+}
+
 /* Account for a task changing its policy or group.
  *
  * This routine is mostly called to set cfs_rq->curr field when a task
@@ -1318,6 +1354,9 @@ static const struct sched_class fair_sched_class = {
        .set_curr_task          = set_curr_task_fair,
        .task_tick              = task_tick_fair,
        .task_new               = task_new_fair,
+
+       .prio_changed           = prio_changed_fair,
+       .switched_to            = switched_to_fair,
 };
 
 #ifdef CONFIG_SCHED_DEBUG
index ca5374860aefbc7df952e7144c396ae183479fd1..ef7a2661fa101b56a25a8d16dc3fb4737ad7b4f3 100644 (file)
@@ -69,6 +69,33 @@ static void set_curr_task_idle(struct rq *rq)
 {
 }
 
+static void switched_to_idle(struct rq *rq, struct task_struct *p,
+                            int running)
+{
+       /* Can this actually happen?? */
+       if (running)
+               resched_task(rq->curr);
+       else
+               check_preempt_curr(rq, p);
+}
+
+static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+                             int oldprio, int running)
+{
+       /* This can happen for hot plug CPUS */
+
+       /*
+        * Reschedule if we are currently running on this runqueue and
+        * our priority decreased, or if we are not currently running on
+        * this runqueue and our priority is higher than the current's
+        */
+       if (running) {
+               if (p->prio > oldprio)
+                       resched_task(rq->curr);
+       } else
+               check_preempt_curr(rq, p);
+}
+
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
@@ -94,5 +121,9 @@ const struct sched_class idle_sched_class = {
 
        .set_curr_task          = set_curr_task_idle,
        .task_tick              = task_tick_idle,
+
+       .prio_changed           = prio_changed_idle,
+       .switched_to            = switched_to_idle,
+
        /* no .task_new for idle tasks */
 };
index a5a45104603a442d2d58742a09089abd4339fa77..57fa3d96847bb505320f65ad137528d21a7c1949 100644 (file)
@@ -779,7 +779,92 @@ static void leave_domain_rt(struct rq *rq)
        if (rq->rt.overloaded)
                rt_clear_overload(rq);
 }
+
+/*
+ * When switching away from the rt queue, we put ourselves in a position
+ * where we might want to pull RT tasks from other runqueues.
+ */
+static void switched_from_rt(struct rq *rq, struct task_struct *p,
+                          int running)
+{
+       /*
+        * If there are other RT tasks then we will reschedule
+        * and the scheduling of the other RT tasks will handle
+        * the balancing. But if we are the last RT task
+        * we may need to handle the pulling of RT tasks
+        * now.
+        */
+       if (!rq->rt.rt_nr_running)
+               pull_rt_task(rq);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * When switching a task to RT, we may overload the runqueue
+ * with RT tasks. In this case we try to push them off to
+ * other runqueues.
+ */
+static void switched_to_rt(struct rq *rq, struct task_struct *p,
+                          int running)
+{
+       int check_resched = 1;
+
+       /*
+        * If we are already running, then there's nothing
+        * that needs to be done. But if we are not running
+        * we may need to preempt the current running task.
+        * If that current running task is also an RT task
+        * then see if we can move to another run queue.
+        */
+       if (!running) {
+#ifdef CONFIG_SMP
+               if (rq->rt.overloaded && push_rt_task(rq) &&
+                   /* Don't resched if we changed runqueues */
+                   rq != task_rq(p))
+                       check_resched = 0;
+#endif /* CONFIG_SMP */
+               if (check_resched && p->prio < rq->curr->prio)
+                       resched_task(rq->curr);
+       }
+}
+
+/*
+ * Priority of the task has changed. This may cause
+ * us to initiate a push or pull.
+ */
+static void prio_changed_rt(struct rq *rq, struct task_struct *p,
+                           int oldprio, int running)
+{
+       if (running) {
+#ifdef CONFIG_SMP
+               /*
+                * If our priority decreases while running, we
+                * may need to pull tasks to this runqueue.
+                */
+               if (oldprio < p->prio)
+                       pull_rt_task(rq);
+               /*
+                * If there's a higher priority task waiting to run
+                * then reschedule.
+                */
+               if (p->prio > rq->rt.highest_prio)
+                       resched_task(p);
+#else
+               /* For UP simply resched on drop of prio */
+               if (oldprio < p->prio)
+                       resched_task(p);
 #endif /* CONFIG_SMP */
+       } else {
+               /*
+                * This task is not running, but if its priority is
+                * higher than that of the currently running task
+                * then reschedule.
+                */
+               if (p->prio < rq->curr->prio)
+                       resched_task(rq->curr);
+       }
+}
+
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
 {
@@ -837,8 +922,12 @@ const struct sched_class rt_sched_class = {
        .pre_schedule           = pre_schedule_rt,
        .post_schedule          = post_schedule_rt,
        .task_wake_up           = task_wake_up_rt,
+       .switched_from          = switched_from_rt,
 #endif
 
        .set_curr_task          = set_curr_task_rt,
        .task_tick              = task_tick_rt,
+
+       .prio_changed           = prio_changed_rt,
+       .switched_to            = switched_to_rt,
 };