cpufreq/schedutil: Rewrite CPUFREQ_RT support
author		Peter Zijlstra <peterz@infradead.org>
		Wed, 20 Dec 2017 15:26:12 +0000 (16:26 +0100)
committer	Ingo Molnar <mingo@kernel.org>
		Fri, 9 Mar 2018 06:59:15 +0000 (07:59 +0100)
Instead of trying to duplicate scheduler state to track if an RT task
is running, directly use the scheduler runqueue state for it.

This vastly simplifies things and fixes a number of bugs related to
sugov and the scheduler getting out of sync wrt this state.

As a consequence we now also update the remote cfs/dl state when
iterating the shared mask.
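
For reference, a condensed sketch of the resulting aggregation (the same
logic as the cpufreq_schedutil.c hunk below): the RT condition is read
straight from rq->rt.rt_nr_running instead of a cached sugov flag.

	static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
	{
		struct rq *rq = cpu_rq(sg_cpu->cpu);
		unsigned long util;

		if (rq->rt.rt_nr_running) {
			/* RT tasks queued: request maximum capacity. */
			util = sg_cpu->max;
		} else {
			/* Otherwise sum DL and, if runnable, CFS utilization. */
			util = sg_cpu->util_dl;
			if (rq->cfs.h_nr_running)
				util += sg_cpu->util_cfs;
		}

		return min(util, sg_cpu->max);
	}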

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/sched/cpufreq.h
kernel/sched/cpufreq_schedutil.c
kernel/sched/rt.c

index d963cfd3a0c277b3b809e82130a0ba0dbc71b83b..b48f2fb3b316dcc07268f7718ca5d98bcd4010d4 100644
@@ -8,8 +8,7 @@
  * Interface between cpufreq drivers and the scheduler:
  */
 
-#define SCHED_CPUFREQ_RT       (1U << 0)
-#define SCHED_CPUFREQ_IOWAIT   (1U << 1)
+#define SCHED_CPUFREQ_IOWAIT   (1U << 0)
 
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
index feb5f89020f2d629af6bb28500e8c8a83f725818..89fe78ecb88ca431064bb10b30dbabe5a7f70a1d 100644
@@ -57,7 +57,6 @@ struct sugov_cpu {
        unsigned long           util_cfs;
        unsigned long           util_dl;
        unsigned long           max;
-       unsigned int            flags;
 
        /* The field below is for single-CPU policies only: */
 #ifdef CONFIG_NO_HZ_COMMON
@@ -183,17 +182,28 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
 
 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
+       struct rq *rq = cpu_rq(sg_cpu->cpu);
+       unsigned long util;
+
+       if (rq->rt.rt_nr_running) {
+               util = sg_cpu->max;
+       } else {
+               util = sg_cpu->util_dl;
+               if (rq->cfs.h_nr_running)
+                       util += sg_cpu->util_cfs;
+       }
+
        /*
         * Ideally we would like to set util_dl as min/guaranteed freq and
         * util_cfs + util_dl as requested freq. However, cpufreq is not yet
         * ready for such an interface. So, we only do the latter for now.
         */
-       return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
+       return min(util, sg_cpu->max);
 }
 
-static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
 {
-       if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
+       if (flags & SCHED_CPUFREQ_IOWAIT) {
                if (sg_cpu->iowait_boost_pending)
                        return;
 
@@ -262,12 +272,11 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 {
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
-       struct cpufreq_policy *policy = sg_policy->policy;
        unsigned long util, max;
        unsigned int next_f;
        bool busy;
 
-       sugov_set_iowait_boost(sg_cpu, time);
+       sugov_set_iowait_boost(sg_cpu, time, flags);
        sg_cpu->last_update = time;
 
        if (!sugov_should_update_freq(sg_policy, time))
@@ -275,25 +284,22 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 
        busy = sugov_cpu_is_busy(sg_cpu);
 
-       if (flags & SCHED_CPUFREQ_RT) {
-               next_f = policy->cpuinfo.max_freq;
-       } else {
-               sugov_get_util(sg_cpu);
-               max = sg_cpu->max;
-               util = sugov_aggregate_util(sg_cpu);
-               sugov_iowait_boost(sg_cpu, &util, &max);
-               next_f = get_next_freq(sg_policy, util, max);
-               /*
-                * Do not reduce the frequency if the CPU has not been idle
-                * recently, as the reduction is likely to be premature then.
-                */
-               if (busy && next_f < sg_policy->next_freq) {
-                       next_f = sg_policy->next_freq;
+       sugov_get_util(sg_cpu);
+       max = sg_cpu->max;
+       util = sugov_aggregate_util(sg_cpu);
+       sugov_iowait_boost(sg_cpu, &util, &max);
+       next_f = get_next_freq(sg_policy, util, max);
+       /*
+        * Do not reduce the frequency if the CPU has not been idle
+        * recently, as the reduction is likely to be premature then.
+        */
+       if (busy && next_f < sg_policy->next_freq) {
+               next_f = sg_policy->next_freq;
 
-                       /* Reset cached freq as next_freq has changed */
-                       sg_policy->cached_raw_freq = 0;
-               }
+               /* Reset cached freq as next_freq has changed */
+               sg_policy->cached_raw_freq = 0;
        }
+
        sugov_update_commit(sg_policy, time, next_f);
 }
 
@@ -309,6 +315,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
                unsigned long j_util, j_max;
                s64 delta_ns;
 
+               sugov_get_util(j_sg_cpu);
+
                /*
                 * If the CFS CPU utilization was last updated before the
                 * previous frequency update and the time elapsed between the
@@ -322,21 +330,15 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
                if (delta_ns > TICK_NSEC) {
                        j_sg_cpu->iowait_boost = 0;
                        j_sg_cpu->iowait_boost_pending = false;
-                       j_sg_cpu->util_cfs = 0;
-                       if (j_sg_cpu->util_dl == 0)
-                               continue;
                }
-               if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
-                       return policy->cpuinfo.max_freq;
 
                j_max = j_sg_cpu->max;
                j_util = sugov_aggregate_util(j_sg_cpu);
+               sugov_iowait_boost(j_sg_cpu, &j_util, &j_max);
                if (j_util * max > j_max * util) {
                        util = j_util;
                        max = j_max;
                }
-
-               sugov_iowait_boost(j_sg_cpu, &util, &max);
        }
 
        return get_next_freq(sg_policy, util, max);
@@ -351,18 +353,11 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
 
        raw_spin_lock(&sg_policy->update_lock);
 
-       sugov_get_util(sg_cpu);
-       sg_cpu->flags = flags;
-
-       sugov_set_iowait_boost(sg_cpu, time);
+       sugov_set_iowait_boost(sg_cpu, time, flags);
        sg_cpu->last_update = time;
 
        if (sugov_should_update_freq(sg_policy, time)) {
-               if (flags & SCHED_CPUFREQ_RT)
-                       next_f = sg_policy->policy->cpuinfo.max_freq;
-               else
-                       next_f = sugov_next_freq_shared(sg_cpu, time);
-
+               next_f = sugov_next_freq_shared(sg_cpu, time);
                sugov_update_commit(sg_policy, time, next_f);
        }
 
@@ -673,7 +668,6 @@ static int sugov_start(struct cpufreq_policy *policy)
                memset(sg_cpu, 0, sizeof(*sg_cpu));
                sg_cpu->cpu                     = cpu;
                sg_cpu->sg_policy               = sg_policy;
-               sg_cpu->flags                   = 0;
                sg_cpu->iowait_boost_max        = policy->cpuinfo.max_freq;
        }
 
index 4f4fd3b157f1c46f8341b952ffb79079d616b0ee..86b77987435e1f8f2745c035016bf85d223ca690 100644
@@ -957,9 +957,6 @@ static void update_curr_rt(struct rq *rq)
        if (unlikely((s64)delta_exec <= 0))
                return;
 
-       /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-       cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
-
        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));
 
@@ -1001,6 +998,9 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 
        sub_nr_running(rq, rt_rq->rt_nr_running);
        rt_rq->rt_queued = 0;
+
+       /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+       cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1017,6 +1017,9 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
        add_nr_running(rq, rt_rq->rt_nr_running);
        rt_rq->rt_queued = 1;
+
+       /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+       cpufreq_update_util(rq, 0);
 }
 
 #if defined CONFIG_SMP