sched/pelt: Sync util/runnable_sum with PELT window when propagating

author Vincent Guittot <vincent.guittot@linaro.org>

Wed, 6 May 2020 15:53:01 +0000 (17:53 +0200)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 19 May 2020 18:34:14 +0000 (20:34 +0200)
author Vincent Guittot <vincent.guittot@linaro.org>
Wed, 6 May 2020 15:53:01 +0000 (17:53 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 19 May 2020 18:34:14 +0000 (20:34 +0200)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 4e586863827b23871bdcc948ce253da82797128e..44b0c8edc2607c3e679ad7b49195aa17198a74a1 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3441,52 +3441,46 @@ static inline void
  update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
  {
         long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
  
         /* Nothing to update */
         if (!delta)
                 return;
  
-       /*
-        * The relation between sum and avg is:
-        *
-        *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-        *
-        * however, the PELT windows are not aligned between grq and gse.
-        */
-
         /* Set new sched_entity's utilization */
         se->avg.util_avg = gcfs_rq->avg.util_avg;
-       se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+       se->avg.util_sum = se->avg.util_avg * divider;
  
         /* Update parent cfs_rq utilization */
         add_positive(&cfs_rq->avg.util_avg, delta);
-       cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+       cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
  }
  
  static inline void
  update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
  {
         long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
  
         /* Nothing to update */
         if (!delta)
                 return;
  
-       /*
-        * The relation between sum and avg is:
-        *
-        *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-        *
-        * however, the PELT windows are not aligned between grq and gse.
-        */
-
         /* Set new sched_entity's runnable */
         se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-       se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX;
+       se->avg.runnable_sum = se->avg.runnable_avg * divider;
  
         /* Update parent cfs_rq runnable */
         add_positive(&cfs_rq->avg.runnable_avg, delta);
-       cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX;
+       cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
  }
  
  static inline void
@@ -3496,19 +3490,26 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         unsigned long load_avg;
         u64 load_sum = 0;
         s64 delta_sum;
+       u32 divider;
  
         if (!runnable_sum)
                 return;
  
         gcfs_rq->prop_runnable_sum = 0;
  
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+
         if (runnable_sum >= 0) {
                 /*
                  * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
                  * the CPU is saturated running == runnable.
                  */
                 runnable_sum += se->avg.load_sum;
-               runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+               runnable_sum = min_t(long, runnable_sum, divider);
         } else {
                 /*
                  * Estimate the new unweighted runnable_sum of the gcfs_rq by
@@ -3533,7 +3534,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         runnable_sum = max(runnable_sum, running_sum);
  
         load_sum = (s64)se_weight(se) * runnable_sum;
-       load_avg = div_s64(load_sum, LOAD_AVG_MAX);
+       load_avg = div_s64(load_sum, divider);
  
         delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
         delta_avg = load_avg - se->avg.load_avg;
@@ -3697,6 +3698,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
   */
  static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
         u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
  
         /*
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c

index b647d04d9c8bc674e2a5bb2d2780c08c10818ff5..b4b1ff96642ff830185383af901eecdc9eb39158 100644 (file)
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
         return 1;
  }
  
+/*
+ * When syncing *_avg with *_sum, we must take into account the current
+ * position in the PELT segment, otherwise the remaining part of the segment
+ * will be considered as idle time whereas it's not yet elapsed and this will
+ * generate unwanted oscillation in the range [1002..1024[.
+ *
+ * The max value of *_sum varies with the position in the time segment and is
+ * equal to:
+ *
+ *   LOAD_AVG_MAX*y + sa->period_contrib
+ *
+ * which can be simplified into:
+ *
+ *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
+ *
+ * The same care must be taken when a sched entity is added, updated or
+ * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
+ * and the cfs rq, to which they are attached, have the same position in the
+ * time segment because they use the same clock. This means that we can use
+ * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
+ * if it's more convenient.
+ */
  static __always_inline void
  ___update_load_avg(struct sched_avg *sa, unsigned long load)
  {
author	Vincent Guittot <vincent.guittot@linaro.org>
	Wed, 6 May 2020 15:53:01 +0000 (17:53 +0200)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 19 May 2020 18:34:14 +0000 (20:34 +0200)
kernel/sched/fair.c		patch \| blob \| history
kernel/sched/pelt.c		patch \| blob \| history