Merge branch 'auto-ftrace-next' into tracing/for-linus

[sfrench/cifs-2.6.git] / kernel / sched_fair.c
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index e87f1a52f625ce7304103d3f60e8ee1adc532079..f2aa987027d695750f2ca4b8f917d02171eeb3b8 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -726,21 +726,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
                 __enqueue_entity(cfs_rq, se);
  }
  
-static void update_avg(u64 *avg, u64 sample)
-{
-       s64 diff = sample - *avg;
-       *avg += diff >> 3;
-}
-
-static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-       if (!se->last_wakeup)
-               return;
-
-       update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
-       se->last_wakeup = 0;
-}
-
  static void
  dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  {
@@ -751,7 +736,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  
         update_stats_dequeue(cfs_rq, se);
         if (sleep) {
-               update_avg_stats(cfs_rq, se);
  #ifdef CONFIG_SCHEDSTATS
                 if (entity_is_task(se)) {
                         struct task_struct *tsk = task_of(se);
@@ -921,7 +905,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
                 hrtick_start(rq, delta, requeue);
         }
  }
-#else
+#else /* !CONFIG_SCHED_HRTICK */
  static inline void
  hrtick_start_fair(struct rq *rq, struct task_struct *p)
  {
@@ -1062,7 +1046,7 @@ static int wake_idle(int cpu, struct task_struct *p)
         }
         return cpu;
  }
-#else
+#else /* !ARCH_HAS_SCHED_WAKE_IDLE */
  static inline int wake_idle(int cpu, struct task_struct *p)
  {
         return cpu;
@@ -1074,6 +1058,27 @@ static inline int wake_idle(int cpu, struct task_struct *p)
  static const struct sched_class fair_sched_class;
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * effective_load() calculates the load change as seen from the root_task_group
+ *
+ * Adding load to a group doesn't make a group heavier, but can cause movement
+ * of group shares between cpus. Assuming the shares were perfectly aligned one
+ * can calculate the shift in shares.
+ *
+ * The problem is that perfectly aligning the shares is rather expensive, hence
+ * we try to avoid doing that too often - see update_shares(), which ratelimits
+ * this change.
+ *
+ * We compensate for this by not only taking the current delta into account,
+ * but also considering the delta between when the shares were last adjusted
+ * and now.
+ *
+ * We still saw a performance dip; tracing showed that when balancing between
+ * cgroup:/ and cgroup:/foo the number of affine wakeups increased
+ * significantly. Therefore try to bias the error in the direction of failing
+ * the affine wakeup.
+ *
+ */
  static long effective_load(struct task_group *tg, int cpu,
                 long wl, long wg)
  {
@@ -1083,6 +1088,13 @@ static long effective_load(struct task_group *tg, int cpu,
         if (!tg->parent)
                 return wl;
  
+       /*
+        * By not taking the decrease of shares on the other cpu into
+        * account our error leans towards reducing the affine wakeups.
+        */
+       if (!wl && sched_feat(ASYM_EFF_LOAD))
+               return wl;
+
         /*
          * Instead of using this increment, also add the difference
          * between when the shares were last updated and now.
@@ -1168,9 +1180,9 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
          * a reasonable amount of time then attract this newly
          * woken task:
          */
-       if (sync && balanced && curr->sched_class == &fair_sched_class) {
+       if (sync && balanced) {
                 if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-                               p->se.avg_overlap < sysctl_sched_migration_cost)
+                   p->se.avg_overlap < sysctl_sched_migration_cost)
                         return 1;
         }
  
@@ -1331,7 +1343,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
                 return;
         }
  
-       se->last_wakeup = se->sum_exec_runtime;
         if (unlikely(se == pse))
                 return;
  
@@ -1558,7 +1569,7 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
  
         return 0;
  }
-#endif
+#endif /* CONFIG_SMP */
  
  /*
   * scheduler tick hitting a task of our scheduling class: