Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[sfrench/cifs-2.6.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 2fe3aa853e4dbacef363b70246390f94cc67932c..7b65359875009cc44b2f900d8f5cba166eb33ac3 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3020,9 +3020,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
                 /*
                  * There are a few boundary cases this might miss but it should
                  * get called often enough that that should (hopefully) not be
-                * a real problem -- added to that it only calls on the local
-                * CPU, so if we enqueue remotely we'll miss an update, but
-                * the next tick/schedule should update.
+                * a real problem.
                  *
                  * It will not get called when we go idle, because the idle
                  * thread is a different class (!fair), nor will the utilization
@@ -3091,8 +3089,6 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
         return c1 + c2 + c3;
  }
  
-#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
-
  /*
   * Accumulate the three separate parts of the sum; d1 the remainder
   * of the last (incomplete) period, d2 the span of full periods and d3
@@ -3122,7 +3118,7 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
         u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
         u64 periods;
  
-       scale_freq = arch_scale_freq_capacity(NULL, cpu);
+       scale_freq = arch_scale_freq_capacity(cpu);
         scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
  
         delta += sa->period_contrib;
@@ -4365,12 +4361,12 @@ static inline bool cfs_bandwidth_used(void)
  
  void cfs_bandwidth_usage_inc(void)
  {
-       static_key_slow_inc(&__cfs_bandwidth_used);
+       static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
  }
  
  void cfs_bandwidth_usage_dec(void)
  {
-       static_key_slow_dec(&__cfs_bandwidth_used);
+       static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
  }
  #else /* HAVE_JUMP_LABEL */
  static bool cfs_bandwidth_used(void)
@@ -5689,8 +5685,8 @@ static int wake_wide(struct task_struct *p)
   * soonest. For the purpose of speed we only consider the waking and previous
   * CPU.
   *
- * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or
- *                     will be) idle.
+ * wake_affine_idle() - only considers 'now', it check if the waking CPU is
+ *                     cache-affine and is (or will be) idle.
   *
   * wake_affine_weight() - considers the weight to reflect the average
   *                       scheduling latency of the CPUs. This seems to work
@@ -5701,7 +5697,13 @@ static bool
  wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
                  int this_cpu, int prev_cpu, int sync)
  {
-       if (idle_cpu(this_cpu))
+       /*
+        * If this_cpu is idle, it implies the wakeup is from interrupt
+        * context. Only allow the move if cache is shared. Otherwise an
+        * interrupt intensive workload could force all tasks onto one
+        * node depending on the IO topology or IRQ affinity settings.
+        */
+       if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
                 return true;
  
         if (sync && cpu_rq(this_cpu)->nr_running == 1)
@@ -5765,12 +5767,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
         return affine;
  }
  
-static inline int task_util(struct task_struct *p);
-static int cpu_util_wake(int cpu, struct task_struct *p);
+static inline unsigned long task_util(struct task_struct *p);
+static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
  
  static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
  {
-       return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+       return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
  }
  
  /*
@@ -5950,7 +5952,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                         }
                 } else if (shallowest_idle_cpu == -1) {
                         load = weighted_cpuload(cpu_rq(i));
-                       if (load < min_load || (load == min_load && i == this_cpu)) {
+                       if (load < min_load) {
                                 min_load = load;
                                 least_loaded_cpu = i;
                         }
@@ -6247,7 +6249,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
   * capacity_orig) as it useful for predicting the capacity required after task
   * migrations (scheduler-driven DVFS).
   */
-static int cpu_util(int cpu)
+static unsigned long cpu_util(int cpu)
  {
         unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
         unsigned long capacity = capacity_orig_of(cpu);
@@ -6255,7 +6257,7 @@ static int cpu_util(int cpu)
         return (util >= capacity) ? capacity : util;
  }
  
-static inline int task_util(struct task_struct *p)
+static inline unsigned long task_util(struct task_struct *p)
  {
         return p->se.avg.util_avg;
  }
@@ -6264,7 +6266,7 @@ static inline int task_util(struct task_struct *p)
   * cpu_util_wake: Compute cpu utilization with any contributions from
   * the waking task p removed.
   */
-static int cpu_util_wake(int cpu, struct task_struct *p)
+static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
  {
         unsigned long util, capacity;
  
@@ -6449,8 +6451,7 @@ static void task_dead_fair(struct task_struct *p)
  }
  #endif /* CONFIG_SMP */
  
-static unsigned long
-wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
+static unsigned long wakeup_gran(struct sched_entity *se)
  {
         unsigned long gran = sysctl_sched_wakeup_granularity;
  
@@ -6492,7 +6493,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
         if (vdiff <= 0)
                 return -1;
  
-       gran = wakeup_gran(curr, se);
+       gran = wakeup_gran(se);
         if (vdiff > gran)
                 return 1;