sched/fair: Restructure nohz_balance_kick()
author Peter Zijlstra <peterz@infradead.org>
Thu, 21 Dec 2017 09:47:48 +0000 (10:47 +0100)
committer Ingo Molnar <mingo@kernel.org>
Fri, 9 Mar 2018 06:59:17 +0000 (07:59 +0100)
The current:

  if (nohz_kick_needed())
          nohz_balancer_kick()

is pointless complexity; fold the two into a single call and avoid the
various conditions at the call site.
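
After the fold, the call site in trigger_load_balance() reduces to one
unconditional call, with an empty !CONFIG_NO_HZ_COMMON stub keeping the
#ifdef out of the caller (a sketch of the resulting shape, taken from
the diff below):

  /* before: decision duplicated at the call site, under an #ifdef */
  if (nohz_kick_needed(rq))
          nohz_balancer_kick();

  /* after: one call; the decision logic lives in nohz_balancer_kick() */
  nohz_balancer_kick(rq);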

When we introduce multiple different needs to kick the ilb, the above
construct also becomes a problem.
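
Note that kick_ilb() already publishes the kick through a per-CPU flags
word and only sends the IPI when no kick is pending (see NOHZ_KICK_MASK
in the diff below). With the decision folded into nohz_balancer_kick(),
a new kick reason only has to OR in another flag bit; a sketch, assuming
a hypothetical NOHZ_STATS_KICK bit (the actual flag split is left to a
later patch):

  flags = atomic_fetch_or(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));
  if (flags & NOHZ_KICK_MASK)
          return;         /* a kick is already pending on ilb_cpu */
  smp_send_reschedule(ilb_cpu);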

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/fair.c

index fc058967c999d1f0e8b2733667033871e19d40b7..fa483d889f07f86bfa1e47d3c1c87c87be60a5c0 100644 (file)
@@ -9065,12 +9065,29 @@ static inline int find_new_ilb(void)
        return nr_cpu_ids;
 }
 
+static inline void set_cpu_sd_state_busy(void)
+{
+       struct sched_domain *sd;
+       int cpu = smp_processor_id();
+
+       rcu_read_lock();
+       sd = rcu_dereference(per_cpu(sd_llc, cpu));
+
+       if (!sd || !sd->nohz_idle)
+               goto unlock;
+       sd->nohz_idle = 0;
+
+       atomic_inc(&sd->shared->nr_busy_cpus);
+unlock:
+       rcu_read_unlock();
+}
+
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
  * nohz_load_balancer CPU (if there is one), otherwise fall back to any idle
  * CPU (if there is one).
  */
-static void nohz_balancer_kick(void)
+static void kick_ilb(void)
 {
        unsigned int flags;
        int ilb_cpu;
@@ -9085,6 +9102,7 @@ static void nohz_balancer_kick(void)
        flags = atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(ilb_cpu));
        if (flags & NOHZ_KICK_MASK)
                return;
+
        /*
         * Use smp_send_reschedule() instead of resched_cpu().
         * This way we generate a sched IPI on the target CPU which
@@ -9092,7 +9110,94 @@ static void nohz_balancer_kick(void)
         * will be run before returning from the IPI.
         */
        smp_send_reschedule(ilb_cpu);
-       return;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer in the presence
+ * of an idle CPU in the system.
+ *   - This rq has more than one task.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At the parent of the LLC scheduler domain level, this CPU's scheduler
+ *     group has multiple busy CPUs.
+ *   - For SD_ASYM_PACKING, if the lower-numbered CPUs in the scheduler
+ *     domain span are idle.
+ */
+static void nohz_balancer_kick(struct rq *rq)
+{
+       unsigned long now = jiffies;
+       struct sched_domain_shared *sds;
+       struct sched_domain *sd;
+       int nr_busy, i, cpu = rq->cpu;
+       bool kick = false;
+
+       if (unlikely(rq->idle_balance))
+               return;
+
+       /*
+        * We may have recently been in ticked or tickless idle mode. At the
+        * first busy tick after returning from idle, we update the busy stats.
+        */
+       set_cpu_sd_state_busy();
+       nohz_balance_exit_idle(cpu);
+
+       /*
+        * No CPUs are in tickless mode, so there is no need for NOHZ idle
+        * load balancing.
+        */
+       if (likely(!atomic_read(&nohz.nr_cpus)))
+               return;
+
+       if (time_before(now, nohz.next_balance))
+               return;
+
+       if (rq->nr_running >= 2) {
+               kick = true;
+               goto out;
+       }
+
+       rcu_read_lock();
+       sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+       if (sds) {
+               /*
+                * XXX: write a coherent comment on why we do this.
+                * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
+                */
+               nr_busy = atomic_read(&sds->nr_busy_cpus);
+               if (nr_busy > 1) {
+                       kick = true;
+                       goto unlock;
+               }
+
+       }
+
+       sd = rcu_dereference(rq->sd);
+       if (sd) {
+               if ((rq->cfs.h_nr_running >= 1) &&
+                               check_cpu_capacity(rq, sd)) {
+                       kick = true;
+                       goto unlock;
+               }
+       }
+
+       sd = rcu_dereference(per_cpu(sd_asym, cpu));
+       if (sd) {
+               for_each_cpu(i, sched_domain_span(sd)) {
+                       if (i == cpu ||
+                           !cpumask_test_cpu(i, nohz.idle_cpus_mask))
+                               continue;
+
+                       if (sched_asym_prefer(i, cpu)) {
+                               kick = true;
+                               goto unlock;
+                       }
+               }
+       }
+unlock:
+       rcu_read_unlock();
+out:
+       if (kick)
+               kick_ilb();
 }
 
 void nohz_balance_exit_idle(unsigned int cpu)
@@ -9112,23 +9217,6 @@ void nohz_balance_exit_idle(unsigned int cpu)
        }
 }
 
-static inline void set_cpu_sd_state_busy(void)
-{
-       struct sched_domain *sd;
-       int cpu = smp_processor_id();
-
-       rcu_read_lock();
-       sd = rcu_dereference(per_cpu(sd_llc, cpu));
-
-       if (!sd || !sd->nohz_idle)
-               goto unlock;
-       sd->nohz_idle = 0;
-
-       atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
-       rcu_read_unlock();
-}
-
 void set_cpu_sd_state_idle(void)
 {
        struct sched_domain *sd;
@@ -9171,6 +9259,8 @@ void nohz_balance_enter_idle(int cpu)
        atomic_inc(&nohz.nr_cpus);
        atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
+#else
+static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -9369,90 +9459,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 
        return true;
 }
-
-/*
- * Current heuristic for kicking the idle load balancer in the presence
- * of an idle CPU in the system.
- *   - This rq has more than one task.
- *   - This rq has at least one CFS task and the capacity of the CPU is
- *     significantly reduced because of RT tasks or IRQs.
- *   - At parent of LLC scheduler domain level, this CPU's scheduler group has
- *     multiple busy CPUs.
- *   - For SD_ASYM_PACKING, if the lower numbered CPU's in the scheduler
- *     domain span are idle.
- */
-static inline bool nohz_kick_needed(struct rq *rq)
-{
-       unsigned long now = jiffies;
-       struct sched_domain_shared *sds;
-       struct sched_domain *sd;
-       int nr_busy, i, cpu = rq->cpu;
-       bool kick = false;
-
-       if (unlikely(rq->idle_balance))
-               return false;
-
-       /*
-       * We may be recently in ticked or tickless idle mode. At the first
-       * busy tick after returning from idle, we will update the busy stats.
-       */
-       set_cpu_sd_state_busy();
-       nohz_balance_exit_idle(cpu);
-
-       /*
-        * None are in tickless mode and hence no need for NOHZ idle load
-        * balancing.
-        */
-       if (likely(!atomic_read(&nohz.nr_cpus)))
-               return false;
-
-       if (time_before(now, nohz.next_balance))
-               return false;
-
-       if (rq->nr_running >= 2)
-               return true;
-
-       rcu_read_lock();
-       sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-       if (sds) {
-               /*
-                * XXX: write a coherent comment on why we do this.
-                * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
-                */
-               nr_busy = atomic_read(&sds->nr_busy_cpus);
-               if (nr_busy > 1) {
-                       kick = true;
-                       goto unlock;
-               }
-
-       }
-
-       sd = rcu_dereference(rq->sd);
-       if (sd) {
-               if ((rq->cfs.h_nr_running >= 1) &&
-                               check_cpu_capacity(rq, sd)) {
-                       kick = true;
-                       goto unlock;
-               }
-       }
-
-       sd = rcu_dereference(per_cpu(sd_asym, cpu));
-       if (sd) {
-               for_each_cpu(i, sched_domain_span(sd)) {
-                       if (i == cpu ||
-                           !cpumask_test_cpu(i, nohz.idle_cpus_mask))
-                               continue;
-
-                       if (sched_asym_prefer(i, cpu)) {
-                               kick = true;
-                               goto unlock;
-                       }
-               }
-       }
-unlock:
-       rcu_read_unlock();
-       return kick;
-}
 #else
 static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 {
@@ -9497,10 +9503,8 @@ void trigger_load_balance(struct rq *rq)
 
        if (time_after_eq(jiffies, rq->next_balance))
                raise_softirq(SCHED_SOFTIRQ);
-#ifdef CONFIG_NO_HZ_COMMON
-       if (nohz_kick_needed(rq))
-               nohz_balancer_kick();
-#endif
+
+       nohz_balancer_kick(rq);
 }
 
 static void rq_online_fair(struct rq *rq)