Merge tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 60fdc0faf1c9d96fe60987b48515c73f3e43dca0..8cd1b5a8f61331a642db3c4b56c2383cd7e4fca3 100644
@@ -143,11 +143,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
  */
-#ifdef CONFIG_PREEMPT_RT
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#else
-const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#endif
+const_debug unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
 
 __read_mostly int scheduler_running;
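Note: the PREEMPT_RT/!PREEMPT_RT split for the migration batch size moves behind a single constant. The header hunk is not part of this diff; a minimal sketch of how SCHED_NR_MIGRATE_BREAK is presumably provided, reusing the values from the removed #ifdef above:

/* kernel/sched/sched.h (assumed location): */
#ifdef CONFIG_PREEMPT_RT
# define SCHED_NR_MIGRATE_BREAK 8
#else
# define SCHED_NR_MIGRATE_BREAK 32
#endif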
 
@@ -482,8 +478,7 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
  *                             p->se.load, p->rt_priority,
  *                             p->dl.dl_{runtime, deadline, period, flags, bw, density}
  *  - sched_setnuma():         p->numa_preferred_nid
- *  - sched_move_task()/
- *    cpu_cgroup_fork():       p->sched_task_group
+ *  - sched_move_task():       p->sched_task_group
  *  - uclamp_update_active()   p->uclamp*
  *
  * p->state <- TASK_*:
@@ -2329,7 +2324,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
        rq = cpu_rq(new_cpu);
 
        rq_lock(rq, rf);
-       BUG_ON(task_cpu(p) != new_cpu);
+       WARN_ON_ONCE(task_cpu(p) != new_cpu);
        activate_task(rq, p, 0);
        check_preempt_curr(rq, p, 0);
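Note: the sanity check is downgraded from a fatal BUG_ON() to a one-shot warning. Generic illustration of the two assertion styles (the bail-out branch is hypothetical; the hunk above simply warns and carries on):

        bool mismatch = (task_cpu(p) != new_cpu);

        BUG_ON(mismatch);               /* old: BUG() oops, kills the task (or the box with panic_on_oops) */
        if (WARN_ON_ONCE(mismatch))     /* new: one backtrace + taint, execution continues                 */
                ;                       /* a caller could also degrade gracefully here                     */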
 
@@ -2779,7 +2774,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                return -EINVAL;
        }
 
-       if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
+       if (task_on_cpu(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
                /*
                 * MIGRATE_ENABLE gets here because 'p == current', but for
                 * anything else we cannot do is_migration_disabled(), punt
@@ -3255,12 +3250,12 @@ out:
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
- * If @match_state is nonzero, it's the @p->state value just checked and
- * not expected to change.  If it changes, i.e. @p might have woken up,
- * then return zero.  When we succeed in waiting for @p to be off its CPU,
- * we return a positive number (its total switch count).  If a second call
- * a short while later returns the same number, the caller can be sure that
- * @p has remained unscheduled the whole time.
+ * Wait for the thread to block in any of the states set in @match_state.
+ * If it changes, i.e. @p might have woken up, then return zero.  When we
+ * succeed in waiting for @p to be off its CPU, we return a positive number
+ * (its total switch count).  If a second call a short while later returns the
+ * same number, the caller can be sure that @p has remained unscheduled the
+ * whole time.
  *
  * The caller must ensure that the task *will* unschedule sometime soon,
  * else this function might spin for a *long* time. This function can't
@@ -3291,12 +3286,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
                 *
                 * NOTE! Since we don't hold any locks, it's not
                 * even sure that "rq" stays as the right runqueue!
-                * But we don't care, since "task_running()" will
+                * But we don't care, since "task_on_cpu()" will
                 * return false if the runqueue has changed and p
                 * is actually now running somewhere else!
                 */
-               while (task_running(rq, p)) {
-                       if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+               while (task_on_cpu(rq, p)) {
+                       if (!(READ_ONCE(p->__state) & match_state))
                                return 0;
                        cpu_relax();
                }
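Note: task_running() is renamed to task_on_cpu() throughout this file; behaviour is unchanged, the new name just avoids confusion with the TASK_RUNNING state. Presumed shape of the renamed helper in kernel/sched/sched.h (not shown in this diff):

static inline int task_on_cpu(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
        return p->on_cpu;
#else
        return task_current(rq, p);
#endif
}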
@@ -3308,10 +3303,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
                 */
                rq = task_rq_lock(p, &rf);
                trace_sched_wait_task(p);
-               running = task_running(rq, p);
+               running = task_on_cpu(rq, p);
                queued = task_on_rq_queued(p);
                ncsw = 0;
-               if (!match_state || READ_ONCE(p->__state) == match_state)
+               if (READ_ONCE(p->__state) & match_state)
                        ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                task_rq_unlock(rq, p, &rf);
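Note: @match_state turns from one exact state value (with 0 meaning "don't check") into a bitmask of acceptable states, hence the masked tests above. A hypothetical caller under the new contract, also showing why LONG_MIN is folded into the return value (the MSB keeps a successful wait nonzero even when nvcsw is still 0):

        unsigned long first, second;

        first  = wait_task_inactive(p, TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE);
        second = wait_task_inactive(p, TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE);

        if (!first)
                ;       /* @p left the requested states, e.g. it woke up    */
        else if (first == second)
                ;       /* @p was never scheduled in between the two calls  */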
 
@@ -6430,7 +6425,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
                        prev->sched_contributes_to_load =
                                (prev_state & TASK_UNINTERRUPTIBLE) &&
                                !(prev_state & TASK_NOLOAD) &&
-                               !(prev->flags & PF_FROZEN);
+                               !(prev_state & TASK_FROZEN);
 
                        if (prev->sched_contributes_to_load)
                                rq->nr_uninterruptible++;
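Note: with the freezer rework, "frozen" is a task state bit (TASK_FROZEN) rather than a p->flags bit (PF_FROZEN), so the whole loadavg predicate is computed from one prev_state snapshot. Equivalent single-mask form of the three tests above, for illustration only:

        prev->sched_contributes_to_load =
                 (prev_state & TASK_UNINTERRUPTIBLE) &&
                !(prev_state & (TASK_NOLOAD | TASK_FROZEN));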
@@ -8650,7 +8645,7 @@ again:
        if (curr->sched_class != p->sched_class)
                goto out_unlock;
 
-       if (task_running(p_rq, p) || !task_is_running(p))
+       if (task_on_cpu(p_rq, p) || !task_is_running(p))
                goto out_unlock;
 
        yielded = curr->sched_class->yield_to_task(rq, p);
@@ -8862,7 +8857,7 @@ void sched_show_task(struct task_struct *p)
        if (pid_alive(p))
                ppid = task_pid_nr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
-       pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
+       pr_cont(" stack:%-5lu pid:%-5d ppid:%-6d flags:0x%08lx\n",
                free, task_pid_nr(p), ppid,
                read_task_thread_flags(p));
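Note: the format change only flips field justification so that short values are no longer padded with leading blanks in the middle of the line. Userspace printf() has the same width semantics; standalone illustration:

#include <stdio.h>

int main(void)
{
        printf("pid:%5d ppid:%5d\n", 42, 7);    /* "pid:   42 ppid:    7" */
        printf("pid:%-5d ppid:%-5d\n", 42, 7);  /* "pid:42    ppid:7    " */
        return 0;
}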
 
@@ -8890,7 +8885,7 @@ state_filter_match(unsigned long state_filter, struct task_struct *p)
         * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
         * TASK_KILLABLE).
         */
-       if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
+       if (state_filter == TASK_UNINTERRUPTIBLE && (state & TASK_NOLOAD))
                return false;
 
        return true;
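Note: TASK_IDLE is the composite (TASK_UNINTERRUPTIBLE | TASK_NOLOAD), so testing the NOLOAD bit keeps skipping idle-style sleeps even when extra state bits are set alongside it, which an exact comparison would miss. Illustration (the TASK_FREEZABLE combination is an assumption based on the freezer rework in this merge):

        unsigned int state = TASK_IDLE | TASK_FREEZABLE;        /* assumed combination */

        if (state == TASK_IDLE)         /* old filter: false -> task would be printed */
                ;
        if (state & TASK_NOLOAD)        /* new filter: true  -> task is skipped       */
                ;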
@@ -9602,9 +9597,6 @@ LIST_HEAD(task_groups);
 static struct kmem_cache *task_group_cache __read_mostly;
 #endif
 
-DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
-DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
-
 void __init sched_init(void)
 {
        unsigned long ptr = 0;
@@ -9648,14 +9640,6 @@ void __init sched_init(void)
 
 #endif /* CONFIG_RT_GROUP_SCHED */
        }
-#ifdef CONFIG_CPUMASK_OFFSTACK
-       for_each_possible_cpu(i) {
-               per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
-                       cpumask_size(), GFP_KERNEL, cpu_to_node(i));
-               per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
-                       cpumask_size(), GFP_KERNEL, cpu_to_node(i));
-       }
-#endif /* CONFIG_CPUMASK_OFFSTACK */
 
        init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
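Note: the open-coded CONFIG_CPUMASK_OFFSTACK allocations for load_balance_mask and select_rq_mask leave core.c (presumably to live next to their users). For reference, the helper-based pattern such per-CPU masks normally use, with a hypothetical example_mask that is not part of this diff:

static DEFINE_PER_CPU(cpumask_var_t, example_mask);     /* hypothetical */

static int __init example_alloc_masks(void)
{
        int i;

        for_each_possible_cpu(i) {
                /* zalloc_cpumask_var_node() hides the OFFSTACK distinction */
                if (!zalloc_cpumask_var_node(&per_cpu(example_mask, i),
                                             GFP_KERNEL, cpu_to_node(i)))
                        return -ENOMEM;
        }
        return 0;
}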
 
@@ -10164,7 +10148,7 @@ void sched_release_group(struct task_group *tg)
        spin_unlock_irqrestore(&task_group_lock, flags);
 }
 
-static void sched_change_group(struct task_struct *tsk, int type)
+static void sched_change_group(struct task_struct *tsk)
 {
        struct task_group *tg;
 
@@ -10180,7 +10164,7 @@ static void sched_change_group(struct task_struct *tsk, int type)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        if (tsk->sched_class->task_change_group)
-               tsk->sched_class->task_change_group(tsk, type);
+               tsk->sched_class->task_change_group(tsk);
        else
 #endif
                set_task_rq(tsk, task_cpu(tsk));
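Note: with cpu_cgroup_fork() removed further down, sched_change_group() has a single caller and a single purpose, so the TASK_MOVE_GROUP/TASK_SET_GROUP discriminator also disappears from the class hook. Presumed shape of the hook after this series (kernel/sched/sched.h, not shown here):

struct sched_class {
        /* ... other members elided ... */
#ifdef CONFIG_FAIR_GROUP_SCHED
        void (*task_change_group)(struct task_struct *p);
#endif
};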
@@ -10211,7 +10195,7 @@ void sched_move_task(struct task_struct *tsk)
        if (running)
                put_prev_task(rq, tsk);
 
-       sched_change_group(tsk, TASK_MOVE_GROUP);
+       sched_change_group(tsk);
 
        if (queued)
                enqueue_task(rq, tsk, queue_flags);
@@ -10289,53 +10273,19 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
        sched_unregister_group(tg);
 }
 
-/*
- * This is called before wake_up_new_task(), therefore we really only
- * have to set its group bits, all the other stuff does not apply.
- */
-static void cpu_cgroup_fork(struct task_struct *task)
-{
-       struct rq_flags rf;
-       struct rq *rq;
-
-       rq = task_rq_lock(task, &rf);
-
-       update_rq_clock(rq);
-       sched_change_group(task, TASK_SET_GROUP);
-
-       task_rq_unlock(rq, task, &rf);
-}
-
+#ifdef CONFIG_RT_GROUP_SCHED
 static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
        struct task_struct *task;
        struct cgroup_subsys_state *css;
-       int ret = 0;
 
        cgroup_taskset_for_each(task, css, tset) {
-#ifdef CONFIG_RT_GROUP_SCHED
                if (!sched_rt_can_attach(css_tg(css), task))
                        return -EINVAL;
-#endif
-               /*
-                * Serialize against wake_up_new_task() such that if it's
-                * running, we're sure to observe its full state.
-                */
-               raw_spin_lock_irq(&task->pi_lock);
-               /*
-                * Avoid calling sched_move_task() before wake_up_new_task()
-                * has happened. This would lead to problems with PELT, due to
-                * move wanting to detach+attach while we're not attached yet.
-                */
-               if (READ_ONCE(task->__state) == TASK_NEW)
-                       ret = -EINVAL;
-               raw_spin_unlock_irq(&task->pi_lock);
-
-               if (ret)
-                       break;
        }
-       return ret;
+       return 0;
 }
+#endif
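Note: once the TASK_NEW serialization against wake_up_new_task() is gone, the only remaining reason for a can_attach handler is the RT bandwidth test, so the whole callback can be compiled out for !CONFIG_RT_GROUP_SCHED. For reference, sched_rt_can_attach() (kernel/sched/rt.c) amounts to roughly the following check:

int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
        /* Don't accept RT tasks into a group that has no RT runtime */
        if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
                return 0;

        return 1;
}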
 
 static void cpu_cgroup_attach(struct cgroup_taskset *tset)
 {
@@ -11171,8 +11121,9 @@ struct cgroup_subsys cpu_cgrp_subsys = {
        .css_released   = cpu_cgroup_css_released,
        .css_free       = cpu_cgroup_css_free,
        .css_extra_stat_show = cpu_extra_stat_show,
-       .fork           = cpu_cgroup_fork,
+#ifdef CONFIG_RT_GROUP_SCHED
        .can_attach     = cpu_cgroup_can_attach,
+#endif
        .attach         = cpu_cgroup_attach,
        .legacy_cftypes = cpu_legacy_files,
        .dfl_cftypes    = cpu_files,
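Note: dropping .fork and making .can_attach conditional relies on cgroup core treating absent callbacks as no-ops: a subsystem hook is only invoked when the pointer is set, roughly as sketched below (simplified from the attach path in kernel/cgroup/cgroup.c):

        if (ss->can_attach) {
                ret = ss->can_attach(tset);
                if (ret)
                        goto out_cancel_attach;
        }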