sched/fair: use folio_xchg_access_time() in numa_hint_fault_latency()
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8dbff6e7ad4f53af8965f5ab5ace0c1f2a758c63..3ee9d3564c20cb90211bf8350a86ddaff28f79e4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
  * The smaller the hint page fault latency, the higher the possibility
  * for the page to be hot.
  */
-static int numa_hint_fault_latency(struct page *page)
+static int numa_hint_fault_latency(struct folio *folio)
 {
        int last_time, time;
 
        time = jiffies_to_msecs(jiffies);
-       last_time = xchg_page_access_time(page, time);
+       last_time = folio_xchg_access_time(folio, time);
 
        return (time - last_time) & PAGE_ACCESS_TIME_MASK;
 }
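
The masking on the return value matters because the access time stored in the
folio is truncated, so the delta has to stay correct when the stored value
wraps. A minimal userspace sketch of the same masked-subtraction idea, using an
illustrative 21-bit mask rather than the kernel's real PAGE_ACCESS_TIME_MASK:

#include <stdio.h>

/* Illustrative width only, not the kernel's PAGE_ACCESS_TIME_MASK. */
#define DEMO_ACCESS_TIME_MASK	((1u << 21) - 1)

static unsigned int demo_latency(unsigned int now, unsigned int last)
{
	/* Masked subtraction keeps the delta correct across wraparound. */
	return (now - last) & DEMO_ACCESS_TIME_MASK;
}

int main(void)
{
	/* 'last' was recorded just before the truncated counter wrapped. */
	printf("%u\n", demo_latency(5, DEMO_ACCESS_TIME_MASK - 2));	/* 8 */
	return 0;
}
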
@@ -1784,7 +1784,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
        }
 }
 
-bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
                                int src_nid, int dst_cpu)
 {
        struct numa_group *ng = deref_curr_numa_group(p);
@@ -1814,16 +1814,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
                numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);
 
                th = pgdat->nbp_threshold ? : def_th;
-               latency = numa_hint_fault_latency(page);
+               latency = numa_hint_fault_latency(folio);
                if (latency >= th)
                        return false;
 
                return !numa_promotion_rate_limit(pgdat, rate_limit,
-                                                 thp_nr_pages(page));
+                                                 folio_nr_pages(folio));
        }
 
        this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
-       last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+       last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
 
        if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
            !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
@@ -6619,6 +6619,7 @@ dequeue_throttle:
 /* Working cpumask for: load_balance, load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
+static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask);
 
 #ifdef CONFIG_NO_HZ_COMMON
 
@@ -9579,7 +9580,7 @@ static inline long sibling_imbalance(struct lb_env *env,
        imbalance /= ncores_local + ncores_busiest;
 
        /* Take advantage of resource in an empty sched group */
-       if (imbalance == 0 && local->sum_nr_running == 0 &&
+       if (imbalance <= 1 && local->sum_nr_running == 0 &&
            busiest->sum_nr_running > 1)
                imbalance = 2;
 
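
The division above truncates, so the normalized imbalance can come out as
exactly 1 rather than 0 even when the local group is empty: as a hypothetical
example, a pre-division imbalance of 3 spread over ncores_local +
ncores_busiest = 3 cores yields 1. The old imbalance == 0 test would then leave
the empty local group unused, whereas imbalance <= 1 still bumps the value to 2
so that at least one task can be pulled into the idle group.
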
@@ -9767,6 +9768,15 @@ static bool update_sd_pick_busiest(struct lb_env *env,
                break;
 
        case group_smt_balance:
+               /*
+                * Check whether either SMT group has spare CPUs, to choose
+                * between the has-spare and the fully-busy handling below.
+                */
+               if (sgs->idle_cpus != 0 || busiest->idle_cpus != 0)
+                       goto has_spare;
+
+               fallthrough;
+
        case group_fully_busy:
                /*
                 * Select the fully busy group with highest avg_load. In
@@ -9806,6 +9816,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
                        else
                                return true;
                }
+has_spare:
 
                /*
                 * Select not overloaded group with lowest number of idle cpus
@@ -10917,6 +10928,7 @@ static int active_load_balance_cpu_stop(void *data);
 
 static int should_we_balance(struct lb_env *env)
 {
+       struct cpumask *swb_cpus = this_cpu_cpumask_var_ptr(should_we_balance_tmpmask);
        struct sched_group *sg = env->sd->groups;
        int cpu, idle_smt = -1;
 
@@ -10940,8 +10952,9 @@ static int should_we_balance(struct lb_env *env)
                return 1;
        }
 
+       cpumask_copy(swb_cpus, group_balance_mask(sg));
        /* Try to find first idle CPU */
-       for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
+       for_each_cpu_and(cpu, swb_cpus, env->cpus) {
                if (!idle_cpu(cpu))
                        continue;
 
@@ -10953,6 +10966,14 @@ static int should_we_balance(struct lb_env *env)
                if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) {
                        if (idle_smt == -1)
                                idle_smt = cpu;
+                       /*
+                        * If the core is not idle and the first idle SMT sibling
+                        * has been found, there is no need to check the other
+                        * SMT siblings for idleness:
+                        */
+#ifdef CONFIG_SCHED_SMT
+                       cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu));
+#endif
                        continue;
                }
 
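
The cpumask_andnot() added above removes the whole SMT mask of a non-idle core
from the per-CPU scratch mask, so later iterations of the loop never revisit
that core's siblings. A small userspace sketch of the same pruning idea on a
plain bitmask; the 2-way SMT layout and the core_mask() helper are illustrative
assumptions, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define SMT_WIDTH	2	/* assumed 2-way SMT */

/* Hypothetical helper: bitmask of all hardware threads in cpu's core. */
static uint64_t core_mask(int cpu)
{
	int first = cpu - (cpu % SMT_WIDTH);

	return ((1ull << SMT_WIDTH) - 1) << first;
}

int main(void)
{
	uint64_t candidates = 0xff;	/* CPUs 0-7 still to be scanned */
	int cpu = 3;			/* idle sibling of a non-idle core */

	candidates &= ~core_mask(cpu);	/* drop CPUs 2 and 3 from the scan */
	printf("%#llx\n", (unsigned long long)candidates);	/* 0xf3 */
	return 0;
}
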
@@ -12918,6 +12939,8 @@ __init void init_sched_fair_class(void)
        for_each_possible_cpu(i) {
                zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
                zalloc_cpumask_var_node(&per_cpu(select_rq_mask,    i), GFP_KERNEL, cpu_to_node(i));
+               zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i),
+                                       GFP_KERNEL, cpu_to_node(i));
 
 #ifdef CONFIG_CFS_BANDWIDTH
                INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i));