sched: add throttled time stat for throttled children
author     Josh Don <joshdon@google.com>
           Tue, 20 Jun 2023 18:32:47 +0000 (11:32 -0700)
committer  Peter Zijlstra <peterz@infradead.org>
           Thu, 13 Jul 2023 13:21:49 +0000 (15:21 +0200)
We currently export the total throttled time for cgroups that are given
a bandwidth limit. This patch extends this accounting to also account
the total time that each child cgroup has been throttled.

This is useful for understanding the degree to which children have been
affected by the throttling control. A child that is never runnable
during a throttled period, for example, will not accrue any
self-throttling time for that period.
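
To make the semantics concrete, here is a minimal userspace sketch of
the accounting rule (not kernel code; the function name and timestamps
are hypothetical): self time accrues only for the portion of the
throttled window in which the child actually had runnable tasks.

  #include <stdio.h>
  #include <stdint.h>

  /*
   * Simplified model of the rule this patch implements: the self clock
   * starts at throttle time if the group already has runnable tasks (or
   * later, when a task first becomes runnable under the throttled
   * hierarchy), and the accrued delta is added at unthrottle.
   */
  static uint64_t self_throttled_ns(uint64_t throttle, uint64_t unthrottle,
                                    uint64_t first_runnable)
  {
          if (first_runnable >= unthrottle)
                  return 0;       /* never runnable while throttled */
          return unthrottle - (first_runnable > throttle ?
                               first_runnable : throttle);
  }

  int main(void)
  {
          /* Parent throttled from t=100ms to t=200ms (values in ns). */
          printf("runnable throughout: %llu ns\n",
                 (unsigned long long)self_throttled_ns(100000000ULL,
                                                       200000000ULL, 0));
          printf("never runnable:      %llu ns\n",
                 (unsigned long long)self_throttled_ns(100000000ULL,
                                                       200000000ULL,
                                                       UINT64_MAX));
          return 0;
  }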

Expose this in a new interface, 'cpu.stat.local', mirroring how
non-hierarchical events are accounted in 'memory.events.local'.
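
A usage sketch for consumers (the cgroup path is hypothetical; the line
format matches the seq_printf() in cpu_local_stat_show() in
kernel/sched/core.c below):

  #include <stdio.h>

  int main(void)
  {
          unsigned long long usec;
          /* Hypothetical child cgroup; every cgroup dir gets the file. */
          FILE *f = fopen("/sys/fs/cgroup/parent/child/cpu.stat.local", "r");

          if (!f || fscanf(f, "throttled_usec %llu", &usec) != 1) {
                  fprintf(stderr, "failed to read cpu.stat.local\n");
                  return 1;
          }
          fclose(f);
          printf("child self-throttled for %llu usec\n", usec);
          return 0;
  }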

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230620183247.737942-2-joshdon@google.com
include/linux/cgroup-defs.h
kernel/cgroup/cgroup.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/sched.h

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8a0d5466c7be1533d29d8d5d5ece2cec919b2886..ae20dbb885d66e3d470952f702c14210cbc39c4d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -661,6 +661,8 @@ struct cgroup_subsys {
        void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
        int (*css_extra_stat_show)(struct seq_file *seq,
                                   struct cgroup_subsys_state *css);
+       int (*css_local_stat_show)(struct seq_file *seq,
+                                  struct cgroup_subsys_state *css);
 
        int (*can_attach)(struct cgroup_taskset *tset);
        void (*cancel_attach)(struct cgroup_taskset *tset);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index bfe3cd8ccf3668416a544594eb8eea55258cbf92..4e3ee13217ce7165587280d18da30d4d4df2463a 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3685,6 +3685,36 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
        return ret;
 }
 
+static int __maybe_unused cgroup_local_stat_show(struct seq_file *seq,
+                                                struct cgroup *cgrp, int ssid)
+{
+       struct cgroup_subsys *ss = cgroup_subsys[ssid];
+       struct cgroup_subsys_state *css;
+       int ret;
+
+       if (!ss->css_local_stat_show)
+               return 0;
+
+       css = cgroup_tryget_css(cgrp, ss);
+       if (!css)
+               return 0;
+
+       ret = ss->css_local_stat_show(seq, css);
+       css_put(css);
+       return ret;
+}
+
+static int cpu_local_stat_show(struct seq_file *seq, void *v)
+{
+       struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
+       int ret = 0;
+
+#ifdef CONFIG_CGROUP_SCHED
+       ret = cgroup_local_stat_show(seq, cgrp, cpu_cgrp_id);
+#endif
+       return ret;
+}
+
 #ifdef CONFIG_PSI
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
@@ -5235,6 +5265,10 @@ static struct cftype cgroup_base_files[] = {
                .name = "cpu.stat",
                .seq_show = cpu_stat_show,
        },
+       {
+               .name = "cpu.stat.local",
+               .seq_show = cpu_local_stat_show,
+       },
        { }     /* terminate */
 };
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c52c2eba7c739fc92fdf98664eb645cac3c5876e..2291f9d91c8624d612f527744aa90b474ec48c61 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11139,6 +11139,27 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 
        return 0;
 }
+
+static u64 throttled_time_self(struct task_group *tg)
+{
+       int i;
+       u64 total = 0;
+
+       for_each_possible_cpu(i) {
+               total += READ_ONCE(tg->cfs_rq[i]->throttled_clock_self_time);
+       }
+
+       return total;
+}
+
+static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
+{
+       struct task_group *tg = css_tg(seq_css(sf));
+
+       seq_printf(sf, "throttled_time %llu\n", throttled_time_self(tg));
+
+       return 0;
+}
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -11215,6 +11236,10 @@ static struct cftype cpu_legacy_files[] = {
                .name = "stat",
                .seq_show = cpu_cfs_stat_show,
        },
+       {
+               .name = "stat.local",
+               .seq_show = cpu_cfs_local_stat_show,
+       },
 #endif
 #ifdef CONFIG_RT_GROUP_SCHED
        {
@@ -11271,6 +11296,24 @@ static int cpu_extra_stat_show(struct seq_file *sf,
        return 0;
 }
 
+static int cpu_local_stat_show(struct seq_file *sf,
+                              struct cgroup_subsys_state *css)
+{
+#ifdef CONFIG_CFS_BANDWIDTH
+       {
+               struct task_group *tg = css_tg(css);
+               u64 throttled_self_usec;
+
+               throttled_self_usec = throttled_time_self(tg);
+               do_div(throttled_self_usec, NSEC_PER_USEC);
+
+               seq_printf(sf, "throttled_usec %llu\n",
+                          throttled_self_usec);
+       }
+#endif
+       return 0;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
                               struct cftype *cft)
@@ -11449,6 +11492,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
        .css_released   = cpu_cgroup_css_released,
        .css_free       = cpu_cgroup_css_free,
        .css_extra_stat_show = cpu_extra_stat_show,
+       .css_local_stat_show = cpu_local_stat_show,
 #ifdef CONFIG_RT_GROUP_SCHED
        .can_attach     = cpu_cgroup_can_attach,
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51ccae74779591bdd78adbc9dbeb5b6dbb981f34..159b20296dd525b6e3346ad932a629ba47b1f7af 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4878,8 +4878,12 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
                        list_add_leaf_cfs_rq(cfs_rq);
                } else {
 #ifdef CONFIG_CFS_BANDWIDTH
+                       struct rq *rq = rq_of(cfs_rq);
+
                        if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
-                               cfs_rq->throttled_clock = rq_clock(rq_of(cfs_rq));
+                               cfs_rq->throttled_clock = rq_clock(rq);
+                       if (!cfs_rq->throttled_clock_self)
+                               cfs_rq->throttled_clock_self = rq_clock(rq);
 #endif
                }
        }
@@ -5384,6 +5388,17 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
                /* Add cfs_rq with load or one or more already running entities to the list */
                if (!cfs_rq_is_decayed(cfs_rq))
                        list_add_leaf_cfs_rq(cfs_rq);
+
+               if (cfs_rq->throttled_clock_self) {
+                       u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+
+                       cfs_rq->throttled_clock_self = 0;
+
+                       if (SCHED_WARN_ON((s64)delta < 0))
+                               delta = 0;
+
+                       cfs_rq->throttled_clock_self_time += delta;
+               }
        }
 
        return 0;
@@ -5398,6 +5413,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
        if (!cfs_rq->throttle_count) {
                cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
                list_del_leaf_cfs_rq(cfs_rq);
+
+               SCHED_WARN_ON(cfs_rq->throttled_clock_self);
+               if (cfs_rq->nr_running)
+                       cfs_rq->throttled_clock_self = rq_clock(rq);
        }
        cfs_rq->throttle_count++;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b9088406ccc0f9a1a5edb2d006e2c..1dcea9bfa0a8668554648a2dfdb002f4f21f2b12 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -636,6 +636,8 @@ struct cfs_rq {
        u64                     throttled_clock;
        u64                     throttled_clock_pelt;
        u64                     throttled_clock_pelt_time;
+       u64                     throttled_clock_self;
+       u64                     throttled_clock_self_time;
        int                     throttled;
        int                     throttle_count;
        struct list_head        throttled_list;
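
As a closing usage note, one way to consume the new stat (a sketch with
hypothetical paths, assuming the parent's throttled_usec from cpu.stat
and the child's throttled_usec from cpu.stat.local are sampled over the
same window): the ratio of the two deltas approximates how much of the
parent's throttling actually impacted this particular child.

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  /* Scan a "key value" stat file for one key; paths below are hypothetical. */
  static unsigned long long read_stat(const char *path, const char *key)
  {
          char k[64];
          unsigned long long v;
          FILE *f = fopen(path, "r");

          if (!f)
                  return 0;
          while (fscanf(f, "%63s %llu", k, &v) == 2) {
                  if (!strcmp(k, key)) {
                          fclose(f);
                          return v;
                  }
          }
          fclose(f);
          return 0;
  }

  int main(void)
  {
          const char *parent = "/sys/fs/cgroup/parent/cpu.stat";
          const char *child  = "/sys/fs/cgroup/parent/child/cpu.stat.local";
          unsigned long long p0 = read_stat(parent, "throttled_usec");
          unsigned long long c0 = read_stat(child, "throttled_usec");

          sleep(10);      /* sampling window */

          unsigned long long pd = read_stat(parent, "throttled_usec") - p0;
          unsigned long long cd = read_stat(child, "throttled_usec") - c0;

          printf("parent throttled %llu usec, child self-throttled %llu usec (%.0f%%)\n",
                 pd, cd, pd ? 100.0 * cd / pd : 0.0);
          return 0;
  }

A child that was mostly idle during the parent's throttled periods will
show a ratio near zero.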