Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

[sfrench/cifs-2.6.git] / include / linux / cgroup-defs.h
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h

index 1dff0a478b45aace3903af82883cb3bf39194774..8b7fd8eeccee26c5694530a45f8f9332aaf681c7 100644 (file)
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -17,6 +17,7 @@
  #include <linux/refcount.h>
  #include <linux/percpu-refcount.h>
  #include <linux/percpu-rwsem.h>
+#include <linux/u64_stats_sync.h>
  #include <linux/workqueue.h>
  #include <linux/bpf-cgroup.h>
  
@@ -255,6 +256,57 @@ struct css_set {
         struct rcu_head rcu_head;
  };
  
+/*
+ * cgroup basic resource usage statistics.  Accounting is done per-cpu in
+ * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
+ * reads.
+ *
+ * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
+ * linked into the updated tree.  On the following read, propagation only
+ * considers and consumes the updated tree.  This makes reading O(the
+ * number of descendants which have been active since last read) instead of
+ * O(the total number of descendants).
+ *
+ * This is important because there can be a lot of (draining) cgroups which
+ * aren't active and stat may be read frequently.  The combination can
+ * become very expensive.  By propagating selectively, increasing reading
+ * frequency decreases the cost of each read.
+ */
+struct cgroup_cpu_stat {
+       /*
+        * ->sync protects all the current counters.  These are the only
+        * fields which get updated in the hot path.
+        */
+       struct u64_stats_sync sync;
+       struct task_cputime cputime;            /* current counters */
+
+       /*
+        * Snapshots at the last reading.  These are used to calculate the
+        * deltas to propagate to the global counters.
+        */
+       struct task_cputime last_cputime;       /* cputime as of last read */
+
+       /*
+        * Child cgroups with stat updates on this cpu since the last read
+        * are linked on the parent's ->updated_children through
+        * ->updated_next.
+        *
+        * In addition to being more compact, singly-linked list pointing
+        * to the cgroup makes it unnecessary for each per-cpu struct to
+        * point back to the associated cgroup.
+        *
+        * Protected by per-cpu cgroup_cpu_stat_lock.
+        */
+       struct cgroup *updated_children;        /* terminated by self cgroup */
+       struct cgroup *updated_next;            /* NULL iff not on the list */
+};
+
+struct cgroup_stat {
+       /* per-cpu statistics are collected into the following global counters */
+       struct task_cputime cputime;
+       struct prev_cputime prev_cputime;
+};
+
  struct cgroup {
         /* self css with NULL ->ss, points back to this cgroup */
         struct cgroup_subsys_state self;
@@ -354,6 +406,11 @@ struct cgroup {
          */
         struct cgroup *dom_cgrp;
  
+       /* cgroup basic resource statistics */
+       struct cgroup_cpu_stat __percpu *cpu_stat;
+       struct cgroup_stat pending_stat;        /* pending from children */
+       struct cgroup_stat stat;
+
         /*
          * list of pidlists, up to two for each namespace (one for procs, one
          * for tasks); created on demand.
@@ -513,6 +570,8 @@ struct cgroup_subsys {
         void (*css_released)(struct cgroup_subsys_state *css);
         void (*css_free)(struct cgroup_subsys_state *css);
         void (*css_reset)(struct cgroup_subsys_state *css);
+       int (*css_extra_stat_show)(struct seq_file *seq,
+                                  struct cgroup_subsys_state *css);
  
         int (*can_attach)(struct cgroup_taskset *tset);
         void (*cancel_attach)(struct cgroup_taskset *tset);