Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2019 20:52:12 +0000 (13:52 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2019 20:52:12 +0000 (13:52 -0700)
Pull cgroup updates from Tejun Heo:
 "This includes Roman's cgroup2 freezer implementation.

  It's a separate mechanism from cgroup1 freezer. Instead of blocking
  user tasks in arbitrary uninterruptible sleeps, the new implementation
  extends jobctl stop - frozen tasks are trapped in jobctl stop until
  thawed and can be killed and ptraced. Lots of thanks to Oleg for
  shepherding the effort.

  Other than that, there are a few trivial changes"

* 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: never call do_group_exit() with task->frozen bit set
  kernel: cgroup: fix misuse of %x
  cgroup: get rid of cgroup_freezer_frozen_exit()
  cgroup: prevent spurious transition into non-frozen state
  cgroup: Remove unused cgrp variable
  cgroup: document cgroup v2 freezer interface
  cgroup: add tracing points for cgroup v2 freezer
  cgroup: make TRACE_CGROUP_PATH irq-safe
  kselftests: cgroup: add freezer controller self-tests
  kselftests: cgroup: don't fail on cg_kill_all() error in cg_destroy()
  cgroup: cgroup v2 freezer
  cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock
  cgroup: implement __cgroup_task_count() helper
  cgroup: rename freezer.c into legacy_freezer.c
  cgroup: remove extra cgroup_migrate_finish() call

20 files changed:
Documentation/admin-guide/cgroup-v2.rst
include/linux/cgroup-defs.h
include/linux/cgroup.h
include/linux/sched.h
include/linux/sched/jobctl.h
include/trace/events/cgroup.h
kernel/cgroup/Makefile
kernel/cgroup/cgroup-internal.h
kernel/cgroup/cgroup-v1.c
kernel/cgroup/cgroup.c
kernel/cgroup/debug.c
kernel/cgroup/freezer.c
kernel/cgroup/legacy_freezer.c [new file with mode: 0644]
kernel/fork.c
kernel/signal.c
tools/testing/selftests/cgroup/.gitignore
tools/testing/selftests/cgroup/Makefile
tools/testing/selftests/cgroup/cgroup_util.c
tools/testing/selftests/cgroup/cgroup_util.h
tools/testing/selftests/cgroup/test_freezer.c [new file with mode: 0644]

index 20f92c16ffbf2c6e89ae090719d2e7225383985f..88e746074252298ecae8f0aab9db2bd1c9f4fb99 100644 (file)
@@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
          populated
                1 if the cgroup or its descendants contains any live
                processes; otherwise, 0.
+         frozen
+               1 if the cgroup is frozen; otherwise, 0.
 
   cgroup.max.descendants
        A read-write single value files.  The default is "max".
@@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
                A dying cgroup can consume system resources not exceeding
                limits, which were active at the moment of cgroup deletion.
 
+  cgroup.freeze
+       A read-write single value file which exists on non-root cgroups.
+       Allowed values are "0" and "1". The default is "0".
+
+       Writing "1" to the file causes freezing of the cgroup and all
+       descendant cgroups. This means that all belonging processes will
+       be stopped and will not run until the cgroup will be explicitly
+       unfrozen. Freezing of the cgroup may take some time; when this action
+       is completed, the "frozen" value in the cgroup.events control file
+       will be updated to "1" and the corresponding notification will be
+       issued.
+
+       A cgroup can be frozen either by its own settings, or by settings
+       of any ancestor cgroups. If any of ancestor cgroups is frozen, the
+       cgroup will remain frozen.
+
+       Processes in the frozen cgroup can be killed by a fatal signal.
+       They also can enter and leave a frozen cgroup: either by an explicit
+       move by a user, or if freezing of the cgroup races with fork().
+       If a process is moved to a frozen cgroup, it stops. If a process is
+       moved out of a frozen cgroup, it becomes running.
+
+       Frozen status of a cgroup doesn't affect any cgroup tree operations:
+       it's possible to delete a frozen (and empty) cgroup, as well as
+       create new sub-cgroups.
 
 Controllers
 ===========
index 1c70803e9f77056873e18aad6e1f3ce7195a25a1..77258d276f9350c54b1aca6c713a39d826a9715d 100644 (file)
@@ -65,6 +65,12 @@ enum {
         * specified at mount time and thus is implemented here.
         */
        CGRP_CPUSET_CLONE_CHILDREN,
+
+       /* Control group has to be frozen. */
+       CGRP_FREEZE,
+
+       /* Cgroup is frozen. */
+       CGRP_FROZEN,
 };
 
 /* cgroup_root->flags */
@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
        struct cgroup *updated_next;            /* NULL iff not on the list */
 };
 
+struct cgroup_freezer_state {
+       /* Should the cgroup and its descendants be frozen. */
+       bool freeze;
+
+       /* Should the cgroup actually be frozen? */
+       int e_freeze;
+
+       /* Fields below are protected by css_set_lock */
+
+       /* Number of frozen descendant cgroups */
+       int nr_frozen_descendants;
+
+       /*
+        * Number of tasks, which are counted as frozen:
+        * frozen, SIGSTOPped, and PTRACEd.
+        */
+       int nr_frozen_tasks;
+};
+
 struct cgroup {
        /* self css with NULL ->ss, points back to this cgroup */
        struct cgroup_subsys_state self;
@@ -349,6 +374,11 @@ struct cgroup {
         * Dying cgroups are cgroups which were deleted by a user,
         * but are still existing because someone else is holding a reference.
         * max_descendants is a maximum allowed number of descent cgroups.
+        *
+        * nr_descendants and nr_dying_descendants are protected
+        * by cgroup_mutex and css_set_lock. It's fine to read them holding
+        * any of cgroup_mutex and css_set_lock; for writing both locks
+        * should be held.
         */
        int nr_descendants;
        int nr_dying_descendants;
@@ -448,6 +478,9 @@ struct cgroup {
        /* If there is block congestion on this cgroup. */
        atomic_t congestion_count;
 
+       /* Used to store internal freezer state */
+       struct cgroup_freezer_state freezer;
+
        /* ids of the ancestors at each level including self */
        int ancestor_ids[];
 };
index 81f58b4a5418da9bf57d1c6f78a2782aeba61377..c0077adeea8334dc136233de439351ca3e742eff 100644 (file)
@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
                free_cgroup_ns(ns);
 }
 
+#ifdef CONFIG_CGROUPS
+
+void cgroup_enter_frozen(void);
+void cgroup_leave_frozen(bool always_leave);
+void cgroup_update_frozen(struct cgroup *cgrp);
+void cgroup_freeze(struct cgroup *cgrp, bool freeze);
+void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
+                                struct cgroup *dst);
+
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+       bool ret;
+
+       if (task->flags & PF_KTHREAD)
+               return false;
+
+       rcu_read_lock();
+       ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+       return task->frozen;
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void cgroup_enter_frozen(void) { }
+static inline void cgroup_leave_frozen(bool always_leave) { }
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+       return false;
+}
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+       return false;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
 #endif /* _LINUX_CGROUP_H */
index 50606a6e73d686ea6a3dad3c1d7342f620cdea7b..a2cd15855bad87f3b10540e5c045c6a36db051af 100644 (file)
@@ -726,6 +726,8 @@ struct task_struct {
 #ifdef CONFIG_CGROUPS
        /* disallow userland-initiated cgroup migration */
        unsigned                        no_cgroup_migration:1;
+       /* task is frozen/stopped (used by the cgroup freezer) */
+       unsigned                        frozen:1;
 #endif
 #ifdef CONFIG_BLK_CGROUP
        /* to be used once the psi infrastructure lands upstream. */
index 98228bd48aeea29eb1d456586b58b17ee2aef60a..fa067de9f1a94843f7402f2fd258d8b6339b59f0 100644 (file)
@@ -18,6 +18,7 @@ struct task_struct;
 #define JOBCTL_TRAP_NOTIFY_BIT 20      /* trap for NOTIFY */
 #define JOBCTL_TRAPPING_BIT    21      /* switching to TRACED */
 #define JOBCTL_LISTENING_BIT   22      /* ptracer is listening for events */
+#define JOBCTL_TRAP_FREEZE_BIT 23      /* trap for cgroup freezer */
 
 #define JOBCTL_STOP_DEQUEUED   (1UL << JOBCTL_STOP_DEQUEUED_BIT)
 #define JOBCTL_STOP_PENDING    (1UL << JOBCTL_STOP_PENDING_BIT)
@@ -26,6 +27,7 @@ struct task_struct;
 #define JOBCTL_TRAP_NOTIFY     (1UL << JOBCTL_TRAP_NOTIFY_BIT)
 #define JOBCTL_TRAPPING                (1UL << JOBCTL_TRAPPING_BIT)
 #define JOBCTL_LISTENING       (1UL << JOBCTL_LISTENING_BIT)
+#define JOBCTL_TRAP_FREEZE     (1UL << JOBCTL_TRAP_FREEZE_BIT)
 
 #define JOBCTL_TRAP_MASK       (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK    (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
index a401ff5e784711e574233e74eddb3aec974fe522..a566cc5214764665e9345b2159cbd7985fb2155f 100644 (file)
@@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename,
        TP_ARGS(cgrp, path)
 );
 
+DEFINE_EVENT(cgroup, cgroup_freeze,
+
+       TP_PROTO(struct cgroup *cgrp, const char *path),
+
+       TP_ARGS(cgrp, path)
+);
+
+DEFINE_EVENT(cgroup, cgroup_unfreeze,
+
+       TP_PROTO(struct cgroup *cgrp, const char *path),
+
+       TP_ARGS(cgrp, path)
+);
+
 DECLARE_EVENT_CLASS(cgroup_migrate,
 
        TP_PROTO(struct cgroup *dst_cgrp, const char *path,
@@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
        TP_ARGS(dst_cgrp, path, task, threadgroup)
 );
 
+DECLARE_EVENT_CLASS(cgroup_event,
+
+       TP_PROTO(struct cgroup *cgrp, const char *path, int val),
+
+       TP_ARGS(cgrp, path, val),
+
+       TP_STRUCT__entry(
+               __field(        int,            root                    )
+               __field(        int,            id                      )
+               __field(        int,            level                   )
+               __string(       path,           path                    )
+               __field(        int,            val                     )
+       ),
+
+       TP_fast_assign(
+               __entry->root = cgrp->root->hierarchy_id;
+               __entry->id = cgrp->id;
+               __entry->level = cgrp->level;
+               __assign_str(path, path);
+               __entry->val = val;
+       ),
+
+       TP_printk("root=%d id=%d level=%d path=%s val=%d",
+                 __entry->root, __entry->id, __entry->level, __get_str(path),
+                 __entry->val)
+);
+
+DEFINE_EVENT(cgroup_event, cgroup_notify_populated,
+
+       TP_PROTO(struct cgroup *cgrp, const char *path, int val),
+
+       TP_ARGS(cgrp, path, val)
+);
+
+DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
+
+       TP_PROTO(struct cgroup *cgrp, const char *path, int val),
+
+       TP_ARGS(cgrp, path, val)
+);
+
 #endif /* _TRACE_CGROUP_H */
 
 /* This part must be outside protection */
index bfcdae8961227acab958192a7164d98c72a48ea7..5d7a76bfbbb769c41ff5ee072584f92f847fdaaf 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
 
-obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
+obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
 obj-$(CONFIG_CGROUP_RDMA) += rdma.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
index 30e39f3932ad06bb3f89fe08bc11c5988bd4ea73..809e34a3c017260d1fba519a5fd3640cf19bf135 100644 (file)
@@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void);
 #define TRACE_CGROUP_PATH(type, cgrp, ...)                             \
        do {                                                            \
                if (trace_cgroup_##type##_enabled()) {                  \
-                       spin_lock(&trace_cgroup_path_lock);             \
+                       unsigned long flags;                            \
+                       spin_lock_irqsave(&trace_cgroup_path_lock,      \
+                                         flags);                       \
                        cgroup_path(cgrp, trace_cgroup_path,            \
                                    TRACE_CGROUP_PATH_LEN);             \
                        trace_cgroup_##type(cgrp, trace_cgroup_path,    \
                                            ##__VA_ARGS__);             \
-                       spin_unlock(&trace_cgroup_path_lock);           \
+                       spin_unlock_irqrestore(&trace_cgroup_path_lock, \
+                                              flags);                  \
                }                                                       \
        } while (0)
 
@@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn);
 int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
                     struct kernfs_root *kf_root);
 
+int __cgroup_task_count(const struct cgroup *cgrp);
 int cgroup_task_count(const struct cgroup *cgrp);
 
 /*
index c126b34fd4ff583af524f52bf973b9734acf9b9e..68ca5de7ec2772b0f1d0ba62c4f4035c96dc922a 100644 (file)
@@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
        return l;
 }
 
-/**
- * cgroup_task_count - count the number of tasks in a cgroup.
- * @cgrp: the cgroup in question
- */
-int cgroup_task_count(const struct cgroup *cgrp)
-{
-       int count = 0;
-       struct cgrp_cset_link *link;
-
-       spin_lock_irq(&css_set_lock);
-       list_for_each_entry(link, &cgrp->cset_links, cset_link)
-               count += link->cset->nr_tasks;
-       spin_unlock_irq(&css_set_lock);
-       return count;
-}
-
 /*
  * Load a cgroup's pidarray with either procs' tgids or tasks' pids
  */
index 3f2b4bde0f9c3134659867f67a3ab485da0613e9..327f37c9fdfaaf4ca9ea475129fa8b28cb75fe6a 100644 (file)
@@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp)
        css_get(&cgrp->self);
 }
 
+/**
+ * __cgroup_task_count - count the number of tasks in a cgroup. The caller
+ * is responsible for taking the css_set_lock.
+ * @cgrp: the cgroup in question
+ */
+int __cgroup_task_count(const struct cgroup *cgrp)
+{
+       int count = 0;
+       struct cgrp_cset_link *link;
+
+       lockdep_assert_held(&css_set_lock);
+
+       list_for_each_entry(link, &cgrp->cset_links, cset_link)
+               count += link->cset->nr_tasks;
+
+       return count;
+}
+
+/**
+ * cgroup_task_count - count the number of tasks in a cgroup.
+ * @cgrp: the cgroup in question
+ */
+int cgroup_task_count(const struct cgroup *cgrp)
+{
+       int count;
+
+       spin_lock_irq(&css_set_lock);
+       count = __cgroup_task_count(cgrp);
+       spin_unlock_irq(&css_set_lock);
+
+       return count;
+}
+
 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 {
        struct cgroup *cgrp = of->kn->parent->priv;
@@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
                        break;
 
                cgroup1_check_for_release(cgrp);
+               TRACE_CGROUP_PATH(notify_populated, cgrp,
+                                 cgroup_is_populated(cgrp));
                cgroup_file_notify(&cgrp->events_file);
 
                child = cgrp;
@@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
                        get_css_set(to_cset);
                        to_cset->nr_tasks++;
                        css_set_move_task(task, from_cset, to_cset, true);
-                       put_css_set_locked(from_cset);
                        from_cset->nr_tasks--;
+                       /*
+                        * If the source or destination cgroup is frozen,
+                        * the task might require to change its state.
+                        */
+                       cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
+                                                   to_cset->dfl_cgrp);
+                       put_css_set_locked(from_cset);
+
                }
        }
        spin_unlock_irq(&css_set_lock);
@@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
 
                dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
                if (!dst_cset)
-                       goto err;
+                       return -ENOMEM;
 
                WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
 
@@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
        }
 
        return 0;
-err:
-       cgroup_migrate_finish(mgctx);
-       return -ENOMEM;
 }
 
 /**
@@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
 
 static int cgroup_events_show(struct seq_file *seq, void *v)
 {
-       seq_printf(seq, "populated %d\n",
-                  cgroup_is_populated(seq_css(seq)->cgroup));
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
+       seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
+
        return 0;
 }
 
@@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 }
 #endif
 
+static int cgroup_freeze_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       seq_printf(seq, "%d\n", cgrp->freezer.freeze);
+
+       return 0;
+}
+
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+                                  char *buf, size_t nbytes, loff_t off)
+{
+       struct cgroup *cgrp;
+       ssize_t ret;
+       int freeze;
+
+       ret = kstrtoint(strstrip(buf), 0, &freeze);
+       if (ret)
+               return ret;
+
+       if (freeze < 0 || freeze > 1)
+               return -ERANGE;
+
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENOENT;
+
+       cgroup_freeze(cgrp, freeze);
+
+       cgroup_kn_unlock(of->kn);
+
+       return nbytes;
+}
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
        struct cftype *cft = of->kn->priv;
@@ -4653,6 +4729,12 @@ static struct cftype cgroup_base_files[] = {
                .name = "cgroup.stat",
                .seq_show = cgroup_stat_show,
        },
+       {
+               .name = "cgroup.freeze",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = cgroup_freeze_show,
+               .write = cgroup_freeze_write,
+       },
        {
                .name = "cpu.stat",
                .flags = CFTYPE_NOT_ON_ROOT,
@@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work)
                if (cgroup_on_dfl(cgrp))
                        cgroup_rstat_flush(cgrp);
 
+               spin_lock_irq(&css_set_lock);
                for (tcgrp = cgroup_parent(cgrp); tcgrp;
                     tcgrp = cgroup_parent(tcgrp))
                        tcgrp->nr_dying_descendants--;
+               spin_unlock_irq(&css_set_lock);
 
                cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                cgrp->id = -1;
@@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
        if (ret)
                goto out_psi_free;
 
+       /*
+        * New cgroup inherits effective freeze counter, and
+        * if the parent has to be frozen, the child has too.
+        */
+       cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+       if (cgrp->freezer.e_freeze)
+               set_bit(CGRP_FROZEN, &cgrp->flags);
+
+       spin_lock_irq(&css_set_lock);
        for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
                cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
 
-               if (tcgrp != cgrp)
+               if (tcgrp != cgrp) {
                        tcgrp->nr_descendants++;
+
+                       /*
+                        * If the new cgroup is frozen, all ancestor cgroups
+                        * get a new frozen descendant, but their state can't
+                        * change because of this.
+                        */
+                       if (cgrp->freezer.e_freeze)
+                               tcgrp->freezer.nr_frozen_descendants++;
+               }
        }
+       spin_unlock_irq(&css_set_lock);
 
        if (notify_on_release(parent))
                set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
        if (parent && cgroup_is_threaded(cgrp))
                parent->nr_threaded_children--;
 
+       spin_lock_irq(&css_set_lock);
        for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
                tcgrp->nr_descendants--;
                tcgrp->nr_dying_descendants++;
+               /*
+                * If the dying cgroup is frozen, decrease frozen descendants
+                * counters of ancestor cgroups.
+                */
+               if (test_bit(CGRP_FROZEN, &cgrp->flags))
+                       tcgrp->freezer.nr_frozen_descendants--;
        }
+       spin_unlock_irq(&css_set_lock);
 
        cgroup1_check_for_release(parent);
 
@@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child)
                        cset->nr_tasks++;
                        css_set_move_task(child, NULL, cset, false);
                }
+
+               /*
+                * If the cgroup has to be frozen, the new task has too.
+                * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
+                * the task into the frozen state.
+                */
+               if (unlikely(cgroup_task_freeze(child))) {
+                       spin_lock(&child->sighand->siglock);
+                       WARN_ON_ONCE(child->frozen);
+                       child->jobctl |= JOBCTL_TRAP_FREEZE;
+                       spin_unlock(&child->sighand->siglock);
+
+                       /*
+                        * Calling cgroup_update_frozen() isn't required here,
+                        * because it will be called anyway a bit later
+                        * from do_freezer_trap(). So we avoid cgroup's
+                        * transient switch from the frozen state and back.
+                        */
+               }
+
                spin_unlock_irq(&css_set_lock);
        }
 
@@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk)
                spin_lock_irq(&css_set_lock);
                css_set_move_task(tsk, cset, NULL, false);
                cset->nr_tasks--;
+
+               WARN_ON_ONCE(cgroup_task_frozen(tsk));
+               if (unlikely(cgroup_task_freeze(tsk)))
+                       cgroup_update_frozen(task_dfl_cgroup(tsk));
+
                spin_unlock_irq(&css_set_lock);
        } else {
                get_css_set(cset);
index 5f1b87330beef61bfc3bb885774b7766f392cc21..80aa3f027ac3b1a5592d7b272f23efb35a8b0b3a 100644 (file)
@@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v)
                css = cset->subsys[ss->id];
                if (!css)
                        continue;
-               seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
-                         (unsigned long)css, css->id);
+               seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name,
+                         css, css->id);
        }
        rcu_read_unlock();
        spin_unlock_irq(&css_set_lock);
@@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
                if (css->parent)
                        snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
                                 css->parent->id);
-               seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
-                         (unsigned long)css, css->id,
+               seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
+                         css, css->id,
                          atomic_read(&css->online_cnt), pbuf);
        }
 
index 08236798d17315622d73540d62a89dcbafdc5c77..8cf010680678949cd131c9913b5cd08e521227f9 100644 (file)
-/*
- * cgroup_freezer.c -  control group freezer subsystem
- *
- * Copyright IBM Corporation, 2007
- *
- * Author : Cedric Le Goater <clg@fr.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-
-#include <linux/export.h>
-#include <linux/slab.h>
+//SPDX-License-Identifier: GPL-2.0
 #include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/freezer.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-/*
- * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
- * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
- * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
- * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
- * its ancestors has FREEZING_SELF set.
- */
-enum freezer_state_flags {
-       CGROUP_FREEZER_ONLINE   = (1 << 0), /* freezer is fully online */
-       CGROUP_FREEZING_SELF    = (1 << 1), /* this freezer is freezing */
-       CGROUP_FREEZING_PARENT  = (1 << 2), /* the parent freezer is freezing */
-       CGROUP_FROZEN           = (1 << 3), /* this and its descendants frozen */
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/signal.h>
 
-       /* mask for all FREEZING flags */
-       CGROUP_FREEZING         = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
-};
+#include "cgroup-internal.h"
 
-struct freezer {
-       struct cgroup_subsys_state      css;
-       unsigned int                    state;
-};
+#include <trace/events/cgroup.h>
 
-static DEFINE_MUTEX(freezer_mutex);
-
-static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
+/*
+ * Propagate the cgroup frozen state upwards by the cgroup tree.
+ */
+static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
 {
-       return css ? container_of(css, struct freezer, css) : NULL;
-}
+       int desc = 1;
 
-static inline struct freezer *task_freezer(struct task_struct *task)
-{
-       return css_freezer(task_css(task, freezer_cgrp_id));
+       /*
+        * If the new state is frozen, some freezing ancestor cgroups may change
+        * their state too, depending on if all their descendants are frozen.
+        *
+        * Otherwise, all ancestor cgroups are forced into the non-frozen state.
+        */
+       while ((cgrp = cgroup_parent(cgrp))) {
+               if (frozen) {
+                       cgrp->freezer.nr_frozen_descendants += desc;
+                       if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
+                           test_bit(CGRP_FREEZE, &cgrp->flags) &&
+                           cgrp->freezer.nr_frozen_descendants ==
+                           cgrp->nr_descendants) {
+                               set_bit(CGRP_FROZEN, &cgrp->flags);
+                               cgroup_file_notify(&cgrp->events_file);
+                               TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
+                               desc++;
+                       }
+               } else {
+                       cgrp->freezer.nr_frozen_descendants -= desc;
+                       if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
+                               clear_bit(CGRP_FROZEN, &cgrp->flags);
+                               cgroup_file_notify(&cgrp->events_file);
+                               TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
+                               desc++;
+                       }
+               }
+       }
 }
 
-static struct freezer *parent_freezer(struct freezer *freezer)
+/*
+ * Revisit the cgroup frozen state.
+ * Checks if the cgroup is really frozen and perform all state transitions.
+ */
+void cgroup_update_frozen(struct cgroup *cgrp)
 {
-       return css_freezer(freezer->css.parent);
-}
+       bool frozen;
 
-bool cgroup_freezing(struct task_struct *task)
-{
-       bool ret;
+       lockdep_assert_held(&css_set_lock);
 
-       rcu_read_lock();
-       ret = task_freezer(task)->state & CGROUP_FREEZING;
-       rcu_read_unlock();
+       /*
+        * If the cgroup has to be frozen (CGRP_FREEZE bit set),
+        * and all tasks are frozen and/or stopped, let's consider
+        * the cgroup frozen. Otherwise it's not frozen.
+        */
+       frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
+               cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
 
-       return ret;
-}
+       if (frozen) {
+               /* Already there? */
+               if (test_bit(CGRP_FROZEN, &cgrp->flags))
+                       return;
 
-static const char *freezer_state_strs(unsigned int state)
-{
-       if (state & CGROUP_FROZEN)
-               return "FROZEN";
-       if (state & CGROUP_FREEZING)
-               return "FREEZING";
-       return "THAWED";
-};
-
-static struct cgroup_subsys_state *
-freezer_css_alloc(struct cgroup_subsys_state *parent_css)
-{
-       struct freezer *freezer;
+               set_bit(CGRP_FROZEN, &cgrp->flags);
+       } else {
+               /* Already there? */
+               if (!test_bit(CGRP_FROZEN, &cgrp->flags))
+                       return;
 
-       freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
-       if (!freezer)
-               return ERR_PTR(-ENOMEM);
+               clear_bit(CGRP_FROZEN, &cgrp->flags);
+       }
+       cgroup_file_notify(&cgrp->events_file);
+       TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
 
-       return &freezer->css;
+       /* Update the state of ancestor cgroups. */
+       cgroup_propagate_frozen(cgrp, frozen);
 }
 
-/**
- * freezer_css_online - commit creation of a freezer css
- * @css: css being created
- *
- * We're committing to creation of @css.  Mark it online and inherit
- * parent's freezing state while holding both parent's and our
- * freezer->lock.
+/*
+ * Increment cgroup's nr_frozen_tasks.
  */
-static int freezer_css_online(struct cgroup_subsys_state *css)
+static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
 {
-       struct freezer *freezer = css_freezer(css);
-       struct freezer *parent = parent_freezer(freezer);
-
-       mutex_lock(&freezer_mutex);
-
-       freezer->state |= CGROUP_FREEZER_ONLINE;
-
-       if (parent && (parent->state & CGROUP_FREEZING)) {
-               freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
-               atomic_inc(&system_freezing_cnt);
-       }
-
-       mutex_unlock(&freezer_mutex);
-       return 0;
+       cgrp->freezer.nr_frozen_tasks++;
 }
 
-/**
- * freezer_css_offline - initiate destruction of a freezer css
- * @css: css being destroyed
- *
- * @css is going away.  Mark it dead and decrement system_freezing_count if
- * it was holding one.
+/*
+ * Decrement cgroup's nr_frozen_tasks.
  */
-static void freezer_css_offline(struct cgroup_subsys_state *css)
+static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
 {
-       struct freezer *freezer = css_freezer(css);
-
-       mutex_lock(&freezer_mutex);
-
-       if (freezer->state & CGROUP_FREEZING)
-               atomic_dec(&system_freezing_cnt);
-
-       freezer->state = 0;
-
-       mutex_unlock(&freezer_mutex);
+       cgrp->freezer.nr_frozen_tasks--;
+       WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
 }
 
-static void freezer_css_free(struct cgroup_subsys_state *css)
+/*
+ * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ */
+void cgroup_enter_frozen(void)
 {
-       kfree(css_freezer(css));
+       struct cgroup *cgrp;
+
+       if (current->frozen)
+               return;
+
+       spin_lock_irq(&css_set_lock);
+       current->frozen = true;
+       cgrp = task_dfl_cgroup(current);
+       cgroup_inc_frozen_cnt(cgrp);
+       cgroup_update_frozen(cgrp);
+       spin_unlock_irq(&css_set_lock);
 }
 
 /*
- * Tasks can be migrated into a different freezer anytime regardless of its
- * current state.  freezer_attach() is responsible for making new tasks
- * conform to the current state.
+ * Conditionally leave frozen/stopped state. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
  *
- * Freezer state changes and task migration are synchronized via
- * @freezer->lock.  freezer_attach() makes the new tasks conform to the
- * current state and all following state changes can see the new tasks.
+ * If always_leave is not set, and the cgroup is freezing,
+ * we're racing with the cgroup freezing. In this case, we don't
+ * drop the frozen counter to avoid a transient switch to
+ * the unfrozen state.
  */
-static void freezer_attach(struct cgroup_taskset *tset)
+void cgroup_leave_frozen(bool always_leave)
 {
-       struct task_struct *task;
-       struct cgroup_subsys_state *new_css;
-
-       mutex_lock(&freezer_mutex);
-
-       /*
-        * Make the new tasks conform to the current state of @new_css.
-        * For simplicity, when migrating any task to a FROZEN cgroup, we
-        * revert it to FREEZING and let update_if_frozen() determine the
-        * correct state later.
-        *
-        * Tasks in @tset are on @new_css but may not conform to its
-        * current state before executing the following - !frozen tasks may
-        * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
-        */
-       cgroup_taskset_for_each(task, new_css, tset) {
-               struct freezer *freezer = css_freezer(new_css);
-
-               if (!(freezer->state & CGROUP_FREEZING)) {
-                       __thaw_task(task);
-               } else {
-                       freeze_task(task);
-                       /* clear FROZEN and propagate upwards */
-                       while (freezer && (freezer->state & CGROUP_FROZEN)) {
-                               freezer->state &= ~CGROUP_FROZEN;
-                               freezer = parent_freezer(freezer);
-                       }
-               }
+       struct cgroup *cgrp;
+
+       spin_lock_irq(&css_set_lock);
+       cgrp = task_dfl_cgroup(current);
+       if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
+               cgroup_dec_frozen_cnt(cgrp);
+               cgroup_update_frozen(cgrp);
+               WARN_ON_ONCE(!current->frozen);
+               current->frozen = false;
+       } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
+               spin_lock(&current->sighand->siglock);
+               current->jobctl |= JOBCTL_TRAP_FREEZE;
+               set_thread_flag(TIF_SIGPENDING);
+               spin_unlock(&current->sighand->siglock);
        }
-
-       mutex_unlock(&freezer_mutex);
+       spin_unlock_irq(&css_set_lock);
 }
 
-/**
- * freezer_fork - cgroup post fork callback
- * @task: a task which has just been forked
- *
- * @task has just been created and should conform to the current state of
- * the cgroup_freezer it belongs to.  This function may race against
- * freezer_attach().  Losing to freezer_attach() means that we don't have
- * to do anything as freezer_attach() will put @task into the appropriate
- * state.
+/*
+ * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
+ * jobctl bit.
  */
-static void freezer_fork(struct task_struct *task)
+static void cgroup_freeze_task(struct task_struct *task, bool freeze)
 {
-       struct freezer *freezer;
+       unsigned long flags;
 
-       /*
-        * The root cgroup is non-freezable, so we can skip locking the
-        * freezer.  This is safe regardless of race with task migration.
-        * If we didn't race or won, skipping is obviously the right thing
-        * to do.  If we lost and root is the new cgroup, noop is still the
-        * right thing to do.
-        */
-       if (task_css_is_root(task, freezer_cgrp_id))
+       /* If the task is about to die, don't bother with freezing it. */
+       if (!lock_task_sighand(task, &flags))
                return;
 
-       mutex_lock(&freezer_mutex);
-       rcu_read_lock();
-
-       freezer = task_freezer(task);
-       if (freezer->state & CGROUP_FREEZING)
-               freeze_task(task);
+       if (freeze) {
+               task->jobctl |= JOBCTL_TRAP_FREEZE;
+               signal_wake_up(task, false);
+       } else {
+               task->jobctl &= ~JOBCTL_TRAP_FREEZE;
+               wake_up_process(task);
+       }
 
-       rcu_read_unlock();
-       mutex_unlock(&freezer_mutex);
+       unlock_task_sighand(task, &flags);
 }
 
-/**
- * update_if_frozen - update whether a cgroup finished freezing
- * @css: css of interest
- *
- * Once FREEZING is initiated, transition to FROZEN is lazily updated by
- * calling this function.  If the current state is FREEZING but not FROZEN,
- * this function checks whether all tasks of this cgroup and the descendant
- * cgroups finished freezing and, if so, sets FROZEN.
- *
- * The caller is responsible for grabbing RCU read lock and calling
- * update_if_frozen() on all descendants prior to invoking this function.
- *
- * Task states and freezer state might disagree while tasks are being
- * migrated into or out of @css, so we can't verify task states against
- * @freezer state here.  See freezer_attach() for details.
+/*
+ * Freeze or unfreeze all tasks in the given cgroup.
  */
-static void update_if_frozen(struct cgroup_subsys_state *css)
+static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
 {
-       struct freezer *freezer = css_freezer(css);
-       struct cgroup_subsys_state *pos;
        struct css_task_iter it;
        struct task_struct *task;
 
-       lockdep_assert_held(&freezer_mutex);
-
-       if (!(freezer->state & CGROUP_FREEZING) ||
-           (freezer->state & CGROUP_FROZEN))
-               return;
+       lockdep_assert_held(&cgroup_mutex);
 
-       /* are all (live) children frozen? */
-       rcu_read_lock();
-       css_for_each_child(pos, css) {
-               struct freezer *child = css_freezer(pos);
-
-               if ((child->state & CGROUP_FREEZER_ONLINE) &&
-                   !(child->state & CGROUP_FROZEN)) {
-                       rcu_read_unlock();
-                       return;
-               }
-       }
-       rcu_read_unlock();
+       spin_lock_irq(&css_set_lock);
+       if (freeze)
+               set_bit(CGRP_FREEZE, &cgrp->flags);
+       else
+               clear_bit(CGRP_FREEZE, &cgrp->flags);
+       spin_unlock_irq(&css_set_lock);
 
-       /* are all tasks frozen? */
-       css_task_iter_start(css, 0, &it);
+       if (freeze)
+               TRACE_CGROUP_PATH(freeze, cgrp);
+       else
+               TRACE_CGROUP_PATH(unfreeze, cgrp);
 
+       css_task_iter_start(&cgrp->self, 0, &it);
        while ((task = css_task_iter_next(&it))) {
-               if (freezing(task)) {
-                       /*
-                        * freezer_should_skip() indicates that the task
-                        * should be skipped when determining freezing
-                        * completion.  Consider it frozen in addition to
-                        * the usual frozen condition.
-                        */
-                       if (!frozen(task) && !freezer_should_skip(task))
-                               goto out_iter_end;
-               }
-       }
-
-       freezer->state |= CGROUP_FROZEN;
-out_iter_end:
-       css_task_iter_end(&it);
-}
-
-static int freezer_read(struct seq_file *m, void *v)
-{
-       struct cgroup_subsys_state *css = seq_css(m), *pos;
-
-       mutex_lock(&freezer_mutex);
-       rcu_read_lock();
-
-       /* update states bottom-up */
-       css_for_each_descendant_post(pos, css) {
-               if (!css_tryget_online(pos))
+               /*
+                * Ignore kernel threads here. Freezing cgroups containing
+                * kthreads isn't supported.
+                */
+               if (task->flags & PF_KTHREAD)
                        continue;
-               rcu_read_unlock();
-
-               update_if_frozen(pos);
-
-               rcu_read_lock();
-               css_put(pos);
+               cgroup_freeze_task(task, freeze);
        }
-
-       rcu_read_unlock();
-       mutex_unlock(&freezer_mutex);
-
-       seq_puts(m, freezer_state_strs(css_freezer(css)->state));
-       seq_putc(m, '\n');
-       return 0;
-}
-
-static void freeze_cgroup(struct freezer *freezer)
-{
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       css_task_iter_start(&freezer->css, 0, &it);
-       while ((task = css_task_iter_next(&it)))
-               freeze_task(task);
        css_task_iter_end(&it);
-}
 
-static void unfreeze_cgroup(struct freezer *freezer)
-{
-       struct css_task_iter it;
-       struct task_struct *task;
-
-       css_task_iter_start(&freezer->css, 0, &it);
-       while ((task = css_task_iter_next(&it)))
-               __thaw_task(task);
-       css_task_iter_end(&it);
+       /*
+        * Cgroup state should be revisited here to cover empty leaf cgroups
+        * and cgroups whose descendants are already in the desired state.
+        */
+       spin_lock_irq(&css_set_lock);
+       if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
+               cgroup_update_frozen(cgrp);
+       spin_unlock_irq(&css_set_lock);
 }
 
-/**
- * freezer_apply_state - apply state change to a single cgroup_freezer
- * @freezer: freezer to apply state change to
- * @freeze: whether to freeze or unfreeze
- * @state: CGROUP_FREEZING_* flag to set or clear
- *
- * Set or clear @state on @cgroup according to @freeze, and perform
- * freezing or thawing as necessary.
+/*
+ * Adjust the task state (freeze or unfreeze) and revisit the state of
+ * source and destination cgroups.
  */
-static void freezer_apply_state(struct freezer *freezer, bool freeze,
-                               unsigned int state)
+void cgroup_freezer_migrate_task(struct task_struct *task,
+                                struct cgroup *src, struct cgroup *dst)
 {
-       /* also synchronizes against task migration, see freezer_attach() */
-       lockdep_assert_held(&freezer_mutex);
+       lockdep_assert_held(&css_set_lock);
 
-       if (!(freezer->state & CGROUP_FREEZER_ONLINE))
+       /*
+        * Kernel threads are not supposed to be frozen at all.
+        */
+       if (task->flags & PF_KTHREAD)
                return;
 
-       if (freeze) {
-               if (!(freezer->state & CGROUP_FREEZING))
-                       atomic_inc(&system_freezing_cnt);
-               freezer->state |= state;
-               freeze_cgroup(freezer);
-       } else {
-               bool was_freezing = freezer->state & CGROUP_FREEZING;
-
-               freezer->state &= ~state;
-
-               if (!(freezer->state & CGROUP_FREEZING)) {
-                       if (was_freezing)
-                               atomic_dec(&system_freezing_cnt);
-                       freezer->state &= ~CGROUP_FROZEN;
-                       unfreeze_cgroup(freezer);
-               }
+       /*
+        * Adjust counters of freezing and frozen tasks.
+        * Note that if the task is frozen but the destination cgroup is not
+        * frozen, we bump both counters to keep them balanced.
+        */
+       if (task->frozen) {
+               cgroup_inc_frozen_cnt(dst);
+               cgroup_dec_frozen_cnt(src);
        }
-}
-
-/**
- * freezer_change_state - change the freezing state of a cgroup_freezer
- * @freezer: freezer of interest
- * @freeze: whether to freeze or thaw
- *
- * Freeze or thaw @freezer according to @freeze.  The operations are
- * recursive - all descendants of @freezer will be affected.
- */
-static void freezer_change_state(struct freezer *freezer, bool freeze)
-{
-       struct cgroup_subsys_state *pos;
+       cgroup_update_frozen(dst);
+       cgroup_update_frozen(src);
 
        /*
-        * Update all its descendants in pre-order traversal.  Each
-        * descendant will try to inherit its parent's FREEZING state as
-        * CGROUP_FREEZING_PARENT.
+        * Force the task to the desired state.
         */
-       mutex_lock(&freezer_mutex);
-       rcu_read_lock();
-       css_for_each_descendant_pre(pos, &freezer->css) {
-               struct freezer *pos_f = css_freezer(pos);
-               struct freezer *parent = parent_freezer(pos_f);
-
-               if (!css_tryget_online(pos))
-                       continue;
-               rcu_read_unlock();
-
-               if (pos_f == freezer)
-                       freezer_apply_state(pos_f, freeze,
-                                           CGROUP_FREEZING_SELF);
-               else
-                       freezer_apply_state(pos_f,
-                                           parent->state & CGROUP_FREEZING,
-                                           CGROUP_FREEZING_PARENT);
-
-               rcu_read_lock();
-               css_put(pos);
-       }
-       rcu_read_unlock();
-       mutex_unlock(&freezer_mutex);
+       cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
 }
 
-static ssize_t freezer_write(struct kernfs_open_file *of,
-                            char *buf, size_t nbytes, loff_t off)
+void cgroup_freeze(struct cgroup *cgrp, bool freeze)
 {
-       bool freeze;
+       struct cgroup_subsys_state *css;
+       struct cgroup *dsct;
+       bool applied = false;
 
-       buf = strstrip(buf);
+       lockdep_assert_held(&cgroup_mutex);
 
-       if (strcmp(buf, freezer_state_strs(0)) == 0)
-               freeze = false;
-       else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
-               freeze = true;
-       else
-               return -EINVAL;
+       /*
+        * Nothing changed? Just exit.
+        */
+       if (cgrp->freezer.freeze == freeze)
+               return;
 
-       freezer_change_state(css_freezer(of_css(of)), freeze);
-       return nbytes;
-}
+       cgrp->freezer.freeze = freeze;
 
-static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
-                                     struct cftype *cft)
-{
-       struct freezer *freezer = css_freezer(css);
+       /*
+        * Propagate changes downwards the cgroup tree.
+        */
+       css_for_each_descendant_pre(css, &cgrp->self) {
+               dsct = css->cgroup;
 
-       return (bool)(freezer->state & CGROUP_FREEZING_SELF);
-}
+               if (cgroup_is_dead(dsct))
+                       continue;
 
-static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
-                                       struct cftype *cft)
-{
-       struct freezer *freezer = css_freezer(css);
+               if (freeze) {
+                       dsct->freezer.e_freeze++;
+                       /*
+                        * Already frozen because of ancestor's settings?
+                        */
+                       if (dsct->freezer.e_freeze > 1)
+                               continue;
+               } else {
+                       dsct->freezer.e_freeze--;
+                       /*
+                        * Still frozen because of ancestor's settings?
+                        */
+                       if (dsct->freezer.e_freeze > 0)
+                               continue;
 
-       return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
-}
+                       WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
+               }
+
+               /*
+                * Do change actual state: freeze or unfreeze.
+                */
+               cgroup_do_freeze(dsct, freeze);
+               applied = true;
+       }
 
-static struct cftype files[] = {
-       {
-               .name = "state",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .seq_show = freezer_read,
-               .write = freezer_write,
-       },
-       {
-               .name = "self_freezing",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .read_u64 = freezer_self_freezing_read,
-       },
-       {
-               .name = "parent_freezing",
-               .flags = CFTYPE_NOT_ON_ROOT,
-               .read_u64 = freezer_parent_freezing_read,
-       },
-       { }     /* terminate */
-};
-
-struct cgroup_subsys freezer_cgrp_subsys = {
-       .css_alloc      = freezer_css_alloc,
-       .css_online     = freezer_css_online,
-       .css_offline    = freezer_css_offline,
-       .css_free       = freezer_css_free,
-       .attach         = freezer_attach,
-       .fork           = freezer_fork,
-       .legacy_cftypes = files,
-};
+       /*
+        * Even if the actual state hasn't changed, let's notify a user.
+        * The state can be enforced by an ancestor cgroup: the cgroup
+        * can already be in the desired state or it can be locked in the
+        * opposite state, so that the transition will never happen.
+        * In both cases it's better to notify a user, that there is
+        * nothing to wait for.
+        */
+       if (!applied) {
+               TRACE_CGROUP_PATH(notify_frozen, cgrp,
+                                 test_bit(CGRP_FROZEN, &cgrp->flags));
+               cgroup_file_notify(&cgrp->events_file);
+       }
+}
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
new file mode 100644 (file)
index 0000000..0823679
--- /dev/null
@@ -0,0 +1,481 @@
+/*
+ * cgroup_freezer.c -  control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+/*
+ * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
+ * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
+ * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
+ * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
+ * its ancestors has FREEZING_SELF set.
+ */
+enum freezer_state_flags {
+       CGROUP_FREEZER_ONLINE   = (1 << 0), /* freezer is fully online */
+       CGROUP_FREEZING_SELF    = (1 << 1), /* this freezer is freezing */
+       CGROUP_FREEZING_PARENT  = (1 << 2), /* the parent freezer is freezing */
+       CGROUP_FROZEN           = (1 << 3), /* this and its descendants frozen */
+
+       /* mask for all FREEZING flags */
+       CGROUP_FREEZING         = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
+};
+
+struct freezer {
+       struct cgroup_subsys_state      css;
+       unsigned int                    state;
+};
+
+static DEFINE_MUTEX(freezer_mutex);
+
+static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
+{
+       return css ? container_of(css, struct freezer, css) : NULL;
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+       return css_freezer(task_css(task, freezer_cgrp_id));
+}
+
+static struct freezer *parent_freezer(struct freezer *freezer)
+{
+       return css_freezer(freezer->css.parent);
+}
+
+bool cgroup_freezing(struct task_struct *task)
+{
+       bool ret;
+
+       rcu_read_lock();
+       ret = task_freezer(task)->state & CGROUP_FREEZING;
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static const char *freezer_state_strs(unsigned int state)
+{
+       if (state & CGROUP_FROZEN)
+               return "FROZEN";
+       if (state & CGROUP_FREEZING)
+               return "FREEZING";
+       return "THAWED";
+};
+
+static struct cgroup_subsys_state *
+freezer_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+       struct freezer *freezer;
+
+       freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+       if (!freezer)
+               return ERR_PTR(-ENOMEM);
+
+       return &freezer->css;
+}
+
+/**
+ * freezer_css_online - commit creation of a freezer css
+ * @css: css being created
+ *
+ * We're committing to creation of @css.  Mark it online and inherit
+ * parent's freezing state while holding both parent's and our
+ * freezer->lock.
+ */
+static int freezer_css_online(struct cgroup_subsys_state *css)
+{
+       struct freezer *freezer = css_freezer(css);
+       struct freezer *parent = parent_freezer(freezer);
+
+       mutex_lock(&freezer_mutex);
+
+       freezer->state |= CGROUP_FREEZER_ONLINE;
+
+       if (parent && (parent->state & CGROUP_FREEZING)) {
+               freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
+               atomic_inc(&system_freezing_cnt);
+       }
+
+       mutex_unlock(&freezer_mutex);
+       return 0;
+}
+
+/**
+ * freezer_css_offline - initiate destruction of a freezer css
+ * @css: css being destroyed
+ *
+ * @css is going away.  Mark it dead and decrement system_freezing_count if
+ * it was holding one.
+ */
+static void freezer_css_offline(struct cgroup_subsys_state *css)
+{
+       struct freezer *freezer = css_freezer(css);
+
+       mutex_lock(&freezer_mutex);
+
+       if (freezer->state & CGROUP_FREEZING)
+               atomic_dec(&system_freezing_cnt);
+
+       freezer->state = 0;
+
+       mutex_unlock(&freezer_mutex);
+}
+
+static void freezer_css_free(struct cgroup_subsys_state *css)
+{
+       kfree(css_freezer(css));
+}
+
+/*
+ * Tasks can be migrated into a different freezer anytime regardless of its
+ * current state.  freezer_attach() is responsible for making new tasks
+ * conform to the current state.
+ *
+ * Freezer state changes and task migration are synchronized via
+ * @freezer->lock.  freezer_attach() makes the new tasks conform to the
+ * current state and all following state changes can see the new tasks.
+ */
+static void freezer_attach(struct cgroup_taskset *tset)
+{
+       struct task_struct *task;
+       struct cgroup_subsys_state *new_css;
+
+       mutex_lock(&freezer_mutex);
+
+       /*
+        * Make the new tasks conform to the current state of @new_css.
+        * For simplicity, when migrating any task to a FROZEN cgroup, we
+        * revert it to FREEZING and let update_if_frozen() determine the
+        * correct state later.
+        *
+        * Tasks in @tset are on @new_css but may not conform to its
+        * current state before executing the following - !frozen tasks may
+        * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
+        */
+       cgroup_taskset_for_each(task, new_css, tset) {
+               struct freezer *freezer = css_freezer(new_css);
+
+               if (!(freezer->state & CGROUP_FREEZING)) {
+                       __thaw_task(task);
+               } else {
+                       freeze_task(task);
+                       /* clear FROZEN and propagate upwards */
+                       while (freezer && (freezer->state & CGROUP_FROZEN)) {
+                               freezer->state &= ~CGROUP_FROZEN;
+                               freezer = parent_freezer(freezer);
+                       }
+               }
+       }
+
+       mutex_unlock(&freezer_mutex);
+}
+
+/**
+ * freezer_fork - cgroup post fork callback
+ * @task: a task which has just been forked
+ *
+ * @task has just been created and should conform to the current state of
+ * the cgroup_freezer it belongs to.  This function may race against
+ * freezer_attach().  Losing to freezer_attach() means that we don't have
+ * to do anything as freezer_attach() will put @task into the appropriate
+ * state.
+ */
+static void freezer_fork(struct task_struct *task)
+{
+       struct freezer *freezer;
+
+       /*
+        * The root cgroup is non-freezable, so we can skip locking the
+        * freezer.  This is safe regardless of race with task migration.
+        * If we didn't race or won, skipping is obviously the right thing
+        * to do.  If we lost and root is the new cgroup, noop is still the
+        * right thing to do.
+        */
+       if (task_css_is_root(task, freezer_cgrp_id))
+               return;
+
+       mutex_lock(&freezer_mutex);
+       rcu_read_lock();
+
+       freezer = task_freezer(task);
+       if (freezer->state & CGROUP_FREEZING)
+               freeze_task(task);
+
+       rcu_read_unlock();
+       mutex_unlock(&freezer_mutex);
+}
+
+/**
+ * update_if_frozen - update whether a cgroup finished freezing
+ * @css: css of interest
+ *
+ * Once FREEZING is initiated, transition to FROZEN is lazily updated by
+ * calling this function.  If the current state is FREEZING but not FROZEN,
+ * this function checks whether all tasks of this cgroup and the descendant
+ * cgroups finished freezing and, if so, sets FROZEN.
+ *
+ * The caller is responsible for grabbing RCU read lock and calling
+ * update_if_frozen() on all descendants prior to invoking this function.
+ *
+ * Task states and freezer state might disagree while tasks are being
+ * migrated into or out of @css, so we can't verify task states against
+ * @freezer state here.  See freezer_attach() for details.
+ */
+static void update_if_frozen(struct cgroup_subsys_state *css)
+{
+       struct freezer *freezer = css_freezer(css);
+       struct cgroup_subsys_state *pos;
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       lockdep_assert_held(&freezer_mutex);
+
+       if (!(freezer->state & CGROUP_FREEZING) ||
+           (freezer->state & CGROUP_FROZEN))
+               return;
+
+       /* are all (live) children frozen? */
+       rcu_read_lock();
+       css_for_each_child(pos, css) {
+               struct freezer *child = css_freezer(pos);
+
+               if ((child->state & CGROUP_FREEZER_ONLINE) &&
+                   !(child->state & CGROUP_FROZEN)) {
+                       rcu_read_unlock();
+                       return;
+               }
+       }
+       rcu_read_unlock();
+
+       /* are all tasks frozen? */
+       css_task_iter_start(css, 0, &it);
+
+       while ((task = css_task_iter_next(&it))) {
+               if (freezing(task)) {
+                       /*
+                        * freezer_should_skip() indicates that the task
+                        * should be skipped when determining freezing
+                        * completion.  Consider it frozen in addition to
+                        * the usual frozen condition.
+                        */
+                       if (!frozen(task) && !freezer_should_skip(task))
+                               goto out_iter_end;
+               }
+       }
+
+       freezer->state |= CGROUP_FROZEN;
+out_iter_end:
+       css_task_iter_end(&it);
+}
+
+static int freezer_read(struct seq_file *m, void *v)
+{
+       struct cgroup_subsys_state *css = seq_css(m), *pos;
+
+       mutex_lock(&freezer_mutex);
+       rcu_read_lock();
+
+       /* update states bottom-up */
+       css_for_each_descendant_post(pos, css) {
+               if (!css_tryget_online(pos))
+                       continue;
+               rcu_read_unlock();
+
+               update_if_frozen(pos);
+
+               rcu_read_lock();
+               css_put(pos);
+       }
+
+       rcu_read_unlock();
+       mutex_unlock(&freezer_mutex);
+
+       seq_puts(m, freezer_state_strs(css_freezer(css)->state));
+       seq_putc(m, '\n');
+       return 0;
+}
+
+static void freeze_cgroup(struct freezer *freezer)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&freezer->css, 0, &it);
+       while ((task = css_task_iter_next(&it)))
+               freeze_task(task);
+       css_task_iter_end(&it);
+}
+
+static void unfreeze_cgroup(struct freezer *freezer)
+{
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&freezer->css, 0, &it);
+       while ((task = css_task_iter_next(&it)))
+               __thaw_task(task);
+       css_task_iter_end(&it);
+}
+
+/**
+ * freezer_apply_state - apply state change to a single cgroup_freezer
+ * @freezer: freezer to apply state change to
+ * @freeze: whether to freeze or unfreeze
+ * @state: CGROUP_FREEZING_* flag to set or clear
+ *
+ * Set or clear @state on @cgroup according to @freeze, and perform
+ * freezing or thawing as necessary.
+ */
+static void freezer_apply_state(struct freezer *freezer, bool freeze,
+                               unsigned int state)
+{
+       /* also synchronizes against task migration, see freezer_attach() */
+       lockdep_assert_held(&freezer_mutex);
+
+       if (!(freezer->state & CGROUP_FREEZER_ONLINE))
+               return;
+
+       if (freeze) {
+               if (!(freezer->state & CGROUP_FREEZING))
+                       atomic_inc(&system_freezing_cnt);
+               freezer->state |= state;
+               freeze_cgroup(freezer);
+       } else {
+               bool was_freezing = freezer->state & CGROUP_FREEZING;
+
+               freezer->state &= ~state;
+
+               if (!(freezer->state & CGROUP_FREEZING)) {
+                       if (was_freezing)
+                               atomic_dec(&system_freezing_cnt);
+                       freezer->state &= ~CGROUP_FROZEN;
+                       unfreeze_cgroup(freezer);
+               }
+       }
+}
+
+/**
+ * freezer_change_state - change the freezing state of a cgroup_freezer
+ * @freezer: freezer of interest
+ * @freeze: whether to freeze or thaw
+ *
+ * Freeze or thaw @freezer according to @freeze.  The operations are
+ * recursive - all descendants of @freezer will be affected.
+ */
+static void freezer_change_state(struct freezer *freezer, bool freeze)
+{
+       struct cgroup_subsys_state *pos;
+
+       /*
+        * Update all its descendants in pre-order traversal.  Each
+        * descendant will try to inherit its parent's FREEZING state as
+        * CGROUP_FREEZING_PARENT.
+        */
+       mutex_lock(&freezer_mutex);
+       rcu_read_lock();
+       css_for_each_descendant_pre(pos, &freezer->css) {
+               struct freezer *pos_f = css_freezer(pos);
+               struct freezer *parent = parent_freezer(pos_f);
+
+               if (!css_tryget_online(pos))
+                       continue;
+               rcu_read_unlock();
+
+               if (pos_f == freezer)
+                       freezer_apply_state(pos_f, freeze,
+                                           CGROUP_FREEZING_SELF);
+               else
+                       freezer_apply_state(pos_f,
+                                           parent->state & CGROUP_FREEZING,
+                                           CGROUP_FREEZING_PARENT);
+
+               rcu_read_lock();
+               css_put(pos);
+       }
+       rcu_read_unlock();
+       mutex_unlock(&freezer_mutex);
+}
+
+static ssize_t freezer_write(struct kernfs_open_file *of,
+                            char *buf, size_t nbytes, loff_t off)
+{
+       bool freeze;
+
+       buf = strstrip(buf);
+
+       if (strcmp(buf, freezer_state_strs(0)) == 0)
+               freeze = false;
+       else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
+               freeze = true;
+       else
+               return -EINVAL;
+
+       freezer_change_state(css_freezer(of_css(of)), freeze);
+       return nbytes;
+}
+
+static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
+                                     struct cftype *cft)
+{
+       struct freezer *freezer = css_freezer(css);
+
+       return (bool)(freezer->state & CGROUP_FREEZING_SELF);
+}
+
+static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
+                                       struct cftype *cft)
+{
+       struct freezer *freezer = css_freezer(css);
+
+       return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
+}
+
+static struct cftype files[] = {
+       {
+               .name = "state",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = freezer_read,
+               .write = freezer_write,
+       },
+       {
+               .name = "self_freezing",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = freezer_self_freezing_read,
+       },
+       {
+               .name = "parent_freezing",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = freezer_parent_freezing_read,
+       },
+       { }     /* terminate */
+};
+
+struct cgroup_subsys freezer_cgrp_subsys = {
+       .css_alloc      = freezer_css_alloc,
+       .css_online     = freezer_css_online,
+       .css_offline    = freezer_css_offline,
+       .css_free       = freezer_css_free,
+       .attach         = freezer_attach,
+       .fork           = freezer_fork,
+       .legacy_cftypes = files,
+};
index 8b03d93ba06828a50fcdd747a3abb829f6ba4a28..5359facf98675d7746e29f3883b07be4e242dca1 100644 (file)
@@ -1225,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child,
        int killed;
 
        freezer_do_not_count();
+       cgroup_enter_frozen();
        killed = wait_for_completion_killable(vfork);
+       cgroup_leave_frozen(false);
        freezer_count();
 
        if (killed) {
index cd83cc3767670f6fbe43577240922652df66d870..62f9aea4a15a0f6295145626ee5597970785d7d3 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/compiler.h>
 #include <linux/posix-timers.h>
 #include <linux/livepatch.h>
+#include <linux/cgroup.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 static bool recalc_sigpending_tsk(struct task_struct *t)
 {
-       if ((t->jobctl & JOBCTL_PENDING_MASK) ||
+       if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
            PENDING(&t->pending, &t->blocked) ||
-           PENDING(&t->signal->shared_pending, &t->blocked)) {
+           PENDING(&t->signal->shared_pending, &t->blocked) ||
+           cgroup_task_frozen(t)) {
                set_tsk_thread_flag(t, TIF_SIGPENDING);
                return true;
        }
@@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
                preempt_disable();
                read_unlock(&tasklist_lock);
                preempt_enable_no_resched();
+               cgroup_enter_frozen();
                freezable_schedule();
        } else {
                /*
@@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
                }
 
                /* Now we don't run again until woken by SIGCONT or SIGKILL */
+               cgroup_enter_frozen();
                freezable_schedule();
                return true;
        } else {
@@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
        }
 }
 
+/**
+ * do_freezer_trap - handle the freezer jobctl trap
+ *
+ * Puts the task into frozen state, unless the task is about to quit.
+ * In this case it drops JOBCTL_TRAP_FREEZE.
+ *
+ * CONTEXT:
+ * Must be called with @current->sighand->siglock held,
+ * which is always released before returning.
+ */
+static void do_freezer_trap(void)
+       __releases(&current->sighand->siglock)
+{
+       /*
+        * If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
+        * let's make another loop to give it a chance to be handled.
+        * In any case, we'll return back.
+        */
+       if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
+            JOBCTL_TRAP_FREEZE) {
+               spin_unlock_irq(&current->sighand->siglock);
+               return;
+       }
+
+       /*
+        * Now we're sure that there is no pending fatal signal and no
+        * pending traps. Clear TIF_SIGPENDING to not get out of schedule()
+        * immediately (if there is a non-fatal signal pending), and
+        * put the task into sleep.
+        */
+       __set_current_state(TASK_INTERRUPTIBLE);
+       clear_thread_flag(TIF_SIGPENDING);
+       spin_unlock_irq(&current->sighand->siglock);
+       cgroup_enter_frozen();
+       freezable_schedule();
+}
+
 static int ptrace_signal(int signr, kernel_siginfo_t *info)
 {
        /*
@@ -2452,9 +2493,24 @@ relock:
                    do_signal_stop(0))
                        goto relock;
 
-               if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
-                       do_jobctl_trap();
+               if (unlikely(current->jobctl &
+                            (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
+                       if (current->jobctl & JOBCTL_TRAP_MASK) {
+                               do_jobctl_trap();
+                               spin_unlock_irq(&sighand->siglock);
+                       } else if (current->jobctl & JOBCTL_TRAP_FREEZE)
+                               do_freezer_trap();
+
+                       goto relock;
+               }
+
+               /*
+                * If the task is leaving the frozen state, let's update
+                * cgroup counters and reset the frozen bit.
+                */
+               if (unlikely(cgroup_task_frozen(current))) {
                        spin_unlock_irq(&sighand->siglock);
+                       cgroup_leave_frozen(false);
                        goto relock;
                }
 
@@ -2550,6 +2606,8 @@ relock:
 
        fatal:
                spin_unlock_irq(&sighand->siglock);
+               if (unlikely(cgroup_task_frozen(current)))
+                       cgroup_leave_frozen(true);
 
                /*
                 * Anything else is fatal, maybe with a core dump.
index adacda50a4b211e64bb40489487e9727402806a8..7f9835624793f061575023eee9bed9a2788e8f3c 100644 (file)
@@ -1,2 +1,3 @@
 test_memcontrol
 test_core
+test_freezer
index 23fbaa4a9630b2176bd47fda3b902db51ab6c6d7..8d369b6a20698035297983cd2c7e6e6643f7de2b 100644 (file)
@@ -5,8 +5,10 @@ all:
 
 TEST_GEN_PROGS = test_memcontrol
 TEST_GEN_PROGS += test_core
+TEST_GEN_PROGS += test_freezer
 
 include ../lib.mk
 
 $(OUTPUT)/test_memcontrol: cgroup_util.c
 $(OUTPUT)/test_core: cgroup_util.c
+$(OUTPUT)/test_freezer: cgroup_util.c
index 14c9fe2848062f0c2a8c37004f087d07b1d976f6..4c223266299aa7c90c1288bcbed7fd891f802f3d 100644 (file)
@@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index)
        return ret;
 }
 
+char *cg_control(const char *cgroup, const char *control)
+{
+       size_t len = strlen(cgroup) + strlen(control) + 2;
+       char *ret = malloc(len);
+
+       snprintf(ret, len, "%s/%s", cgroup, control);
+
+       return ret;
+}
+
 int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
 {
        char path[PATH_MAX];
@@ -196,7 +206,32 @@ int cg_create(const char *cgroup)
        return mkdir(cgroup, 0644);
 }
 
-static int cg_killall(const char *cgroup)
+int cg_wait_for_proc_count(const char *cgroup, int count)
+{
+       char buf[10 * PAGE_SIZE] = {0};
+       int attempts;
+       char *ptr;
+
+       for (attempts = 10; attempts >= 0; attempts--) {
+               int nr = 0;
+
+               if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+                       break;
+
+               for (ptr = buf; *ptr; ptr++)
+                       if (*ptr == '\n')
+                               nr++;
+
+               if (nr >= count)
+                       return 0;
+
+               usleep(100000);
+       }
+
+       return -1;
+}
+
+int cg_killall(const char *cgroup)
 {
        char buf[PAGE_SIZE];
        char *ptr = buf;
@@ -227,9 +262,7 @@ int cg_destroy(const char *cgroup)
 retry:
        ret = rmdir(cgroup);
        if (ret && errno == EBUSY) {
-               ret = cg_killall(cgroup);
-               if (ret)
-                       return ret;
+               cg_killall(cgroup);
                usleep(100);
                goto retry;
        }
@@ -240,6 +273,14 @@ retry:
        return ret;
 }
 
+int cg_enter(const char *cgroup, int pid)
+{
+       char pidbuf[64];
+
+       snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
+       return cg_write(cgroup, "cgroup.procs", pidbuf);
+}
+
 int cg_enter_current(const char *cgroup)
 {
        char pidbuf[64];
@@ -369,3 +410,12 @@ int set_oom_adj_score(int pid, int score)
        close(fd);
        return 0;
 }
+
+char proc_read_text(int pid, const char *item, char *buf, size_t size)
+{
+       char path[PATH_MAX];
+
+       snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
+
+       return read_text(path, buf, size);
+}
index 9ac8b7958f83b26a268f54f19acb6aeeda391b75..c72f28046bfa28e502a9c200d02559bdade4836c 100644 (file)
@@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err)
 extern int cg_find_unified_root(char *root, size_t len);
 extern char *cg_name(const char *root, const char *name);
 extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern char *cg_control(const char *cgroup, const char *control);
 extern int cg_create(const char *cgroup);
 extern int cg_destroy(const char *cgroup);
 extern int cg_read(const char *cgroup, const char *control,
@@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf);
 extern int cg_run(const char *cgroup,
                  int (*fn)(const char *cgroup, void *arg),
                  void *arg);
+extern int cg_enter(const char *cgroup, int pid);
 extern int cg_enter_current(const char *cgroup);
 extern int cg_run_nowait(const char *cgroup,
                         int (*fn)(const char *cgroup, void *arg),
@@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size);
 extern int alloc_anon(const char *cgroup, void *arg);
 extern int is_swap_enabled(void);
 extern int set_oom_adj_score(int pid, int score);
+extern int cg_wait_for_proc_count(const char *cgroup, int count);
+extern int cg_killall(const char *cgroup);
+extern char proc_read_text(int pid, const char *item, char *buf, size_t size);
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
new file mode 100644 (file)
index 0000000..2bfddb6
--- /dev/null
@@ -0,0 +1,851 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdbool.h>
+#include <linux/limits.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+#define DEBUG
+#ifdef DEBUG
+#define debug(args...) fprintf(stderr, args)
+#else
+#define debug(args...)
+#endif
+
+/*
+ * Check if the cgroup is frozen by looking at the cgroup.events::frozen value.
+ */
+static int cg_check_frozen(const char *cgroup, bool frozen)
+{
+       if (frozen) {
+               if (cg_read_strstr(cgroup, "cgroup.events", "frozen 1") != 0) {
+                       debug("Cgroup %s isn't frozen\n", cgroup);
+                       return -1;
+               }
+       } else {
+               /*
+                * Check the cgroup.events::frozen value.
+                */
+               if (cg_read_strstr(cgroup, "cgroup.events", "frozen 0") != 0) {
+                       debug("Cgroup %s is frozen\n", cgroup);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Freeze the given cgroup.
+ */
+static int cg_freeze_nowait(const char *cgroup, bool freeze)
+{
+       return cg_write(cgroup, "cgroup.freeze", freeze ? "1" : "0");
+}
+
+/*
+ * Prepare for waiting on cgroup.events file.
+ */
+static int cg_prepare_for_wait(const char *cgroup)
+{
+       int fd, ret = -1;
+
+       fd = inotify_init1(0);
+       if (fd == -1) {
+               debug("Error: inotify_init1() failed\n");
+               return fd;
+       }
+
+       ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
+                               IN_MODIFY);
+       if (ret == -1) {
+               debug("Error: inotify_add_watch() failed\n");
+               close(fd);
+       }
+
+       return fd;
+}
+
+/*
+ * Wait for an event. If there are no events for 10 seconds,
+ * treat this as an error.
+ */
+static int cg_wait_for(int fd)
+{
+       int ret = -1;
+       struct pollfd fds = {
+               .fd = fd,
+               .events = POLLIN,
+       };
+
+       while (true) {
+               ret = poll(&fds, 1, 10000);
+
+               if (ret == -1) {
+                       if (errno == EINTR)
+                               continue;
+                       debug("Error: poll() failed\n");
+                       break;
+               }
+
+               if (ret > 0 && fds.revents & POLLIN) {
+                       ret = 0;
+                       break;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Attach a task to the given cgroup and wait for a cgroup frozen event.
+ * All transient events (e.g. populated) are ignored.
+ */
+static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid,
+                                       bool frozen)
+{
+       int fd, ret = -1;
+       int attempts;
+
+       fd = cg_prepare_for_wait(cgroup);
+       if (fd < 0)
+               return fd;
+
+       ret = cg_enter(cgroup, pid);
+       if (ret)
+               goto out;
+
+       for (attempts = 0; attempts < 10; attempts++) {
+               ret = cg_wait_for(fd);
+               if (ret)
+                       break;
+
+               ret = cg_check_frozen(cgroup, frozen);
+               if (ret)
+                       continue;
+       }
+
+out:
+       close(fd);
+       return ret;
+}
+
+/*
+ * Freeze the given cgroup and wait for the inotify signal.
+ * If there are no events in 10 seconds, treat this as an error.
+ * Then check that the cgroup is in the desired state.
+ */
+static int cg_freeze_wait(const char *cgroup, bool freeze)
+{
+       int fd, ret = -1;
+
+       fd = cg_prepare_for_wait(cgroup);
+       if (fd < 0)
+               return fd;
+
+       ret = cg_freeze_nowait(cgroup, freeze);
+       if (ret) {
+               debug("Error: cg_freeze_nowait() failed\n");
+               goto out;
+       }
+
+       ret = cg_wait_for(fd);
+       if (ret)
+               goto out;
+
+       ret = cg_check_frozen(cgroup, freeze);
+out:
+       close(fd);
+       return ret;
+}
+
+/*
+ * A simple process running in a sleep loop until being
+ * re-parented.
+ */
+static int child_fn(const char *cgroup, void *arg)
+{
+       int ppid = getppid();
+
+       while (getppid() == ppid)
+               usleep(1000);
+
+       return getppid() == ppid;
+}
+
+/*
+ * A simple test for the cgroup freezer: populates the cgroup with 100
+ * running processes and freeze it. Then unfreeze it. Then it kills all
+ * processes and destroys the cgroup.
+ */
+static int test_cgfreezer_simple(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *cgroup = NULL;
+       int i;
+
+       cgroup = cg_name(root, "cg_test_simple");
+       if (!cgroup)
+               goto cleanup;
+
+       if (cg_create(cgroup))
+               goto cleanup;
+
+       for (i = 0; i < 100; i++)
+               cg_run_nowait(cgroup, child_fn, NULL);
+
+       if (cg_wait_for_proc_count(cgroup, 100))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup, false))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, true))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, false))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup)
+               cg_destroy(cgroup);
+       free(cgroup);
+       return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ *       A
+ *    / / \ \
+ *   B  E  I K
+ *  /\  |
+ * C  D F
+ *      |
+ *      G
+ *      |
+ *      H
+ *
+ * with a process in C, H and 3 processes in K.
+ * Then it tries to freeze and unfreeze the whole tree.
+ */
+static int test_cgfreezer_tree(const char *root)
+{
+       char *cgroup[10] = {0};
+       int ret = KSFT_FAIL;
+       int i;
+
+       cgroup[0] = cg_name(root, "cg_test_tree_A");
+       if (!cgroup[0])
+               goto cleanup;
+
+       cgroup[1] = cg_name(cgroup[0], "B");
+       if (!cgroup[1])
+               goto cleanup;
+
+       cgroup[2] = cg_name(cgroup[1], "C");
+       if (!cgroup[2])
+               goto cleanup;
+
+       cgroup[3] = cg_name(cgroup[1], "D");
+       if (!cgroup[3])
+               goto cleanup;
+
+       cgroup[4] = cg_name(cgroup[0], "E");
+       if (!cgroup[4])
+               goto cleanup;
+
+       cgroup[5] = cg_name(cgroup[4], "F");
+       if (!cgroup[5])
+               goto cleanup;
+
+       cgroup[6] = cg_name(cgroup[5], "G");
+       if (!cgroup[6])
+               goto cleanup;
+
+       cgroup[7] = cg_name(cgroup[6], "H");
+       if (!cgroup[7])
+               goto cleanup;
+
+       cgroup[8] = cg_name(cgroup[0], "I");
+       if (!cgroup[8])
+               goto cleanup;
+
+       cgroup[9] = cg_name(cgroup[0], "K");
+       if (!cgroup[9])
+               goto cleanup;
+
+       for (i = 0; i < 10; i++)
+               if (cg_create(cgroup[i]))
+                       goto cleanup;
+
+       cg_run_nowait(cgroup[2], child_fn, NULL);
+       cg_run_nowait(cgroup[7], child_fn, NULL);
+       cg_run_nowait(cgroup[9], child_fn, NULL);
+       cg_run_nowait(cgroup[9], child_fn, NULL);
+       cg_run_nowait(cgroup[9], child_fn, NULL);
+
+       /*
+        * Wait until all child processes have entered the
+        * corresponding cgroups.
+        */
+
+       if (cg_wait_for_proc_count(cgroup[2], 1) ||
+           cg_wait_for_proc_count(cgroup[7], 1) ||
+           cg_wait_for_proc_count(cgroup[9], 3))
+               goto cleanup;
+
+       /*
+        * Freeze B.
+        */
+       if (cg_freeze_wait(cgroup[1], true))
+               goto cleanup;
+
+       /*
+        * Freeze F.
+        */
+       if (cg_freeze_wait(cgroup[5], true))
+               goto cleanup;
+
+       /*
+        * Freeze G.
+        */
+       if (cg_freeze_wait(cgroup[6], true))
+               goto cleanup;
+
+       /*
+        * Check that A and E are not frozen.
+        */
+       if (cg_check_frozen(cgroup[0], false))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[4], false))
+               goto cleanup;
+
+       /*
+        * Freeze A. Check that A, B and E are frozen.
+        */
+       if (cg_freeze_wait(cgroup[0], true))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[1], true))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[4], true))
+               goto cleanup;
+
+       /*
+        * Unfreeze B, F and G
+        */
+       if (cg_freeze_nowait(cgroup[1], false))
+               goto cleanup;
+
+       if (cg_freeze_nowait(cgroup[5], false))
+               goto cleanup;
+
+       if (cg_freeze_nowait(cgroup[6], false))
+               goto cleanup;
+
+       /*
+        * Check that C and H are still frozen.
+        */
+       if (cg_check_frozen(cgroup[2], true))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[7], true))
+               goto cleanup;
+
+       /*
+        * Unfreeze A. Check that A, C and K are not frozen.
+        */
+       if (cg_freeze_wait(cgroup[0], false))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[2], false))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[9], false))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       for (i = 9; i >= 0 && cgroup[i]; i--) {
+               cg_destroy(cgroup[i]);
+               free(cgroup[i]);
+       }
+
+       return ret;
+}
+
+/*
+ * A fork bomb emulator.
+ */
+static int forkbomb_fn(const char *cgroup, void *arg)
+{
+       int ppid;
+
+       fork();
+       fork();
+
+       ppid = getppid();
+
+       while (getppid() == ppid)
+               usleep(1000);
+
+       return getppid() == ppid;
+}
+
+/*
+ * The test runs a fork bomb in a cgroup and tries to freeze it.
+ * Then it kills all processes and checks that cgroup isn't populated
+ * anymore.
+ */
+static int test_cgfreezer_forkbomb(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *cgroup = NULL;
+
+       cgroup = cg_name(root, "cg_forkbomb_test");
+       if (!cgroup)
+               goto cleanup;
+
+       if (cg_create(cgroup))
+               goto cleanup;
+
+       cg_run_nowait(cgroup, forkbomb_fn, NULL);
+
+       usleep(100000);
+
+       if (cg_freeze_wait(cgroup, true))
+               goto cleanup;
+
+       if (cg_killall(cgroup))
+               goto cleanup;
+
+       if (cg_wait_for_proc_count(cgroup, 0))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup)
+               cg_destroy(cgroup);
+       free(cgroup);
+       return ret;
+}
+
+/*
+ * The test creates two nested cgroups, freezes the parent
+ * and removes the child. Then it checks that the parent cgroup
+ * remains frozen and it's possible to create a new child
+ * without unfreezing. The new child is frozen too.
+ */
+static int test_cgfreezer_rmdir(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *parent, *child = NULL;
+
+       parent = cg_name(root, "cg_test_rmdir_A");
+       if (!parent)
+               goto cleanup;
+
+       child = cg_name(parent, "cg_test_rmdir_B");
+       if (!child)
+               goto cleanup;
+
+       if (cg_create(parent))
+               goto cleanup;
+
+       if (cg_create(child))
+               goto cleanup;
+
+       if (cg_freeze_wait(parent, true))
+               goto cleanup;
+
+       if (cg_destroy(child))
+               goto cleanup;
+
+       if (cg_check_frozen(parent, true))
+               goto cleanup;
+
+       if (cg_create(child))
+               goto cleanup;
+
+       if (cg_check_frozen(child, true))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (child)
+               cg_destroy(child);
+       free(child);
+       if (parent)
+               cg_destroy(parent);
+       free(parent);
+       return ret;
+}
+
+/*
+ * The test creates two cgroups: A and B, runs a process in A
+ * and performs several migrations:
+ * 1) A (running) -> B (frozen)
+ * 2) B (frozen) -> A (running)
+ * 3) A (frozen) -> B (frozen)
+ *
+ * On each step it checks the actual state of both cgroups.
+ */
+static int test_cgfreezer_migrate(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *cgroup[2] = {0};
+       int pid;
+
+       cgroup[0] = cg_name(root, "cg_test_migrate_A");
+       if (!cgroup[0])
+               goto cleanup;
+
+       cgroup[1] = cg_name(root, "cg_test_migrate_B");
+       if (!cgroup[1])
+               goto cleanup;
+
+       if (cg_create(cgroup[0]))
+               goto cleanup;
+
+       if (cg_create(cgroup[1]))
+               goto cleanup;
+
+       pid = cg_run_nowait(cgroup[0], child_fn, NULL);
+       if (pid < 0)
+               goto cleanup;
+
+       if (cg_wait_for_proc_count(cgroup[0], 1))
+               goto cleanup;
+
+       /*
+        * Migrate from A (running) to B (frozen)
+        */
+       if (cg_freeze_wait(cgroup[1], true))
+               goto cleanup;
+
+       if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[0], false))
+               goto cleanup;
+
+       /*
+        * Migrate from B (frozen) to A (running)
+        */
+       if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[1], true))
+               goto cleanup;
+
+       /*
+        * Migrate from A (frozen) to B (frozen)
+        */
+       if (cg_freeze_wait(cgroup[0], true))
+               goto cleanup;
+
+       if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup[0], true))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup[0])
+               cg_destroy(cgroup[0]);
+       free(cgroup[0]);
+       if (cgroup[1])
+               cg_destroy(cgroup[1]);
+       free(cgroup[1]);
+       return ret;
+}
+
+/*
+ * The test checks that ptrace works with a tracing process in a frozen cgroup.
+ */
+static int test_cgfreezer_ptrace(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *cgroup = NULL;
+       siginfo_t siginfo;
+       int pid;
+
+       cgroup = cg_name(root, "cg_test_ptrace");
+       if (!cgroup)
+               goto cleanup;
+
+       if (cg_create(cgroup))
+               goto cleanup;
+
+       pid = cg_run_nowait(cgroup, child_fn, NULL);
+       if (pid < 0)
+               goto cleanup;
+
+       if (cg_wait_for_proc_count(cgroup, 1))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, true))
+               goto cleanup;
+
+       if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
+               goto cleanup;
+
+       if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
+               goto cleanup;
+
+       waitpid(pid, NULL, 0);
+
+       /*
+        * Cgroup has to remain frozen, however the test task
+        * is in traced state.
+        */
+       if (cg_check_frozen(cgroup, true))
+               goto cleanup;
+
+       if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
+               goto cleanup;
+
+       if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup, true))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup)
+               cg_destroy(cgroup);
+       free(cgroup);
+       return ret;
+}
+
+/*
+ * Check if the process is stopped.
+ */
+static int proc_check_stopped(int pid)
+{
+       char buf[PAGE_SIZE];
+       int len;
+
+       len = proc_read_text(pid, "stat", buf, sizeof(buf));
+       if (len == -1) {
+               debug("Can't get %d stat\n", pid);
+               return -1;
+       }
+
+       if (strstr(buf, "(test_freezer) T ") == NULL) {
+               debug("Process %d in the unexpected state: %s\n", pid, buf);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Test that it's possible to freeze a cgroup with a stopped process.
+ */
+static int test_cgfreezer_stopped(const char *root)
+{
+       int pid, ret = KSFT_FAIL;
+       char *cgroup = NULL;
+
+       cgroup = cg_name(root, "cg_test_stopped");
+       if (!cgroup)
+               goto cleanup;
+
+       if (cg_create(cgroup))
+               goto cleanup;
+
+       pid = cg_run_nowait(cgroup, child_fn, NULL);
+
+       if (cg_wait_for_proc_count(cgroup, 1))
+               goto cleanup;
+
+       if (kill(pid, SIGSTOP))
+               goto cleanup;
+
+       if (cg_check_frozen(cgroup, false))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, true))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, false))
+               goto cleanup;
+
+       if (proc_check_stopped(pid))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup)
+               cg_destroy(cgroup);
+       free(cgroup);
+       return ret;
+}
+
+/*
+ * Test that it's possible to freeze a cgroup with a ptraced process.
+ */
+static int test_cgfreezer_ptraced(const char *root)
+{
+       int pid, ret = KSFT_FAIL;
+       char *cgroup = NULL;
+       siginfo_t siginfo;
+
+       cgroup = cg_name(root, "cg_test_ptraced");
+       if (!cgroup)
+               goto cleanup;
+
+       if (cg_create(cgroup))
+               goto cleanup;
+
+       pid = cg_run_nowait(cgroup, child_fn, NULL);
+
+       if (cg_wait_for_proc_count(cgroup, 1))
+               goto cleanup;
+
+       if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
+               goto cleanup;
+
+       if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
+               goto cleanup;
+
+       waitpid(pid, NULL, 0);
+
+       if (cg_check_frozen(cgroup, false))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, true))
+               goto cleanup;
+
+       /*
+        * cg_check_frozen(cgroup, true) will fail here,
+        * because the task is in the TRACEd state.
+        */
+       if (cg_freeze_wait(cgroup, false))
+               goto cleanup;
+
+       if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
+               goto cleanup;
+
+       if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup)
+               cg_destroy(cgroup);
+       free(cgroup);
+       return ret;
+}
+
+static int vfork_fn(const char *cgroup, void *arg)
+{
+       int pid = vfork();
+
+       if (pid == 0)
+               while (true)
+                       sleep(1);
+
+       return pid;
+}
+
+/*
+ * Test that it's possible to freeze a cgroup with a process,
+ * which called vfork() and is waiting for a child.
+ */
+static int test_cgfreezer_vfork(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *cgroup = NULL;
+
+       cgroup = cg_name(root, "cg_test_vfork");
+       if (!cgroup)
+               goto cleanup;
+
+       if (cg_create(cgroup))
+               goto cleanup;
+
+       cg_run_nowait(cgroup, vfork_fn, NULL);
+
+       if (cg_wait_for_proc_count(cgroup, 2))
+               goto cleanup;
+
+       if (cg_freeze_wait(cgroup, true))
+               goto cleanup;
+
+       ret = KSFT_PASS;
+
+cleanup:
+       if (cgroup)
+               cg_destroy(cgroup);
+       free(cgroup);
+       return ret;
+}
+
+#define T(x) { x, #x }
+struct cgfreezer_test {
+       int (*fn)(const char *root);
+       const char *name;
+} tests[] = {
+       T(test_cgfreezer_simple),
+       T(test_cgfreezer_tree),
+       T(test_cgfreezer_forkbomb),
+       T(test_cgfreezer_rmdir),
+       T(test_cgfreezer_migrate),
+       T(test_cgfreezer_ptrace),
+       T(test_cgfreezer_stopped),
+       T(test_cgfreezer_ptraced),
+       T(test_cgfreezer_vfork),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+       char root[PATH_MAX];
+       int i, ret = EXIT_SUCCESS;
+
+       if (cg_find_unified_root(root, sizeof(root)))
+               ksft_exit_skip("cgroup v2 isn't mounted\n");
+       for (i = 0; i < ARRAY_SIZE(tests); i++) {
+               switch (tests[i].fn(root)) {
+               case KSFT_PASS:
+                       ksft_test_result_pass("%s\n", tests[i].name);
+                       break;
+               case KSFT_SKIP:
+                       ksft_test_result_skip("%s\n", tests[i].name);
+                       break;
+               default:
+                       ret = EXIT_FAILURE;
+                       ksft_test_result_fail("%s\n", tests[i].name);
+                       break;
+               }
+       }
+
+       return ret;
+}