Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
[sfrench/cifs-2.6.git] / kernel / cgroup.c
index 3a73f995a81e6167659a7b488f23a4feab3d2787..136eceadeed138b71fd55dfed22a9850b38dd776 100644 (file)
@@ -185,7 +185,6 @@ static int need_forkexit_callback __read_mostly;
 static struct cftype cgroup_dfl_base_files[];
 static struct cftype cgroup_legacy_base_files[];
 
-static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
                             unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
@@ -195,7 +194,6 @@ static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                              bool is_add);
-static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
 
 /* IDR wrappers which synchronize using cgroup_idr_lock */
 static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
@@ -331,14 +329,6 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
        return false;
 }
 
-static int cgroup_is_releasable(const struct cgroup *cgrp)
-{
-       const int bits =
-               (1 << CGRP_RELEASABLE) |
-               (1 << CGRP_NOTIFY_ON_RELEASE);
-       return (cgrp->flags & bits) == bits;
-}
-
 static int notify_on_release(const struct cgroup *cgrp)
 {
        return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -394,12 +384,7 @@ static int notify_on_release(const struct cgroup *cgrp)
                        ;                                               \
                else
 
-/* the list of cgroups eligible for automatic release. Protected by
- * release_list_lock */
-static LIST_HEAD(release_list);
-static DEFINE_RAW_SPINLOCK(release_list_lock);
 static void cgroup_release_agent(struct work_struct *work);
-static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
 /*
@@ -498,7 +483,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
        return key;
 }
 
-static void put_css_set_locked(struct css_set *cset, bool taskexit)
+static void put_css_set_locked(struct css_set *cset)
 {
        struct cgrp_cset_link *link, *tmp_link;
        struct cgroup_subsys *ss;
@@ -524,11 +509,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
                /* @cgrp can't go away while we're holding css_set_rwsem */
                if (list_empty(&cgrp->cset_links)) {
                        cgroup_update_populated(cgrp, false);
-                       if (notify_on_release(cgrp)) {
-                               if (taskexit)
-                                       set_bit(CGRP_RELEASABLE, &cgrp->flags);
-                               check_for_release(cgrp);
-                       }
+                       check_for_release(cgrp);
                }
 
                kfree(link);
@@ -537,7 +518,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
        kfree_rcu(cset, rcu_head);
 }
 
-static void put_css_set(struct css_set *cset, bool taskexit)
+static void put_css_set(struct css_set *cset)
 {
        /*
         * Ensure that the refcount doesn't hit zero while any readers
@@ -548,7 +529,7 @@ static void put_css_set(struct css_set *cset, bool taskexit)
                return;
 
        down_write(&css_set_rwsem);
-       put_css_set_locked(cset, taskexit);
+       put_css_set_locked(cset);
        up_write(&css_set_rwsem);
 }
 
@@ -969,14 +950,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * knows that the cgroup won't be removed, as cgroup_rmdir()
  * needs that mutex.
  *
- * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
- * (usually) take cgroup_mutex.  These are the two most performance
- * critical pieces of code here.  The exception occurs on cgroup_exit(),
- * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
- * is taken, and if the cgroup count is zero, a usermode call made
- * to the release agent with the name of the cgroup (path relative to
- * the root of cgroup file system) as the argument.
- *
  * A cgroup can only be deleted if both its 'count' of using tasks
  * is zero, and its list of 'children' cgroups is empty.  Since all
  * tasks in the system use _some_ cgroup, and since there is always at
@@ -1587,7 +1560,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_LIST_HEAD(&cgrp->self.sibling);
        INIT_LIST_HEAD(&cgrp->self.children);
        INIT_LIST_HEAD(&cgrp->cset_links);
-       INIT_LIST_HEAD(&cgrp->release_list);
        INIT_LIST_HEAD(&cgrp->pidlists);
        mutex_init(&cgrp->pidlist_mutex);
        cgrp->self.cgroup = cgrp;
@@ -1597,6 +1569,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
                INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
 
        init_waitqueue_head(&cgrp->offline_waitq);
+       INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -1634,7 +1607,8 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
                goto out;
        root_cgrp->id = ret;
 
-       ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+       ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
+                             GFP_KERNEL);
        if (ret)
                goto out;
 
@@ -2052,8 +2026,7 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
         * task. As trading it for new_cset is protected by cgroup_mutex,
         * we're safe to drop it here; it will be freed under RCU.
         */
-       set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
-       put_css_set_locked(old_cset, false);
+       put_css_set_locked(old_cset);
 }
 
 /**
@@ -2074,7 +2047,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
                cset->mg_src_cgrp = NULL;
                cset->mg_dst_cset = NULL;
                list_del_init(&cset->mg_preload_node);
-               put_css_set_locked(cset, false);
+               put_css_set_locked(cset);
        }
        up_write(&css_set_rwsem);
 }
@@ -2168,8 +2141,8 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
                if (src_cset == dst_cset) {
                        src_cset->mg_src_cgrp = NULL;
                        list_del_init(&src_cset->mg_preload_node);
-                       put_css_set(src_cset, false);
-                       put_css_set(dst_cset, false);
+                       put_css_set(src_cset);
+                       put_css_set(dst_cset);
                        continue;
                }
 
@@ -2178,7 +2151,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
                if (list_empty(&dst_cset->mg_preload_node))
                        list_add(&dst_cset->mg_preload_node, &csets);
                else
-                       put_css_set(dst_cset, false);
+                       put_css_set(dst_cset);
        }
 
        list_splice_tail(&csets, preloaded_csets);
@@ -4173,7 +4146,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
 static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
                                          struct cftype *cft, u64 val)
 {
-       clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
        if (val)
                set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
        else
@@ -4351,6 +4323,7 @@ static void css_free_work_fn(struct work_struct *work)
                /* cgroup free path */
                atomic_dec(&cgrp->root->nr_cgrps);
                cgroup_pidlist_destroy_all(cgrp);
+               cancel_work_sync(&cgrp->release_agent_work);
 
                if (cgroup_parent(cgrp)) {
                        /*
@@ -4510,7 +4483,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
 
        init_and_link_css(css, ss, cgrp);
 
-       err = percpu_ref_init(&css->refcnt, css_release);
+       err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
        if (err)
                goto err_free_css;
 
@@ -4583,7 +4556,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
                goto out_unlock;
        }
 
-       ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+       ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
        if (ret)
                goto out_free_cgrp;
 
@@ -4813,19 +4786,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
        for_each_css(css, ssid, cgrp)
                kill_css(css);
 
-       /* CSS_ONLINE is clear, remove from ->release_list for the last time */
-       raw_spin_lock(&release_list_lock);
-       if (!list_empty(&cgrp->release_list))
-               list_del_init(&cgrp->release_list);
-       raw_spin_unlock(&release_list_lock);
-
        /*
         * Remove @cgrp directory along with the base files.  @cgrp has an
         * extra ref on its kn.
         */
        kernfs_remove(cgrp->kn);
 
-       set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
        check_for_release(cgroup_parent(cgrp));
 
        /* put the base reference */
@@ -4842,13 +4808,10 @@ static int cgroup_rmdir(struct kernfs_node *kn)
        cgrp = cgroup_kn_lock_live(kn);
        if (!cgrp)
                return 0;
-       cgroup_get(cgrp);       /* for @kn->priv clearing */
 
        ret = cgroup_destroy_locked(cgrp);
 
        cgroup_kn_unlock(kn);
-
-       cgroup_put(cgrp);
        return ret;
 }
 
@@ -5052,12 +5015,9 @@ core_initcall(cgroup_wq_init);
  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
  *  - Used for /proc/<pid>/cgroup.
  */
-
-/* TODO: Use a proper seq_file iterator */
-int proc_cgroup_show(struct seq_file *m, void *v)
+int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+                    struct pid *pid, struct task_struct *tsk)
 {
-       struct pid *pid;
-       struct task_struct *tsk;
        char *buf, *path;
        int retval;
        struct cgroup_root *root;
@@ -5067,14 +5027,6 @@ int proc_cgroup_show(struct seq_file *m, void *v)
        if (!buf)
                goto out;
 
-       retval = -ESRCH;
-       pid = m->private;
-       tsk = get_pid_task(pid, PIDTYPE_PID);
-       if (!tsk)
-               goto out_free;
-
-       retval = 0;
-
        mutex_lock(&cgroup_mutex);
        down_read(&css_set_rwsem);
 
@@ -5104,11 +5056,10 @@ int proc_cgroup_show(struct seq_file *m, void *v)
                seq_putc(m, '\n');
        }
 
+       retval = 0;
 out_unlock:
        up_read(&css_set_rwsem);
        mutex_unlock(&cgroup_mutex);
-       put_task_struct(tsk);
-out_free:
        kfree(buf);
 out:
        return retval;
@@ -5179,7 +5130,7 @@ void cgroup_post_fork(struct task_struct *child)
        int i;
 
        /*
-        * This may race against cgroup_enable_task_cg_links().  As that
+        * This may race against cgroup_enable_task_cg_lists().  As that
         * function sets use_task_css_set_links before grabbing
         * tasklist_lock and we just went through tasklist_lock to add
         * @child, it's guaranteed that either we see the set
@@ -5194,7 +5145,7 @@ void cgroup_post_fork(struct task_struct *child)
         * when implementing operations which need to migrate all tasks of
         * a cgroup to another.
         *
-        * Note that if we lose to cgroup_enable_task_cg_links(), @child
+        * Note that if we lose to cgroup_enable_task_cg_lists(), @child
         * will remain in init_css_set.  This is safe because all tasks are
         * in the init_css_set before cg_links is enabled and there's no
         * operation which transfers all tasks out of init_css_set.
@@ -5278,30 +5229,14 @@ void cgroup_exit(struct task_struct *tsk)
        }
 
        if (put_cset)
-               put_css_set(cset, true);
+               put_css_set(cset);
 }
 
 static void check_for_release(struct cgroup *cgrp)
 {
-       if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
-           !css_has_online_children(&cgrp->self)) {
-               /*
-                * Control Group is currently removeable. If it's not
-                * already queued for a userspace notification, queue
-                * it now
-                */
-               int need_schedule_work = 0;
-
-               raw_spin_lock(&release_list_lock);
-               if (!cgroup_is_dead(cgrp) &&
-                   list_empty(&cgrp->release_list)) {
-                       list_add(&cgrp->release_list, &release_list);
-                       need_schedule_work = 1;
-               }
-               raw_spin_unlock(&release_list_lock);
-               if (need_schedule_work)
-                       schedule_work(&release_agent_work);
-       }
+       if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) &&
+           !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
+               schedule_work(&cgrp->release_agent_work);
 }
 
 /*
@@ -5329,52 +5264,39 @@ static void check_for_release(struct cgroup *cgrp)
  */
 static void cgroup_release_agent(struct work_struct *work)
 {
-       BUG_ON(work != &release_agent_work);
+       struct cgroup *cgrp =
+               container_of(work, struct cgroup, release_agent_work);
+       char *pathbuf = NULL, *agentbuf = NULL, *path;
+       char *argv[3], *envp[3];
+
        mutex_lock(&cgroup_mutex);
-       raw_spin_lock(&release_list_lock);
-       while (!list_empty(&release_list)) {
-               char *argv[3], *envp[3];
-               int i;
-               char *pathbuf = NULL, *agentbuf = NULL, *path;
-               struct cgroup *cgrp = list_entry(release_list.next,
-                                                   struct cgroup,
-                                                   release_list);
-               list_del_init(&cgrp->release_list);
-               raw_spin_unlock(&release_list_lock);
-               pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
-               if (!pathbuf)
-                       goto continue_free;
-               path = cgroup_path(cgrp, pathbuf, PATH_MAX);
-               if (!path)
-                       goto continue_free;
-               agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-               if (!agentbuf)
-                       goto continue_free;
-
-               i = 0;
-               argv[i++] = agentbuf;
-               argv[i++] = path;
-               argv[i] = NULL;
-
-               i = 0;
-               /* minimal command environment */
-               envp[i++] = "HOME=/";
-               envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-               envp[i] = NULL;
-
-               /* Drop the lock while we invoke the usermode helper,
-                * since the exec could involve hitting disk and hence
-                * be a slow process */
-               mutex_unlock(&cgroup_mutex);
-               call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-               mutex_lock(&cgroup_mutex);
- continue_free:
-               kfree(pathbuf);
-               kfree(agentbuf);
-               raw_spin_lock(&release_list_lock);
-       }
-       raw_spin_unlock(&release_list_lock);
+
+       pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+       agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+       if (!pathbuf || !agentbuf)
+               goto out;
+
+       path = cgroup_path(cgrp, pathbuf, PATH_MAX);
+       if (!path)
+               goto out;
+
+       argv[0] = agentbuf;
+       argv[1] = path;
+       argv[2] = NULL;
+
+       /* minimal command environment */
+       envp[0] = "HOME=/";
+       envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+       envp[2] = NULL;
+
+       mutex_unlock(&cgroup_mutex);
+       call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+       goto out_free;
+out:
        mutex_unlock(&cgroup_mutex);
+out_free:
+       kfree(agentbuf);
+       kfree(pathbuf);
 }
 
 static int __init cgroup_disable(char *str)
@@ -5562,7 +5484,8 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 
 static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-       return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
+       return (!cgroup_has_tasks(css->cgroup) &&
+               !css_has_online_children(&css->cgroup->self));
 }
 
 static struct cftype debug_files[] =  {