Merge branch 'stable-4.8' of git://git.infradead.org/users/pcmoore/audit
[sfrench/cifs-2.6.git] / fs / proc / base.c
index da8b1943ba0421ed657a5c61a84723a3e236f39c..ac0df4dde823866b54480c7062d5188887009365 100644 (file)
@@ -579,11 +579,8 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
        unsigned long totalpages = totalram_pages + total_swap_pages;
        unsigned long points = 0;
 
-       read_lock(&tasklist_lock);
-       if (pid_alive(task))
-               points = oom_badness(task, NULL, NULL, totalpages) *
-                                               1000 / totalpages;
-       read_unlock(&tasklist_lock);
+       points = oom_badness(task, NULL, NULL, totalpages) *
+                                       1000 / totalpages;
        seq_printf(m, "%lu\n", points);
 
        return 0;
@@ -1024,23 +1021,107 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
        char buffer[PROC_NUMBUF];
        int oom_adj = OOM_ADJUST_MIN;
        size_t len;
-       unsigned long flags;
 
        if (!task)
                return -ESRCH;
-       if (lock_task_sighand(task, &flags)) {
-               if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
-                       oom_adj = OOM_ADJUST_MAX;
-               else
-                       oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
-                                 OOM_SCORE_ADJ_MAX;
-               unlock_task_sighand(task, &flags);
-       }
+       if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+               oom_adj = OOM_ADJUST_MAX;
+       else
+               oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
+                         OOM_SCORE_ADJ_MAX;
        put_task_struct(task);
        len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
        return simple_read_from_buffer(buf, count, ppos, buffer, len);
 }
 
+static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
+{
+       static DEFINE_MUTEX(oom_adj_mutex);
+       struct mm_struct *mm = NULL;
+       struct task_struct *task;
+       int err = 0;
+
+       task = get_proc_task(file_inode(file));
+       if (!task)
+               return -ESRCH;
+
+       mutex_lock(&oom_adj_mutex);
+       if (legacy) {
+               if (oom_adj < task->signal->oom_score_adj &&
+                               !capable(CAP_SYS_RESOURCE)) {
+                       err = -EACCES;
+                       goto err_unlock;
+               }
+               /*
+                * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
+                * /proc/pid/oom_score_adj instead.
+                */
+               pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+                         current->comm, task_pid_nr(current), task_pid_nr(task),
+                         task_pid_nr(task));
+       } else {
+               if ((short)oom_adj < task->signal->oom_score_adj_min &&
+                               !capable(CAP_SYS_RESOURCE)) {
+                       err = -EACCES;
+                       goto err_unlock;
+               }
+       }
+
+       /*
+        * Make sure we will check other processes sharing the mm if this is
+        * not vfork which wants its own oom_score_adj.
+        * Pin the mm so it doesn't go away and get reused after task_unlock.
+        */
+       if (!task->vfork_done) {
+               struct task_struct *p = find_lock_task_mm(task);
+
+               if (p) {
+                       if (atomic_read(&p->mm->mm_users) > 1) {
+                               mm = p->mm;
+                               atomic_inc(&mm->mm_count);
+                       }
+                       task_unlock(p);
+               }
+       }
+
+       task->signal->oom_score_adj = oom_adj;
+       if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+               task->signal->oom_score_adj_min = (short)oom_adj;
+       trace_oom_score_adj_update(task);
+
+       if (mm) {
+               struct task_struct *p;
+
+               rcu_read_lock();
+               for_each_process(p) {
+                       if (same_thread_group(task, p))
+                               continue;
+
+                       /* do not touch kernel threads or the global init */
+                       if (p->flags & PF_KTHREAD || is_global_init(p))
+                               continue;
+
+                       task_lock(p);
+                       if (!p->vfork_done && process_shares_mm(p, mm)) {
+                               pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
+                                               task_pid_nr(p), p->comm,
+                                               p->signal->oom_score_adj, oom_adj,
+                                               task_pid_nr(task), task->comm);
+                               p->signal->oom_score_adj = oom_adj;
+                               if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+                                       p->signal->oom_score_adj_min = (short)oom_adj;
+                       }
+                       task_unlock(p);
+               }
+               rcu_read_unlock();
+               mmdrop(mm);
+       }
+err_unlock:
+       mutex_unlock(&oom_adj_mutex);
+       put_task_struct(task);
+       return err;
+}
+
 /*
  * /proc/pid/oom_adj exists solely for backwards compatibility with previous
  * kernels.  The effective policy is defined by oom_score_adj, which has a
@@ -1054,10 +1135,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
 static ssize_t oom_adj_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
 {
-       struct task_struct *task;
        char buffer[PROC_NUMBUF];
        int oom_adj;
-       unsigned long flags;
        int err;
 
        memset(buffer, 0, sizeof(buffer));
@@ -1077,23 +1156,6 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
                goto out;
        }
 
-       task = get_proc_task(file_inode(file));
-       if (!task) {
-               err = -ESRCH;
-               goto out;
-       }
-
-       task_lock(task);
-       if (!task->mm) {
-               err = -EINVAL;
-               goto err_task_lock;
-       }
-
-       if (!lock_task_sighand(task, &flags)) {
-               err = -ESRCH;
-               goto err_task_lock;
-       }
-
        /*
         * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
         * value is always attainable.
@@ -1103,27 +1165,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
        else
                oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
 
-       if (oom_adj < task->signal->oom_score_adj &&
-           !capable(CAP_SYS_RESOURCE)) {
-               err = -EACCES;
-               goto err_sighand;
-       }
-
-       /*
-        * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
-        * /proc/pid/oom_score_adj instead.
-        */
-       pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
-                 current->comm, task_pid_nr(current), task_pid_nr(task),
-                 task_pid_nr(task));
-
-       task->signal->oom_score_adj = oom_adj;
-       trace_oom_score_adj_update(task);
-err_sighand:
-       unlock_task_sighand(task, &flags);
-err_task_lock:
-       task_unlock(task);
-       put_task_struct(task);
+       err = __set_oom_adj(file, oom_adj, true);
 out:
        return err < 0 ? err : count;
 }
@@ -1140,15 +1182,11 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
        struct task_struct *task = get_proc_task(file_inode(file));
        char buffer[PROC_NUMBUF];
        short oom_score_adj = OOM_SCORE_ADJ_MIN;
-       unsigned long flags;
        size_t len;
 
        if (!task)
                return -ESRCH;
-       if (lock_task_sighand(task, &flags)) {
-               oom_score_adj = task->signal->oom_score_adj;
-               unlock_task_sighand(task, &flags);
-       }
+       oom_score_adj = task->signal->oom_score_adj;
        put_task_struct(task);
        len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
        return simple_read_from_buffer(buf, count, ppos, buffer, len);
@@ -1157,9 +1195,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
                                        size_t count, loff_t *ppos)
 {
-       struct task_struct *task;
        char buffer[PROC_NUMBUF];
-       unsigned long flags;
        int oom_score_adj;
        int err;
 
@@ -1180,39 +1216,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
                goto out;
        }
 
-       task = get_proc_task(file_inode(file));
-       if (!task) {
-               err = -ESRCH;
-               goto out;
-       }
-
-       task_lock(task);
-       if (!task->mm) {
-               err = -EINVAL;
-               goto err_task_lock;
-       }
-
-       if (!lock_task_sighand(task, &flags)) {
-               err = -ESRCH;
-               goto err_task_lock;
-       }
-
-       if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
-                       !capable(CAP_SYS_RESOURCE)) {
-               err = -EACCES;
-               goto err_sighand;
-       }
-
-       task->signal->oom_score_adj = (short)oom_score_adj;
-       if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
-               task->signal->oom_score_adj_min = (short)oom_score_adj;
-       trace_oom_score_adj_update(task);
-
-err_sighand:
-       unlock_task_sighand(task, &flags);
-err_task_lock:
-       task_unlock(task);
-       put_task_struct(task);
+       err = __set_oom_adj(file, oom_score_adj, false);
 out:
        return err < 0 ? err : count;
 }
@@ -1815,12 +1819,17 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
 
        child = d_hash_and_lookup(dir, &qname);
        if (!child) {
-               child = d_alloc(dir, &qname);
-               if (!child)
-                       goto end_instantiate;
-               if (instantiate(d_inode(dir), child, task, ptr) < 0) {
-                       dput(child);
+               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+               child = d_alloc_parallel(dir, &qname, &wq);
+               if (IS_ERR(child))
                        goto end_instantiate;
+               if (d_in_lookup(child)) {
+                       int err = instantiate(d_inode(dir), child, task, ptr);
+                       d_lookup_done(child);
+                       if (err < 0) {
+                               dput(child);
+                               goto end_instantiate;
+                       }
                }
        }
        inode = d_inode(child);
@@ -2150,8 +2159,8 @@ out:
 
 static const struct file_operations proc_map_files_operations = {
        .read           = generic_read_dir,
-       .iterate        = proc_map_files_readdir,
-       .llseek         = default_llseek,
+       .iterate_shared = proc_map_files_readdir,
+       .llseek         = generic_file_llseek,
 };
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
@@ -2498,8 +2507,8 @@ static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
 
 static const struct file_operations proc_attr_dir_operations = {
        .read           = generic_read_dir,
-       .iterate        = proc_attr_dir_readdir,
-       .llseek         = default_llseek,
+       .iterate_shared = proc_attr_dir_readdir,
+       .llseek         = generic_file_llseek,
 };
 
 static struct dentry *proc_attr_dir_lookup(struct inode *dir,
@@ -2906,8 +2915,8 @@ static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
 
 static const struct file_operations proc_tgid_base_operations = {
        .read           = generic_read_dir,
-       .iterate        = proc_tgid_base_readdir,
-       .llseek         = default_llseek,
+       .iterate_shared = proc_tgid_base_readdir,
+       .llseek         = generic_file_llseek,
 };
 
 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -3152,6 +3161,44 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
        return 0;
 }
 
+/*
+ * proc_tid_comm_permission is a special permission function exclusively
+ * used for the node /proc/<pid>/task/<tid>/comm.
+ * It bypasses generic permission checks in the case where a task of the same
+ * thread group attempts to access the node.
+ * The rationale behind this is that glibc and bionic access this node for
+ * cross thread naming (pthread_set/getname_np(!self)). However, if
+ * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
+ * which locks out the cross thread naming implementation.
+ * This function makes sure that the node is always accessible for members of
+ * the same thread group.
+ */
+static int proc_tid_comm_permission(struct inode *inode, int mask)
+{
+       bool is_same_tgroup;
+       struct task_struct *task;
+
+       task = get_proc_task(inode);
+       if (!task)
+               return -ESRCH;
+       is_same_tgroup = same_thread_group(current, task);
+       put_task_struct(task);
+
+       if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
+               /* This file (/proc/<pid>/task/<tid>/comm) can always be
+                * read or written by the members of the corresponding
+                * thread group.
+                */
+               return 0;
+       }
+
+       return generic_permission(inode, mask);
+}
+
+static const struct inode_operations proc_tid_comm_inode_operations = {
+               .permission = proc_tid_comm_permission,
+};
+
 /*
  * Tasks
  */
@@ -3170,7 +3217,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
-       REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
+       NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
+                        &proc_tid_comm_inode_operations,
+                        &proc_pid_set_comm_operations, {}),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
        ONE("syscall",   S_IRUSR, proc_pid_syscall),
 #endif
@@ -3254,8 +3303,8 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
 
 static const struct file_operations proc_tid_base_operations = {
        .read           = generic_read_dir,
-       .iterate        = proc_tid_base_readdir,
-       .llseek         = default_llseek,
+       .iterate_shared = proc_tid_base_readdir,
+       .llseek         = generic_file_llseek,
 };
 
 static const struct inode_operations proc_tid_base_inode_operations = {
@@ -3465,6 +3514,6 @@ static const struct inode_operations proc_task_inode_operations = {
 
 static const struct file_operations proc_task_operations = {
        .read           = generic_read_dir,
-       .iterate        = proc_task_readdir,
-       .llseek         = default_llseek,
+       .iterate_shared = proc_task_readdir,
+       .llseek         = generic_file_llseek,
 };