Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
[sfrench/cifs-2.6.git] / kernel / fork.c
index 389712ffc0ad073442800978e3aa098d8d72c64e..a31b823b3c2d6d4e6254128f00e36c6c1b048c37 100644 (file)
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
                                                  int node)
 {
-       struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+       struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
                                             THREAD_SIZE_ORDER);
 
        return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-       free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+       free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
        unsigned long charge;
        struct mempolicy *pol;
 
+       uprobe_start_dup_mmap();
        down_write(&oldmm->mmap_sem);
        flush_cache_dup_mm(oldmm);
        uprobe_dup_mmap(oldmm, mm);
@@ -469,6 +470,7 @@ out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
        up_write(&oldmm->mmap_sem);
+       uprobe_end_dup_mmap();
        return retval;
 fail_nomem_anon_vma_fork:
        mpol_put(pol);
@@ -820,6 +822,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        mm->pmd_huge_pte = NULL;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+       mm->first_nid = NUMA_PTE_SCAN_INIT;
 #endif
        if (!mm_init(mm, tsk))
                goto fail_nomem;
@@ -1039,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        atomic_set(&sig->live, 1);
        atomic_set(&sig->sigcnt, 1);
        init_waitqueue_head(&sig->wait_chldexit);
-       if (clone_flags & CLONE_NEWPID)
-               sig->flags |= SIGNAL_UNKILLABLE;
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
        INIT_LIST_HEAD(&sig->posix_timers);
@@ -1134,7 +1137,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 {
        int retval;
        struct task_struct *p;
-       int cgroup_callbacks_done = 0;
 
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
@@ -1221,7 +1223,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->utime = p->stime = p->gtime = 0;
        p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-       p->prev_utime = p->prev_stime = 0;
+       p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
 #if defined(SPLIT_RSS_COUNTING)
        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
@@ -1392,12 +1394,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
 
-       /* Now that the task is set up, run cgroup callbacks if
-        * necessary. We need to run them before the task is visible
-        * on the tasklist. */
-       cgroup_fork_callbacks(p);
-       cgroup_callbacks_done = 1;
-
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
@@ -1440,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
                if (thread_group_leader(p)) {
-                       if (is_child_reaper(pid))
-                               p->nsproxy->pid_ns->child_reaper = p;
+                       if (is_child_reaper(pid)) {
+                               ns_of_pid(pid)->child_reaper = p;
+                               p->signal->flags |= SIGNAL_UNKILLABLE;
+                       }
 
                        p->signal->leader_pid = pid;
                        p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1475,8 +1473,6 @@ bad_fork_cleanup_io:
        if (p->io_context)
                exit_io_context(p);
 bad_fork_cleanup_namespaces:
-       if (unlikely(clone_flags & CLONE_NEWPID))
-               pid_ns_release_proc(p->nsproxy->pid_ns);
        exit_task_namespaces(p);
 bad_fork_cleanup_mm:
        if (p->mm)
@@ -1502,7 +1498,7 @@ bad_fork_cleanup_cgroup:
 #endif
        if (clone_flags & CLONE_THREAD)
                threadgroup_change_end(current);
-       cgroup_exit(p, cgroup_callbacks_done);
+       cgroup_exit(p, 0);
        delayacct_tsk_free(p);
        module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
@@ -1556,15 +1552,9 @@ long do_fork(unsigned long clone_flags,
         * Do some preliminary argument and permissions checking before we
         * actually start allocating stuff
         */
-       if (clone_flags & CLONE_NEWUSER) {
-               if (clone_flags & CLONE_THREAD)
+       if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
+               if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
                        return -EINVAL;
-               /* hopefully this check will go away when userns support is
-                * complete
-                */
-               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
-                               !capable(CAP_SETGID))
-                       return -EPERM;
        }
 
        /*
@@ -1724,7 +1714,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
 {
        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
+                               CLONE_NEWUSER|CLONE_NEWPID))
                return -EINVAL;
        /*
         * Not implemented, but pretend it works if there is nothing to
@@ -1791,19 +1782,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
        struct fs_struct *fs, *new_fs = NULL;
        struct files_struct *fd, *new_fd = NULL;
+       struct cred *new_cred = NULL;
        struct nsproxy *new_nsproxy = NULL;
        int do_sysvsem = 0;
        int err;
 
-       err = check_unshare_flags(unshare_flags);
-       if (err)
-               goto bad_unshare_out;
-
+       /*
+        * If unsharing a user namespace must also unshare the thread.
+        */
+       if (unshare_flags & CLONE_NEWUSER)
+               unshare_flags |= CLONE_THREAD;
+       /*
+        * If unsharing a pid namespace must also unshare the thread.
+        */
+       if (unshare_flags & CLONE_NEWPID)
+               unshare_flags |= CLONE_THREAD;
+       /*
+        * If unsharing a thread from a thread group, must also unshare vm.
+        */
+       if (unshare_flags & CLONE_THREAD)
+               unshare_flags |= CLONE_VM;
+       /*
+        * If unsharing vm, must also unshare signal handlers.
+        */
+       if (unshare_flags & CLONE_VM)
+               unshare_flags |= CLONE_SIGHAND;
        /*
         * If unsharing namespace, must also unshare filesystem information.
         */
        if (unshare_flags & CLONE_NEWNS)
                unshare_flags |= CLONE_FS;
+
+       err = check_unshare_flags(unshare_flags);
+       if (err)
+               goto bad_unshare_out;
        /*
         * CLONE_NEWIPC must also detach from the undolist: after switching
         * to a new ipc namespace, the semaphore arrays from the old
@@ -1817,11 +1829,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
        err = unshare_fd(unshare_flags, &new_fd);
        if (err)
                goto bad_unshare_cleanup_fs;
-       err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+       err = unshare_userns(unshare_flags, &new_cred);
        if (err)
                goto bad_unshare_cleanup_fd;
+       err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+                                        new_cred, new_fs);
+       if (err)
+               goto bad_unshare_cleanup_cred;
 
-       if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+       if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
                if (do_sysvsem) {
                        /*
                         * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1854,11 +1870,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
                }
 
                task_unlock(current);
+
+               if (new_cred) {
+                       /* Install the new user namespace */
+                       commit_creds(new_cred);
+                       new_cred = NULL;
+               }
        }
 
        if (new_nsproxy)
                put_nsproxy(new_nsproxy);
 
+bad_unshare_cleanup_cred:
+       if (new_cred)
+               put_cred(new_cred);
 bad_unshare_cleanup_fd:
        if (new_fd)
                put_files_struct(new_fd);