Merge master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6
[sfrench/cifs-2.6.git] / fs / exec.c
index 073b0b8c6d055a3fd947f16fbdbe5552a6bc34c8..2c942e2d14ea2b3ebf2fb51bcf692e3a37565ae5 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -29,6 +29,7 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
+#include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
@@ -63,52 +64,28 @@ int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
 int suid_dumpable = 0;
 
-EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
-static struct linux_binfmt *formats;
+static LIST_HEAD(formats);
 static DEFINE_RWLOCK(binfmt_lock);
 
 int register_binfmt(struct linux_binfmt * fmt)
 {
-       struct linux_binfmt ** tmp = &formats;
-
        if (!fmt)
                return -EINVAL;
-       if (fmt->next)
-               return -EBUSY;
        write_lock(&binfmt_lock);
-       while (*tmp) {
-               if (fmt == *tmp) {
-                       write_unlock(&binfmt_lock);
-                       return -EBUSY;
-               }
-               tmp = &(*tmp)->next;
-       }
-       fmt->next = formats;
-       formats = fmt;
+       list_add(&fmt->lh, &formats);
        write_unlock(&binfmt_lock);
        return 0;       
 }
 
 EXPORT_SYMBOL(register_binfmt);
 
-int unregister_binfmt(struct linux_binfmt * fmt)
+void unregister_binfmt(struct linux_binfmt * fmt)
 {
-       struct linux_binfmt ** tmp = &formats;
-
        write_lock(&binfmt_lock);
-       while (*tmp) {
-               if (fmt == *tmp) {
-                       *tmp = fmt->next;
-                       fmt->next = NULL;
-                       write_unlock(&binfmt_lock);
-                       return 0;
-               }
-               tmp = &(*tmp)->next;
-       }
+       list_del(&fmt->lh);
        write_unlock(&binfmt_lock);
-       return -EINVAL;
 }
 
 EXPORT_SYMBOL(unregister_binfmt);
@@ -134,9 +111,6 @@ asmlinkage long sys_uselib(const char __user * library)
        if (error)
                goto out;
 
-       error = -EACCES;
-       if (nd.mnt->mnt_flags & MNT_NOEXEC)
-               goto exit;
        error = -EINVAL;
        if (!S_ISREG(nd.dentry->d_inode->i_mode))
                goto exit;
@@ -155,7 +129,7 @@ asmlinkage long sys_uselib(const char __user * library)
                struct linux_binfmt * fmt;
 
                read_lock(&binfmt_lock);
-               for (fmt = formats ; fmt ; fmt = fmt->next) {
+               list_for_each_entry(fmt, &formats, lh) {
                        if (!fmt->load_shlib)
                                continue;
                        if (!try_module_get(fmt->module))
@@ -260,7 +234,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
        vma->vm_start = vma->vm_end - PAGE_SIZE;
 
        vma->vm_flags = VM_STACK_FLAGS;
-       vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+       vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        err = insert_vm_struct(mm, vma);
        if (err) {
                up_write(&mm->mmap_sem);
@@ -680,8 +654,7 @@ struct file *open_exec(const char *name)
        if (!err) {
                struct inode *inode = nd.dentry->d_inode;
                file = ERR_PTR(-EACCES);
-               if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
-                   S_ISREG(inode->i_mode)) {
+               if (S_ISREG(inode->i_mode)) {
                        int err = vfs_permission(&nd, MAY_EXEC);
                        file = ERR_PTR(err);
                        if (!err) {
@@ -773,24 +746,11 @@ static int exec_mmap(struct mm_struct *mm)
 static int de_thread(struct task_struct *tsk)
 {
        struct signal_struct *sig = tsk->signal;
-       struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
+       struct sighand_struct *oldsighand = tsk->sighand;
        spinlock_t *lock = &oldsighand->siglock;
        struct task_struct *leader = NULL;
        int count;
 
-       /*
-        * If we don't share sighandlers, then we aren't sharing anything
-        * and we can just re-use it all.
-        */
-       if (atomic_read(&oldsighand->count) <= 1) {
-               exit_itimers(sig);
-               return 0;
-       }
-
-       newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-       if (!newsighand)
-               return -ENOMEM;
-
        if (thread_group_empty(tsk))
                goto no_thread_group;
 
@@ -807,7 +767,6 @@ static int de_thread(struct task_struct *tsk)
                 */
                spin_unlock_irq(lock);
                read_unlock(&tasklist_lock);
-               kmem_cache_free(sighand_cachep, newsighand);
                return -EAGAIN;
        }
 
@@ -816,8 +775,8 @@ static int de_thread(struct task_struct *tsk)
         * Reparenting needs write_lock on tasklist_lock,
         * so it is safe to do it under read_lock.
         */
-       if (unlikely(tsk->group_leader == child_reaper(tsk)))
-               tsk->nsproxy->pid_ns->child_reaper = tsk;
+       if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
+               task_active_pid_ns(tsk)->child_reaper = tsk;
 
        zap_other_threads(tsk);
        read_unlock(&tasklist_lock);
@@ -841,16 +800,15 @@ static int de_thread(struct task_struct *tsk)
                        hrtimer_restart(&sig->real_timer);
                spin_lock_irq(lock);
        }
+
+       sig->notify_count = count;
+       sig->group_exit_task = tsk;
        while (atomic_read(&sig->count) > count) {
-               sig->group_exit_task = tsk;
-               sig->notify_count = count;
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(lock);
                schedule();
                spin_lock_irq(lock);
        }
-       sig->group_exit_task = NULL;
-       sig->notify_count = 0;
        spin_unlock_irq(lock);
 
        /*
@@ -859,14 +817,17 @@ static int de_thread(struct task_struct *tsk)
         * and to assume its PID:
         */
        if (!thread_group_leader(tsk)) {
-               /*
-                * Wait for the thread group leader to be a zombie.
-                * It should already be zombie at this point, most
-                * of the time.
-                */
                leader = tsk->group_leader;
-               while (leader->exit_state != EXIT_ZOMBIE)
-                       yield();
+
+               sig->notify_count = -1;
+               for (;;) {
+                       write_lock_irq(&tasklist_lock);
+                       if (likely(leader->exit_state))
+                               break;
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tasklist_lock);
+                       schedule();
+               }
 
                /*
                 * The only record we have of the real-time age of a
@@ -880,10 +841,8 @@ static int de_thread(struct task_struct *tsk)
                 */
                tsk->start_time = leader->start_time;
 
-               write_lock_irq(&tasklist_lock);
-
-               BUG_ON(leader->tgid != tsk->tgid);
-               BUG_ON(tsk->pid == tsk->tgid);
+               BUG_ON(!same_thread_group(leader, tsk));
+               BUG_ON(has_group_leader_pid(tsk));
                /*
                 * An exec() starts a new thread group with the
                 * TGID of the previous thread group. Rehash the
@@ -898,7 +857,7 @@ static int de_thread(struct task_struct *tsk)
                 */
                detach_pid(tsk, PIDTYPE_PID);
                tsk->pid = leader->pid;
-               attach_pid(tsk, PIDTYPE_PID,  find_pid(tsk->pid));
+               attach_pid(tsk, PIDTYPE_PID,  task_pid(leader));
                transfer_pid(leader, tsk, PIDTYPE_PGID);
                transfer_pid(leader, tsk, PIDTYPE_SID);
                list_replace_rcu(&leader->tasks, &tsk->tasks);
@@ -914,6 +873,8 @@ static int de_thread(struct task_struct *tsk)
                write_unlock_irq(&tasklist_lock);
         }
 
+       sig->group_exit_task = NULL;
+       sig->notify_count = 0;
        /*
         * There may be one thread left which is just exiting,
         * but it's safe to stop telling the group to kill themselves.
@@ -925,29 +886,23 @@ no_thread_group:
        if (leader)
                release_task(leader);
 
-       if (atomic_read(&oldsighand->count) == 1) {
-               /*
-                * Now that we nuked the rest of the thread group,
-                * it turns out we are not sharing sighand any more either.
-                * So we can just keep it.
-                */
-               kmem_cache_free(sighand_cachep, newsighand);
-       } else {
+       if (atomic_read(&oldsighand->count) != 1) {
+               struct sighand_struct *newsighand;
                /*
-                * Move our state over to newsighand and switch it in.
+                * This ->sighand is shared with a task that was cloned with
+                * CLONE_SIGHAND but not CLONE_THREAD; switch to a new one.
                 */
+               newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+               if (!newsighand)
+                       return -ENOMEM;
+
                atomic_set(&newsighand->count, 1);
                memcpy(newsighand->action, oldsighand->action,
                       sizeof(newsighand->action));
 
                write_lock_irq(&tasklist_lock);
                spin_lock(&oldsighand->siglock);
-               spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
-
                rcu_assign_pointer(tsk->sighand, newsighand);
-               recalc_sigpending();
-
-               spin_unlock(&newsighand->siglock);
                spin_unlock(&oldsighand->siglock);
                write_unlock_irq(&tasklist_lock);
 
@@ -957,12 +912,11 @@ no_thread_group:
        BUG_ON(!thread_group_leader(tsk));
        return 0;
 }
-       
+
 /*
  * These functions flushes out all traces of the currently running executable
  * so that a new one can be started
  */
-
 static void flush_old_files(struct files_struct * files)
 {
        long j = -1;
@@ -1284,7 +1238,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
        retval = -ENOENT;
        for (try=0; try<2; try++) {
                read_lock(&binfmt_lock);
-               for (fmt = formats ; fmt ; fmt = fmt->next) {
+               list_for_each_entry(fmt, &formats, lh) {
                        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
                        if (!fn)
                                continue;
@@ -1479,7 +1433,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
                        case 'p':
                                pid_in_pattern = 1;
                                rc = snprintf(out_ptr, out_end - out_ptr,
-                                             "%d", current->tgid);
+                                             "%d", task_tgid_vnr(current));
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
@@ -1537,6 +1491,14 @@ static int format_corename(char *corename, const char *pattern, long signr)
                                        goto out;
                                out_ptr += rc;
                                break;
+                       /* core limit size */
+                       case 'c':
+                               rc = snprintf(out_ptr, out_end - out_ptr,
+                                             "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur);
+                               if (rc > out_end - out_ptr)
+                                       goto out;
+                               out_ptr += rc;
+                               break;
                        default:
                                break;
                        }
@@ -1551,7 +1513,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
        if (!ispipe && !pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
                rc = snprintf(out_ptr, out_end - out_ptr,
-                             ".%d", current->tgid);
+                             ".%d", task_tgid_vnr(current));
                if (rc > out_end - out_ptr)
                        goto out;
                out_ptr += rc;
@@ -1699,7 +1661,6 @@ void set_dumpable(struct mm_struct *mm, int value)
                break;
        }
 }
-EXPORT_SYMBOL_GPL(set_dumpable);
 
 int get_dumpable(struct mm_struct *mm)
 {
@@ -1720,6 +1681,10 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        int fsuid = current->fsuid;
        int flag = 0;
        int ispipe = 0;
+       unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+       char **helper_argv = NULL;
+       int helper_argc = 0;
+       char *delimit;
 
        audit_core_dumps(signr);
 
@@ -1753,9 +1718,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         */
        clear_thread_flag(TIF_SIGPENDING);
 
-       if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
-               goto fail_unlock;
-
        /*
         * lock_kernel() because format_corename() is controlled by sysctl, which
         * uses lock_kernel()
@@ -1763,9 +1725,45 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        lock_kernel();
        ispipe = format_corename(corename, core_pattern, signr);
        unlock_kernel();
+       /*
+        * Don't bother to check the RLIMIT_CORE value if core_pattern points
+        * to a pipe.  Since we're not writing directly to the filesystem
+        * RLIMIT_CORE doesn't really apply, as no actual core file will be
+        * created unless the pipe reader chooses to write out the core file
+        * at which point file size limits and permissions will be imposed
+        * as it does with any other process
+        */
+       if ((!ispipe) && (core_limit < binfmt->min_coredump))
+               goto fail_unlock;
+
        if (ispipe) {
+               helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
+               /* Bail out before helper_argv[0] is dereferenced below */
+               if (!helper_argv) {
+                       printk(KERN_WARNING "%s failed to allocate memory\n",
+                              __func__);
+                       goto fail_unlock;
+               }
+               /* Terminate the string before the first option */
+               delimit = strchr(corename, ' ');
+               if (delimit)
+                       *delimit = '\0';
+               delimit = strrchr(helper_argv[0], '/');
+               if (delimit)
+                       delimit++;
+               else
+                       delimit = helper_argv[0];
+               if (!strcmp(delimit, current->comm)) {
+                       printk(KERN_NOTICE "Recursive core dump detected, "
+                                       "aborting\n");
+                       goto fail_unlock;
+               }
+
+               core_limit = RLIM_INFINITY;
+
                /* SIGPIPE can happen, but it's just never processed */
-               if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+               if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+                               &file)) {
                        printk(KERN_INFO "Core dump to %s pipe failed\n",
                               corename);
                        goto fail_unlock;
@@ -1793,13 +1791,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                goto close_fail;
 
-       retval = binfmt->core_dump(signr, regs, file);
+       retval = binfmt->core_dump(signr, regs, file, core_limit);
 
        if (retval)
                current->signal->group_exit_code |= 0x80;
 close_fail:
        filp_close(file, NULL);
 fail_unlock:
+       if (helper_argv)
+               argv_free(helper_argv);
+
        current->fsuid = fsuid;
        complete_all(&mm->core_done);
 fail: