Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / kernel / seccomp.c
index 84922befea8414468eafe1330ffc372ec9bdb8da..4ef9687ac115f21aae6ce4321e2639866892389d 100644 (file)
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
-/* #define SECCOMP_DEBUG 1 */
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+#include <asm/syscall.h>
+#endif
 
 #ifdef CONFIG_SECCOMP_FILTER
-#include <asm/syscall.h>
 #include <linux/filter.h>
 #include <linux/pid.h>
 #include <linux/ptrace.h>
@@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(int syscall)
+static u32 seccomp_run_filters(struct seccomp_data *sd)
 {
        struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
-       struct seccomp_data sd;
+       struct seccomp_data sd_local;
        u32 ret = SECCOMP_RET_ALLOW;
 
        /* Ensure unexpected behavior doesn't result in failing open. */
@@ -185,14 +186,17 @@ static u32 seccomp_run_filters(int syscall)
        /* Make sure cross-thread synced filter points somewhere sane. */
        smp_read_barrier_depends();
 
-       populate_seccomp_data(&sd);
+       if (!sd) {
+               populate_seccomp_data(&sd_local);
+               sd = &sd_local;
+       }
 
        /*
         * All filters in the list are evaluated and the lowest BPF return
         * value always takes priority (ignoring the DATA).
         */
        for (; f; f = f->prev) {
-               u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
+               u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
 
                if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
                        ret = cur_ret;
@@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = {
 };
 #endif
 
-int __secure_computing(int this_syscall)
+static void __secure_computing_strict(int this_syscall)
+{
+       int *syscall_whitelist = mode1_syscalls;
+#ifdef CONFIG_COMPAT
+       if (is_compat_task())  /* compat tasks are checked against the _32 list instead */
+               syscall_whitelist = mode1_syscalls_32;
+#endif
+       do {  /* walk the list; the scan stops at the first zero entry after the head */
+               if (*syscall_whitelist == this_syscall)
+                       return;  /* listed syscall: return without killing the task */
+       } while (*++syscall_whitelist);
+
+#ifdef SECCOMP_DEBUG
+       dump_stack();
+#endif
+       audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);  /* not listed: audit, then exit */
+       do_exit(SIGKILL);
+}
+
+#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+void secure_computing_strict(int this_syscall)
+{
+       int mode = current->seccomp.mode;
+
+       if (mode == 0)  /* mode 0: nothing is checked */
+               return;
+       else if (mode == SECCOMP_MODE_STRICT)
+               __secure_computing_strict(this_syscall);  /* exits the task unless the syscall is listed */
+       else
+               BUG();  /* built only without CONFIG_HAVE_ARCH_SECCOMP_FILTER; other modes are a bug */
+}
+#else
+int __secure_computing(void)
 {
-       int exit_sig = 0;
-       int *syscall;
-       u32 ret;
+       u32 phase1_result = seccomp_phase1(NULL);  /* NULL: phase 1 reads the syscall nr from pt_regs */
+
+       if (likely(phase1_result == SECCOMP_PHASE1_OK))
+               return 0;  /* checks passed: process the syscall normally */
+       else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
+               return -1;  /* syscall must not be invoked */
+       else
+               return seccomp_phase2(phase1_result);  /* any other value is handed to phase 2 */
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
+{
+       u32 filter_ret, action;
+       int data;
 
        /*
         * Make sure that any changes to mode from another thread have
@@ -575,85 +623,127 @@ int __secure_computing(int this_syscall)
         */
        rmb();
 
-       switch (current->seccomp.mode) {
-       case SECCOMP_MODE_STRICT:
-               syscall = mode1_syscalls;
-#ifdef CONFIG_COMPAT
-               if (is_compat_task())
-                       syscall = mode1_syscalls_32;
+       filter_ret = seccomp_run_filters(sd);
+       data = filter_ret & SECCOMP_RET_DATA;
+       action = filter_ret & SECCOMP_RET_ACTION;
+
+       switch (action) {
+       case SECCOMP_RET_ERRNO:
+               /* Set the low-order 16-bits as an errno. */
+               syscall_set_return_value(current, task_pt_regs(current),
+                                        -data, 0);
+               goto skip;
+
+       case SECCOMP_RET_TRAP:
+               /* Show the handler the original registers. */
+               syscall_rollback(current, task_pt_regs(current));
+               /* Let the filter pass back 16 bits of data. */
+               seccomp_send_sigsys(this_syscall, data);
+               goto skip;
+
+       case SECCOMP_RET_TRACE:
+               return filter_ret;  /* Save the rest for phase 2. */
+
+       case SECCOMP_RET_ALLOW:
+               return SECCOMP_PHASE1_OK;
+
+       case SECCOMP_RET_KILL:
+       default:
+               audit_seccomp(this_syscall, SIGSYS, action);
+               do_exit(SIGSYS);
+       }
+
+       unreachable();
+
+skip:
+       audit_seccomp(this_syscall, 0, action);
+       return SECCOMP_PHASE1_SKIP;
+}
 #endif
-               do {
-                       if (*syscall == this_syscall)
-                               return 0;
-               } while (*++syscall);
-               exit_sig = SIGKILL;
-               ret = SECCOMP_RET_KILL;
-               break;
+
+/**
+ * seccomp_phase1() - run fast path seccomp checks on the current syscall
+ * @sd: The seccomp_data or NULL
+ *
+ * This only reads pt_regs via the syscall_xyz helpers.  The only change
+ * it will make to pt_regs is via syscall_set_return_value, and it will
+ * only do that if it returns SECCOMP_PHASE1_SKIP.
+ *
+ * If sd is provided, it will not read pt_regs at all.
+ *
+ * It may also call do_exit or force a signal; these actions must be
+ * safe.
+ *
+ * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
+ * be processed normally.
+ *
+ * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
+ * invoked.  In this case, seccomp_phase1 will have set the return value
+ * using syscall_set_return_value.
+ *
+ * If it returns anything else, then the return value should be passed
+ * to seccomp_phase2 from a context in which ptrace hooks are safe.
+ */
+u32 seccomp_phase1(struct seccomp_data *sd)
+{
+       int mode = current->seccomp.mode;
+       int this_syscall = sd ? sd->nr :
+               syscall_get_nr(current, task_pt_regs(current));  /* no sd: take the nr from pt_regs */
+
+       switch (mode) {
+       case SECCOMP_MODE_STRICT:
+               __secure_computing_strict(this_syscall);  /* may call do_exit */
+               return SECCOMP_PHASE1_OK;  /* reached only when the strict check returned */
 #ifdef CONFIG_SECCOMP_FILTER
-       case SECCOMP_MODE_FILTER: {
-               int data;
-               struct pt_regs *regs = task_pt_regs(current);
-               ret = seccomp_run_filters(this_syscall);
-               data = ret & SECCOMP_RET_DATA;
-               ret &= SECCOMP_RET_ACTION;
-               switch (ret) {
-               case SECCOMP_RET_ERRNO:
-                       /* Set the low-order 16-bits as a errno. */
-                       syscall_set_return_value(current, regs,
-                                                -data, 0);
-                       goto skip;
-               case SECCOMP_RET_TRAP:
-                       /* Show the handler the original registers. */
-                       syscall_rollback(current, regs);
-                       /* Let the filter pass back 16 bits of data. */
-                       seccomp_send_sigsys(this_syscall, data);
-                       goto skip;
-               case SECCOMP_RET_TRACE:
-                       /* Skip these calls if there is no tracer. */
-                       if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
-                               syscall_set_return_value(current, regs,
-                                                        -ENOSYS, 0);
-                               goto skip;
-                       }
-                       /* Allow the BPF to provide the event message */
-                       ptrace_event(PTRACE_EVENT_SECCOMP, data);
-                       /*
-                        * The delivery of a fatal signal during event
-                        * notification may silently skip tracer notification.
-                        * Terminating the task now avoids executing a system
-                        * call that may not be intended.
-                        */
-                       if (fatal_signal_pending(current))
-                               break;
-                       if (syscall_get_nr(current, regs) < 0)
-                               goto skip;  /* Explicit request to skip. */
-
-                       return 0;
-               case SECCOMP_RET_ALLOW:
-                       return 0;
-               case SECCOMP_RET_KILL:
-               default:
-                       break;
-               }
-               exit_sig = SIGSYS;
-               break;
-       }
+       case SECCOMP_MODE_FILTER:
+               return __seccomp_phase1_filter(this_syscall, sd);  /* yields OK, SKIP, or a value for phase 2 */
 #endif
        default:
                BUG();
        }
+}
 
-#ifdef SECCOMP_DEBUG
-       dump_stack();
-#endif
-       audit_seccomp(this_syscall, exit_sig, ret);
-       do_exit(exit_sig);
-#ifdef CONFIG_SECCOMP_FILTER
-skip:
-       audit_seccomp(this_syscall, exit_sig, ret);
-#endif
-       return -1;
+/**
+ * seccomp_phase2() - finish slow path seccomp work for the current syscall
+ * @phase1_result: The return value from seccomp_phase1()
+ *
+ * This must be called from a context in which ptrace hooks can be used.
+ *
+ * Returns 0 if the syscall should be processed or -1 to skip the syscall.
+ */
+int seccomp_phase2(u32 phase1_result)
+{
+       struct pt_regs *regs = task_pt_regs(current);
+       u32 action = phase1_result & SECCOMP_RET_ACTION;  /* action bits of the filter result */
+       int data = phase1_result & SECCOMP_RET_DATA;  /* data bits, used as the ptrace event message */
+
+       BUG_ON(action != SECCOMP_RET_TRACE);  /* TRACE is the only action expected in phase 2 */
+
+       audit_seccomp(syscall_get_nr(current, regs), 0, action);
+
+       /* Skip these calls if there is no tracer. */
+       if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
+               syscall_set_return_value(current, regs,
+                                        -ENOSYS, 0);
+               return -1;  /* skip the syscall; its return value is now -ENOSYS */
+       }
+
+       /* Allow the BPF to provide the event message */
+       ptrace_event(PTRACE_EVENT_SECCOMP, data);
+       /*
+        * The delivery of a fatal signal during event
+        * notification may silently skip tracer notification.
+        * Terminating the task now avoids executing a system
+        * call that may not be intended.
+        */
+       if (fatal_signal_pending(current))
+               do_exit(SIGSYS);
+       if (syscall_get_nr(current, regs) < 0)  /* syscall nr is re-read after the ptrace event */
+               return -1;  /* Explicit request to skip. */
+
+       return 0;  /* process the syscall */
 }
+#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
 
 long prctl_get_seccomp(void)
 {