Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Sep 2019 02:06:29 +0000 (19:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Sep 2019 02:06:29 +0000 (19:06 -0700)
Pull x86 entry updates from Ingo Molnar:
 "This contains x32 and compat syscall improvements, the biggest one of
  which splits x32 syscalls into their own table, which allows new
  syscalls to share the x32 and x86-64 number - which turns the
  512-547 special syscall numbers range into a legacy wart that won't be
  extended going forward"

* 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/syscalls: Split the x32 syscalls into their own table
  x86/syscalls: Disallow compat entries for all types of 64-bit syscalls
  x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask()
  x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long

arch/x86/entry/common.c
arch/x86/entry/syscall_64.c
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscalltbl.sh
arch/x86/include/asm/syscall.h
arch/x86/include/asm/unistd.h
arch/x86/include/uapi/asm/unistd.h
arch/x86/kernel/asm-offsets_64.c
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/syscall_numbering.c [new file with mode: 0644]

index 536b574b61613403618c3db62d72c6adac91af98..3f8e22615812bf4d82c3dd6a0e2c878ecf61f3ec 100644 (file)
@@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
        if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
                nr = syscall_trace_enter(regs);
 
-       /*
-        * NB: Native and x32 syscalls are dispatched from the same
-        * table.  The only functional difference is the x32 bit in
-        * regs->orig_ax, which changes the behavior of some syscalls.
-        */
-       nr &= __SYSCALL_MASK;
        if (likely(nr < NR_syscalls)) {
                nr = array_index_nospec(nr, NR_syscalls);
                regs->ax = sys_call_table[nr](regs);
+#ifdef CONFIG_X86_X32_ABI
+       } else if (likely((nr & __X32_SYSCALL_BIT) &&
+                         (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
+               nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
+                                       X32_NR_syscalls);
+               regs->ax = x32_sys_call_table[nr](regs);
+#endif
        }
 
        syscall_return_slowpath(regs);
index d5252bc1e38058ebc2d071d9bd3b3bd3cc1a976a..b1bf31713374a94b26d595f73d05d8d82fd2e8be 100644 (file)
 /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
 extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
 #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
+#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
+#undef __SYSCALL_X32
 
 #define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_X32(nr, sym, qual)
 
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
        /*
@@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
        [0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
+
+asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
+       /*
+        * Smells like a compiler bug -- it doesn't work
+        * when the & below is removed.
+        */
+       [0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#endif
index c00019abd0769d60d7a142db83b611d076e9ab0b..3fe02546aed35ba16a6142bfd26ad4869c50e65f 100644 (file)
 172    i386    prctl                   sys_prctl                       __ia32_sys_prctl
 173    i386    rt_sigreturn            sys_rt_sigreturn                sys32_rt_sigreturn
 174    i386    rt_sigaction            sys_rt_sigaction                __ia32_compat_sys_rt_sigaction
-175    i386    rt_sigprocmask          sys_rt_sigprocmask              __ia32_sys_rt_sigprocmask
+175    i386    rt_sigprocmask          sys_rt_sigprocmask              __ia32_compat_sys_rt_sigprocmask
 176    i386    rt_sigpending           sys_rt_sigpending               __ia32_compat_sys_rt_sigpending
 177    i386    rt_sigtimedwait         sys_rt_sigtimedwait_time32      __ia32_compat_sys_rt_sigtimedwait_time32
 178    i386    rt_sigqueueinfo         sys_rt_sigqueueinfo             __ia32_compat_sys_rt_sigqueueinfo
-179    i386    rt_sigsuspend           sys_rt_sigsuspend               __ia32_sys_rt_sigsuspend
+179    i386    rt_sigsuspend           sys_rt_sigsuspend               __ia32_compat_sys_rt_sigsuspend
 180    i386    pread64                 sys_pread64                     __ia32_compat_sys_x86_pread
 181    i386    pwrite64                sys_pwrite64                    __ia32_compat_sys_x86_pwrite
 182    i386    chown                   sys_chown16                     __ia32_sys_chown16
index 94fcd1951acaae51a3391d061e429f866469baae..1af2be39e7d9e018ce00166f9b090a2f4de923bc 100644 (file)
@@ -1,13 +1,13 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 in="$1"
 out="$2"
 
 syscall_macro() {
-    abi="$1"
-    nr="$2"
-    entry="$3"
+    local abi="$1"
+    local nr="$2"
+    local entry="$3"
 
     # Entry can be either just a function name or "function/qualifier"
     real_entry="${entry%%/*}"
@@ -21,14 +21,14 @@ syscall_macro() {
 }
 
 emit() {
-    abi="$1"
-    nr="$2"
-    entry="$3"
-    compat="$4"
-    umlentry=""
+    local abi="$1"
+    local nr="$2"
+    local entry="$3"
+    local compat="$4"
+    local umlentry=""
 
-    if [ "$abi" = "64" -a -n "$compat" ]; then
-       echo "a compat entry for a 64-bit syscall makes no sense" >&2
+    if [ "$abi" != "I386" -a -n "$compat" ]; then
+       echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
        exit 1
     fi
 
@@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
     while read nr abi name entry compat; do
        abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
        if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
-           # COMMON is the same as 64, except that we don't expect X32
-           # programs to use it.  Our expectation has nothing to do with
-           # any generated code, so treat them the same.
            emit 64 "$nr" "$entry" "$compat"
+           if [ "$abi" = "COMMON" ]; then
+               # COMMON means that this syscall exists in the same form for
+               # 64-bit and X32.
+               echo "#ifdef CONFIG_X86_X32_ABI"
+               emit X32 "$nr" "$entry" "$compat"
+               echo "#endif"
+           fi
        elif [ "$abi" = "X32" ]; then
-           # X32 is equivalent to 64 on an X32-compatible kernel.
            echo "#ifdef CONFIG_X86_X32_ABI"
-           emit 64 "$nr" "$entry" "$compat"
+           emit X32 "$nr" "$entry" "$compat"
            echo "#endif"
        elif [ "$abi" = "I386" ]; then
            emit "$abi" "$nr" "$entry" "$compat"
index 2dc4a021beea4e163bf56ae367404cfda79304bc..8db3fdb6102ecb373f085de3a6033c645fbe644c 100644 (file)
@@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
 extern const sys_call_ptr_t ia32_sys_call_table[];
 #endif
 
+#ifdef CONFIG_X86_X32_ABI
+extern const sys_call_ptr_t x32_sys_call_table[];
+#endif
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
index 097589753feceb68cb9be9e1f5255ff4e060117b..a7dd080749ce4e6aa527292dd1e61c352536da63 100644 (file)
@@ -5,12 +5,6 @@
 #include <uapi/asm/unistd.h>
 
 
-# ifdef CONFIG_X86_X32_ABI
-#  define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
-# else
-#  define __SYSCALL_MASK (~0)
-# endif
-
 # ifdef CONFIG_X86_32
 
 #  include <asm/unistd_32.h>
index 30d7d04d72d6bad6dfb419f7606e8d5c8fd9b622..196fdd02b8b1b3d71ac994715f7dee8fa8b5fa52 100644 (file)
@@ -3,7 +3,7 @@
 #define _UAPI_ASM_X86_UNISTD_H
 
 /* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT      0x40000000
+#define __X32_SYSCALL_BIT      0x40000000UL
 
 #ifndef __KERNEL__
 # ifdef __i386__
index d3d075226c0aa39761e9f4a33ae05cec321d36d7..70e97727a26ab83a801c69daf2391527d3d4f51c 100644 (file)
@@ -6,13 +6,28 @@
 #include <asm/ia32.h>
 
 #define __SYSCALL_64(nr, sym, qual) [nr] = 1,
+#define __SYSCALL_X32(nr, sym, qual)
 static char syscalls_64[] = {
 #include <asm/syscalls_64.h>
 };
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
+static char syscalls_x32[] = {
+#include <asm/syscalls_64.h>
+};
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+#endif
+
 #define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls_ia32[] = {
 #include <asm/syscalls_32.h>
 };
+#undef __SYSCALL_I386
 
 #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 #include <asm/kvm_para.h>
@@ -80,6 +95,11 @@ int main(void)
        DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
        DEFINE(NR_syscalls, sizeof(syscalls_64));
 
+#ifdef CONFIG_X86_X32_ABI
+       DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
+       DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
+#endif
+
        DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
        DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
 
index 3bc5b744e64403693579088fa7cf529145f3ed95..5d49bfec1e9aecb98aa01fd016753f9d1cceb8da 100644 (file)
@@ -17,7 +17,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
new file mode 100644 (file)
index 0000000..d6b09cb
--- /dev/null
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_numbering.c - tests x86-64 syscall number dispatch (x32 bit, 512-547 range, high bits)
+ * Copyright (c) 2018 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <syscall.h>
+
+static int nerrs;
+
+#define X32_BIT 0x40000000UL
+
+static void check_enosys(unsigned long nr, bool *ok)
+{
+       /* Flush first: if the syscall below misbehaves, a segfault is reasonably likely. */
+       fflush(stdout);
+
+       long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
+       if (ret == 0) {
+               printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
+               *ok = false;
+       } else if (errno != ENOSYS) {
+               printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
+               *ok = false;
+       }
+}
+
+static void test_x32_without_x32_bit(void)
+{
+       bool ok = true;
+
+       /*
+        * Syscalls 512-547 are "x32" syscalls.  They are intended to be
+        * called with the x32 (0x40000000) bit set.  Calling them without
+        * the x32 bit set is nonsense and should not work.
+        */
+       printf("[RUN]\tChecking syscalls 512-547\n");
+       for (int i = 512; i <= 547; i++)
+               check_enosys(i, &ok);
+
+       /*
+        * Check that a handful of 64-bit-only syscalls are rejected if the x32
+        * bit is set.
+        */
+       printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
+       check_enosys(16 | X32_BIT, &ok);        /* ioctl */
+       check_enosys(19 | X32_BIT, &ok);        /* readv */
+       check_enosys(20 | X32_BIT, &ok);        /* writev */
+
+       /*
+        * Check some syscalls with high bits set.
+        */
+       printf("[RUN]\tChecking numbers above 2^32-1\n");
+       check_enosys((1UL << 32), &ok);
+       check_enosys(X32_BIT | (1UL << 32), &ok);
+
+       if (!ok)
+               nerrs++;
+       else
+               printf("[OK]\tThey all returned -ENOSYS\n");
+}
+
+int main()
+{
+       /*
+        * Anyone diagnosing a failure will want to know whether the kernel
+        * supports x32.  Tell them.
+        */
+       printf("\tChecking for x32...");
+       fflush(stdout);
+       if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
+               printf(" supported\n");
+       } else if (errno == ENOSYS) {
+               printf(" not supported\n");
+       } else {
+               printf(" confused\n");
+       }
+
+       test_x32_without_x32_bit();
+
+       return nerrs ? 1 : 0;
+}