ARM: smp: Store current pointer in TPIDRURO register if available
author Ard Biesheuvel <ardb@kernel.org>
Sat, 18 Sep 2021 08:44:37 +0000 (10:44 +0200)
committer Ard Biesheuvel <ardb@kernel.org>
Mon, 27 Sep 2021 14:54:02 +0000 (16:54 +0200)
Now that the user space TLS register is assigned on every return to user
space, we can use it to keep the 'current' pointer while running in the
kernel. This removes the need to access it via thread_info, which is
located at the base of the stack, but will be moved out of there in a
subsequent patch.

Use the __builtin_thread_pointer() helper when available - this will
help GCC understand that reloading the value within the same function is
not necessary, even when using the per-task stack protector (which also
generates accesses via the TLS register). For example, the generated
code below loads TPIDRURO only once, and uses it to access both the
stack canary and the preempt_count fields.

<do_one_initcall>:
       e92d 41f0       stmdb   sp!, {r4, r5, r6, r7, r8, lr}
       ee1d 4f70       mrc     15, 0, r4, cr13, cr0, {3}
       4606            mov     r6, r0
       b094            sub     sp, #80 ; 0x50
       f8d4 34e8       ldr.w   r3, [r4, #1256] ; 0x4e8  <- stack canary
       9313            str     r3, [sp, #76]   ; 0x4c
       f8d4 8004       ldr.w   r8, [r4, #4]             <- preempt count

Co-developed-by: Keith Packard <keithpac@amazon.com>
Signed-off-by: Keith Packard <keithpac@amazon.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
12 files changed:
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/include/asm/assembler.h
arch/arm/include/asm/current.h [new file with mode: 0644]
arch/arm/include/asm/switch_to.h
arch/arm/include/asm/thread_info.h
arch/arm/kernel/entry-armv.S
arch/arm/kernel/entry-common.S
arch/arm/kernel/head-common.S
arch/arm/kernel/process.c
arch/arm/kernel/smp.c
arch/arm/mm/proc-macros.S

index ff3e64ae959e4496ae57f2d55d4a68dc30a93692..cd195e6f4ea61eda7da2a45b05ae9249e46341de 100644 (file)
@@ -1157,6 +1157,11 @@ config SMP_ON_UP
 
          If you don't know what to do here, say Y.
 
+
+config CURRENT_POINTER_IN_TPIDRURO
+       def_bool y
+       depends on SMP && CPU_32v6K && !CPU_V6
+
 config ARM_CPU_TOPOLOGY
        bool "Support cpu topology definition"
        depends on SMP && CPU_V7
index b46e673a0ebe67c007720e1491beb9bac59f31f9..1c540157e2831afde542c85005f1f503963dd437 100644 (file)
@@ -113,6 +113,10 @@ ifeq ($(CONFIG_CC_IS_CLANG),y)
 CFLAGS_ABI     += -meabi gnu
 endif
 
+ifeq ($(CONFIG_CURRENT_POINTER_IN_TPIDRURO),y)
+CFLAGS_ABI     += -mtp=cp15
+endif
+
 # Accept old syntax despite ".syntax unified"
 AFLAGS_NOWARN  :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
 
index e2b1fd558bf3c4d1d6ccc55388e7f3e7b3b090dc..c1551dee28be5c9745862ac987c2d1e228a7eaec 100644 (file)
        .endm
        .endr
 
+       .macro  get_current, rd         /* rd := pointer to the current task */
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+       mrc     p15, 0, \rd, c13, c0, 3         @ get TPIDRURO register
+#else  /* current is not kept in TPIDRURO: go via thread_info */
+       get_thread_info \rd                     @ rd = current thread_info
+       ldr     \rd, [\rd, #TI_TASK]            @ rd = word at offset TI_TASK in it
+#endif
+       .endm
+
+       .macro  set_current, rn         /* make rn the value get_current returns */
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+       mcr     p15, 0, \rn, c13, c0, 3         @ set TPIDRURO register
+#endif /* without the option this macro expands to nothing */
+       .endm
+
+       .macro  reload_current, t1:req, t2:req  /* TPIDRURO := per-CPU __entry_task; clobbers t1, t2 */
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+       adr_l   \t1, __entry_task               @ get __entry_task base address
+       mrc     p15, 0, \t2, c13, c0, 4         @ get per-CPU offset
+       ldr     \t1, [\t1, \t2]                 @ load __entry_task of this CPU
+       mcr     p15, 0, \t1, c13, c0, 3         @ store in TPIDRURO
+#endif /* without the option this macro expands to nothing */
+       .endm
+
 /*
  * Get current thread_info.
  */
diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
new file mode 100644 (file)
index 0000000..1d472fa
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 Keith Packard <keithp@keithp.com>
+ * Copyright (c) 2021 Google, LLC <ardb@kernel.org>
+ */
+
+#ifndef _ASM_ARM_CURRENT_H
+#define _ASM_ARM_CURRENT_H
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+static inline void set_current(struct task_struct *cur)
+{
+       if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))
+               return; /* no-op: 'current' is not kept in TPIDRURO */
+
+       /* Write cur to TPIDRURO (CP15 c13, c0, 3), where get_current() reads it */
+       asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory");
+}
+
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+
+static inline struct task_struct *get_current(void)
+{
+       struct task_struct *cur;        /* value read back from TPIDRURO */
+
+#if __has_builtin(__builtin_thread_pointer)
+       /*
+        * Use the __builtin helper when available - this results in better
+        * code, especially when using GCC in combination with the per-task
+        * stack protector, as the compiler will recognize that it needs to
+        * load the TLS register only once in every function.
+        */
+       cur = __builtin_thread_pointer();       /* relies on -mtp=cp15 */
+#else
+       asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(cur)); /* read TPIDRURO directly */
+#endif
+       return cur;
+}
+
+#define current get_current()
+#else
+#include <asm-generic/current.h>
+#endif /* CONFIG_CURRENT_POINTER_IN_TPIDRURO */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ARM_CURRENT_H */
index 007d8fea715721d2435d5aff668f5a3b62b10054..61e4a3c4ca6ecedb1cf5e697561a8aa665b1761b 100644 (file)
@@ -26,6 +26,8 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
 #define switch_to(prev,next,last)                                      \
 do {                                                                   \
        __complete_pending_tlbi();                                      \
+       if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))             \
+               __this_cpu_write(__entry_task, next); /* read back by reload_current */ \
        last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));        \
 } while (0)
 
index f0cacc733231fcdefa3f32b7b9e8fb68387c2f8a..76b6fbd5540c23f3454bb930204af56c0199b1d6 100644 (file)
@@ -29,6 +29,8 @@
 
 struct task_struct;
 
+DECLARE_PER_CPU(struct task_struct *, __entry_task);
+
 #include <asm/types.h>
 
 struct cpu_context_save {
index 241b73d64df739966fd456849cf3a2fee32070c0..7263a45abf3d5662557abb7a962b82893578b329 100644 (file)
@@ -384,6 +384,8 @@ ENDPROC(__fiq_abt)
  ATRAP(        teq     r8, r7)
  ATRAP( mcrne  p15, 0, r8, c1, c0, 0)
 
+       reload_current r7, r8
+
        @
        @ Clear FP to mark the first stack frame
        @
@@ -762,6 +764,8 @@ ENTRY(__switch_to)
        add     r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK
        .endif
        ldr     r7, [r7, #TSK_STACK_CANARY & IMM12_MASK]
+#elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO)
+       ldr     r7, [r2, #TI_TASK]
 #endif
 #ifdef CONFIG_CPU_USE_DOMAINS
        mcr     p15, 0, r6, c3, c0, 0           @ Set domain register
@@ -776,6 +780,7 @@ ENTRY(__switch_to)
 #endif
  THUMB(        mov     ip, r4                     )
        mov     r0, r5
+       set_current r7
  ARM(  ldmia   r4, {r4 - sl, fp, sp, pc}  )    @ Load all regs saved previously
  THUMB(        ldmia   ip!, {r4 - sl, fp}         )    @ Load all regs saved previously
  THUMB(        ldr     sp, [ip], #4               )
index d9c99db50243f7252de2a9542e804b92487a30c9..ac86c34682bb505094443c043ae92ba2b71b55b4 100644 (file)
@@ -170,6 +170,7 @@ ENTRY(vector_swi)
        str     saved_psr, [sp, #S_PSR]         @ Save CPSR
        str     r0, [sp, #S_OLD_R0]             @ Save OLD_R0
 #endif
+       reload_current r10, ip
        zero_fp
        alignment_trap r10, ip, __cr_alignment
        asm_trace_hardirqs_on save=0
index 29b2eda136bba8cd1139d9ea878eeef3a53ab403..da18e0a17dc20cdb518fc6dd98ecfafc903b7d58 100644 (file)
@@ -105,6 +105,11 @@ __mmap_switched:
        mov     r1, #0
        bl      __memset                        @ clear .bss
 
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+       adr_l   r0, init_task                   @ get swapper task_struct
+       set_current r0
+#endif
+
        ldmia   r4, {r0, r1, r2, r3}
        str     r9, [r0]                        @ Save processor ID
        str     r7, [r1]                        @ Save machine type
index cd73c216b2729d135cefad0f620875756d9d400b..30428d756515d57157d26d7a08b507a2bdeb2597 100644 (file)
 
 #include "signal.h"
 
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+#endif
+
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
index 8979d548ec178dcf960a711c38cdd7afe6fc4ac3..97ee6b1567e9f5bdbc132880a1a279e5d698b5c1 100644 (file)
@@ -409,6 +409,8 @@ asmlinkage void secondary_start_kernel(struct task_struct *task)
        struct mm_struct *mm = &init_mm;
        unsigned int cpu;
 
+       set_current(task);
+
        secondary_biglittle_init();
 
        /*
index e2c743aa2eb2b88042064657e2307ab0134d31d8..d48ba99d739cab4bc0b0f01110ecd856544050c6 100644 (file)
@@ -30,8 +30,7 @@
  * act_mm - get current->active_mm
  */
        .macro  act_mm, rd
-       get_thread_info \rd
-       ldr     \rd, [\rd, #TI_TASK]
+       get_current \rd
        .if (TSK_ACTIVE_MM > IMM12_MASK)
        add     \rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK
        .endif