powerpc/64: Add support to build with prefixed instructions
Author: Nicholas Piggin <npiggin@gmail.com>
Sat, 8 Apr 2023 02:17:49 +0000 (12:17 +1000)
Committer: Michael Ellerman <mpe@ellerman.id.au>
Thu, 20 Apr 2023 02:54:22 +0000 (12:54 +1000)
Add an option to build kernel and module with prefixed instructions if
the CPU and toolchain support it.

This is not related to kernel support for userspace execution of
prefixed instructions.

Building with prefixed instructions breaks some extended inline asm
memory addressing, for example it will provide immediates that exceed
the range of simple load/store displacement. Whether this is a
toolchain or a kernel asm problem remains to be seen. For now, these
are replaced with simpler and less efficient direct register addressing
when compiling with prefixed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20230408021752.862660-4-npiggin@gmail.com
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/include/asm/atomic.h
arch/powerpc/include/asm/io.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/kernel/trace/ftrace.c
arch/powerpc/platforms/Kconfig.cputype

index 3fb2c276613953ff09948499976decd6df4df2fe..109c00bd91db207bd448ff55fdb4aa523ac9e93c 100644 (file)
@@ -4,6 +4,9 @@ source "arch/powerpc/platforms/Kconfig.cputype"
 config CC_HAS_ELFV2
        def_bool PPC64 && $(cc-option, -mabi=elfv2)
 
+config CC_HAS_PREFIXED
+       def_bool PPC64 && $(cc-option, -mcpu=power10 -mprefixed)
+
 config 32BIT
        bool
        default y if PPC32
index 4343cca57cb3b7b358554c3305f770abae3af7b6..9fb770d3b40900f9434cbc8257809048f910a561 100644 (file)
@@ -180,7 +180,11 @@ ifdef CONFIG_476FPE_ERR46
 endif
 
 # No prefix or pcrel
+ifdef CONFIG_PPC_KERNEL_PREFIXED
+KBUILD_CFLAGS += $(call cc-option,-mprefixed)
+else
 KBUILD_CFLAGS += $(call cc-option,-mno-prefixed)
+endif
 KBUILD_CFLAGS += $(call cc-option,-mno-pcrel)
 
 # No AltiVec or VSX or MMA instructions when building kernel
index b3a53830446b7c9d48f00888ac7033d75f945b82..47228b17747811160b7c5dcd3cf5e98a31cf8aba 100644 (file)
@@ -27,14 +27,22 @@ static __inline__ int arch_atomic_read(const atomic_t *v)
 {
        int t;
 
-       __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
+       /* -mprefixed can generate offsets beyond range, fall back hack */
+       if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+               __asm__ __volatile__("lwz %0,0(%1)" : "=r"(t) : "b"(&v->counter));
+       else
+               __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
 
        return t;
 }
 
 static __inline__ void arch_atomic_set(atomic_t *v, int i)
 {
-       __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
+       /* -mprefixed can generate offsets beyond range, fall back hack */
+       if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+               __asm__ __volatile__("stw %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter));
+       else
+               __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
 }
 
 #define ATOMIC_OP(op, asm_op, suffix, sign, ...)                       \
@@ -197,14 +205,22 @@ static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
 {
        s64 t;
 
-       __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
+       /* -mprefixed can generate offsets beyond range, fall back hack */
+       if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+               __asm__ __volatile__("ld %0,0(%1)" : "=r"(t) : "b"(&v->counter));
+       else
+               __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
 
        return t;
 }
 
 static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
 {
-       __asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
+       /* -mprefixed can generate offsets beyond range, fall back hack */
+       if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+               __asm__ __volatile__("std %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter));
+       else
+               __asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
 }
 
 #define ATOMIC64_OP(op, asm_op)                                                \
index fc112a91d0c2f4bf85503dffb38dd1b8dd19967a..f1e657c9bbe8e859a372916e7cb198d4059b4bb3 100644 (file)
@@ -97,6 +97,42 @@ extern bool isa_io_special;
  *
  */
 
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define DEF_MMIO_IN_X(name, size, insn)                                \
+static inline u##size name(const volatile u##size __iomem *addr)       \
+{                                                                      \
+       u##size ret;                                                    \
+       __asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync"    \
+               : "=r" (ret) : "r" (addr) : "memory");                  \
+       return ret;                                                     \
+}
+
+#define DEF_MMIO_OUT_X(name, size, insn)                               \
+static inline void name(volatile u##size __iomem *addr, u##size val)   \
+{                                                                      \
+       __asm__ __volatile__("sync;"#insn" %1,0,%0"                     \
+               : : "r" (addr), "r" (val) : "memory");                  \
+       mmiowb_set_pending();                                           \
+}
+
+#define DEF_MMIO_IN_D(name, size, insn)                                \
+static inline u##size name(const volatile u##size __iomem *addr)       \
+{                                                                      \
+       u##size ret;                                                    \
+       __asm__ __volatile__("sync;"#insn" %0,0(%1);twi 0,%0,0;isync"\
+               : "=r" (ret) : "b" (addr) : "memory");  \
+       return ret;                                                     \
+}
+
+#define DEF_MMIO_OUT_D(name, size, insn)                               \
+static inline void name(volatile u##size __iomem *addr, u##size val)   \
+{                                                                      \
+       __asm__ __volatile__("sync;"#insn" %1,0(%0)"                    \
+               : : "b" (addr), "r" (val) : "memory");  \
+       mmiowb_set_pending();                                           \
+}
+#else
 #define DEF_MMIO_IN_X(name, size, insn)                                \
 static inline u##size name(const volatile u##size __iomem *addr)       \
 {                                                                      \
@@ -130,6 +166,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val)        \
                : "=m<>" (*addr) : "r" (val) : "memory");       \
        mmiowb_set_pending();                                           \
 }
+#endif
 
 DEF_MMIO_IN_D(in_8,     8, lbz);
 DEF_MMIO_OUT_D(out_8,   8, stb);
index 52378e641d382d744ac3ca7b70d452e8b9be6971..a2d255aa96276c08c1365f722dbc1c1684a7ff6b 100644 (file)
@@ -71,14 +71,26 @@ __pu_failed:                                                        \
  * because we do not write to any memory gcc knows about, so there
  * are no aliasing issues.
  */
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __put_user_asm_goto(x, addr, label, op)                        \
+       asm_volatile_goto(                                      \
+               "1:     " op " %0,0(%1) # put_user\n"           \
+               EX_TABLE(1b, %l2)                               \
+               :                                               \
+               : "r" (x), "b" (addr)                           \
+               :                                               \
+               : label)
+#else
 #define __put_user_asm_goto(x, addr, label, op)                        \
        asm_volatile_goto(                                      \
                "1:     " op "%U1%X1 %0,%1      # put_user\n"   \
                EX_TABLE(1b, %l2)                               \
                :                                               \
-               : "r" (x), "m<>" (*addr)                \
+               : "r" (x), "m<>" (*addr)                        \
                :                                               \
                : label)
+#endif
 
 #ifdef __powerpc64__
 #define __put_user_asm2_goto(x, ptr, label)                    \
@@ -131,14 +143,26 @@ do {                                                              \
 
 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
 
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __get_user_asm_goto(x, addr, label, op)                        \
+       asm_volatile_goto(                                      \
+               "1:     "op" %0,0(%1)   # get_user\n"           \
+               EX_TABLE(1b, %l2)                               \
+               : "=r" (x)                                      \
+               : "b" (addr)                                    \
+               :                                               \
+               : label)
+#else
 #define __get_user_asm_goto(x, addr, label, op)                        \
        asm_volatile_goto(                                      \
                "1:     "op"%U1%X1 %0, %1       # get_user\n"   \
                EX_TABLE(1b, %l2)                               \
                : "=r" (x)                                      \
-               : "m<>" (*addr)                         \
+               : "m<>" (*addr)                                 \
                :                                               \
                : label)
+#endif
 
 #ifdef __powerpc64__
 #define __get_user_asm2_goto(x, addr, label)                   \
index 7b85c3b460a3c048ec31cce44e9b21066b96c5a8..72864fb7a6ccdca7ada5d904a38d76c1dabbebb7 100644 (file)
@@ -194,6 +194,8 @@ __ftrace_make_nop(struct module *mod,
         * get corrupted.
         *
         * Use a b +8 to jump over the load.
+        * XXX: could make PCREL depend on MPROFILE_KERNEL
+        * XXX: check PCREL && MPROFILE_KERNEL calling sequence
         */
        if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
                pop = ppc_inst(PPC_RAW_NOP());
index 046b571496b133c571997dca175a145beecd1b24..1ff0d2818da607697aae18e59ac5ca6131fc0833 100644 (file)
@@ -180,6 +180,7 @@ config POWER10_CPU
        bool "POWER10"
        depends on PPC_BOOK3S_64
        select ARCH_HAS_FAST_MULTIPLIER
+       select PPC_HAVE_PREFIXED_SUPPORT
 
 config E5500_CPU
        bool "Freescale e5500"
@@ -454,6 +455,22 @@ config PPC_RADIX_MMU_DEFAULT
 
          If you're unsure, say Y.
 
+config PPC_KERNEL_PREFIXED
+       depends on PPC_HAVE_PREFIXED_SUPPORT
+       depends on CC_HAS_PREFIXED
+       default n
+       bool "Build Kernel with Prefixed Instructions"
+       help
+         POWER10 and later CPUs support prefixed instructions, 8 byte
+         instructions that include large immediate, pc relative addressing,
+         and various floating point, vector, MMA.
+
+         This option builds the kernel with prefixed instructions, and
+         allows a pc relative addressing option to be selected.
+
+         Kernel support for prefixed instructions in applications and guests
+         is not affected by this option.
+
 config PPC_KUEP
        bool "Kernel Userspace Execution Prevention" if !40x
        default y if !40x
@@ -490,6 +507,9 @@ config PPC_MMU_NOHASH
 config PPC_HAVE_PMU_SUPPORT
        bool
 
+config PPC_HAVE_PREFIXED_SUPPORT
+       bool
+
 config PMU_SYSFS
        bool "Create PMU SPRs sysfs file"
        default n