riscv: Allow to downgrade paging mode from the command line
author Alexandre Ghiti <alexghiti@rivosinc.com>
Mon, 24 Apr 2023 09:23:13 +0000 (11:23 +0200)
committer Palmer Dabbelt <palmer@rivosinc.com>
Wed, 26 Apr 2023 14:30:52 +0000 (07:30 -0700)
Add two early command line parameters that allow downgrading the satp
mode (using the same naming as x86):
- "no5lvl": use a 4-level page table (down from sv57 to sv48)
- "no4lvl": use a 3-level page table (down from sv57/sv48 to sv39)

Note that going through the device tree to get the kernel command line
works with ACPI too since the efi stub creates a device tree anyway with
the command line.

In KASAN kernels, we can't use the libfdt that early in the boot process
since we are not ready to execute instrumented functions. So instead of
using the "generic" libfdt, we compile our own versions of those functions
that are not instrumented and that are prefixed so that they do not
conflict with the generic ones. We also need the non-instrumented versions
of the string functions and the prefixed versions of memcpy/memmove.

This is largely inspired by commit aacd149b6238 ("arm64: head: avoid
relocating the kernel twice for KASLR") from which I removed compilation
flags that were not relevant to RISC-V at the moment (LTO, SCS). Also
note that we have to link with -z norelro to prevent ld.lld from throwing
a warning with the new .got sections, like in commit 311bea3cb9ee
("arm64: link with -z norelro for LLD or aarch64-elf").

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
Link: https://lore.kernel.org/r/20230424092313.178699-2-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Documentation/admin-guide/kernel-parameters.txt
arch/riscv/Makefile
arch/riscv/kernel/Makefile
arch/riscv/kernel/pi/Makefile [new file with mode: 0644]
arch/riscv/kernel/pi/cmdline_early.c [new file with mode: 0644]
arch/riscv/kernel/vmlinux.lds.S
arch/riscv/lib/memcpy.S
arch/riscv/lib/memmove.S
arch/riscv/lib/strlen.S
arch/riscv/mm/init.c

index 6221a1d057dd58de265283de65e785c618ea6758..accc400b43f1d1b3ce912d70c24dd5ff88292b8a 100644 (file)
                        emulation library even if a 387 maths coprocessor
                        is present.
 
-       no5lvl          [X86-64] Disable 5-level paging mode. Forces
+       no4lvl          [RISCV] Disable 4-level and 5-level paging modes. Forces
+                       kernel to use 3-level paging instead.
+
+       no5lvl          [X86-64,RISCV] Disable 5-level paging mode. Forces
                        kernel to use 4-level paging instead.
 
        nofsgsbase      [X86] Disables FSGSBASE instructions.
index e859e1721a8f2b6f98d20b2d035eefe5f12daeb1..d44d0fb981686fd7dc8f0d50bdebecfb8302f530 100644 (file)
@@ -7,8 +7,9 @@
 #
 
 OBJCOPYFLAGS    := -O binary
+LDFLAGS_vmlinux := -z norelro
 ifeq ($(CONFIG_RELOCATABLE),y)
-       LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro --emit-relocs
+       LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs
        KBUILD_CFLAGS += -fPIE
 endif
 ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
index 392fa6e35d4a7c39edacc327034b427d0da635cd..0fee73a20c87acc2e172f74aa1cf56e4f6277089 100644 (file)
@@ -87,3 +87,5 @@ obj-$(CONFIG_EFI)             += efi.o
 obj-$(CONFIG_COMPAT)           += compat_syscall_table.o
 obj-$(CONFIG_COMPAT)           += compat_signal.o
 obj-$(CONFIG_COMPAT)           += compat_vdso/
+
+obj-$(CONFIG_64BIT)            += pi/
diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile
new file mode 100644 (file)
index 0000000..5d7cb99
--- /dev/null
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# This file was copied from arm64/kernel/pi/Makefile.
+
+KBUILD_CFLAGS  := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+                  -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+                  $(call cc-option,-mbranch-protection=none) \
+                  -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+                  -D__DISABLE_EXPORTS -ffreestanding \
+                  -fno-asynchronous-unwind-tables -fno-unwind-tables \
+                  $(call cc-option,-fno-addrsig)
+
+KBUILD_CFLAGS  += -mcmodel=medany
+
+CFLAGS_cmdline_early.o += -D__NO_FORTIFY
+CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY
+
+GCOV_PROFILE   := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT        := n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+                              --remove-section=.note.gnu.property \
+                              --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+       $(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+       $(call if_changed_rule,cc_o_c)
+
+$(obj)/string.o: $(srctree)/lib/string.c FORCE
+       $(call if_changed_rule,cc_o_c)
+
+$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
+       $(call if_changed_rule,cc_o_c)
+
+obj-y          := cmdline_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y                := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c
new file mode 100644 (file)
index 0000000..05652d1
--- /dev/null
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/string.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+
+/* Scratch copy of the kernel command line assembled by get_early_cmdline(). */
+static char early_cmdline[COMMAND_LINE_SIZE];
+
+/*
+ * Declare the functions that are exported (but prefixed) here so that LLVM
+ * does not complain it lacks the 'static' keyword (which, if added, makes
+ * LLVM complain because the function is actually unused in this file).
+ */
+u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa);
+
+/*
+ * Build the early command line in early_cmdline and return it.
+ *
+ * Unless CONFIG_CMDLINE_FORCE is set, the "bootargs" property of the
+ * "/chosen" node of the device tree at @dtb_pa is copied first. The built-in
+ * CONFIG_CMDLINE is then appended when CONFIG_CMDLINE_EXTEND or
+ * CONFIG_CMDLINE_FORCE is set, or when no (or an empty) "bootargs" was found.
+ */
+static char *get_early_cmdline(uintptr_t dtb_pa)
+{
+       const char *fdt_cmdline = NULL;
+       unsigned int fdt_cmdline_size = 0;
+       int chosen_node;
+
+       if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+               chosen_node = fdt_path_offset((void *)dtb_pa, "/chosen");
+               if (chosen_node >= 0) {
+                       fdt_cmdline = fdt_getprop((void *)dtb_pa, chosen_node,
+                                                 "bootargs", NULL);
+                       if (fdt_cmdline) {
+                               fdt_cmdline_size = strlen(fdt_cmdline);
+                               strscpy(early_cmdline, fdt_cmdline,
+                                       COMMAND_LINE_SIZE);
+                       }
+               }
+       }
+
+       if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
+           IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
+           fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) {
+               /*
+                * strlcat() is bounded by the total buffer size, so it cannot
+                * overrun early_cmdline even when bootargs was truncated by
+                * strscpy() (fdt_cmdline_size >= COMMAND_LINE_SIZE).
+                */
+               strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+       }
+
+       return early_cmdline;
+}
+
+/*
+ * Look for the paging-mode downgrade options in @cmdline.
+ *
+ * Returns SATP_MODE_48 if "no4lvl" is present, else SATP_MODE_57 if "no5lvl"
+ * is present, else 0. "no4lvl" is tested first, so it wins when both appear.
+ * Matching is a plain substring search.
+ */
+static u64 match_noXlvl(char *cmdline)
+{
+       if (strstr(cmdline, "no4lvl"))
+               return SATP_MODE_48;
+       else if (strstr(cmdline, "no5lvl"))
+               return SATP_MODE_57;
+
+       return 0;
+}
+
+/*
+ * Entry point (linked as __pi_set_satp_mode_from_cmdline): parse the command
+ * line reachable from the device tree at @dtb_pa and return the value
+ * computed by match_noXlvl().
+ */
+u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa)
+{
+       char *cmdline = get_early_cmdline(dtb_pa);
+
+       return match_noXlvl(cmdline);
+}
index 615ff5842690b1e0cc963061088c3885ae87bd28..305877d85e96bb3eb2aa24caf6b0d6f23333a2c0 100644 (file)
@@ -83,6 +83,14 @@ SECTIONS
        /* Start of init data section */
        __init_data_begin = .;
        INIT_DATA_SECTION(16)
+
+       /* Those sections result from the compilation of kernel/pi/string.c */
+       .init.pidata : {
+               *(.init.srodata.cst8*)
+               *(.init__bug_table*)
+               *(.init.sdata*)
+       }
+
        .init.bss : {
                *(.init.bss)    /* from the EFI stub */
        }
@@ -128,9 +136,10 @@ SECTIONS
                __rela_dyn_end = .;
        }
 
+       .got : { *(.got*) }
+
 #ifdef CONFIG_RELOCATABLE
        .data.rel : { *(.data.rel*) }
-       .got : { *(.got*) }
        .plt : { *(.plt) }
        .dynamic : { *(.dynamic) }
        .dynsym : { *(.dynsym) }
index 51ab716253fa3c9939a95b8721b7524099824e63..1a40d01a95439e1592b673ad76e5f5b964bbbed3 100644 (file)
@@ -106,3 +106,5 @@ WEAK(memcpy)
 6:
        ret
 END(__memcpy)
+SYM_FUNC_ALIAS(__pi_memcpy, __memcpy)
+SYM_FUNC_ALIAS(__pi___memcpy, __memcpy)
index e0609e1f0864dedf88ae4e2e9983f0f928ff6c31..838ff2022fe32d8e16769ef0e5962d9c294b0ed3 100644 (file)
@@ -314,3 +314,5 @@ return_from_memmove:
 
 SYM_FUNC_END(memmove)
 SYM_FUNC_END(__memmove)
+SYM_FUNC_ALIAS(__pi_memmove, __memmove)
+SYM_FUNC_ALIAS(__pi___memmove, __memmove)
index db3d42d99b78a475f5a49cc1d8b6db1077f7df11..8ae3064e45ff00592d2b927ee8cc6e4216642867 100644 (file)
@@ -130,3 +130,4 @@ strlen_zbb:
 .option pop
 #endif
 SYM_FUNC_END(strlen)
+SYM_FUNC_ALIAS(__pi_strlen, strlen)
index bce899b180cd2b3f68740a3b96c761c19d1b5b51..3ad771571c2d049939c0ec3ccf2eb14fa4aa77e0 100644 (file)
@@ -746,6 +746,8 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa);
+
 static void __init disable_pgtable_l5(void)
 {
        pgtable_l5_enabled = false;
@@ -760,17 +762,39 @@ static void __init disable_pgtable_l4(void)
        satp_mode = SATP_MODE_39;
 }
 
+/*
+ * "no4lvl" early parameter handler: only logs that the option was seen. The
+ * paging mode itself is selected in set_satp_mode() from the value returned
+ * by __pi_set_satp_mode_from_cmdline(). @p is unused.
+ */
+static int __init print_no4lvl(char *p)
+{
+       pr_info("Disabled 4-level and 5-level paging\n");
+       return 0;
+}
+early_param("no4lvl", print_no4lvl);
+
+/*
+ * "no5lvl" early parameter handler: only logs that the option was seen. The
+ * paging mode itself is selected in set_satp_mode() from the value returned
+ * by __pi_set_satp_mode_from_cmdline(). @p is unused.
+ */
+static int __init print_no5lvl(char *p)
+{
+       pr_info("Disabled 5-level paging\n");
+       return 0;
+}
+early_param("no5lvl", print_no5lvl);
+
 /*
  * There is a simple way to determine if 4-level is supported by the
  * underlying hardware: establish 1:1 mapping in 4-level page table mode
  * then read SATP to see if the configuration was taken into account
  * meaning sv48 is supported.
  */
-static __init void set_satp_mode(void)
+static __init void set_satp_mode(uintptr_t dtb_pa)
 {
        u64 identity_satp, hw_satp;
        uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
-       bool check_l4 = false;
+       u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
+
+       if (satp_mode_cmdline == SATP_MODE_57) {
+               disable_pgtable_l5();
+       } else if (satp_mode_cmdline == SATP_MODE_48) {
+               disable_pgtable_l5();
+               disable_pgtable_l4();
+               return;
+       }
 
        create_p4d_mapping(early_p4d,
                        set_satp_mode_pmd, (uintptr_t)early_pud,
@@ -789,7 +813,8 @@ static __init void set_satp_mode(void)
 retry:
        create_pgd_mapping(early_pg_dir,
                           set_satp_mode_pmd,
-                          check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
+                          pgtable_l5_enabled ?
+                               (uintptr_t)early_p4d : (uintptr_t)early_pud,
                           PGDIR_SIZE, PAGE_TABLE);
 
        identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
@@ -800,9 +825,8 @@ retry:
        local_flush_tlb_all();
 
        if (hw_satp != identity_satp) {
-               if (!check_l4) {
+               if (pgtable_l5_enabled) {
                        disable_pgtable_l5();
-                       check_l4 = true;
                        memset(early_pg_dir, 0, PAGE_SIZE);
                        goto retry;
                }
@@ -1031,7 +1055,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 #endif
 
 #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
-       set_satp_mode();
+       set_satp_mode(dtb_pa);
 #endif
 
        /*