Merge branches 'for-next/kvm-build-fix', 'for-next/va-refactor', 'for-next/lto',...
author Catalin Marinas <catalin.marinas@arm.com>
Wed, 9 Dec 2020 18:04:35 +0000 (18:04 +0000)
committer Catalin Marinas <catalin.marinas@arm.com>
Wed, 9 Dec 2020 18:04:35 +0000 (18:04 +0000)
* for-next/kvm-build-fix:
  : Fix KVM build issues with 64K pages
  KVM: arm64: Fix build error in user_mem_abort()

* for-next/va-refactor:
  : VA layout changes
  arm64: mm: don't assume struct page is always 64 bytes
  Documentation/arm64: fix RST layout of memory.rst
  arm64: mm: tidy up top of kernel VA space
  arm64: mm: make vmemmap region a projection of the linear region
  arm64: mm: extend linear region for 52-bit VA configurations

* for-next/lto:
  : Upgrade READ_ONCE() to RCpc acquire on arm64 with LTO
  arm64: lto: Strengthen READ_ONCE() to acquire when CONFIG_LTO=y
  arm64: alternatives: Remove READ_ONCE() usage during patch operation
  arm64: cpufeatures: Add capability for LDAPR instruction
  arm64: alternatives: Split up alternative.h
  arm64: uaccess: move uao_* alternatives to asm-uaccess.h

* for-next/mem-hotplug:
  : Memory hotplug improvements
  arm64/mm/hotplug: Ensure early memory sections are all online
  arm64/mm/hotplug: Enable MEM_OFFLINE event handling
  arm64/mm/hotplug: Register boot memory hot remove notifier earlier
  arm64: mm: account for hotplug memory when randomizing the linear region

* for-next/cppc-ffh:
  : Add CPPC FFH support using arm64 AMU counters
  arm64: abort counter_read_on_cpu() when irqs_disabled()
  arm64: implement CPPC FFH support using AMUs
  arm64: split counter validation function
  arm64: wrap and generalise counter read functions

* for-next/pad-image-header:
  : Pad Image header to 64KB and unmap it
  arm64: head: tidy up the Image header definition
  arm64/head: avoid symbol names pointing into first 64 KB of kernel image
  arm64: omit [_text, _stext) from permanent kernel mapping

* for-next/zone-dma-default-32-bit:
  : Default to 32-bit wide ZONE_DMA (previously reduced to 1GB for RPi4)
  of: unittest: Fix build on architectures without CONFIG_OF_ADDRESS
  mm: Remove examples from enum zone_type comment
  arm64: mm: Set ZONE_DMA size based on early IORT scan
  arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges
  of: unittest: Add test for of_dma_get_max_cpu_address()
  of/address: Introduce of_dma_get_max_cpu_address()
  arm64: mm: Move zone_dma_bits initialization into zone_sizes_init()
  arm64: mm: Move reserve_crashkernel() into mem_init()
  arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required
  arm64: Ignore any DMA offsets in the max_zone_phys() calculation

* for-next/signal-tag-bits:
  : Expose the FAR_EL1 tag bits in siginfo
  arm64: expose FAR_EL1 tag bits in siginfo
  signal: define the SA_EXPOSE_TAGBITS bit in sa_flags
  signal: define the SA_UNSUPPORTED bit in sa_flags
  arch: provide better documentation for the arch-specific SA_* flags
  signal: clear non-uapi flag bits when passing/returning sa_flags
  arch: move SA_* definitions to generic headers
  parisc: start using signal-defs.h
  parisc: Drop parisc special case for __sighandler_t

* for-next/cmdline-extended:
  : Add support for CONFIG_CMDLINE_EXTEND
  arm64: Extend the kernel command line from the bootloader
  arm64: kaslr: Refactor early init command line parsing

34 files changed:
Documentation/arm64/kasan-offsets.sh
Documentation/arm64/memory.rst
arch/arm64/Kconfig
arch/arm64/include/asm/alternative-macros.h [new file with mode: 0644]
arch/arm64/include/asm/alternative.h
arch/arm64/include/asm/asm-uaccess.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/insn.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/rwonce.h [new file with mode: 0644]
arch/arm64/include/asm/topology.h
arch/arm64/kernel/alternative.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/efi-header.S
arch/arm64/kernel/head.S
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/proton-pack.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/topology.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/mmu.c
arch/arm64/lib/mte.S
arch/arm64/mm/init.c
arch/arm64/mm/mmu.c
drivers/acpi/arm64/iort.c
drivers/of/address.c
drivers/of/unittest.c
include/linux/acpi_iort.h
include/linux/mmzone.h
include/linux/of.h

index 2b7a021db363abcff632eeeaf4d2527b9911d0d5..2dc5f9e18039b3a01e08857455cc2c59e79d6af6 100644 (file)
@@ -1,12 +1,11 @@
 #!/bin/sh
 
 # Print out the KASAN_SHADOW_OFFSETS required to place the KASAN SHADOW
-# start address at the mid-point of the kernel VA space
+# start address at the top of the linear region
 
 print_kasan_offset () {
        printf "%02d\t" $1
        printf "0x%08x00000000\n" $(( (0xffffffff & (-1 << ($1 - 1 - 32))) \
-                       + (1 << ($1 - 32 - $2)) \
                        - (1 << (64 - 32 - $2)) ))
 }
 
index cf03b3290800c25ea7c5d8cba093b0189e3d727a..e7522e5c8322b3a3f0df9654c34618f5de4d1229 100644 (file)
@@ -32,17 +32,16 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   -----------------------------------------------------------------------
   0000000000000000     0000ffffffffffff         256TB          user
   ffff000000000000     ffff7fffffffffff         128TB          kernel logical memory map
-  ffff800000000000     ffff9fffffffffff          32TB          kasan shadow region
-  ffffa00000000000     ffffa00007ffffff         128MB          bpf jit region
-  ffffa00008000000     ffffa0000fffffff         128MB          modules
-  ffffa00010000000     fffffdffbffeffff         ~93TB          vmalloc
-  fffffdffbfff0000     fffffdfffe5f8fff        ~998MB          [guard region]
-  fffffdfffe5f9000     fffffdfffe9fffff        4124KB          fixed mappings
-  fffffdfffea00000     fffffdfffebfffff           2MB          [guard region]
-  fffffdfffec00000     fffffdffffbfffff          16MB          PCI I/O space
-  fffffdffffc00000     fffffdffffdfffff           2MB          [guard region]
-  fffffdffffe00000     ffffffffffdfffff           2TB          vmemmap
-  ffffffffffe00000     ffffffffffffffff           2MB          [guard region]
+ [ffff600000000000     ffff7fffffffffff]         32TB          [kasan shadow region]
+  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
+  ffff800008000000     ffff80000fffffff         128MB          modules
+  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
+  fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
+  fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
+  fffffbffff800000     fffffbffffffffff           8MB          [guard region]
+  fffffc0000000000     fffffdffffffffff           2TB          vmemmap
+  fffffe0000000000     ffffffffffffffff           2TB          [guard region]
 
 
 AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
@@ -50,19 +49,17 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   Start                        End                     Size            Use
   -----------------------------------------------------------------------
   0000000000000000     000fffffffffffff           4PB          user
-  fff0000000000000     fff7ffffffffffff           2PB          kernel logical memory map
-  fff8000000000000     fffd9fffffffffff        1440TB          [gap]
-  fffda00000000000     ffff9fffffffffff         512TB          kasan shadow region
-  ffffa00000000000     ffffa00007ffffff         128MB          bpf jit region
-  ffffa00008000000     ffffa0000fffffff         128MB          modules
-  ffffa00010000000     fffff81ffffeffff         ~88TB          vmalloc
-  fffff81fffff0000     fffffc1ffe58ffff          ~3TB          [guard region]
-  fffffc1ffe590000     fffffc1ffe9fffff        4544KB          fixed mappings
-  fffffc1ffea00000     fffffc1ffebfffff           2MB          [guard region]
-  fffffc1ffec00000     fffffc1fffbfffff          16MB          PCI I/O space
-  fffffc1fffc00000     fffffc1fffdfffff           2MB          [guard region]
-  fffffc1fffe00000     ffffffffffdfffff        3968GB          vmemmap
-  ffffffffffe00000     ffffffffffffffff           2MB          [guard region]
+  fff0000000000000     ffff7fffffffffff          ~4PB          kernel logical memory map
+ [fffd800000000000     ffff7fffffffffff]        512TB          [kasan shadow region]
+  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
+  ffff800008000000     ffff80000fffffff         128MB          modules
+  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
+  fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
+  fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
+  fffffbffff800000     fffffbffffffffff           8MB          [guard region]
+  fffffc0000000000     ffffffdfffffffff          ~4TB          vmemmap
+  ffffffe000000000     ffffffffffffffff         128GB          [guard region]
 
 
 Translation table lookup with 4KB pages::
index 1515f6f153a0dc9a3ebc4220442686138a9f7859..2272a95057275e6f09758b3339e352c7ac16c96c 100644 (file)
@@ -331,16 +331,16 @@ config BROKEN_GAS_INST
 config KASAN_SHADOW_OFFSET
        hex
        depends on KASAN
-       default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
-       default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
-       default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
-       default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
-       default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
-       default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
-       default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
-       default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
-       default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
-       default 0xeffffff900000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
+       default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
+       default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
+       default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
+       default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
+       default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
+       default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
+       default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
+       default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
+       default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
+       default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
        default 0xffffffffffffffff
 
 source "arch/arm64/Kconfig.platforms"
@@ -1388,6 +1388,9 @@ config ARM64_PAN
         The feature is detected at runtime, and will remain as a 'nop'
         instruction if the cpu does not implement the feature.
 
+config AS_HAS_LDAPR
+       def_bool $(as-instr,.arch_extension rcpc)
+
 config ARM64_LSE_ATOMICS
        bool
        default ARM64_USE_LSE_ATOMICS
@@ -1846,15 +1849,36 @@ config CMDLINE
          entering them here. As a minimum, you should specify the
          root device (e.g. root=/dev/nfs).
 
+choice
+       prompt "Kernel command line type" if CMDLINE != ""
+       default CMDLINE_FROM_BOOTLOADER
+       help
+         Choose how the kernel will handle the provided default kernel
+         command line string.
+
+config CMDLINE_FROM_BOOTLOADER
+       bool "Use bootloader kernel arguments if available"
+       help
+         Uses the command-line options passed by the boot loader. If
+         the boot loader doesn't provide any, the default kernel command
+         string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+       bool "Extend bootloader kernel arguments"
+       help
+         The command-line arguments provided by the boot loader will be
+         appended to the default kernel command string.
+
 config CMDLINE_FORCE
        bool "Always use the default kernel command string"
-       depends on CMDLINE != ""
        help
          Always use the default kernel command string, even if the boot
          loader passes other arguments to the kernel.
          This is useful if you cannot or don't want to change the
          command-line options your boot loader passes to the kernel.
 
+endchoice
+
 config EFI_STUB
        bool
 
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644 (file)
index 0000000..5df500d
--- /dev/null
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <asm/cpucaps.h>
+
+#define ARM64_CB_PATCH ARM64_NCAPS
+
+/* A64 instructions are always 32 bits. */
+#define        AARCH64_INSN_SIZE               4
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(feature)                                                      \
+       " .word 661b - .\n"                             /* label           */ \
+       " .word 663f - .\n"                             /* new instruction */ \
+       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
+       " .byte 662b-661b\n"                            /* source len      */ \
+       " .byte 664f-663f\n"                            /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(feature, cb)                                       \
+       " .word 661b - .\n"                             /* label           */ \
+       " .word " __stringify(cb) "- .\n"               /* callback */        \
+       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
+       " .byte 662b-661b\n"                            /* source len      */ \
+       " .byte 664f-663f\n"                            /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directives fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ *     .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 have a .inst. This should
+ * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)    \
+       ".if "__stringify(cfg_enabled)" == 1\n"                         \
+       "661:\n\t"                                                      \
+       oldinstr "\n"                                                   \
+       "662:\n"                                                        \
+       ".pushsection .altinstructions,\"a\"\n"                         \
+       ALTINSTR_ENTRY(feature)                                         \
+       ".popsection\n"                                                 \
+       ".subsection 1\n"                                               \
+       "663:\n\t"                                                      \
+       newinstr "\n"                                                   \
+       "664:\n\t"                                                      \
+       ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
+       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
+       ".previous\n"                                                   \
+       ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
+       ".if "__stringify(cfg_enabled)" == 1\n"                         \
+       "661:\n\t"                                                      \
+       oldinstr "\n"                                                   \
+       "662:\n"                                                        \
+       ".pushsection .altinstructions,\"a\"\n"                         \
+       ALTINSTR_ENTRY_CB(feature, cb)                                  \
+       ".popsection\n"                                                 \
+       "663:\n\t"                                                      \
+       "664:\n\t"                                                      \
+       ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)        \
+       __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cb) \
+       __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+       .word \orig_offset - .
+       .word \alt_offset - .
+       .hword \feature
+       .byte \orig_len
+       .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+       .if \enable
+661:   \insn1
+662:   .pushsection .altinstructions, "a"
+       altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+       .popsection
+       .subsection 1
+663:   \insn2
+664:   .previous
+       .org    . - (664b-663b) + (662b-661b)
+       .org    . - (662b-661b) + (664b-663b)
+       .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ *    sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ *    alternative sequence it is defined in (branches into an
+ *    alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+       .set .Lasm_alt_mode, 0
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+       .popsection
+661:
+.endm
+
+.macro alternative_if cap
+       .set .Lasm_alt_mode, 1
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+       .popsection
+       .subsection 1
+       .align 2        /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cb
+       .set .Lasm_alt_mode, 0
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+       .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+       .if .Lasm_alt_mode==0
+       .subsection 1
+       .else
+       .previous
+       .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+       .if .Lasm_alt_mode==0
+       .previous
+       .endif
+       .org    . - (664b-663b) + (662b-661b)
+       .org    . - (662b-661b) + (664b-663b)
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+       nops    (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
+       alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+.macro user_alt, label, oldinstr, newinstr, cond
+9999:  alternative_insn "\oldinstr", "\newinstr", \cond
+       _asm_extable 9999b, \label
+.endm
+
+#endif  /*  __ASSEMBLY__  */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ *      will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...)   \
+       _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
index 619db9b4c9d5c0434e321736c7a5fec9dc6a9b1c..a38b92e11811e8b1ce379df536e11545bff4f1cc 100644 (file)
@@ -2,17 +2,13 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
-#include <asm/cpucaps.h>
-#include <asm/insn.h>
-
-#define ARM64_CB_PATCH ARM64_NCAPS
+#include <asm/alternative-macros.h>
 
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
-#include <linux/stringify.h>
 
 struct alt_instr {
        s32 orig_offset;        /* offset to original instruction */
@@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length);
 static inline void apply_alternatives_module(void *start, size_t length) { }
 #endif
 
-#define ALTINSTR_ENTRY(feature)                                                      \
-       " .word 661b - .\n"                             /* label           */ \
-       " .word 663f - .\n"                             /* new instruction */ \
-       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
-       " .byte 662b-661b\n"                            /* source len      */ \
-       " .byte 664f-663f\n"                            /* replacement len */
-
-#define ALTINSTR_ENTRY_CB(feature, cb)                                       \
-       " .word 661b - .\n"                             /* label           */ \
-       " .word " __stringify(cb) "- .\n"               /* callback */        \
-       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
-       " .byte 662b-661b\n"                            /* source len      */ \
-       " .byte 664f-663f\n"                            /* replacement len */
-
-/*
- * alternative assembly primitive:
- *
- * If any of these .org directive fail, it means that insn1 and insn2
- * don't have the same length. This used to be written as
- *
- * .if ((664b-663b) != (662b-661b))
- *     .error "Alternatives instruction length mismatch"
- * .endif
- *
- * but most assemblers die if insn1 or insn2 have a .inst. This should
- * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
- * containing commit 4e4d08cf7399b606 or c1baaddf8861).
- *
- * Alternatives with callbacks do not generate replacement instructions.
- */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)    \
-       ".if "__stringify(cfg_enabled)" == 1\n"                         \
-       "661:\n\t"                                                      \
-       oldinstr "\n"                                                   \
-       "662:\n"                                                        \
-       ".pushsection .altinstructions,\"a\"\n"                         \
-       ALTINSTR_ENTRY(feature)                                         \
-       ".popsection\n"                                                 \
-       ".subsection 1\n"                                               \
-       "663:\n\t"                                                      \
-       newinstr "\n"                                                   \
-       "664:\n\t"                                                      \
-       ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
-       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
-       ".previous\n"                                                   \
-       ".endif\n"
-
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
-       ".if "__stringify(cfg_enabled)" == 1\n"                         \
-       "661:\n\t"                                                      \
-       oldinstr "\n"                                                   \
-       "662:\n"                                                        \
-       ".pushsection .altinstructions,\"a\"\n"                         \
-       ALTINSTR_ENTRY_CB(feature, cb)                                  \
-       ".popsection\n"                                                 \
-       "663:\n\t"                                                      \
-       "664:\n\t"                                                      \
-       ".endif\n"
-
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)        \
-       __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
-
-#define ALTERNATIVE_CB(oldinstr, cb) \
-       __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
-#else
-
-#include <asm/assembler.h>
-
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
-       .word \orig_offset - .
-       .word \alt_offset - .
-       .hword \feature
-       .byte \orig_len
-       .byte \alt_len
-.endm
-
-.macro alternative_insn insn1, insn2, cap, enable = 1
-       .if \enable
-661:   \insn1
-662:   .pushsection .altinstructions, "a"
-       altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
-       .popsection
-       .subsection 1
-663:   \insn2
-664:   .previous
-       .org    . - (664b-663b) + (662b-661b)
-       .org    . - (662b-661b) + (664b-663b)
-       .endif
-.endm
-
-/*
- * Alternative sequences
- *
- * The code for the case where the capability is not present will be
- * assembled and linked as normal. There are no restrictions on this
- * code.
- *
- * The code for the case where the capability is present will be
- * assembled into a special section to be used for dynamic patching.
- * Code for that case must:
- *
- * 1. Be exactly the same length (in bytes) as the default code
- *    sequence.
- *
- * 2. Not contain a branch target that is used outside of the
- *    alternative sequence it is defined in (branches into an
- *    alternative sequence are not fixed up).
- */
-
-/*
- * Begin an alternative code sequence.
- */
-.macro alternative_if_not cap
-       .set .Lasm_alt_mode, 0
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
-       .popsection
-661:
-.endm
-
-.macro alternative_if cap
-       .set .Lasm_alt_mode, 1
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
-       .popsection
-       .subsection 1
-       .align 2        /* So GAS knows label 661 is suitably aligned */
-661:
-.endm
-
-.macro alternative_cb cb
-       .set .Lasm_alt_mode, 0
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
-       .popsection
-661:
-.endm
-
-/*
- * Provide the other half of the alternative code sequence.
- */
-.macro alternative_else
-662:
-       .if .Lasm_alt_mode==0
-       .subsection 1
-       .else
-       .previous
-       .endif
-663:
-.endm
-
-/*
- * Complete an alternative code sequence.
- */
-.macro alternative_endif
-664:
-       .if .Lasm_alt_mode==0
-       .previous
-       .endif
-       .org    . - (664b-663b) + (662b-661b)
-       .org    . - (662b-661b) + (664b-663b)
-.endm
-
-/*
- * Callback-based alternative epilogue
- */
-.macro alternative_cb_end
-662:
-.endm
-
-/*
- * Provides a trivial alternative or default sequence consisting solely
- * of NOPs. The number of NOPs is chosen automatically to match the
- * previous case.
- */
-.macro alternative_else_nop_endif
-alternative_else
-       nops    (662b-661b) / AARCH64_INSN_SIZE
-alternative_endif
-.endm
-
-#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
-       alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-
-.macro user_alt, label, oldinstr, newinstr, cond
-9999:  alternative_insn "\oldinstr", "\newinstr", \cond
-       _asm_extable 9999b, \label
-.endm
-
-/*
- * Generate the assembly for UAO alternatives with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
-#ifdef CONFIG_ARM64_UAO
-       .macro uao_ldp l, reg1, reg2, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  ldp     \reg1, \reg2, [\addr], \post_inc;
-8889:                  nop;
-                       nop;
-               alternative_else
-                       ldtr    \reg1, [\addr];
-                       ldtr    \reg2, [\addr, #8];
-                       add     \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
-       .endm
-
-       .macro uao_stp l, reg1, reg2, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  stp     \reg1, \reg2, [\addr], \post_inc;
-8889:                  nop;
-                       nop;
-               alternative_else
-                       sttr    \reg1, [\addr];
-                       sttr    \reg2, [\addr, #8];
-                       add     \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
-       .endm
-
-       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  \inst   \reg, [\addr], \post_inc;
-                       nop;
-               alternative_else
-                       \alt_inst       \reg, [\addr];
-                       add             \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-       .endm
-#else
-       .macro uao_ldp l, reg1, reg2, addr, post_inc
-               USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
-       .endm
-       .macro uao_stp l, reg1, reg2, addr, post_inc
-               USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
-       .endm
-       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
-               USER(\l, \inst \reg, [\addr], \post_inc)
-       .endm
-#endif
-
-#endif  /*  __ASSEMBLY__  */
-
-/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
- *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
- * N.B. If CONFIG_FOO is specified, but not selected, the whole block
- *      will be omitted, including oldinstr.
- */
-#define ALTERNATIVE(oldinstr, newinstr, ...)   \
-       _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
-
+#endif /* __ASSEMBLY__ */
 #endif /* __ASM_ALTERNATIVE_H */
index f68a0e64482a1eb2acceb89a000b517175aba6d4..2c26ca5b7bb04f07d2d48c5dc19a22e18ce2bbeb 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef __ASM_ASM_UACCESS_H
 #define __ASM_ASM_UACCESS_H
 
-#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
@@ -58,4 +58,63 @@ alternative_else_nop_endif
        .endm
 #endif
 
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there are no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+       .macro uao_ldp l, reg1, reg2, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  ldp     \reg1, \reg2, [\addr], \post_inc;
+8889:                  nop;
+                       nop;
+               alternative_else
+                       ldtr    \reg1, [\addr];
+                       ldtr    \reg2, [\addr, #8];
+                       add     \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro uao_stp l, reg1, reg2, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  stp     \reg1, \reg2, [\addr], \post_inc;
+8889:                  nop;
+                       nop;
+               alternative_else
+                       sttr    \reg1, [\addr];
+                       sttr    \reg2, [\addr, #8];
+                       add     \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  \inst   \reg, [\addr], \post_inc;
+                       nop;
+               alternative_else
+                       \alt_inst       \reg, [\addr];
+                       add             \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+       .endm
+#else
+       .macro uao_ldp l, reg1, reg2, addr, post_inc
+               USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+       .endm
+       .macro uao_stp l, reg1, reg2, addr, post_inc
+               USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+       .endm
+       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+               USER(\l, \inst \reg, [\addr], \post_inc)
+       .endm
+#endif
+
 #endif
index e7d98997c09c3058dd0656af26528ebd220cd2bc..64ea0bb9f42099f876abdbc48349fce6e09b0174 100644 (file)
@@ -66,7 +66,8 @@
 #define ARM64_HAS_TLB_RANGE                    56
 #define ARM64_MTE                              57
 #define ARM64_WORKAROUND_1508412               58
+#define ARM64_HAS_LDAPR                                59
 
-#define ARM64_NCAPS                            59
+#define ARM64_NCAPS                            60
 
 #endif /* __ASM_CPUCAPS_H */
index 97244d4feca9c31465c0b4dbd64d3a849e1ce4b7..f5b44ac354dc3b0130ffa93b467054d18577bdc9 100644 (file)
@@ -765,8 +765,16 @@ static inline bool cpu_has_hw_af(void)
 #ifdef CONFIG_ARM64_AMU_EXTN
 /* Check whether the cpu supports the Activity Monitors Unit (AMU) */
 extern bool cpu_has_amu_feat(int cpu);
+#else
+static inline bool cpu_has_amu_feat(int cpu)
+{
+       return false;
+}
 #endif
 
+/* Get a cpu that supports the Activity Monitors Unit (AMU) */
+extern int get_cpu_with_amu_feat(void);
+
 static inline unsigned int get_vmid_bits(u64 mmfr1)
 {
        int vmid_bits;
index 4b39293d0f72dddb809c734aace5df8f80fdc669..4ebb9c054cccd7fa9c70a47d3065dad6de0842ca 100644 (file)
@@ -10,8 +10,7 @@
 #include <linux/build_bug.h>
 #include <linux/types.h>
 
-/* A64 instructions are always 32 bits. */
-#define        AARCH64_INSN_SIZE               4
+#include <asm/alternative.h>
 
 #ifndef __ASSEMBLY__
 /*
index cd61239bae8c25d4a59f7121ba1c34201eb4f79d..556cb2d62b5bb907d66d10dc27d3446268dd7946 100644 (file)
@@ -30,8 +30,8 @@
  * keep a constant PAGE_OFFSET and "fallback" to using the higher end
  * of the VMEMMAP where 52-bit support is not available in hardware.
  */
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \
-                       >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_SHIFT  (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE   ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
 
 /*
  * PAGE_OFFSET - the virtual address of the start of the linear map, at the
 #define _PAGE_OFFSET(va)       (-(UL(1) << (va)))
 #define PAGE_OFFSET            (_PAGE_OFFSET(VA_BITS))
 #define KIMAGE_VADDR           (MODULES_END)
-#define BPF_JIT_REGION_START   (KASAN_SHADOW_END)
+#define BPF_JIT_REGION_START   (_PAGE_END(VA_BITS_MIN))
 #define BPF_JIT_REGION_SIZE    (SZ_128M)
 #define BPF_JIT_REGION_END     (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
 #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR          (BPF_JIT_REGION_END)
 #define MODULES_VSIZE          (SZ_128M)
-#define VMEMMAP_START          (-VMEMMAP_SIZE - SZ_2M)
+#define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END             (VMEMMAP_START - SZ_2M)
+#define PCI_IO_END             (VMEMMAP_START - SZ_8M)
 #define PCI_IO_START           (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP            (PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP            (VMEMMAP_START - SZ_32M)
 
 #if VA_BITS > 48
 #define VA_BITS_MIN            (48)
 #define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
 #define KASAN_SHADOW_END       ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
                                        + KASAN_SHADOW_OFFSET)
+#define PAGE_END               (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
 #define KASAN_THREAD_SHIFT     1
 #else
 #define KASAN_THREAD_SHIFT     0
-#define KASAN_SHADOW_END       (_PAGE_END(VA_BITS_MIN))
+#define PAGE_END               (_PAGE_END(VA_BITS_MIN))
 #endif /* CONFIG_KASAN */
 
 #define MIN_THREAD_SHIFT       (14 + KASAN_THREAD_SHIFT)
 #include <asm/bug.h>
 
 extern u64                     vabits_actual;
-#define PAGE_END               (_PAGE_END(vabits_actual))
 
 extern s64                     memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
@@ -238,11 +238,9 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 
 
 /*
- * The linear kernel range starts at the bottom of the virtual address
- * space. Testing the top bit for the start of the region is a
- * sufficient check and avoids having to worry about the tag.
+ * The linear kernel range starts at the bottom of the virtual address space.
  */
-#define __is_lm_address(addr)  (!(((u64)addr) & BIT(vabits_actual - 1)))
+#define __is_lm_address(addr)  (((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
 
 #define __lm_to_phys(addr)     (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
 #define __kimg_to_phys(addr)   ((addr) - kimage_voffset)
index 4ff12a7adcfd112beed24807ef231008447678e3..ec307b8bcb15c618658ed82de3c4801b735fd3cb 100644 (file)
@@ -22,7 +22,7 @@
  *     and fixed mappings
  */
 #define VMALLOC_START          (MODULES_END)
-#define VMALLOC_END            (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_END            (VMEMMAP_START - SZ_256M)
 
 #define vmemmap                        ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
 
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644 (file)
index 0000000..1bce62f
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_LTO
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...)                                      \
+       ALTERNATIVE(                                                    \
+               "ldar"  #sfx "\t" #regs,                                \
+               ".arch_extension rcpc\n"                                \
+               "ldapr" #sfx "\t" #regs,                                \
+       ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...)      "ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x)                                                 \
+({                                                                     \
+       typeof(&(x)) __x = &(x);                                        \
+       int atomic = 1;                                                 \
+       union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+       switch (sizeof(x)) {                                            \
+       case 1:                                                         \
+               asm volatile(__LOAD_RCPC(b, %w0, %1)                    \
+                       : "=r" (*(__u8 *)__u.__c)                       \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 2:                                                         \
+               asm volatile(__LOAD_RCPC(h, %w0, %1)                    \
+                       : "=r" (*(__u16 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 4:                                                         \
+               asm volatile(__LOAD_RCPC(, %w0, %1)                     \
+                       : "=r" (*(__u32 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 8:                                                         \
+               asm volatile(__LOAD_RCPC(, %0, %1)                      \
+                       : "=r" (*(__u64 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       default:                                                        \
+               atomic = 0;                                             \
+       }                                                               \
+       atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
index 11a465243f660a1413026901ea02d00375f3b536..3b8dca4eb08d4138d5301d352178be877511bc29 100644 (file)
@@ -16,12 +16,14 @@ int pcibus_to_node(struct pci_bus *bus);
 
 #include <linux/arch_topology.h>
 
+void update_freq_counters_refs(void);
+void topology_scale_freq_tick(void);
+
 #ifdef CONFIG_ARM64_AMU_EXTN
 /*
  * Replace task scheduler's default counter-based
  * frequency-invariance scale factor setting.
  */
-void topology_scale_freq_tick(void);
 #define arch_scale_freq_tick topology_scale_freq_tick
 #endif /* CONFIG_ARM64_AMU_EXTN */
 
index 73039949b5ce2f6227f11d0d1591967c38bda8f6..a57cffb752e8955b8b9b9dbc144f632dead81402 100644 (file)
@@ -21,7 +21,8 @@
 #define ALT_ORIG_PTR(a)                __ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)                __ALT_PTR(a, alt_offset)
 
-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
 
 static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
 
@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
 
        /* We always have a CPU 0 at this point (__init) */
        if (smp_processor_id()) {
-               while (!READ_ONCE(all_alternatives_applied))
+               while (!all_alternatives_applied)
                        cpu_relax();
                isb();
        } else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
                BUG_ON(all_alternatives_applied);
                __apply_alternatives(&region, false, remaining_capabilities);
                /* Barriers provided by the cache flushing */
-               WRITE_ONCE(all_alternatives_applied, 1);
+               all_alternatives_applied = 1;
        }
 
        return 0;
index dcc165b3fc046b8573a579f04dd3e71474c7c471..bffcd55668c7fe4ff3c4feb250780af2494d6a8b 100644 (file)
@@ -1526,8 +1526,10 @@ bool cpu_has_amu_feat(int cpu)
        return cpumask_test_cpu(cpu, &amu_cpus);
 }
 
-/* Initialize the use of AMU counters for frequency invariance */
-extern void init_cpu_freq_invariance_counters(void);
+int get_cpu_with_amu_feat(void)
+{
+       return cpumask_any(&amu_cpus);
+}
 
 static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
 {
@@ -1535,7 +1537,7 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
                pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
                        smp_processor_id());
                cpumask_set_cpu(smp_processor_id(), &amu_cpus);
-               init_cpu_freq_invariance_counters();
+               update_freq_counters_refs();
        }
 }
 
@@ -1557,6 +1559,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
 
        return true;
 }
+#else
+int get_cpu_with_amu_feat(void)
+{
+       return nr_cpu_ids;
+}
 #endif
 
 #ifdef CONFIG_ARM64_VHE
@@ -2136,6 +2143,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .cpu_enable = cpu_enable_mte,
        },
 #endif /* CONFIG_ARM64_MTE */
+       {
+               .desc = "RCpc load-acquire (LDAPR)",
+               .capability = ARM64_HAS_LDAPR,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .sys_reg = SYS_ID_AA64ISAR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+               .matches = has_cpuid_feature,
+               .min_field_value = 1,
+       },
        {},
 };
 
index a71844fb923eef9669bd23b087b752c214b8a763..28d8a5dca5f129784b158589a3c4c1d231ffd6e9 100644 (file)
@@ -7,30 +7,48 @@
 #include <linux/pe.h>
 #include <linux/sizes.h>
 
+       .macro  efi_signature_nop
+#ifdef CONFIG_EFI
+.L_head:
+       /*
+        * This ccmp instruction has no meaningful effect except that
+        * its opcode forms the magic "MZ" signature required by UEFI.
+        */
+       ccmp    x18, #0, #0xd, pl
+#else
+       /*
+        * Bootloaders may inspect the opcode at the start of the kernel
+        * image to decide if the kernel is capable of booting via UEFI.
+        * So put an ordinary NOP here, not the "MZ.." pseudo-nop above.
+        */
+       nop
+#endif
+       .endm
+
        .macro  __EFI_PE_HEADER
+#ifdef CONFIG_EFI
+       .set    .Lpe_header_offset, . - .L_head
        .long   PE_MAGIC
-coff_header:
        .short  IMAGE_FILE_MACHINE_ARM64                // Machine
-       .short  section_count                           // NumberOfSections
+       .short  .Lsection_count                         // NumberOfSections
        .long   0                                       // TimeDateStamp
        .long   0                                       // PointerToSymbolTable
        .long   0                                       // NumberOfSymbols
-       .short  section_table - optional_header         // SizeOfOptionalHeader
+       .short  .Lsection_table - .Loptional_header     // SizeOfOptionalHeader
        .short  IMAGE_FILE_DEBUG_STRIPPED | \
                IMAGE_FILE_EXECUTABLE_IMAGE | \
                IMAGE_FILE_LINE_NUMS_STRIPPED           // Characteristics
 
-optional_header:
+.Loptional_header:
        .short  PE_OPT_MAGIC_PE32PLUS                   // PE32+ format
        .byte   0x02                                    // MajorLinkerVersion
        .byte   0x14                                    // MinorLinkerVersion
-       .long   __initdata_begin - efi_header_end       // SizeOfCode
+       .long   __initdata_begin - .Lefi_header_end     // SizeOfCode
        .long   __pecoff_data_size                      // SizeOfInitializedData
        .long   0                                       // SizeOfUninitializedData
-       .long   __efistub_efi_pe_entry - _head          // AddressOfEntryPoint
-       .long   efi_header_end - _head                  // BaseOfCode
+       .long   __efistub_efi_pe_entry - .L_head        // AddressOfEntryPoint
+       .long   .Lefi_header_end - .L_head              // BaseOfCode
 
-extra_header_fields:
        .quad   0                                       // ImageBase
        .long   SEGMENT_ALIGN                           // SectionAlignment
        .long   PECOFF_FILE_ALIGNMENT                   // FileAlignment
@@ -42,10 +60,10 @@ extra_header_fields:
        .short  0                                       // MinorSubsystemVersion
        .long   0                                       // Win32VersionValue
 
-       .long   _end - _head                            // SizeOfImage
+       .long   _end - .L_head                          // SizeOfImage
 
        // Everything before the kernel image is considered part of the header
-       .long   efi_header_end - _head                  // SizeOfHeaders
+       .long   .Lefi_header_end - .L_head              // SizeOfHeaders
        .long   0                                       // CheckSum
        .short  IMAGE_SUBSYSTEM_EFI_APPLICATION         // Subsystem
        .short  0                                       // DllCharacteristics
@@ -54,7 +72,7 @@ extra_header_fields:
        .quad   0                                       // SizeOfHeapReserve
        .quad   0                                       // SizeOfHeapCommit
        .long   0                                       // LoaderFlags
-       .long   (section_table - .) / 8                 // NumberOfRvaAndSizes
+       .long   (.Lsection_table - .) / 8               // NumberOfRvaAndSizes
 
        .quad   0                                       // ExportTable
        .quad   0                                       // ImportTable
@@ -64,17 +82,17 @@ extra_header_fields:
        .quad   0                                       // BaseRelocationTable
 
 #ifdef CONFIG_DEBUG_EFI
-       .long   efi_debug_table - _head                 // DebugTable
-       .long   efi_debug_table_size
+       .long   .Lefi_debug_table - .L_head             // DebugTable
+       .long   .Lefi_debug_table_size
 #endif
 
        // Section table
-section_table:
+.Lsection_table:
        .ascii  ".text\0\0\0"
-       .long   __initdata_begin - efi_header_end       // VirtualSize
-       .long   efi_header_end - _head                  // VirtualAddress
-       .long   __initdata_begin - efi_header_end       // SizeOfRawData
-       .long   efi_header_end - _head                  // PointerToRawData
+       .long   __initdata_begin - .Lefi_header_end     // VirtualSize
+       .long   .Lefi_header_end - .L_head              // VirtualAddress
+       .long   __initdata_begin - .Lefi_header_end     // SizeOfRawData
+       .long   .Lefi_header_end - .L_head              // PointerToRawData
 
        .long   0                                       // PointerToRelocations
        .long   0                                       // PointerToLineNumbers
@@ -86,9 +104,9 @@ section_table:
 
        .ascii  ".data\0\0\0"
        .long   __pecoff_data_size                      // VirtualSize
-       .long   __initdata_begin - _head                // VirtualAddress
+       .long   __initdata_begin - .L_head              // VirtualAddress
        .long   __pecoff_data_rawsize                   // SizeOfRawData
-       .long   __initdata_begin - _head                // PointerToRawData
+       .long   __initdata_begin - .L_head              // PointerToRawData
 
        .long   0                                       // PointerToRelocations
        .long   0                                       // PointerToLineNumbers
@@ -98,7 +116,7 @@ section_table:
                IMAGE_SCN_MEM_READ | \
                IMAGE_SCN_MEM_WRITE                     // Characteristics
 
-       .set    section_count, (. - section_table) / 40
+       .set    .Lsection_count, (. - .Lsection_table) / 40
 
 #ifdef CONFIG_DEBUG_EFI
        /*
@@ -114,21 +132,21 @@ section_table:
        __INITRODATA
 
        .align  2
-efi_debug_table:
+.Lefi_debug_table:
        // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
        .long   0                                       // Characteristics
        .long   0                                       // TimeDateStamp
        .short  0                                       // MajorVersion
        .short  0                                       // MinorVersion
        .long   IMAGE_DEBUG_TYPE_CODEVIEW               // Type
-       .long   efi_debug_entry_size                    // SizeOfData
+       .long   .Lefi_debug_entry_size                  // SizeOfData
        .long   0                                       // RVA
-       .long   efi_debug_entry - _head                 // FileOffset
+       .long   .Lefi_debug_entry - .L_head             // FileOffset
 
-       .set    efi_debug_table_size, . - efi_debug_table
+       .set    .Lefi_debug_table_size, . - .Lefi_debug_table
        .previous
 
-efi_debug_entry:
+.Lefi_debug_entry:
        // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
        .ascii  "NB10"                                  // Signature
        .long   0                                       // Unknown
@@ -137,16 +155,12 @@ efi_debug_entry:
 
        .asciz  VMLINUX_PATH
 
-       .set    efi_debug_entry_size, . - efi_debug_entry
+       .set    .Lefi_debug_entry_size, . - .Lefi_debug_entry
 #endif
 
-       /*
-        * EFI will load .text onwards at the 4k section alignment
-        * described in the PE/COFF header. To ensure that instruction
-        * sequences using an adrp and a :lo12: immediate will function
-        * correctly at this alignment, we must ensure that .text is
-        * placed at a 4k boundary in the Image to begin with.
-        */
        .balign SEGMENT_ALIGN
-efi_header_end:
+.Lefi_header_end:
+#else
+       .set    .Lpe_header_offset, 0x0
+#endif
        .endm
index d8d9caf02834e03f3b1da6201cc57b2dbf59eabe..c1f8f2c5be47ed29612fe2b8e0d5e3876d83aca4 100644 (file)
  * in the entry routines.
  */
        __HEAD
-_head:
        /*
         * DO NOT MODIFY. Image header expected by Linux boot-loaders.
         */
-#ifdef CONFIG_EFI
-       /*
-        * This add instruction has no meaningful effect except that
-        * its opcode forms the magic "MZ" signature required by UEFI.
-        */
-       add     x13, x18, #0x16
-       b       primary_entry
-#else
+       efi_signature_nop                       // special NOP to identify as PE/COFF executable
        b       primary_entry                   // branch to kernel start, magic
-       .long   0                               // reserved
-#endif
        .quad   0                               // Image load offset from start of RAM, little-endian
        le64sym _kernel_size_le                 // Effective size of kernel image, little-endian
        le64sym _kernel_flags_le                // Informative flags, little-endian
@@ -80,14 +70,9 @@ _head:
        .quad   0                               // reserved
        .quad   0                               // reserved
        .ascii  ARM64_IMAGE_MAGIC               // Magic number
-#ifdef CONFIG_EFI
-       .long   pe_header - _head               // Offset to the PE header.
+       .long   .Lpe_header_offset              // Offset to the PE header.
 
-pe_header:
        __EFI_PE_HEADER
-#else
-       .long   0                               // reserved
-#endif
 
        __INIT
 
index b181e0544b79909b8d514e69514c57af8a8cb258..0921aa1520b07f73c9baeded902e135cf0adb6ed 100644 (file)
@@ -50,10 +50,16 @@ static __init u64 get_kaslr_seed(void *fdt)
        return ret;
 }
 
-static __init const u8 *kaslr_get_cmdline(void *fdt)
+static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
 {
-       static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+       const u8 *str;
 
+       str = strstr(cmdline, "nokaslr");
+       return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
+static __init bool is_kaslr_disabled_cmdline(void *fdt)
+{
        if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
                int node;
                const u8 *prop;
@@ -65,10 +71,17 @@ static __init const u8 *kaslr_get_cmdline(void *fdt)
                prop = fdt_getprop(fdt, node, "bootargs", NULL);
                if (!prop)
                        goto out;
-               return prop;
+
+               if (cmdline_contains_nokaslr(prop))
+                       return true;
+
+               if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+                       goto out;
+
+               return false;
        }
 out:
-       return default_cmdline;
+       return cmdline_contains_nokaslr(CONFIG_CMDLINE);
 }
 
 /*
@@ -83,7 +96,6 @@ u64 __init kaslr_early_init(u64 dt_phys)
 {
        void *fdt;
        u64 seed, offset, mask, module_range;
-       const u8 *cmdline, *str;
        unsigned long raw;
        int size;
 
@@ -115,9 +127,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
         * Check if 'nokaslr' appears on the command line, and
         * return 0 if that is the case.
         */
-       cmdline = kaslr_get_cmdline(fdt);
-       str = strstr(cmdline, "nokaslr");
-       if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+       if (is_kaslr_disabled_cmdline(fdt)) {
                kaslr_status = KASLR_DISABLED_CMDLINE;
                return 0;
        }
index c18eb7d41274b296f7709c242f670c1f0986db57..4b202e460e6d1b620c962468368eef4a102e45e0 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/prctl.h>
 #include <linux/sched/task_stack.h>
 
+#include <asm/insn.h>
 #include <asm/spectre.h>
 #include <asm/traps.h>
 
index 133257ffd8591d692f5f64abf8f41fb2d0e0910c..fe1cf52f5f80c04b9392a7078d9474d222b74ef3 100644 (file)
@@ -206,7 +206,7 @@ static void __init request_standard_resources(void)
        unsigned long i = 0;
        size_t res_size;
 
-       kernel_code.start   = __pa_symbol(_text);
+       kernel_code.start   = __pa_symbol(_stext);
        kernel_code.end     = __pa_symbol(__init_begin - 1);
        kernel_data.start   = __pa_symbol(_sdata);
        kernel_data.end     = __pa_symbol(_end - 1);
@@ -283,7 +283,7 @@ u64 cpu_logical_map(int cpu)
 
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
-       init_mm.start_code = (unsigned long) _text;
+       init_mm.start_code = (unsigned long) _stext;
        init_mm.end_code   = (unsigned long) _etext;
        init_mm.end_data   = (unsigned long) _edata;
        init_mm.brk        = (unsigned long) _end;
index 543c67cae02ff7d4c96dc027e85ec3d2b075ecc9..b8026ec684ba1c3d4d79c6112d14b1fffe16bc3f 100644 (file)
@@ -124,6 +124,12 @@ int __init parse_acpi_topology(void)
 #endif
 
 #ifdef CONFIG_ARM64_AMU_EXTN
+#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
+#define read_constcnt()        read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
+#else
+#define read_corecnt() (0UL)
+#define read_constcnt()        (0UL)
+#endif
 
 #undef pr_fmt
 #define pr_fmt(fmt) "AMU: " fmt
@@ -133,54 +139,58 @@ static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
 static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
 static cpumask_var_t amu_fie_cpus;
 
-/* Initialize counter reference per-cpu variables for the current CPU */
-void init_cpu_freq_invariance_counters(void)
+void update_freq_counters_refs(void)
 {
-       this_cpu_write(arch_core_cycles_prev,
-                      read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
-       this_cpu_write(arch_const_cycles_prev,
-                      read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+       this_cpu_write(arch_core_cycles_prev, read_corecnt());
+       this_cpu_write(arch_const_cycles_prev, read_constcnt());
 }
 
-static int validate_cpu_freq_invariance_counters(int cpu)
+static inline bool freq_counters_valid(int cpu)
 {
-       u64 max_freq_hz, ratio;
+       if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
+               return false;
 
        if (!cpu_has_amu_feat(cpu)) {
                pr_debug("CPU%d: counters are not supported.\n", cpu);
-               return -EINVAL;
+               return false;
        }
 
        if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
                     !per_cpu(arch_core_cycles_prev, cpu))) {
                pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
-               return -EINVAL;
+               return false;
        }
 
-       /* Convert maximum frequency from KHz to Hz and validate */
-       max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
-       if (unlikely(!max_freq_hz)) {
-               pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+       return true;
+}
+
+static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+{
+       u64 ratio;
+
+       if (unlikely(!max_rate || !ref_rate)) {
+               pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+                        cpu);
                return -EINVAL;
        }
 
        /*
         * Pre-compute the fixed ratio between the frequency of the constant
-        * counter and the maximum frequency of the CPU.
+        * reference counter and the maximum frequency of the CPU.
         *
-        *                            const_freq
-        * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
-        *                         cpuinfo_max_freq
+        *                          ref_rate
+        * arch_max_freq_scale =   ---------- * SCHED_CAPACITY_SCALE²
+        *                          max_rate
         *
         * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
         * in order to ensure a good resolution for arch_max_freq_scale for
-        * very low arch timer frequencies (down to the KHz range which should
+        * very low reference frequencies (down to the KHz range which should
         * be unlikely).
         */
-       ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
-       ratio = div64_u64(ratio, max_freq_hz);
+       ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT);
+       ratio = div64_u64(ratio, max_rate);
        if (!ratio) {
-               WARN_ONCE(1, "System timer frequency too low.\n");
+               WARN_ONCE(1, "Reference frequency too low.\n");
                return -EINVAL;
        }
 
@@ -227,8 +237,12 @@ static int __init init_amu_fie(void)
        }
 
        for_each_present_cpu(cpu) {
-               if (validate_cpu_freq_invariance_counters(cpu))
+               if (!freq_counters_valid(cpu) ||
+                   freq_inv_set_max_ratio(cpu,
+                                          cpufreq_get_hw_max_freq(cpu) * 1000,
+                                          arch_timer_get_rate()))
                        continue;
+
                cpumask_set_cpu(cpu, valid_cpus);
                have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
        }
@@ -280,11 +294,14 @@ void topology_scale_freq_tick(void)
        if (!cpumask_test_cpu(cpu, amu_fie_cpus))
                return;
 
-       const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
-       core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
        prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
        prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
 
+       update_freq_counters_refs();
+
+       const_cnt = this_cpu_read(arch_const_cycles_prev);
+       core_cnt = this_cpu_read(arch_core_cycles_prev);
+
        if (unlikely(core_cnt <= prev_core_cnt ||
                     const_cnt <= prev_const_cnt))
                goto store_and_exit;
@@ -309,4 +326,71 @@ store_and_exit:
        this_cpu_write(arch_core_cycles_prev, core_cnt);
        this_cpu_write(arch_const_cycles_prev, const_cnt);
 }
-#endif /* CONFIG_ARM64_AMU_EXTN */
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+static void cpu_read_corecnt(void *val)
+{
+       *(u64 *)val = read_corecnt();
+}
+
+static void cpu_read_constcnt(void *val)
+{
+       *(u64 *)val = read_constcnt();
+}
+
+static inline
+int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
+{
+       /*
+        * Abort call on counterless CPU, or when interrupts are disabled
+        * since a synchronous smp call could then lead to deadlock.
+        */
+       if (!cpu_has_amu_feat(cpu))
+               return -EOPNOTSUPP;
+
+       if (WARN_ON_ONCE(irqs_disabled()))
+               return -EPERM;
+
+       smp_call_function_single(cpu, func, val, 1);
+
+       return 0;
+}
+
+/*
+ * Refer to drivers/acpi/cppc_acpi.c for the description of the functions
+ * below.
+ */
+bool cpc_ffh_supported(void)
+{
+       return freq_counters_valid(get_cpu_with_amu_feat());
+}
+
+int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val)
+{
+       int ret = -EOPNOTSUPP;
+
+       switch ((u64)reg->address) {
+       case 0x0:
+               ret = counters_read_on_cpu(cpu, cpu_read_corecnt, val);
+               break;
+       case 0x1:
+               ret = counters_read_on_cpu(cpu, cpu_read_constcnt, val);
+               break;
+       }
+
+       if (!ret) {
+               *val &= GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+                                   reg->bit_offset);
+               *val >>= reg->bit_offset;
+       }
+
+       return ret;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+       return -EOPNOTSUPP;
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
index d65f52264abaeb9edb0566037665a0e970f49ade..a8f8e409e2bfbcdd2a9c0318eb41465570a83882 100644 (file)
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv      \
             $(btildflags-y) -T
 
 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
 CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
 KASAN_SANITIZE                 := n
index 79280c53b9a616953eead419df59948c5f7e13a5..a1e0f91e6cea649a6527b367b16618c356905918 100644 (file)
@@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
 # As a result we set our own flags here.
 
 # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
 VDSO_CPPFLAGS += $(LINUXINCLUDE)
 
 # Common C and assembly flags
index 1bda604f4c704bd22f810c02fcacd60d720c43e4..94a08e3e32b1c2e5e051ea70024609e0ad7de185 100644 (file)
@@ -121,7 +121,7 @@ SECTIONS
                _text = .;
                HEAD_TEXT
        }
-       .text : {                       /* Real text segment            */
+       .text : ALIGN(SEGMENT_ALIGN) {  /* Real text segment            */
                _stext = .;             /* Text and read-only data      */
                        IRQENTRY_TEXT
                        SOFTIRQENTRY_TEXT
@@ -201,7 +201,7 @@ SECTIONS
                INIT_CALLS
                CON_INITCALL
                INIT_RAM_FS
-               *(.init.rodata.* .init.bss)     /* from the EFI stub */
+               *(.init.altinstructions .init.rodata.* .init.bss)       /* from the EFI stub */
        }
        .exit.data : {
                EXIT_DATA
index 57972bdb213ab25849e1df59c9bf51735031e9e9..1a01da9fdc99cd11a9912c31e4893d94cf070cb4 100644 (file)
@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        }
 
        switch (vma_shift) {
+#ifndef __PAGETABLE_PMD_FOLDED
        case PUD_SHIFT:
                if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
                        break;
                fallthrough;
+#endif
        case CONT_PMD_SHIFT:
                vma_shift = PMD_SHIFT;
                fallthrough;
index 03ca6d8b8670692dc18994fe348ab4ed0d2100fb..cceed41bba153a762ef888266b4e167d1f43d5af 100644 (file)
@@ -4,7 +4,7 @@
  */
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
+#include <asm/asm-uaccess.h>
 #include <asm/assembler.h>
 #include <asm/mte.h>
 #include <asm/page.h>
index 095540667f0fdd7ed408202264ffd57d5c22d0da..fbd452e12397b6d8198ca0b3b6bd9385917ee328 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
+#include <linux/acpi_iort.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -42,8 +43,6 @@
 #include <asm/tlb.h>
 #include <asm/alternative.h>
 
-#define ARM64_ZONE_DMA_BITS    30
-
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -175,21 +174,34 @@ static void __init reserve_elfcorehdr(void)
 #endif /* CONFIG_CRASH_DUMP */
 
 /*
- * Return the maximum physical address for a zone with a given address size
- * limit. It currently assumes that for memory starting above 4G, 32-bit
- * devices will use a DMA offset.
+ * Return the maximum physical address for a zone addressable with zone_bits
+ * bits. If DRAM starts above 4GB, expand the zone to all available memory;
+ * if it only starts above the zone_bits limit, cap the zone at 32 bits.
  */
 static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
 {
-       phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
-       return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
+       phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
+       phys_addr_t phys_start = memblock_start_of_DRAM();
+
+       if (phys_start > U32_MAX)
+               zone_mask = PHYS_ADDR_MAX;
+       else if (phys_start > zone_mask)
+               zone_mask = U32_MAX;
+
+       return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
 }
 
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
+       unsigned int __maybe_unused acpi_zone_dma_bits;
+       unsigned int __maybe_unused dt_zone_dma_bits;
 
 #ifdef CONFIG_ZONE_DMA
+       acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
+       dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
+       zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
+       arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
        max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
@@ -269,7 +281,7 @@ static void __init fdt_enforce_memory_region(void)
 
 void __init arm64_memblock_init(void)
 {
-       const s64 linear_region_size = BIT(vabits_actual - 1);
+       const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
 
        /* Handle linux,usable-memory-range property */
        fdt_enforce_memory_region();
@@ -348,15 +360,18 @@ void __init arm64_memblock_init(void)
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
                extern u16 memstart_offset_seed;
-               u64 range = linear_region_size -
-                           (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+               u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+               int parange = cpuid_feature_extract_unsigned_field(
+                                       mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+               s64 range = linear_region_size -
+                           BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
 
                /*
                 * If the size of the linear region exceeds, by a sufficient
-                * margin, the size of the region that the available physical
-                * memory spans, randomize the linear region as well.
+                * margin, the size of the region that the physical memory can
+                * span, randomize the linear region as well.
                 */
-               if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+               if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
                        range /= ARM64_MEMSTART_ALIGN;
                        memstart_addr -= ARM64_MEMSTART_ALIGN *
                                         ((range * memstart_offset_seed) >> 16);
@@ -367,7 +382,7 @@ void __init arm64_memblock_init(void)
         * Register the kernel text, kernel data, initrd, and initial
         * pagetables with memblock.
         */
-       memblock_reserve(__pa_symbol(_text), _end - _text);
+       memblock_reserve(__pa_symbol(_stext), _end - _stext);
        if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
                /* the generic initrd code expects virtual addresses */
                initrd_start = __phys_to_virt(phys_initrd_start);
@@ -376,18 +391,11 @@ void __init arm64_memblock_init(void)
 
        early_init_fdt_scan_reserved_mem();
 
-       if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-               zone_dma_bits = ARM64_ZONE_DMA_BITS;
-               arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
-       }
-
        if (IS_ENABLED(CONFIG_ZONE_DMA32))
                arm64_dma32_phys_limit = max_zone_phys(32);
        else
                arm64_dma32_phys_limit = PHYS_MASK + 1;
 
-       reserve_crashkernel();
-
        reserve_elfcorehdr();
 
        high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -427,6 +435,12 @@ void __init bootmem_init(void)
        sparse_init();
        zone_sizes_init(min, max);
 
+       /*
+        * request_standard_resources() depends on crashkernel's memory being
+        * reserved, so do it here.
+        */
+       reserve_crashkernel();
+
        memblock_dump_all();
 }
 
index 1c0f3e02f731edd8b962b2d62a8954236b91c3d7..fe0721a44376297df3591c669d046611724896f1 100644 (file)
@@ -464,20 +464,35 @@ void __init mark_linear_text_alias_ro(void)
        /*
         * Remove the write permissions from the linear alias of .text/.rodata
         */
-       update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
-                           (unsigned long)__init_begin - (unsigned long)_text,
+       update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
+                           (unsigned long)__init_begin - (unsigned long)_stext,
                            PAGE_KERNEL_RO);
 }
 
+static bool crash_mem_map __initdata;
+
+static int __init enable_crash_mem_map(char *arg)
+{
+       /*
+        * Proper parameter parsing is done by reserve_crashkernel(). We only
+        * need to know if the linear map has to avoid block mappings so that
+        * the crashkernel reservations can be unmapped later.
+        */
+       crash_mem_map = true;
+
+       return 0;
+}
+early_param("crashkernel", enable_crash_mem_map);
+
 static void __init map_mem(pgd_t *pgdp)
 {
-       phys_addr_t kernel_start = __pa_symbol(_text);
+       phys_addr_t kernel_start = __pa_symbol(_stext);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
        phys_addr_t start, end;
        int flags = 0;
        u64 i;
 
-       if (rodata_full || debug_pagealloc_enabled())
+       if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
        /*
@@ -487,11 +502,6 @@ static void __init map_mem(pgd_t *pgdp)
         * the following for-loop
         */
        memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
-#ifdef CONFIG_KEXEC_CORE
-       if (crashk_res.end)
-               memblock_mark_nomap(crashk_res.start,
-                                   resource_size(&crashk_res));
-#endif
 
        /* map all the memory banks */
        for_each_mem_range(i, &start, &end) {
@@ -506,7 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
        }
 
        /*
-        * Map the linear alias of the [_text, __init_begin) interval
+        * Map the linear alias of the [_stext, __init_begin) interval
         * as non-executable now, and remove the write permission in
         * mark_linear_text_alias_ro() below (which will be called after
         * alternative patching has completed). This makes the contents
@@ -518,21 +528,6 @@ static void __init map_mem(pgd_t *pgdp)
        __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
-
-#ifdef CONFIG_KEXEC_CORE
-       /*
-        * Use page-level mappings here so that we can shrink the region
-        * in page granularity and put back unused memory to buddy system
-        * through /sys/kernel/kexec_crash_size interface.
-        */
-       if (crashk_res.end) {
-               __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
-                              PAGE_KERNEL,
-                              NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
-               memblock_clear_nomap(crashk_res.start,
-                                    resource_size(&crashk_res));
-       }
-#endif
 }
 
 void mark_rodata_ro(void)
@@ -665,7 +660,7 @@ static void __init map_kernel(pgd_t *pgdp)
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
-       map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
+       map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
        map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
@@ -1493,13 +1488,43 @@ static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
        unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
        unsigned long pfn = arg->start_pfn;
 
-       if (action != MEM_GOING_OFFLINE)
+       if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
                return NOTIFY_OK;
 
        for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+               unsigned long start = PFN_PHYS(pfn);
+               unsigned long end = start + (1UL << PA_SECTION_SHIFT);
+
                ms = __pfn_to_section(pfn);
-               if (early_section(ms))
+               if (!early_section(ms))
+                       continue;
+
+               if (action == MEM_GOING_OFFLINE) {
+                       /*
+                        * Boot memory removal is not supported. Prevent
+                        * it via blocking any attempted offline request
+                        * for the boot memory and just report it.
+                        */
+                       pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end);
                        return NOTIFY_BAD;
+               } else if (action == MEM_OFFLINE) {
+                       /*
+                        * This should never have happened. Boot memory
+                        * offlining should have been prevented by this
+                        * very notifier. Most likely some memory removal
+                        * procedure has changed, which would then
+                        * require further debugging.
+                        */
+                       pr_err("Boot memory [%lx %lx] offlined\n", start, end);
+
+                       /*
+                        * Core memory hotplug does not process a return
+                        * code from the notifier for MEM_OFFLINE events.
+                        * The error condition has been reported. Return
+                        * from here as if ignored.
+                        */
+                       return NOTIFY_DONE;
+               }
        }
        return NOTIFY_OK;
 }
@@ -1508,9 +1533,66 @@ static struct notifier_block prevent_bootmem_remove_nb = {
        .notifier_call = prevent_bootmem_remove_notifier,
 };
 
+/*
+ * This checks that boot memory sections on the platform are online
+ * from early boot. Memory sections can only be prevented from being
+ * offlined if they are online to begin with; a section that is already
+ * offline would be overlooked. This helps validate the basic assumption
+ * on which the above memory event notifier works to prevent boot memory
+ * section offlining and its possible removal.
+ */
+static void validate_bootmem_online(void)
+{
+       phys_addr_t start, end, addr;
+       struct mem_section *ms;
+       u64 i;
+
+       /*
+        * Scanning across all memblock might be expensive
+        * on some big memory systems. Hence enable this
+        * validation only with DEBUG_VM.
+        */
+       if (!IS_ENABLED(CONFIG_DEBUG_VM))
+               return;
+
+       for_each_mem_range(i, &start, &end) {
+               for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
+                       ms = __pfn_to_section(PHYS_PFN(addr));
+
+                       /*
+                        * All memory ranges in the system at this point
+                        * should have been marked as early sections.
+                        */
+                       WARN_ON(!early_section(ms));
+
+                       /*
+                        * Memory notifier mechanism here to prevent boot
+                        * memory offlining depends on the fact that each
+                        * early section memory on the system is initially
+                        * online. Otherwise a given memory section which
+                        * is already offline will be overlooked and can
+                        * be removed completely. Call out such sections.
+                        */
+                       if (!online_section(ms))
+                               pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
+                                       addr, addr + (1UL << PA_SECTION_SHIFT));
+               }
+       }
+}
+
 static int __init prevent_bootmem_remove_init(void)
 {
-       return register_memory_notifier(&prevent_bootmem_remove_nb);
+       int ret = 0;
+
+       if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+               return ret;
+
+       validate_bootmem_online();
+       ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+       if (ret)
+               pr_err("%s: Notifier registration failed %d\n", __func__, ret);
+
+       return ret;
 }
-device_initcall(prevent_bootmem_remove_init);
+early_initcall(prevent_bootmem_remove_init);
 #endif
index 9929ff50c0c095f465d7a66272c8b2e35e9fd26a..1787406684aafc968ca8ab8eb19e80d6058472a8 100644 (file)
@@ -1718,3 +1718,58 @@ void __init acpi_iort_init(void)
 
        iort_init_platform_devices();
 }
+
+#ifdef CONFIG_ZONE_DMA
+/*
+ * Extract the highest CPU physical address accessible to all DMA masters in
+ * the system. PHYS_ADDR_MAX is returned when no constrained device is found.
+ */
+phys_addr_t __init acpi_iort_dma_get_max_cpu_address(void)
+{
+       phys_addr_t limit = PHYS_ADDR_MAX;
+       struct acpi_iort_node *node, *end;
+       struct acpi_table_iort *iort;
+       acpi_status status;
+       int i;
+
+       if (acpi_disabled)
+               return limit;
+
+       status = acpi_get_table(ACPI_SIG_IORT, 0,
+                               (struct acpi_table_header **)&iort);
+       if (ACPI_FAILURE(status))
+               return limit;
+
+       node = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->node_offset);
+       end = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->header.length);
+
+       for (i = 0; i < iort->node_count; i++) {
+               if (node >= end)
+                       break;
+
+               switch (node->type) {
+                       struct acpi_iort_named_component *ncomp;
+                       struct acpi_iort_root_complex *rc;
+                       phys_addr_t local_limit;
+
+               case ACPI_IORT_NODE_NAMED_COMPONENT:
+                       ncomp = (struct acpi_iort_named_component *)node->node_data;
+                       local_limit = DMA_BIT_MASK(ncomp->memory_address_limit);
+                       limit = min_not_zero(limit, local_limit);
+                       break;
+
+               case ACPI_IORT_NODE_PCI_ROOT_COMPLEX:
+                       if (node->revision < 1)
+                               break;
+
+                       rc = (struct acpi_iort_root_complex *)node->node_data;
+                       local_limit = DMA_BIT_MASK(rc->memory_address_limit);
+                       limit = min_not_zero(limit, local_limit);
+                       break;
+               }
+               node = ACPI_ADD_PTR(struct acpi_iort_node, node, node->length);
+       }
+       acpi_put_table(&iort->header);
+       return limit;
+}
+#endif
index eb9ab4f1e80b00c124e916ded75dfa597897ce75..09c0af7fd1c4480d6bd9f17bb4051bd49b514d0e 100644 (file)
@@ -1024,6 +1024,48 @@ out:
 }
 #endif /* CONFIG_HAS_DMA */
 
+/**
+ * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA
+ * @np: The node to start searching from or NULL to start from the root
+ *
+ * Gets the highest CPU physical address that is addressable by all DMA masters
+ * in the sub-tree pointed to by np, or the whole tree if NULL is passed. If no
+ * DMA constrained device is found, it returns PHYS_ADDR_MAX.
+ */
+phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
+{
+       phys_addr_t max_cpu_addr = PHYS_ADDR_MAX;
+       struct of_range_parser parser;
+       phys_addr_t subtree_max_addr;
+       struct device_node *child;
+       struct of_range range;
+       const __be32 *ranges;
+       u64 cpu_end = 0;
+       int len;
+
+       if (!np)
+               np = of_root;
+
+       ranges = of_get_property(np, "dma-ranges", &len);
+       if (ranges && len) {
+               of_dma_range_parser_init(&parser, np);
+               for_each_of_range(&parser, &range)
+                       if (range.cpu_addr + range.size > cpu_end)
+                               cpu_end = range.cpu_addr + range.size - 1;
+
+               if (max_cpu_addr > cpu_end)
+                       max_cpu_addr = cpu_end;
+       }
+
+       for_each_available_child_of_node(np, child) {
+               subtree_max_addr = of_dma_get_max_cpu_address(child);
+               if (max_cpu_addr > subtree_max_addr)
+                       max_cpu_addr = subtree_max_addr;
+       }
+
+       return max_cpu_addr;
+}
+
 /**
  * of_dma_is_coherent - Check if device is coherent
  * @np:        device node
index 06cc988faf78b34dddf596faa3bbe5dac9622af2..eb51bc147440172ceb00ba4239e72f0c1bc358a8 100644 (file)
@@ -869,6 +869,26 @@ static void __init of_unittest_changeset(void)
 #endif
 }
 
+static void __init of_unittest_dma_get_max_cpu_address(void)
+{
+       struct device_node *np;
+       phys_addr_t cpu_addr;
+
+       if (!IS_ENABLED(CONFIG_OF_ADDRESS))
+               return;
+
+       np = of_find_node_by_path("/testcase-data/address-tests");
+       if (!np) {
+               pr_err("missing testcase data\n");
+               return;
+       }
+
+       cpu_addr = of_dma_get_max_cpu_address(np);
+       unittest(cpu_addr == 0x4fffffff,
+                "of_dma_get_max_cpu_address: wrong CPU addr %pad (expecting %x)\n",
+                &cpu_addr, 0x4fffffff);
+}
+
 static void __init of_unittest_dma_ranges_one(const char *path,
                u64 expect_dma_addr, u64 expect_paddr)
 {
@@ -3266,6 +3286,7 @@ static int __init of_unittest(void)
        of_unittest_changeset();
        of_unittest_parse_interrupts();
        of_unittest_parse_interrupts_extended();
+       of_unittest_dma_get_max_cpu_address();
        of_unittest_parse_dma_ranges();
        of_unittest_pci_dma_ranges();
        of_unittest_match_node();
index 20a32120bb880962ea7caa303c937991248b71cc..1a12baa58e409b05ea715704cb8e27ff6b95c195 100644 (file)
@@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
 const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                                                const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
+phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
 static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_id(struct device *dev, u32 id)
@@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id(
 static inline
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
+
+static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void)
+{ return PHYS_ADDR_MAX; }
 #endif
 
 #endif /* __ACPI_IORT_H__ */
index fb3bf696c05e86e81e6ff2c3ceca50e299625faf..9d0c454d23cd626107aecfd184f1e446f4d763b1 100644 (file)
@@ -354,26 +354,6 @@ enum zone_type {
         * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
         * platforms may need both zones as they support peripherals with
         * different DMA addressing limitations.
-        *
-        * Some examples:
-        *
-        *  - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
-        *    rest of the lower 4G.
-        *
-        *  - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
-        *    the specific device.
-        *
-        *  - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
-        *    lower 4G.
-        *
-        *  - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
-        *    depending on the specific device.
-        *
-        *  - s390 uses ZONE_DMA fixed to the lower 2G.
-        *
-        *  - ia64 and riscv only use ZONE_DMA32.
-        *
-        *  - parisc uses neither.
         */
 #ifdef CONFIG_ZONE_DMA
        ZONE_DMA,
index 5d51891cbf1a68b38dbe65745555759b7dd653e0..9ed5b8532c30318786831c0fb1183f86d40e4073 100644 (file)
@@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id,
               const char *map_name, const char *map_mask_name,
               struct device_node **target, u32 *id_out);
 
+phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+
 #else /* CONFIG_OF */
 
 static inline void of_core_init(void)
@@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id,
        return -EINVAL;
 }
 
+static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+{
+       return PHYS_ADDR_MAX;
+}
+
 #define of_match_ptr(_ptr)     NULL
 #define of_match_node(_matches, _node) NULL
 #endif /* CONFIG_OF */