Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 18 Mar 2016 03:03:47 +0000 (20:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 18 Mar 2016 03:03:47 +0000 (20:03 -0700)
Pull arm64 updates from Catalin Marinas:
 "Here are the main arm64 updates for 4.6.  There are some relatively
  intrusive changes to support KASLR, the reworking of the kernel
  virtual memory layout and initial page table creation.

  Summary:

   - Initial page table creation reworked to avoid breaking large block
     mappings (huge pages) into smaller ones.  The ARM architecture
     requires break-before-make in such cases to avoid TLB conflicts but
     that's not always possible on live page tables

   - Kernel virtual memory layout: the kernel image is no longer linked
     to the bottom of the linear mapping (PAGE_OFFSET) but at the bottom
     of the vmalloc space, allowing the kernel to be loaded (nearly)
     anywhere in physical RAM

   - Kernel ASLR: position independent kernel Image and modules,
     randomly mapped in the vmalloc space, with the randomness provided
     by UEFI (efi_get_random_bytes() patches merged via the arm64 tree,
     acked by Matt Fleming)

   - Implement relative exception tables for arm64, required by KASLR
     (initial code for ARCH_HAS_RELATIVE_EXTABLE added to lib/extable.c
     but the actual x86 conversion deferred to 4.7 because of the merge
     dependencies); a short sketch of the relative entry layout follows
     this summary

   - Support for the User Access Override feature of ARMv8.2: this
     allows uaccess functions (get_user etc.) to be implemented using
     LDTR/STTR instructions.  Such instructions, when run by the kernel,
     perform unprivileged accesses, adding an extra level of protection.
     The set_fs() macro is used to "upgrade" such instructions to
     privileged accesses via the UAO bit

   - Half-precision floating point support (part of ARMv8.2)

   - Optimisations for CPUs with or without a hardware prefetcher (using
     run-time code patching)

   - copy_page performance improvement to deal with 128 bytes at a time

   - Sanity checks on the CPU capabilities (via CPUID) to prevent
     incompatible secondary CPUs from being brought up (e.g.  weird
     big.LITTLE configurations)

   - valid_user_regs() reworked for better sanity check of the
     sigcontext information (restored pstate information)

   - ACPI parking protocol implementation

   - CONFIG_DEBUG_RODATA enabled by default

   - VDSO code marked as read-only

   - DEBUG_PAGEALLOC support

   - ARCH_HAS_UBSAN_SANITIZE_ALL enabled

   - Erratum workaround for the Cavium ThunderX SoC

   - set_pte_at() fix for PROT_NONE mappings

   - Code clean-ups"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (99 commits)
  arm64: kasan: Fix zero shadow mapping overriding kernel image shadow
  arm64: kasan: Use actual memory node when populating the kernel image shadow
  arm64: Update PTE_RDONLY in set_pte_at() for PROT_NONE permission
  arm64: Fix misspellings in comments.
  arm64: efi: add missing frame pointer assignment
  arm64: make mrs_s prefixing implicit in read_cpuid
  arm64: enable CONFIG_DEBUG_RODATA by default
  arm64: Rework valid_user_regs
  arm64: mm: check at build time that PAGE_OFFSET divides the VA space evenly
  arm64: KVM: Move kvm_call_hyp back to its original location
  arm64: mm: treat memstart_addr as a signed quantity
  arm64: mm: list kernel sections in order
  arm64: lse: deal with clobbered IP registers after branch via PLT
  arm64: mm: dump: Use VA_START directly instead of private LOWEST_ADDR
  arm64: kconfig: add submenu for 8.2 architectural features
  arm64: kernel: acpi: fix ioremap in ACPI parking protocol cpu_postboot
  arm64: Add support for Half precision floating point
  arm64: Remove fixmap include fragility
  arm64: Add workaround for Cavium erratum 27456
  arm64: mm: Mark .rodata as RO
  ...

106 files changed:
Documentation/arm64/booting.txt
Documentation/arm64/silicon-errata.txt
Documentation/features/vm/huge-vmap/arch-support.txt
arch/arm/include/asm/kvm_asm.h
arch/arm/kvm/arm.c
arch/arm64/Kconfig
arch/arm64/Kconfig.debug
arch/arm64/Makefile
arch/arm64/boot/dts/nvidia/tegra132.dtsi
arch/arm64/boot/dts/nvidia/tegra210.dtsi
arch/arm64/include/asm/acpi.h
arch/arm64/include/asm/alternative.h
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/boot.h
arch/arm64/include/asm/brk-imm.h [new file with mode: 0644]
arch/arm64/include/asm/bug.h
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/debug-monitors.h
arch/arm64/include/asm/elf.h
arch/arm64/include/asm/fixmap.h
arch/arm64/include/asm/ftrace.h
arch/arm64/include/asm/futex.h
arch/arm64/include/asm/hardirq.h
arch/arm64/include/asm/kasan.h
arch/arm64/include/asm/kernel-pgtable.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/lse.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/module.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable-prot.h [new file with mode: 0644]
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/smp.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/uaccess.h
arch/arm64/include/asm/word-at-a-time.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/include/uapi/asm/ptrace.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/acpi_parking_protocol.c [new file with mode: 0644]
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpu_ops.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/efi-entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/head.S
arch/arm64/kernel/image.h
arch/arm64/kernel/kaslr.c [new file with mode: 0644]
arch/arm64/kernel/kgdb.c
arch/arm64/kernel/module-plts.c [new file with mode: 0644]
arch/arm64/kernel/module.c
arch/arm64/kernel/module.lds [new file with mode: 0644]
arch/arm64/kernel/process.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/signal.c
arch/arm64/kernel/signal32.c
arch/arm64/kernel/smp.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/vdso/vdso.S
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/hyp.S
arch/arm64/kvm/hyp/debug-sr.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/Makefile
arch/arm64/lib/clear_user.S
arch/arm64/lib/copy_from_user.S
arch/arm64/lib/copy_in_user.S
arch/arm64/lib/copy_page.S
arch/arm64/lib/copy_to_user.S
arch/arm64/lib/memcmp.S
arch/arm64/mm/context.c
arch/arm64/mm/dump.c
arch/arm64/mm/extable.c
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc.S
arch/x86/include/asm/efi.h
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/arm-stub.c
drivers/firmware/efi/libstub/arm64-stub.c
drivers/firmware/efi/libstub/efi-stub-helper.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/fdt.c
drivers/firmware/efi/libstub/random.c [new file with mode: 0644]
drivers/of/fdt.c
include/asm-generic/fixmap.h
include/linux/efi.h
lib/extable.c
scripts/sortextable.c

index 701d39d3171a74d8c2eb670c0b1be2931f326ee8..56d6d8b796db6dd3aadd252a85cc4b691f9e20f7 100644 (file)
@@ -109,7 +109,13 @@ Header notes:
                        1 - 4K
                        2 - 16K
                        3 - 64K
-  Bits 3-63:   Reserved.
+  Bit 3:       Kernel physical placement
+                       0 - 2MB aligned base should be as close as possible
+                           to the base of DRAM, since memory below it is not
+                           accessible via the linear mapping
+                       1 - 2MB aligned base may be anywhere in physical
+                           memory
+  Bits 4-63:   Reserved.
 
 - When image_size is zero, a bootloader should attempt to keep as much
   memory as possible free for use by the kernel immediately after the
@@ -117,14 +123,14 @@ Header notes:
   depending on selected features, and is effectively unbound.
 
 The Image must be placed text_offset bytes from a 2MB aligned base
-address near the start of usable system RAM and called there. Memory
-below that base address is currently unusable by Linux, and therefore it
-is strongly recommended that this location is the start of system RAM.
-The region between the 2 MB aligned base address and the start of the
-image has no special significance to the kernel, and may be used for
-other purposes.
+address anywhere in usable system RAM and called there. The region
+between the 2 MB aligned base address and the start of the image has no
+special significance to the kernel, and may be used for other purposes.
 At least image_size bytes from the start of the image must be free for
 use by the kernel.
+NOTE: versions prior to v4.6 cannot make use of memory below the
+physical offset of the Image so it is recommended that the Image be
+placed as close as possible to the start of system RAM.
 
 Any memory described to the kernel (even that below the start of the
 image) which is not marked as reserved from the kernel (e.g., with a
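
A loader can probe the new flag bit before deciding where to put the
Image.  A hedged sketch in C (the struct layout follows the Image
header documented in booting.txt, but the type and function names here
are hypothetical, and a little-endian loader is assumed):

    #include <stdint.h>
    #include <stdbool.h>

    struct arm64_image_header {
        uint32_t code0, code1;
        uint64_t text_offset;   /* image load offset, little endian */
        uint64_t image_size;    /* effective image size, little endian */
        uint64_t flags;         /* kernel flags, little endian */
        uint64_t res2, res3, res4;
        uint32_t magic;         /* 0x644d5241, "ARM\x64" */
        uint32_t res5;
    };

    /* Bit 3 of 'flags': 1 means any 2MB aligned base is acceptable
     * (v4.6+); 0 means the base should sit as close as possible to the
     * start of DRAM, since older kernels cannot use memory below it. */
    static bool any_2mb_base_ok(const struct arm64_image_header *h)
    {
        return (h->flags >> 3) & 1;
    }
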
index 58b71ddf9b602a5e0e81852fbbbc54ef0c5b92cd..ba4b6acfc5457bc30d5adc519ed0869b4433dfcd 100644 (file)
@@ -56,3 +56,4 @@ stable kernels.
 |                |                 |                 |                         |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |
+| Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456    |
index af6816bccb439d76a7f80d43f97f4a95e7d604c2..df1d1f3c9af290aa6ffaa1584f71ba6025e54c4e 100644 (file)
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |       avr32: | TODO |
     |    blackfin: | TODO |
     |         c6x: | TODO |
index 15d58b42d5a18aa6e56203dd2f15d6843afc97a0..3d5a5cd071bd15d648658473bc5a747e0ef1ff81 100644 (file)
@@ -48,6 +48,8 @@
 #define rr_lo_hi(a1, a2) a1, a2
 #endif
 
+#define kvm_ksym_ref(kva)      (kva)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
index 76552b51c7aea64fb9b8cc84436abad526991732..3e0fb66d8e053b5bbd7317d8bbcedf69950833dc 100644 (file)
@@ -1051,7 +1051,7 @@ static void cpu_init_hyp_mode(void *dummy)
        pgd_ptr = kvm_mmu_get_httbr();
        stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
        hyp_stack_ptr = stack_page + PAGE_SIZE;
-       vector_ptr = (unsigned long)__kvm_hyp_vector;
+       vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
 
        __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
        __cpu_init_stage2();
@@ -1220,13 +1220,15 @@ static int init_hyp_mode(void)
        /*
         * Map the Hyp-code called directly from the host
         */
-       err = create_hyp_mappings(__hyp_text_start, __hyp_text_end);
+       err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
+                                 kvm_ksym_ref(__hyp_text_end));
        if (err) {
                kvm_err("Cannot map world-switch code\n");
                goto out_err;
        }
 
-       err = create_hyp_mappings(__start_rodata, __end_rodata);
+       err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
+                                 kvm_ksym_ref(__end_rodata));
        if (err) {
                kvm_err("Cannot map rodata section\n");
                goto out_err;
index b3f2522d266d802221bf22d2f5c56349631cab0c..4f436220384f847bbdee53cd7a8bce5fa089da61 100644 (file)
@@ -14,6 +14,7 @@ config ARM64
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
        select ARCH_WANT_FRAME_POINTERS
+       select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARM_AMBA
        select ARM_ARCH_TIMER
        select ARM_GIC
@@ -49,6 +50,7 @@ config ARM64
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_BITREVERSE
+       select HAVE_ARCH_HUGE_VMAP
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
        select HAVE_ARCH_KGDB
@@ -391,6 +393,7 @@ config ARM64_ERRATUM_843419
        bool "Cortex-A53: 843419: A load or store might access an incorrect address"
        depends on MODULES
        default y
+       select ARM64_MODULE_CMODEL_LARGE
        help
          This option builds kernel modules using the large memory model in
          order to avoid the use of the ADRP instruction, which can cause
@@ -430,6 +433,17 @@ config CAVIUM_ERRATUM_23154
 
          If unsure, say Y.
 
+config CAVIUM_ERRATUM_27456
+       bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption"
+       default y
+       help
+         On ThunderX T88 pass 1.x through 2.1 parts, broadcast TLBI
+         instructions may cause the icache to become corrupted if it
+         contains data for a non-current ASID.  The fix is to
+         invalidate the icache when changing the mm context.
+
+         If unsure, say Y.
+
 endmenu
 
 
@@ -535,6 +549,9 @@ config HOTPLUG_CPU
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz
 
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+       def_bool y
+
 config ARCH_HAS_HOLES_MEMORYMODEL
        def_bool y if SPARSEMEM
 
@@ -763,10 +780,97 @@ config ARM64_VHE
 
 endmenu
 
+menu "ARMv8.2 architectural features"
+
+config ARM64_UAO
+       bool "Enable support for User Access Override (UAO)"
+       default y
+       help
+         User Access Override (UAO; part of the ARMv8.2 Extensions)
+         causes the 'unprivileged' variant of the load/store instructions to
+         be overriden to be privileged.
+
+         This option changes get_user() and friends to use the 'unprivileged'
+         variant of the load/store instructions. This ensures that user-space
+         really did have access to the supplied memory. When addr_limit is
+         set to kernel memory the UAO bit will be set, allowing privileged
+         access to kernel memory.
+
+         Choosing this option will cause copy_to_user() et al to use user-space
+         memory permissions.
+
+         The feature is detected at runtime, the kernel will use the
+         regular load/store instructions if the cpu does not implement the
+         feature.
+
+endmenu
+
+config ARM64_MODULE_CMODEL_LARGE
+       bool
+
+config ARM64_MODULE_PLTS
+       bool
+       select ARM64_MODULE_CMODEL_LARGE
+       select HAVE_MOD_ARCH_SPECIFIC
+
+config RELOCATABLE
+       bool
+       help
+         This builds the kernel as a Position Independent Executable (PIE),
+         which retains all relocation metadata required to relocate the
+         kernel binary at runtime to a different virtual address than the
+         address it was linked at.
+         Since AArch64 uses the RELA relocation format, this requires a
+         relocation pass at runtime even if the kernel is loaded at the
+         same address it was linked at.
+
+config RANDOMIZE_BASE
+       bool "Randomize the address of the kernel image"
+       select ARM64_MODULE_PLTS
+       select RELOCATABLE
+       help
+         Randomizes the virtual address at which the kernel image is
+         loaded, as a security feature that deters exploit attempts
+         relying on knowledge of the location of kernel internals.
+
+         It is the bootloader's job to provide entropy, by passing a
+         random u64 value in /chosen/kaslr-seed at kernel entry.
+
+         When booting via the UEFI stub, it will invoke the firmware's
+         EFI_RNG_PROTOCOL implementation (if available) to supply entropy
+         to the kernel proper. In addition, it will randomise the physical
+         location of the kernel Image as well.
+
+         If unsure, say N.
+
+config RANDOMIZE_MODULE_REGION_FULL
+       bool "Randomize the module region independently from the core kernel"
+       depends on RANDOMIZE_BASE
+       default y
+       help
+         Randomizes the location of the module region without considering the
+         location of the core kernel. This way, it is impossible for modules
+         to leak information about the location of core kernel data structures
+         but it does imply that function calls between modules and the core
+         kernel will need to be resolved via veneers in the module PLT.
+
+         When this option is not set, the module region will be randomized over
+         a limited range that contains the [_stext, _etext] interval of the
+         core kernel, so branch relocations are always in range.
+
 endmenu
 
 menu "Boot options"
 
+config ARM64_ACPI_PARKING_PROTOCOL
+       bool "Enable support for the ARM64 ACPI parking protocol"
+       depends on ACPI
+       help
+         Enable support for the ARM64 ACPI parking protocol. If disabled
+         the kernel will not allow booting through the ARM64 ACPI parking
+         protocol even if the corresponding data is present in the ACPI
+         MADT table.
+
 config CMDLINE
        string "Default kernel command string"
        default ""
index e13c4bf84d9e13b0047175600210d11c844afcec..7e76845a04349f928fac6b7e809aa5f66023fcc7 100644 (file)
@@ -50,13 +50,13 @@ config DEBUG_SET_MODULE_RONX
 
 config DEBUG_RODATA
        bool "Make kernel text and rodata read-only"
+       default y
        help
          If this is set, kernel text and rodata will be made read-only. This
          is to help catch accidental or malicious attempts to change the
-         kernel's executable code. Additionally splits rodata from kernel
-         text so it can be made explicitly non-executable.
+         kernel's executable code.
 
-          If in doubt, say Y
+         If in doubt, say Y
 
 config DEBUG_ALIGN_RODATA
        depends on DEBUG_RODATA && ARM64_4K_PAGES
index b5e3f6d42b88bef5f17d3c21a80f9e97a013b676..354d75402aceaa771adc63c04ecd51538edcbb0b 100644 (file)
@@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 OBJCOPYFLAGS   :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 GZFLAGS                :=-9
 
+ifneq ($(CONFIG_RELOCATABLE),)
+LDFLAGS_vmlinux                += -pie
+endif
+
 KBUILD_DEFCONFIG := defconfig
 
 # Check for binutils support for specific extensions
@@ -43,10 +47,14 @@ endif
 
 CHECKFLAGS     += -D__aarch64__
 
-ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
 KBUILD_CFLAGS_MODULE   += -mcmodel=large
 endif
 
+ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
+KBUILD_LDFLAGS_MODULE  += -T $(srctree)/arch/arm64/kernel/module.lds
+endif
+
 # Default value
 head-y         := arch/arm64/kernel/head.o
 
index e8bb46027bed690385f51905a1cab74cea40274d..6e28e41d7e3ebbf9fd0055ccfe2add6fbfa1e8cb 100644 (file)
        /*
         * There are two serial driver i.e. 8250 based simple serial
         * driver and APB DMA based serial driver for higher baudrate
-        * and performace. To enable the 8250 based driver, the compatible
+        * and performance. To enable the 8250 based driver, the compatible
         * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
         * the APB DMA based serial driver, the comptible is
         * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
index bc23f4dea002b51f089b6c42756af16f9c591cca..23b0630602cf0630893cfbe5f1e631ed6135e4d6 100644 (file)
        /*
         * There are two serial driver i.e. 8250 based simple serial
         * driver and APB DMA based serial driver for higher baudrate
-        * and performace. To enable the 8250 based driver, the compatible
+        * and performance. To enable the 8250 based driver, the compatible
         * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
         * the APB DMA based serial driver, the comptible is
         * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
index caafd63b8092d8102401112d811b1055f4cc3524..aee323b13802ad143e9774d2076a4b1c63731ece 100644 (file)
@@ -87,9 +87,26 @@ void __init acpi_init_cpus(void);
 static inline void acpi_init_cpus(void) { }
 #endif /* CONFIG_ACPI */
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+bool acpi_parking_protocol_valid(int cpu);
+void __init
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor);
+#else
+static inline bool acpi_parking_protocol_valid(int cpu) { return false; }
+static inline void
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
+{}
+#endif
+
 static inline const char *acpi_get_enable_method(int cpu)
 {
-       return acpi_psci_present() ? "psci" : NULL;
+       if (acpi_psci_present())
+               return "psci";
+
+       if (acpi_parking_protocol_valid(cpu))
+               return "parking-protocol";
+
+       return NULL;
 }
 
 #ifdef CONFIG_ACPI_APEI
index e4962f04201e295d933d33f7f39745a5f5e65f65..beccbdefa106a4ba0dd7d98537427a37d2c26517 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
+#include <asm/cpufeature.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
@@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length);
 
 #else
 
+#include <asm/assembler.h>
+
 .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
        .word \orig_offset - .
        .word \alt_offset - .
@@ -136,6 +140,65 @@ void apply_alternatives(void *start, size_t length);
        alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
 
 
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+       .macro uao_ldp l, reg1, reg2, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  ldp     \reg1, \reg2, [\addr], \post_inc;
+8889:                  nop;
+                       nop;
+               alternative_else
+                       ldtr    \reg1, [\addr];
+                       ldtr    \reg2, [\addr, #8];
+                       add     \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro uao_stp l, reg1, reg2, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  stp     \reg1, \reg2, [\addr], \post_inc;
+8889:                  nop;
+                       nop;
+               alternative_else
+                       sttr    \reg1, [\addr];
+                       sttr    \reg2, [\addr, #8];
+                       add     \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  \inst   \reg, [\addr], \post_inc;
+                       nop;
+               alternative_else
+                       \alt_inst       \reg, [\addr];
+                       add             \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+       .endm
+#else
+       .macro uao_ldp l, reg1, reg2, addr, post_inc
+               USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+       .endm
+       .macro uao_stp l, reg1, reg2, addr, post_inc
+               USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+       .endm
+       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+               USER(\l, \inst \reg, [\addr], \post_inc)
+       .endm
+#endif
+
 #endif  /*  __ASSEMBLY__  */
 
 /*
index bb7b72734c24ae5f5572d2b01c70900abe99abcf..70f7b9e04598bdf953b56d40794f1ab5aaf966ca 100644 (file)
        dmb     \opt
        .endm
 
+/*
+ * Emit an entry into the exception table
+ */
+       .macro          _asm_extable, from, to
+       .pushsection    __ex_table, "a"
+       .align          3
+       .long           (\from - .), (\to - .)
+       .popsection
+       .endm
+
 #define USER(l, x...)                          \
 9999:  x;                                      \
-       .section __ex_table,"a";                \
-       .align  3;                              \
-       .quad   9999b,l;                        \
-       .previous
+       _asm_extable    9999b, l
 
 /*
  * Register aliases.
@@ -215,4 +222,15 @@ lr .req    x30             // link register
        .size   __pi_##x, . - x;        \
        ENDPROC(x)
 
+       /*
+        * Emit a 64-bit absolute little endian symbol reference in a way that
+        * ensures that it will be resolved at build time, even when building a
+        * PIE binary. This requires cooperation from the linker script, which
+        * must emit the lo32/hi32 halves individually.
+        */
+       .macro  le64sym, sym
+       .long   \sym\()_lo32
+       .long   \sym\()_hi32
+       .endm
+
 #endif /* __ASM_ASSEMBLER_H */
index 197e06afbf71947eb505a893e53b73be73543be9..39c1d340fec59136b8ddd6e3ac3f39354289189f 100644 (file)
@@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v)
        "       stclr   %w[i], %[v]\n")
        : [i] "+r" (w0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_or(int i, atomic_t *v)
@@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v)
        "       stset   %w[i], %[v]\n")
        : [i] "+r" (w0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_xor(int i, atomic_t *v)
@@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v)
        "       steor   %w[i], %[v]\n")
        : [i] "+r" (w0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_add(int i, atomic_t *v)
@@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v)
        "       stadd   %w[i], %[v]\n")
        : [i] "+r" (w0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)                          \
@@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v)         \
        "       add     %w[i], %w[i], w30")                             \
        : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
        : "r" (x1)                                                      \
-       : "x30" , ##cl);                                                \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                        \
        return w0;                                                      \
 }
@@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v)
        "       stclr   %w[i], %[v]")
        : [i] "+r" (w0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_sub(int i, atomic_t *v)
@@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v)
        "       stadd   %w[i], %[v]")
        : [i] "+r" (w0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_SUB_RETURN(name, mb, cl...)                          \
@@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)               \
        "       add     %w[i], %w[i], w30")                             \
        : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
        : "r" (x1)                                                      \
-       : "x30" , ##cl);                                                \
+       : __LL_SC_CLOBBERS , ##cl);                                     \
                                                                        \
        return w0;                                                      \
 }
@@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
        "       stclr   %[i], %[v]\n")
        : [i] "+r" (x0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_or(long i, atomic64_t *v)
@@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v)
        "       stset   %[i], %[v]\n")
        : [i] "+r" (x0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_xor(long i, atomic64_t *v)
@@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v)
        "       steor   %[i], %[v]\n")
        : [i] "+r" (x0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_add(long i, atomic64_t *v)
@@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v)
        "       stadd   %[i], %[v]\n")
        : [i] "+r" (x0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)                                \
@@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \
        "       add     %[i], %[i], x30")                               \
        : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
        : "r" (x1)                                                      \
-       : "x30" , ##cl);                                                \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                        \
        return x0;                                                      \
 }
@@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
        "       stclr   %[i], %[v]")
        : [i] "+r" (x0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_sub(long i, atomic64_t *v)
@@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
        "       stadd   %[i], %[v]")
        : [i] "+r" (x0), [v] "+Q" (v->counter)
        : "r" (x1)
-       : "x30");
+       : __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)                                \
@@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
        "       add     %[i], %[i], x30")                               \
        : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
        : "r" (x1)                                                      \
-       : "x30" , ##cl);                                                \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                        \
        return x0;                                                      \
 }
@@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
        "2:")
        : [ret] "+&r" (x0), [v] "+Q" (v->counter)
        :
-       : "x30", "cc", "memory");
+       : __LL_SC_CLOBBERS, "cc", "memory");
 
        return x0;
 }
@@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,       \
        "       mov     %" #w "[ret], " #w "30")                        \
        : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)             \
        : [old] "r" (x1), [new] "r" (x2)                                \
-       : "x30" , ##cl);                                                \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                        \
        return x0;                                                      \
 }
@@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1,               \
          [v] "+Q" (*(unsigned long *)ptr)                              \
        : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),             \
          [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)              \
-       : "x30" , ##cl);                                                \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
                                                                        \
        return x0;                                                      \
 }
index 81151b67b26bf61fd756540f5643d8b5ab6882c7..ebf2481889c34848be0b34158647a4f60b770090 100644 (file)
 #define MIN_FDT_ALIGN          8
 #define MAX_FDT_SIZE           SZ_2M
 
+/*
+ * arm64 requires the kernel image to placed
+ * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ */
+#define MIN_KIMG_ALIGN         SZ_2M
+
 #endif
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
new file mode 100644 (file)
index 0000000..ed693c5
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BRK_IMM_H
+#define __ASM_BRK_IMM_H
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ * 0x800: kernel-mode BUG() and WARN() traps
+ */
+#define FAULT_BRK_IMM                  0x100
+#define KGDB_DYN_DBG_BRK_IMM           0x400
+#define KGDB_COMPILED_DBG_BRK_IMM      0x401
+#define BUG_BRK_IMM                    0x800
+
+#endif
index 4a748ce9ba1a71241c8f13834e488cef9753dc38..561190d1588136b1f7bc207ebfd04251694ea552 100644 (file)
@@ -18,7 +18,7 @@
 #ifndef _ARCH_ARM64_ASM_BUG_H
 #define _ARCH_ARM64_ASM_BUG_H
 
-#include <asm/debug-monitors.h>
+#include <asm/brk-imm.h>
 
 #ifdef CONFIG_GENERIC_BUG
 #define HAVE_ARCH_BUG
index b5e9cee4b5f81a3498a67b934dc9157e2d4cf45b..13a6103130cd7036b889250889264847b6871249 100644 (file)
@@ -36,6 +36,7 @@ struct cpuinfo_arm64 {
        u64             reg_id_aa64isar1;
        u64             reg_id_aa64mmfr0;
        u64             reg_id_aa64mmfr1;
+       u64             reg_id_aa64mmfr2;
        u64             reg_id_aa64pfr0;
        u64             reg_id_aa64pfr1;
 
index a5c769b1c65b9dbd0a34f381ee603f447421ba5e..b9b649422fca22f9f333ee4bc11b0addd24dc59c 100644 (file)
 #define ARM64_HAS_LSE_ATOMICS                  5
 #define ARM64_WORKAROUND_CAVIUM_23154          6
 #define ARM64_WORKAROUND_834220                        7
-/* #define ARM64_HAS_NO_HW_PREFETCH            8 */
-/* #define ARM64_HAS_UAO                       9 */
-/* #define ARM64_ALT_PAN_NOT_UAO               10 */
+#define ARM64_HAS_NO_HW_PREFETCH               8
+#define ARM64_HAS_UAO                          9
+#define ARM64_ALT_PAN_NOT_UAO                  10
 #define ARM64_HAS_VIRT_HOST_EXTN               11
+#define ARM64_WORKAROUND_CAVIUM_27456          12
 
-#define ARM64_NCAPS                            12
+#define ARM64_NCAPS                            13
 
 #ifndef __ASSEMBLY__
 
@@ -89,9 +90,10 @@ struct arm64_cpu_capabilities {
 
                struct {        /* Feature register checking */
                        u32 sys_reg;
-                       int field_pos;
-                       int min_field_value;
-                       int hwcap_type;
+                       u8 field_pos;
+                       u8 min_field_value;
+                       u8 hwcap_type;
+                       bool sign;
                        unsigned long hwcap;
                };
        };
@@ -121,15 +123,15 @@ static inline void cpus_set_cap(unsigned int num)
 }
 
 static inline int __attribute_const__
-cpuid_feature_extract_field_width(u64 features, int field, int width)
+cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
 {
        return (s64)(features << (64 - width - field)) >> (64 - width);
 }
 
 static inline int __attribute_const__
-cpuid_feature_extract_field(u64 features, int field)
+cpuid_feature_extract_signed_field(u64 features, int field)
 {
-       return cpuid_feature_extract_field_width(features, field, 4);
+       return cpuid_feature_extract_signed_field_width(features, field, 4);
 }
 
 static inline unsigned int __attribute_const__
@@ -149,17 +151,23 @@ static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
        return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
 }
 
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field, bool sign)
+{
+       return (sign) ?
+               cpuid_feature_extract_signed_field(features, field) :
+               cpuid_feature_extract_unsigned_field(features, field);
+}
+
 static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
 {
-       return ftrp->sign ?
-               cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) :
-               cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width);
+       return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign);
 }
 
 static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
 {
-       return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
-               cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+       return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+               cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
 }
 
 void __init setup_cpu_features(void);
@@ -168,13 +176,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
                            const char *info);
 void check_local_cpu_errata(void);
 
-#ifdef CONFIG_HOTPLUG_CPU
 void verify_local_cpu_capabilities(void);
-#else
-static inline void verify_local_cpu_capabilities(void)
-{
-}
-#endif
 
 u64 read_system_reg(u32 id);
 
index 1a5949364ed0f43eee2be4b61c3497fe4fdbbb7b..f2309a25d14c8b40741a03a2da8926887a67016f 100644 (file)
 #define MPIDR_AFFINITY_LEVEL(mpidr, level) \
        ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
 
-#define read_cpuid(reg) ({                                             \
-       u64 __val;                                                      \
-       asm("mrs        %0, " #reg : "=r" (__val));                     \
-       __val;                                                          \
-})
-
 #define MIDR_REVISION_MASK     0xf
 #define MIDR_REVISION(midr)    ((midr) & MIDR_REVISION_MASK)
 #define MIDR_PARTNUM_SHIFT     4
 #define MIDR_IMPLEMENTOR(midr) \
        (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
 
-#define MIDR_CPU_PART(imp, partnum) \
+#define MIDR_CPU_MODEL(imp, partnum) \
        (((imp)                 << MIDR_IMPLEMENTOR_SHIFT) | \
        (0xf                    << MIDR_ARCHITECTURE_SHIFT) | \
        ((partnum)              << MIDR_PARTNUM_SHIFT))
 
+#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+                            MIDR_ARCHITECTURE_MASK)
+
+#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max)           \
+({                                                                     \
+       u32 _model = (midr) & MIDR_CPU_MODEL_MASK;                      \
+       u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK);     \
+                                                                       \
+       _model == (model) && rv >= (rv_min) && rv <= (rv_max);          \
+ })
+
 #define ARM_CPU_IMP_ARM                        0x41
 #define ARM_CPU_IMP_APM                        0x50
 #define ARM_CPU_IMP_CAVIUM             0x43
 
 #define CAVIUM_CPU_PART_THUNDERX       0x0A1
 
+#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_THUNDERX  MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+
 #ifndef __ASSEMBLY__
 
+#include <asm/sysreg.h>
+
+#define read_cpuid(reg) ({                                             \
+       u64 __val;                                                      \
+       asm("mrs_s      %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \
+       __val;                                                          \
+})
+
 /*
  * The CPU ID never changes at run time, so we might as well tell the
  * compiler that it's constant.  Use this function to read the CPU ID
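
The new MIDR_IS_CPU_MODEL_RANGE() helper is what the errata matching
builds on.  An illustrative use for the ThunderX erratum mentioned in
the summary might look like the following (the function name and the
variant/revision bounds are assumptions for the example, not taken from
this series):

    static bool thunderx_needs_27456_workaround(void)
    {
        u32 midr = read_cpuid_id();

        /* "pass 1.x through 2.1", i.e. variant/revision 0x0..0x100001 */
        return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX,
                                       0x00, (1 << MIDR_VARIANT_SHIFT) | 1);
    }
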
index 279c85b5ec091eafaa37fbb10d4ff59e2092a605..2fcb9b7c876c06d67c220f1eefcd0b61ef715a1c 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <asm/brk-imm.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
 #include <asm/ptrace.h>
  */
 #define BREAK_INSTR_SIZE               AARCH64_INSN_SIZE
 
-/*
- * #imm16 values used for BRK instruction generation
- * Allowed values for kgbd are 0x400 - 0x7ff
- * 0x100: for triggering a fault on purpose (reserved)
- * 0x400: for dynamic BRK instruction
- * 0x401: for compile time BRK instruction
- * 0x800: kernel-mode BUG() and WARN() traps
- */
-#define FAULT_BRK_IMM                  0x100
-#define KGDB_DYN_DBG_BRK_IMM           0x400
-#define KGDB_COMPILED_DBG_BRK_IMM      0x401
-#define BUG_BRK_IMM                    0x800
-
 /*
  * BRK instruction encoding
  * The #imm16 value should be placed at bits[20:5] within BRK ins
index faad6df49e5b00a49f2b82f06dc48b735662e55c..24ed037f09fd32385b9ccfb0e8d380b5c0d5c7ed 100644 (file)
 #include <asm/ptrace.h>
 #include <asm/user.h>
 
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs) \
-       *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef struct user_fpsimd_state elf_fpregset_t;
-
 /*
  * AArch64 static relocation types.
  */
@@ -86,6 +77,8 @@ typedef struct user_fpsimd_state elf_fpregset_t;
 #define R_AARCH64_MOVW_PREL_G2_NC      292
 #define R_AARCH64_MOVW_PREL_G3         293
 
+#define R_AARCH64_RELATIVE             1027
+
 /*
  * These are used to set parameters in the core dumps.
  */
@@ -127,6 +120,17 @@ typedef struct user_fpsimd_state elf_fpregset_t;
  */
 #define ELF_ET_DYN_BASE        (2 * TASK_SIZE_64 / 3)
 
+#ifndef __ASSEMBLY__
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs) \
+       *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
+
 /*
  * When the program starts, a1 contains a pointer to a function to be
  * registered with atexit, as per the SVR4 ABI.  A value of 0 means we have no
@@ -186,4 +190,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
 
 #endif /* CONFIG_COMPAT */
 
+#endif /* !__ASSEMBLY__ */
+
 #endif
index 309704544d22763d6348095814fbdce935172c1b..caf86be815ba2cfa26d6cbcab8ee1cd7f6bdf158 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/sizes.h>
 #include <asm/boot.h>
 #include <asm/page.h>
+#include <asm/pgtable-prot.h>
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -62,6 +63,16 @@ enum fixed_addresses {
 
        FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
        FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
+       /*
+        * Used for kernel page table creation, so unmapped memory may be used
+        * for tables.
+        */
+       FIX_PTE,
+       FIX_PMD,
+       FIX_PUD,
+       FIX_PGD,
+
        __end_of_fixed_addresses
 };
 
index 3c60f37e48ab51998db2c5870fe2df4427949b37..caa955f10e19509adf568e4a44bc357f2340b562 100644 (file)
@@ -48,7 +48,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
  * See kernel/trace/trace_syscalls.c
  *
  * x86 code says:
- * If the user realy wants these, then they should use the
+ * If the user really wants these, then they should use the
  * raw syscall tracepoints with filtering.
  */
 #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
index 5f3ab8c1db55cca8dbf4c9e1fc315e90335b6d60..f2585cdd32c29832566718e99d7b5fd9c61d2322 100644 (file)
 "4:    mov     %w0, %w5\n"                                             \
 "      b       3b\n"                                                   \
 "      .popsection\n"                                                  \
-"      .pushsection __ex_table,\"a\"\n"                                \
-"      .align  3\n"                                                    \
-"      .quad   1b, 4b, 2b, 4b\n"                                       \
-"      .popsection\n"                                                  \
+       _ASM_EXTABLE(1b, 4b)                                            \
+       _ASM_EXTABLE(2b, 4b)                                            \
        ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,            \
                    CONFIG_ARM64_PAN)                                   \
        : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)       \
@@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "4:    mov     %w0, %w6\n"
 "      b       3b\n"
 "      .popsection\n"
-"      .pushsection __ex_table,\"a\"\n"
-"      .align  3\n"
-"      .quad   1b, 4b, 2b, 4b\n"
-"      .popsection\n"
+       _ASM_EXTABLE(1b, 4b)
+       _ASM_EXTABLE(2b, 4b)
 ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
        : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
        : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
index a57601f9d17cdffb1122e2864ae5353273eb59ee..8740297dac775dac5bac2bb9260fca62df7d0fb9 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
-#define NR_IPI 5
+#define NR_IPI 6
 
 typedef struct {
        unsigned int __softirq_pending;
index 2774fa384c47f27b4e936644cef0e980f7fbcfe7..71ad0f93eb7153226a43d78e909d3e7c3690bf92 100644 (file)
@@ -7,13 +7,14 @@
 
 #include <linux/linkage.h>
 #include <asm/memory.h>
+#include <asm/pgtable-types.h>
 
 /*
  * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
  * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
  */
 #define KASAN_SHADOW_START      (VA_START)
-#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
 
 /*
  * This value is used to map an address to the corresponding shadow
 #define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
 
 void kasan_init(void);
+void kasan_copy_shadow(pgd_t *pgdir);
 asmlinkage void kasan_early_init(void);
 
 #else
 static inline void kasan_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *pgdir) { }
 #endif
 
 #endif
index a459714ee29e38fbf81f2061026933736ed1cbfc..5c6375d8528bb8ddd313bfa2911f7a0d77819028 100644 (file)
 #define SWAPPER_MM_MMUFLAGS    (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
 #endif
 
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define ARM64_MEMSTART_ALIGN   SZ_512M
+#else
+#define ARM64_MEMSTART_ALIGN   SZ_1G
+#endif
 
 #endif /* __ASM_KERNEL_PGTABLE_H */
index b56a0a81e4cbda9a9245f0d518ceb90035d9c9f2..0e391dbfc42023aad2dabf26b5e9d7b49fa8aec2 100644 (file)
@@ -62,7 +62,7 @@
 
 /*
  * The bits we set in HCR:
- * RW:         64bit by default, can be overriden for 32bit VMs
+ * RW:         64bit by default, can be overridden for 32bit VMs
  * TAC:                Trap ACTLR
  * TSC:                Trap SMC
  * TVM:                Trap VM ops (until M+C set in SCTLR_EL1)
index 2d02ba67478c45d205ff397aa1b04dc01f61d44b..226f49d69ea98ed0a5dac3eb9448be4f460eb548 100644 (file)
@@ -26,6 +26,8 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT    0
 #define KVM_ARM64_DEBUG_DIRTY          (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+#define kvm_ksym_ref(sym)              phys_to_virt((u64)&sym - kimage_voffset)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
index 71fa6fe9d54addc45099c3f8fbbf35c1c9e38b71..227ed475dbd3c328abbfab391fa4d62bf953f034 100644 (file)
@@ -102,8 +102,8 @@ enum vcpu_sysreg {
        TTBR1_EL1,      /* Translation Table Base Register 1 */
        TCR_EL1,        /* Translation Control Register */
        ESR_EL1,        /* Exception Syndrome Register */
-       AFSR0_EL1,      /* Auxilary Fault Status Register 0 */
-       AFSR1_EL1,      /* Auxilary Fault Status Register 1 */
+       AFSR0_EL1,      /* Auxiliary Fault Status Register 0 */
+       AFSR1_EL1,      /* Auxiliary Fault Status Register 1 */
        FAR_EL1,        /* Fault Address Register */
        MAIR_EL1,       /* Memory Attribute Indirection Register */
        VBAR_EL1,       /* Vector Base Address Register */
@@ -326,7 +326,9 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
-u64 kvm_call_hyp(void *hypfn, ...);
+u64 __kvm_call_hyp(void *hypfn, ...);
+#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
@@ -347,8 +349,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
         * Call initialization code, and switch to the full blown
         * HYP code.
         */
-       kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
-                    hyp_stack_ptr, vector_ptr);
+       __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+                      hyp_stack_ptr, vector_ptr);
 }
 
 static inline void kvm_arch_hardware_disable(void) {}
index 9a9318adefa6b47581f4653a27e1faf9025a5d4d..22732a5e31197b4f653d58fb456e7ffb72b6e631 100644 (file)
@@ -317,7 +317,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
 {
        int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
 
-       return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+       return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
 #endif /* __ASSEMBLY__ */
index 3de42d68611df91ba6d46e32c197f700bb52bf52..23acc00be32d019a9f0f71b75153b5b32996b083 100644 (file)
@@ -26,6 +26,7 @@ __asm__(".arch_extension      lse");
 
 /* Macro for constructing calls to out-of-line ll/sc atomics */
 #define __LL_SC_CALL(op)       "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CLOBBERS       "x16", "x17", "x30"
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)                               \
index 853953cd1f0813fd562b68b7cdddd95582f1e392..12f8a00fb3f1767a645a04358dcaca08fd4f6b43 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
+#include <asm/bug.h>
 #include <asm/sizes.h>
 
 /*
  * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS                        (CONFIG_ARM64_VA_BITS)
 #define VA_START               (UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET            (UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define MODULES_END            (PAGE_OFFSET)
-#define MODULES_VADDR          (MODULES_END - SZ_64M)
-#define PCI_IO_END             (MODULES_VADDR - SZ_2M)
+#define KIMAGE_VADDR           (MODULES_END)
+#define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR          (VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VSIZE          (SZ_128M)
+#define PCI_IO_END             (PAGE_OFFSET - SZ_2M)
 #define PCI_IO_START           (PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP            (PCI_IO_START - SZ_2M)
 #define TASK_SIZE_64           (UL(1) << VA_BITS)
 
 #define TASK_UNMAPPED_BASE     (PAGE_ALIGN(TASK_SIZE / 4))
 
+/*
+ * The size of the KASAN shadow region. This should be 1/8th of the
+ * size of the entire kernel virtual address space.
+ */
+#ifdef CONFIG_KASAN
+#define KASAN_SHADOW_SIZE      (UL(1) << (VA_BITS - 3))
+#else
+#define KASAN_SHADOW_SIZE      (0)
+#endif
+
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
  */
-#define __virt_to_phys(x)      (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
-#define __phys_to_virt(x)      ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __virt_to_phys(x) ({                                           \
+       phys_addr_t __x = (phys_addr_t)(x);                             \
+       __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :   \
+                                (__x - kimage_voffset); })
+
+#define __phys_to_virt(x)      ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+#define __phys_to_kimg(x)      ((unsigned long)((x) + kimage_voffset))
 
 /*
  * Convert a page to/from a physical address
 #define MT_S2_NORMAL           0xf
 #define MT_S2_DEVICE_nGnRE     0x1
 
+#ifdef CONFIG_ARM64_4K_PAGES
+#define IOREMAP_MAX_ORDER      (PUD_SHIFT)
+#else
+#define IOREMAP_MAX_ORDER      (PMD_SHIFT)
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#define __early_init_dt_declare_initrd(__start, __end)                 \
+       do {                                                            \
+               initrd_start = (__start);                               \
+               initrd_end = (__end);                                   \
+       } while (0)
+#endif
+
 #ifndef __ASSEMBLY__
 
-extern phys_addr_t             memstart_addr;
+#include <linux/bitops.h>
+#include <linux/mmdebug.h>
+
+extern s64                     memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET            ({ memstart_addr; })
+#define PHYS_OFFSET            ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
+
+/* the virtual base of the kernel image (minus TEXT_OFFSET) */
+extern u64                     kimage_vaddr;
+
+/* the offset between the kernel virtual and physical mappings */
+extern u64                     kimage_voffset;
 
 /*
- * The maximum physical address that the linear direct mapping
- * of system RAM can cover. (PAGE_OFFSET can be interpreted as
- * a 2's complement signed quantity and negated to derive the
- * maximum size of the linear mapping.)
+ * Allow all memory at the discovery stage. We will clip it later.
  */
-#define MAX_MEMBLOCK_ADDR      ({ memstart_addr - PAGE_OFFSET - 1; })
+#define MIN_MEMBLOCK_ADDR      0
+#define MAX_MEMBLOCK_ADDR      U64_MAX
 
 /*
  * PFNs are used to describe any physical page; this means
index 24165784b8038b732ea568d1e74fd8c0a699b914..b1892a0dbcb0f8d7b52aeccd11f89418421712e8 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
@@ -48,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
  */
 static inline void cpu_set_reserved_ttbr0(void)
 {
-       unsigned long ttbr = page_to_phys(empty_zero_page);
+       unsigned long ttbr = virt_to_phys(empty_zero_page);
 
        asm(
        "       msr     ttbr0_el1, %0                   // set TTBR0\n"
@@ -73,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void)
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
-static inline void cpu_set_default_tcr_t0sz(void)
+static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
 {
        unsigned long tcr;
 
@@ -86,7 +87,62 @@ static inline void cpu_set_default_tcr_t0sz(void)
        "       msr     tcr_el1, %0     ;"
        "       isb"
        : "=&r" (tcr)
-       : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+       : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+#define cpu_set_default_tcr_t0sz()     __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
+#define cpu_set_idmap_tcr_t0sz()       __cpu_set_tcr_t0sz(idmap_t0sz)
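
TCR_EL1.T0SZ encodes the size of the TTBR0 region as 64 minus the number of VA bits, so the two wrappers above simply program either the compile-time default or the idmap-specific value. A one-line sketch of that relationship, assuming the usual TCR_T0SZ() definition of ((64 - va_bits) << TCR_T0SZ_OFFSET) and showing only the field value:

static inline unsigned long t0sz_for(unsigned int va_bits)
{
        return 64UL - va_bits;          /* e.g. VA_BITS = 48 gives T0SZ = 16 */
}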
+
+/*
+ * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
+ *
+ * The idmap lives in the same VA range as userspace, but uses global entries
+ * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
+ * speculative TLB fetches, we must temporarily install the reserved page
+ * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
+ *
+ * If current is not a user task, the mm covers the TTBR1_EL1 page tables,
+ * which should not be installed in TTBR0_EL1. In this case we can leave the
+ * reserved page tables in place.
+ */
+static inline void cpu_uninstall_idmap(void)
+{
+       struct mm_struct *mm = current->active_mm;
+
+       cpu_set_reserved_ttbr0();
+       local_flush_tlb_all();
+       cpu_set_default_tcr_t0sz();
+
+       if (mm != &init_mm)
+               cpu_switch_mm(mm->pgd, mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+       cpu_set_reserved_ttbr0();
+       local_flush_tlb_all();
+       cpu_set_idmap_tcr_t0sz();
+
+       cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
+/*
+ * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
+ * avoiding the possibility of conflicting TLB entries being allocated.
+ */
+static inline void cpu_replace_ttbr1(pgd_t *pgd)
+{
+       typedef void (ttbr_replace_func)(phys_addr_t);
+       extern ttbr_replace_func idmap_cpu_replace_ttbr1;
+       ttbr_replace_func *replace_phys;
+
+       phys_addr_t pgd_phys = virt_to_phys(pgd);
+
+       replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+
+       cpu_install_idmap();
+       replace_phys(pgd_phys);
+       cpu_uninstall_idmap();
 }
 
 /*
@@ -147,4 +203,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #define deactivate_mm(tsk,mm)  do { } while (0)
 #define activate_mm(prev,next) switch_mm(prev, next, NULL)
 
+void verify_cpu_asid_bits(void);
+
 #endif
index e80e232b730e2d29f77dba57bfa91ad4439f9781..e12af6754634b3d2aa031ae23ce25228dc766cfb 100644 (file)
 
 #define MODULE_ARCH_VERMAGIC   "aarch64"
 
+#ifdef CONFIG_ARM64_MODULE_PLTS
+struct mod_arch_specific {
+       struct elf64_shdr       *plt;
+       int                     plt_num_entries;
+       int                     plt_max_entries;
+};
+#endif
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+                         Elf64_Sym *sym);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+extern u64 module_alloc_base;
+#else
+#define module_alloc_base      ((u64)_etext - MODULES_VSIZE)
+#endif
+
 #endif /* __ASM_MODULE_H */
index c15053902942e0a3a34ba4882545c5b6d163c23c..ff98585d085aa5737c9c17478555f22b11261f2f 100644 (file)
@@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
        free_page((unsigned long)pmd);
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
 {
-       set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+       set_pud(pud, __pud(pmd | prot));
 }
 
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+       __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+}
+#else
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+{
+       BUILD_BUG();
+}
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
        free_page((unsigned long)pud);
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
 {
-       set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
+       set_pgd(pgdp, __pgd(pud | prot));
 }
 
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+       __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+}
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+{
+       BUILD_BUG();
+}
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
new file mode 100644 (file)
index 0000000..29fcb33
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PGTABLE_PROT_H
+#define __ASM_PGTABLE_PROT_H
+
+#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+#include <linux/const.h>
+
+/*
+ * Software defined PTE bits definition.
+ */
+#define PTE_VALID              (_AT(pteval_t, 1) << 0)
+#define PTE_WRITE              (PTE_DBM)                /* same as DBM (51) */
+#define PTE_DIRTY              (_AT(pteval_t, 1) << 55)
+#define PTE_SPECIAL            (_AT(pteval_t, 1) << 56)
+#define PTE_PROT_NONE          (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+
+#ifndef __ASSEMBLY__
+
+#include <asm/pgtable-types.h>
+
+#define PROT_DEFAULT           (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC  (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT          (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_RO         __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
+
+#define PAGE_HYP               __pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP_DEVICE                __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+
+#define PAGE_S2                        __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE         __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+
+#define PAGE_NONE              __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED            __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC       __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY              __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC         __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY          __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC     __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+
+#define __P000  PAGE_NONE
+#define __P001  PAGE_READONLY
+#define __P010  PAGE_COPY
+#define __P011  PAGE_COPY
+#define __P100  PAGE_READONLY_EXEC
+#define __P101  PAGE_READONLY_EXEC
+#define __P110  PAGE_COPY_EXEC
+#define __P111  PAGE_COPY_EXEC
+
+#define __S000  PAGE_NONE
+#define __S001  PAGE_READONLY
+#define __S010  PAGE_SHARED
+#define __S011  PAGE_SHARED
+#define __S100  PAGE_READONLY_EXEC
+#define __S101  PAGE_READONLY_EXEC
+#define __S110  PAGE_SHARED_EXEC
+#define __S111  PAGE_SHARED_EXEC
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_PGTABLE_PROT_H */
index 819aff5d593f701d1101d4cc42dbec8cedbc80aa..989fef16d4614994829e81c9f10c614d039e5b86 100644 (file)
 
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
-
-/*
- * Software defined PTE bits definition.
- */
-#define PTE_VALID              (_AT(pteval_t, 1) << 0)
-#define PTE_WRITE              (PTE_DBM)                /* same as DBM (51) */
-#define PTE_DIRTY              (_AT(pteval_t, 1) << 55)
-#define PTE_SPECIAL            (_AT(pteval_t, 1) << 56)
-#define PTE_PROT_NONE          (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
+#include <asm/pgtable-prot.h>
 
 /*
  * VMALLOC and SPARSEMEM_VMEMMAP ranges.
  *
  * VMEMMAP_SIZE: allows the whole linear region to be covered by a struct page array
  *     (rounded up to PUD_SIZE).
- * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_START: beginning of the kernel vmalloc space
  * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space,
  *     fixed mappings and modules
  */
 #define VMEMMAP_SIZE           ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
 
-#ifndef CONFIG_KASAN
-#define VMALLOC_START          (VA_START)
-#else
-#include <asm/kasan.h>
-#define VMALLOC_START          (KASAN_SHADOW_END + SZ_64K)
-#endif
-
+#define VMALLOC_START          (MODULES_END)
 #define VMALLOC_END            (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define VMEMMAP_START          (VMALLOC_END + SZ_64K)
@@ -59,6 +45,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/fixmap.h>
 #include <linux/mmdebug.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
@@ -66,65 +53,12 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
 extern void __pud_error(const char *file, int line, unsigned long val);
 extern void __pgd_error(const char *file, int line, unsigned long val);
 
-#define PROT_DEFAULT           (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define PROT_SECT_DEFAULT      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-
-#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
-
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define PROT_SECT_NORMAL_EXEC  (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-
-#define _PAGE_DEFAULT          (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
-#define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_RO         __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
-
-#define PAGE_HYP               __pgprot(_PAGE_DEFAULT | PTE_HYP)
-#define PAGE_HYP_DEVICE                __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-
-#define PAGE_S2                        __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE         __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-
-#define PAGE_NONE              __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED            __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC       __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY              __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC         __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY          __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC     __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-
-#define __P000  PAGE_NONE
-#define __P001  PAGE_READONLY
-#define __P010  PAGE_COPY
-#define __P011  PAGE_COPY
-#define __P100  PAGE_READONLY_EXEC
-#define __P101  PAGE_READONLY_EXEC
-#define __P110  PAGE_COPY_EXEC
-#define __P111  PAGE_COPY_EXEC
-
-#define __S000  PAGE_NONE
-#define __S001  PAGE_READONLY
-#define __S010  PAGE_SHARED
-#define __S011  PAGE_SHARED
-#define __S100  PAGE_READONLY_EXEC
-#define __S101  PAGE_READONLY_EXEC
-#define __S110  PAGE_SHARED_EXEC
-#define __S111  PAGE_SHARED_EXEC
-
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr)       (empty_zero_page)
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr)       virt_to_page(empty_zero_page)
 
 #define pte_ERROR(pte)         __pte_error(__FILE__, __LINE__, pte_val(pte))
 
@@ -136,16 +70,6 @@ extern struct page *empty_zero_page;
 #define pte_clear(mm,addr,ptep)        set_pte(ptep, __pte(0))
 #define pte_page(pte)          (pfn_to_page(pte_pfn(pte)))
 
-/* Find an entry in the third-level page table. */
-#define pte_index(addr)                (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
-#define pte_offset_kernel(dir,addr)    (pmd_page_vaddr(*(dir)) + pte_index(addr))
-
-#define pte_offset_map(dir,addr)       pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr)        pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte)                 do { } while (0)
-#define pte_unmap_nested(pte)          do { } while (0)
-
 /*
  * The following only work if pte_present(). Undefined behaviour otherwise.
  */
@@ -279,7 +203,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
-       if (pte_valid(pte)) {
+       if (pte_present(pte)) {
                if (pte_sw_dirty(pte) && pte_write(pte))
                        pte_val(pte) &= ~PTE_RDONLY;
                else
@@ -412,7 +336,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 #define pmd_sect(pmd)          ((pmd_val(pmd) & PMD_TYPE_MASK) == \
                                 PMD_TYPE_SECT)
 
-#ifdef CONFIG_ARM64_64K_PAGES
+#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
 #define pud_sect(pud)          (0)
 #define pud_table(pud)         (1)
 #else
@@ -434,13 +358,31 @@ static inline void pmd_clear(pmd_t *pmdp)
        set_pmd(pmdp, __pmd(0));
 }
 
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 {
-       return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+       return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
+/* Find an entry in the third-level page table. */
+#define pte_index(addr)                (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_phys(dir,addr)      (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_kernel(dir,addr)    ((pte_t *)__va(pte_offset_phys((dir), (addr))))
+
+#define pte_offset_map(dir,addr)       pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr)        pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte)                 do { } while (0)
+#define pte_unmap_nested(pte)          do { } while (0)
+
+#define pte_set_fixmap(addr)           ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+#define pte_set_fixmap_offset(pmd, addr)       pte_set_fixmap(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap()             clear_fixmap(FIX_PTE)
+
 #define pmd_page(pmd)          pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pte_offset_kimg(dir,addr)      ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -467,21 +409,37 @@ static inline void pud_clear(pud_t *pudp)
        set_pud(pudp, __pud(0));
 }
 
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline phys_addr_t pud_page_paddr(pud_t pud)
 {
-       return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+       return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)                (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
-       return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
+#define pmd_offset_phys(dir, addr)     (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset(dir, addr)          ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
+
+#define pmd_set_fixmap(addr)           ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_set_fixmap_offset(pud, addr)       pmd_set_fixmap(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap()             clear_fixmap(FIX_PMD)
 
 #define pud_page(pud)          pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pmd_offset_kimg(dir,addr)      ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
+
+#else
+
+#define pud_page_paddr(pud)    ({ BUILD_BUG(); 0; })
+
+/* Match pmd_offset folding in <asm-generic/pgtable-nopmd.h> */
+#define pmd_set_fixmap(addr)           NULL
+#define pmd_set_fixmap_offset(pudp, addr)      ((pmd_t *)pudp)
+#define pmd_clear_fixmap()
+
+#define pmd_offset_kimg(dir,addr)      ((pmd_t *)dir)
+
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -503,21 +461,37 @@ static inline void pgd_clear(pgd_t *pgdp)
        set_pgd(pgdp, __pgd(0));
 }
 
-static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 {
-       return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+       return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the first-level page table. */
 #define pud_index(addr)                (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
-{
-       return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
-}
+#define pud_offset_phys(dir, addr)     (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset(dir, addr)          ((pud_t *)__va(pud_offset_phys((dir), (addr))))
+
+#define pud_set_fixmap(addr)           ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
+#define pud_set_fixmap_offset(pgd, addr)       pud_set_fixmap(pud_offset_phys(pgd, addr))
+#define pud_clear_fixmap()             clear_fixmap(FIX_PUD)
 
 #define pgd_page(pgd)          pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pud_offset_kimg(dir,addr)      ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+
+#else
+
+#define pgd_page_paddr(pgd)    ({ BUILD_BUG(); 0;})
+
+/* Match pud_offset folding in <asm-generic/pgtable-nopud.h> */
+#define pud_set_fixmap(addr)           NULL
+#define pud_set_fixmap_offset(pgdp, addr)      ((pud_t *)pgdp)
+#define pud_clear_fixmap()
+
+#define pud_offset_kimg(dir,addr)      ((pud_t *)dir)
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)         __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -525,11 +499,16 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 /* to find an entry in a page-table-directory */
 #define pgd_index(addr)                (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 
-#define pgd_offset(mm, addr)   ((mm)->pgd+pgd_index(addr))
+#define pgd_offset_raw(pgd, addr)      ((pgd) + pgd_index(addr))
+
+#define pgd_offset(mm, addr)   (pgd_offset_raw((mm)->pgd, (addr)))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)     pgd_offset(&init_mm, addr)
 
+#define pgd_set_fixmap(addr)   ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
+#define pgd_clear_fixmap()     clear_fixmap(FIX_PGD)
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
        const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
@@ -649,6 +628,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
  *     bits 0-1:       present (must be zero)
  *     bits 2-7:       swap type
  *     bits 8-57:      swap offset
+ *     bit  58:        PTE_PROT_NONE (must be zero)
  */
 #define __SWP_TYPE_SHIFT       2
 #define __SWP_TYPE_BITS                6
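
A swap entry is packed into a non-present PTE using the layout documented above (6 bits of type starting at bit 2, the offset starting at bit 8). The helpers below are an illustrative reconstruction from those shift/width values only; the real header additionally masks the offset to its documented 50-bit width.

/* illustrative reconstruction from the documented layout; not the real helpers */
#define SWP_TYPE_MASK           ((1UL << __SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_SHIFT        (__SWP_TYPE_SHIFT + __SWP_TYPE_BITS)    /* bit 8 */

#define swp_type_of(val)        (((val) >> __SWP_TYPE_SHIFT) & SWP_TYPE_MASK)
#define swp_offset_of(val)      ((val) >> SWP_OFFSET_SHIFT)
#define swp_entry_of(type, off) ((((unsigned long)(type)) << __SWP_TYPE_SHIFT) | \
                                 (((unsigned long)(off)) << SWP_OFFSET_SHIFT))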
index 4acb7ca94fcd9c05569f3103ab09097c19ea72d5..cef1cf398356f1f61ceea854fc564f6df1d316cc 100644 (file)
 
 #include <linux/string.h>
 
+#include <asm/alternative.h>
 #include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
@@ -177,9 +179,11 @@ static inline void prefetchw(const void *ptr)
 }
 
 #define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *x)
+static inline void spin_lock_prefetch(const void *ptr)
 {
-       prefetchw(x);
+       asm volatile(ARM64_LSE_ATOMIC_INSN(
+                    "prfm pstl1strm, %a0",
+                    "nop") : : "p" (ptr));
 }
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -187,5 +191,6 @@ static inline void spin_lock_prefetch(const void *x)
 #endif
 
 void cpu_enable_pan(void *__unused);
+void cpu_enable_uao(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
index e9e5467e0bf4523a3de2ceb7fc11cdb0f81e8154..a307eb6e7fa8d75e536fe3cff56c5d4f978fe212 100644 (file)
@@ -58,6 +58,7 @@
 #define COMPAT_PSR_Z_BIT       0x40000000
 #define COMPAT_PSR_N_BIT       0x80000000
 #define COMPAT_PSR_IT_MASK     0x0600fc00      /* If-Then execution state mask */
+#define COMPAT_PSR_GE_MASK     0x000f0000
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 #define COMPAT_PSR_ENDSTATE    COMPAT_PSR_E_BIT
@@ -151,35 +152,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
        return regs->regs[0];
 }
 
-/*
- * Are the current registers suitable for user mode? (used to maintain
- * security in signal handlers)
- */
-static inline int valid_user_regs(struct user_pt_regs *regs)
-{
-       if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) {
-               regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT);
-
-               /* The T bit is reserved for AArch64 */
-               if (!(regs->pstate & PSR_MODE32_BIT))
-                       regs->pstate &= ~COMPAT_PSR_T_BIT;
-
-               return 1;
-       }
-
-       /*
-        * Force PSR to something logical...
-        */
-       regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \
-                       COMPAT_PSR_T_BIT | PSR_MODE32_BIT;
-
-       if (!(regs->pstate & PSR_MODE32_BIT)) {
-               regs->pstate &= ~COMPAT_PSR_T_BIT;
-               regs->pstate |= PSR_MODE_EL0t;
-       }
-
-       return 0;
-}
+/* We must avoid circular header include via sched.h */
+struct task_struct;
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
 
 #define instruction_pointer(regs)      ((unsigned long)(regs)->pc)
 
index d9c3d6a6100ac5d68e9b412113daccd1e43d8371..817a067ba05817632181dee6049a5b4e6964bd80 100644 (file)
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+/* Values for secondary_data.status */
+
+#define CPU_MMU_OFF            (-1)
+#define CPU_BOOT_SUCCESS       (0)
+/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
+#define CPU_KILL_ME            (1)
+/* The cpu couldn't die gracefully and is looping in the kernel */
+#define CPU_STUCK_IN_KERNEL    (2)
+/* Fatal system error detected by secondary CPU, crash the system */
+#define CPU_PANIC_KERNEL       (3)
+
+#ifndef __ASSEMBLY__
+
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/thread_info.h>
@@ -54,19 +67,52 @@ asmlinkage void secondary_start_kernel(void);
 
 /*
  * Initial data for bringing up a secondary CPU.
+ * @stack  - sp for the secondary CPU
+ * @status - Result passed back from the secondary CPU to
+ *           indicate failure.
  */
 struct secondary_data {
        void *stack;
+       long status;
 };
+
 extern struct secondary_data secondary_data;
+extern long __early_cpu_boot_status;
 extern void secondary_entry(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+#else
+static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+       BUILD_BUG();
+}
+#endif
+
 extern int __cpu_disable(void);
 
 extern void __cpu_die(unsigned int cpu);
 extern void cpu_die(void);
+extern void cpu_die_early(void);
+
+static inline void cpu_park_loop(void)
+{
+       for (;;) {
+               wfe();
+               wfi();
+       }
+}
+
+static inline void update_cpu_boot_status(int val)
+{
+       WRITE_ONCE(secondary_data.status, val);
+       /* Ensure the visibility of the status update */
+       dsb(ishst);
+}
+
+#endif /* ifndef __ASSEMBLY__ */
 
 #endif /* ifndef __ASM_SMP_H */
index 4aeebec3d8828431a59b757f421b6260511f3455..1a78d6e2a78b58bf21113de3810785a81a35a29d 100644 (file)
 
 #define SYS_ID_AA64MMFR0_EL1           sys_reg(3, 0, 0, 7, 0)
 #define SYS_ID_AA64MMFR1_EL1           sys_reg(3, 0, 0, 7, 1)
+#define SYS_ID_AA64MMFR2_EL1           sys_reg(3, 0, 0, 7, 2)
 
 #define SYS_CNTFRQ_EL0                 sys_reg(3, 3, 14, 0, 0)
 #define SYS_CTR_EL0                    sys_reg(3, 3, 0, 0, 1)
 #define SYS_DCZID_EL0                  sys_reg(3, 3, 0, 0, 7)
 
 #define REG_PSTATE_PAN_IMM             sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM             sys_reg(0, 0, 4, 0, 3)
 
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
                                     (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
+                                    (!!x)<<8 | 0x1f)
 
 /* SCTLR_EL1 */
 #define SCTLR_EL1_CP15BEN      (0x1 << 5)
 #define ID_AA64MMFR1_VMIDBITS_SHIFT    4
 #define ID_AA64MMFR1_HADBS_SHIFT       0
 
+/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_UAO_SHIFT         4
+
 /* id_aa64dfr0 */
 #define ID_AA64DFR0_CTX_CMPS_SHIFT     28
 #define ID_AA64DFR0_WRPS_SHIFT         20
 #ifdef __ASSEMBLY__
 
        .irp    num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
-       .equ    __reg_num_x\num, \num
+       .equ    .L__reg_num_x\num, \num
        .endr
-       .equ    __reg_num_xzr, 31
+       .equ    .L__reg_num_xzr, 31
 
        .macro  mrs_s, rt, sreg
-       .inst   0xd5200000|(\sreg)|(__reg_num_\rt)
+       .inst   0xd5200000|(\sreg)|(.L__reg_num_\rt)
        .endm
 
        .macro  msr_s, sreg, rt
-       .inst   0xd5000000|(\sreg)|(__reg_num_\rt)
+       .inst   0xd5000000|(\sreg)|(.L__reg_num_\rt)
        .endm
 
 #else
 
 asm(
 "      .irp    num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-"      .equ    __reg_num_x\\num, \\num\n"
+"      .equ    .L__reg_num_x\\num, \\num\n"
 "      .endr\n"
-"      .equ    __reg_num_xzr, 31\n"
+"      .equ    .L__reg_num_xzr, 31\n"
 "\n"
 "      .macro  mrs_s, rt, sreg\n"
-"      .inst   0xd5200000|(\\sreg)|(__reg_num_\\rt)\n"
+"      .inst   0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
 "      .endm\n"
 "\n"
 "      .macro  msr_s, sreg, rt\n"
-"      .inst   0xd5000000|(\\sreg)|(__reg_num_\\rt)\n"
+"      .inst   0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
 "      .endm\n"
 );
 
index b2ede967fe7d49258c56af51e73786efb5d1a420..0685d74572af788b05d44658c7dba7e4fc3742ba 100644 (file)
 #define VERIFY_WRITE 1
 
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
 
 struct exception_table_entry
 {
-       unsigned long insn, fixup;
+       int insn, fixup;
 };
 
+#define ARCH_HAS_RELATIVE_EXTABLE
+
 extern int fixup_exception(struct pt_regs *regs);
 
 #define KERNEL_DS      (-1UL)
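
Each 32-bit field now holds the signed distance from the field itself to the address it describes, which keeps the table valid wherever the (randomized) kernel image lands. A sketch of how such an entry is resolved back into absolute addresses, mirroring what the generic relative-extable code does; the function names here are illustrative:

static inline unsigned long ex_insn_addr(const struct exception_table_entry *e)
{
        return (unsigned long)&e->insn + e->insn;
}

static inline unsigned long ex_fixup_addr(const struct exception_table_entry *e)
{
        return (unsigned long)&e->fixup + e->fixup;
}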
@@ -64,6 +66,16 @@ extern int fixup_exception(struct pt_regs *regs);
 static inline void set_fs(mm_segment_t fs)
 {
        current_thread_info()->addr_limit = fs;
+
+       /*
+        * Enable/disable UAO so that copy_to_user() etc can access
+        * kernel memory with the unprivileged instructions.
+        */
+       if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
+               asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+       else
+               asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+                               CONFIG_ARM64_UAO));
 }
 
 #define segment_eq(a, b)       ((a) == (b))
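
With this change, a set_fs(KERNEL_DS)/set_fs(old_fs) pair also toggles PSTATE.UAO on CPUs that have the feature, so the LDTR/STTR-based uaccess routines can reach kernel addresses while the widened window is open. A hedged, kernel-context sketch of that calling pattern (the helper name is made up for illustration):

static int peek_kernel_long(const unsigned long *kaddr, unsigned long *val)
{
        mm_segment_t old_fs = get_fs();
        int ret;

        set_fs(KERNEL_DS);      /* UAO set: ldtr/sttr behave as privileged accesses */
        ret = get_user(*val, (const unsigned long __user *)kaddr);
        set_fs(old_fs);         /* USER_DS restored: uaccess is unprivileged again */

        return ret;
}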
@@ -105,6 +117,12 @@ static inline void set_fs(mm_segment_t fs)
 #define access_ok(type, addr, size)    __range_ok(addr, size)
 #define user_addr_max                  get_fs
 
+#define _ASM_EXTABLE(from, to)                                         \
+       "       .pushsection    __ex_table, \"a\"\n"                    \
+       "       .align          3\n"                                    \
+       "       .long           (" #from " - .), (" #to " - .)\n"       \
+       "       .popsection\n"
+
 /*
  * The "__xxx" versions of the user access functions do not verify the address
  * space - it must have been done previously with a separate "access_ok()"
@@ -113,9 +131,10 @@ static inline void set_fs(mm_segment_t fs)
  * The "__xxx_error" versions set the third argument to -EFAULT if an error
  * occurs, and leave it unchanged on success.
  */
-#define __get_user_asm(instr, reg, x, addr, err)                       \
+#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature)   \
        asm volatile(                                                   \
-       "1:     " instr "       " reg "1, [%2]\n"                       \
+       "1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",                  \
+                       alt_instr " " reg "1, [%2]\n", feature)         \
        "2:\n"                                                          \
        "       .section .fixup, \"ax\"\n"                              \
        "       .align  2\n"                                            \
@@ -123,10 +142,7 @@ static inline void set_fs(mm_segment_t fs)
        "       mov     %1, #0\n"                                       \
        "       b       2b\n"                                           \
        "       .previous\n"                                            \
-       "       .section __ex_table,\"a\"\n"                            \
-       "       .align  3\n"                                            \
-       "       .quad   1b, 3b\n"                                       \
-       "       .previous"                                              \
+       _ASM_EXTABLE(1b, 3b)                                            \
        : "+r" (err), "=&r" (x)                                         \
        : "r" (addr), "i" (-EFAULT))
 
@@ -134,26 +150,30 @@ static inline void set_fs(mm_segment_t fs)
 do {                                                                   \
        unsigned long __gu_val;                                         \
        __chk_user_ptr(ptr);                                            \
-       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,        \
+       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
                        CONFIG_ARM64_PAN));                             \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
-               __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err));   \
+               __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr),  \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        case 2:                                                         \
-               __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err));   \
+               __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr),  \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        case 4:                                                         \
-               __get_user_asm("ldr", "%w", __gu_val, (ptr), (err));    \
+               __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr),    \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        case 8:                                                         \
-               __get_user_asm("ldr", "%",  __gu_val, (ptr), (err));    \
+               __get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),    \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        }                                                               \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
-       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,        \
+       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
                        CONFIG_ARM64_PAN));                             \
 } while (0)
 
@@ -181,19 +201,17 @@ do {                                                                      \
                ((x) = 0, -EFAULT);                                     \
 })
 
-#define __put_user_asm(instr, reg, x, addr, err)                       \
+#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature)   \
        asm volatile(                                                   \
-       "1:     " instr "       " reg "1, [%2]\n"                       \
+       "1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",                  \
+                       alt_instr " " reg "1, [%2]\n", feature)         \
        "2:\n"                                                          \
        "       .section .fixup,\"ax\"\n"                               \
        "       .align  2\n"                                            \
        "3:     mov     %w0, %3\n"                                      \
        "       b       2b\n"                                           \
        "       .previous\n"                                            \
-       "       .section __ex_table,\"a\"\n"                            \
-       "       .align  3\n"                                            \
-       "       .quad   1b, 3b\n"                                       \
-       "       .previous"                                              \
+       _ASM_EXTABLE(1b, 3b)                                            \
        : "+r" (err)                                                    \
        : "r" (x), "r" (addr), "i" (-EFAULT))
 
@@ -201,25 +219,29 @@ do {                                                                      \
 do {                                                                   \
        __typeof__(*(ptr)) __pu_val = (x);                              \
        __chk_user_ptr(ptr);                                            \
-       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,        \
+       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
                        CONFIG_ARM64_PAN));                             \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
-               __put_user_asm("strb", "%w", __pu_val, (ptr), (err));   \
+               __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr),  \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        case 2:                                                         \
-               __put_user_asm("strh", "%w", __pu_val, (ptr), (err));   \
+               __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr),  \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        case 4:                                                         \
-               __put_user_asm("str",  "%w", __pu_val, (ptr), (err));   \
+               __put_user_asm("str", "sttr", "%w", __pu_val, (ptr),    \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        case 8:                                                         \
-               __put_user_asm("str",  "%", __pu_val, (ptr), (err));    \
+               __put_user_asm("str", "sttr", "%", __pu_val, (ptr),     \
+                              (err), ARM64_HAS_UAO);                   \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        }                                                               \
-       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,        \
+       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
                        CONFIG_ARM64_PAN));                             \
 } while (0)
 
index aab5bf09e9d902f7bdf3d09e61fcd97db748c505..2b79b8a89457bd70e20e24ba33c8029195e3284c 100644 (file)
@@ -16,6 +16,8 @@
 #ifndef __ASM_WORD_AT_A_TIME_H
 #define __ASM_WORD_AT_A_TIME_H
 
+#include <asm/uaccess.h>
+
 #ifndef __AARCH64EB__
 
 #include <linux/kernel.h>
@@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 #endif
        "       b       2b\n"
        "       .popsection\n"
-       "       .pushsection __ex_table,\"a\"\n"
-       "       .align  3\n"
-       "       .quad   1b, 3b\n"
-       "       .popsection"
+       _ASM_EXTABLE(1b, 3b)
        : "=&r" (ret), "=&r" (offset)
        : "r" (addr), "Q" (*(unsigned long *)addr));
 
index 361c8a8ef55f372a6c766889cbae007e680f7ef3..a739287ef6a3dec81054636109df24ffd0cd8052 100644 (file)
@@ -28,5 +28,7 @@
 #define HWCAP_SHA2             (1 << 6)
 #define HWCAP_CRC32            (1 << 7)
 #define HWCAP_ATOMICS          (1 << 8)
+#define HWCAP_FPHP             (1 << 9)
+#define HWCAP_ASIMDHP          (1 << 10)
 
 #endif /* _UAPI__ASM_HWCAP_H */
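
These two bits advertise ARMv8.2 half-precision floating-point and Advanced SIMD support to user space via the ELF auxiliary vector. A stand-alone sketch of how a program might probe them (bit values copied from the hunk above):

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_FPHP      (1 << 9)
#define HWCAP_ASIMDHP   (1 << 10)

int main(void)
{
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("FP half-precision:      %s\n", (hwcap & HWCAP_FPHP) ? "yes" : "no");
        printf("AdvSIMD half-precision: %s\n", (hwcap & HWCAP_ASIMDHP) ? "yes" : "no");
        return 0;
}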
index 208db3df135a48d1b7118da22697ad24cd75706e..b5c3933ed44163b2fb489a00553195afc1ff6806 100644 (file)
@@ -45,6 +45,7 @@
 #define PSR_A_BIT      0x00000100
 #define PSR_D_BIT      0x00000200
 #define PSR_PAN_BIT    0x00400000
+#define PSR_UAO_BIT    0x00800000
 #define PSR_Q_BIT      0x08000000
 #define PSR_V_BIT      0x10000000
 #define PSR_C_BIT      0x20000000
index 83cd7e68e83b2e59740b8e56268b7f0a2604aa63..3793003e16a2467c4b2bcc710e3e776c895ab03d 100644 (file)
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT)            += sys32.o kuser32.o signal32.o         \
                                           ../../arm/kernel/opcodes.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)    += ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)            += arm64ksyms.o module.o
+arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)  += module-plts.o
 arm64-obj-$(CONFIG_PERF_EVENTS)                += perf_regs.o perf_callchain.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)     += perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -41,7 +42,9 @@ arm64-obj-$(CONFIG_EFI)                       += efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)                        += pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)               += acpi.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)        += acpi_parking_protocol.o
 arm64-obj-$(CONFIG_PARAVIRT)           += paravirt.o
+arm64-obj-$(CONFIG_RANDOMIZE_BASE)     += kaslr.o
 
 obj-y                                  += $(arm64-obj-y) vdso/
 obj-m                                  += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
new file mode 100644 (file)
index 0000000..a32b401
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * ARM64 ACPI Parking Protocol implementation
+ *
+ * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *         Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+#include <asm/cpu_ops.h>
+
+struct parking_protocol_mailbox {
+       __le32 cpu_id;
+       __le32 reserved;
+       __le64 entry_point;
+};
+
+struct cpu_mailbox_entry {
+       struct parking_protocol_mailbox __iomem *mailbox;
+       phys_addr_t mailbox_addr;
+       u8 version;
+       u8 gic_cpu_id;
+};
+
+static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS];
+
+void __init acpi_set_mailbox_entry(int cpu,
+                                  struct acpi_madt_generic_interrupt *p)
+{
+       struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+       cpu_entry->mailbox_addr = p->parked_address;
+       cpu_entry->version = p->parking_version;
+       cpu_entry->gic_cpu_id = p->cpu_interface_number;
+}
+
+bool acpi_parking_protocol_valid(int cpu)
+{
+       struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+       return cpu_entry->mailbox_addr && cpu_entry->version;
+}
+
+static int acpi_parking_protocol_cpu_init(unsigned int cpu)
+{
+       pr_debug("%s: ACPI parked addr=%llx\n", __func__,
+                 cpu_mailbox_entries[cpu].mailbox_addr);
+
+       return 0;
+}
+
+static int acpi_parking_protocol_cpu_prepare(unsigned int cpu)
+{
+       return 0;
+}
+
+static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
+{
+       struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+       struct parking_protocol_mailbox __iomem *mailbox;
+       __le32 cpu_id;
+
+       /*
+        * Map mailbox memory with attribute device nGnRE (i.e. ioremap -
+        * this deviates from the parking protocol specification, which
+        * requires the mailboxes to be mapped nGnRnE; the attribute
+        * discrepancy is harmless insofar as the protocol specification
+        * is concerned).
+        * If the mailbox is mistakenly allocated in the linear mapping
+        * by FW, ioremap will fail since the kernel prevents such a
+        * mapping (it would clash with the linear mapping attributes).
+        */
+       mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+       if (!mailbox)
+               return -EIO;
+
+       cpu_id = readl_relaxed(&mailbox->cpu_id);
+       /*
+        * Check if firmware has set up the mailbox entry properly
+        * before kickstarting the respective cpu.
+        */
+       if (cpu_id != ~0U) {
+               iounmap(mailbox);
+               return -ENXIO;
+       }
+
+       /*
+        * stash the mailbox address mapping to use it for further FW
+        * checks in the postboot method
+        */
+       cpu_entry->mailbox = mailbox;
+
+       /*
+        * We write the entry point and cpu id as LE regardless of the
+        * native endianness of the kernel. Therefore, any boot-loaders
+        * that read this address need to convert this address to the
+        * Boot-Loader's endianness before jumping.
+        */
+       writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+       writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
+
+       arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+
+       return 0;
+}
+
+static void acpi_parking_protocol_cpu_postboot(void)
+{
+       int cpu = smp_processor_id();
+       struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+       struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox;
+       __le64 entry_point;
+
+       entry_point = readl_relaxed(&mailbox->entry_point);
+       /*
+        * Check if firmware has cleared the entry_point as expected
+        * by the protocol specification.
+        */
+       WARN_ON(entry_point);
+}
+
+const struct cpu_operations acpi_parking_protocol_ops = {
+       .name           = "parking-protocol",
+       .cpu_init       = acpi_parking_protocol_cpu_init,
+       .cpu_prepare    = acpi_parking_protocol_cpu_prepare,
+       .cpu_boot       = acpi_parking_protocol_cpu_boot,
+       .cpu_postboot   = acpi_parking_protocol_cpu_postboot
+};
index 3e01207917b13db999b08d36cb722c153ad8cd7f..c37202c0c838d01a71d56b05d114cd0b419ff480 100644 (file)
@@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
        "4:     mov             %w0, %w5\n"                     \
        "       b               3b\n"                           \
        "       .popsection"                                    \
-       "       .pushsection     __ex_table,\"a\"\n"            \
-       "       .align          3\n"                            \
-       "       .quad           0b, 4b\n"                       \
-       "       .quad           1b, 4b\n"                       \
-       "       .popsection\n"                                  \
+       _ASM_EXTABLE(0b, 4b)                                    \
+       _ASM_EXTABLE(1b, 4b)                                    \
        ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,    \
                CONFIG_ARM64_PAN)                               \
        : "=&r" (res), "+r" (data), "=&r" (temp)                \
index b0ab4e93db0d41e7dafa5110425dbc0471742d78..3ae6b310ac9bc59e0563e88ffad7986b99a637ad 100644 (file)
@@ -104,6 +104,8 @@ int main(void)
   DEFINE(TZ_MINWEST,           offsetof(struct timezone, tz_minuteswest));
   DEFINE(TZ_DSTTIME,           offsetof(struct timezone, tz_dsttime));
   BLANK();
+  DEFINE(CPU_BOOT_STACK,       offsetof(struct secondary_data, stack));
+  BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_CONTEXT,         offsetof(struct kvm_vcpu, arch.ctxt));
   DEFINE(CPU_GP_REGS,          offsetof(struct kvm_cpu_context, gp_regs));
index feb6b4efa6414846d5598ccb0913a544ba0cf441..06afd04e02c0d05f1e0546230a5d446b8bb06b60 100644 (file)
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
 
-#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
-#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-#define MIDR_THUNDERX  MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
-
-#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
-                       MIDR_ARCHITECTURE_MASK)
-
 static bool __maybe_unused
 is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
 {
-       u32 midr = read_cpuid_id();
-
-       if ((midr & CPU_MODEL_MASK) != entry->midr_model)
-               return false;
-
-       midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
-
-       return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+       return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
+                                      entry->midr_range_min,
+                                      entry->midr_range_max);
 }
 
 #define MIDR_RANGE(model, min, max) \
@@ -99,6 +87,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                .capability = ARM64_WORKAROUND_CAVIUM_23154,
                MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01),
        },
+#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+       {
+       /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+               .desc = "Cavium erratum 27456",
+               .capability = ARM64_WORKAROUND_CAVIUM_27456,
+               MIDR_RANGE(MIDR_THUNDERX, 0x00,
+                          (1 << MIDR_VARIANT_SHIFT) | 1),
+       },
 #endif
        {
        }
index b6bd7d4477683393fb34dc6b055b07382e8ab050..c7cfb8fe06f94c7f5113abf0cb624980e4227127 100644 (file)
 #include <asm/smp_plat.h>
 
 extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations acpi_parking_protocol_ops;
 extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS];
 
-static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
        &smp_spin_table_ops,
        &cpu_psci_ops,
        NULL,
 };
 
+static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+       &acpi_parking_protocol_ops,
+#endif
+       &cpu_psci_ops,
+       NULL,
+};
+
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-       const struct cpu_operations **ops = supported_cpu_ops;
+       const struct cpu_operations **ops;
+
+       ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 
        while (*ops) {
                if (!strcmp(name, (*ops)->name))
@@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu)
                }
        } else {
                enable_method = acpi_get_enable_method(cpu);
-               if (!enable_method)
-                       pr_err("Unsupported ACPI enable-method\n");
+               if (!enable_method) {
+                       /*
+                        * In ACPI systems the boot CPU does not require
+                        * checking the enable method since for some
+                        * boot protocols (i.e. the parking protocol) it
+                        * need not be initialized. Don't warn spuriously.
+                        */
+                       if (cpu != 0)
+                               pr_err("Unsupported ACPI enable-method\n");
+               }
        }
 
        return enable_method;
index ba745199297e3b086d0b28320305f4f8885eb48e..943f5140e0f33d592a78d179b8c8d68e5851e837 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/virt.h>
@@ -55,19 +56,23 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
                .safe_val = SAFE_VAL,                   \
        }
 
-/* Define a feature with signed values */
+/* Define a feature with unsigned values */
 #define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
-       __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
-
-/* Define a feature with unsigned value */
-#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
        __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
 
+/* Define a feature with a signed value */
+#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+       __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
 #define ARM64_FTR_END                                  \
        {                                               \
                .width = 0,                             \
        }
 
+/* meta feature for alternatives */
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+
 static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
@@ -85,8 +90,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
-       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
        /* Linux doesn't care about the EL3 */
        ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
@@ -97,8 +102,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 
 static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
-       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
        /* Linux shouldn't care about secure memory */
@@ -109,7 +114,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
         * Differing PARange is fine as long as all peripherals and memory are mapped
         * within the minimum PARange of all CPUs
         */
-       U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -124,29 +129,34 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
        ARM64_FTR_END,
 };
 
+static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
 static struct arm64_ftr_bits ftr_ctr[] = {
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),      /* RAO */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),        /* RAO */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),        /* CWG */
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),  /* CWG */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),   /* ERG */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),   /* DminLine */
        /*
         * Linux can handle differing I-cache policies. Userspace JITs will
         * make use of *minLine
         */
-       U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),   /* L1Ip */
+       ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),     /* L1Ip */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),        /* RAZ */
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),  /* IminLine */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),    /* IminLine */
        ARM64_FTR_END,
 };
 
 static struct arm64_ftr_bits ftr_id_mmfr0[] = {
-       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),        /* InnerShr */
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf),    /* InnerShr */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),        /* FCSE */
        ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),        /* AuxReg */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0),        /* TCM */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),        /* ShareLvl */
-       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf),     /* OuterShr */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
        ARM64_FTR_END,
@@ -154,12 +164,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = {
 
 static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
        ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
-       U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
        ARM64_FTR_END,
 };
 
@@ -205,6 +215,18 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = {
        ARM64_FTR_END,
 };
 
+static struct arm64_ftr_bits ftr_id_dfr0[] = {
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+       S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf),       /* PerfMon */
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+       ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+       ARM64_FTR_END,
+};
+
 /*
  * Common ftr bits for a 32bit register with all hidden, strict
  * attributes, with 4bit feature fields and a default safe value of
@@ -250,7 +272,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
        /* Op1 = 0, CRn = 0, CRm = 1 */
        ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
        ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
-       ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+       ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0),
        ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
        ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
        ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
@@ -285,6 +307,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
        /* Op1 = 0, CRn = 0, CRm = 7 */
        ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
        ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+       ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
 
        /* Op1 = 3, CRn = 0, CRm = 0 */
        ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
@@ -409,6 +432,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
        init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
        init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
        init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+       init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
        init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
        init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
        init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -518,6 +542,8 @@ void update_cpu_features(int cpu,
                                      info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
        taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
                                      info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+       taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
+                                     info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
 
        /*
         * EL3 is not our concern.
@@ -593,7 +619,7 @@ u64 read_system_reg(u32 id)
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
-       int val = cpuid_feature_extract_field(reg, entry->field_pos);
+       int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
 
        return val >= entry->min_field_value;
 }
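
To make the new sign parameter concrete: a minimal standalone sketch (illustrative helpers, not the kernel's exact cpuid_feature_extract_field() implementation) of pulling a 4-bit ID register field out either sign-extended or zero-extended.

    #include <stdint.h>

    /* Illustrative only: a 4-bit ID register field at bit position 'shift',
     * read as signed (e.g. FP/ASIMD, where the "not implemented" value 0xf
     * reads as -1) or as unsigned (e.g. PARange). */
    static inline int64_t extract_signed_4bit(uint64_t reg, unsigned int shift)
    {
            return (int64_t)(reg << (60 - shift)) >> 60;    /* arithmetic shift */
    }

    static inline uint64_t extract_unsigned_4bit(uint64_t reg, unsigned int shift)
    {
            return (reg >> shift) & 0xf;
    }
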
@@ -622,6 +648,18 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
        return has_sre;
 }
 
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
+{
+       u32 midr = read_cpuid_id();
+       u32 rv_min, rv_max;
+
+       /* Cavium ThunderX pass 1.x and 2.x */
+       rv_min = 0;
+       rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+       return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
 static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
 {
        return is_kernel_in_hyp_mode();
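
The rv_min/rv_max encoding in has_no_hw_prefetch() above is terse; as a hedged illustration (field masks written out by hand rather than the kernel's MIDR_* macros), the model/revision range test amounts to:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative only: MIDR_EL1 keeps Variant in bits [23:20] and Revision in
     * bits [3:0]; rv_max = (1 << 20) | 0xf therefore covers variants 0 and 1
     * with any revision, i.e. ThunderX pass 1.x and 2.x. */
    static bool midr_in_model_range(uint32_t midr, uint32_t model,
                                    uint32_t rv_min, uint32_t rv_max)
    {
            uint32_t var_rev = midr & ((0xfu << 20) | 0xfu);

            return (midr & ~((0xfu << 20) | 0xfu)) == model &&
                   var_rev >= rv_min && var_rev <= rv_max;
    }
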
@@ -634,6 +672,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_useable_gicv3_cpuif,
                .sys_reg = SYS_ID_AA64PFR0_EL1,
                .field_pos = ID_AA64PFR0_GIC_SHIFT,
+               .sign = FTR_UNSIGNED,
                .min_field_value = 1,
        },
 #ifdef CONFIG_ARM64_PAN
@@ -643,6 +682,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64MMFR1_EL1,
                .field_pos = ID_AA64MMFR1_PAN_SHIFT,
+               .sign = FTR_UNSIGNED,
                .min_field_value = 1,
                .enable = cpu_enable_pan,
        },
@@ -654,9 +694,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR0_EL1,
                .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+               .sign = FTR_UNSIGNED,
                .min_field_value = 2,
        },
 #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+       {
+               .desc = "Software prefetching using PRFM",
+               .capability = ARM64_HAS_NO_HW_PREFETCH,
+               .matches = has_no_hw_prefetch,
+       },
+#ifdef CONFIG_ARM64_UAO
+       {
+               .desc = "User Access Override",
+               .capability = ARM64_HAS_UAO,
+               .matches = has_cpuid_feature,
+               .sys_reg = SYS_ID_AA64MMFR2_EL1,
+               .field_pos = ID_AA64MMFR2_UAO_SHIFT,
+               .min_field_value = 1,
+               .enable = cpu_enable_uao,
+       },
+#endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+       {
+               .capability = ARM64_ALT_PAN_NOT_UAO,
+               .matches = cpufeature_pan_not_uao,
+       },
+#endif /* CONFIG_ARM64_PAN */
        {
                .desc = "Virtualization Host Extensions",
                .capability = ARM64_HAS_VIRT_HOST_EXTN,
@@ -665,32 +728,35 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
        {},
 };
 
-#define HWCAP_CAP(reg, field, min_value, type, cap)            \
+#define HWCAP_CAP(reg, field, s, min_value, type, cap) \
        {                                                       \
                .desc = #cap,                                   \
                .matches = has_cpuid_feature,                   \
                .sys_reg = reg,                                 \
                .field_pos = field,                             \
+               .sign = s,                                      \
                .min_field_value = min_value,                   \
                .hwcap_type = type,                             \
                .hwcap = cap,                                   \
        }
 
 static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
-       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
-       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
+       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
+       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
+       HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
 #ifdef CONFIG_COMPAT
-       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
-       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
-       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
-       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
-       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+       HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
 #endif
        {},
 };
@@ -745,7 +811,7 @@ static void __init setup_cpu_hwcaps(void)
        int i;
        const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
 
-       for (i = 0; hwcaps[i].desc; i++)
+       for (i = 0; hwcaps[i].matches; i++)
                if (hwcaps[i].matches(&hwcaps[i]))
                        cap_set_hwcap(&hwcaps[i]);
 }
@@ -755,11 +821,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 {
        int i;
 
-       for (i = 0; caps[i].desc; i++) {
+       for (i = 0; caps[i].matches; i++) {
                if (!caps[i].matches(&caps[i]))
                        continue;
 
-               if (!cpus_have_cap(caps[i].capability))
+               if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
                        pr_info("%s %s\n", info, caps[i].desc);
                cpus_set_cap(caps[i].capability);
        }
@@ -774,13 +840,11 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
        int i;
 
-       for (i = 0; caps[i].desc; i++)
+       for (i = 0; caps[i].matches; i++)
                if (caps[i].enable && cpus_have_cap(caps[i].capability))
                        on_each_cpu(caps[i].enable, NULL, true);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
 /*
  * Flag to indicate if we have computed the system wide
  * capabilities based on the boot time active CPUs. This
@@ -802,35 +866,36 @@ static inline void set_sys_caps_initialised(void)
 static u64 __raw_read_system_reg(u32 sys_id)
 {
        switch (sys_id) {
-       case SYS_ID_PFR0_EL1:           return (u64)read_cpuid(ID_PFR0_EL1);
-       case SYS_ID_PFR1_EL1:           return (u64)read_cpuid(ID_PFR1_EL1);
-       case SYS_ID_DFR0_EL1:           return (u64)read_cpuid(ID_DFR0_EL1);
-       case SYS_ID_MMFR0_EL1:          return (u64)read_cpuid(ID_MMFR0_EL1);
-       case SYS_ID_MMFR1_EL1:          return (u64)read_cpuid(ID_MMFR1_EL1);
-       case SYS_ID_MMFR2_EL1:          return (u64)read_cpuid(ID_MMFR2_EL1);
-       case SYS_ID_MMFR3_EL1:          return (u64)read_cpuid(ID_MMFR3_EL1);
-       case SYS_ID_ISAR0_EL1:          return (u64)read_cpuid(ID_ISAR0_EL1);
-       case SYS_ID_ISAR1_EL1:          return (u64)read_cpuid(ID_ISAR1_EL1);
-       case SYS_ID_ISAR2_EL1:          return (u64)read_cpuid(ID_ISAR2_EL1);
-       case SYS_ID_ISAR3_EL1:          return (u64)read_cpuid(ID_ISAR3_EL1);
-       case SYS_ID_ISAR4_EL1:          return (u64)read_cpuid(ID_ISAR4_EL1);
-       case SYS_ID_ISAR5_EL1:          return (u64)read_cpuid(ID_ISAR4_EL1);
-       case SYS_MVFR0_EL1:             return (u64)read_cpuid(MVFR0_EL1);
-       case SYS_MVFR1_EL1:             return (u64)read_cpuid(MVFR1_EL1);
-       case SYS_MVFR2_EL1:             return (u64)read_cpuid(MVFR2_EL1);
-
-       case SYS_ID_AA64PFR0_EL1:       return (u64)read_cpuid(ID_AA64PFR0_EL1);
-       case SYS_ID_AA64PFR1_EL1:       return (u64)read_cpuid(ID_AA64PFR0_EL1);
-       case SYS_ID_AA64DFR0_EL1:       return (u64)read_cpuid(ID_AA64DFR0_EL1);
-       case SYS_ID_AA64DFR1_EL1:       return (u64)read_cpuid(ID_AA64DFR0_EL1);
-       case SYS_ID_AA64MMFR0_EL1:      return (u64)read_cpuid(ID_AA64MMFR0_EL1);
-       case SYS_ID_AA64MMFR1_EL1:      return (u64)read_cpuid(ID_AA64MMFR1_EL1);
-       case SYS_ID_AA64ISAR0_EL1:      return (u64)read_cpuid(ID_AA64ISAR0_EL1);
-       case SYS_ID_AA64ISAR1_EL1:      return (u64)read_cpuid(ID_AA64ISAR1_EL1);
-
-       case SYS_CNTFRQ_EL0:            return (u64)read_cpuid(CNTFRQ_EL0);
-       case SYS_CTR_EL0:               return (u64)read_cpuid(CTR_EL0);
-       case SYS_DCZID_EL0:             return (u64)read_cpuid(DCZID_EL0);
+       case SYS_ID_PFR0_EL1:           return read_cpuid(ID_PFR0_EL1);
+       case SYS_ID_PFR1_EL1:           return read_cpuid(ID_PFR1_EL1);
+       case SYS_ID_DFR0_EL1:           return read_cpuid(ID_DFR0_EL1);
+       case SYS_ID_MMFR0_EL1:          return read_cpuid(ID_MMFR0_EL1);
+       case SYS_ID_MMFR1_EL1:          return read_cpuid(ID_MMFR1_EL1);
+       case SYS_ID_MMFR2_EL1:          return read_cpuid(ID_MMFR2_EL1);
+       case SYS_ID_MMFR3_EL1:          return read_cpuid(ID_MMFR3_EL1);
+       case SYS_ID_ISAR0_EL1:          return read_cpuid(ID_ISAR0_EL1);
+       case SYS_ID_ISAR1_EL1:          return read_cpuid(ID_ISAR1_EL1);
+       case SYS_ID_ISAR2_EL1:          return read_cpuid(ID_ISAR2_EL1);
+       case SYS_ID_ISAR3_EL1:          return read_cpuid(ID_ISAR3_EL1);
+       case SYS_ID_ISAR4_EL1:          return read_cpuid(ID_ISAR4_EL1);
+       case SYS_ID_ISAR5_EL1:          return read_cpuid(ID_ISAR4_EL1);
+       case SYS_MVFR0_EL1:             return read_cpuid(MVFR0_EL1);
+       case SYS_MVFR1_EL1:             return read_cpuid(MVFR1_EL1);
+       case SYS_MVFR2_EL1:             return read_cpuid(MVFR2_EL1);
+
+       case SYS_ID_AA64PFR0_EL1:       return read_cpuid(ID_AA64PFR0_EL1);
+       case SYS_ID_AA64PFR1_EL1:       return read_cpuid(ID_AA64PFR0_EL1);
+       case SYS_ID_AA64DFR0_EL1:       return read_cpuid(ID_AA64DFR0_EL1);
+       case SYS_ID_AA64DFR1_EL1:       return read_cpuid(ID_AA64DFR0_EL1);
+       case SYS_ID_AA64MMFR0_EL1:      return read_cpuid(ID_AA64MMFR0_EL1);
+       case SYS_ID_AA64MMFR1_EL1:      return read_cpuid(ID_AA64MMFR1_EL1);
+       case SYS_ID_AA64MMFR2_EL1:      return read_cpuid(ID_AA64MMFR2_EL1);
+       case SYS_ID_AA64ISAR0_EL1:      return read_cpuid(ID_AA64ISAR0_EL1);
+       case SYS_ID_AA64ISAR1_EL1:      return read_cpuid(ID_AA64ISAR1_EL1);
+
+       case SYS_CNTFRQ_EL0:            return read_cpuid(CNTFRQ_EL0);
+       case SYS_CTR_EL0:               return read_cpuid(CTR_EL0);
+       case SYS_DCZID_EL0:             return read_cpuid(DCZID_EL0);
        default:
                BUG();
                return 0;
@@ -838,25 +903,12 @@ static u64 __raw_read_system_reg(u32 sys_id)
 }
 
 /*
- * Park the CPU which doesn't have the capability as advertised
- * by the system.
+ * Check for CPU features that are used in early boot
+ * based on the Boot CPU value.
  */
-static void fail_incapable_cpu(char *cap_type,
-                                const struct arm64_cpu_capabilities *cap)
+static void check_early_cpu_features(void)
 {
-       int cpu = smp_processor_id();
-
-       pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
-       /* Mark this CPU absent */
-       set_cpu_present(cpu, 0);
-
-       /* Check if we can park ourselves */
-       if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
-               cpu_ops[cpu]->cpu_die(cpu);
-       asm(
-       "1:     wfe\n"
-       "       wfi\n"
-       "       b       1b");
+       verify_cpu_asid_bits();
 }
 
 /*
@@ -872,6 +924,8 @@ void verify_local_cpu_capabilities(void)
        int i;
        const struct arm64_cpu_capabilities *caps;
 
+       check_early_cpu_features();
+
        /*
         * If we haven't computed the system capabilities, there is nothing
         * to verify.
@@ -880,35 +934,33 @@ void verify_local_cpu_capabilities(void)
                return;
 
        caps = arm64_features;
-       for (i = 0; caps[i].desc; i++) {
+       for (i = 0; caps[i].matches; i++) {
                if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
                        continue;
                /*
                 * If the new CPU misses an advertised feature, we cannot proceed
                 * further, park the cpu.
                 */
-               if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
-                       fail_incapable_cpu("arm64_features", &caps[i]);
+               if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+                       pr_crit("CPU%d: missing feature: %s\n",
+                                       smp_processor_id(), caps[i].desc);
+                       cpu_die_early();
+               }
                if (caps[i].enable)
                        caps[i].enable(NULL);
        }
 
-       for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+       for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
                if (!cpus_have_hwcap(&caps[i]))
                        continue;
-               if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
-                       fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+               if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
+                       pr_crit("CPU%d: missing HWCAP: %s\n",
+                                       smp_processor_id(), caps[i].desc);
+                       cpu_die_early();
+               }
        }
 }
 
-#else  /* !CONFIG_HOTPLUG_CPU */
-
-static inline void set_sys_caps_initialised(void)
-{
-}
-
-#endif /* CONFIG_HOTPLUG_CPU */
-
 static void __init setup_feature_capabilities(void)
 {
        update_cpu_capabilities(arm64_features, "detected feature:");
@@ -939,3 +991,9 @@ void __init setup_cpu_features(void)
                pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
                        L1_CACHE_BYTES, cls);
 }
+
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+{
+       return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+}
index 212ae6361d8be45d6d73d197284a1ca587599342..84c8684431c7f5d7fe6af5b4e00ffb3a4e98e174 100644 (file)
@@ -59,6 +59,8 @@ static const char *const hwcap_str[] = {
        "sha2",
        "crc32",
        "atomics",
+       "fphp",
+       "asimdhp",
        NULL
 };
 
@@ -210,6 +212,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
        info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
        info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
        info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+       info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
        info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
        info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
 
index c536c9e307b9a484dca6d03572f28cbf9f8c0667..c45f2968bc8ce251313badc343c60ab9cc7a88d9 100644 (file)
@@ -34,7 +34,7 @@
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {
-       return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+       return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
                                                ID_AA64DFR0_DEBUGVER_SHIFT);
 }
 
@@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
 
 /* EL1 Single Step Handler hooks */
 static LIST_HEAD(step_hook);
-static DEFINE_RWLOCK(step_hook_lock);
+static DEFINE_SPINLOCK(step_hook_lock);
 
 void register_step_hook(struct step_hook *hook)
 {
-       write_lock(&step_hook_lock);
-       list_add(&hook->node, &step_hook);
-       write_unlock(&step_hook_lock);
+       spin_lock(&step_hook_lock);
+       list_add_rcu(&hook->node, &step_hook);
+       spin_unlock(&step_hook_lock);
 }
 
 void unregister_step_hook(struct step_hook *hook)
 {
-       write_lock(&step_hook_lock);
-       list_del(&hook->node);
-       write_unlock(&step_hook_lock);
+       spin_lock(&step_hook_lock);
+       list_del_rcu(&hook->node);
+       spin_unlock(&step_hook_lock);
+       synchronize_rcu();
 }
 
 /*
@@ -213,15 +214,15 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
        struct step_hook *hook;
        int retval = DBG_HOOK_ERROR;
 
-       read_lock(&step_hook_lock);
+       rcu_read_lock();
 
-       list_for_each_entry(hook, &step_hook, node)     {
+       list_for_each_entry_rcu(hook, &step_hook, node) {
                retval = hook->fn(regs, esr);
                if (retval == DBG_HOOK_HANDLED)
                        break;
        }
 
-       read_unlock(&step_hook_lock);
+       rcu_read_unlock();
 
        return retval;
 }
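
The conversion above from a rwlock to a spinlock-plus-RCU list follows the usual pattern: readers become lock-free and writers pay the synchronize_rcu() cost only on unregistration. A minimal self-contained sketch with made-up names (not the kernel's step_hook API):

    #include <linux/list.h>
    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>

    struct demo_hook {
            struct list_head node;
            int (*fn)(void *data);
    };

    static LIST_HEAD(demo_hooks);
    static DEFINE_SPINLOCK(demo_hooks_lock);        /* serializes writers only */

    static void demo_register_hook(struct demo_hook *h)
    {
            spin_lock(&demo_hooks_lock);
            list_add_rcu(&h->node, &demo_hooks);
            spin_unlock(&demo_hooks_lock);
    }

    static void demo_unregister_hook(struct demo_hook *h)
    {
            spin_lock(&demo_hooks_lock);
            list_del_rcu(&h->node);
            spin_unlock(&demo_hooks_lock);
            synchronize_rcu();      /* no reader may still see 'h' after this */
    }

    static int demo_call_hooks(void *data)
    {
            struct demo_hook *h;
            int ret = -1;

            rcu_read_lock();        /* readers take no lock at all */
            list_for_each_entry_rcu(h, &demo_hooks, node) {
                    ret = h->fn(data);
                    if (ret == 0)
                            break;
            }
            rcu_read_unlock();
            return ret;
    }
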
index a773db92908b03d325c26dba0dc5ea9c287ef49d..cae3112f779122528545dc086549f9d82c688d52 100644 (file)
@@ -35,6 +35,7 @@ ENTRY(entry)
         * for image_addr variable passed to efi_entry().
         */
        stp     x29, x30, [sp, #-32]!
+       mov     x29, sp
 
        /*
         * Call efi_entry to do the real work.
@@ -61,7 +62,7 @@ ENTRY(entry)
         */
        mov     x20, x0         // DTB address
        ldr     x0, [sp, #16]   // relocated _text address
-       ldr     x21, =stext_offset
+       movz    x21, #:abs_g0:stext_offset
        add     x21, x0, x21
 
        /*
index acc1afd5c749a62b7c0bae5a14d2fd7dbc33adf2..975b274ee7b551a5c9480e776b918859fbe6914d 100644 (file)
@@ -45,7 +45,7 @@
  *     been used to perform kernel mode NEON in the meantime.
  *
  * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
- * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * the id of the current CPU every time the state is loaded onto a CPU. For (b),
  * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
  * address of the userland FPSIMD state of the task that was loaded onto the CPU
  * the most recently, or NULL if kernel mode NEON has been performed after that.
index 6f2f37743d3b34994b9d91fb1f99890910accbad..6ebd204da16ab680fc66cf1669536170e495bc3a 100644 (file)
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/elf.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/kvm_arm.h>
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/smp.h>
 #include <asm/sysreg.h>
 #include <asm/thread_info.h>
 #include <asm/virt.h>
  * in the entry routines.
  */
        __HEAD
-
+_head:
        /*
         * DO NOT MODIFY. Image header expected by Linux boot-loaders.
         */
 #ifdef CONFIG_EFI
-efi_head:
        /*
         * This add instruction has no meaningful effect except that
         * its opcode forms the magic "MZ" signature required by UEFI.
@@ -84,9 +85,9 @@ efi_head:
        b       stext                           // branch to kernel start, magic
        .long   0                               // reserved
 #endif
-       .quad   _kernel_offset_le               // Image load offset from start of RAM, little-endian
-       .quad   _kernel_size_le                 // Effective size of kernel image, little-endian
-       .quad   _kernel_flags_le                // Informative flags, little-endian
+       le64sym _kernel_offset_le               // Image load offset from start of RAM, little-endian
+       le64sym _kernel_size_le                 // Effective size of kernel image, little-endian
+       le64sym _kernel_flags_le                // Informative flags, little-endian
        .quad   0                               // reserved
        .quad   0                               // reserved
        .quad   0                               // reserved
@@ -95,14 +96,14 @@ efi_head:
        .byte   0x4d
        .byte   0x64
 #ifdef CONFIG_EFI
-       .long   pe_header - efi_head            // Offset to the PE header.
+       .long   pe_header - _head               // Offset to the PE header.
 #else
        .word   0                               // reserved
 #endif
 
 #ifdef CONFIG_EFI
        .globl  __efistub_stext_offset
-       .set    __efistub_stext_offset, stext - efi_head
+       .set    __efistub_stext_offset, stext - _head
        .align 3
 pe_header:
        .ascii  "PE"
@@ -125,7 +126,7 @@ optional_header:
        .long   _end - stext                    // SizeOfCode
        .long   0                               // SizeOfInitializedData
        .long   0                               // SizeOfUninitializedData
-       .long   __efistub_entry - efi_head      // AddressOfEntryPoint
+       .long   __efistub_entry - _head         // AddressOfEntryPoint
        .long   __efistub_stext_offset          // BaseOfCode
 
 extra_header_fields:
@@ -140,7 +141,7 @@ extra_header_fields:
        .short  0                               // MinorSubsystemVersion
        .long   0                               // Win32VersionValue
 
-       .long   _end - efi_head                 // SizeOfImage
+       .long   _end - _head                    // SizeOfImage
 
        // Everything before the kernel image is considered part of the header
        .long   __efistub_stext_offset          // SizeOfHeaders
@@ -211,6 +212,7 @@ section_table:
 ENTRY(stext)
        bl      preserve_boot_args
        bl      el2_setup                       // Drop to EL1, w20=cpu_boot_mode
+       mov     x23, xzr                        // KASLR offset, defaults to 0
        adrp    x24, __PHYS_OFFSET
        bl      set_cpu_boot_mode_flag
        bl      __create_page_tables            // x25=TTBR0, x26=TTBR1
@@ -220,11 +222,13 @@ ENTRY(stext)
         * On return, the CPU will be ready for the MMU to be turned on and
         * the TCR will have been set.
         */
-       ldr     x27, =__mmap_switched           // address to jump to after
+       ldr     x27, 0f                         // address to jump to after
                                                // MMU has been enabled
        adr_l   lr, __enable_mmu                // return (PIC) address
        b       __cpu_setup                     // initialise processor
 ENDPROC(stext)
+       .align  3
+0:     .quad   __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
 
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
@@ -312,7 +316,7 @@ ENDPROC(preserve_boot_args)
 __create_page_tables:
        adrp    x25, idmap_pg_dir
        adrp    x26, swapper_pg_dir
-       mov     x27, lr
+       mov     x28, lr
 
        /*
         * Invalidate the idmap and swapper page tables to avoid potential
@@ -390,9 +394,11 @@ __create_page_tables:
         * Map the kernel image (starting with PHYS_OFFSET).
         */
        mov     x0, x26                         // swapper_pg_dir
-       mov     x5, #PAGE_OFFSET
+       ldr     x5, =KIMAGE_VADDR
+       add     x5, x5, x23                     // add KASLR displacement
        create_pgd_entry x0, x5, x3, x6
-       ldr     x6, =KERNEL_END                 // __va(KERNEL_END)
+       ldr     w6, kernel_img_size
+       add     x6, x6, x5
        mov     x3, x24                         // phys offset
        create_block_map x0, x7, x3, x5, x6
 
@@ -406,9 +412,11 @@ __create_page_tables:
        dmb     sy
        bl      __inval_cache_range
 
-       mov     lr, x27
-       ret
+       ret     x28
 ENDPROC(__create_page_tables)
+
+kernel_img_size:
+       .long   _end - (_head - TEXT_OFFSET)
        .ltorg
 
 /*
@@ -416,22 +424,80 @@ ENDPROC(__create_page_tables)
  */
        .set    initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
+       mov     x28, lr                         // preserve LR
+       adr_l   x8, vectors                     // load VBAR_EL1 with virtual
+       msr     vbar_el1, x8                    // vector table address
+       isb
+
        // Clear BSS
        adr_l   x0, __bss_start
        mov     x1, xzr
        adr_l   x2, __bss_stop
        sub     x2, x2, x0
        bl      __pi_memset
+       dsb     ishst                           // Make zero page visible to PTW
+
+#ifdef CONFIG_RELOCATABLE
+
+       /*
+        * Iterate over each entry in the relocation table, and apply the
+        * relocations in place.
+        */
+       adr_l   x8, __dynsym_start              // start of symbol table
+       adr_l   x9, __reloc_start               // start of reloc table
+       adr_l   x10, __reloc_end                // end of reloc table
+
+0:     cmp     x9, x10
+       b.hs    2f
+       ldp     x11, x12, [x9], #24
+       ldr     x13, [x9, #-8]
+       cmp     w12, #R_AARCH64_RELATIVE
+       b.ne    1f
+       add     x13, x13, x23                   // relocate
+       str     x13, [x11, x23]
+       b       0b
+
+1:     cmp     w12, #R_AARCH64_ABS64
+       b.ne    0b
+       add     x12, x12, x12, lsl #1           // symtab offset: 24x top word
+       add     x12, x8, x12, lsr #(32 - 3)     // ... shifted into bottom word
+       ldrsh   w14, [x12, #6]                  // Elf64_Sym::st_shndx
+       ldr     x15, [x12, #8]                  // Elf64_Sym::st_value
+       cmp     w14, #-0xf                      // SHN_ABS (0xfff1) ?
+       add     x14, x15, x23                   // relocate
+       csel    x15, x14, x15, ne
+       add     x15, x13, x15
+       str     x15, [x11, x23]
+       b       0b
+
+2:     adr_l   x8, kimage_vaddr                // make relocated kimage_vaddr
+       dc      cvac, x8                        // value visible to secondaries
+       dsb     sy                              // with MMU off
+#endif
 
        adr_l   sp, initial_sp, x4
        mov     x4, sp
        and     x4, x4, #~(THREAD_SIZE - 1)
        msr     sp_el0, x4                      // Save thread_info
        str_l   x21, __fdt_pointer, x5          // Save FDT pointer
-       str_l   x24, memstart_addr, x6          // Save PHYS_OFFSET
+
+       ldr_l   x4, kimage_vaddr                // Save the offset between
+       sub     x4, x4, x24                     // the kernel virtual and
+       str_l   x4, kimage_voffset, x5          // physical mappings
+
        mov     x29, #0
 #ifdef CONFIG_KASAN
        bl      kasan_early_init
+#endif
+#ifdef CONFIG_RANDOMIZE_BASE
+       cbnz    x23, 0f                         // already running randomized?
+       mov     x0, x21                         // pass FDT address in x0
+       bl      kaslr_early_init                // parse FDT for KASLR options
+       cbz     x0, 0f                          // KASLR disabled? just proceed
+       mov     x23, x0                         // record KASLR offset
+       ret     x28                             // we must enable KASLR, return
+                                               // to __enable_mmu()
+0:
 #endif
        b       start_kernel
 ENDPROC(__mmap_switched)
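
For readers who find the relocation loop above easier in C, here is a hedged rendering of its R_AARCH64_RELATIVE branch (the R_AARCH64_ABS64 case additionally folds in the symbol value looked up via __dynsym_start); the struct layout and names are illustrative:

    #include <stdint.h>

    typedef struct {
            uint64_t r_offset;      /* where to apply the relocation */
            uint64_t r_info;        /* relocation type lives in the low 32 bits */
            int64_t  r_addend;
    } elf64_rela;

    #define R_AARCH64_RELATIVE 1027

    /* Apply RELATIVE relocations with the image displaced by 'kaslr_offset'
     * (the assembly keeps the displacement in x23). */
    static void apply_relative_relocs(elf64_rela *rela, elf64_rela *end,
                                      uint64_t kaslr_offset)
    {
            for (; rela < end; rela++) {
                    if ((uint32_t)rela->r_info != R_AARCH64_RELATIVE)
                            continue;
                    *(uint64_t *)(rela->r_offset + kaslr_offset) =
                            rela->r_addend + kaslr_offset;
            }
    }
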
@@ -441,6 +507,10 @@ ENDPROC(__mmap_switched)
  * hotplug and needs to have the same protections as the text region
  */
        .section ".text","ax"
+
+ENTRY(kimage_vaddr)
+       .quad           _text - TEXT_OFFSET
+
 /*
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
@@ -631,13 +701,20 @@ ENTRY(secondary_startup)
        adrp    x26, swapper_pg_dir
        bl      __cpu_setup                     // initialise processor
 
-       ldr     x21, =secondary_data
-       ldr     x27, =__secondary_switched      // address to jump to after enabling the MMU
+       ldr     x8, kimage_vaddr
+       ldr     w9, 0f
+       sub     x27, x8, w9, sxtw               // address to jump to after enabling the MMU
        b       __enable_mmu
 ENDPROC(secondary_startup)
+0:     .long   (_text - TEXT_OFFSET) - __secondary_switched
 
 ENTRY(__secondary_switched)
-       ldr     x0, [x21]                       // get secondary_data.stack
+       adr_l   x5, vectors
+       msr     vbar_el1, x5
+       isb
+
+       adr_l   x0, secondary_data
+       ldr     x0, [x0, #CPU_BOOT_STACK]       // get secondary_data.stack
        mov     sp, x0
        and     x0, x0, #~(THREAD_SIZE - 1)
        msr     sp_el0, x0                      // save thread_info
@@ -645,6 +722,29 @@ ENTRY(__secondary_switched)
        b       secondary_start_kernel
 ENDPROC(__secondary_switched)
 
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ *
+ * update_early_cpu_boot_status status, tmp1, tmp2
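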
+ *  - Corrupts tmp1, tmp2
+ *  - Writes 'status' to __early_cpu_boot_status and makes sure
+ *    it is committed to memory.
+ */
+
+       .macro  update_early_cpu_boot_status status, tmp1, tmp2
+       mov     \tmp2, #\status
+       str_l   \tmp2, __early_cpu_boot_status, \tmp1
+       dmb     sy
+       dc      ivac, \tmp1                     // Invalidate potentially stale cache line
+       .endm
+
+       .pushsection    .data..cacheline_aligned
+       .align  L1_CACHE_SHIFT
+ENTRY(__early_cpu_boot_status)
+       .long   0
+       .popsection
+
 /*
  * Enable the MMU.
  *
@@ -658,12 +758,12 @@ ENDPROC(__secondary_switched)
  */
        .section        ".idmap.text", "ax"
 __enable_mmu:
+       mrs     x18, sctlr_el1                  // preserve old SCTLR_EL1 value
        mrs     x1, ID_AA64MMFR0_EL1
        ubfx    x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
        cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
        b.ne    __no_granule_support
-       ldr     x5, =vectors
-       msr     vbar_el1, x5
+       update_early_cpu_boot_status 0, x1, x2
        msr     ttbr0_el1, x25                  // load TTBR0
        msr     ttbr1_el1, x26                  // load TTBR1
        isb
@@ -677,10 +777,33 @@ __enable_mmu:
        ic      iallu
        dsb     nsh
        isb
+#ifdef CONFIG_RANDOMIZE_BASE
+       mov     x19, x0                         // preserve new SCTLR_EL1 value
+       blr     x27
+
+       /*
+        * If we return here, we have a KASLR displacement in x23 which we need
+        * to take into account by discarding the current kernel mapping and
+        * creating a new one.
+        */
+       msr     sctlr_el1, x18                  // disable the MMU
+       isb
+       bl      __create_page_tables            // recreate kernel mapping
+
+       msr     sctlr_el1, x19                  // re-enable the MMU
+       isb
+       ic      ialluis                         // flush instructions fetched
+       isb                                     // via old mapping
+       add     x27, x27, x23                   // relocated __mmap_switched
+#endif
        br      x27
 ENDPROC(__enable_mmu)
 
 __no_granule_support:
+       /* Indicate that this CPU can't boot and is stuck in the kernel */
+       update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
+1:
        wfe
-       b __no_granule_support
+       wfi
+       b 1b
 ENDPROC(__no_granule_support)
index 352f7abd91c998f297b229d00e5cbb2f4882e81c..5e360ce88f10ba4110697552a47ea8c42053a02f 100644 (file)
  * There aren't any ELF relocations we can use to endian-swap values known only
  * at link time (e.g. the subtraction of two symbol addresses), so we must get
  * the linker to endian-swap certain values before emitting them.
+ *
+ * Note that, in order for this to work when building the ELF64 PIE executable
+ * (for KASLR), these values should not be referenced via R_AARCH64_ABS64
+ * relocations, since these are fixed up at runtime rather than at build time
+ * when PIE is in effect. So we need to split them up in 32-bit high and low
+ * words.
  */
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define DATA_LE64(data)                                        \
-       ((((data) & 0x00000000000000ff) << 56) |        \
-        (((data) & 0x000000000000ff00) << 40) |        \
-        (((data) & 0x0000000000ff0000) << 24) |        \
-        (((data) & 0x00000000ff000000) << 8)  |        \
-        (((data) & 0x000000ff00000000) >> 8)  |        \
-        (((data) & 0x0000ff0000000000) >> 24) |        \
-        (((data) & 0x00ff000000000000) >> 40) |        \
-        (((data) & 0xff00000000000000) >> 56))
+#define DATA_LE32(data)                                \
+       ((((data) & 0x000000ff) << 24) |        \
+        (((data) & 0x0000ff00) << 8)  |        \
+        (((data) & 0x00ff0000) >> 8)  |        \
+        (((data) & 0xff000000) >> 24))
 #else
-#define DATA_LE64(data) ((data) & 0xffffffffffffffff)
+#define DATA_LE32(data) ((data) & 0xffffffff)
 #endif
 
+#define DEFINE_IMAGE_LE64(sym, data)                           \
+       sym##_lo32 = DATA_LE32((data) & 0xffffffff);            \
+       sym##_hi32 = DATA_LE32((data) >> 32)
+
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE 1
+#define __HEAD_FLAG_BE         1
 #else
-#define __HEAD_FLAG_BE 0
+#define __HEAD_FLAG_BE         0
 #endif
 
-#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+#define __HEAD_FLAG_PAGE_SIZE  ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAG_PHYS_BASE  1
 
-#define __HEAD_FLAGS   ((__HEAD_FLAG_BE << 0) |        \
-                        (__HEAD_FLAG_PAGE_SIZE << 1))
+#define __HEAD_FLAGS           ((__HEAD_FLAG_BE << 0) |        \
+                                (__HEAD_FLAG_PAGE_SIZE << 1) | \
+                                (__HEAD_FLAG_PHYS_BASE << 3))
 
 /*
  * These will output as part of the Image header, which should be little-endian
@@ -58,9 +67,9 @@
  * endian swapped in head.S, all are done here for consistency.
  */
 #define HEAD_SYMBOLS                                           \
-       _kernel_size_le         = DATA_LE64(_end - _text);      \
-       _kernel_offset_le       = DATA_LE64(TEXT_OFFSET);       \
-       _kernel_flags_le        = DATA_LE64(__HEAD_FLAGS);
+       DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text);       \
+       DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET);      \
+       DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
 
 #ifdef CONFIG_EFI
 
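
A quick hedged sanity check of the new split, in plain userspace C with an arbitrary example value: byte-swapping each 32-bit half and emitting the low word first (which is presumably what the le64sym macro used in head.S does) reproduces the little-endian byte order the old DATA_LE64 gave.

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t data_le32(uint32_t x)   /* the CONFIG_CPU_BIG_ENDIAN case */
    {
            return ((x & 0x000000ff) << 24) | ((x & 0x0000ff00) << 8) |
                   ((x & 0x00ff0000) >> 8)  | ((x & 0xff000000) >> 24);
    }

    int main(void)
    {
            uint64_t v = 0x80000;           /* e.g. a typical TEXT_OFFSET */
            uint32_t lo32 = data_le32((uint32_t)(v & 0xffffffff));
            uint32_t hi32 = data_le32((uint32_t)(v >> 32));

            /* the _lo32 word precedes the _hi32 word in the Image header, so
             * the eight bytes stay little-endian regardless of build endianness */
            printf("lo32=0x%08x hi32=0x%08x\n", lo32, hi32);
            return 0;
    }
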
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
new file mode 100644 (file)
index 0000000..5829839
--- /dev/null
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+
+u64 __read_mostly module_alloc_base;
+u16 __initdata memstart_offset_seed;
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+       int node, len;
+       u64 *prop;
+       u64 ret;
+
+       node = fdt_path_offset(fdt, "/chosen");
+       if (node < 0)
+               return 0;
+
+       prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+       if (!prop || len != sizeof(u64))
+               return 0;
+
+       ret = fdt64_to_cpu(*prop);
+       *prop = 0;
+       return ret;
+}
+
+static __init const u8 *get_cmdline(void *fdt)
+{
+       static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+
+       if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+               int node;
+               const u8 *prop;
+
+               node = fdt_path_offset(fdt, "/chosen");
+               if (node < 0)
+                       goto out;
+
+               prop = fdt_getprop(fdt, node, "bootargs", NULL);
+               if (!prop)
+                       goto out;
+               return prop;
+       }
+out:
+       return default_cmdline;
+}
+
+extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
+                                      pgprot_t prot);
+
+/*
+ * This routine will be executed with the kernel mapped at its default virtual
+ * address, and if it returns successfully, the kernel will be remapped, and
+ * start_kernel() will be executed from a randomized virtual offset. The
+ * relocation will cause all absolute references (e.g., static variables
+ * containing function pointers) to be reinitialized, and zero-initialized
+ * .bss variables will be reset to 0.
+ */
+u64 __init kaslr_early_init(u64 dt_phys)
+{
+       void *fdt;
+       u64 seed, offset, mask, module_range;
+       const u8 *cmdline, *str;
+       int size;
+
+       /*
+        * Set a reasonable default for module_alloc_base in case
+        * we end up running with module randomization disabled.
+        */
+       module_alloc_base = (u64)_etext - MODULES_VSIZE;
+
+       /*
+        * Try to map the FDT early. If this fails, we simply bail,
+        * and proceed with KASLR disabled. We will make another
+        * attempt at mapping the FDT in setup_machine()
+        */
+       early_fixmap_init();
+       fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+       if (!fdt)
+               return 0;
+
+       /*
+        * Retrieve (and wipe) the seed from the FDT
+        */
+       seed = get_kaslr_seed(fdt);
+       if (!seed)
+               return 0;
+
+       /*
+        * Check if 'nokaslr' appears on the command line, and
+        * return 0 if that is the case.
+        */
+       cmdline = get_cmdline(fdt);
+       str = strstr(cmdline, "nokaslr");
+       if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+               return 0;
+
+       /*
+        * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+        * kernel image offset from the seed. Let's place the kernel in the
+        * lower half of the VMALLOC area (VA_BITS - 2).
+        * Even if we could randomize at page granularity for 16k and 64k pages,
+        * let's always round to 2 MB so we don't interfere with the ability to
+        * map using contiguous PTEs
+        */
+       mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
+       offset = seed & mask;
+
+       /* use the top 16 bits to randomize the linear region */
+       memstart_offset_seed = seed >> 48;
+
+       /*
+        * The kernel Image should not extend across a 1GB/32MB/512MB alignment
+        * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
+        * happens, increase the KASLR offset by the size of the kernel image.
+        */
+       if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
+           (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+               offset = (offset + (u64)(_end - _text)) & mask;
+
+       if (IS_ENABLED(CONFIG_KASAN))
+               /*
+                * KASAN does not expect the module region to intersect the
+                * vmalloc region, since shadow memory is allocated for each
+                * module at load time, whereas the vmalloc region is shadowed
+                * by KASAN zero pages. So keep modules out of the vmalloc
+                * region if KASAN is enabled.
+                */
+               return offset;
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+               /*
+                * Randomize the module region independently from the core
+                * kernel. This prevents modules from leaking any information
+                * about the address of the kernel itself, but results in
+                * branches between modules and the core kernel that are
+                * resolved via PLTs. (Branches between modules will be
+                * resolved normally.)
+                */
+               module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
+               module_alloc_base = VMALLOC_START;
+       } else {
+               /*
+                * Randomize the module region by setting module_alloc_base to
+                * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+                * _stext). This guarantees that the resulting region still
+                * covers [_stext, _etext], and that all relative branches can
+                * be resolved without veneers.
+                */
+               module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+               module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
+       }
+
+       /* use the lower 21 bits to randomize the base of the module region */
+       module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
+       module_alloc_base &= PAGE_MASK;
+
+       return offset;
+}
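
To make the offset arithmetic above concrete, a hedged worked example in plain C, with a made-up seed and assuming VA_BITS = 48:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t seed = 0x123456789abcdef0ULL;  /* made-up kaslr-seed value */
            int va_bits = 48;                        /* assumed configuration */

            /* lower half of the vmalloc range, rounded to 2 MB granules */
            uint64_t mask = ((1ULL << (va_bits - 2)) - 1) & ~((1ULL << 21) - 1);
            uint64_t offset = seed & mask;
            uint16_t memstart_seed = (uint16_t)(seed >> 48);

            /* here: mask = 0x00003fffffe00000, offset = 0x000016789aa00000 */
            printf("offset=0x%016llx memstart_seed=0x%04x\n",
                   (unsigned long long)offset, (unsigned)memstart_seed);
            return 0;
    }
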
index bcac81e600b9af09341cffdd80d83880626f77b2..b67531a13136dd691bba7799af4c1664dc84b3e4 100644 (file)
@@ -292,8 +292,8 @@ static struct notifier_block kgdb_notifier = {
 };
 
 /*
- * kgdb_arch_init - Perform any architecture specific initalization.
- * This function will handle the initalization of any architecture
+ * kgdb_arch_init - Perform any architecture specific initialization.
+ * This function will handle the initialization of any architecture
  * specific callbacks.
  */
 int kgdb_arch_init(void)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
new file mode 100644 (file)
index 0000000..1ce90d8
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+
+struct plt_entry {
+       /*
+        * A program that conforms to the AArch64 Procedure Call Standard
+        * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+        * IP1 (x17) may be inserted at any branch instruction that is
+        * exposed to a relocation that supports long branches. Since that
+        * is exactly what we are dealing with here, we are free to use x16
+        * as a scratch register in the PLT veneers.
+        */
+       __le32  mov0;   /* movn x16, #0x....                    */
+       __le32  mov1;   /* movk x16, #0x...., lsl #16           */
+       __le32  mov2;   /* movk x16, #0x...., lsl #32           */
+       __le32  br;     /* br   x16                             */
+};
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+                         Elf64_Sym *sym)
+{
+       struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
+       int i = mod->arch.plt_num_entries;
+       u64 val = sym->st_value + rela->r_addend;
+
+       /*
+        * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
+        * which are listed in the ELF symtab section, but without a type
+        * or a size.
+        * So, similar to how the module loader uses the Elf64_Sym::st_value
+        * field to store the resolved addresses of undefined symbols, let's
+        * borrow the Elf64_Sym::st_size field (whose value is never used by
+        * the module loader, even for symbols that are defined) to record
+        * the address of a symbol's associated PLT entry as we emit it for a
+        * zero addend relocation (which is the only kind we have to deal with
+        * in practice). This allows us to find duplicates without having to
+        * go through the table every time.
+        */
+       if (rela->r_addend == 0 && sym->st_size != 0) {
+               BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
+               return sym->st_size;
+       }
+
+       mod->arch.plt_num_entries++;
+       BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
+
+       /*
+        * MOVK/MOVN/MOVZ opcode:
+        * +--------+------------+--------+-----------+-------------+---------+
+        * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+        * +--------+------------+--------+-----------+-------------+---------+
+        *
+        * Rd     := 0x10 (x16)
+        * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+        * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+        * sf     := 1 (64-bit variant)
+        */
+       plt[i] = (struct plt_entry){
+               cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+               cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+               cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+               cpu_to_le32(0xd61f0200)
+       };
+
+       if (rela->r_addend == 0)
+               sym->st_size = (u64)&plt[i];
+
+       return (u64)&plt[i];
+}
+
+#define cmp_3way(a,b)  ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+       const Elf64_Rela *x = a, *y = b;
+       int i;
+
+       /* sort by type, symbol index and addend */
+       i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+       if (i == 0)
+               i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+       if (i == 0)
+               i = cmp_3way(x->r_addend, y->r_addend);
+       return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+       /*
+        * Entries are sorted by type, symbol index and addend. That means
+        * that, if a duplicate entry exists, it must be in the preceding
+        * slot.
+        */
+       return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+       unsigned int ret = 0;
+       Elf64_Sym *s;
+       int i;
+
+       for (i = 0; i < num; i++) {
+               switch (ELF64_R_TYPE(rela[i].r_info)) {
+               case R_AARCH64_JUMP26:
+               case R_AARCH64_CALL26:
+                       /*
+                        * We only have to consider branch targets that resolve
+                        * to undefined symbols. This is not simply a heuristic,
+                        * it is a fundamental limitation, since the PLT itself
+                        * is part of the module, and needs to be within 128 MB
+                        * as well, so modules can never grow beyond that limit.
+                        */
+                       s = syms + ELF64_R_SYM(rela[i].r_info);
+                       if (s->st_shndx != SHN_UNDEF)
+                               break;
+
+                       /*
+                        * Jump relocations with non-zero addends against
+                        * undefined symbols are supported by the ELF spec, but
+                        * do not occur in practice (e.g., 'jump n bytes past
+                        * the entry point of undefined function symbol f').
+                        * So we need to support them, but there is no need to
+                        * take them into consideration when trying to optimize
+                        * this code. So let's only check for duplicates when
+                        * the addend is zero: this allows us to record the PLT
+                        * entry address in the symbol table itself, rather than
+                        * having to search the list for duplicates each time we
+                        * emit one.
+                        */
+                       if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
+                               ret++;
+                       break;
+               }
+       }
+       return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+                             char *secstrings, struct module *mod)
+{
+       unsigned long plt_max_entries = 0;
+       Elf64_Sym *syms = NULL;
+       int i;
+
+       /*
+        * Find the empty .plt section so we can expand it to store the PLT
+        * entries. Record the symtab address as well.
+        */
+       for (i = 0; i < ehdr->e_shnum; i++) {
+               if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
+                       mod->arch.plt = sechdrs + i;
+               else if (sechdrs[i].sh_type == SHT_SYMTAB)
+                       syms = (Elf64_Sym *)sechdrs[i].sh_addr;
+       }
+
+       if (!mod->arch.plt) {
+               pr_err("%s: module PLT section missing\n", mod->name);
+               return -ENOEXEC;
+       }
+       if (!syms) {
+               pr_err("%s: module symtab section missing\n", mod->name);
+               return -ENOEXEC;
+       }
+
+       for (i = 0; i < ehdr->e_shnum; i++) {
+               Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+               int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+               Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
+
+               if (sechdrs[i].sh_type != SHT_RELA)
+                       continue;
+
+               /* ignore relocations that operate on non-exec sections */
+               if (!(dstsec->sh_flags & SHF_EXECINSTR))
+                       continue;
+
+               /* sort by type, symbol index and addend */
+               sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+               plt_max_entries += count_plts(syms, rels, numrels);
+       }
+
+       mod->arch.plt->sh_type = SHT_NOBITS;
+       mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+       mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+       mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
+       mod->arch.plt_num_entries = 0;
+       mod->arch.plt_max_entries = plt_max_entries;
+       return 0;
+}
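
The 128 MB limit discussed in the comments above comes from the AArch64 b/bl encoding: the branch target is a signed 26-bit word offset, i.e. roughly +/-2^25 instructions of 4 bytes each. A minimal user-space sketch of the resulting range check (needs_plt_veneer and BL_RANGE are illustrative names, not kernel symbols) might look like this:

/*
 * Sketch only: decide whether a branch to 'target_addr' is out of b/bl
 * range and therefore has to be routed through a PLT veneer that loads
 * the full 64-bit address into a scratch register and branches to it.
 */
#include <stdbool.h>
#include <stdint.h>

#define BL_RANGE	(128 * 1024 * 1024)	/* approx. +/-128 MB reachable by b/bl */

static bool needs_plt_veneer(uint64_t branch_addr, uint64_t target_addr)
{
	int64_t offset = (int64_t)(target_addr - branch_addr);

	return offset < -(int64_t)BL_RANGE || offset >= (int64_t)BL_RANGE;
}

This mirrors the decision apply_relocate_add() makes further down, where a PLT entry is emitted only after reloc_insn_imm() reports -ERANGE for an R_AARCH64_JUMP26/CALL26 relocation.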
index 93e970231ca9d5a6a69bf721fadc8cfc5b7344c2..7f316982ce00186262728518f3a03f7871fb7dd7 100644 (file)
@@ -34,10 +34,26 @@ void *module_alloc(unsigned long size)
 {
        void *p;
 
-       p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+       p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+                               module_alloc_base + MODULES_VSIZE,
                                GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
                                NUMA_NO_NODE, __builtin_return_address(0));
 
+       if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+           !IS_ENABLED(CONFIG_KASAN))
+               /*
+                * KASAN can only deal with module allocations being served
+                * from the reserved module region, since the remainder of
+                * the vmalloc region is already backed by zero shadow pages,
+                * and punching holes into it is non-trivial. Since the module
+                * region is not randomized when KASAN is enabled, it is even
+                * less likely that the module region gets exhausted, so we
+                * can simply omit this fallback in that case.
+                */
+               p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
+                               VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+                               NUMA_NO_NODE, __builtin_return_address(0));
+
        if (p && (kasan_module_alloc(p, size) < 0)) {
                vfree(p);
                return NULL;
@@ -361,6 +377,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
                case R_AARCH64_CALL26:
                        ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
                                             AARCH64_INSN_IMM_26);
+
+                       if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+                           ovf == -ERANGE) {
+                               val = module_emit_plt_entry(me, &rel[i], sym);
+                               ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
+                                                    26, AARCH64_INSN_IMM_26);
+                       }
                        break;
 
                default:
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
new file mode 100644 (file)
index 0000000..8949f6c
--- /dev/null
@@ -0,0 +1,3 @@
+SECTIONS {
+       .plt (NOLOAD) : { BYTE(0) }
+}
index 88d742ba19d50590561c741ff3a1d48ba49c0ce3..80624829db613961b7a088ce18d8591361b448c7 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/notifier.h>
 #include <trace/events/power.h>
 
+#include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
 #include <asm/fpsimd.h>
@@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
        } else {
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->pstate = PSR_MODE_EL1h;
+               if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+                   cpus_have_cap(ARM64_HAS_UAO))
+                       childregs->pstate |= PSR_UAO_BIT;
                p->thread.cpu_context.x19 = stack_start;
                p->thread.cpu_context.x20 = stk_sz;
        }
@@ -308,6 +312,17 @@ static void tls_thread_switch(struct task_struct *next)
        : : "r" (tpidr), "r" (tpidrro));
 }
 
+/* Restore the UAO state depending on next's addr_limit */
+static void uao_thread_switch(struct task_struct *next)
+{
+       if (IS_ENABLED(CONFIG_ARM64_UAO)) {
+               if (task_thread_info(next)->addr_limit == KERNEL_DS)
+                       asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+               else
+                       asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
+       }
+}
+
 /*
  * Thread switching.
  */
@@ -320,6 +335,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
        tls_thread_switch(next);
        hw_breakpoint_thread_switch(next);
        contextidr_thread_switch(next);
+       uao_thread_switch(next);
 
        /*
         * Complete any pending TLB or cache maintenance on this CPU in case
index ff7f13239515676262864f3c9d1a7a938c7e63d3..3f6cd5c5234f1b565d2278987a59d0d476788283 100644 (file)
@@ -500,7 +500,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
        if (ret)
                return ret;
 
-       if (!valid_user_regs(&newregs))
+       if (!valid_user_regs(&newregs, target))
                return -EINVAL;
 
        task_pt_regs(target)->user_regs = newregs;
@@ -770,7 +770,7 @@ static int compat_gpr_set(struct task_struct *target,
 
        }
 
-       if (valid_user_regs(&newregs.user_regs))
+       if (valid_user_regs(&newregs.user_regs, target))
                *task_pt_regs(target) = newregs;
        else
                ret = -EINVAL;
@@ -1272,3 +1272,79 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
 }
+
+/*
+ * Bits which are always architecturally RES0 per ARM DDI 0487A.h
+ * Userspace cannot use these until they have an architectural meaning.
+ * We also reserve IL for the kernel; SS is handled dynamically.
+ */
+#define SPSR_EL1_AARCH64_RES0_BITS \
+       (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \
+        GENMASK_ULL(5, 5))
+#define SPSR_EL1_AARCH32_RES0_BITS \
+       (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20))
+
+static int valid_compat_regs(struct user_pt_regs *regs)
+{
+       regs->pstate &= ~SPSR_EL1_AARCH32_RES0_BITS;
+
+       if (!system_supports_mixed_endian_el0()) {
+               if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+                       regs->pstate |= COMPAT_PSR_E_BIT;
+               else
+                       regs->pstate &= ~COMPAT_PSR_E_BIT;
+       }
+
+       if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) &&
+           (regs->pstate & COMPAT_PSR_A_BIT) == 0 &&
+           (regs->pstate & COMPAT_PSR_I_BIT) == 0 &&
+           (regs->pstate & COMPAT_PSR_F_BIT) == 0) {
+               return 1;
+       }
+
+       /*
+        * Force PSR to a valid 32-bit EL0t, preserving the same bits as
+        * arch/arm.
+        */
+       regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT |
+                       COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT |
+                       COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK |
+                       COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT |
+                       COMPAT_PSR_T_BIT;
+       regs->pstate |= PSR_MODE32_BIT;
+
+       return 0;
+}
+
+static int valid_native_regs(struct user_pt_regs *regs)
+{
+       regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS;
+
+       if (user_mode(regs) && !(regs->pstate & PSR_MODE32_BIT) &&
+           (regs->pstate & PSR_D_BIT) == 0 &&
+           (regs->pstate & PSR_A_BIT) == 0 &&
+           (regs->pstate & PSR_I_BIT) == 0 &&
+           (regs->pstate & PSR_F_BIT) == 0) {
+               return 1;
+       }
+
+       /* Force PSR to a valid 64-bit EL0t */
+       regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT;
+
+       return 0;
+}
+
+/*
+ * Are the current registers suitable for user mode? (used to maintain
+ * security in signal handlers)
+ */
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task)
+{
+       if (!test_tsk_thread_flag(task, TIF_SINGLESTEP))
+               regs->pstate &= ~DBG_SPSR_SS;
+
+       if (is_compat_thread(task_thread_info(task)))
+               return valid_compat_regs(regs);
+       else
+               return valid_native_regs(regs);
+}
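
The two RES0 masks above carry most of the rework: valid_user_regs() now silently clears reserved pstate bits and only forces a known-good EL0 state when the mode or exception-mask bits are wrong. A small user-space illustration of the masking (GENMASK_ULL is re-implemented here only to keep the snippet self-contained; the forged pstate value is made up):

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define SPSR_EL1_AARCH64_RES0_BITS \
	(GENMASK_ULL(63, 32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \
	 GENMASK_ULL(5, 5))

int main(void)
{
	/* a forged pstate: N flag (bit 31, legal) plus RES0 bits 40 and 5 */
	uint64_t pstate = (1ULL << 31) | (1ULL << 40) | (1ULL << 5);

	pstate &= ~SPSR_EL1_AARCH64_RES0_BITS;

	/* prints 0x80000000: only the architecturally defined N flag remains */
	printf("sanitised pstate: %#llx\n", (unsigned long long)pstate);
	return 0;
}

With this example value, the reserved bits are dropped rather than rejected, which is what a debugger writing pstate via ptrace will observe after the change.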
index 450987d99b9b9e497411a79f34781a3cb242df6e..9dc67769b6a465bad612e43e0e6e135581954daf 100644 (file)
@@ -62,6 +62,7 @@
 #include <asm/memblock.h>
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/mmu_context.h>
 
 phys_addr_t __fdt_pointer __initdata;
 
@@ -313,6 +314,12 @@ void __init setup_arch(char **cmdline_p)
         */
        local_async_enable();
 
+       /*
+        * TTBR0 is only used for the identity mapping at this stage. Make it
+        * point to zero page to avoid speculatively fetching new entries.
+        */
+       cpu_uninstall_idmap();
+
        efi_init();
        arm64_memblock_init();
 
@@ -381,3 +388,32 @@ static int __init topology_init(void)
        return 0;
 }
 subsys_initcall(topology_init);
+
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+                             void *p)
+{
+       u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
+               pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
+                        kaslr_offset, KIMAGE_VADDR);
+       } else {
+               pr_emerg("Kernel Offset: disabled\n");
+       }
+       return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+       .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &kernel_offset_notifier);
+       return 0;
+}
+__initcall(register_kernel_offset_dumper);
index e18c48cb6db1cb844411ad0c7919ac7ea7799f41..a8eafdbc7cb824f3f3fec09da2e75c4ae19ba41c 100644 (file)
@@ -115,7 +115,7 @@ static int restore_sigframe(struct pt_regs *regs,
         */
        regs->syscallno = ~0UL;
 
-       err |= !valid_user_regs(&regs->user_regs);
+       err |= !valid_user_regs(&regs->user_regs, current);
 
        if (err == 0) {
                struct fpsimd_context *fpsimd_ctx =
@@ -307,7 +307,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
        /*
         * Check that the resulting registers are actually sane.
         */
-       ret |= !valid_user_regs(&regs->user_regs);
+       ret |= !valid_user_regs(&regs->user_regs, current);
 
        /*
         * Fast forward the stepping logic so we step into the signal
index 71ef6dc89ae509cd299eab3f1959ffe4e8f96a33..b7063de792f7845aab4e8eb40cb6f5b82f44ce21 100644 (file)
@@ -166,7 +166,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 #ifdef BUS_MCEERR_AO
                /*
                 * Other callers might not initialize the si_lsb field,
-                * so check explicitely for the right codes here.
+                * so check explicitly for the right codes here.
                 */
                if (from->si_signo == SIGBUS &&
                    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
@@ -356,7 +356,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
         */
        regs->syscallno = ~0UL;
 
-       err |= !valid_user_regs(&regs->user_regs);
+       err |= !valid_user_regs(&regs->user_regs, current);
 
        aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
        if (err == 0)
index 460765799c642396f8b5d2c202c846b481974ff9..b2d5f4ee9a1cd6676d7124829fe51a266b68a965 100644 (file)
@@ -63,6 +63,8 @@
  * where to place its SVC stack
  */
 struct secondary_data secondary_data;
+/* Number of CPUs which aren't online, but looping in kernel text. */
+int cpus_stuck_in_kernel;
 
 enum ipi_msg_type {
        IPI_RESCHEDULE,
@@ -70,8 +72,19 @@ enum ipi_msg_type {
        IPI_CPU_STOP,
        IPI_TIMER,
        IPI_IRQ_WORK,
+       IPI_WAKEUP
 };
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_kill(unsigned int cpu);
+#else
+static inline int op_cpu_kill(unsigned int cpu)
+{
+       return -ENOSYS;
+}
+#endif
+
+
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
  * This also gives us the initial stack to use for this CPU.
@@ -89,12 +102,14 @@ static DECLARE_COMPLETION(cpu_running);
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
        int ret;
+       long status;
 
        /*
         * We need to tell the secondary core where to find its stack and the
         * page tables.
         */
        secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+       update_cpu_boot_status(CPU_MMU_OFF);
        __flush_dcache_area(&secondary_data, sizeof(secondary_data));
 
        /*
@@ -118,6 +133,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
        }
 
        secondary_data.stack = NULL;
+       status = READ_ONCE(secondary_data.status);
+       if (ret && status) {
+
+               if (status == CPU_MMU_OFF)
+                       status = READ_ONCE(__early_cpu_boot_status);
+
+               switch (status) {
+               default:
+                       pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+                                       cpu, status);
+                       break;
+               case CPU_KILL_ME:
+                       if (!op_cpu_kill(cpu)) {
+                               pr_crit("CPU%u: died during early boot\n", cpu);
+                               break;
+                       }
+                       /* Fall through */
+                       pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+               case CPU_STUCK_IN_KERNEL:
+                       pr_crit("CPU%u: is stuck in kernel\n", cpu);
+                       cpus_stuck_in_kernel++;
+                       break;
+               case CPU_PANIC_KERNEL:
+                       panic("CPU%u detected unsupported configuration\n", cpu);
+               }
+       }
 
        return ret;
 }
@@ -149,9 +190,7 @@ asmlinkage void secondary_start_kernel(void)
         * TTBR0 is only used for the identity mapping at this stage. Make it
         * point to zero page to avoid speculatively fetching new entries.
         */
-       cpu_set_reserved_ttbr0();
-       local_flush_tlb_all();
-       cpu_set_default_tcr_t0sz();
+       cpu_uninstall_idmap();
 
        preempt_disable();
        trace_hardirqs_off();
@@ -185,6 +224,9 @@ asmlinkage void secondary_start_kernel(void)
         */
        pr_info("CPU%u: Booted secondary processor [%08x]\n",
                                         cpu, read_cpuid_id());
+       update_cpu_boot_status(CPU_BOOT_SUCCESS);
+       /* Make sure the status update is visible before we complete */
+       smp_wmb();
        set_cpu_online(cpu, true);
        complete(&cpu_running);
 
@@ -313,6 +355,30 @@ void cpu_die(void)
 }
 #endif
 
+/*
+ * Kill the calling secondary CPU, early in bringup before it is turned
+ * online.
+ */
+void cpu_die_early(void)
+{
+       int cpu = smp_processor_id();
+
+       pr_crit("CPU%d: will not boot\n", cpu);
+
+       /* Mark this CPU absent */
+       set_cpu_present(cpu, 0);
+
+#ifdef CONFIG_HOTPLUG_CPU
+       update_cpu_boot_status(CPU_KILL_ME);
+       /* Check if we can park ourselves */
+       if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+               cpu_ops[cpu]->cpu_die(cpu);
+#endif
+       update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+
+       cpu_park_loop();
+}
+
 static void __init hyp_mode_check(void)
 {
        if (is_hyp_mode_available())
@@ -445,6 +511,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
        /* map the logical cpu id to cpu MPIDR */
        cpu_logical_map(cpu_count) = hwid;
 
+       /*
+        * Set-up the ACPI parking protocol cpu entries
+        * while initializing the cpu_logical_map to
+        * avoid parsing MADT entries multiple times for
+        * nothing (ie a valid cpu_logical_map entry should
+        * contain a valid parking protocol data set to
+        * initialize the cpu if the parking protocol is
+        * the only available enable method).
+        */
+       acpi_set_mailbox_entry(cpu_count, processor);
+
        cpu_count++;
 }
 
@@ -627,6 +704,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
        S(IPI_CPU_STOP, "CPU stop interrupts"),
        S(IPI_TIMER, "Timer broadcast interrupts"),
        S(IPI_IRQ_WORK, "IRQ work interrupts"),
+       S(IPI_WAKEUP, "CPU wake-up interrupts"),
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
@@ -670,6 +748,13 @@ void arch_send_call_function_single_ipi(int cpu)
        smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+       smp_cross_call(mask, IPI_WAKEUP);
+}
+#endif
+
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
@@ -747,6 +832,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
                break;
 #endif
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+       case IPI_WAKEUP:
+               WARN_ONCE(!acpi_parking_protocol_valid(cpu),
+                         "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
+                         cpu);
+               break;
+#endif
+
        default:
                pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
                break;
index 1095aa483a1c28e5387b23895c14d7a1746268a3..66055392f445ef47a7fb3749ca6024df1ea185c9 100644 (file)
@@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
  */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
-       struct mm_struct *mm = current->active_mm;
        int ret;
        unsigned long flags;
 
@@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
        ret = __cpu_suspend_enter(arg, fn);
        if (ret == 0) {
                /*
-                * We are resuming from reset with TTBR0_EL1 set to the
-                * idmap to enable the MMU; set the TTBR0 to the reserved
-                * page tables to prevent speculative TLB allocations, flush
-                * the local tlb and set the default tcr_el1.t0sz so that
-                * the TTBR0 address space set-up is properly restored.
-                * If the current active_mm != &init_mm we entered cpu_suspend
-                * with mappings in TTBR0 that must be restored, so we switch
-                * them back to complete the address space configuration
-                * restoration before returning.
+                * We are resuming from reset with the idmap active in TTBR0_EL1.
+                * We must uninstall the idmap and restore the expected MMU
+                * state before we can possibly return to userspace.
                 */
-               cpu_set_reserved_ttbr0();
-               local_flush_tlb_all();
-               cpu_set_default_tcr_t0sz();
-
-               if (mm != &init_mm)
-                       cpu_switch_mm(mm->pgd, mm);
+               cpu_uninstall_idmap();
 
                /*
                 * Restore per-cpu offset before any kernel
index 60c1db54b41a251b4d006c17a696aa9d6837067b..82379a70ef03ff4296cb8d7fc04226a3d68e1392 100644 (file)
@@ -21,9 +21,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-       __PAGE_ALIGNED_DATA
-
        .globl vdso_start, vdso_end
+       .section .rodata
        .balign PAGE_SIZE
 vdso_start:
        .incbin "arch/arm64/kernel/vdso/vdso.so"
index e3928f578891fdd0a5d3535975d350b6489f8df1..4c56e7a0621bcea078fb616487d60f9290f85214 100644 (file)
@@ -87,15 +87,16 @@ SECTIONS
                EXIT_CALL
                *(.discard)
                *(.discard.*)
+               *(.interp .dynamic)
        }
 
-       . = PAGE_OFFSET + TEXT_OFFSET;
+       . = KIMAGE_VADDR + TEXT_OFFSET;
 
        .head.text : {
                _text = .;
                HEAD_TEXT
        }
-       ALIGN_DEBUG_RO
+       ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
        .text : {                       /* Real text segment            */
                _stext = .;             /* Text and read-only data      */
                        __exception_text_start = .;
@@ -113,13 +114,13 @@ SECTIONS
                *(.got)                 /* Global offset table          */
        }
 
-       RO_DATA(PAGE_SIZE)
-       EXCEPTION_TABLE(8)
+       ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+       RO_DATA(PAGE_SIZE)              /* everything from this point to */
+       EXCEPTION_TABLE(8)              /* _etext will be marked RO NX   */
        NOTES
-       ALIGN_DEBUG_RO
-       _etext = .;                     /* End of text and rodata section */
 
        ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+       _etext = .;                     /* End of text and rodata section */
        __init_begin = .;
 
        INIT_TEXT_SECTION(8)
@@ -150,6 +151,21 @@ SECTIONS
        .altinstr_replacement : {
                *(.altinstr_replacement)
        }
+       .rela : ALIGN(8) {
+               __reloc_start = .;
+               *(.rela .rela*)
+               __reloc_end = .;
+       }
+       .dynsym : ALIGN(8) {
+               __dynsym_start = .;
+               *(.dynsym)
+       }
+       .dynstr : {
+               *(.dynstr)
+       }
+       .hash : {
+               *(.hash)
+       }
 
        . = ALIGN(PAGE_SIZE);
        __init_end = .;
@@ -187,4 +203,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
  */
-ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
index 0689a74e6ba0b493ffc669448e34b34579c9f5ec..48f19a37b3df552a08db7b186a65ae4fad4b32e4 100644 (file)
@@ -22,7 +22,7 @@
 #include <asm/cpufeature.h>
 
 /*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * u64 __kvm_call_hyp(void *hypfn, ...);
  *
  * This is not really a variadic function in the classic C-way and care must
  * be taken when calling this to ensure parameters are passed in registers
@@ -39,7 +39,7 @@
  * used to implement __hyp_get_vectors in the same way as in
  * arch/arm64/kernel/hyp_stub.S.
  */
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN    
        hvc     #0
        ret
@@ -47,4 +47,4 @@ alternative_else
        b       __vhe_hyp_call
        nop
 alternative_endif
-ENDPROC(kvm_call_hyp)
+ENDPROC(__kvm_call_hyp)
index 053cf8b057c19d556c8fccacd41374abe0fc449f..33342a776ec75845069a93381a3465cdf435509a 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 
+#include <asm/debug-monitors.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
 
index 61ba591048454c37a92d7855e6059a3f3389ed81..7bbe3ff02602f93f87ffd8a04ac0577ce3aea038 100644 (file)
@@ -1116,7 +1116,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
        } else {
                u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
                u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
-               u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+               u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
 
                p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
                             (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
index 1a811ecf71da8a8032a1e8cda8cf686f8fc71189..c86b7909ef312009028c46ba83b375b544d9ae84 100644 (file)
@@ -4,15 +4,16 @@ lib-y         := bitops.o clear_user.o delay.o copy_from_user.o       \
                   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o       \
                   strchr.o strrchr.o
 
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
 CFLAGS_atomic_ll_sc.o  := -fcall-used-x0 -ffixed-x1 -ffixed-x2         \
                   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6          \
                   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9           \
                   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12   \
                   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15   \
-                  -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+                  -fcall-saved-x18
index a9723c71c52b20adf4e2efe69b2e43163cb2c878..5d1cad3ce6d601aa474ae9c9b8ef4c76a785912e 100644 (file)
  * Alignment fixed up by hardware.
  */
 ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        mov     x2, x1                  // save the size for fixup return
        subs    x1, x1, #8
        b.mi    2f
 1:
-USER(9f, str   xzr, [x0], #8   )
+uao_user_alternative 9f, str, sttr, xzr, x0, 8
        subs    x1, x1, #8
        b.pl    1b
 2:     adds    x1, x1, #4
        b.mi    3f
-USER(9f, str   wzr, [x0], #4   )
+uao_user_alternative 9f, str, sttr, wzr, x0, 4
        sub     x1, x1, #4
 3:     adds    x1, x1, #2
        b.mi    4f
-USER(9f, strh  wzr, [x0], #2   )
+uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
        sub     x1, x1, #2
 4:     adds    x1, x1, #1
        b.mi    5f
-USER(9f, strb  wzr, [x0]       )
+uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:     mov     x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        ret
 ENDPROC(__clear_user)
index 4699cd74f87e4af7bf69da8ce48a88a7f4f69b74..17e8306dca294ecf37fc27355956eb6bcfc5d92e 100644 (file)
@@ -34,7 +34,7 @@
  */
 
        .macro ldrb1 ptr, regB, val
-       USER(9998f, ldrb  \ptr, [\regB], \val)
+       uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
        .endm
 
        .macro strb1 ptr, regB, val
@@ -42,7 +42,7 @@
        .endm
 
        .macro ldrh1 ptr, regB, val
-       USER(9998f, ldrh  \ptr, [\regB], \val)
+       uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
        .endm
 
        .macro strh1 ptr, regB, val
@@ -50,7 +50,7 @@
        .endm
 
        .macro ldr1 ptr, regB, val
-       USER(9998f, ldr \ptr, [\regB], \val)
+       uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
        .endm
 
        .macro str1 ptr, regB, val
@@ -58,7 +58,7 @@
        .endm
 
        .macro ldp1 ptr, regB, regC, val
-       USER(9998f, ldp \ptr, \regB, [\regC], \val)
+       uao_ldp 9998f, \ptr, \regB, \regC, \val
        .endm
 
        .macro stp1 ptr, regB, regC, val
 
 end    .req    x5
 ENTRY(__copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        add     end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        mov     x0, #0                          // Nothing to copy
        ret
index 81c8fc93c100b7be7da17ebf96b1edeeb806671f..f7292dd08c840f27d39874fe7cc08aa89bdfb66d 100644 (file)
  *     x0 - bytes not copied
  */
        .macro ldrb1 ptr, regB, val
-       USER(9998f, ldrb  \ptr, [\regB], \val)
+       uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
        .endm
 
        .macro strb1 ptr, regB, val
-       USER(9998f, strb \ptr, [\regB], \val)
+       uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
        .endm
 
        .macro ldrh1 ptr, regB, val
-       USER(9998f, ldrh  \ptr, [\regB], \val)
+       uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
        .endm
 
        .macro strh1 ptr, regB, val
-       USER(9998f, strh \ptr, [\regB], \val)
+       uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
        .endm
 
        .macro ldr1 ptr, regB, val
-       USER(9998f, ldr \ptr, [\regB], \val)
+       uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
        .endm
 
        .macro str1 ptr, regB, val
-       USER(9998f, str \ptr, [\regB], \val)
+       uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
        .endm
 
        .macro ldp1 ptr, regB, regC, val
-       USER(9998f, ldp \ptr, \regB, [\regC], \val)
+       uao_ldp 9998f, \ptr, \regB, \regC, \val
        .endm
 
        .macro stp1 ptr, regB, regC, val
-       USER(9998f, stp \ptr, \regB, [\regC], \val)
+       uao_stp 9998f, \ptr, \regB, \regC, \val
        .endm
 
 end    .req    x5
 ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        add     end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        mov     x0, #0
        ret
index 512b9a7b980e98bbed9a699107e936e4b1913dca..4c1e700840b6ced5a0b2f868bfb4f37dddc8abc0 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/const.h>
 #include <asm/assembler.h>
 #include <asm/page.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
 
 /*
  * Copy a page from src to dest (both are page aligned)
  *     x1 - src
  */
 ENTRY(copy_page)
-       /* Assume cache line size is 64 bytes. */
-       prfm    pldl1strm, [x1, #64]
-1:     ldp     x2, x3, [x1]
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+       nop
+       nop
+alternative_else
+       # Prefetch two cache lines ahead.
+       prfm    pldl1strm, [x1, #128]
+       prfm    pldl1strm, [x1, #256]
+alternative_endif
+
+       ldp     x2, x3, [x1]
        ldp     x4, x5, [x1, #16]
        ldp     x6, x7, [x1, #32]
        ldp     x8, x9, [x1, #48]
-       add     x1, x1, #64
-       prfm    pldl1strm, [x1, #64]
+       ldp     x10, x11, [x1, #64]
+       ldp     x12, x13, [x1, #80]
+       ldp     x14, x15, [x1, #96]
+       ldp     x16, x17, [x1, #112]
+
+       mov     x18, #(PAGE_SIZE - 128)
+       add     x1, x1, #128
+1:
+       subs    x18, x18, #128
+
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+       nop
+alternative_else
+       prfm    pldl1strm, [x1, #384]
+alternative_endif
+
        stnp    x2, x3, [x0]
+       ldp     x2, x3, [x1]
        stnp    x4, x5, [x0, #16]
+       ldp     x4, x5, [x1, #16]
        stnp    x6, x7, [x0, #32]
+       ldp     x6, x7, [x1, #32]
        stnp    x8, x9, [x0, #48]
-       add     x0, x0, #64
-       tst     x1, #(PAGE_SIZE - 1)
-       b.ne    1b
+       ldp     x8, x9, [x1, #48]
+       stnp    x10, x11, [x0, #64]
+       ldp     x10, x11, [x1, #64]
+       stnp    x12, x13, [x0, #80]
+       ldp     x12, x13, [x1, #80]
+       stnp    x14, x15, [x0, #96]
+       ldp     x14, x15, [x1, #96]
+       stnp    x16, x17, [x0, #112]
+       ldp     x16, x17, [x1, #112]
+
+       add     x0, x0, #128
+       add     x1, x1, #128
+
+       b.gt    1b
+
+       stnp    x2, x3, [x0]
+       stnp    x4, x5, [x0, #16]
+       stnp    x6, x7, [x0, #32]
+       stnp    x8, x9, [x0, #48]
+       stnp    x10, x11, [x0, #64]
+       stnp    x12, x13, [x0, #80]
+       stnp    x14, x15, [x0, #96]
+       stnp    x16, x17, [x0, #112]
+
        ret
 ENDPROC(copy_page)
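
The rewritten copy_page() above works on 128-byte chunks and software-pipelines the loop: the loads for the next chunk are interleaved with the non-temporal stores of the current one, and the last chunk is drained after the loop. A rough C model of that control flow (a sketch of the structure only, assuming 4 KB pages; not a substitute for the hand-tuned assembly):

#include <stdint.h>
#include <string.h>

#define CHUNK	128
#define PAGE_SZ	4096		/* assumed 4 KB pages, for illustration */

static void copy_page_model(uint8_t *dst, const uint8_t *src)
{
	uint8_t cur[CHUNK];
	long remaining = PAGE_SZ - CHUNK;

	memcpy(cur, src, CHUNK);		/* prime the pipeline            */
	src += CHUNK;

	while (remaining > 0) {
		uint8_t next[CHUNK];

		memcpy(next, src, CHUNK);	/* load chunk N+1 ...            */
		memcpy(dst, cur, CHUNK);	/* ... while storing chunk N     */
		memcpy(cur, next, CHUNK);

		src += CHUNK;
		dst += CHUNK;
		remaining -= CHUNK;
	}

	memcpy(dst, cur, CHUNK);		/* drain the final chunk         */
}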
index 7512bbbc07ac39dbe8c963745281f25c2d60efa4..21faae60f9887ecbbfccb7ba1fb918839d47a291 100644 (file)
@@ -37,7 +37,7 @@
        .endm
 
        .macro strb1 ptr, regB, val
-       USER(9998f, strb \ptr, [\regB], \val)
+       uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
        .endm
 
        .macro ldrh1 ptr, regB, val
@@ -45,7 +45,7 @@
        .endm
 
        .macro strh1 ptr, regB, val
-       USER(9998f, strh \ptr, [\regB], \val)
+       uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
        .endm
 
        .macro ldr1 ptr, regB, val
@@ -53,7 +53,7 @@
        .endm
 
        .macro str1 ptr, regB, val
-       USER(9998f, str \ptr, [\regB], \val)
+       uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
        .endm
 
        .macro ldp1 ptr, regB, regC, val
        .endm
 
        .macro stp1 ptr, regB, regC, val
-       USER(9998f, stp \ptr, \regB, [\regC], \val)
+       uao_stp 9998f, \ptr, \regB, \regC, \val
        .endm
 
 end    .req    x5
 ENTRY(__copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        add     end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
            CONFIG_ARM64_PAN)
        mov     x0, #0
        ret
index ffbdec00327d0463ca0bd608b7612e471a08347d..2a4e239bd17a03441881ebc85beb4a8100824098 100644 (file)
@@ -211,7 +211,7 @@ CPU_LE( lsr tmp2, tmp2, tmp1 )
 .Lunequal_proc:
        cbz     diff, .Lremain8
 
-/*There is differnence occured in the latest comparison.*/
+/* A difference occurred in the latest comparison. */
+/* A difference occurred in the latest comparison. */
 .Lnot_limit:
 /*
 * For little endian,reverse the low significant equal bits into MSB,then
index e87f53ff5f583aeb47b3ec3187d229df380b2c50..c90c3c5f46af5e87c60c594949f22c3b1436266e 100644 (file)
@@ -24,6 +24,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
+#include <asm/smp.h>
 #include <asm/tlbflush.h>
 
 static u32 asid_bits;
@@ -40,6 +41,45 @@ static cpumask_t tlb_flush_pending;
 #define ASID_FIRST_VERSION     (1UL << asid_bits)
 #define NUM_USER_ASIDS         ASID_FIRST_VERSION
 
+/* Get the ASIDBits supported by the current CPU */
+static u32 get_cpu_asid_bits(void)
+{
+       u32 asid;
+       int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1),
+                                               ID_AA64MMFR0_ASID_SHIFT);
+
+       switch (fld) {
+       default:
+               pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n",
+                                       smp_processor_id(),  fld);
+               /* Fallthrough */
+       case 0:
+               asid = 8;
+               break;
+       case 2:
+               asid = 16;
+       }
+
+       return asid;
+}
+
+/* Check if the current cpu's ASIDBits is compatible with asid_bits */
+void verify_cpu_asid_bits(void)
+{
+       u32 asid = get_cpu_asid_bits();
+
+       if (asid < asid_bits) {
+               /*
+                * We cannot decrease the ASID size at runtime, so panic if we support
+                * fewer ASID bits than the boot CPU.
+                */
+               pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
+                               smp_processor_id(), asid, asid_bits);
+               update_cpu_boot_status(CPU_PANIC_KERNEL);
+               cpu_park_loop();
+       }
+}
+
 static void flush_context(unsigned int cpu)
 {
        int i;
@@ -187,19 +227,7 @@ switch_mm_fastpath:
 
 static int asids_init(void)
 {
-       int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
-
-       switch (fld) {
-       default:
-               pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
-               /* Fallthrough */
-       case 0:
-               asid_bits = 8;
-               break;
-       case 2:
-               asid_bits = 16;
-       }
-
+       asid_bits = get_cpu_asid_bits();
        /* If we end up with more CPUs than ASIDs, expect things to crash */
        WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
        atomic64_set(&asid_generation, ASID_FIRST_VERSION);
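
verify_cpu_asid_bits() exists because the ASID allocator sizes its generation counter and bitmap from the boot CPU. If a late secondary CPU implemented fewer ASID bits, two ASIDs that the allocator considers distinct would become indistinguishable in that CPU's TLB. A contrived user-space illustration (the ASID values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t asid_a = 0x0001;	/* allocated to task A                      */
	uint16_t asid_b = 0x0101;	/* allocated to task B, distinct in 16 bits */

	/*
	 * A CPU implementing only 8 ASID bits ignores bits 15:8, so both
	 * tasks would tag their TLB entries with the same value and could
	 * hit each other's translations.
	 */
	printf("8-bit view: A=%#x B=%#x\n", asid_a & 0xff, asid_b & 0xff);
	return 0;
}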
index 0adbebbc28037110afd911a5cfe14693101e19ea..f9271cb2f5e3d94a620946e4e36971b33a43f390 100644 (file)
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
 
-#define LOWEST_ADDR    (UL(0xffffffffffffffff) << VA_BITS)
-
 struct addr_marker {
        unsigned long start_address;
        const char *name;
 };
 
 enum address_markers_idx {
-       VMALLOC_START_NR = 0,
+       MODULES_START_NR = 0,
+       MODULES_END_NR,
+       VMALLOC_START_NR,
        VMALLOC_END_NR,
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
        VMEMMAP_START_NR,
@@ -45,12 +45,12 @@ enum address_markers_idx {
        FIXADDR_END_NR,
        PCI_START_NR,
        PCI_END_NR,
-       MODULES_START_NR,
-       MODULES_END_NR,
        KERNEL_SPACE_NR,
 };
 
 static struct addr_marker address_markers[] = {
+       { MODULES_VADDR,        "Modules start" },
+       { MODULES_END,          "Modules end" },
        { VMALLOC_START,        "vmalloc() Area" },
        { VMALLOC_END,          "vmalloc() End" },
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -61,9 +61,7 @@ static struct addr_marker address_markers[] = {
        { FIXADDR_TOP,          "Fixmap end" },
        { PCI_IO_START,         "PCI I/O start" },
        { PCI_IO_END,           "PCI I/O end" },
-       { MODULES_VADDR,        "Modules start" },
-       { MODULES_END,          "Modules end" },
-       { PAGE_OFFSET,          "Kernel Mapping" },
+       { PAGE_OFFSET,          "Linear Mapping" },
        { -1,                   NULL },
 };
 
@@ -90,6 +88,11 @@ struct prot_bits {
 
 static const struct prot_bits pte_bits[] = {
        {
+               .mask   = PTE_VALID,
+               .val    = PTE_VALID,
+               .set    = " ",
+               .clear  = "F",
+       }, {
                .mask   = PTE_USER,
                .val    = PTE_USER,
                .set    = "USR",
@@ -316,7 +319,7 @@ static int ptdump_show(struct seq_file *m, void *v)
                .marker = address_markers,
        };
 
-       walk_pgd(&st, &init_mm, LOWEST_ADDR);
+       walk_pgd(&st, &init_mm, VA_START);
 
        note_page(&st, 0, 0, 0);
        return 0;
index 79444279ba8c674316e34cfe0861c42021fa92f3..81acd4706878f85d8821f0ff924bff05adc31c97 100644 (file)
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
 
        fixup = search_exception_tables(instruction_pointer(regs));
        if (fixup)
-               regs->pc = fixup->fixup;
+               regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
 
        return fixup != NULL;
 }
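
This one-line change is the consumer side of relative exception tables: each entry stores 32-bit offsets relative to the entry itself instead of absolute 64-bit addresses, so the table is smaller and, more importantly for KASLR, needs no relocation when the kernel is loaded at a random virtual address. A sketch of how such an entry resolves to an absolute fixup address (field names follow the usual insn/fixup convention; treat the struct as illustrative):

#include <stdint.h>

struct exception_table_entry {
	int insn;	/* offset of the faulting instruction, relative to &insn */
	int fixup;	/* offset of the fixup code, relative to &fixup          */
};

static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x)
{
	/* same computation as the regs->pc assignment in the hunk above */
	return (unsigned long)&x->fixup + x->fixup;
}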
index abe2a9542b3a367c778a96051dcf3618881d6fa1..0077674b2b380cbccf0a718a71f04dfc5207791e 100644 (file)
@@ -192,6 +192,14 @@ out:
        return fault;
 }
 
+static inline int permission_fault(unsigned int esr)
+{
+       unsigned int ec       = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+       unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+       return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+}
+
 static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                                   struct pt_regs *regs)
 {
@@ -225,12 +233,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                mm_flags |= FAULT_FLAG_WRITE;
        }
 
-       /*
-        * PAN bit set implies the fault happened in kernel space, but not
-        * in the arch's user access functions.
-        */
-       if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
-               goto no_context;
+       if (permission_fault(esr) && (addr < USER_DS)) {
+               if (get_fs() == KERNEL_DS)
+                       die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+               if (!search_exception_tables(regs->pc))
+                       die("Accessing user space memory outside uaccess.h routines", regs, esr);
+       }
 
        /*
         * As per x86, we may deadlock here. However, since the kernel only
@@ -568,3 +577,16 @@ void cpu_enable_pan(void *__unused)
        config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 }
 #endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(), devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+void cpu_enable_uao(void *__unused)
+{
+       asm(SET_PSTATE_UAO(1));
+}
+#endif /* CONFIG_ARM64_UAO */
index 7802f216a67a588ab778b7d2b7649a2979e68b87..61a38eaf0895c5db483594a6411cc9b460761cb4 100644 (file)
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
 
+#include <asm/boot.h>
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 
 #include "mm.h"
 
-phys_addr_t memstart_addr __read_mostly = 0;
+/*
+ * We need to be able to catch inadvertent references to memstart_addr
+ * that occur (potentially in generic code) before arm64_memblock_init()
+ * executes, which assigns it its actual value. So use a default value
+ * that cannot be mistaken for a real physical address.
+ */
+s64 memstart_addr __read_mostly = -1;
 phys_addr_t arm64_dma_phys_limit __read_mostly;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -58,8 +67,8 @@ static int __init early_initrd(char *p)
        if (*endp == ',') {
                size = memparse(endp + 1, NULL);
 
-               initrd_start = (unsigned long)__va(start);
-               initrd_end = (unsigned long)__va(start + size);
+               initrd_start = start;
+               initrd_end = start + size;
        }
        return 0;
 }
@@ -159,7 +168,57 @@ early_param("mem", early_mem);
 
 void __init arm64_memblock_init(void)
 {
-       memblock_enforce_memory_limit(memory_limit);
+       const s64 linear_region_size = -(s64)PAGE_OFFSET;
+
+       /*
+        * Ensure that the linear region takes up exactly half of the kernel
+        * virtual address space. This way, we can distinguish a linear address
+        * from a kernel/module/vmalloc address by testing a single bit.
+        */
+       BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
+
+       /*
+        * Select a suitable value for the base of physical memory.
+        */
+       memstart_addr = round_down(memblock_start_of_DRAM(),
+                                  ARM64_MEMSTART_ALIGN);
+
+       /*
+        * Remove the memory that we will not be able to cover with the
+        * linear mapping. Take care not to clip the kernel which may be
+        * high in memory.
+        */
+       memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
+                       ULLONG_MAX);
+       if (memblock_end_of_DRAM() > linear_region_size)
+               memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+
+       /*
+        * Apply the memory limit if it was set. Since the kernel may be loaded
+        * high up in memory, add back the kernel region that must be accessible
+        * via the linear mapping.
+        */
+       if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+               memblock_enforce_memory_limit(memory_limit);
+               memblock_add(__pa(_text), (u64)(_end - _text));
+       }
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+               extern u16 memstart_offset_seed;
+               u64 range = linear_region_size -
+                           (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+
+               /*
+                * If the size of the linear region exceeds, by a sufficient
+                * margin, the size of the region that the available physical
+                * memory spans, randomize the linear region as well.
+                */
+               if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+                       range = range / ARM64_MEMSTART_ALIGN + 1;
+                       memstart_addr -= ARM64_MEMSTART_ALIGN *
+                                        ((range * memstart_offset_seed) >> 16);
+               }
+       }
 
        /*
         * Register the kernel text, kernel data, initrd, and initial
@@ -167,8 +226,13 @@ void __init arm64_memblock_init(void)
         */
        memblock_reserve(__pa(_text), _end - _text);
 #ifdef CONFIG_BLK_DEV_INITRD
-       if (initrd_start)
-               memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
+       if (initrd_start) {
+               memblock_reserve(initrd_start, initrd_end - initrd_start);
+
+               /* the generic initrd code expects virtual addresses */
+               initrd_start = __phys_to_virt(initrd_start);
+               initrd_end = __phys_to_virt(initrd_end);
+       }
 #endif
 
        early_init_fdt_scan_reserved_mem();
@@ -302,35 +366,38 @@ void __init mem_init(void)
 #ifdef CONFIG_KASAN
                  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #endif
+                 "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
                  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+                 "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+                 "    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+                 "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+                 "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
                  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
                  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
 #endif
                  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
                  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-                 "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-                 "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-                 "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-                 "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-                 "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+                 "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 #ifdef CONFIG_KASAN
                  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
 #endif
+                 MLM(MODULES_VADDR, MODULES_END),
                  MLG(VMALLOC_START, VMALLOC_END),
+                 MLK_ROUNDUP(_text, __start_rodata),
+                 MLK_ROUNDUP(__start_rodata, _etext),
+                 MLK_ROUNDUP(__init_begin, __init_end),
+                 MLK_ROUNDUP(_sdata, _edata),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
                  MLG(VMEMMAP_START,
                      VMEMMAP_START + VMEMMAP_SIZE),
-                 MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+                 MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
                      (unsigned long)virt_to_page(high_memory)),
 #endif
                  MLK(FIXADDR_START, FIXADDR_TOP),
                  MLM(PCI_IO_START, PCI_IO_END),
-                 MLM(MODULES_VADDR, MODULES_END),
-                 MLM(PAGE_OFFSET, (unsigned long)high_memory),
-                 MLK_ROUNDUP(__init_begin, __init_end),
-                 MLK_ROUNDUP(_text, _etext),
-                 MLK_ROUNDUP(_sdata, _edata));
+                 MLM(__phys_to_virt(memblock_start_of_DRAM()),
+                     (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM
@@ -343,8 +410,6 @@ void __init mem_init(void)
 #ifdef CONFIG_COMPAT
        BUILD_BUG_ON(TASK_SIZE_32                       > TASK_SIZE_64);
 #endif
-       BUILD_BUG_ON(TASK_SIZE_64                       > MODULES_VADDR);
-       BUG_ON(TASK_SIZE_64                             > MODULES_VADDR);
 
        if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
                extern int sysctl_overcommit_memory;
@@ -358,8 +423,8 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-       fixup_init();
        free_initmem_default(0);
+       fixup_init();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -380,3 +445,28 @@ static int __init keepinitrd_setup(char *__unused)
 
 __setup("keepinitrd", keepinitrd_setup);
 #endif
+
+/*
+ * Dump out memory limit information on panic.
+ */
+static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+{
+       if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+               pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
+       } else {
+               pr_emerg("Memory Limit: none\n");
+       }
+       return 0;
+}
+
+static struct notifier_block mem_limit_notifier = {
+       .notifier_call = dump_mem_limit,
+};
+
+static int __init register_mem_limit_dumper(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &mem_limit_notifier);
+       return 0;
+}
+__initcall(register_mem_limit_dumper);
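
The most subtle part of the arm64_memblock_init() changes above is the linear-map randomization arithmetic: the 16-bit memstart_offset_seed selects one of the spare ARM64_MEMSTART_ALIGN-sized slots in the linear region, and memstart_addr is lowered by that many slots. A worked example (ARM64_MEMSTART_ALIGN is assumed to be 1 GB here purely for illustration; the real value depends on the page size and VA configuration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t align = 1ULL << 30;	/* assumed 1 GB ARM64_MEMSTART_ALIGN */
	uint64_t range = 256ULL << 30;		/* 256 GB of unused linear VA space  */
	uint16_t memstart_offset_seed = 0x8000;	/* 16 bits of UEFI-provided entropy  */
	uint64_t offset;

	/*
	 * Same arithmetic as the hunk above: the seed picks one of the
	 * available alignment-sized slots via a 16.16 fixed-point scale.
	 */
	range = range / align + 1;
	offset = align * ((range * memstart_offset_seed) >> 16);

	printf("memstart_addr lowered by %llu GB\n",
	       (unsigned long long)(offset >> 30));	/* prints 128 */
	return 0;
}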
index cab7a5be40aa85cbd933635d48208d2af54bcf1c..757009daa9ede454abccbcab6b3a0421a355b49c 100644 (file)
 #include <linux/memblock.h>
 #include <linux/start_kernel.h>
 
+#include <asm/mmu_context.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
@@ -32,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
        if (pmd_none(*pmd))
                pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
 
-       pte = pte_offset_kernel(pmd, addr);
+       pte = pte_offset_kimg(pmd, addr);
        do {
                next = addr + PAGE_SIZE;
                set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
@@ -50,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
        if (pud_none(*pud))
                pud_populate(&init_mm, pud, kasan_zero_pmd);
 
-       pmd = pmd_offset(pud, addr);
+       pmd = pmd_offset_kimg(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                kasan_early_pte_populate(pmd, addr, next);
@@ -67,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
        if (pgd_none(*pgd))
                pgd_populate(&init_mm, pgd, kasan_zero_pud);
 
-       pud = pud_offset(pgd, addr);
+       pud = pud_offset_kimg(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                kasan_early_pmd_populate(pud, addr, next);
@@ -96,6 +99,21 @@ asmlinkage void __init kasan_early_init(void)
        kasan_map_early_shadow();
 }
 
+/*
+ * Copy the current shadow region into a new pgdir.
+ */
+void __init kasan_copy_shadow(pgd_t *pgdir)
+{
+       pgd_t *pgd, *pgd_new, *pgd_end;
+
+       pgd = pgd_offset_k(KASAN_SHADOW_START);
+       pgd_end = pgd_offset_k(KASAN_SHADOW_END);
+       pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+       do {
+               set_pgd(pgd_new, *pgd);
+       } while (pgd++, pgd_new++, pgd != pgd_end);
+}
+
 static void __init clear_pgds(unsigned long start,
                        unsigned long end)
 {
@@ -108,20 +126,19 @@ static void __init clear_pgds(unsigned long start,
                set_pgd(pgd_offset_k(start), __pgd(0));
 }
 
-static void __init cpu_set_ttbr1(unsigned long ttbr1)
-{
-       asm(
-       "       msr     ttbr1_el1, %0\n"
-       "       isb"
-       :
-       : "r" (ttbr1));
-}
-
 void __init kasan_init(void)
 {
+       u64 kimg_shadow_start, kimg_shadow_end;
+       u64 mod_shadow_start, mod_shadow_end;
        struct memblock_region *reg;
        int i;
 
+       kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
+       kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
+
+       mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
+       mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
+
        /*
         * We are going to perform proper setup of shadow memory.
         * At first we should unmap early shadow (clear_pgds() call bellow).
@@ -130,13 +147,33 @@ void __init kasan_init(void)
         * setup will be finished.
         */
        memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
-       cpu_set_ttbr1(__pa(tmp_pg_dir));
-       flush_tlb_all();
+       dsb(ishst);
+       cpu_replace_ttbr1(tmp_pg_dir);
 
        clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
+       vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
+                        pfn_to_nid(virt_to_pfn(_text)));
+
+       /*
+        * vmemmap_populate() has populated the shadow region that covers the
+        * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
+        * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
+        * kasan_populate_zero_shadow() from replacing the page table entries
+        * (PMD or PTE) at the edges of the shadow region for the kernel
+        * image.
+        */
+       kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
+       kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+
        kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
-                       kasan_mem_to_shadow((void *)MODULES_VADDR));
+                                  (void *)mod_shadow_start);
+       kasan_populate_zero_shadow((void *)kimg_shadow_end,
+                                  kasan_mem_to_shadow((void *)PAGE_OFFSET));
+
+       if (kimg_shadow_start > mod_shadow_end)
+               kasan_populate_zero_shadow((void *)mod_shadow_end,
+                                          (void *)kimg_shadow_start);
 
        for_each_memblock(memory, reg) {
                void *start = (void *)__phys_to_virt(reg->base);
@@ -165,8 +202,7 @@ void __init kasan_init(void)
                        pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
 
        memset(kasan_zero_page, 0, PAGE_SIZE);
-       cpu_set_ttbr1(__pa(swapper_pg_dir));
-       flush_tlb_all();
+       cpu_replace_ttbr1(swapper_pg_dir);
 
        /* At this point kasan is fully initialized. Enable error messages */
        init_task.kasan_depth = 0;
index 58faeaa7fbdc6c73a73319e15c18bbd2a0948f06..d2d8b8c2e17f7c41a221098cd211eb478349c87a 100644 (file)
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 
+#include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 
+u64 kimage_voffset __read_mostly;
+EXPORT_SYMBOL(kimage_voffset);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
  */
-struct page *empty_zero_page;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
 {
@@ -62,16 +71,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static void __init *early_alloc(unsigned long sz)
+static phys_addr_t __init early_pgtable_alloc(void)
 {
        phys_addr_t phys;
        void *ptr;
 
-       phys = memblock_alloc(sz, sz);
+       phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
        BUG_ON(!phys);
-       ptr = __va(phys);
-       memset(ptr, 0, sz);
-       return ptr;
+
+       /*
+        * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+        * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+        * any level of table.
+        */
+       ptr = pte_set_fixmap(phys);
+
+       memset(ptr, 0, PAGE_SIZE);
+
+       /*
+        * Implicit barriers also ensure the zeroed page is visible to the page
+        * table walker
+        */
+       pte_clear_fixmap();
+
+       return phys;
 }
 
 /*
@@ -95,24 +118,30 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, unsigned long pfn,
                                  pgprot_t prot,
-                                 void *(*alloc)(unsigned long size))
+                                 phys_addr_t (*pgtable_alloc)(void))
 {
        pte_t *pte;
 
        if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-               pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+               phys_addr_t pte_phys;
+               BUG_ON(!pgtable_alloc);
+               pte_phys = pgtable_alloc();
+               pte = pte_set_fixmap(pte_phys);
                if (pmd_sect(*pmd))
                        split_pmd(pmd, pte);
-               __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
                flush_tlb_all();
+               pte_clear_fixmap();
        }
        BUG_ON(pmd_bad(*pmd));
 
-       pte = pte_offset_kernel(pmd, addr);
+       pte = pte_set_fixmap_offset(pmd, addr);
        do {
                set_pte(pte, pfn_pte(pfn, prot));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
+
+       pte_clear_fixmap();
 }
 
 static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -127,10 +156,29 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
        } while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
-static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
-                                 unsigned long addr, unsigned long end,
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+
+       /*
+        * If debug_page_alloc is enabled we must map the linear map
+        * using pages. However, other mappings created by
+        * create_mapping_noalloc must use sections in some cases. Allow
+        * sections to be used in those cases, where no pgtable_alloc
+        * function is provided.
+        */
+       return !pgtable_alloc || !debug_pagealloc_enabled();
+}
+#else
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+       return true;
+}
+#endif
+
+static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
                                  phys_addr_t phys, pgprot_t prot,
-                                 void *(*alloc)(unsigned long size))
+                                 phys_addr_t (*pgtable_alloc)(void))
 {
        pmd_t *pmd;
        unsigned long next;
@@ -139,7 +187,10 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
         * Check for initial section mappings in the pgd/pud and remove them.
         */
        if (pud_none(*pud) || pud_sect(*pud)) {
-               pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+               phys_addr_t pmd_phys;
+               BUG_ON(!pgtable_alloc);
+               pmd_phys = pgtable_alloc();
+               pmd = pmd_set_fixmap(pmd_phys);
                if (pud_sect(*pud)) {
                        /*
                         * need to have the 1G of mappings continue to be
@@ -147,16 +198,18 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                         */
                        split_pud(pud, pmd);
                }
-               pud_populate(mm, pud, pmd);
+               __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
                flush_tlb_all();
+               pmd_clear_fixmap();
        }
        BUG_ON(pud_bad(*pud));
 
-       pmd = pmd_offset(pud, addr);
+       pmd = pmd_set_fixmap_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                /* try section mapping first */
-               if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+               if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+                     block_mappings_allowed(pgtable_alloc)) {
                        pmd_t old_pmd =*pmd;
                        set_pmd(pmd, __pmd(phys |
                                           pgprot_val(mk_sect_prot(prot))));
@@ -167,17 +220,19 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                        if (!pmd_none(old_pmd)) {
                                flush_tlb_all();
                                if (pmd_table(old_pmd)) {
-                                       phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+                                       phys_addr_t table = pmd_page_paddr(old_pmd);
                                        if (!WARN_ON_ONCE(slab_is_available()))
                                                memblock_free(table, PAGE_SIZE);
                                }
                        }
                } else {
                        alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-                                      prot, alloc);
+                                      prot, pgtable_alloc);
                }
                phys += next - addr;
        } while (pmd++, addr = next, addr != end);
+
+       pmd_clear_fixmap();
 }
 
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -192,28 +247,30 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
        return true;
 }
 
-static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
-                                 unsigned long addr, unsigned long end,
+static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                                  phys_addr_t phys, pgprot_t prot,
-                                 void *(*alloc)(unsigned long size))
+                                 phys_addr_t (*pgtable_alloc)(void))
 {
        pud_t *pud;
        unsigned long next;
 
        if (pgd_none(*pgd)) {
-               pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
-               pgd_populate(mm, pgd, pud);
+               phys_addr_t pud_phys;
+               BUG_ON(!pgtable_alloc);
+               pud_phys = pgtable_alloc();
+               __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
        }
        BUG_ON(pgd_bad(*pgd));
 
-       pud = pud_offset(pgd, addr);
+       pud = pud_set_fixmap_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
 
                /*
                 * For 4K granule only, attempt to put down a 1GB block
                 */
-               if (use_1G_block(addr, next, phys)) {
+               if (use_1G_block(addr, next, phys) &&
+                   block_mappings_allowed(pgtable_alloc)) {
                        pud_t old_pud = *pud;
                        set_pud(pud, __pud(phys |
                                           pgprot_val(mk_sect_prot(prot))));
@@ -228,26 +285,28 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
                        if (!pud_none(old_pud)) {
                                flush_tlb_all();
                                if (pud_table(old_pud)) {
-                                       phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+                                       phys_addr_t table = pud_page_paddr(old_pud);
                                        if (!WARN_ON_ONCE(slab_is_available()))
                                                memblock_free(table, PAGE_SIZE);
                                }
                        }
                } else {
-                       alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+                       alloc_init_pmd(pud, addr, next, phys, prot,
+                                      pgtable_alloc);
                }
                phys += next - addr;
        } while (pud++, addr = next, addr != end);
+
+       pud_clear_fixmap();
 }
 
 /*
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
-                                   phys_addr_t phys, unsigned long virt,
+static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
                                    phys_addr_t size, pgprot_t prot,
-                                   void *(*alloc)(unsigned long size))
+                                   phys_addr_t (*pgtable_alloc)(void))
 {
        unsigned long addr, length, end, next;
 
@@ -265,22 +324,35 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
-               alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+               alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
                phys += next - addr;
        } while (pgd++, addr = next, addr != end);
 }
 
-static void *late_alloc(unsigned long size)
+static phys_addr_t late_pgtable_alloc(void)
 {
-       void *ptr;
-
-       BUG_ON(size > PAGE_SIZE);
-       ptr = (void *)__get_free_page(PGALLOC_GFP);
+       void *ptr = (void *)__get_free_page(PGALLOC_GFP);
        BUG_ON(!ptr);
-       return ptr;
+
+       /* Ensure the zeroed page is visible to the page table walker */
+       dsb(ishst);
+       return __pa(ptr);
 }
 
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+                                unsigned long virt, phys_addr_t size,
+                                pgprot_t prot,
+                                phys_addr_t (*alloc)(void))
+{
+       init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+}
+
+/*
+ * This function can only be used to modify existing table entries,
+ * without allocating new levels of table. Note that this permits the
+ * creation of new section or page entries.
+ */
+static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
                                  phys_addr_t size, pgprot_t prot)
 {
        if (virt < VMALLOC_START) {
@@ -288,16 +360,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
                        &phys, virt);
                return;
        }
-       __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
-                        size, prot, early_alloc);
+       __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+                            NULL);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
                               unsigned long virt, phys_addr_t size,
                               pgprot_t prot)
 {
-       __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
-                               late_alloc);
+       __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+                            late_pgtable_alloc);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -309,69 +381,57 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
                return;
        }
 
-       return __create_mapping(&init_mm, pgd_offset_k(virt),
-                               phys, virt, size, prot, late_alloc);
+       __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+                            late_pgtable_alloc);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
+       unsigned long kernel_start = __pa(_stext);
+       unsigned long kernel_end = __pa(_etext);
+
        /*
-        * Set up the executable regions using the existing section mappings
-        * for now. This will get more fine grained later once all memory
-        * is mapped
+        * Take care not to create a writable alias for the
+        * read-only text and rodata sections of the kernel image.
         */
-       unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
-       unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
-
-       if (end < kernel_x_start) {
-               create_mapping(start, __phys_to_virt(start),
-                       end - start, PAGE_KERNEL);
-       } else if (start >= kernel_x_end) {
-               create_mapping(start, __phys_to_virt(start),
-                       end - start, PAGE_KERNEL);
-       } else {
-               if (start < kernel_x_start)
-                       create_mapping(start, __phys_to_virt(start),
-                               kernel_x_start - start,
-                               PAGE_KERNEL);
-               create_mapping(kernel_x_start,
-                               __phys_to_virt(kernel_x_start),
-                               kernel_x_end - kernel_x_start,
-                               PAGE_KERNEL_EXEC);
-               if (kernel_x_end < end)
-                       create_mapping(kernel_x_end,
-                               __phys_to_virt(kernel_x_end),
-                               end - kernel_x_end,
-                               PAGE_KERNEL);
+
+       /* No overlap with the kernel text */
+       if (end < kernel_start || start >= kernel_end) {
+               __create_pgd_mapping(pgd, start, __phys_to_virt(start),
+                                    end - start, PAGE_KERNEL,
+                                    early_pgtable_alloc);
+               return;
        }
 
+       /*
+        * This block overlaps the kernel text mapping.
+        * Map the portion(s) which don't overlap.
+        */
+       if (start < kernel_start)
+               __create_pgd_mapping(pgd, start,
+                                    __phys_to_virt(start),
+                                    kernel_start - start, PAGE_KERNEL,
+                                    early_pgtable_alloc);
+       if (kernel_end < end)
+               __create_pgd_mapping(pgd, kernel_end,
+                                    __phys_to_virt(kernel_end),
+                                    end - kernel_end, PAGE_KERNEL,
+                                    early_pgtable_alloc);
+
+       /*
+        * Map the linear alias of the [_stext, _etext) interval as
+        * read-only/non-executable. This makes the contents of the
+        * region accessible to subsystems such as hibernate, but
+        * protects it from inadvertent modification or execution.
+        */
+       __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
+                            kernel_end - kernel_start, PAGE_KERNEL_RO,
+                            early_pgtable_alloc);
 }
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
-       create_mapping(start, __phys_to_virt(start), end - start,
-                       PAGE_KERNEL_EXEC);
-}
-#endif
 
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
 {
        struct memblock_region *reg;
-       phys_addr_t limit;
-
-       /*
-        * Temporarily limit the memblock range. We need to do this as
-        * create_mapping requires puds, pmds and ptes to be allocated from
-        * memory addressable from the initial direct kernel mapping.
-        *
-        * The initial direct kernel mapping, located at swapper_pg_dir, gives
-        * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
-        * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
-        * per Documentation/arm64/booting.txt).
-        */
-       limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
-       memblock_set_current_limit(limit);
 
        /* map all the memory banks */
        for_each_memblock(memory, reg) {
@@ -383,69 +443,94 @@ static void __init map_mem(void)
                if (memblock_is_nomap(reg))
                        continue;
 
-               if (ARM64_SWAPPER_USES_SECTION_MAPS) {
-                       /*
-                        * For the first memory bank align the start address and
-                        * current memblock limit to prevent create_mapping() from
-                        * allocating pte page tables from unmapped memory. With
-                        * the section maps, if the first block doesn't end on section
-                        * size boundary, create_mapping() will try to allocate a pte
-                        * page, which may be returned from an unmapped area.
-                        * When section maps are not used, the pte page table for the
-                        * current limit is already present in swapper_pg_dir.
-                        */
-                       if (start < limit)
-                               start = ALIGN(start, SECTION_SIZE);
-                       if (end < limit) {
-                               limit = end & SECTION_MASK;
-                               memblock_set_current_limit(limit);
-                       }
-               }
-               __map_memblock(start, end);
+               __map_memblock(pgd, start, end);
        }
-
-       /* Limit no longer required. */
-       memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
-static void __init fixup_executable(void)
+void mark_rodata_ro(void)
 {
-#ifdef CONFIG_DEBUG_RODATA
-       /* now that we are actually fully mapped, make the start/end more fine grained */
-       if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
-               unsigned long aligned_start = round_down(__pa(_stext),
-                                                        SWAPPER_BLOCK_SIZE);
+       unsigned long section_size;
 
-               create_mapping(aligned_start, __phys_to_virt(aligned_start),
-                               __pa(_stext) - aligned_start,
-                               PAGE_KERNEL);
-       }
+       section_size = (unsigned long)__start_rodata - (unsigned long)_stext;
+       create_mapping_late(__pa(_stext), (unsigned long)_stext,
+                           section_size, PAGE_KERNEL_ROX);
+       /*
+        * mark .rodata as read only. Use _etext rather than __end_rodata to
+        * cover NOTES and EXCEPTION_TABLE.
+        */
+       section_size = (unsigned long)_etext - (unsigned long)__start_rodata;
+       create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+                           section_size, PAGE_KERNEL_RO);
+}
 
-       if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
-               unsigned long aligned_end = round_up(__pa(__init_end),
-                                                         SWAPPER_BLOCK_SIZE);
-               create_mapping(__pa(__init_end), (unsigned long)__init_end,
-                               aligned_end - __pa(__init_end),
-                               PAGE_KERNEL);
-       }
-#endif
+void fixup_init(void)
+{
+       /*
+        * Unmap the __init region but leave the VM area in place. This
+        * prevents the region from being reused for kernel modules, which
+        * is not supported by kallsyms.
+        */
+       unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void)
+static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
+                                   pgprot_t prot, struct vm_struct *vma)
 {
-       create_mapping_late(__pa(_stext), (unsigned long)_stext,
-                               (unsigned long)_etext - (unsigned long)_stext,
-                               PAGE_KERNEL_ROX);
+       phys_addr_t pa_start = __pa(va_start);
+       unsigned long size = va_end - va_start;
+
+       BUG_ON(!PAGE_ALIGNED(pa_start));
+       BUG_ON(!PAGE_ALIGNED(size));
+
+       __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+                            early_pgtable_alloc);
+
+       vma->addr       = va_start;
+       vma->phys_addr  = pa_start;
+       vma->size       = size;
+       vma->flags      = VM_MAP;
+       vma->caller     = __builtin_return_address(0);
 
+       vm_area_add_early(vma);
 }
-#endif
 
-void fixup_init(void)
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
 {
-       create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
-                       (unsigned long)__init_end - (unsigned long)__init_begin,
-                       PAGE_KERNEL);
+       static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
+
+       map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
+       map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+       map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+                        &vmlinux_init);
+       map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+
+       if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+               /*
+                * The fixmap falls in a separate pgd to the kernel, and doesn't
+                * live in the carveout for the swapper_pg_dir. We can simply
+                * re-use the existing dir for the fixmap.
+                */
+               set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+                       *pgd_offset_k(FIXADDR_START));
+       } else if (CONFIG_PGTABLE_LEVELS > 3) {
+               /*
+                * The fixmap shares its top level pgd entry with the kernel
+                * mapping. This can really only occur when we are running
+                * with 16k/4 levels, so we can simply reuse the pud level
+                * entry instead.
+                */
+               BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+               set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+                       __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+               pud_clear_fixmap();
+       } else {
+               BUG();
+       }
+
+       kasan_copy_shadow(pgd);
 }
 
 /*
@@ -454,28 +539,35 @@ void fixup_init(void)
  */
 void __init paging_init(void)
 {
-       void *zero_page;
+       phys_addr_t pgd_phys = early_pgtable_alloc();
+       pgd_t *pgd = pgd_set_fixmap(pgd_phys);
 
-       map_mem();
-       fixup_executable();
+       map_kernel(pgd);
+       map_mem(pgd);
 
-       /* allocate the zero page. */
-       zero_page = early_alloc(PAGE_SIZE);
-
-       bootmem_init();
-
-       empty_zero_page = virt_to_page(zero_page);
+       /*
+        * We want to reuse the original swapper_pg_dir so we don't have to
+        * communicate the new address to non-coherent secondaries in
+        * secondary_entry, and so cpu_switch_mm can generate the address with
+        * adrp+add rather than a load from some global variable.
+        *
+        * To do this we need to go via a temporary pgd.
+        */
+       cpu_replace_ttbr1(__va(pgd_phys));
+       memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+       cpu_replace_ttbr1(swapper_pg_dir);
 
-       /* Ensure the zero page is visible to the page table walker */
-       dsb(ishst);
+       pgd_clear_fixmap();
+       memblock_free(pgd_phys, PAGE_SIZE);
 
        /*
-        * TTBR0 is only used for the identity mapping at this stage. Make it
-        * point to zero page to avoid speculatively fetching new entries.
+        * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+        * allocated with it.
         */
-       cpu_set_reserved_ttbr0();
-       local_flush_tlb_all();
-       cpu_set_default_tcr_t0sz();
+       memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+                     SWAPPER_DIR_SIZE - PAGE_SIZE);
+
+       bootmem_init();
 }
 
 /*
@@ -562,21 +654,13 @@ void vmemmap_free(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
        pgd_t *pgd = pgd_offset_k(addr);
 
        BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
 
-       return pud_offset(pgd, addr);
+       return pud_offset_kimg(pgd, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -585,16 +669,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
 
        BUG_ON(pud_none(*pud) || pud_bad(*pud));
 
-       return pmd_offset(pud, addr);
+       return pmd_offset_kimg(pud, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
 {
-       pmd_t *pmd = fixmap_pmd(addr);
-
-       BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-       return pte_offset_kernel(pmd, addr);
+       return &bm_pte[pte_index(addr)];
 }
 
 void __init early_fixmap_init(void)
@@ -605,15 +685,26 @@ void __init early_fixmap_init(void)
        unsigned long addr = FIXADDR_START;
 
        pgd = pgd_offset_k(addr);
-       pgd_populate(&init_mm, pgd, bm_pud);
-       pud = pud_offset(pgd, addr);
+       if (CONFIG_PGTABLE_LEVELS > 3 &&
+           !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+               /*
+                * We only end up here if the kernel mapping and the fixmap
+                * share the top level pgd entry, which should only happen on
+                * 16k/4 levels configurations.
+                */
+               BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+               pud = pud_offset_kimg(pgd, addr);
+       } else {
+               pgd_populate(&init_mm, pgd, bm_pud);
+               pud = fixmap_pud(addr);
+       }
        pud_populate(&init_mm, pud, bm_pmd);
-       pmd = pmd_offset(pud, addr);
+       pmd = fixmap_pmd(addr);
        pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
        /*
         * The boot-ioremap range spans multiple pmds, for which
-        * we are not preparted:
+        * we are not prepared:
         */
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
@@ -652,11 +743,10 @@ void __set_fixmap(enum fixed_addresses idx,
        }
 }
 
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 {
        const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
-       pgprot_t prot = PAGE_KERNEL_RO;
-       int size, offset;
+       int offset;
        void *dt_virt;
 
        /*
@@ -673,7 +763,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
        /*
         * Make sure that the FDT region can be mapped without the need to
         * allocate additional translation table pages, so that it is safe
-        * to call create_mapping() this early.
+        * to call create_mapping_noalloc() this early.
         *
         * On 64k pages, the FDT will be mapped using PTEs, so we need to
         * be in the same PMD as the rest of the fixmap.
@@ -689,21 +779,73 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
        dt_virt = (void *)dt_virt_base + offset;
 
        /* map the first chunk so we can read the size from the header */
-       create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-                      SWAPPER_BLOCK_SIZE, prot);
+       create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+                       dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
 
        if (fdt_check_header(dt_virt) != 0)
                return NULL;
 
-       size = fdt_totalsize(dt_virt);
-       if (size > MAX_FDT_SIZE)
+       *size = fdt_totalsize(dt_virt);
+       if (*size > MAX_FDT_SIZE)
                return NULL;
 
-       if (offset + size > SWAPPER_BLOCK_SIZE)
-               create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-                              round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+       if (offset + *size > SWAPPER_BLOCK_SIZE)
+               create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+                              round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
 
-       memblock_reserve(dt_phys, size);
+       return dt_virt;
+}
+
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+       void *dt_virt;
+       int size;
+
+       dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
+       if (!dt_virt)
+               return NULL;
 
+       memblock_reserve(dt_phys, size);
        return dt_virt;
 }
+
+int __init arch_ioremap_pud_supported(void)
+{
+       /* only 4k granule supports level 1 block mappings */
+       return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+       return 1;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+       BUG_ON(phys & ~PUD_MASK);
+       set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+       return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+       BUG_ON(phys & ~PMD_MASK);
+       set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+       return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+       if (!pud_sect(*pud))
+               return 0;
+       pud_clear(pud);
+       return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+       if (!pmd_sect(*pmd))
+               return 0;
+       pmd_clear(pmd);
+       return 1;
+}
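
The rewritten mmu.c never touches a page-table page through the linear map while it is being built: each table is windowed in through a dedicated fixmap slot and unmapped again immediately. A rough sketch of the accessors this relies on follows; the real definitions live in the arm64 pgtable/fixmap headers added by this series, so treat the exact macro bodies and the example helper as an approximation.

        /* Approximate definitions of the fixmap-based table accessors used above. */
        #define pte_set_fixmap(addr)            ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
        #define pte_set_fixmap_offset(pmd, addr) \
                pte_set_fixmap(pmd_page_paddr(*(pmd)) + pte_index(addr) * sizeof(pte_t))
        #define pte_clear_fixmap()              clear_fixmap(FIX_PTE)

        /* Hypothetical helper mirroring alloc_init_pte(): allocate a table by
         * physical address, map it via the fixmap, populate it, tear it down. */
        static void __init example_map_one_page(pmd_t *pmd, unsigned long va, phys_addr_t phys)
        {
                phys_addr_t pte_phys = early_pgtable_alloc();
                pte_t *pte;

                __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);

                pte = pte_set_fixmap_offset(pmd, va);
                set_pte(pte, pfn_pte(__phys_to_pfn(phys), PAGE_KERNEL));
                pte_clear_fixmap();
        }
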
index 0795c3a36d8f0d140cf4952bd91b14962e7f81b4..ca6d268e3313229b0941ce7d33439c7a4c861120 100644 (file)
@@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
        return 0;
 }
 
+/*
+ * This function assumes that the range is mapped with PAGE_SIZE pages.
+ */
+static int __change_memory_common(unsigned long start, unsigned long size,
+                               pgprot_t set_mask, pgprot_t clear_mask)
+{
+       struct page_change_data data;
+       int ret;
+
+       data.set_mask = set_mask;
+       data.clear_mask = clear_mask;
+
+       ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+                                       &data);
+
+       flush_tlb_kernel_range(start, start + size);
+       return ret;
+}
+
 static int change_memory_common(unsigned long addr, int numpages,
                                pgprot_t set_mask, pgprot_t clear_mask)
 {
        unsigned long start = addr;
        unsigned long size = PAGE_SIZE*numpages;
        unsigned long end = start + size;
-       int ret;
-       struct page_change_data data;
        struct vm_struct *area;
 
        if (!PAGE_ALIGNED(addr)) {
@@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages,
        if (!numpages)
                return 0;
 
-       data.set_mask = set_mask;
-       data.clear_mask = clear_mask;
-
-       ret = apply_to_page_range(&init_mm, start, size, change_page_range,
-                                       &data);
-
-       flush_tlb_kernel_range(start, end);
-       return ret;
+       return __change_memory_common(start, size, set_mask, clear_mask);
 }
 
 int set_memory_ro(unsigned long addr, int numpages)
@@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages)
                                        __pgprot(PTE_PXN));
 }
 EXPORT_SYMBOL_GPL(set_memory_x);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+       unsigned long addr = (unsigned long) page_address(page);
+
+       if (enable)
+               __change_memory_common(addr, PAGE_SIZE * numpages,
+                                       __pgprot(PTE_VALID),
+                                       __pgprot(0));
+       else
+               __change_memory_common(addr, PAGE_SIZE * numpages,
+                                       __pgprot(0),
+                                       __pgprot(PTE_VALID));
+}
+#endif
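
Clearing PTE_VALID is what makes DEBUG_PAGEALLOC useful: once a page is unmapped on free, a stale access faults immediately instead of silently corrupting memory. The hook above is reached through the generic kernel_map_pages() wrapper, roughly as sketched below (simplified from the generic header, not arm64 code).

        /* Simplified form of the generic wrapper (include/linux/mm.h): the arch
         * hook only runs when debug_pagealloc was actually enabled at boot. */
        static inline void kernel_map_pages(struct page *page, int numpages, int enable)
        {
                if (!debug_pagealloc_enabled())
                        return;

                __kernel_map_pages(page, numpages, enable);
        }

        /* The page allocator then calls kernel_map_pages(page, 1 << order, 0)
         * when a block is freed and kernel_map_pages(page, 1 << order, 1) when
         * it is handed out again. */
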
index c164d2cb35c05f12e7266592ee52a3b425483670..543f5198005a9425c51e49aeeaee8a89480ee7e3 100644 (file)
@@ -25,6 +25,8 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
 
 #include "proc-macros.S"
 
@@ -137,9 +139,47 @@ ENTRY(cpu_do_switch_mm)
        bfi     x0, x1, #48, #16                // set the ASID
        msr     ttbr0_el1, x0                   // set TTBR0
        isb
+alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
        ret
+       nop
+       nop
+       nop
+alternative_else
+       ic      iallu
+       dsb     nsh
+       isb
+       ret
+alternative_endif
 ENDPROC(cpu_do_switch_mm)
 
+       .pushsection ".idmap.text", "ax"
+/*
+ * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
+ *
+ * This is the low-level counterpart to cpu_replace_ttbr1, and should not be
+ * called by anything else. It can only be executed from a TTBR0 mapping.
+ */
+ENTRY(idmap_cpu_replace_ttbr1)
+       mrs     x2, daif
+       msr     daifset, #0xf
+
+       adrp    x1, empty_zero_page
+       msr     ttbr1_el1, x1
+       isb
+
+       tlbi    vmalle1
+       dsb     nsh
+       isb
+
+       msr     ttbr1_el1, x0
+       isb
+
+       msr     daif, x2
+
+       ret
+ENDPROC(idmap_cpu_replace_ttbr1)
+       .popsection
+
 /*
  *     __cpu_setup
  *
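
idmap_cpu_replace_ttbr1 must run from the identity mapping because it temporarily points TTBR1_EL1 at the zero page, pulling the kernel's own translation out from under any code executing via TTBR1. Its caller is the cpu_replace_ttbr1() helper used by paging_init() and kasan_init() above; a sketch of what that helper looks like (the real one lives in asm/mmu_context.h in this series, details may differ slightly):

        /* Sketch: swap TTBR1_EL1 to a new pgd without violating break-before-make,
         * by running the low-level switch code from the identity mapping. */
        static inline void cpu_replace_ttbr1(pgd_t *pgd)
        {
                typedef void (ttbr_replace_func)(phys_addr_t);
                extern ttbr_replace_func idmap_cpu_replace_ttbr1;
                ttbr_replace_func *replace_phys;

                phys_addr_t pgd_phys = virt_to_phys(pgd);

                /* call the routine by its physical address, via the idmap */
                replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);

                cpu_install_idmap();
                replace_phys(pgd_phys);
                cpu_uninstall_idmap();
        }
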
index 0010c78c4998cf0702299ea2f8a9229e09bb6438..08b1f2f6ea50c186933d0d9e79c82da0b9da4f3f 100644 (file)
@@ -25,6 +25,8 @@
 #define EFI32_LOADER_SIGNATURE "EL32"
 #define EFI64_LOADER_SIGNATURE "EL64"
 
+#define MAX_CMDLINE_ADDRESS    UINT_MAX
+
 #ifdef CONFIG_X86_32
 
 
index aaf9c0bab42e8fc29e093da64f609d99064046fd..ad077944aa0eca1a4a6d0f3d0ce2f8d3ac0820dd 100644 (file)
@@ -36,7 +36,7 @@ lib-$(CONFIG_EFI_ARMSTUB)     += arm-stub.o fdt.o string.o \
                                   $(patsubst %.c,lib-%.o,$(arm-deps))
 
 lib-$(CONFIG_ARM)              += arm32-stub.o
-lib-$(CONFIG_ARM64)            += arm64-stub.o
+lib-$(CONFIG_ARM64)            += arm64-stub.o random.o
 CFLAGS_arm64-stub.o            := -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
index 3397902e4040a30f566ff4b84555f17d7150d8f8..4deb3e7faa0ea622bce16d9877bb445bb3246399 100644 (file)
@@ -18,6 +18,8 @@
 
 #include "efistub.h"
 
+bool __nokaslr;
+
 static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
 {
        static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
@@ -207,14 +209,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
                pr_efi_err(sys_table, "Failed to find DRAM base\n");
                goto fail;
        }
-       status = handle_kernel_image(sys_table, image_addr, &image_size,
-                                    &reserve_addr,
-                                    &reserve_size,
-                                    dram_base, image);
-       if (status != EFI_SUCCESS) {
-               pr_efi_err(sys_table, "Failed to relocate kernel\n");
-               goto fail;
-       }
 
        /*
         * Get the command line from EFI, using the LOADED_IMAGE
@@ -224,7 +218,28 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
        cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size);
        if (!cmdline_ptr) {
                pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n");
-               goto fail_free_image;
+               goto fail;
+       }
+
+       /* check whether 'nokaslr' was passed on the command line */
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+               static const u8 default_cmdline[] = CONFIG_CMDLINE;
+               const u8 *str, *cmdline = cmdline_ptr;
+
+               if (IS_ENABLED(CONFIG_CMDLINE_FORCE))
+                       cmdline = default_cmdline;
+               str = strstr(cmdline, "nokaslr");
+               if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+                       __nokaslr = true;
+       }
+
+       status = handle_kernel_image(sys_table, image_addr, &image_size,
+                                    &reserve_addr,
+                                    &reserve_size,
+                                    dram_base, image);
+       if (status != EFI_SUCCESS) {
+               pr_efi_err(sys_table, "Failed to relocate kernel\n");
+               goto fail_free_cmdline;
        }
 
        status = efi_parse_options(cmdline_ptr);
@@ -244,7 +259,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 
                if (status != EFI_SUCCESS) {
                        pr_efi_err(sys_table, "Failed to load device tree!\n");
-                       goto fail_free_cmdline;
+                       goto fail_free_image;
                }
        }
 
@@ -286,12 +301,11 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
        efi_free(sys_table, initrd_size, initrd_addr);
        efi_free(sys_table, fdt_size, fdt_addr);
 
-fail_free_cmdline:
-       efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
-
 fail_free_image:
        efi_free(sys_table, image_size, *image_addr);
        efi_free(sys_table, reserve_size, reserve_addr);
+fail_free_cmdline:
+       efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
 fail:
        return EFI_ERROR;
 }
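
The strstr() test above treats "nokaslr" as present only when it starts the command line or follows a space, so values such as "foo=nokaslr" do not disable randomization. The same logic, shown as a standalone hypothetical helper with an explicit NULL check:

        /* Hypothetical standalone version of the check above. */
        static bool cmdline_has_nokaslr(const char *cmdline)
        {
                const char *str = strstr(cmdline, "nokaslr");

                /* accept a match only at the start of the string or after a space */
                return str && (str == cmdline || *(str - 1) == ' ');
        }

        /* cmdline_has_nokaslr("nokaslr")                  -> true
         * cmdline_has_nokaslr("console=ttyAMA0 nokaslr")  -> true
         * cmdline_has_nokaslr("foo=nokaslr")              -> false */
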
index 78dfbd34b6bffd2fa36312da89dc6ca43f036c3c..e0e6b74fef8f7becdef4c0481919ead298023250 100644 (file)
 #include <asm/efi.h>
 #include <asm/sections.h>
 
+#include "efistub.h"
+
+extern bool __nokaslr;
+
 efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
                                        unsigned long *image_addr,
                                        unsigned long *image_size,
@@ -23,26 +27,52 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 {
        efi_status_t status;
        unsigned long kernel_size, kernel_memsize = 0;
-       unsigned long nr_pages;
        void *old_image_addr = (void *)*image_addr;
        unsigned long preferred_offset;
+       u64 phys_seed = 0;
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+               if (!__nokaslr) {
+                       status = efi_get_random_bytes(sys_table_arg,
+                                                     sizeof(phys_seed),
+                                                     (u8 *)&phys_seed);
+                       if (status == EFI_NOT_FOUND) {
+                               pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+                       } else if (status != EFI_SUCCESS) {
+                               pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n");
+                               return status;
+                       }
+               } else {
+                       pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n");
+               }
+       }
 
        /*
         * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond
         * a 2 MB aligned base, which itself may be lower than dram_base, as
         * long as the resulting offset equals or exceeds it.
         */
-       preferred_offset = round_down(dram_base, SZ_2M) + TEXT_OFFSET;
+       preferred_offset = round_down(dram_base, MIN_KIMG_ALIGN) + TEXT_OFFSET;
        if (preferred_offset < dram_base)
-               preferred_offset += SZ_2M;
+               preferred_offset += MIN_KIMG_ALIGN;
 
-       /* Relocate the image, if required. */
        kernel_size = _edata - _text;
-       if (*image_addr != preferred_offset) {
-               kernel_memsize = kernel_size + (_end - _edata);
+       kernel_memsize = kernel_size + (_end - _edata);
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
+               /*
+                * If KASLR is enabled, and we have some randomness available,
+                * locate the kernel at a randomized offset in physical memory.
+                */
+               *reserve_size = kernel_memsize + TEXT_OFFSET;
+               status = efi_random_alloc(sys_table_arg, *reserve_size,
+                                         MIN_KIMG_ALIGN, reserve_addr,
+                                         phys_seed);
 
+               *image_addr = *reserve_addr + TEXT_OFFSET;
+       } else {
                /*
-                * First, try a straight allocation at the preferred offset.
+                * Else, try a straight allocation at the preferred offset.
                 * This will work around the issue where, if dram_base == 0x0,
                 * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
                 * address of the allocation to be mistaken for a FAIL return
@@ -52,27 +82,31 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
                 * Mustang), we can still place the kernel at the address
                 * 'dram_base + TEXT_OFFSET'.
                 */
+               if (*image_addr == preferred_offset)
+                       return EFI_SUCCESS;
+
                *image_addr = *reserve_addr = preferred_offset;
-               nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
-                          EFI_PAGE_SIZE;
+               *reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN);
+
                status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-                                       EFI_LOADER_DATA, nr_pages,
+                                       EFI_LOADER_DATA,
+                                       *reserve_size / EFI_PAGE_SIZE,
                                        (efi_physical_addr_t *)reserve_addr);
-               if (status != EFI_SUCCESS) {
-                       kernel_memsize += TEXT_OFFSET;
-                       status = efi_low_alloc(sys_table_arg, kernel_memsize,
-                                              SZ_2M, reserve_addr);
+       }
 
-                       if (status != EFI_SUCCESS) {
-                               pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
-                               return status;
-                       }
-                       *image_addr = *reserve_addr + TEXT_OFFSET;
+       if (status != EFI_SUCCESS) {
+               *reserve_size = kernel_memsize + TEXT_OFFSET;
+               status = efi_low_alloc(sys_table_arg, *reserve_size,
+                                      MIN_KIMG_ALIGN, reserve_addr);
+
+               if (status != EFI_SUCCESS) {
+                       pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
+                       *reserve_size = 0;
+                       return status;
                }
-               memcpy((void *)*image_addr, old_image_addr, kernel_size);
-               *reserve_size = kernel_memsize;
+               *image_addr = *reserve_addr + TEXT_OFFSET;
        }
-
+       memcpy((void *)*image_addr, old_image_addr, kernel_size);
 
        return EFI_SUCCESS;
 }
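
For the non-randomized path, preferred_offset is the lowest address at or above dram_base that sits TEXT_OFFSET bytes past a MIN_KIMG_ALIGN boundary. The sketch below works one case through with the default values (TEXT_OFFSET = 0x80000, MIN_KIMG_ALIGN = 2 MiB); both are build-time constants, so the numbers are purely illustrative.

        /* Illustrative arithmetic for the non-random path, assuming the defaults
         * TEXT_OFFSET = 0x80000 and MIN_KIMG_ALIGN = 0x200000 (2 MiB). */
        static unsigned long example_preferred_offset(unsigned long dram_base)
        {
                unsigned long offset = round_down(dram_base, 0x200000) + 0x80000;

                if (offset < dram_base)
                        offset += 0x200000;
                return offset;
        }

        /* example_preferred_offset(0x80100000) = 0x80280000:
         * round_down gives 0x80000000, +0x80000 = 0x80080000 which is below
         * dram_base, so one more 2 MiB step lands at 0x80280000. */
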
index f07d4a67fa76b3a3cb542e31a24a093c6f7aff97..29ed2f9b218ca9892bfcc72da2d91ba4750f4c97 100644 (file)
@@ -649,6 +649,10 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
        return dst;
 }
 
+#ifndef MAX_CMDLINE_ADDRESS
+#define MAX_CMDLINE_ADDRESS    ULONG_MAX
+#endif
+
 /*
  * Convert the unicode UEFI command line to ASCII to pass to kernel.
  * Size of memory allocated return in *cmd_line_len.
@@ -684,7 +688,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
 
        options_bytes++;        /* NUL termination */
 
-       status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr);
+       status = efi_high_alloc(sys_table_arg, options_bytes, 0,
+                               &cmdline_addr, MAX_CMDLINE_ADDRESS);
        if (status != EFI_SUCCESS)
                return NULL;
 
index 6b6548fda0895ecb0ca7e9a60699d7100e335566..5ed3d3f3816637cd10d007f381797320f0567305 100644 (file)
@@ -43,4 +43,11 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
                     unsigned long desc_size, efi_memory_desc_t *runtime_map,
                     int *count);
 
+efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table,
+                                 unsigned long size, u8 *out);
+
+efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+                             unsigned long size, unsigned long align,
+                             unsigned long *addr, unsigned long random_seed);
+
 #endif
index cf7b7d46302a269839c8932f9d8866b93a814699..6dba78aef3370c208ebb4a5437108dd75ffe5301 100644 (file)
@@ -147,6 +147,20 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
        if (status)
                goto fdt_set_fail;
 
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+               efi_status_t efi_status;
+
+               efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64),
+                                                 (u8 *)&fdt_val64);
+               if (efi_status == EFI_SUCCESS) {
+                       status = fdt_setprop(fdt, node, "kaslr-seed",
+                                            &fdt_val64, sizeof(fdt_val64));
+                       if (status)
+                               goto fdt_set_fail;
+               } else if (efi_status != EFI_NOT_FOUND) {
+                       return efi_status;
+               }
+       }
        return EFI_SUCCESS;
 
 fdt_set_fail:
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
new file mode 100644 (file)
index 0000000..53f6d3f
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd;  <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+
+struct efi_rng_protocol {
+       efi_status_t (*get_info)(struct efi_rng_protocol *,
+                                unsigned long *, efi_guid_t *);
+       efi_status_t (*get_rng)(struct efi_rng_protocol *,
+                               efi_guid_t *, unsigned long, u8 *out);
+};
+
+efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg,
+                                 unsigned long size, u8 *out)
+{
+       efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
+       efi_status_t status;
+       struct efi_rng_protocol *rng;
+
+       status = efi_call_early(locate_protocol, &rng_proto, NULL,
+                               (void **)&rng);
+       if (status != EFI_SUCCESS)
+               return status;
+
+       return rng->get_rng(rng, NULL, size, out);
+}
+
+/*
+ * Return the number of slots covered by this entry, i.e., the number of
+ * addresses it covers that are suitably aligned and supply enough room
+ * for the allocation.
+ */
+static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
+                                        unsigned long size,
+                                        unsigned long align)
+{
+       u64 start, end;
+
+       if (md->type != EFI_CONVENTIONAL_MEMORY)
+               return 0;
+
+       start = round_up(md->phys_addr, align);
+       end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size,
+                        align);
+
+       if (start > end)
+               return 0;
+
+       return (end - start + 1) / align;
+}
+
+/*
+ * The UEFI memory descriptors have a virtual address field that is only used
+ * when installing the virtual mapping using SetVirtualAddressMap(). Since it
+ * is unused here, we can reuse it to keep track of each descriptor's slot
+ * count.
+ */
+#define MD_NUM_SLOTS(md)       ((md)->virt_addr)
+
+efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+                             unsigned long size,
+                             unsigned long align,
+                             unsigned long *addr,
+                             unsigned long random_seed)
+{
+       unsigned long map_size, desc_size, total_slots = 0, target_slot;
+       efi_status_t status;
+       efi_memory_desc_t *memory_map;
+       int map_offset;
+
+       status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size,
+                                   &desc_size, NULL, NULL);
+       if (status != EFI_SUCCESS)
+               return status;
+
+       if (align < EFI_ALLOC_ALIGN)
+               align = EFI_ALLOC_ALIGN;
+
+       /* count the suitable slots in each memory map entry */
+       for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+               efi_memory_desc_t *md = (void *)memory_map + map_offset;
+               unsigned long slots;
+
+               slots = get_entry_num_slots(md, size, align);
+               MD_NUM_SLOTS(md) = slots;
+               total_slots += slots;
+       }
+
+       /* find a random number between 0 and total_slots */
+       target_slot = (total_slots * (u16)random_seed) >> 16;
+
+       /*
+        * target_slot is now a value in the range [0, total_slots), and so
+        * it corresponds with exactly one of the suitable slots we recorded
+        * when iterating over the memory map the first time around.
+        *
+        * So iterate over the memory map again, subtracting the number of
+        * slots of each entry at each iteration, until we have found the entry
+        * that covers our chosen slot. Use the residual value of target_slot
+        * to calculate the randomly chosen address, and allocate it directly
+        * using EFI_ALLOCATE_ADDRESS.
+        */
+       for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+               efi_memory_desc_t *md = (void *)memory_map + map_offset;
+               efi_physical_addr_t target;
+               unsigned long pages;
+
+               if (target_slot >= MD_NUM_SLOTS(md)) {
+                       target_slot -= MD_NUM_SLOTS(md);
+                       continue;
+               }
+
+               target = round_up(md->phys_addr, align) + target_slot * align;
+               pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+
+               status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
+                                       EFI_LOADER_DATA, pages, &target);
+               if (status == EFI_SUCCESS)
+                       *addr = target;
+               break;
+       }
+
+       efi_call_early(free_pool, memory_map);
+
+       return status;
+}
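
The seed is consumed as a 16-bit fixed-point fraction: only the low 16 bits are used, and multiplying them by total_slots and shifting right by 16 scales that fraction onto [0, total_slots). A worked example with made-up numbers:

        /* Hypothetical values, purely illustrative. */
        static unsigned long example_target_slot(void)
        {
                unsigned long total_slots = 100000;
                unsigned long random_seed = 0xfeedbeefcafe;	/* only bits [15:0] matter */

                /* (u16)random_seed = 0xcafe = 51966, so this yields 79293 */
                return (total_slots * (u16)random_seed) >> 16;
        }
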
index e2295b2c983673e862e76d3b8109449c99b2bf03..3349d2aa66346335fa3c84e485668bdbf77f6e52 100644 (file)
@@ -760,6 +760,16 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
+#ifndef __early_init_dt_declare_initrd
+static void __early_init_dt_declare_initrd(unsigned long start,
+                                          unsigned long end)
+{
+       initrd_start = (unsigned long)__va(start);
+       initrd_end = (unsigned long)__va(end);
+       initrd_below_start_ok = 1;
+}
+#endif
+
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
  * @node: reference to node containing initrd location ('chosen')
@@ -782,9 +792,7 @@ static void __init early_init_dt_check_for_initrd(unsigned long node)
                return;
        end = of_read_number(prop, len/4);
 
-       initrd_start = (unsigned long)__va(start);
-       initrd_end = (unsigned long)__va(end);
-       initrd_below_start_ok = 1;
+       __early_init_dt_declare_initrd(start, end);
 
        pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n",
                 (unsigned long long)start, (unsigned long long)end);
@@ -974,13 +982,16 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK
+#ifndef MIN_MEMBLOCK_ADDR
+#define MIN_MEMBLOCK_ADDR      __pa(PAGE_OFFSET)
+#endif
 #ifndef MAX_MEMBLOCK_ADDR
 #define MAX_MEMBLOCK_ADDR      ((phys_addr_t)~0)
 #endif
 
 void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 {
-       const u64 phys_offset = __pa(PAGE_OFFSET);
+       const u64 phys_offset = MIN_MEMBLOCK_ADDR;
 
        if (!PAGE_ALIGNED(base)) {
                if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {
index 1cbb8338edf391bd83c4d1b0bc0dff2cbbe56e75..827e4d3bbc7a46ef59222651a8020234addc82cb 100644 (file)
@@ -70,12 +70,12 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
 #endif
 
 /* Return a pointer with offset calculated */
-#define __set_fixmap_offset(idx, phys, flags)                \
-({                                                           \
-       unsigned long addr;                                   \
-       __set_fixmap(idx, phys, flags);                       \
-       addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
-       addr;                                                 \
+#define __set_fixmap_offset(idx, phys, flags)                          \
+({                                                                     \
+       unsigned long ________addr;                                     \
+       __set_fixmap(idx, phys, flags);                                 \
+       ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1));   \
+       ________addr;                                                   \
 })
 
 #define set_fixmap_offset(idx, phys) \
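
The deliberately awkward ________addr name guards against capture: the temporary used to be called addr, so a caller whose phys argument itself mentioned a variable named addr would have that reference bound to the macro's (still uninitialised) local rather than its own. A contrived, hypothetical caller showing the hazard:

        /* Hypothetical caller illustrating the shadowing the rename avoids. */
        static void *map_fdt_word(void)
        {
                unsigned long addr = 0x80000000UL;	/* caller's own 'addr' */

                /* with the old macro, the 'addr' inside the expansion shadowed
                 * this one, so 'addr + 0x4' read the uninitialised macro-local */
                return (void *)set_fixmap_offset(FIX_FDT, addr + 0x4);
        }
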
index 47be3ad7d3e5bad63b48a8fa344dbea65c0a97dd..333d0ca6940f0214f37b69377716d58852240a3c 100644 (file)
@@ -299,7 +299,7 @@ typedef struct {
        void *open_protocol_information;
        void *protocols_per_handle;
        void *locate_handle_buffer;
-       void *locate_protocol;
+       efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **);
        void *install_multiple_protocol_interfaces;
        void *uninstall_multiple_protocol_interfaces;
        void *calculate_crc32;
@@ -599,6 +599,10 @@ void efi_native_runtime_setup(void);
 #define EFI_PROPERTIES_TABLE_GUID \
     EFI_GUID(  0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 )
 
+#define EFI_RNG_PROTOCOL_GUID \
+       EFI_GUID(0x3152bca5, 0xeade, 0x433d, \
+                0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
+
 typedef struct {
        efi_guid_t guid;
        u64 table;
index 4cac81ec225e09af4cfd69c36d63a574a8418110..0be02ad561e9e346c01a65434873f9960472047a 100644 (file)
 #include <linux/sort.h>
 #include <asm/uaccess.h>
 
+#ifndef ARCH_HAS_RELATIVE_EXTABLE
+#define ex_to_insn(x)  ((x)->insn)
+#else
+static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
+{
+       return (unsigned long)&x->insn + x->insn;
+}
+#endif
+
 #ifndef ARCH_HAS_SORT_EXTABLE
+#ifndef ARCH_HAS_RELATIVE_EXTABLE
+#define swap_ex                NULL
+#else
+static void swap_ex(void *a, void *b, int size)
+{
+       struct exception_table_entry *x = a, *y = b, tmp;
+       int delta = b - a;
+
+       tmp = *x;
+       x->insn = y->insn + delta;
+       y->insn = tmp.insn - delta;
+
+#ifdef swap_ex_entry_fixup
+       swap_ex_entry_fixup(x, y, tmp, delta);
+#else
+       x->fixup = y->fixup + delta;
+       y->fixup = tmp.fixup - delta;
+#endif
+}
+#endif /* ARCH_HAS_RELATIVE_EXTABLE */
+
 /*
  * The exception table needs to be sorted so that the binary
  * search that we use to find entries in it works properly.
@@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b)
        const struct exception_table_entry *x = a, *y = b;
 
        /* avoid overflow */
-       if (x->insn > y->insn)
+       if (ex_to_insn(x) > ex_to_insn(y))
                return 1;
-       if (x->insn < y->insn)
+       if (ex_to_insn(x) < ex_to_insn(y))
                return -1;
        return 0;
 }
@@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start,
                  struct exception_table_entry *finish)
 {
        sort(start, finish - start, sizeof(struct exception_table_entry),
-            cmp_ex, NULL);
+            cmp_ex, swap_ex);
 }
 
 #ifdef CONFIG_MODULES
@@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start,
 void trim_init_extable(struct module *m)
 {
        /*trim the beginning*/
-       while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+       while (m->num_exentries &&
+              within_module_init(ex_to_insn(&m->extable[0]), m)) {
                m->extable++;
                m->num_exentries--;
        }
        /*trim the end*/
        while (m->num_exentries &&
-               within_module_init(m->extable[m->num_exentries-1].insn, m))
+              within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]),
+                                 m))
                m->num_exentries--;
 }
 #endif /* CONFIG_MODULES */
@@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first,
                 * careful, the distance between value and insn
                 * can be larger than MAX_LONG:
                 */
-               if (mid->insn < value)
+               if (ex_to_insn(mid) < value)
                        first = mid + 1;
-               else if (mid->insn > value)
+               else if (ex_to_insn(mid) > value)
                        last = mid - 1;
                else
                        return mid;
-        }
-        return NULL;
+       }
+       return NULL;
 }
 #endif
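
ARCH_HAS_RELATIVE_EXTABLE implies that each exception table entry holds two 32-bit self-relative offsets instead of two absolute addresses, which halves the table and, more importantly for KASLR, leaves it invariant under relocation. A sketch of the layout this implies (arm64's actual definition lives in its own headers):

        /* Relative exception table entry: each field is the offset from the
         * field's own address to its target, so no relocation fixups are needed. */
        struct exception_table_entry {
                int insn;	/* offset to the faulting instruction */
                int fixup;	/* offset to the fixup handler */
        };

        /* Recovering the absolute address, exactly as ex_to_insn() does above. */
        static unsigned long insn_addr(const struct exception_table_entry *x)
        {
                return (unsigned long)&x->insn + x->insn;
        }
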
index 7b29fb14f870283ae4174e34643640150f7e098b..62a1822e0f41beece4606304c4ce0412bbd5e4cb 100644 (file)
@@ -295,9 +295,9 @@ do_file(char const *const fname)
                break;
        }  /* end switch */
        if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0
-       ||  r2(&ehdr->e_type) != ET_EXEC
+       ||  (r2(&ehdr->e_type) != ET_EXEC && r2(&ehdr->e_type) != ET_DYN)
        ||  ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
-               fprintf(stderr, "unrecognized ET_EXEC file %s\n", fname);
+               fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname);
                fail_file();
        }
 
@@ -314,12 +314,12 @@ do_file(char const *const fname)
                break;
 
        case EM_S390:
+       case EM_AARCH64:
                custom_sort = sort_relative_table;
                break;
        case EM_ARCOMPACT:
        case EM_ARCV2:
        case EM_ARM:
-       case EM_AARCH64:
        case EM_MICROBLAZE:
        case EM_MIPS:
        case EM_XTENSA:
@@ -336,7 +336,7 @@ do_file(char const *const fname)
                if (r2(&ehdr->e_ehsize) != sizeof(Elf32_Ehdr)
                ||  r2(&ehdr->e_shentsize) != sizeof(Elf32_Shdr)) {
                        fprintf(stderr,
-                               "unrecognized ET_EXEC file: %s\n", fname);
+                               "unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
                        fail_file();
                }
                do32(ehdr, fname, custom_sort);
@@ -346,7 +346,7 @@ do_file(char const *const fname)
                if (r2(&ghdr->e_ehsize) != sizeof(Elf64_Ehdr)
                ||  r2(&ghdr->e_shentsize) != sizeof(Elf64_Shdr)) {
                        fprintf(stderr,
-                               "unrecognized ET_EXEC file: %s\n", fname);
+                               "unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
                        fail_file();
                }
                do64(ghdr, fname, custom_sort);