Merge tag 'kvmarm-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm...
author	Paolo Bonzini <pbonzini@redhat.com>
	Mon, 20 Feb 2023 11:12:42 +0000 (06:12 -0500)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Mon, 20 Feb 2023 11:12:42 +0000 (06:12 -0500)
KVM/arm64 updates for 6.3

 - Provide a virtual cache topology to the guest to avoid
   inconsistencies with migration on heterogeneous systems. Non-secure
   software has no practical need to traverse the caches by set/way in
   the first place.

 - Add support for taking stage-2 access faults in parallel. This was an
   accidental omission in the original parallel faults implementation,
   but should provide a marginal improvement to machines w/o FEAT_HAFDBS
   (such as hardware from the fruit company).

 - A preamble to adding support for nested virtualization to KVM,
   including vEL2 register state, rudimentary nested exception handling
   and masking unsupported features for nested guests.

 - Fixes to the PSCI relay that avoid an unexpected host SVE trap when
   resuming a CPU while running pKVM.

 - VGIC maintenance interrupt support for the AIC

 - Improvements to the arch timer emulation, primarily aimed at reducing
   the trap overhead of running nested.

 - Add CONFIG_USERFAULTFD to the KVM selftests config fragment in the
   interest of CI systems.

 - Avoid VM-wide stop-the-world operations when a vCPU accesses its own
   redistributor.

 - Serialize when toggling CPACR_EL1.SMEN to avoid unexpected exceptions
   in the host.

 - Aesthetic and comment/kerneldoc fixes

 - Drop the vestiges of the old Columbia mailing list and add Oliver
   Upton as co-maintainer

This also drags in arm64's 'for-next/sme2' branch, because both it and
the PSCI relay changes touch the EL2 initialization code.

85 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/booting.rst
Documentation/arm64/elf_hwcaps.rst
Documentation/arm64/sme.rst
MAINTAINERS
arch/arm64/include/asm/cache.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/fpsimdmacros.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/kvm_nested.h [new file with mode: 0644]
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/include/uapi/asm/sigcontext.h
arch/arm64/kernel/cacheinfo.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/entry-fpsimd.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/hyp-stub.S
arch/arm64/kernel/idreg-override.c
arch/arm64/kernel/process.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/signal.c
arch/arm64/kvm/Makefile
arch/arm64/kvm/arch_timer.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/emulate-nested.c [new file with mode: 0644]
arch/arm64/kvm/fpsimd.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/exception.c
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
arch/arm64/kvm/hyp/nvhe/hyp-init.S
arch/arm64/kvm/hyp/nvhe/sys_regs.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/hypercalls.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/nested.c [new file with mode: 0644]
arch/arm64/kvm/pvtime.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/sys_regs.h
arch/arm64/kvm/trace_arm.h
arch/arm64/kvm/vgic/vgic-init.c
arch/arm64/kvm/vgic/vgic-mmio.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/arm64/tools/cpucaps
arch/arm64/tools/gen-sysreg.awk
arch/arm64/tools/sysreg
arch/x86/include/asm/kvm_host.h
drivers/irqchip/irq-apple-aic.c
include/linux/kvm_types.h
include/uapi/linux/elf.h
tools/testing/selftests/arm64/abi/hwcap.c
tools/testing/selftests/arm64/abi/syscall-abi-asm.S
tools/testing/selftests/arm64/abi/syscall-abi.c
tools/testing/selftests/arm64/fp/.gitignore
tools/testing/selftests/arm64/fp/Makefile
tools/testing/selftests/arm64/fp/fp-stress.c
tools/testing/selftests/arm64/fp/sme-inst.h
tools/testing/selftests/arm64/fp/zt-ptrace.c [new file with mode: 0644]
tools/testing/selftests/arm64/fp/zt-test.S [new file with mode: 0644]
tools/testing/selftests/arm64/signal/.gitignore
tools/testing/selftests/arm64/signal/test_signals.h
tools/testing/selftests/arm64/signal/test_signals_utils.c
tools/testing/selftests/arm64/signal/testcases/testcases.c
tools/testing/selftests/arm64/signal/testcases/testcases.h
tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/zt_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/aarch64/page_fault_test.c
tools/testing/selftests/kvm/config
tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c

index 6cfa6e3996cf75ee6bb1e8b399daa4d5701ab92b..b7b0704e360e4cdf362bb869ea2818857ecb142c 100644 (file)
                        protected: nVHE-based mode with support for guests whose
                                   state is kept private from the host.
 
+                       nested: VHE-based mode with support for nested
+                               virtualization. Requires at least ARMv8.3
+                               hardware.
+
                        Defaults to VHE/nVHE based on hardware support. Setting
                        mode to "protected" will disable kexec and hibernation
-                       for the host.
+                       for the host. "nested" is experimental and should be
+                       used with extreme caution.
 
        kvm-arm.vgic_v3_group0_trap=
                        [KVM,ARM] Trap guest accesses to GICv3 group-0
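For reference, the mode documented above is selected on the kernel command
line, e.g. (experimental, and only usable on ARMv8.3 NV-capable hardware):

        ... kvm-arm.mode=nested ...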
index 96fe10ec6c24018b655f0db36a9d662ee9f52c17..f8d0a7288c73922b1337cc69c56c8aa68f529e33 100644 (file)
@@ -369,6 +369,16 @@ Before jumping into the kernel, the following conditions must be met:
 
     - HCR_EL2.ATA (bit 56) must be initialised to 0b1.
 
+  For CPUs with the Scalable Matrix Extension version 2 (FEAT_SME2):
+
+  - If EL3 is present:
+
+    - SMCR_EL3.EZT0 (bit 30) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+    - SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
index 6fed84f935dfed57f2227a05ce99dce56b90db6b..8a9d4bf7daf41fb3aeab4d5952cc314f6995e683 100644 (file)
@@ -284,6 +284,24 @@ HWCAP2_RPRFM
 HWCAP2_SVE2P1
     Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
 
+HWCAP2_SME2
+    Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
+
+HWCAP2_SME2P1
+    Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0010.
+
+HWCAP2_SMEI16I32
+    Functionality implied by ID_AA64SMFR0_EL1.I16I32 == 0b0101
+
+HWCAP2_SMEBI32I32
+    Functionality implied by ID_AA64SMFR0_EL1.BI32I32 == 0b1
+
+HWCAP2_SMEB16B16
+    Functionality implied by ID_AA64SMFR0_EL1.B16B16 == 0b1
+
+HWCAP2_SMEF16F16
+    Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1
+
 4. Unused AT_HWCAP bits
 -----------------------
 
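The new HWCAP2_SME* bits above surface in the AT_HWCAP2 auxiliary vector
entry. A minimal userspace probe, assuming uapi headers that already carry
the HWCAP2_SME2 definition added later in this series, might look like:

        #include <stdio.h>
        #include <sys/auxv.h>
        #include <asm/hwcap.h>          /* HWCAP2_SME2 (new in this series) */

        int main(void)
        {
                unsigned long hwcap2 = getauxval(AT_HWCAP2);

                if (hwcap2 & HWCAP2_SME2)
                        printf("SME2 (and ZT0) available\n");
                return 0;
        }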
index 16d2db4c2e2e9b08c5a1005dbfd1ab9d8be7c7d0..68d1efb7d171fb1153b36108fb0fa42e8cb6e86c 100644 (file)
@@ -18,14 +18,19 @@ model features for SME is included in Appendix A.
 1.  General
 -----------
 
-* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
-  register state and TPIDR2_EL0 are tracked per thread.
+* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA and (when
+  present) ZTn register state and TPIDR2_EL0 are tracked per thread.
 
 * The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
   AT_HWCAP2 entry.  Presence of this flag implies the presence of the SME
   instructions and registers, and the Linux-specific system interfaces
   described in this document.  SME is reported in /proc/cpuinfo as "sme".
 
+* The presence of SME2 is reported to userspace via HWCAP2_SME2 in the
+  aux vector AT_HWCAP2 entry.  Presence of this flag implies the presence of
+  the SME2 instructions and ZT0, and the Linux-specific system interfaces
+  described in this document.  SME2 is reported in /proc/cpuinfo as "sme2".
+
 * Support for the execution of SME instructions in userspace can also be
   detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
   instruction, and checking that the value of the SME field is nonzero. [3]
@@ -44,6 +49,7 @@ model features for SME is included in Appendix A.
        HWCAP2_SME_B16F32
        HWCAP2_SME_F32F32
        HWCAP2_SME_FA64
+        HWCAP2_SME2
 
   This list may be extended over time as the SME architecture evolves.
 
@@ -52,8 +58,8 @@ model features for SME is included in Appendix A.
   cpu-feature-registers.txt for details.
 
 * Debuggers should restrict themselves to interacting with the target via the
-  NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets.  The recommended way
-  of detecting support for these regsets is to connect to a target process
+  NT_ARM_SVE, NT_ARM_SSVE, NT_ARM_ZA and NT_ARM_ZT regsets.  The recommended
+  way of detecting support for these regsets is to connect to a target process
   first and then attempt a
 
        ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
@@ -89,13 +95,13 @@ be zeroed.
 -------------------------
 
 * On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
-  ZA matrix are preserved.
+  ZA matrix and ZTn (if present) are preserved.
 
 * On syscall PSTATE.SM will be cleared and the SVE registers will be handled
   as per the standard SVE ABI.
 
-* Neither the SVE registers nor ZA are used to pass arguments to or receive
-  results from any syscall.
+* None of the SVE registers, ZA or ZTn are used to pass arguments to
+  or receive results from any syscall.
 
 * On process creation (eg, clone()) the newly created process will have
   PSTATE.SM cleared.
@@ -134,6 +140,14 @@ be zeroed.
   __reserved[] referencing this space.  za_context is then written in the
   extra space.  Refer to [1] for further details about this mechanism.
 
+* If ZTn is supported and PSTATE.ZA==1 then a signal frame record for ZTn will
+  be generated.
+
+* The signal record for ZTn has magic ZT_MAGIC (0x5a544e01) and consists of a
+  standard signal frame header followed by a struct zt_context specifying
+  the number of ZTn registers supported by the system, then zt_context.nregs
+  blocks of 64 bytes of data per register.
+
 
 5.  Signal return
 -----------------
@@ -151,6 +165,9 @@ When returning from a signal handler:
   the signal frame does not match the current vector length, the signal return
   attempt is treated as illegal, resulting in a forced SIGSEGV.
 
+* If ZTn is not supported or PSTATE.ZA==0 then it is illegal to have a
+  signal frame record for ZTn, resulting in a forced SIGSEGV.
+
 
 6.  prctl extensions
 --------------------
@@ -214,8 +231,8 @@ prctl(PR_SME_SET_VL, unsigned long arg)
       vector length that will be applied at the next execve() by the calling
       thread.
 
-    * Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
-      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+    * Changing the vector length causes all of ZA, ZTn, P0..P15, FFR and all
+      bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
       unspecified, including both streaming and non-streaming SVE state.
       Calling PR_SME_SET_VL with vl equal to the thread's current vector
       length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
@@ -317,6 +334,15 @@ The regset data starts with struct user_za_header, containing:
 
 * The effect of writing a partial, incomplete payload is unspecified.
 
+* A new regset NT_ARM_ZT is defined for access to ZTn state via
+  PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+* The NT_ARM_ZT regset consists of a single 512 bit register.
+
+* When PSTATE.ZA==0 reads of NT_ARM_ZT will report all bits of ZTn as 0.
+
+* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
+
 
 8.  ELF coredump extensions
 ---------------------------
@@ -331,6 +357,11 @@ The regset data starts with struct user_za_header, containing:
   been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
   when the coredump was generated.
 
+* A NT_ARM_ZT note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_ZT were executed for each thread
+  when the coredump was generated.
+
 * The NT_ARM_TLS note will be extended to two registers, the second register
   will contain TPIDR2_EL0 on systems that support SME and will be read as
   zero with writes ignored otherwise.
@@ -406,6 +437,9 @@ In A64 state, SME adds the following:
   For best system performance it is strongly encouraged for software to enable
   ZA only when it is actively being used.
 
+* A new ZT0 register is introduced when SME2 is present. This is a 512 bit
+  register which is accessible when PSTATE.ZA is set, as ZA itself is.
+
 * Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
   SMSTOP instructions or by access to the SVCR system register:
 
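The NT_ARM_ZT regset documented above follows the usual PTRACE_GETREGSET
pattern. A debugger-side sketch for reading the single 512-bit ZT0 register,
assuming NT_ARM_ZT from the linux/elf.h update included in this pull:

        #include <stdint.h>
        #include <sys/types.h>
        #include <sys/ptrace.h>
        #include <sys/uio.h>
        #include <linux/elf.h>          /* NT_ARM_ZT (new in this series) */

        /* Read ZT0 from a stopped tracee; reads back as zero when PSTATE.ZA == 0. */
        static int read_zt0(pid_t pid, uint8_t zt0[64])
        {
                struct iovec iov = { .iov_base = zt0, .iov_len = 64 };

                /* Fails (typically EINVAL) on kernels or CPUs without SME2. */
                return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
        }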
index fb1471cb5ed3dc1b1b8582e6ba3ace5a35c767ea..233ded73ff036d3600514736ea4d95050dcf0725 100644 (file)
@@ -11362,13 +11362,12 @@ F:    virt/kvm/*
 
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M:     Marc Zyngier <maz@kernel.org>
+M:     Oliver Upton <oliver.upton@linux.dev>
 R:     James Morse <james.morse@arm.com>
 R:     Suzuki K Poulose <suzuki.poulose@arm.com>
-R:     Oliver Upton <oliver.upton@linux.dev>
 R:     Zenghui Yu <yuzenghui@huawei.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     kvmarm@lists.linux.dev
-L:     kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
 F:     arch/arm64/include/asm/kvm*
index c0b178d1bb4f06b6fc99460084c26718798df35b..a51e6e8f31711014a73ac1d1e6b634f9e0b1d951 100644 (file)
 #define CLIDR_LOC(clidr)       (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
 #define CLIDR_LOUIS(clidr)     (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
 
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level)       (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level)                (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level)      \
+       (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
+#define CLIDR_TTYPE_SHIFT(level)       (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
+
 /*
  * Memory returned by kmalloc() may be used for DMA, so we must make
  * sure that all such allocations are cache aligned. Otherwise,
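As a worked example of the comment above: for level 2 the Ctype field occupies
CLIDR_EL1[5:3], so CLIDR_CTYPE(clidr, 2) evaluates to (clidr >> 3) & 0x7, where
a value of 0b100 would indicate a unified level-2 cache.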
index 03d1c9d7af8216d3bfd62d698fc754b76c887c88..fc2c739f48f104c8b6a659f3722d1e9704a11cb6 100644 (file)
@@ -769,6 +769,12 @@ static __always_inline bool system_supports_sme(void)
                cpus_have_const_cap(ARM64_SME);
 }
 
+static __always_inline bool system_supports_sme2(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_SME) &&
+               cpus_have_const_cap(ARM64_SME2);
+}
+
 static __always_inline bool system_supports_fa64(void)
 {
        return IS_ENABLED(CONFIG_ARM64_SME) &&
index 668569adf4d33138849ba9f93d3035a4afee0f7f..ea78c095a9c785a923c74cb2dd1324e6a4d4cfbc 100644 (file)
        __init_el2_nvhe_prepare_eret
 .endm
 
+#ifndef __KVM_NVHE_HYPERVISOR__
+// This will clobber tmp1 and tmp2, and expect tmp1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2
+       ubfx    \tmp1, \tmp1, #\fld, #\width
+       cbz     \tmp1, \fail
+
+       adr_l   \tmp1, \idreg\()_override
+       ldr     \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET]
+       ldr     \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET]
+       ubfx    \tmp2, \tmp2, #\fld, #\width
+       ubfx    \tmp1, \tmp1, #\fld, #\width
+       cmp     \tmp1, xzr
+       and     \tmp2, \tmp2, \tmp1
+       csinv   \tmp2, \tmp2, xzr, ne
+       cbnz    \tmp2, \pass
+       b       \fail
+.endm
+
+// This will clobber tmp1 and tmp2
+.macro check_override idreg, fld, pass, fail, tmp1, tmp2
+       mrs     \tmp1, \idreg\()_el1
+       __check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2
+.endm
+#else
+// This will clobber tmp
+.macro __check_override idreg, fld, width, pass, fail, tmp, ignore
+       ldr_l   \tmp, \idreg\()_el1_sys_val
+       ubfx    \tmp, \tmp, #\fld, #\width
+       cbnz    \tmp, \pass
+       b       \fail
+.endm
+
+.macro check_override idreg, fld, pass, fail, tmp, ignore
+       __check_override \idreg \fld 4 \pass \fail \tmp \ignore
+.endm
+#endif
+
+.macro finalise_el2_state
+       check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
+
+.Linit_sve_\@: /* SVE register access */
+       mrs     x0, cptr_el2                    // Disable SVE traps
+       bic     x0, x0, #CPTR_EL2_TZ
+       msr     cptr_el2, x0
+       isb
+       mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
+       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
+
+.Lskip_sve_\@:
+       check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
+
+.Linit_sme_\@: /* SME register access and priority mapping */
+       mrs     x0, cptr_el2                    // Disable SME traps
+       bic     x0, x0, #CPTR_EL2_TSM
+       msr     cptr_el2, x0
+       isb
+
+       mrs     x1, sctlr_el2
+       orr     x1, x1, #SCTLR_ELx_ENTP2        // Disable TPIDR2 traps
+       msr     sctlr_el2, x1
+       isb
+
+       mov     x0, #0                          // SMCR controls
+
+       // Full FP in SM?
+       mrs_s   x1, SYS_ID_AA64SMFR0_EL1
+       __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2
+
+.Linit_sme_fa64_\@:
+       orr     x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+       // ZT0 available?
+       mrs_s   x1, SYS_ID_AA64SMFR0_EL1
+       __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2
+.Linit_sme_zt0_\@:
+       orr     x0, x0, SMCR_ELx_EZT0_MASK
+.Lskip_sme_zt0_\@:
+
+       orr     x0, x0, #SMCR_ELx_LEN_MASK      // Enable full SME vector
+       msr_s   SYS_SMCR_EL2, x0                // length for EL1.
+
+       mrs_s   x1, SYS_SMIDR_EL1               // Priority mapping supported?
+       ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+       cbz     x1, .Lskip_sme_\@
+
+       msr_s   SYS_SMPRIMAP_EL2, xzr           // Make all priorities equal
+
+       mrs     x1, id_aa64mmfr1_el1            // HCRX_EL2 present?
+       ubfx    x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+       cbz     x1, .Lskip_sme_\@
+
+       mrs_s   x1, SYS_HCRX_EL2
+       orr     x1, x1, #HCRX_EL2_SMPME_MASK    // Enable priority mapping
+       msr_s   SYS_HCRX_EL2, x1
+.Lskip_sme_\@:
+.endm
+
 #endif /* __ARM_KVM_INIT_H__ */
index 206de10524e338c9406d57641170f374527dd178..8487aec9b6587759eb6a57a9bd8df054785bd9a8 100644 (file)
                (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >>          \
                 ESR_ELx_SYS64_ISS_OP2_SHIFT))
 
+/* ISS field definitions for ERET/ERETAA/ERETAB trapping */
+#define ESR_ELx_ERET_ISS_ERET          0x2
+#define ESR_ELx_ERET_ISS_ERETA         0x1
+
 /*
  * ISS field definitions for floating-point exception traps
  * (FP_EXC_32/FP_EXC_64).
 #define ESR_ELx_SME_ISS_ILL            1
 #define ESR_ELx_SME_ISS_SM_DISABLED    2
 #define ESR_ELx_SME_ISS_ZA_DISABLED    3
+#define ESR_ELx_SME_ISS_ZT_DISABLED    4
 
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
index e6fa1e2982c8a72782a52ba12da643108b041ce3..67f2fb781f59e8ad3fdc5dc6fe9e692a1fbd27d2 100644 (file)
@@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
 struct cpu_fp_state {
        struct user_fpsimd_state *st;
        void *sve_state;
-       void *za_state;
+       void *sme_state;
        u64 *svcr;
        unsigned int sve_vl;
        unsigned int sme_vl;
@@ -105,6 +105,13 @@ static inline void *sve_pffr(struct thread_struct *thread)
        return (char *)thread->sve_state + sve_ffr_offset(vl);
 }
 
+static inline void *thread_zt_state(struct thread_struct *thread)
+{
+       /* The ZT register state is stored immediately after the ZA state */
+       unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+       return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+}
+
 extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
                           int restore_ffr);
@@ -112,12 +119,13 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
 extern void sve_set_vq(unsigned long vq_minus_1);
 extern void sme_set_vq(unsigned long vq_minus_1);
-extern void za_save_state(void *state);
-extern void za_load_state(void const *state);
+extern void sme_save_state(void *state, int zt);
+extern void sme_load_state(void const *state, int zt);
 
 struct arm64_cpu_capabilities;
 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 
 extern u64 read_zcr_features(void);
@@ -355,14 +363,20 @@ extern int sme_get_current_vl(void);
 
 /*
  * Return how many bytes of memory are required to store the full SME
- * specific state (currently just ZA) for task, given task's currently
- * configured vector length.
+ * specific state for task, given task's currently configured vector
+ * length.
  */
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
 {
        unsigned int vl = task_get_sme_vl(task);
+       size_t size;
+
+       size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+
+       if (system_supports_sme2())
+               size += ZT_SIG_REG_SIZE;
 
-       return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+       return size;
 }
 
 #else
@@ -382,7 +396,7 @@ static inline int sme_max_virtualisable_vl(void) { return 0; }
 static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
 static inline int sme_get_current_vl(void) { return -EINVAL; }
 
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
 {
        return 0;
 }
index 5e0910cf483216774bed55087db0462bacdcaa5a..cd03819a3b686be41883f5f63ca4d74c882e9a71 100644 (file)
                | ((\offset) & 7)
 .endm
 
+/*
+ * LDR (ZT0)
+ *
+ *     LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+       _check_general_reg \nx
+       .inst   0xe11f8000      \
+                | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ *     STR ZT0, nx
+ */
+.macro _str_zt nx
+       _check_general_reg \nx
+       .inst   0xe13f8000              \
+               | (\nx << 5)
+.endm
+
 /*
  * Zero the entire ZA array
  *     ZERO ZA
index 06dd12c514e61cdc6824aa0cc55278408f2c50a7..475c803ecf42852851833e3dc6a7d90e6f1d51ad 100644 (file)
 #define KERNEL_HWCAP_CSSC              __khwcap2_feature(CSSC)
 #define KERNEL_HWCAP_RPRFM             __khwcap2_feature(RPRFM)
 #define KERNEL_HWCAP_SVE2P1            __khwcap2_feature(SVE2P1)
+#define KERNEL_HWCAP_SME2              __khwcap2_feature(SME2)
+#define KERNEL_HWCAP_SME2P1            __khwcap2_feature(SME2P1)
+#define KERNEL_HWCAP_SME_I16I32                __khwcap2_feature(SME_I16I32)
+#define KERNEL_HWCAP_SME_BI32I32       __khwcap2_feature(SME_BI32I32)
+#define KERNEL_HWCAP_SME_B16B16                __khwcap2_feature(SME_B16B16)
+#define KERNEL_HWCAP_SME_F16F16                __khwcap2_feature(SME_F16F16)
 
 /*
  * This yields a mask that user programs can use to figure out what
index 26b0c97df98636274163ab0bb6a5ea5e5c6f38bc..baef29fcbeeedc9a978a8ee2aa2346ad7382f314 100644 (file)
  * SWIO:       Turn set/way invalidates into set/way clean+invalidate
  * PTW:                Take a stage2 fault if a stage1 walk steps in device memory
  * TID3:       Trap EL1 reads of group 3 ID registers
+ * TID2:       Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
                         HCR_BSU_IS | HCR_FB | HCR_TACR | \
                         HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
-                        HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 )
+                        HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
        ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
        ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
        ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
-       ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+       ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
 
-#define CPACR_EL1_TTA          (1 << 28)
 #define CPACR_EL1_DEFAULT      (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
                                 CPACR_EL1_ZEN_EL1EN)
 
+#define kvm_mode_names                         \
+       { PSR_MODE_EL0t,        "EL0t" },       \
+       { PSR_MODE_EL1t,        "EL1t" },       \
+       { PSR_MODE_EL1h,        "EL1h" },       \
+       { PSR_MODE_EL2t,        "EL2t" },       \
+       { PSR_MODE_EL2h,        "EL2h" },       \
+       { PSR_MODE_EL3t,        "EL3t" },       \
+       { PSR_MODE_EL3h,        "EL3h" },       \
+       { PSR_AA32_MODE_USR,    "32-bit USR" }, \
+       { PSR_AA32_MODE_FIQ,    "32-bit FIQ" }, \
+       { PSR_AA32_MODE_IRQ,    "32-bit IRQ" }, \
+       { PSR_AA32_MODE_SVC,    "32-bit SVC" }, \
+       { PSR_AA32_MODE_ABT,    "32-bit ABT" }, \
+       { PSR_AA32_MODE_HYP,    "32-bit HYP" }, \
+       { PSR_AA32_MODE_UND,    "32-bit UND" }, \
+       { PSR_AA32_MODE_SYS,    "32-bit SYS" }
+
 #endif /* __ARM64_KVM_ARM_H__ */
index 193583df2d9c45761ded5a28a750e7d539d96945..b31b32ecbe2d12697dd8c686a6e72de3ee0da79f 100644 (file)
@@ -33,6 +33,12 @@ enum exception_type {
        except_type_serror      = 0x180,
 };
 
+#define kvm_exception_type_names               \
+       { except_type_sync,     "SYNC"   },     \
+       { except_type_irq,      "IRQ"    },     \
+       { except_type_fiq,      "FIQ"    },     \
+       { except_type_serror,   "SERROR" }
+
 bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
 void kvm_skip_instr32(struct kvm_vcpu *vcpu);
 
@@ -44,6 +50,10 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
 
 void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
 
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+
 #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
@@ -88,10 +98,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
        if (vcpu_el1_is_32bit(vcpu))
                vcpu->arch.hcr_el2 &= ~HCR_RW;
 
-       if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
-           vcpu_el1_is_32bit(vcpu))
-               vcpu->arch.hcr_el2 |= HCR_TID2;
-
        if (kvm_has_mte(vcpu->kvm))
                vcpu->arch.hcr_el2 |= HCR_ATA;
 }
@@ -183,6 +189,62 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
                vcpu_gp_regs(vcpu)->regs[reg_num] = val;
 }
 
+static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
+{
+       switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
+       case PSR_MODE_EL2h:
+       case PSR_MODE_EL2t:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
+{
+       return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
+}
+
+static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
+{
+       return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
+}
+
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+{
+       return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt);
+}
+
+static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt)
+{
+       return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE;
+}
+
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
+{
+       return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt);
+}
+
+static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
+{
+       /*
+        * We are in a hypervisor context if the vcpu mode is EL2 or
+        * E2H and TGE bits are set. The latter means we are in the user space
+        * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'
+        *
+        * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
+        * rest of the KVM code, and will result in a misbehaving guest.
+        */
+       return vcpu_is_el2_ctxt(ctxt) ||
+               (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) ||
+               __vcpu_el2_tge_is_set(ctxt);
+}
+
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
+{
+       return __is_hyp_ctxt(&vcpu->arch.ctxt);
+}
+
 /*
  * The layout of SPSR for an AArch32 state is different when observed from an
  * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
index 113e20fdbb56bed82b5db0aa2a4a10fc5f3137a6..a1892a8f603236984721b7c2f74e810f1d5bbe21 100644 (file)
 enum kvm_mode {
        KVM_MODE_DEFAULT,
        KVM_MODE_PROTECTED,
+       KVM_MODE_NV,
        KVM_MODE_NONE,
 };
+#ifdef CONFIG_KVM
 enum kvm_mode kvm_get_mode(void);
+#else
+static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
+#endif
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
@@ -252,6 +257,7 @@ struct kvm_vcpu_fault_info {
 enum vcpu_sysreg {
        __INVALID_SYSREG__,   /* 0 is reserved as an invalid value */
        MPIDR_EL1,      /* MultiProcessor Affinity Register */
+       CLIDR_EL1,      /* Cache Level ID Register */
        CSSELR_EL1,     /* Cache Size Selection Register */
        SCTLR_EL1,      /* System Control Register */
        ACTLR_EL1,      /* Auxiliary Control Register */
@@ -320,12 +326,43 @@ enum vcpu_sysreg {
        TFSR_EL1,       /* Tag Fault Status Register (EL1) */
        TFSRE0_EL1,     /* Tag Fault Status Register (EL0) */
 
-       /* 32bit specific registers. Keep them at the end of the range */
+       /* 32bit specific registers. */
        DACR32_EL2,     /* Domain Access Control Register */
        IFSR32_EL2,     /* Instruction Fault Status Register */
        FPEXC32_EL2,    /* Floating-Point Exception Control Register */
        DBGVCR32_EL2,   /* Debug Vector Catch Register */
 
+       /* EL2 registers */
+       VPIDR_EL2,      /* Virtualization Processor ID Register */
+       VMPIDR_EL2,     /* Virtualization Multiprocessor ID Register */
+       SCTLR_EL2,      /* System Control Register (EL2) */
+       ACTLR_EL2,      /* Auxiliary Control Register (EL2) */
+       HCR_EL2,        /* Hypervisor Configuration Register */
+       MDCR_EL2,       /* Monitor Debug Configuration Register (EL2) */
+       CPTR_EL2,       /* Architectural Feature Trap Register (EL2) */
+       HSTR_EL2,       /* Hypervisor System Trap Register */
+       HACR_EL2,       /* Hypervisor Auxiliary Control Register */
+       TTBR0_EL2,      /* Translation Table Base Register 0 (EL2) */
+       TTBR1_EL2,      /* Translation Table Base Register 1 (EL2) */
+       TCR_EL2,        /* Translation Control Register (EL2) */
+       VTTBR_EL2,      /* Virtualization Translation Table Base Register */
+       VTCR_EL2,       /* Virtualization Translation Control Register */
+       SPSR_EL2,       /* EL2 saved program status register */
+       ELR_EL2,        /* EL2 exception link register */
+       AFSR0_EL2,      /* Auxiliary Fault Status Register 0 (EL2) */
+       AFSR1_EL2,      /* Auxiliary Fault Status Register 1 (EL2) */
+       ESR_EL2,        /* Exception Syndrome Register (EL2) */
+       FAR_EL2,        /* Fault Address Register (EL2) */
+       HPFAR_EL2,      /* Hypervisor IPA Fault Address Register */
+       MAIR_EL2,       /* Memory Attribute Indirection Register (EL2) */
+       AMAIR_EL2,      /* Auxiliary Memory Attribute Indirection Register (EL2) */
+       VBAR_EL2,       /* Vector Base Address Register (EL2) */
+       RVBAR_EL2,      /* Reset Vector Base Address Register */
+       CONTEXTIDR_EL2, /* Context ID Register (EL2) */
+       TPIDR_EL2,      /* EL2 Software Thread ID Register */
+       CNTHCTL_EL2,    /* Counter-timer Hypervisor Control register */
+       SP_EL2,         /* EL2 Stack Pointer */
+
        NR_SYS_REGS     /* Nothing after this line! */
 };
 
@@ -501,6 +538,9 @@ struct kvm_vcpu_arch {
                u64 last_steal;
                gpa_t base;
        } steal;
+
+       /* Per-vcpu CCSIDR override or NULL */
+       u32 *ccsidr;
 };
 
 /*
@@ -598,7 +638,7 @@ struct kvm_vcpu_arch {
 #define EXCEPT_AA64_EL1_IRQ    __vcpu_except_flags(1)
 #define EXCEPT_AA64_EL1_FIQ    __vcpu_except_flags(2)
 #define EXCEPT_AA64_EL1_SERR   __vcpu_except_flags(3)
-/* For AArch64 with NV (one day): */
+/* For AArch64 with NV: */
 #define EXCEPT_AA64_EL2_SYNC   __vcpu_except_flags(4)
 #define EXCEPT_AA64_EL2_IRQ    __vcpu_except_flags(5)
 #define EXCEPT_AA64_EL2_FIQ    __vcpu_except_flags(6)
@@ -609,6 +649,8 @@ struct kvm_vcpu_arch {
 #define DEBUG_STATE_SAVE_SPE   __vcpu_single_flag(iflags, BIT(5))
 /* Save TRBE context if active  */
 #define DEBUG_STATE_SAVE_TRBE  __vcpu_single_flag(iflags, BIT(6))
+/* vcpu running in HYP context */
+#define VCPU_HYP_CONTEXT       __vcpu_single_flag(iflags, BIT(7))
 
 /* SVE enabled for host EL0 */
 #define HOST_SVE_ENABLED       __vcpu_single_flag(sflags, BIT(0))
@@ -705,7 +747,6 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
                return false;
 
        switch (reg) {
-       case CSSELR_EL1:        *val = read_sysreg_s(SYS_CSSELR_EL1);   break;
        case SCTLR_EL1:         *val = read_sysreg_s(SYS_SCTLR_EL12);   break;
        case CPACR_EL1:         *val = read_sysreg_s(SYS_CPACR_EL12);   break;
        case TTBR0_EL1:         *val = read_sysreg_s(SYS_TTBR0_EL12);   break;
@@ -750,7 +791,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
                return false;
 
        switch (reg) {
-       case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    break;
        case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    break;
        case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    break;
        case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    break;
@@ -916,12 +956,12 @@ void kvm_arm_vmid_clear_active(void);
 
 static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
 {
-       vcpu_arch->steal.base = GPA_INVALID;
+       vcpu_arch->steal.base = INVALID_GPA;
 }
 
 static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
 {
-       return (vcpu_arch->steal.base != GPA_INVALID);
+       return (vcpu_arch->steal.base != INVALID_GPA);
 }
 
 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
index 6797eafe7890b596ec97b65beba1a5312143a2f9..bdd9cf546d95585814bed3151a021d553aad8320 100644 (file)
@@ -122,6 +122,7 @@ extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
 
 extern unsigned long kvm_nvhe_sym(__icache_flags);
 extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
index 7f7c1231679e284530e8c370f5d73064ce2f1b6a..083cc47dca086a272cd80e04a50a206aeff9fb09 100644 (file)
@@ -115,6 +115,7 @@ alternative_cb_end
 #include <asm/cache.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
+#include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
 
 void kvm_update_va_mask(struct alt_instr *alt,
@@ -192,7 +193,15 @@ struct kvm;
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
-       return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+       u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C;
+       int reg;
+
+       if (vcpu_is_el2(vcpu))
+               reg = SCTLR_EL2;
+       else
+               reg = SCTLR_EL1;
+
+       return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits;
 }
 
 static inline void __clean_dcache_guest_page(void *va, size_t size)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
new file mode 100644 (file)
index 0000000..8fb67f0
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_NESTED_H
+#define __ARM64_KVM_NESTED_H
+
+#include <linux/kvm_host.h>
+
+static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
+{
+       return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
+               cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+               test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
+}
+
+struct sys_reg_params;
+struct sys_reg_desc;
+
+void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
+                         const struct sys_reg_desc *r);
+
+#endif /* __ARM64_KVM_NESTED_H */
index 63f81b27a4e302cede0605430ca0759d308f6f17..4cd6762bda805d16fe798a1db258155ced0d1a28 100644 (file)
@@ -71,6 +71,11 @@ static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
        return pte;
 }
 
+static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
+{
+       return __phys_to_pfn(kvm_pte_to_phys(pte));
+}
+
 static inline u64 kvm_granule_shift(u32 level)
 {
        /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
@@ -188,12 +193,15 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
  *                                     children.
  * @KVM_PGTABLE_WALK_SHARED:           Indicates the page-tables may be shared
  *                                     with other software walkers.
+ * @KVM_PGTABLE_WALK_HANDLE_FAULT:     Indicates the page-table walk was
+ *                                     invoked from a fault handler.
  */
 enum kvm_pgtable_walk_flags {
        KVM_PGTABLE_WALK_LEAF                   = BIT(0),
        KVM_PGTABLE_WALK_TABLE_PRE              = BIT(1),
        KVM_PGTABLE_WALK_TABLE_POST             = BIT(2),
        KVM_PGTABLE_WALK_SHARED                 = BIT(3),
+       KVM_PGTABLE_WALK_HANDLE_FAULT           = BIT(4),
 };
 
 struct kvm_pgtable_visit_ctx {
index d51b32a6930960b248a763eea3bb9be9ab44fdce..3918f2a679707464ee35190cbef92aa4ee8866d7 100644 (file)
@@ -161,7 +161,7 @@ struct thread_struct {
        enum fp_type            fp_type;        /* registers FPSIMD or SVE? */
        unsigned int            fpsimd_cpu;
        void                    *sve_state;     /* SVE registers, if any */
-       void                    *za_state;      /* ZA register, if any */
+       void                    *sme_state;     /* ZA and ZT state, if any */
        unsigned int            vl[ARM64_VEC_MAX];      /* vector length */
        unsigned int            vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
        unsigned long           fault_address;  /* fault info */
index 1312fb48f18b5a510358b9fe55a8e9f9888f5e92..2be7fe8c5f104106d0f7c5ccbc048438dbd644ed 100644 (file)
 
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
-#define SYS_CCSIDR_EL1                 sys_reg(3, 1, 0, 0, 0)
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 
 #define SYS_RNDR_EL0                   sys_reg(3, 3, 2, 4, 0)
 
 #define SYS_PMCCFILTR_EL0              sys_reg(3, 3, 14, 15, 7)
 
+#define SYS_VPIDR_EL2                  sys_reg(3, 4, 0, 0, 0)
+#define SYS_VMPIDR_EL2                 sys_reg(3, 4, 0, 0, 5)
+
 #define SYS_SCTLR_EL2                  sys_reg(3, 4, 1, 0, 0)
+#define SYS_ACTLR_EL2                  sys_reg(3, 4, 1, 0, 1)
+#define SYS_HCR_EL2                    sys_reg(3, 4, 1, 1, 0)
+#define SYS_MDCR_EL2                   sys_reg(3, 4, 1, 1, 1)
+#define SYS_CPTR_EL2                   sys_reg(3, 4, 1, 1, 2)
+#define SYS_HSTR_EL2                   sys_reg(3, 4, 1, 1, 3)
 #define SYS_HFGRTR_EL2                 sys_reg(3, 4, 1, 1, 4)
 #define SYS_HFGWTR_EL2                 sys_reg(3, 4, 1, 1, 5)
 #define SYS_HFGITR_EL2                 sys_reg(3, 4, 1, 1, 6)
+#define SYS_HACR_EL2                   sys_reg(3, 4, 1, 1, 7)
+
+#define SYS_TTBR0_EL2                  sys_reg(3, 4, 2, 0, 0)
+#define SYS_TTBR1_EL2                  sys_reg(3, 4, 2, 0, 1)
+#define SYS_TCR_EL2                    sys_reg(3, 4, 2, 0, 2)
+#define SYS_VTTBR_EL2                  sys_reg(3, 4, 2, 1, 0)
+#define SYS_VTCR_EL2                   sys_reg(3, 4, 2, 1, 2)
+
 #define SYS_TRFCR_EL2                  sys_reg(3, 4, 1, 2, 1)
 #define SYS_HDFGRTR_EL2                        sys_reg(3, 4, 3, 1, 4)
 #define SYS_HDFGWTR_EL2                        sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2                        sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2                   sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2                    sys_reg(3, 4, 4, 0, 1)
+#define SYS_SP_EL1                     sys_reg(3, 4, 4, 1, 0)
 #define SYS_IFSR32_EL2                 sys_reg(3, 4, 5, 0, 1)
+#define SYS_AFSR0_EL2                  sys_reg(3, 4, 5, 1, 0)
+#define SYS_AFSR1_EL2                  sys_reg(3, 4, 5, 1, 1)
 #define SYS_ESR_EL2                    sys_reg(3, 4, 5, 2, 0)
 #define SYS_VSESR_EL2                  sys_reg(3, 4, 5, 2, 3)
 #define SYS_FPEXC32_EL2                        sys_reg(3, 4, 5, 3, 0)
 #define SYS_TFSR_EL2                   sys_reg(3, 4, 5, 6, 0)
 
-#define SYS_VDISR_EL2                  sys_reg(3, 4, 12, 1,  1)
+#define SYS_FAR_EL2                    sys_reg(3, 4, 6, 0, 0)
+#define SYS_HPFAR_EL2                  sys_reg(3, 4, 6, 0, 4)
+
+#define SYS_MAIR_EL2                   sys_reg(3, 4, 10, 2, 0)
+#define SYS_AMAIR_EL2                  sys_reg(3, 4, 10, 3, 0)
+
+#define SYS_VBAR_EL2                   sys_reg(3, 4, 12, 0, 0)
+#define SYS_RVBAR_EL2                  sys_reg(3, 4, 12, 0, 1)
+#define SYS_RMR_EL2                    sys_reg(3, 4, 12, 0, 2)
+#define SYS_VDISR_EL2                  sys_reg(3, 4, 12, 1, 1)
 #define __SYS__AP0Rx_EL2(x)            sys_reg(3, 4, 12, 8, x)
 #define SYS_ICH_AP0R0_EL2              __SYS__AP0Rx_EL2(0)
 #define SYS_ICH_AP0R1_EL2              __SYS__AP0Rx_EL2(1)
 #define SYS_ICH_LR14_EL2               __SYS__LR8_EL2(6)
 #define SYS_ICH_LR15_EL2               __SYS__LR8_EL2(7)
 
+#define SYS_CONTEXTIDR_EL2             sys_reg(3, 4, 13, 0, 1)
+#define SYS_TPIDR_EL2                  sys_reg(3, 4, 13, 0, 2)
+
+#define SYS_CNTVOFF_EL2                        sys_reg(3, 4, 14, 0, 3)
+#define SYS_CNTHCTL_EL2                        sys_reg(3, 4, 14, 1, 0)
+
 /* VHE encodings for architectural EL0/1 system registers */
 #define SYS_SCTLR_EL12                 sys_reg(3, 5, 1, 0, 0)
 #define SYS_TTBR0_EL12                 sys_reg(3, 5, 2, 0, 0)
 #define SYS_CNTV_CTL_EL02              sys_reg(3, 5, 14, 3, 1)
 #define SYS_CNTV_CVAL_EL02             sys_reg(3, 5, 14, 3, 2)
 
+#define SYS_SP_EL2                     sys_reg(3, 6,  4, 1, 0)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_ENTP2        (BIT(60))
 #define SCTLR_ELx_DSSBS        (BIT(44))
index b713d30544f139211bd619680daf6fa3f452b645..69a4fb749c65d42197a7696a93756eb861caf227 100644 (file)
 #define HWCAP2_CSSC            (1UL << 34)
 #define HWCAP2_RPRFM           (1UL << 35)
 #define HWCAP2_SVE2P1          (1UL << 36)
+#define HWCAP2_SME2            (1UL << 37)
+#define HWCAP2_SME2P1          (1UL << 38)
+#define HWCAP2_SME_I16I32      (1UL << 39)
+#define HWCAP2_SME_BI32I32     (1UL << 40)
+#define HWCAP2_SME_B16B16      (1UL << 41)
+#define HWCAP2_SME_F16F16      (1UL << 42)
 
 #endif /* _UAPI__ASM_HWCAP_H */
index a7a857f1784d80d6264eeebecbeb371c7d13ba5b..f8129c624b0709815cbd266d569bb12fb70e6047 100644 (file)
@@ -109,6 +109,7 @@ struct kvm_regs {
 #define KVM_ARM_VCPU_SVE               4 /* enable SVE for this CPU */
 #define KVM_ARM_VCPU_PTRAUTH_ADDRESS   5 /* VCPU uses address authentication */
 #define KVM_ARM_VCPU_PTRAUTH_GENERIC   6 /* VCPU uses generic authentication */
+#define KVM_ARM_VCPU_HAS_EL2           7 /* Support nested virtualization */
 
 struct kvm_vcpu_init {
        __u32 target;
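Userspace opts a vCPU into the vEL2 state added by this series through the new
feature bit; a rough sketch of the init sequence (fd handling elided, and the
flag only exists with the updated uapi headers):

        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* vm_fd/vcpu_fd assumed to come from KVM_CREATE_VM / KVM_CREATE_VCPU. */
        static int vcpu_init_nested(int vm_fd, int vcpu_fd)
        {
                struct kvm_vcpu_init init;

                memset(&init, 0, sizeof(init));
                if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init))
                        return -1;

                init.features[0] |= 1u << KVM_ARM_VCPU_HAS_EL2; /* experimental */

                return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
        }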
index 9525041e4a1488b1cef97e2d913b39d3c2177765..46e9072985a5522b54a346c0e17b1c3dde529a11 100644 (file)
@@ -152,6 +152,14 @@ struct za_context {
        __u16 __reserved[3];
 };
 
+#define ZT_MAGIC       0x5a544e01
+
+struct zt_context {
+       struct _aarch64_ctx head;
+       __u16 nregs;
+       __u16 __reserved[3];
+};
+
 #endif /* !__ASSEMBLY__ */
 
 #include <asm/sve_context.h>
@@ -304,4 +312,15 @@ struct za_context {
 #define ZA_SIG_CONTEXT_SIZE(vq) \
                (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
 
+#define ZT_SIG_REG_SIZE 512
+
+#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8)
+
+#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
+
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
+
+#define ZT_SIG_CONTEXT_SIZE(n) \
+       (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
+
 #endif /* _UAPI__ASM_SIGCONTEXT_H */
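Following the signal-frame description in sme.rst above, a handler can locate
the ZTn record by walking the _aarch64_ctx headers for ZT_MAGIC. A simplified
sketch (ignoring the extra_context indirection used for oversized frames, and
requiring the updated asm/sigcontext.h):

        #include <stddef.h>
        #include <ucontext.h>
        #include <asm/sigcontext.h>     /* struct zt_context, ZT_MAGIC (new in this series) */

        /* Return the zt_context record in a signal frame, or NULL if absent. */
        static struct zt_context *find_zt(ucontext_t *uc)
        {
                struct _aarch64_ctx *head =
                        (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

                while (head->magic) {                   /* record list is zero-terminated */
                        if (head->magic == ZT_MAGIC)
                                return (struct zt_context *)head;
                        head = (struct _aarch64_ctx *)((char *)head + head->size);
                }
                return NULL;    /* SME2 absent or PSTATE.ZA == 0 */
        }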
index 97c42be71338a9ee29b79ce823592d238c5ba0fe..daa7b3f55997aafd46a58f07cfa73d2eaacf0b7a 100644 (file)
 #include <linux/of.h>
 
 #define MAX_CACHE_LEVEL                        7       /* Max 7 level supported */
-/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
-#define CLIDR_CTYPE_SHIFT(level)       (3 * (level - 1))
-#define CLIDR_CTYPE_MASK(level)                (7 << CLIDR_CTYPE_SHIFT(level))
-#define CLIDR_CTYPE(clidr, level)      \
-       (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
 
 int cache_line_size(void)
 {
index a77315b338e6113c4525ce8a24bae485ea869541..23bd2a926b7428b9a0a19a98c988a711cc080539 100644 (file)
@@ -282,16 +282,26 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
 static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
                       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
        ARM64_FTR_END,
@@ -1956,6 +1966,20 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
                write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
 }
 
+static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
+                                   int scope)
+{
+       if (kvm_get_mode() != KVM_MODE_NV)
+               return false;
+
+       if (!has_cpuid_feature(cap, scope)) {
+               pr_warn("unavailable: %s\n", cap->desc);
+               return false;
+       }
+
+       return true;
+}
+
 #ifdef CONFIG_ARM64_PAN
 static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 {
@@ -2215,6 +2239,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = runs_at_el2,
                .cpu_enable = cpu_copy_el2regs,
        },
+       {
+               .desc = "Nested Virtualization Support",
+               .capability = ARM64_HAS_NESTED_VIRT,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .matches = has_nested_virt_support,
+               .sys_reg = SYS_ID_AA64MMFR2_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64MMFR2_EL1_NV_SHIFT,
+               .field_width = 4,
+               .min_field_value = ID_AA64MMFR2_EL1_NV_IMP,
+       },
        {
                .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
@@ -2649,6 +2684,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .cpu_enable = fa64_kernel_enable,
        },
+       {
+               .desc = "SME2",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_SME2,
+               .sys_reg = SYS_ID_AA64PFR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64PFR1_EL1_SME_SHIFT,
+               .field_width = ID_AA64PFR1_EL1_SME_WIDTH,
+               .min_field_value = ID_AA64PFR1_EL1_SME_SME2,
+               .matches = has_cpuid_feature,
+               .cpu_enable = sme2_kernel_enable,
+       },
 #endif /* CONFIG_ARM64_SME */
        {
                .desc = "WFx with timeout",
@@ -2827,11 +2874,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 #ifdef CONFIG_ARM64_SME
        HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16B16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_BI32I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
        HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
 #endif /* CONFIG_ARM64_SME */
        {},
index 379695262b77ede7e8fa568ce8d437e300582de6..85e54417d141cefa8f73131f9f3445a4657719c1 100644 (file)
@@ -119,6 +119,12 @@ static const char *const hwcap_str[] = {
        [KERNEL_HWCAP_CSSC]             = "cssc",
        [KERNEL_HWCAP_RPRFM]            = "rprfm",
        [KERNEL_HWCAP_SVE2P1]           = "sve2p1",
+       [KERNEL_HWCAP_SME2]             = "sme2",
+       [KERNEL_HWCAP_SME2P1]           = "sme2p1",
+       [KERNEL_HWCAP_SME_I16I32]       = "smei16i32",
+       [KERNEL_HWCAP_SME_BI32I32]      = "smebi32i32",
+       [KERNEL_HWCAP_SME_B16B16]       = "smeb16b16",
+       [KERNEL_HWCAP_SME_F16F16]       = "smef16f16",
 };
 
 #ifdef CONFIG_COMPAT
index 229436f33df5affb02df5552f62adbe053b72eef..6325db1a2179cf5ddfefd9ea32cb5abce2bd2c7c 100644 (file)
@@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq)
 SYM_FUNC_END(sme_set_vq)
 
 /*
- * Save the SME state
+ * Save the ZA and ZT state
  *
  * x0 - pointer to buffer for state
+ * x1 - number of ZT registers to save
  */
-SYM_FUNC_START(za_save_state)
-       _sme_rdsvl      1, 1            // x1 = VL/8
-       sme_save_za 0, x1, 12
+SYM_FUNC_START(sme_save_state)
+       _sme_rdsvl      2, 1            // x2 = VL/8
+       sme_save_za 0, x2, 12           // Leaves x0 pointing to the end of ZA
+
+       cbz     x1, 1f
+       _str_zt 0
+1:
        ret
-SYM_FUNC_END(za_save_state)
+SYM_FUNC_END(sme_save_state)
 
 /*
- * Load the SME state
+ * Load the ZA and ZT state
  *
  * x0 - pointer to buffer for state
+ * x1 - number of ZT registers to save
  */
-SYM_FUNC_START(za_load_state)
-       _sme_rdsvl      1, 1            // x1 = VL/8
-       sme_load_za 0, x1, 12
+SYM_FUNC_START(sme_load_state)
+       _sme_rdsvl      2, 1            // x2 = VL/8
+       sme_load_za 0, x2, 12           // Leaves x0 pointing to the end of ZA
+
+       cbz     x1, 1f
+       _ldr_zt 0
+1:
        ret
-SYM_FUNC_END(za_load_state)
+SYM_FUNC_END(sme_load_state)
 
 #endif /* CONFIG_ARM64_SME */
index b6ef1af0122ebde4984b504e8a14578fe6996ffb..7c67190c44e48ece5f46e0f51aad186cdfdcdb49 100644 (file)
@@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
 /*
  * TIF_SME controls whether a task can use SME without trapping while
  * in userspace, when TIF_SME is set then we must have storage
- * alocated in sve_state and za_state to store the contents of both ZA
+ * alocated in sve_state and sme_state to store the contents of both ZA
  * and the SVE registers for both streaming and non-streaming modes.
  *
  * If both SVCR.ZA and SVCR.SM are disabled then at any point we
@@ -429,7 +429,8 @@ static void task_fpsimd_load(void)
                write_sysreg_s(current->thread.svcr, SYS_SVCR);
 
                if (thread_za_enabled(&current->thread))
-                       za_load_state(current->thread.za_state);
+                       sme_load_state(current->thread.sme_state,
+                                      system_supports_sme2());
 
                if (thread_sm_enabled(&current->thread))
                        restore_ffr = system_supports_fa64();
@@ -490,7 +491,8 @@ static void fpsimd_save(void)
                *svcr = read_sysreg_s(SYS_SVCR);
 
                if (*svcr & SVCR_ZA_MASK)
-                       za_save_state(last->za_state);
+                       sme_save_state(last->sme_state,
+                                      system_supports_sme2());
 
                /* If we are in streaming mode override regular SVE. */
                if (*svcr & SVCR_SM_MASK) {
@@ -1257,30 +1259,30 @@ void fpsimd_release_task(struct task_struct *dead_task)
 #ifdef CONFIG_ARM64_SME
 
 /*
- * Ensure that task->thread.za_state is allocated and sufficiently large.
+ * Ensure that task->thread.sme_state is allocated and sufficiently large.
  *
  * This function should be used only in preparation for replacing
- * task->thread.za_state with new data.  The memory is always zeroed
+ * task->thread.sme_state with new data.  The memory is always zeroed
  * here to prevent stale data from showing through: this is done in
  * the interest of testability and predictability, the architecture
  * guarantees that when ZA is enabled it will be zeroed.
  */
 void sme_alloc(struct task_struct *task)
 {
-       if (task->thread.za_state) {
-               memset(task->thread.za_state, 0, za_state_size(task));
+       if (task->thread.sme_state) {
+               memset(task->thread.sme_state, 0, sme_state_size(task));
                return;
        }
 
        /* This could potentially be up to 64K. */
-       task->thread.za_state =
-               kzalloc(za_state_size(task), GFP_KERNEL);
+       task->thread.sme_state =
+               kzalloc(sme_state_size(task), GFP_KERNEL);
 }
 
 static void sme_free(struct task_struct *task)
 {
-       kfree(task->thread.za_state);
-       task->thread.za_state = NULL;
+       kfree(task->thread.sme_state);
+       task->thread.sme_state = NULL;
 }
 
 void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
@@ -1298,6 +1300,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
        isb();
 }
 
+/*
+ * This must be called after sme_kernel_enable(); we rely on the
+ * feature table being sorted to ensure this.
+ */
+void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+       /* Allow use of ZT0 */
+       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
+                      SYS_SMCR_EL1);
+}
+
 /*
  * This must be called after sme_kernel_enable(); we rely on the
  * feature table being sorted to ensure this.
@@ -1488,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
 
        sve_alloc(current, false);
        sme_alloc(current);
-       if (!current->thread.sve_state || !current->thread.za_state) {
+       if (!current->thread.sve_state || !current->thread.sme_state) {
                force_sig(SIGKILL);
                return;
        }
@@ -1609,7 +1622,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
 void fpsimd_flush_thread(void)
 {
        void *sve_state = NULL;
-       void *za_state = NULL;
+       void *sme_state = NULL;
 
        if (!system_supports_fpsimd())
                return;
@@ -1634,8 +1647,8 @@ void fpsimd_flush_thread(void)
                clear_thread_flag(TIF_SME);
 
                /* Defer kfree() while in atomic context */
-               za_state = current->thread.za_state;
-               current->thread.za_state = NULL;
+               sme_state = current->thread.sme_state;
+               current->thread.sme_state = NULL;
 
                fpsimd_flush_thread_vl(ARM64_VEC_SME);
                current->thread.svcr = 0;
@@ -1645,7 +1658,7 @@ void fpsimd_flush_thread(void)
 
        put_cpu_fpsimd_context();
        kfree(sve_state);
-       kfree(za_state);
+       kfree(sme_state);
 }
 
 /*
@@ -1711,7 +1724,7 @@ static void fpsimd_bind_task_to_cpu(void)
        WARN_ON(!system_supports_fpsimd());
        last->st = &current->thread.uw.fpsimd_state;
        last->sve_state = current->thread.sve_state;
-       last->za_state = current->thread.za_state;
+       last->sme_state = current->thread.sme_state;
        last->sve_vl = task_get_sve_vl(current);
        last->sme_vl = task_get_sme_vl(current);
        last->svcr = &current->thread.svcr;
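
The ptrace and signal hunks further down call a thread_zt_state() helper whose definition is not part of this excerpt. Given that ZT0 is stored straight after the ZA image in sme_state, a plausible sketch is shown below; the helper body is an assumption based on that layout, not the kernel's actual definition.

/* Sketch: ZT0 lives immediately after the ZA tile in the sme_state buffer */
static void *example_thread_zt_state(struct thread_struct *thread)
{
        unsigned int vl = thread_get_sme_vl(thread);    /* streaming VL in bytes */

        return (char *)thread->sme_state + (size_t)vl * vl;
}
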
index 2ee18c860f2ab61de44444c7faa5ec77de7b3ca4..9439240c3fcf37dca6e043958f2da25ecb8c0e64 100644 (file)
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
-// Warning, hardcoded register allocation
-// This will clobber x1 and x2, and expect x1 to contain
-// the id register value as read from the HW
-.macro __check_override idreg, fld, width, pass, fail
-       ubfx    x1, x1, #\fld, #\width
-       cbz     x1, \fail
-
-       adr_l   x1, \idreg\()_override
-       ldr     x2, [x1, FTR_OVR_VAL_OFFSET]
-       ldr     x1, [x1, FTR_OVR_MASK_OFFSET]
-       ubfx    x2, x2, #\fld, #\width
-       ubfx    x1, x1, #\fld, #\width
-       cmp     x1, xzr
-       and     x2, x2, x1
-       csinv   x2, x2, xzr, ne
-       cbnz    x2, \pass
-       b       \fail
-.endm
-
-.macro check_override idreg, fld, pass, fail
-       mrs     x1, \idreg\()_el1
-       __check_override \idreg \fld 4 \pass \fail
-.endm
-
        .text
        .pushsection    .hyp.text, "ax"
 
@@ -98,58 +74,7 @@ SYM_CODE_START_LOCAL(elx_sync)
 SYM_CODE_END(elx_sync)
 
 SYM_CODE_START_LOCAL(__finalise_el2)
-       check_override id_aa64pfr0 ID_AA64PFR0_EL1_SVE_SHIFT .Linit_sve .Lskip_sve
-
-.Linit_sve:    /* SVE register access */
-       mrs     x0, cptr_el2                    // Disable SVE traps
-       bic     x0, x0, #CPTR_EL2_TZ
-       msr     cptr_el2, x0
-       isb
-       mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
-       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
-
-.Lskip_sve:
-       check_override id_aa64pfr1 ID_AA64PFR1_EL1_SME_SHIFT .Linit_sme .Lskip_sme
-
-.Linit_sme:    /* SME register access and priority mapping */
-       mrs     x0, cptr_el2                    // Disable SME traps
-       bic     x0, x0, #CPTR_EL2_TSM
-       msr     cptr_el2, x0
-       isb
-
-       mrs     x1, sctlr_el2
-       orr     x1, x1, #SCTLR_ELx_ENTP2        // Disable TPIDR2 traps
-       msr     sctlr_el2, x1
-       isb
-
-       mov     x0, #0                          // SMCR controls
-
-       // Full FP in SM?
-       mrs_s   x1, SYS_ID_AA64SMFR0_EL1
-       __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64
-
-.Linit_sme_fa64:
-       orr     x0, x0, SMCR_ELx_FA64_MASK
-.Lskip_sme_fa64:
-
-       orr     x0, x0, #SMCR_ELx_LEN_MASK      // Enable full SME vector
-       msr_s   SYS_SMCR_EL2, x0                // length for EL1.
-
-       mrs_s   x1, SYS_SMIDR_EL1               // Priority mapping supported?
-       ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
-       cbz     x1, .Lskip_sme
-
-       msr_s   SYS_SMPRIMAP_EL2, xzr           // Make all priorities equal
-
-       mrs     x1, id_aa64mmfr1_el1            // HCRX_EL2 present?
-       ubfx    x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
-       cbz     x1, .Lskip_sme
-
-       mrs_s   x1, SYS_HCRX_EL2
-       orr     x1, x1, #HCRX_EL2_SMPME_MASK    // Enable priority mapping
-       msr_s   SYS_HCRX_EL2, x1
-
-.Lskip_sme:
+       finalise_el2_state
 
        // nVHE? No way! Give me the real thing!
        // Sanity check: MMU *must* be off
@@ -157,7 +82,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
        tbnz    x1, #0, 1f
 
        // Needs to be VHE capable, obviously
-       check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f
+       check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f x1 x2
 
 1:     mov_q   x0, HVC_STUB_ERR
        eret
index 95133765ed29a0e4f9c68e1fecfb9d9332034454..d833d78a7f313563f86ab58c1d238e87492f0e88 100644 (file)
@@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = {
        .name           = "id_aa64smfr0",
        .override       = &id_aa64smfr0_override,
        .fields         = {
+               FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
                /* FA64 is a one bit field... :-/ */
                { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
                {}
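
With the field registered, SMEver can be clamped from the boot arguments using the usual <register>.<field>=<value> override syntax, for example (illustrative command line) to hide SME2 and later revisions from the cpufeature code:

        id_aa64smfr0.smever=0
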
index 269ac1c25ae2750d1e7eafd0333907db370ae1cb..71d59b5abede11aa0471f6cbd0cb8edb88575db0 100644 (file)
@@ -307,27 +307,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        /*
         * In the unlikely event that we create a new thread with ZA
-        * enabled we should retain the ZA state so duplicate it here.
-        * This may be shortly freed if we exec() or if CLONE_SETTLS
-        * but it's simpler to do it here. To avoid confusing the rest
-        * of the code ensure that we have a sve_state allocated
-        * whenever za_state is allocated.
+        * enabled we should retain the ZA and ZT state so duplicate
+        * it here.  This may be shortly freed if we exec() or if
+        * CLONE_SETTLS but it's simpler to do it here. To avoid
+        * confusing the rest of the code ensure that we have a
+        * sve_state allocated whenever sme_state is allocated.
         */
        if (thread_za_enabled(&src->thread)) {
                dst->thread.sve_state = kzalloc(sve_state_size(src),
                                                GFP_KERNEL);
                if (!dst->thread.sve_state)
                        return -ENOMEM;
-               dst->thread.za_state = kmemdup(src->thread.za_state,
-                                              za_state_size(src),
-                                              GFP_KERNEL);
-               if (!dst->thread.za_state) {
+
+               dst->thread.sme_state = kmemdup(src->thread.sme_state,
+                                               sme_state_size(src),
+                                               GFP_KERNEL);
+               if (!dst->thread.sme_state) {
                        kfree(dst->thread.sve_state);
                        dst->thread.sve_state = NULL;
                        return -ENOMEM;
                }
        } else {
-               dst->thread.za_state = NULL;
+               dst->thread.sme_state = NULL;
                clear_tsk_thread_flag(dst, TIF_SME);
        }
 
index 0c321ad23cd3a48d60a52f3a0a1209c4b7218ac2..89b87f1021edbe6d82669e7c3da74c2ffe50cf8d 100644 (file)
@@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target,
        if (thread_za_enabled(&target->thread)) {
                start = end;
                end = ZA_PT_SIZE(vq);
-               membuf_write(&to, target->thread.za_state, end - start);
+               membuf_write(&to, target->thread.sme_state, end - start);
        }
 
        /* Zero any trailing padding */
@@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target,
 
        /* Allocate/reinit ZA storage */
        sme_alloc(target);
-       if (!target->thread.za_state) {
+       if (!target->thread.sme_state) {
                ret = -ENOMEM;
                goto out;
        }
@@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target,
        start = ZA_PT_ZA_OFFSET;
        end = ZA_PT_SIZE(vq);
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                target->thread.za_state,
+                                target->thread.sme_state,
                                 start, end);
        if (ret)
                goto out;
@@ -1138,6 +1138,51 @@ out:
        return ret;
 }
 
+static int zt_get(struct task_struct *target,
+                 const struct user_regset *regset,
+                 struct membuf to)
+{
+       if (!system_supports_sme2())
+               return -EINVAL;
+
+       /*
+        * If PSTATE.ZA is not set then ZT will be zeroed when it is
+        * enabled so report the current register value as zero.
+        */
+       if (thread_za_enabled(&target->thread))
+               membuf_write(&to, thread_zt_state(&target->thread),
+                            ZT_SIG_REG_BYTES);
+       else
+               membuf_zero(&to, ZT_SIG_REG_BYTES);
+
+       return 0;
+}
+
+static int zt_set(struct task_struct *target,
+                 const struct user_regset *regset,
+                 unsigned int pos, unsigned int count,
+                 const void *kbuf, const void __user *ubuf)
+{
+       int ret;
+
+       if (!system_supports_sme2())
+               return -EINVAL;
+
+       if (!thread_za_enabled(&target->thread)) {
+               sme_alloc(target);
+               if (!target->thread.sme_state)
+                       return -ENOMEM;
+       }
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                thread_zt_state(&target->thread),
+                                0, ZT_SIG_REG_BYTES);
+       if (ret == 0)
+               target->thread.svcr |= SVCR_ZA_MASK;
+
+       return ret;
+}
+
 #endif /* CONFIG_ARM64_SME */
 
 #ifdef CONFIG_ARM64_PTR_AUTH
@@ -1360,6 +1405,7 @@ enum aarch64_regset {
 #ifdef CONFIG_ARM64_SME
        REGSET_SSVE,
        REGSET_ZA,
+       REGSET_ZT,
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
        REGSET_PAC_MASK,
@@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = {
                .regset_get = za_get,
                .set = za_set,
        },
+       [REGSET_ZT] = { /* SME ZT */
+               .core_note_type = NT_ARM_ZT,
+               .n = 1,
+               .size = ZT_SIG_REG_BYTES,
+               .align = sizeof(u64),
+               .regset_get = zt_get,
+               .set = zt_set,
+       },
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
        [REGSET_PAC_MASK] = {
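
Debuggers reach the new regset through PTRACE_GETREGSET/PTRACE_SETREGSET with the NT_ARM_ZT note type, transferring exactly one 64-byte register. A minimal tracer-side read might look like the sketch below (error handling trimmed; NT_ARM_ZT is assumed to come from the uapi elf header updated by this series):

#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <linux/elf.h>          /* NT_ARM_ZT, assumed from the updated uapi header */

static long read_zt0(pid_t pid, uint8_t zt0[64])
{
        struct iovec iov = {
                .iov_base = zt0,
                .iov_len  = 64,         /* one 512-bit ZT0 register */
        };

        /* Fails with EINVAL when the target has no SME2 support */
        return ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_ZT, &iov);
}
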
index be279fd482480de2a3b2dfff80a37300907555d8..14779619375bcce3ddab16edc6f70a3b33726203 100644 (file)
@@ -57,6 +57,7 @@ struct rt_sigframe_user_layout {
        unsigned long esr_offset;
        unsigned long sve_offset;
        unsigned long za_offset;
+       unsigned long zt_offset;
        unsigned long extra_offset;
        unsigned long end_offset;
 };
@@ -221,6 +222,7 @@ struct user_ctxs {
        struct fpsimd_context __user *fpsimd;
        struct sve_context __user *sve;
        struct za_context __user *za;
+       struct zt_context __user *zt;
 };
 
 #ifdef CONFIG_ARM64_SVE
@@ -394,7 +396,7 @@ static int preserve_za_context(struct za_context __user *ctx)
                 * fpsimd_signal_preserve_current_state().
                 */
                err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
-                                     current->thread.za_state,
+                                     current->thread.sme_state,
                                      ZA_SIG_REGS_SIZE(vq));
        }
 
@@ -425,7 +427,7 @@ static int restore_za_context(struct user_ctxs *user)
 
        /*
         * Careful: we are about to __copy_from_user() directly into
-        * thread.za_state with preemption enabled, so protection is
+        * thread.sme_state with preemption enabled, so protection is
         * needed to prevent a racing context switch from writing stale
         * registers back over the new data.
         */
@@ -434,13 +436,13 @@ static int restore_za_context(struct user_ctxs *user)
        /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
 
        sme_alloc(current);
-       if (!current->thread.za_state) {
+       if (!current->thread.sme_state) {
                current->thread.svcr &= ~SVCR_ZA_MASK;
                clear_thread_flag(TIF_SME);
                return -ENOMEM;
        }
 
-       err = __copy_from_user(current->thread.za_state,
+       err = __copy_from_user(current->thread.sme_state,
                               (char __user const *)user->za +
                                        ZA_SIG_REGS_OFFSET,
                               ZA_SIG_REGS_SIZE(vq));
@@ -452,11 +454,81 @@ static int restore_za_context(struct user_ctxs *user)
 
        return 0;
 }
+
+static int preserve_zt_context(struct zt_context __user *ctx)
+{
+       int err = 0;
+       u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+
+       if (WARN_ON(!thread_za_enabled(&current->thread)))
+               return -EINVAL;
+
+       memset(reserved, 0, sizeof(reserved));
+
+       __put_user_error(ZT_MAGIC, &ctx->head.magic, err);
+       __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
+                        &ctx->head.size, err);
+       __put_user_error(1, &ctx->nregs, err);
+       BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+       err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+       /*
+        * This assumes that the ZT state has already been saved to
+        * the task struct by calling the function
+        * fpsimd_signal_preserve_current_state().
+        */
+       err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
+                             thread_zt_state(&current->thread),
+                             ZT_SIG_REGS_SIZE(1));
+
+       return err ? -EFAULT : 0;
+}
+
+static int restore_zt_context(struct user_ctxs *user)
+{
+       int err;
+       struct zt_context zt;
+
+       /* ZA must be restored first for this check to be valid */
+       if (!thread_za_enabled(&current->thread))
+               return -EINVAL;
+
+       if (__copy_from_user(&zt, user->zt, sizeof(zt)))
+               return -EFAULT;
+
+       if (zt.nregs != 1)
+               return -EINVAL;
+
+       if (zt.head.size != ZT_SIG_CONTEXT_SIZE(zt.nregs))
+               return -EINVAL;
+
+       /*
+        * Careful: we are about to __copy_from_user() directly into
+        * thread.zt_state with preemption enabled, so protection is
+        * needed to prevent a racing context switch from writing stale
+        * registers back over the new data.
+        */
+
+       fpsimd_flush_task_state(current);
+       /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
+
+       err = __copy_from_user(thread_zt_state(&current->thread),
+                              (char __user const *)user->zt +
+                                       ZT_SIG_REGS_OFFSET,
+                              ZT_SIG_REGS_SIZE(1));
+       if (err)
+               return -EFAULT;
+
+       return 0;
+}
+
 #else /* ! CONFIG_ARM64_SME */
 
 /* Turn any non-optimised out attempts to use these into a link error: */
 extern int preserve_za_context(void __user *ctx);
 extern int restore_za_context(struct user_ctxs *user);
+extern int preserve_zt_context(void __user *ctx);
+extern int restore_zt_context(struct user_ctxs *user);
 
 #endif /* ! CONFIG_ARM64_SME */
 
@@ -474,6 +546,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
        user->fpsimd = NULL;
        user->sve = NULL;
        user->za = NULL;
+       user->zt = NULL;
 
        if (!IS_ALIGNED((unsigned long)base, 16))
                goto invalid;
@@ -552,6 +625,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
                        user->za = (struct za_context __user *)head;
                        break;
 
+               case ZT_MAGIC:
+                       if (!system_supports_sme2())
+                               goto invalid;
+
+                       if (user->zt)
+                               goto invalid;
+
+                       if (size < sizeof(*user->zt))
+                               goto invalid;
+
+                       user->zt = (struct zt_context __user *)head;
+                       break;
+
                case EXTRA_MAGIC:
                        if (have_extra_context)
                                goto invalid;
@@ -674,6 +760,9 @@ static int restore_sigframe(struct pt_regs *regs,
        if (err == 0 && system_supports_sme() && user.za)
                err = restore_za_context(&user);
 
+       if (err == 0 && system_supports_sme2() && user.zt)
+               err = restore_zt_context(&user);
+
        return err;
 }
 
@@ -774,6 +863,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
                        return err;
        }
 
+       if (system_supports_sme2()) {
+               if (add_all || thread_za_enabled(&current->thread)) {
+                       err = sigframe_alloc(user, &user->zt_offset,
+                                            ZT_SIG_CONTEXT_SIZE(1));
+                       if (err)
+                               return err;
+               }
+       }
+
        return sigframe_alloc_end(user);
 }
 
@@ -829,6 +927,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
                err |= preserve_za_context(za_ctx);
        }
 
+       /* ZT state if present */
+       if (system_supports_sme2() && err == 0 && user->zt_offset) {
+               struct zt_context __user *zt_ctx =
+                       apply_user_offset(user, user->zt_offset);
+               err |= preserve_zt_context(zt_ctx);
+       }
+
        if (err == 0 && user->extra_offset) {
                char __user *sfp = (char __user *)user->sigframe;
                char __user *userp =
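
On the receiving side, the new record is just another _aarch64_ctx block in the signal frame, tagged with ZT_MAGIC and carrying the raw register bytes at ZT_SIG_REGS_OFFSET. A handler could locate it roughly as sketched below (extra_context indirection and bounds checking omitted; the layout is assumed from the uapi sigcontext additions in this series):

#include <stddef.h>
#include <ucontext.h>
#include <asm/sigcontext.h>

/* Sketch: walk the signal frame records looking for the ZT0 payload */
static void *find_zt0(ucontext_t *uc)
{
        struct _aarch64_ctx *head =
                (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

        while (head->magic) {
                if (head->magic == ZT_MAGIC)
                        return (char *)head + ZT_SIG_REGS_OFFSET;

                head = (struct _aarch64_ctx *)((char *)head + head->size);
        }

        return NULL;    /* no SME2 state saved in this frame */
}
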
index 5e33c2d4645a5d5f2da5beb78dfaec33c35ad166..c0c050e53157d9908c91fd781aa1b5d3271e4092 100644 (file)
@@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
         inject_fault.o va_layout.o handle_exit.o \
         guest.o debug.o reset.o sys_regs.o stacktrace.o \
         vgic-sys-reg-v3.o fpsimd.o pkvm.o \
-        arch_timer.o trng.o vmid.o \
+        arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
         vgic/vgic.o vgic/vgic-init.o \
         vgic/vgic-irqfd.o vgic/vgic-v2.o \
         vgic/vgic-v3.o vgic/vgic-v4.o \
index 23346585a29429d8dd1ab92b40d618460b738c1b..00610477ec7bd8da4a7ccbd5a764851145989490 100644 (file)
@@ -428,14 +428,17 @@ static void timer_emulate(struct arch_timer_context *ctx)
         * scheduled for the future.  If the timer cannot fire at all,
         * then we also don't need a soft timer.
         */
-       if (!kvm_timer_irq_can_fire(ctx)) {
-               soft_timer_cancel(&ctx->hrtimer);
+       if (should_fire || !kvm_timer_irq_can_fire(ctx))
                return;
-       }
 
        soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 }
 
+static void set_cntvoff(u64 cntvoff)
+{
+       kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
+}
+
 static void timer_save_state(struct arch_timer_context *ctx)
 {
        struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@@ -459,6 +462,22 @@ static void timer_save_state(struct arch_timer_context *ctx)
                write_sysreg_el0(0, SYS_CNTV_CTL);
                isb();
 
+               /*
+                * The kernel may decide to run userspace after
+                * calling vcpu_put, so we reset cntvoff to 0 to
+                * ensure a consistent read between user accesses to
+                * the virtual counter and kernel access to the
+                * physical counter in the non-VHE case.
+                *
+                * For VHE, the virtual counter uses a fixed virtual
+                * offset of zero, so no need to zero CNTVOFF_EL2
+                * register, but this is actually useful when switching
+                * between EL1/vEL2 with NV.
+                *
+                * Do it unconditionally, as this is either unavoidable
+                * or dirt cheap.
+                */
+               set_cntvoff(0);
                break;
        case TIMER_PTIMER:
                timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
@@ -532,6 +551,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 
        switch (index) {
        case TIMER_VTIMER:
+               set_cntvoff(timer_get_offset(ctx));
                write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
                isb();
                write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
@@ -552,11 +572,6 @@ out:
        local_irq_restore(flags);
 }
 
-static void set_cntvoff(u64 cntvoff)
-{
-       kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
-}
-
 static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
 {
        int r;
@@ -631,8 +646,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
                kvm_timer_vcpu_load_nogic(vcpu);
        }
 
-       set_cntvoff(timer_get_offset(map.direct_vtimer));
-
        kvm_timer_unblocking(vcpu);
 
        timer_restore_state(map.direct_vtimer);
@@ -688,15 +701,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 
        if (kvm_vcpu_is_blocking(vcpu))
                kvm_timer_blocking(vcpu);
-
-       /*
-        * The kernel may decide to run userspace after calling vcpu_put, so
-        * we reset cntvoff to 0 to ensure a consistent read between user
-        * accesses to the virtual counter and kernel access to the physical
-        * counter of non-VHE case. For VHE, the virtual counter uses a fixed
-        * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
-        */
-       set_cntvoff(0);
 }
 
 /*
@@ -934,14 +938,22 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
                              enum kvm_arch_timers tmr,
                              enum kvm_arch_timer_regs treg)
 {
+       struct arch_timer_context *timer;
+       struct timer_map map;
        u64 val;
 
+       get_timer_map(vcpu, &map);
+       timer = vcpu_get_timer(vcpu, tmr);
+
+       if (timer == map.emul_ptimer)
+               return kvm_arm_timer_read(vcpu, timer, treg);
+
        preempt_disable();
-       kvm_timer_vcpu_put(vcpu);
+       timer_save_state(timer);
 
-       val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
+       val = kvm_arm_timer_read(vcpu, timer, treg);
 
-       kvm_timer_vcpu_load(vcpu);
+       timer_restore_state(timer);
        preempt_enable();
 
        return val;
@@ -975,13 +987,22 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
                                enum kvm_arch_timer_regs treg,
                                u64 val)
 {
-       preempt_disable();
-       kvm_timer_vcpu_put(vcpu);
-
-       kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
+       struct arch_timer_context *timer;
+       struct timer_map map;
 
-       kvm_timer_vcpu_load(vcpu);
-       preempt_enable();
+       get_timer_map(vcpu, &map);
+       timer = vcpu_get_timer(vcpu, tmr);
+       if (timer == map.emul_ptimer) {
+               soft_timer_cancel(&timer->hrtimer);
+               kvm_arm_timer_write(vcpu, timer, treg, val);
+               timer_emulate(timer);
+       } else {
+               preempt_disable();
+               timer_save_state(timer);
+               kvm_arm_timer_write(vcpu, timer, treg, val);
+               timer_restore_state(timer);
+               preempt_enable();
+       }
 }
 
 static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
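
Moving the CNTVOFF_EL2 handling into timer_save_state()/timer_restore_state() works because of the architectural relationship between the two counters, sketched here as pseudo-C rather than kernel code:

/* Illustrative: what a virtual counter read returns at EL0/EL1 */
static inline u64 guest_virtual_count(u64 cntpct, u64 cntvoff)
{
        return cntpct - cntvoff;        /* CNTVCT_EL0 = CNTPCT_EL0 - CNTVOFF_EL2 */
}

Restoring the guest's offset right before the vtimer registers are loaded, and zeroing it when they are saved, keeps host userspace reads of the virtual counter consistent with the physical counter while the vCPU is out of context.
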
index 698787ed87e92e3411c2969949d1b288871ab2d8..3bd732eaf08725509f7f0991cb359d1c1c7672cc 100644 (file)
@@ -136,7 +136,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        if (ret)
                goto err_unshare_kvm;
 
-       if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) {
                ret = -ENOMEM;
                goto err_unshare_kvm;
        }
@@ -1899,6 +1899,7 @@ static void kvm_hyp_init_symbols(void)
        kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
        kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
        kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
+       kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
        kvm_nvhe_sym(__icache_flags) = __icache_flags;
        kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
 }
@@ -1921,9 +1922,7 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
        return 0;
 }
 
-/**
- * Inits Hyp-mode on all online CPUs
- */
+/* Inits Hyp-mode on all online CPUs */
 static int __init init_hyp_mode(void)
 {
        u32 hyp_va_bits;
@@ -2199,9 +2198,7 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
        kvm_arm_resume_guest(irqfd->kvm);
 }
 
-/**
- * Initialize Hyp-mode and memory mappings on all CPUs.
- */
+/* Initialize Hyp-mode and memory mappings on all CPUs */
 static __init int kvm_arm_init(void)
 {
        int err;
@@ -2325,6 +2322,11 @@ static int __init early_kvm_mode_cfg(char *arg)
                return 0;
        }
 
+       if (strcmp(arg, "nested") == 0 && !WARN_ON(!is_kernel_in_hyp_mode())) {
+               kvm_mode = KVM_MODE_NV;
+               return 0;
+       }
+
        return -EINVAL;
 }
 early_param("kvm-arm.mode", early_kvm_mode_cfg);
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
new file mode 100644 (file)
index 0000000..b966620
--- /dev/null
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2016 - Linaro and Columbia University
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+
+#include "hyp/include/hyp/adjust_pc.h"
+
+#include "trace.h"
+
+static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
+{
+       u64 mode = spsr & PSR_MODE_MASK;
+
+       /*
+        * Possible causes for an Illegal Exception Return from EL2:
+        * - trying to return to EL3
+        * - trying to return to an illegal M value
+        * - trying to return to a 32bit EL
+        * - trying to return to EL1 with HCR_EL2.TGE set
+        */
+       if (mode == PSR_MODE_EL3t   || mode == PSR_MODE_EL3h ||
+           mode == 0b00001         || (mode & BIT(1))       ||
+           (spsr & PSR_MODE32_BIT) ||
+           (vcpu_el2_tge_is_set(vcpu) && (mode == PSR_MODE_EL1t ||
+                                          mode == PSR_MODE_EL1h))) {
+               /*
+                * The guest is playing with our nerves. Preserve EL, SP,
+                * masks, flags from the existing PSTATE, and set IL.
+                * The HW will then generate an Illegal State Exception
+                * immediately after ERET.
+                */
+               spsr = *vcpu_cpsr(vcpu);
+
+               spsr &= (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT |
+                        PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT |
+                        PSR_MODE_MASK | PSR_MODE32_BIT);
+               spsr |= PSR_IL_BIT;
+       }
+
+       return spsr;
+}
+
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
+{
+       u64 spsr, elr, mode;
+       bool direct_eret;
+
+       /*
+        * Going through the whole put/load motions is a waste of time
+        * if this is a VHE guest hypervisor returning to its own
+        * userspace, or the hypervisor performing a local exception
+        * return. No need to save/restore registers, no need to
+        * switch S2 MMU. Just do the canonical ERET.
+        */
+       spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
+       spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+
+       mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+       direct_eret  = (mode == PSR_MODE_EL0t &&
+                       vcpu_el2_e2h_is_set(vcpu) &&
+                       vcpu_el2_tge_is_set(vcpu));
+       direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
+
+       if (direct_eret) {
+               *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
+               *vcpu_cpsr(vcpu) = spsr;
+               trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
+               return;
+       }
+
+       preempt_disable();
+       kvm_arch_vcpu_put(vcpu);
+
+       elr = __vcpu_sys_reg(vcpu, ELR_EL2);
+
+       trace_kvm_nested_eret(vcpu, elr, spsr);
+
+       /*
+        * Note that the current exception level is always the virtual EL2,
+        * since we set HCR_EL2.NV bit only when entering the virtual EL2.
+        */
+       *vcpu_pc(vcpu) = elr;
+       *vcpu_cpsr(vcpu) = spsr;
+
+       kvm_arch_vcpu_load(vcpu, smp_processor_id());
+       preempt_enable();
+}
+
+static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
+                                    enum exception_type type)
+{
+       trace_kvm_inject_nested_exception(vcpu, esr_el2, type);
+
+       switch (type) {
+       case except_type_sync:
+               kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+               vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2);
+               break;
+       case except_type_irq:
+               kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
+               break;
+       default:
+               WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
+       }
+}
+
+/*
+ * Emulate taking an exception to EL2.
+ * See ARM ARM J8.1.2 AArch64.TakeException()
+ */
+static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
+                            enum exception_type type)
+{
+       u64 pstate, mode;
+       bool direct_inject;
+
+       if (!vcpu_has_nv(vcpu)) {
+               kvm_err("Unexpected call to %s for the non-nesting configuration\n",
+                               __func__);
+               return -EINVAL;
+       }
+
+       /*
+        * As for ERET, we can avoid doing too much on the injection path by
+        * checking that we either took the exception from a VHE host
+        * userspace or from vEL2. In these cases, there is no change in
+        * translation regime (or anything else), so let's do as little as
+        * possible.
+        */
+       pstate = *vcpu_cpsr(vcpu);
+       mode = pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+       direct_inject  = (mode == PSR_MODE_EL0t &&
+                         vcpu_el2_e2h_is_set(vcpu) &&
+                         vcpu_el2_tge_is_set(vcpu));
+       direct_inject |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
+
+       if (direct_inject) {
+               kvm_inject_el2_exception(vcpu, esr_el2, type);
+               return 1;
+       }
+
+       preempt_disable();
+
+       /*
+        * We may have an exception or PC update in the EL0/EL1 context.
+        * Commit it before entering EL2.
+        */
+       __kvm_adjust_pc(vcpu);
+
+       kvm_arch_vcpu_put(vcpu);
+
+       kvm_inject_el2_exception(vcpu, esr_el2, type);
+
+       /*
+        * A hard requirement is that a switch between EL1 and EL2
+        * contexts has to happen between a put/load, so that we can
+        * pick the correct timer and interrupt configuration, among
+        * other things.
+        *
+        * Make sure the exception actually took place before we load
+        * the new context.
+        */
+       __kvm_adjust_pc(vcpu);
+
+       kvm_arch_vcpu_load(vcpu, smp_processor_id());
+       preempt_enable();
+
+       return 1;
+}
+
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+       return kvm_inject_nested(vcpu, esr_el2, except_type_sync);
+}
+
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Do not inject an irq if the:
+        *  - Current exception level is EL2, and
+        *  - virtual HCR_EL2.TGE == 0
+        *  - virtual HCR_EL2.IMO == 0
+        *
+        * See Table D1-17 "Physical interrupt target and masking when EL3 is
+        * not implemented and EL2 is implemented" in ARM DDI 0487C.a.
+        */
+
+       if (vcpu_is_el2(vcpu) && !vcpu_el2_tge_is_set(vcpu) &&
+           !(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO))
+               return 1;
+
+       /* esr_el2 value doesn't matter for exits due to irqs. */
+       return kvm_inject_nested(vcpu, 0, except_type_irq);
+}
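
The illegal-exception-return check near the top of this file leans on the AArch64 PSTATE.M[3:0] encodings: 0b0000 EL0t, 0b0100 EL1t, 0b0101 EL1h, 0b1000 EL2t, 0b1001 EL2h, 0b1100 EL3t, 0b1101 EL3h, with bit 1 set (and the leftover 0b0001) reserved. Spelled out against the named constants, the mode-related part of that predicate looks roughly like this sketch (the helper itself is illustrative):

/* Illustrative: the EL3 / reserved M-value portion of the ERET check above */
static bool example_illegal_aarch64_mode(u64 mode)
{
        return mode == PSR_MODE_EL3t || mode == PSR_MODE_EL3h ||
               mode == 0b0001        || (mode & BIT(1));
}
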
index 02dd7e9ebd391fe13e7b367c74b707812534a609..1279949599b5fc84948f4061d6571537883c54e9 100644 (file)
@@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
                fp_state.st = &vcpu->arch.ctxt.fp_regs;
                fp_state.sve_state = vcpu->arch.sve_state;
                fp_state.sve_vl = vcpu->arch.sve_max_vl;
-               fp_state.za_state = NULL;
+               fp_state.sme_state = NULL;
                fp_state.svcr = &vcpu->arch.svcr;
                fp_state.fp_type = &vcpu->arch.fp_type;
 
@@ -184,6 +184,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
                        sysreg_clear_set(CPACR_EL1,
                                         CPACR_EL1_SMEN_EL0EN,
                                         CPACR_EL1_SMEN_EL1EN);
+               isb();
        }
 
        if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
index cf4c495a4321332b7bc5a1979fc84d3084da355e..07444fa2288887ec6f8455045fada76d02e77862 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/fpsimd.h>
 #include <asm/kvm.h>
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
 #include <asm/sigcontext.h>
 
 #include "trace.h"
@@ -253,6 +254,11 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
                        if (!vcpu_el1_is_32bit(vcpu))
                                return -EINVAL;
                        break;
+               case PSR_MODE_EL2h:
+               case PSR_MODE_EL2t:
+                       if (!vcpu_has_nv(vcpu))
+                               return -EINVAL;
+                       fallthrough;
                case PSR_MODE_EL0t:
                case PSR_MODE_EL1t:
                case PSR_MODE_EL1h:
index e778eefcf214d8876f14eefd8bf8b881ddf96e09..a798c0b4d7177020ee9ed28ecc2ee345e565ee2e 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/debug-monitors.h>
 #include <asm/stacktrace/nvhe.h>
 #include <asm/traps.h>
@@ -41,6 +42,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
                            kvm_vcpu_hvc_get_imm(vcpu));
        vcpu->stat.hvc_exit_stat++;
 
+       /* Forward hvc instructions to the virtual EL2 if the guest has EL2. */
+       if (vcpu_has_nv(vcpu)) {
+               if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD)
+                       kvm_inject_undefined(vcpu);
+               else
+                       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+               return 1;
+       }
+
        ret = kvm_hvc_call_handler(vcpu);
        if (ret < 0) {
                vcpu_set_reg(vcpu, 0, ~0UL);
@@ -52,6 +63,8 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
 
 static int handle_smc(struct kvm_vcpu *vcpu)
 {
+       int ret;
+
        /*
         * "If an SMC instruction executed at Non-secure EL1 is
         * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
@@ -59,10 +72,30 @@ static int handle_smc(struct kvm_vcpu *vcpu)
         *
         * We need to advance the PC after the trap, as it would
         * otherwise return to the same address...
+        *
+        * Only handle SMCs from the virtual EL2 with an immediate of zero and
+        * skip them otherwise.
         */
-       vcpu_set_reg(vcpu, 0, ~0UL);
+       if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
+               vcpu_set_reg(vcpu, 0, ~0UL);
+               kvm_incr_pc(vcpu);
+               return 1;
+       }
+
+       /*
+        * If imm is zero then it is likely an SMCCC call.
+        *
+        * Note that on ARMv8.3, even if EL3 is not implemented, SMC executed
+        * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
+        * being treated as UNDEFINED.
+        */
+       ret = kvm_hvc_call_handler(vcpu);
+       if (ret < 0)
+               vcpu_set_reg(vcpu, 0, ~0UL);
+
        kvm_incr_pc(vcpu);
-       return 1;
+
+       return ret;
 }
 
 /*
@@ -196,6 +229,15 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int kvm_handle_eret(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
+               return kvm_handle_ptrauth(vcpu);
+
+       kvm_emulate_nested_eret(vcpu);
+       return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
        [0 ... ESR_ELx_EC_MAX]  = kvm_handle_unknown_ec,
        [ESR_ELx_EC_WFx]        = kvm_handle_wfx,
@@ -211,6 +253,7 @@ static exit_handle_fn arm_exit_handlers[] = {
        [ESR_ELx_EC_SMC64]      = handle_smc,
        [ESR_ELx_EC_SYS64]      = kvm_handle_sys_reg,
        [ESR_ELx_EC_SVE]        = handle_sve,
+       [ESR_ELx_EC_ERET]       = kvm_handle_eret,
        [ESR_ELx_EC_IABT_LOW]   = kvm_handle_guest_abort,
        [ESR_ELx_EC_DABT_LOW]   = kvm_handle_guest_abort,
        [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
index 791d3de767713c8ad6f48d81b1e371fdbcab9063..424a5107cddb5e1cdd75ef3581adef03aaadabb7 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 
 #if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
 #error Hypervisor code only!
@@ -23,7 +24,9 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 {
        u64 val;
 
-       if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+       if (unlikely(vcpu_has_nv(vcpu)))
+               return vcpu_read_sys_reg(vcpu, reg);
+       else if (__vcpu_read_sys_reg_from_cpu(reg, &val))
                return val;
 
        return __vcpu_sys_reg(vcpu, reg);
@@ -31,18 +34,25 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 
 static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 {
-       if (__vcpu_write_sys_reg_to_cpu(val, reg))
-               return;
-
-        __vcpu_sys_reg(vcpu, reg) = val;
+       if (unlikely(vcpu_has_nv(vcpu)))
+               vcpu_write_sys_reg(vcpu, val, reg);
+       else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
+               __vcpu_sys_reg(vcpu, reg) = val;
 }
 
-static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
+static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
+                             u64 val)
 {
-       if (has_vhe())
+       if (unlikely(vcpu_has_nv(vcpu))) {
+               if (target_mode == PSR_MODE_EL1h)
+                       vcpu_write_sys_reg(vcpu, val, SPSR_EL1);
+               else
+                       vcpu_write_sys_reg(vcpu, val, SPSR_EL2);
+       } else if (has_vhe()) {
                write_sysreg_el1(val, SYS_SPSR);
-       else
+       } else {
                __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
+       }
 }
 
 static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
@@ -101,6 +111,11 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
                sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
                __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
                break;
+       case PSR_MODE_EL2h:
+               vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL2);
+               sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+               __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL2);
+               break;
        default:
                /* Don't do that */
                BUG();
@@ -153,7 +168,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
        new |= target_mode;
 
        *vcpu_cpsr(vcpu) = new;
-       __vcpu_write_spsr(vcpu, old);
+       __vcpu_write_spsr(vcpu, target_mode, old);
 }
 
 /*
@@ -323,11 +338,20 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
                case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
                        enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
                        break;
+
+               case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
+                       enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync);
+                       break;
+
+               case unpack_vcpu_flag(EXCEPT_AA64_EL2_IRQ):
+                       enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
+                       break;
+
                default:
                        /*
-                        * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ}
-                        * will be implemented at some point. Everything
-                        * else gets silently ignored.
+                        * Only EL1_SYNC and EL2_{SYNC,IRQ} make
+                        * sense so far. Everything else gets silently
+                        * ignored.
                         */
                        break;
                }
index baa5b9b3dde58ac46bfdf56dde0de4db38996460..699ea1f8d409c7de996906a6522a41da08475dc8 100644 (file)
@@ -39,7 +39,6 @@ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
 
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
-       ctxt_sys_reg(ctxt, CSSELR_EL1)  = read_sysreg(csselr_el1);
        ctxt_sys_reg(ctxt, SCTLR_EL1)   = read_sysreg_el1(SYS_SCTLR);
        ctxt_sys_reg(ctxt, CPACR_EL1)   = read_sysreg_el1(SYS_CPACR);
        ctxt_sys_reg(ctxt, TTBR0_EL1)   = read_sysreg_el1(SYS_TTBR0);
@@ -95,7 +94,6 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
 static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 {
        write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1),     vmpidr_el2);
-       write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1),    csselr_el1);
 
        if (has_vhe() ||
            !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -156,9 +154,26 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
        write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1),  SYS_SPSR);
 }
 
+/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
+static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt)
+{
+       u64 mode = ctxt->regs.pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+       switch (mode) {
+       case PSR_MODE_EL2t:
+               mode = PSR_MODE_EL1t;
+               break;
+       case PSR_MODE_EL2h:
+               mode = PSR_MODE_EL1h;
+               break;
+       }
+
+       return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+}
+
 static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
 {
-       u64 pstate = ctxt->regs.pstate;
+       u64 pstate = to_hw_pstate(ctxt);
        u64 mode = pstate & PSR_AA32_MODE_MASK;
 
        /*
index c953fb4b9a137b1f9f6d8679a91bfbbc0e91e196..a6d67c2bb5ae9fc8eb4e74e0ef0b154f6977c887 100644 (file)
@@ -183,6 +183,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
 
        /* Initialize EL2 CPU state to sane values. */
        init_el2_state                          // Clobbers x0..x2
+       finalise_el2_state
 
        /* Enable MMU, set vectors and stack. */
        mov     x0, x28
index 0f9ac25afdf40218b0059f36d541648e7e6facb3..08d2b004f4b73cd61bd80f5b10b6749fd1052459 100644 (file)
@@ -26,6 +26,7 @@ u64 id_aa64isar2_el1_sys_val;
 u64 id_aa64mmfr0_el1_sys_val;
 u64 id_aa64mmfr1_el1_sys_val;
 u64 id_aa64mmfr2_el1_sys_val;
+u64 id_aa64smfr0_el1_sys_val;
 
 /*
  * Inject an unknown/undefined exception to an AArch64 guest while most of its
index b11cf2c618a6c9a7a52762d5eae98667fa821086..3d61bd3e591d27e9858b028220a9eb81a7498f6d 100644 (file)
@@ -168,6 +168,25 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
        return walker->cb(ctx, visit);
 }
 
+static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
+                                     int r)
+{
+       /*
+        * Visitor callbacks return EAGAIN when the conditions that led to a
+        * fault are no longer reflected in the page tables due to a race to
+        * update a PTE. In the context of a fault handler this is interpreted
+        * as a signal to retry guest execution.
+        *
+        * Ignore the return code altogether for walkers outside a fault handler
+        * (e.g. write protecting a range of memory) and chug along with the
+        * page table walk.
+        */
+       if (r == -EAGAIN)
+               return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT);
+
+       return !r;
+}
+
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
                              struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
 
@@ -200,7 +219,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
                table = kvm_pte_table(ctx.old, level);
        }
 
-       if (ret)
+       if (!kvm_pgtable_walk_continue(data->walker, ret))
                goto out;
 
        if (!table) {
@@ -211,13 +230,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 
        childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops);
        ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
-       if (ret)
+       if (!kvm_pgtable_walk_continue(data->walker, ret))
                goto out;
 
        if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST)
                ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST);
 
 out:
+       if (kvm_pgtable_walk_continue(data->walker, ret))
+               return 0;
+
        return ret;
 }
 
@@ -584,12 +606,14 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
                lvls = 2;
        vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
 
+#ifdef CONFIG_ARM64_HW_AFDBM
        /*
         * Enable the Hardware Access Flag management, unconditionally
         * on all CPUs. The feature is RES0 on CPUs without the support
         * and must be ignored by the CPUs.
         */
        vtcr |= VTCR_EL2_HA;
+#endif /* CONFIG_ARM64_HW_AFDBM */
 
        /* Set the vmid bits */
        vtcr |= (get_vmid_bits(mmfr1) == 16) ?
@@ -1026,7 +1050,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
        struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
        if (!kvm_pte_valid(ctx->old))
-               return 0;
+               return -EAGAIN;
 
        data->level = ctx->level;
        data->pte = pte;
@@ -1094,9 +1118,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 {
        kvm_pte_t pte = 0;
-       stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
-                                &pte, NULL, 0);
-       dsb(ishst);
+       int ret;
+
+       ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
+                                      &pte, NULL,
+                                      KVM_PGTABLE_WALK_HANDLE_FAULT |
+                                      KVM_PGTABLE_WALK_SHARED);
+       if (!ret)
+               dsb(ishst);
+
        return pte;
 }
 
@@ -1141,6 +1171,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
 
        ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
+                                      KVM_PGTABLE_WALK_HANDLE_FAULT |
                                       KVM_PGTABLE_WALK_SHARED);
        if (!ret)
                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
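
The -EAGAIN convention lets a visitor bail out quietly when it loses a race with a parallel update; whether that aborts the walk depends on the caller passing KVM_PGTABLE_WALK_HANDLE_FAULT. An illustrative callback following the convention (not one of the kernel's real walkers):

/* Illustrative visitor: give up quietly if the PTE changed under our feet */
static int example_stage2_visitor(const struct kvm_pgtable_visit_ctx *ctx,
                                  enum kvm_pgtable_walk_flags visit)
{
        if (!kvm_pte_valid(ctx->old))
                return -EAGAIN; /* retried by fault handlers, ignored elsewhere */

        /* ... update the PTE via ctx->ptep here ... */
        return 0;
}
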
index 1a97391fedd29335647796ce491f5bda03a3f189..cd3f3117bf164b8ea618482870349a53a48844c1 100644 (file)
@@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
        ___activate_traps(vcpu);
 
        val = read_sysreg(cpacr_el1);
-       val |= CPACR_EL1_TTA;
+       val |= CPACR_ELx_TTA;
        val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
                 CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
 
@@ -120,6 +120,25 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
 
 static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
+       /*
+        * If we were in HYP context on entry, adjust the PSTATE view
+        * so that the usual helpers work correctly.
+        */
+       if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+               u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+               switch (mode) {
+               case PSR_MODE_EL1t:
+                       mode = PSR_MODE_EL2t;
+                       break;
+               case PSR_MODE_EL1h:
+                       mode = PSR_MODE_EL2h;
+                       break;
+               }
+
+               *vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
+               *vcpu_cpsr(vcpu) |= mode;
+       }
 }
 
 /* Switch to the guest for VHE systems running in EL2 */
@@ -154,6 +173,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
        sysreg_restore_guest_state_vhe(guest_ctxt);
        __debug_switch_to_guest(vcpu);
 
+       if (is_hyp_ctxt(vcpu))
+               vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
+       else
+               vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
+
        do {
                /* Jump in the fire! */
                exit_code = __guest_enter(vcpu);
index c9f401fa01a93c00ac20a3f6d9e2146a5592a12a..64c086c02c603167ddffa612f59b43daa2d9830f 100644 (file)
@@ -198,7 +198,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
                break;
        case ARM_SMCCC_HV_PV_TIME_ST:
                gpa = kvm_init_stolen_time(vcpu);
-               if (gpa != GPA_INVALID)
+               if (gpa != INVALID_GPA)
                        val[0] = gpa;
                break;
        case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
index f32f4a2a347f3f81409d4de07070cb32416f4286..64c3aec0d937c36b84d49b7ca403933429b993c8 100644 (file)
 
 #include <linux/kvm_host.h>
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
 #include <asm/esr.h>
 
+static void pend_sync_exception(struct kvm_vcpu *vcpu)
+{
+       /* If not nesting, EL1 is the only possible exception target */
+       if (likely(!vcpu_has_nv(vcpu))) {
+               kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+               return;
+       }
+
+       /*
+        * With NV, we need to pick between EL1 and EL2. Note that we
+        * never deal with a nesting exception here, hence never
+        * changing context, and the exception itself can be delayed
+        * until the next entry.
+        */
+       switch (*vcpu_cpsr(vcpu) & PSR_MODE_MASK) {
+       case PSR_MODE_EL2h:
+       case PSR_MODE_EL2t:
+               kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+               break;
+       case PSR_MODE_EL1h:
+       case PSR_MODE_EL1t:
+               kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+               break;
+       case PSR_MODE_EL0t:
+               if (vcpu_el2_tge_is_set(vcpu))
+                       kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+               else
+                       kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target)
+{
+       return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target);
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
        unsigned long cpsr = *vcpu_cpsr(vcpu);
        bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
        u64 esr = 0;
 
-       kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
-
-       vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
+       pend_sync_exception(vcpu);
 
        /*
         * Build an {i,d}abort, depending on the level and the
@@ -43,14 +81,22 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
        if (!is_iabt)
                esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
 
-       vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
+       esr |= ESR_ELx_FSC_EXTABT;
+
+       if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) {
+               vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
+               vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+       } else {
+               vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
+               vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
+       }
 }
 
 static void inject_undef64(struct kvm_vcpu *vcpu)
 {
        u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-       kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+       pend_sync_exception(vcpu);
 
        /*
         * Build an unknown exception, depending on the instruction
@@ -59,7 +105,10 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
        if (kvm_vcpu_trap_il_is32bit(vcpu))
                esr |= ESR_ELx_IL;
 
-       vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+       if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC)))
+               vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+       else
+               vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
 }
 
 #define DFSR_FSC_EXTABT_LPAE   0x10
index 01352f5838a002152a607765a6db677ce024ff74..7113587222ffe8e1befff0c4d4d7c29e4fde29c5 100644 (file)
@@ -46,16 +46,17 @@ static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
  * long will also starve other vCPUs. We have to also make sure that the page
  * tables are not freed while we released the lock.
  */
-static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
+static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
                              phys_addr_t end,
                              int (*fn)(struct kvm_pgtable *, u64, u64),
                              bool resched)
 {
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
        int ret;
        u64 next;
 
        do {
-               struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+               struct kvm_pgtable *pgt = mmu->pgt;
                if (!pgt)
                        return -EINVAL;
 
@@ -71,8 +72,8 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
        return ret;
 }
 
-#define stage2_apply_range_resched(kvm, addr, end, fn)                 \
-       stage2_apply_range(kvm, addr, end, fn, true)
+#define stage2_apply_range_resched(mmu, addr, end, fn)                 \
+       stage2_apply_range(mmu, addr, end, fn, true)
 
 static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 {
@@ -235,7 +236,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
 
        lockdep_assert_held_write(&kvm->mmu_lock);
        WARN_ON(size & ~PAGE_MASK);
-       WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
+       WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
                                   may_block));
 }
 
@@ -250,7 +251,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
        phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
        phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
 
-       stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
+       stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush);
 }
 
 /**
@@ -934,8 +935,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
  */
 static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
 {
-       struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
-       stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
+       stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
 }
 
 /**
@@ -1383,7 +1383,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        else
                ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
                                             __pfn_to_phys(pfn), prot,
-                                            memcache, KVM_PGTABLE_WALK_SHARED);
+                                            memcache,
+                                            KVM_PGTABLE_WALK_HANDLE_FAULT |
+                                            KVM_PGTABLE_WALK_SHARED);
 
        /* Mark the page dirty only if the fault is handled successfully */
        if (writable && !ret) {
@@ -1401,20 +1403,18 @@ out_unlock:
 /* Resolve the access fault by making the page young again. */
 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 {
-       pte_t pte;
-       kvm_pte_t kpte;
+       kvm_pte_t pte;
        struct kvm_s2_mmu *mmu;
 
        trace_kvm_access_fault(fault_ipa);
 
-       write_lock(&vcpu->kvm->mmu_lock);
+       read_lock(&vcpu->kvm->mmu_lock);
        mmu = vcpu->arch.hw_mmu;
-       kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
-       write_unlock(&vcpu->kvm->mmu_lock);
+       pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
+       read_unlock(&vcpu->kvm->mmu_lock);
 
-       pte = __pte(kpte);
-       if (pte_valid(pte))
-               kvm_set_pfn_accessed(pte_pfn(pte));
+       if (kvm_pte_valid(pte))
+               kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
 }
 
 /**
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
new file mode 100644 (file)
index 0000000..315354d
--- /dev/null
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 - Columbia University and Linaro Ltd.
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+#include <asm/sysreg.h>
+
+#include "sys_regs.h"
+
+/* Protection against the sysreg repainting madness... */
+#define NV_FTR(r, f)           ID_AA64##r##_EL1_##f
+
+/*
+ * Our emulated CPU doesn't support all the possible features. For the
+ * sake of simplicity (and probably mental sanity), wipe out a number
+ * of feature bits we don't intend to support for the time being.
+ * This list should get updated as new features get added to the NV
+ * support, and new extensions to the architecture.
+ */
+void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
+                         const struct sys_reg_desc *r)
+{
+       u32 id = reg_to_encoding(r);
+       u64 val, tmp;
+
+       val = p->regval;
+
+       switch (id) {
+       case SYS_ID_AA64ISAR0_EL1:
+               /* Support everything but TME, O.S. and Range TLBIs */
+               val &= ~(NV_FTR(ISAR0, TLB)             |
+                        NV_FTR(ISAR0, TME));
+               break;
+
+       case SYS_ID_AA64ISAR1_EL1:
+               /* Support everything but PtrAuth and Spec Invalidation */
+               val &= ~(GENMASK_ULL(63, 56)    |
+                        NV_FTR(ISAR1, SPECRES) |
+                        NV_FTR(ISAR1, GPI)     |
+                        NV_FTR(ISAR1, GPA)     |
+                        NV_FTR(ISAR1, API)     |
+                        NV_FTR(ISAR1, APA));
+               break;
+
+       case SYS_ID_AA64PFR0_EL1:
+               /* No AMU, MPAM, S-EL2, RAS or SVE */
+               val &= ~(GENMASK_ULL(55, 52)    |
+                        NV_FTR(PFR0, AMU)      |
+                        NV_FTR(PFR0, MPAM)     |
+                        NV_FTR(PFR0, SEL2)     |
+                        NV_FTR(PFR0, RAS)      |
+                        NV_FTR(PFR0, SVE)      |
+                        NV_FTR(PFR0, EL3)      |
+                        NV_FTR(PFR0, EL2)      |
+                        NV_FTR(PFR0, EL1));
+               /* 64bit EL1/EL2/EL3 only */
+               val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
+               val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
+               val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
+               break;
+
+       case SYS_ID_AA64PFR1_EL1:
+               /* Only support SSBS */
+               val &= NV_FTR(PFR1, SSBS);
+               break;
+
+       case SYS_ID_AA64MMFR0_EL1:
+               /* Hide ECV, FGT, ExS, Secure Memory */
+               val &= ~(GENMASK_ULL(63, 43)            |
+                        NV_FTR(MMFR0, TGRAN4_2)        |
+                        NV_FTR(MMFR0, TGRAN16_2)       |
+                        NV_FTR(MMFR0, TGRAN64_2)       |
+                        NV_FTR(MMFR0, SNSMEM));
+
+               /* Disallow unsupported S2 page sizes */
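+               /* (TGRANx_2 == 0b0001 advertises that the granule is not supported at stage-2) */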
+               switch (PAGE_SIZE) {
+               case SZ_64K:
+                       val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
+                       fallthrough;
+               case SZ_16K:
+                       val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
+                       fallthrough;
+               case SZ_4K:
+                       /* Support everything */
+                       break;
+               }
+               /*
+                * Since we can't support a guest S2 page size smaller than
+                * the host's own page size (due to KVM only populating its
+                * own S2 using the kernel's page size), advertise the
+                * limitation using FEAT_GTG.
+                */
+               switch (PAGE_SIZE) {
+               case SZ_4K:
+                       val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
+                       fallthrough;
+               case SZ_16K:
+                       val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
+                       fallthrough;
+               case SZ_64K:
+                       val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
+                       break;
+               }
+               /* Cap PARange to 48bits */
+               tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
+               if (tmp > 0b0101) {
+                       val &= ~NV_FTR(MMFR0, PARANGE);
+                       val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
+               }
+               break;
+
+       case SYS_ID_AA64MMFR1_EL1:
+               val &= (NV_FTR(MMFR1, PAN)      |
+                       NV_FTR(MMFR1, LO)       |
+                       NV_FTR(MMFR1, HPDS)     |
+                       NV_FTR(MMFR1, VH)       |
+                       NV_FTR(MMFR1, VMIDBits));
+               break;
+
+       case SYS_ID_AA64MMFR2_EL1:
+               val &= ~(NV_FTR(MMFR2, EVT)     |
+                        NV_FTR(MMFR2, BBM)     |
+                        NV_FTR(MMFR2, TTL)     |
+                        GENMASK_ULL(47, 44)    |
+                        NV_FTR(MMFR2, ST)      |
+                        NV_FTR(MMFR2, CCIDX)   |
+                        NV_FTR(MMFR2, VARange));
+
+               /* Force TTL support */
+               val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
+               break;
+
+       case SYS_ID_AA64DFR0_EL1:
+               /* Only limited support for PMU, Debug, BPs and WPs */
+               val &= (NV_FTR(DFR0, PMUVer)    |
+                       NV_FTR(DFR0, WRPs)      |
+                       NV_FTR(DFR0, BRPs)      |
+                       NV_FTR(DFR0, DebugVer));
+
+               /* Cap Debug to ARMv8.1 */
+               tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
+               if (tmp > 0b0111) {
+                       val &= ~NV_FTR(DFR0, DebugVer);
+                       val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
+               }
+               break;
+
+       default:
+               /* Unknown register, just wipe it clean */
+               val = 0;
+               break;
+       }
+
+       p->regval = val;
+}
index 78a09f7a66373bfeb6b61b325a6830cb6249b5b3..4ceabaa4c30bddace82026253d84b0e89fe62a0c 100644 (file)
@@ -19,7 +19,7 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
        u64 steal = 0;
        int idx;
 
-       if (base == GPA_INVALID)
+       if (base == INVALID_GPA)
                return;
 
        idx = srcu_read_lock(&kvm->srcu);
@@ -40,7 +40,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
        switch (feature) {
        case ARM_SMCCC_HV_PV_TIME_FEATURES:
        case ARM_SMCCC_HV_PV_TIME_ST:
-               if (vcpu->arch.steal.base != GPA_INVALID)
+               if (vcpu->arch.steal.base != INVALID_GPA)
                        val = SMCCC_RET_SUCCESS;
                break;
        }
@@ -54,7 +54,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
        struct kvm *kvm = vcpu->kvm;
        u64 base = vcpu->arch.steal.base;
 
-       if (base == GPA_INVALID)
+       if (base == INVALID_GPA)
                return base;
 
        /*
@@ -89,7 +89,7 @@ int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
                return -EFAULT;
        if (!IS_ALIGNED(ipa, 64))
                return -EINVAL;
-       if (vcpu->arch.steal.base != GPA_INVALID)
+       if (vcpu->arch.steal.base != INVALID_GPA)
                return -EEXIST;
 
        /* Check the address is in a valid memslot */
index 2bc74739a6df42387598e02b9b31ce4933b1a9e8..49a3257dec46d90d640456235f8f75dd24912482 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/virt.h>
 
 /* Maximum phys_shift supported for any VM on this host */
@@ -38,6 +39,9 @@ static u32 __ro_after_init kvm_ipa_limit;
 #define VCPU_RESET_PSTATE_EL1  (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
                                 PSR_F_BIT | PSR_D_BIT)
 
+#define VCPU_RESET_PSTATE_EL2  (PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
+                                PSR_F_BIT | PSR_D_BIT)
+
 #define VCPU_RESET_PSTATE_SVC  (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
                                 PSR_AA32_I_BIT | PSR_AA32_F_BIT)
 
@@ -157,6 +161,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
        if (sve_state)
                kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
        kfree(sve_state);
+       kfree(vcpu->arch.ccsidr);
 }
 
 static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
@@ -220,6 +225,10 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
        if (kvm_has_mte(kvm) && is32bit)
                return -EINVAL;
 
+       /* NV is incompatible with AArch32 */
+       if (vcpu_has_nv(vcpu) && is32bit)
+               return -EINVAL;
+
        if (is32bit)
                set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
 
@@ -272,6 +281,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
        if (loaded)
                kvm_arch_vcpu_put(vcpu);
 
+       /* Disallow NV+SVE for the time being */
+       if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
                if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
                        ret = kvm_vcpu_enable_sve(vcpu);
@@ -294,6 +309,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
        default:
                if (vcpu_el1_is_32bit(vcpu)) {
                        pstate = VCPU_RESET_PSTATE_SVC;
+               } else if (vcpu_has_nv(vcpu)) {
+                       pstate = VCPU_RESET_PSTATE_EL2;
                } else {
                        pstate = VCPU_RESET_PSTATE_EL1;
                }
index 46d161fe08d35fa814c66bd541e664bd144b0dcf..53749d3a0996d73646290c4ceeb96cc21511446d 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/bsearch.h>
+#include <linux/cacheinfo.h>
 #include <linux/kvm_host.h>
 #include <linux/mm.h>
 #include <linux/printk.h>
@@ -24,6 +25,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/perf_event.h>
 #include <asm/sysreg.h>
 
@@ -78,28 +80,112 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
            __vcpu_write_sys_reg_to_cpu(val, reg))
                return;
 
-        __vcpu_sys_reg(vcpu, reg) = val;
+       __vcpu_sys_reg(vcpu, reg) = val;
 }
 
-/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
-static u32 __ro_after_init cache_levels;
-
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
 #define CSSELR_MAX 14
 
+/*
+ * Returns the minimum line size for the selected cache, expressed as
+ * Log2(bytes).
+ */
+static u8 get_min_cache_line_size(bool icache)
+{
+       u64 ctr = read_sanitised_ftr_reg(SYS_CTR_EL0);
+       u8 field;
+
+       if (icache)
+               field = SYS_FIELD_GET(CTR_EL0, IminLine, ctr);
+       else
+               field = SYS_FIELD_GET(CTR_EL0, DminLine, ctr);
+
+       /*
+        * Cache line size is represented as Log2(words) in CTR_EL0.
+        * Log2(bytes) can be derived with the following:
+        *
+        * Log2(words) + 2 = Log2(bytes / 4) + 2
+        *                 = Log2(bytes) - 2 + 2
+        *                 = Log2(bytes)
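+        *
+        * e.g. a field value of 4 encodes 16 words = 64 bytes, and 4 + 2 = Log2(64).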
+        */
+       return field + 2;
+}
+
 /* Which cache CCSIDR represents depends on CSSELR value. */
-static u32 get_ccsidr(u32 csselr)
+static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
 {
-       u32 ccsidr;
+       u8 line_size;
 
-       /* Make sure noone else changes CSSELR during this! */
-       local_irq_disable();
-       write_sysreg(csselr, csselr_el1);
-       isb();
-       ccsidr = read_sysreg(ccsidr_el1);
-       local_irq_enable();
+       if (vcpu->arch.ccsidr)
+               return vcpu->arch.ccsidr[csselr];
 
-       return ccsidr;
+       line_size = get_min_cache_line_size(csselr & CSSELR_EL1_InD);
+
+       /*
+        * Fabricate a CCSIDR value as the overriding value does not exist.
+        * The real CCSIDR value is not used, as it can vary depending on the
+        * physical CPU on which the vcpu currently runs.
+        *
+        * The line size is determined with get_min_cache_line_size(), which
+        * should be valid for all CPUs even if they have different cache
+        * configurations.
+        *
+        * The associativity bits are cleared, meaning the geometry of all data
+        * and unified caches (which are guaranteed to be PIPT and thus
+        * non-aliasing) is reported as 1 set and 1 way.
+        * Guests should not be doing cache operations by set/way at all, and
+        * for this reason, we trap them and attempt to infer the intent, so
+        * that we can flush the entire guest's address space at the appropriate
+        * time. The exposed geometry minimizes the number of traps.
+        * [If guests should attempt to infer aliasing properties from the
+        * geometry (which is not permitted by the architecture), they would
+        * only do so for virtually indexed caches.]
+        *
+        * We don't check if the cache level exists as it is allowed to return
+        * an UNKNOWN value if not.
+        */
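+       /* CCSIDR_EL1.LineSize holds Log2(bytes per line) - 4, hence the subtraction */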
+       return SYS_FIELD_PREP(CCSIDR_EL1, LineSize, line_size - 4);
+}
+
+static int set_ccsidr(struct kvm_vcpu *vcpu, u32 csselr, u32 val)
+{
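+       /* Recover Log2(bytes per line) from the LineSize encoding (Log2(bytes) - 4) */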
+       u8 line_size = FIELD_GET(CCSIDR_EL1_LineSize, val) + 4;
+       u32 *ccsidr = vcpu->arch.ccsidr;
+       u32 i;
+
+       if ((val & CCSIDR_EL1_RES0) ||
+           line_size < get_min_cache_line_size(csselr & CSSELR_EL1_InD))
+               return -EINVAL;
+
+       if (!ccsidr) {
+               if (val == get_ccsidr(vcpu, csselr))
+                       return 0;
+
+               ccsidr = kmalloc_array(CSSELR_MAX, sizeof(u32), GFP_KERNEL_ACCOUNT);
+               if (!ccsidr)
+                       return -ENOMEM;
+
+               for (i = 0; i < CSSELR_MAX; i++)
+                       ccsidr[i] = get_ccsidr(vcpu, i);
+
+               vcpu->arch.ccsidr = ccsidr;
+       }
+
+       ccsidr[csselr] = val;
+
+       return 0;
+}
+
+static bool access_rw(struct kvm_vcpu *vcpu,
+                     struct sys_reg_params *p,
+                     const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+       else
+               p->regval = vcpu_read_sys_reg(vcpu, r->reg);
+
+       return true;
 }
 
 /*
@@ -260,6 +346,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
                return read_zero(vcpu, p);
 }
 
+static bool trap_undef(struct kvm_vcpu *vcpu,
+                      struct sys_reg_params *p,
+                      const struct sys_reg_desc *r)
+{
+       kvm_inject_undefined(vcpu);
+       return false;
+}
+
 /*
  * ARMv8.1 mandates at least a trivial LORegion implementation, where all the
  * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
@@ -370,12 +464,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
                            struct sys_reg_params *p,
                            const struct sys_reg_desc *r)
 {
-       if (p->is_write) {
-               vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+       access_rw(vcpu, p, r);
+       if (p->is_write)
                vcpu_set_flag(vcpu, DEBUG_DIRTY);
-       } else {
-               p->regval = vcpu_read_sys_reg(vcpu, r->reg);
-       }
 
        trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
 
@@ -1049,7 +1140,9 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
                treg = TIMER_REG_CVAL;
                break;
        default:
-               BUG();
+               print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
+               kvm_inject_undefined(vcpu);
+               return false;
        }
 
        if (p->is_write)
@@ -1155,6 +1248,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
                val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
                                  pmuver_to_perfmon(vcpu_pmuver(vcpu)));
                break;
+       case SYS_ID_AA64MMFR2_EL1:
+               val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
+               break;
+       case SYS_ID_MMFR4_EL1:
+               val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
+               break;
        }
 
        return val;
@@ -1205,6 +1304,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
                return write_to_read_only(vcpu, p, r);
 
        p->regval = read_id_reg(vcpu, r);
+       if (vcpu_has_nv(vcpu))
+               access_nested_id_reg(vcpu, p, r);
+
        return true;
 }
 
@@ -1385,10 +1487,78 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
        if (p->is_write)
                return write_to_read_only(vcpu, p, r);
 
-       p->regval = read_sysreg(clidr_el1);
+       p->regval = __vcpu_sys_reg(vcpu, r->reg);
        return true;
 }
 
+/*
+ * Fabricate a CLIDR_EL1 value instead of using the real value, which can vary
+ * depending on the physical CPU on which the vcpu currently runs.
+ */
+static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+       u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+       u64 clidr;
+       u8 loc;
+
+       if ((ctr_el0 & CTR_EL0_IDC)) {
+               /*
+                * Data cache clean to the PoU is not required so LoUU and LoUIS
+                * will not be set and a unified cache, which will be marked as
+                * LoC, will be added.
+                *
+                * If not DIC, make the unified cache L2 so that an instruction
+                * cache can be added as L1 later.
+                */
+               loc = (ctr_el0 & CTR_EL0_DIC) ? 1 : 2;
+               clidr = CACHE_TYPE_UNIFIED << CLIDR_CTYPE_SHIFT(loc);
+       } else {
+               /*
+                * Data cache clean to the PoU is required so let L1 have a data
+                * cache and mark it as LoUU and LoUIS. As L1 has a data cache,
+                * it can be marked as LoC too.
+                */
+               loc = 1;
+               clidr = 1 << CLIDR_LOUU_SHIFT;
+               clidr |= 1 << CLIDR_LOUIS_SHIFT;
+               clidr |= CACHE_TYPE_DATA << CLIDR_CTYPE_SHIFT(1);
+       }
+
+       /*
+        * Instruction cache invalidation to the PoU is required so let L1 have
+        * an instruction cache. If L1 already has a data cache, it will be
+        * CACHE_TYPE_SEPARATE.
+        */
+       if (!(ctr_el0 & CTR_EL0_DIC))
+               clidr |= CACHE_TYPE_INST << CLIDR_CTYPE_SHIFT(1);
+
+       clidr |= loc << CLIDR_LOC_SHIFT;
+
+       /*
+        * Add a tag cache unified with the data cache. Allocation tags and data
+        * share a cache line, so the reported layout stays valid even if there
+        * is only one cache line.
+        */
+       if (kvm_has_mte(vcpu->kvm))
+               clidr |= 2 << CLIDR_TTYPE_SHIFT(loc);
+
+       __vcpu_sys_reg(vcpu, r->reg) = clidr;
+}
+
+static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+                     u64 val)
+{
+       u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+       u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val));
+
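+       /* Only accept a CLIDR implying IDC behaviour if the host has CTR_EL0.IDC */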
+       if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+               return -EINVAL;
+
+       __vcpu_sys_reg(vcpu, rd->reg) = val;
+
+       return 0;
+}
+
 static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                          const struct sys_reg_desc *r)
 {
@@ -1410,22 +1580,10 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                return write_to_read_only(vcpu, p, r);
 
        csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
-       p->regval = get_ccsidr(csselr);
+       csselr &= CSSELR_EL1_Level | CSSELR_EL1_InD;
+       if (csselr < CSSELR_MAX)
+               p->regval = get_ccsidr(vcpu, csselr);
 
-       /*
-        * Guests should not be doing cache operations by set/way at all, and
-        * for this reason, we trap them and attempt to infer the intent, so
-        * that we can flush the entire guest's address space at the appropriate
-        * time.
-        * To prevent this trapping from causing performance problems, let's
-        * expose the geometry of all data and unified caches (which are
-        * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way.
-        * [If guests should attempt to infer aliasing properties from the
-        * geometry (which is not permitted by the architecture), they would
-        * only do so for virtually indexed caches.]
-        */
-       if (!(csselr & 1)) // data or unified cache
-               p->regval &= ~GENMASK(27, 3);
        return true;
 }
 
@@ -1446,6 +1604,44 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
        .visibility = mte_visibility,           \
 }
 
+static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
+                                  const struct sys_reg_desc *rd)
+{
+       if (vcpu_has_nv(vcpu))
+               return 0;
+
+       return REG_HIDDEN;
+}
+
+#define EL2_REG(name, acc, rst, v) {           \
+       SYS_DESC(SYS_##name),                   \
+       .access = acc,                          \
+       .reset = rst,                           \
+       .reg = name,                            \
+       .visibility = el2_visibility,           \
+       .val = v,                               \
+}
+
+/*
+ * EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
+ * HCR_EL2.E2H==1, and only in the sysreg table for convenience of
+ * handling traps. Given that, they are always hidden from userspace.
+ */
+static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
+                                   const struct sys_reg_desc *rd)
+{
+       return REG_HIDDEN_USER;
+}
+
+#define EL12_REG(name, acc, rst, v) {          \
+       SYS_DESC(SYS_##name##_EL12),            \
+       .access = acc,                          \
+       .reset = rst,                           \
+       .reg = name##_EL1,                      \
+       .val = v,                               \
+       .visibility = elx2_visibility,          \
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {                   \
        SYS_DESC(SYS_##name),                   \
@@ -1490,6 +1686,42 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
        .visibility = raz_visibility,           \
 }
 
+static bool access_sp_el1(struct kvm_vcpu *vcpu,
+                         struct sys_reg_params *p,
+                         const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               __vcpu_sys_reg(vcpu, SP_EL1) = p->regval;
+       else
+               p->regval = __vcpu_sys_reg(vcpu, SP_EL1);
+
+       return true;
+}
+
+static bool access_elr(struct kvm_vcpu *vcpu,
+                      struct sys_reg_params *p,
+                      const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               vcpu_write_sys_reg(vcpu, p->regval, ELR_EL1);
+       else
+               p->regval = vcpu_read_sys_reg(vcpu, ELR_EL1);
+
+       return true;
+}
+
+static bool access_spsr(struct kvm_vcpu *vcpu,
+                       struct sys_reg_params *p,
+                       const struct sys_reg_desc *r)
+{
+       if (p->is_write)
+               __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval;
+       else
+               p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1);
+
+       return true;
+}
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -1646,6 +1878,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        PTRAUTH_KEY(APDB),
        PTRAUTH_KEY(APGA),
 
+       { SYS_DESC(SYS_SPSR_EL1), access_spsr},
+       { SYS_DESC(SYS_ELR_EL1), access_elr},
+
        { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
        { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
        { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -1693,7 +1928,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_LORC_EL1), trap_loregion },
        { SYS_DESC(SYS_LORID_EL1), trap_loregion },
 
-       { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+       { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
        { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
 
        { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
@@ -1717,7 +1952,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
 
        { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
-       { SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+       { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
+         .set_user = set_clidr },
+       { SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
        { SYS_DESC(SYS_SMIDR_EL1), undef_access },
        { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
        { SYS_DESC(SYS_CTR_EL0), access_ctr },
@@ -1913,9 +2150,67 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
          .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
 
+       EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
+       EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0),
+       EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
+       EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HCR_EL2, access_rw, reset_val, 0),
+       EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
+       EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT),
+       EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HACR_EL2, access_rw, reset_val, 0),
+
+       EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
+       EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
+       EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
+       EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
+       EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
+
        { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+       EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
+       EL2_REG(ELR_EL2, access_rw, reset_val, 0),
+       { SYS_DESC(SYS_SP_EL1), access_sp_el1},
+
        { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
+       EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
+       EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
+       EL2_REG(ESR_EL2, access_rw, reset_val, 0),
        { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
+
+       EL2_REG(FAR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
+
+       EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
+       EL2_REG(AMAIR_EL2, access_rw, reset_val, 0),
+
+       EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
+       EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
+       { SYS_DESC(SYS_RMR_EL2), trap_undef },
+
+       EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
+       EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
+
+       EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0),
+       EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
+
+       EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078),
+       EL12_REG(CPACR, access_rw, reset_val, 0),
+       EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0),
+       EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0),
+       EL12_REG(TCR, access_vm_reg, reset_val, 0),
+       { SYS_DESC(SYS_SPSR_EL12), access_spsr},
+       { SYS_DESC(SYS_ELR_EL12), access_elr},
+       EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0),
+       EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0),
+       EL12_REG(ESR, access_vm_reg, reset_unknown, 0),
+       EL12_REG(FAR, access_vm_reg, reset_unknown, 0),
+       EL12_REG(MAIR, access_vm_reg, reset_unknown, 0),
+       EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0),
+       EL12_REG(VBAR, access_rw, reset_val, 0),
+       EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0),
+       EL12_REG(CNTKCTL, access_rw, reset_val, 0),
+
+       EL2_REG(SP_EL2, NULL, reset_unknown, 0),
 };
 
 static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
@@ -2219,6 +2514,10 @@ static const struct sys_reg_desc cp15_regs[] = {
 
        { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
        { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
+
+       /* CCSIDR2 */
+       { Op1(1), CRn( 0), CRm( 0),  Op2(2), undef_access },
+
        { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 },
 };
 
@@ -2724,7 +3023,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
 
 FUNCTION_INVARIANT(midr_el1)
 FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(clidr_el1)
 FUNCTION_INVARIANT(aidr_el1)
 
 static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
@@ -2736,7 +3034,6 @@ static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
 static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
        { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
        { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
-       { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
        { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
        { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
 };
@@ -2773,33 +3070,7 @@ static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
        return 0;
 }
 
-static bool is_valid_cache(u32 val)
-{
-       u32 level, ctype;
-
-       if (val >= CSSELR_MAX)
-               return false;
-
-       /* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
-       level = (val >> 1);
-       ctype = (cache_levels >> (level * 3)) & 7;
-
-       switch (ctype) {
-       case 0: /* No cache */
-               return false;
-       case 1: /* Instruction cache only */
-               return (val & 1);
-       case 2: /* Data cache only */
-       case 4: /* Unified cache */
-               return !(val & 1);
-       case 3: /* Separate instruction and data caches */
-               return true;
-       default: /* Reserved: we can't know instruction or data. */
-               return false;
-       }
-}
-
-static int demux_c15_get(u64 id, void __user *uaddr)
+static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
 {
        u32 val;
        u32 __user *uval = uaddr;
@@ -2815,16 +3086,16 @@ static int demux_c15_get(u64 id, void __user *uaddr)
                        return -ENOENT;
                val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
                        >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
-               if (!is_valid_cache(val))
+               if (val >= CSSELR_MAX)
                        return -ENOENT;
 
-               return put_user(get_ccsidr(val), uval);
+               return put_user(get_ccsidr(vcpu, val), uval);
        default:
                return -ENOENT;
        }
 }
 
-static int demux_c15_set(u64 id, void __user *uaddr)
+static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
 {
        u32 val, newval;
        u32 __user *uval = uaddr;
@@ -2840,16 +3111,13 @@ static int demux_c15_set(u64 id, void __user *uaddr)
                        return -ENOENT;
                val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
                        >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
-               if (!is_valid_cache(val))
+               if (val >= CSSELR_MAX)
                        return -ENOENT;
 
                if (get_user(newval, uval))
                        return -EFAULT;
 
-               /* This is also invariant: you can't change it. */
-               if (newval != get_ccsidr(val))
-                       return -EINVAL;
-               return 0;
+               return set_ccsidr(vcpu, val, newval);
        default:
                return -ENOENT;
        }
@@ -2864,7 +3132,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
        int ret;
 
        r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
-       if (!r)
+       if (!r || sysreg_hidden_user(vcpu, r))
                return -ENOENT;
 
        if (r->get_user) {
@@ -2886,7 +3154,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
        int err;
 
        if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
-               return demux_c15_get(reg->id, uaddr);
+               return demux_c15_get(vcpu, reg->id, uaddr);
 
        err = get_invariant_sys_reg(reg->id, uaddr);
        if (err != -ENOENT)
@@ -2908,7 +3176,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
                return -EFAULT;
 
        r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
-       if (!r)
+       if (!r || sysreg_hidden_user(vcpu, r))
                return -ENOENT;
 
        if (sysreg_user_write_ignore(vcpu, r))
@@ -2930,7 +3198,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
        int err;
 
        if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
-               return demux_c15_set(reg->id, uaddr);
+               return demux_c15_set(vcpu, reg->id, uaddr);
 
        err = set_invariant_sys_reg(reg->id, uaddr);
        if (err != -ENOENT)
@@ -2942,13 +3210,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 
 static unsigned int num_demux_regs(void)
 {
-       unsigned int i, count = 0;
-
-       for (i = 0; i < CSSELR_MAX; i++)
-               if (is_valid_cache(i))
-                       count++;
-
-       return count;
+       return CSSELR_MAX;
 }
 
 static int write_demux_regids(u64 __user *uindices)
@@ -2958,8 +3220,6 @@ static int write_demux_regids(u64 __user *uindices)
 
        val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
        for (i = 0; i < CSSELR_MAX; i++) {
-               if (!is_valid_cache(i))
-                       continue;
                if (put_user(val | i, uindices))
                        return -EFAULT;
                uindices++;
@@ -3002,7 +3262,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
        if (!(rd->reg || rd->get_user))
                return 0;
 
-       if (sysreg_hidden(vcpu, rd))
+       if (sysreg_hidden_user(vcpu, rd))
                return 0;
 
        if (!copy_reg_to_user(rd, uind))
@@ -3061,7 +3321,6 @@ int __init kvm_sys_reg_table_init(void)
 {
        bool valid = true;
        unsigned int i;
-       struct sys_reg_desc clidr;
 
        /* Make sure tables are unique and in order. */
        valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -3078,23 +3337,5 @@ int __init kvm_sys_reg_table_init(void)
        for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
                invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
 
-       /*
-        * CLIDR format is awkward, so clean it up.  See ARM B4.1.20:
-        *
-        *   If software reads the Cache Type fields from Ctype1
-        *   upwards, once it has seen a value of 0b000, no caches
-        *   exist at further-out levels of the hierarchy. So, for
-        *   example, if Ctype3 is the first Cache Type field with a
-        *   value of 0b000, the values of Ctype4 to Ctype7 must be
-        *   ignored.
-        */
-       get_clidr_el1(NULL, &clidr); /* Ugly... */
-       cache_levels = clidr.val;
-       for (i = 0; i < 7; i++)
-               if (((cache_levels >> (i*3)) & 7) == 0)
-                       break;
-       /* Clear all higher bits. */
-       cache_levels &= (1 << (i*3))-1;
-
        return 0;
 }
index e4ebb3a379fdb95e1d4fcc45e8e7d3221f0cf64d..6b11f2cc71467281620dc9b5d6bae2151b340cda 100644 (file)
@@ -85,8 +85,9 @@ struct sys_reg_desc {
 };
 
 #define REG_HIDDEN             (1 << 0) /* hidden from userspace and guest */
-#define REG_RAZ                        (1 << 1) /* RAZ from userspace and guest */
-#define REG_USER_WI            (1 << 2) /* WI from userspace only */
+#define REG_HIDDEN_USER                (1 << 1) /* hidden from userspace only */
+#define REG_RAZ                        (1 << 2) /* RAZ from userspace and guest */
+#define REG_USER_WI            (1 << 3) /* WI from userspace only */
 
 static __printf(2, 3)
 inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -152,6 +153,15 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
        return sysreg_visibility(vcpu, r) & REG_HIDDEN;
 }
 
+static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu,
+                                     const struct sys_reg_desc *r)
+{
+       if (likely(!r->visibility))
+               return false;
+
+       return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER);
+}
+
 static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
                                         const struct sys_reg_desc *r)
 {
index 33e4e7dd27199acb57822e5654d97d5e9ae0ca9c..f3e46a9761256d2a6ba5713a606686479d2164da 100644 (file)
@@ -2,6 +2,7 @@
 #if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_ARM_ARM64_KVM_H
 
+#include <asm/kvm_emulate.h>
 #include <kvm/arm_arch_timer.h>
 #include <linux/tracepoint.h>
 
@@ -301,6 +302,64 @@ TRACE_EVENT(kvm_timer_emulate,
                  __entry->timer_idx, __entry->should_fire)
 );
 
+TRACE_EVENT(kvm_nested_eret,
+       TP_PROTO(struct kvm_vcpu *vcpu, unsigned long elr_el2,
+                unsigned long spsr_el2),
+       TP_ARGS(vcpu, elr_el2, spsr_el2),
+
+       TP_STRUCT__entry(
+               __field(struct kvm_vcpu *,      vcpu)
+               __field(unsigned long,          elr_el2)
+               __field(unsigned long,          spsr_el2)
+               __field(unsigned long,          target_mode)
+               __field(unsigned long,          hcr_el2)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu = vcpu;
+               __entry->elr_el2 = elr_el2;
+               __entry->spsr_el2 = spsr_el2;
+               __entry->target_mode = spsr_el2 & (PSR_MODE_MASK | PSR_MODE32_BIT);
+               __entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+       ),
+
+       TP_printk("elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx",
+                 __entry->elr_el2, __entry->spsr_el2,
+                 __print_symbolic(__entry->target_mode, kvm_mode_names),
+                 __entry->hcr_el2)
+);
+
+TRACE_EVENT(kvm_inject_nested_exception,
+       TP_PROTO(struct kvm_vcpu *vcpu, u64 esr_el2, int type),
+       TP_ARGS(vcpu, esr_el2, type),
+
+       TP_STRUCT__entry(
+               __field(struct kvm_vcpu *,              vcpu)
+               __field(unsigned long,                  esr_el2)
+               __field(int,                            type)
+               __field(unsigned long,                  spsr_el2)
+               __field(unsigned long,                  pc)
+               __field(unsigned long,                  source_mode)
+               __field(unsigned long,                  hcr_el2)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu = vcpu;
+               __entry->esr_el2 = esr_el2;
+               __entry->type = type;
+               __entry->spsr_el2 = *vcpu_cpsr(vcpu);
+               __entry->pc = *vcpu_pc(vcpu);
+               __entry->source_mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+               __entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+       ),
+
+       TP_printk("%s: esr_el2 0x%lx elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx",
+                 __print_symbolic(__entry->type, kvm_exception_type_names),
+                 __entry->esr_el2, __entry->pc, __entry->spsr_el2,
+                 __print_symbolic(__entry->source_mode, kvm_mode_names),
+                 __entry->hcr_el2)
+);
+
 #endif /* _TRACE_ARM_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index 6c7f6ae21ec056987a770494abaf7cb97d8967d0..cd134db41a57cc980fa2a0b39aa9cc874870f33d 100644 (file)
@@ -570,7 +570,7 @@ int kvm_vgic_hyp_init(void)
        if (ret)
                return ret;
 
-       if (!has_mask)
+       if (!has_mask && !kvm_vgic_global_state.maint_irq)
                return 0;
 
        ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
index b32d434c1d4a4eadefc04a9ca055646b20812b1f..e67b3b2c80440273336c4c9ef7cd1671e183d6c1 100644 (file)
@@ -473,9 +473,10 @@ int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
  * active state can be overwritten when the VCPU's state is synced coming back
  * from the guest.
  *
- * For shared interrupts as well as GICv3 private interrupts, we have to
- * stop all the VCPUs because interrupts can be migrated while we don't hold
- * the IRQ locks and we don't want to be chasing moving targets.
+ * For shared interrupts as well as GICv3 private interrupts accessed from the
+ * non-owning CPU, we have to stop all the VCPUs because interrupts can be
+ * migrated while we don't hold the IRQ locks and we don't want to be chasing
+ * moving targets.
  *
  * For GICv2 private interrupts we don't have to do anything because
  * userspace accesses to the VGIC state already require all VCPUs to be
@@ -484,7 +485,8 @@ int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
  */
 static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
 {
-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+       if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
+            vcpu != kvm_get_running_vcpu()) ||
            intid >= VGIC_NR_PRIVATE_IRQS)
                kvm_arm_halt_guest(vcpu->kvm);
 }
@@ -492,7 +494,8 @@ static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
 /* See vgic_access_active_prepare */
 static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid)
 {
-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+       if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
+            vcpu != kvm_get_running_vcpu()) ||
            intid >= VGIC_NR_PRIVATE_IRQS)
                kvm_arm_resume_guest(vcpu->kvm);
 }
index 684bdfaad4a9de9b10bbb6e80be54a7c8a994318..469d816f356f3fe94b021f5e2fd14ea91310f114 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/irqchip/arm-gic-v3.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/kstrtox.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <kvm/arm_vgic.h>
@@ -584,25 +585,25 @@ DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
 
 static int __init early_group0_trap_cfg(char *buf)
 {
-       return strtobool(buf, &group0_trap);
+       return kstrtobool(buf, &group0_trap);
 }
 early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);
 
 static int __init early_group1_trap_cfg(char *buf)
 {
-       return strtobool(buf, &group1_trap);
+       return kstrtobool(buf, &group1_trap);
 }
 early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);
 
 static int __init early_common_trap_cfg(char *buf)
 {
-       return strtobool(buf, &common_trap);
+       return kstrtobool(buf, &common_trap);
 }
 early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
 
 static int __init early_gicv4_enable(char *buf)
 {
-       return strtobool(buf, &gicv4_enable);
+       return kstrtobool(buf, &gicv4_enable);
 }
 early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
 
index dfeb2c51e2573dc07d58be19e58cb89d568051ab..82c7e579a8bacab95f23eaf1a65461c76a966883 100644 (file)
@@ -31,6 +31,7 @@ HAS_GENERIC_AUTH_IMP_DEF
 HAS_IRQ_PRIO_MASKING
 HAS_LDAPR
 HAS_LSE_ATOMICS
+HAS_NESTED_VIRT
 HAS_NO_FPSIMD
 HAS_NO_HW_PREFETCH
 HAS_PAN
@@ -50,6 +51,7 @@ MTE
 MTE_ASYMM
 SME
 SME_FA64
+SME2
 SPECTRE_V2
 SPECTRE_V3A
 SPECTRE_V4
index c350164a395502484aff2c029aa4ff03e778bd23..e1df4b9565962b296ee18d077c67172b5f26fb1c 100755 (executable)
@@ -98,6 +98,7 @@ END {
 
        res0 = "UL(0)"
        res1 = "UL(0)"
+       unkn = "UL(0)"
 
        next_bit = 63
 
@@ -112,11 +113,13 @@ END {
 
        define(reg "_RES0", "(" res0 ")")
        define(reg "_RES1", "(" res1 ")")
+       define(reg "_UNKN", "(" unkn ")")
        print ""
 
        reg = null
        res0 = null
        res1 = null
+       unkn = null
 
        next
 }
@@ -134,6 +137,7 @@ END {
 
        res0 = "UL(0)"
        res1 = "UL(0)"
+       unkn = "UL(0)"
 
        define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2)
        define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")")
@@ -161,7 +165,9 @@ END {
                define(reg "_RES0", "(" res0 ")")
        if (res1 != null)
                define(reg "_RES1", "(" res1 ")")
-       if (res0 != null || res1 != null)
+       if (unkn != null)
+               define(reg "_UNKN", "(" unkn ")")
+       if (res0 != null || res1 != null || unkn != null)
                print ""
 
        reg = null
@@ -172,6 +178,7 @@ END {
        op2 = null
        res0 = null
        res1 = null
+       unkn = null
 
        next
 }
@@ -190,6 +197,7 @@ END {
         next_bit = 0
        res0 = null
        res1 = null
+       unkn = null
 
        next
 }
@@ -215,6 +223,16 @@ END {
        next
 }
 
+/^Unkn/ && (block == "Sysreg" || block == "SysregFields") {
+       expect_fields(2)
+       parse_bitdef(reg, "UNKN", $2)
+       field = "UNKN_" msb "_" lsb
+
+       unkn = unkn " | GENMASK_ULL(" msb ", " lsb ")"
+
+       next
+}
+
 /^Field/ && (block == "Sysreg" || block == "SysregFields") {
        expect_fields(3)
        field = $3
index 184e58fd5631a9bcdc84ba2c05479fbcd300c897..330569fb2336262bafb51e0bc3426dbe61a101ce 100644 (file)
@@ -15,6 +15,8 @@
 
 # Res1 <msb>[:<lsb>]
 
+# Unkn <msb>[:<lsb>]
+
 # Field        <msb>[:<lsb>]   <name>
 
 # Enum <msb>[:<lsb>]   <name>
@@ -894,6 +896,7 @@ EndEnum
 Enum   27:24   SME
        0b0000  NI
        0b0001  IMP
+       0b0010  SME2
 EndEnum
 Res0   23:20
 Enum   19:16   MPAM_frac
@@ -975,7 +978,9 @@ Enum        63      FA64
 EndEnum
 Res0   62:60
 Enum   59:56   SMEver
-       0b0000  IMP
+       0b0000  SME
+       0b0001  SME2
+       0b0010  SME2p1
 EndEnum
 Enum   55:52   I16I64
        0b0000  NI
@@ -986,7 +991,19 @@ Enum       48      F64F64
        0b0     NI
        0b1     IMP
 EndEnum
-Res0   47:40
+Enum   47:44   I16I32
+       0b0000  NI
+       0b0101  IMP
+EndEnum
+Enum   43      B16B16
+       0b0     NI
+       0b1     IMP
+EndEnum
+Enum   42      F16F16
+       0b0     NI
+       0b1     IMP
+EndEnum
+Res0   41:40
 Enum   39:36   I8I32
        0b0000  NI
        0b1111  IMP
@@ -999,7 +1016,10 @@ Enum      34      B16F32
        0b0     NI
        0b1     IMP
 EndEnum
-Res0   33
+Enum   33      BI32I32
+       0b0     NI
+       0b1     IMP
+EndEnum
 Enum   32      F32F32
        0b0     NI
        0b1     IMP
@@ -1599,7 +1619,8 @@ EndSysreg
 SysregFields   SMCR_ELx
 Res0   63:32
 Field  31      FA64
-Res0   30:9
+Field  30      EZT0
+Res0   29:9
 Raz    8:4
 Field  3:0     LEN
 EndSysregFields
@@ -1635,6 +1656,16 @@ Sysreg   SCXTNUM_EL1     3       0       13      0       7
 Field  63:0    SoftwareContextNumber
 EndSysreg
 
+# The bit layout for CCSIDR_EL1 depends on whether FEAT_CCIDX is implemented.
+# The following is for case when FEAT_CCIDX is not implemented.
+Sysreg CCSIDR_EL1      3       1       0       0       0
+Res0   63:32
+Unkn   31:28
+Field  27:13   NumSets
+Field  12:3    Associativity
+Field  2:0     LineSize
+EndSysreg
+
 Sysreg CLIDR_EL1       3       1       0       0       1
 Res0   63:47
 Field  46:33   Ttypen
@@ -1651,6 +1682,11 @@ Field    5:3     Ctype2
 Field  2:0     Ctype1
 EndSysreg
 
+Sysreg CCSIDR2_EL1     3       1       0       0       2
+Res0   63:24
+Field  23:0    NumSets
+EndSysreg
+
 Sysreg GMID_EL1        3       1       0       0       4
 Res0   63:4
 Field  3:0     BS
index 792a6037047adf27cb6d13c9be5a89e63ad8b24a..808c292ad3f4935b50864d1287fd7a7941432c55 100644 (file)
 #define INVALID_PAGE (~(hpa_t)0)
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
-#define INVALID_GPA (~(gpa_t)0)
-
 /* KVM Hugepage definitions for x86 */
 #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
 #define KVM_NR_PAGE_SIZES      (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
index ae3437f03e6c2b7983fd66f8bce77b761b8bfd06..76ee7c5e7b7e033e12ee02ca2c1abbb7e48d5962 100644 (file)
                                 FIELD_PREP(AIC_EVENT_NUM, x))
 #define AIC_HWIRQ_IRQ(x)       FIELD_GET(AIC_EVENT_NUM, x)
 #define AIC_HWIRQ_DIE(x)       FIELD_GET(AIC_EVENT_DIE, x)
-#define AIC_NR_FIQ             6
 #define AIC_NR_SWIPI           32
 
 /*
  * running at EL2 (with VHE). When the kernel is running at EL1, the
  * mapping differs and aic_irq_domain_translate() performs the remapping.
  */
-
-#define AIC_TMR_EL0_PHYS       AIC_TMR_HV_PHYS
-#define AIC_TMR_EL0_VIRT       AIC_TMR_HV_VIRT
-#define AIC_TMR_EL02_PHYS      AIC_TMR_GUEST_PHYS
-#define AIC_TMR_EL02_VIRT      AIC_TMR_GUEST_VIRT
+enum fiq_hwirq {
+       /* Must be ordered as in apple-aic.h */
+       AIC_TMR_EL0_PHYS        = AIC_TMR_HV_PHYS,
+       AIC_TMR_EL0_VIRT        = AIC_TMR_HV_VIRT,
+       AIC_TMR_EL02_PHYS       = AIC_TMR_GUEST_PHYS,
+       AIC_TMR_EL02_VIRT       = AIC_TMR_GUEST_VIRT,
+       AIC_CPU_PMU_Effi        = AIC_CPU_PMU_E,
+       AIC_CPU_PMU_Perf        = AIC_CPU_PMU_P,
+       /* No need for this to be discovered from DT */
+       AIC_VGIC_MI,
+       AIC_NR_FIQ
+};
 
 static DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
 
@@ -384,14 +390,20 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
 
        /*
         * vGIC maintenance interrupts end up here too, so we need to check
-        * for them separately. This should never trigger if KVM is working
-        * properly, because it will have already taken care of clearing it
-        * on guest exit before this handler runs.
+        * for them separately. It should however only trigger when NV is
+        * in use, and be cleared when coming back from the handler.
         */
-       if (is_kernel_in_hyp_mode() && (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&
-               read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
-               pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");
-               sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
+       if (is_kernel_in_hyp_mode() &&
+           (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&
+           read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
+               generic_handle_domain_irq(aic_irqc->hw_domain,
+                                         AIC_FIQ_HWIRQ(AIC_VGIC_MI));
+
+               if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&
+                            read_sysreg_s(SYS_ICH_MISR_EL2))) {
+                       pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");
+                       sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
+               }
        }
 }
 
@@ -1178,6 +1190,21 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
                          "irqchip/apple-aic/ipi:starting",
                          aic_init_cpu, NULL);
 
+       if (is_kernel_in_hyp_mode()) {
+               struct irq_fwspec mi = {
+                       .fwnode         = of_node_to_fwnode(node),
+                       .param_count    = 3,
+                       .param          = {
+                               [0]     = AIC_FIQ, /* This is a lie */
+                               [1]     = AIC_VGIC_MI,
+                               [2]     = IRQ_TYPE_LEVEL_HIGH,
+                       },
+               };
+
+               vgic_info.maint_irq = irq_create_fwspec_mapping(&mi);
+               WARN_ON(!vgic_info.maint_irq);
+       }
+
        vgic_set_kvm_info(&vgic_info);
 
        pr_info("Initialized with %d/%d IRQs * %d/%d die(s), %d FIQs, %d vIPIs",
index 76de36e56cdfe828e17b46f88a6d60f14a7f0bc2..2728d49bbdf6d34cf6cf94dc9adebf2480ddbe22 100644 (file)
@@ -40,7 +40,7 @@ typedef unsigned long  gva_t;
 typedef u64            gpa_t;
 typedef u64            gfn_t;
 
-#define GPA_INVALID    (~(gpa_t)0)
+#define INVALID_GPA    (~(gpa_t)0)
 
 typedef unsigned long  hva_t;
 typedef u64            hpa_t;
index 4c6a8fa5e7ed624a61412160b7e996bbe615b3e3..68de6f4c4eee35661ae45775238cfcdc1232c322 100644 (file)
@@ -434,6 +434,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PAC_ENABLED_KEYS        0x40a   /* arm64 ptr auth enabled keys (prctl()) */
 #define NT_ARM_SSVE    0x40b           /* ARM Streaming SVE registers */
 #define NT_ARM_ZA      0x40c           /* ARM SME ZA registers */
+#define NT_ARM_ZT      0x40d           /* ARM SME ZT registers */
 #define NT_ARC_V2      0x600           /* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD    0x700           /* Vmcore Device Dump Note */
 #define NT_MIPS_DSP    0x800           /* MIPS DSP ASE registers */
index 9f255bc5f31cbe8f7fce7a5a774af5844fed1e4d..93333a90bf3a7ffbd6f9e6d9bc7dfb0c9464ca66 100644 (file)
@@ -50,6 +50,78 @@ static void sme_sigill(void)
        asm volatile(".inst 0x04bf5800" : : : "x0");
 }
 
+static void sme2_sigill(void)
+{
+       /* SMSTART ZA */
+       asm volatile("msr S0_3_C4_C5_3, xzr" : : : );
+
+       /* ZERO ZT0 */
+       asm volatile(".inst 0xc0480001" : : : );
+
+       /* SMSTOP */
+       asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme2p1_sigill(void)
+{
+       /* SMSTART SM */
+       asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+       /* BFCLAMP { Z0.H - Z1.H }, Z0.H, Z0.H */
+       asm volatile(".inst 0xc120C000" : : : );
+
+       /* SMSTOP */
+       asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smei16i32_sigill(void)
+{
+       /* SMSTART */
+       asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+       /* SMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
+       asm volatile(".inst 0xa0800000" : : : );
+
+       /* SMSTOP */
+       asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smebi32i32_sigill(void)
+{
+       /* SMSTART */
+       asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+       /* BMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
+       asm volatile(".inst 0x80800008" : : : );
+
+       /* SMSTOP */
+       asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smeb16b16_sigill(void)
+{
+       /* SMSTART */
+       asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+       /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
+       asm volatile(".inst 0xC1E41C00" : : : );
+
+       /* SMSTOP */
+       asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef16f16_sigill(void)
+{
+       /* SMSTART */
+       asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+       /* FADD ZA.H[W0, 0], { Z0.H-Z1.H } */
+       asm volatile(".inst 0xc1a41C00" : : : );
+
+       /* SMSTOP */
+       asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
 static void sve_sigill(void)
 {
        /* RDVL x0, #0 */
@@ -158,6 +230,49 @@ static const struct hwcap_data {
                .sigill_fn = sme_sigill,
                .sigill_reliable = true,
        },
+       {
+               .name = "SME2",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SME2,
+               .cpuinfo = "sme2",
+               .sigill_fn = sme2_sigill,
+               .sigill_reliable = true,
+       },
+       {
+               .name = "SME 2.1",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SME2P1,
+               .cpuinfo = "sme2p1",
+               .sigill_fn = sme2p1_sigill,
+       },
+       {
+               .name = "SME I16I32",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SME_I16I32,
+               .cpuinfo = "smei16i32",
+               .sigill_fn = smei16i32_sigill,
+       },
+       {
+               .name = "SME BI32I32",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SME_BI32I32,
+               .cpuinfo = "smebi32i32",
+               .sigill_fn = smebi32i32_sigill,
+       },
+       {
+               .name = "SME B16B16",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SME_B16B16,
+               .cpuinfo = "smeb16b16",
+               .sigill_fn = smeb16b16_sigill,
+       },
+       {
+               .name = "SME F16F16",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_SME_F16F16,
+               .cpuinfo = "smef16f16",
+               .sigill_fn = smef16f16_sigill,
+       },
        {
                .name = "SVE",
                .at_hwcap = AT_HWCAP,
index acd5e9f3bc0b424b4dddbfb5de88913d2f49056b..6ddf392329c9e7e40776b285b9313f533879b622 100644 (file)
@@ -23,6 +23,9 @@
 
 .arch_extension sve
 
+#define ID_AA64SMFR0_EL1_SMEver_SHIFT           56
+#define ID_AA64SMFR0_EL1_SMEver_WIDTH           4
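+/* A non-zero SMEver field indicates SME2 or later, and hence that ZT0 exists */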
+
 /*
  * LDR (vector to ZA array):
  *     LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
                | ((\offset) & 7)
 .endm
 
+/*
+ * LDR (ZT0)
+ *
+ *     LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+       .inst   0xe11f8000                      \
+               | (((\nx) & 0x1f) << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ *     STR ZT0, nx
+ */
+.macro _str_zt nx
+       .inst   0xe13f8000                      \
+               | (((\nx) & 0x1f) << 5)
+.endm
+
 .globl do_syscall
 do_syscall:
        // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
@@ -64,7 +87,7 @@ do_syscall:
        msr     S3_3_C4_C2_2, x2
 1:
 
-       // Load ZA if it's enabled - uses x12 as scratch due to SME LDR
+       // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR
        tbz     x2, #SVCR_ZA_SHIFT, 1f
        mov     w12, #0
        ldr     x2, =za_in
@@ -73,6 +96,15 @@ do_syscall:
        add     x12, x12, #1
        cmp     x1, x12
        bne     2b
+
+       // ZT0
+       mrs     x2, S3_0_C0_C4_5        // ID_AA64SMFR0_EL1
+       ubfx    x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
+                        #ID_AA64SMFR0_EL1_SMEver_WIDTH
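+       // SMEver == 0 means no SME2, so there is no ZT0 to load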
+       cbz     x2, 1f
+       adrp    x2, zt_in
+       add     x2, x2, :lo12:zt_in
+       _ldr_zt 2
 1:
 
        // Load GPRs x8-x28, and save our SP/FP for later comparison
@@ -235,6 +267,15 @@ do_syscall:
        add     x12, x12, #1
        cmp     x1, x12
        bne     2b
+
+       // ZT0
+       mrs     x2, S3_0_C0_C4_5        // ID_AA64SMFR0_EL1
+       ubfx    x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
+                       #ID_AA64SMFR0_EL1_SMEver_WIDTH
+       cbz     x2, 1f
+       adrp    x2, zt_out
+       add     x2, x2, :lo12:zt_out
+       _str_zt 2
 1:
 
        // Save the SVE state if we have some
index dd7ebe536d05faec4cc7053ee10c9e39bf31e094..9800f9dc6b35faceb6bc785b0c5e5beeb612efac 100644 (file)
@@ -311,6 +311,35 @@ static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
        return errors;
 }
 
+uint8_t zt_in[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
+uint8_t zt_out[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
+
+static void setup_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                    uint64_t svcr)
+{
+       fill_random(zt_in, sizeof(zt_in));
+       memset(zt_out, 0, sizeof(zt_out));
+}
+
+static int check_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                   uint64_t svcr)
+{
+       int errors = 0;
+
+       if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2))
+               return 0;
+
+       if (!(svcr & SVCR_ZA_MASK))
+               return 0;
+
+       if (memcmp(zt_in, zt_out, sizeof(zt_in)) != 0) {
+               ksft_print_msg("SME VL %d ZT does not match\n", sme_vl);
+               errors++;
+       }
+
+       return errors;
+}
+
 typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                         uint64_t svcr);
 typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
@@ -334,6 +363,7 @@ static struct {
        { setup_ffr, check_ffr },
        { setup_svcr, check_svcr },
        { setup_za, check_za },
+       { setup_zt, check_zt },
 };
 
 static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
@@ -474,6 +504,7 @@ int main(void)
 {
        int i;
        int tests = 1;  /* FPSIMD */
+       int sme_ver;
 
        srandom(getpid());
 
@@ -482,10 +513,15 @@ int main(void)
        tests += (sve_count_vls() * sme_count_vls()) * 3;
        ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
 
+       if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+               sme_ver = 2;
+       else
+               sme_ver = 1;
+
        if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
-               ksft_print_msg("SME with FA64\n");
+               ksft_print_msg("SME%d with FA64\n", sme_ver);
        else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
-               ksft_print_msg("SME without FA64\n");
+               ksft_print_msg("SME%d without FA64\n", sme_ver);
 
        for (i = 0; i < ARRAY_SIZE(syscalls); i++)
                test_one_syscall(&syscalls[i]);
index df79d29664a1bb8e7ed39c553aa4eba7351cffc6..ebc86757bdd82026b0d2ee888831e5625d6c9011 100644 (file)
@@ -12,3 +12,5 @@ vlset
 za-fork
 za-ptrace
 za-test
+zt-ptrace
+zt-test
index 36db61358ed5bc77fa5dc808c36df3b50d348208..50a70220ba6c79a68d96f169c657b892777806a2 100644 (file)
@@ -14,6 +14,8 @@ TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
        sve-test \
        ssve-test \
        za-test \
+       zt-ptrace \
+       zt-test \
        vlset
 TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
 
@@ -41,5 +43,8 @@ $(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
 $(OUTPUT)/za-ptrace: za-ptrace.c
 $(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
        $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/zt-ptrace: zt-ptrace.c
+$(OUTPUT)/zt-test: zt-test.S $(OUTPUT)/asm-utils.o
+       $(CC) -nostdlib $^ -o $@
 
 include ../../lib.mk
index f8b2f41aac36feeea601715112677208cedd9a1a..520385fcfede6b16745a8244cab1eeda66e3306a 100644 (file)
@@ -370,6 +370,19 @@ static void start_za(struct child_data *child, int vl, int cpu)
        ksft_print_msg("Started %s\n", child->name);
 }
 
+static void start_zt(struct child_data *child, int cpu)
+{
+       int ret;
+
+       ret = asprintf(&child->name, "ZT-%d", cpu);
+       if (ret == -1)
+               ksft_exit_fail_msg("asprintf() failed\n");
+
+       child_start(child, "./zt-test");
+
+       ksft_print_msg("Started %s\n", child->name);
+}
+
 static void probe_vls(int vls[], int *vl_count, int set_vl)
 {
        unsigned int vq;
@@ -426,6 +439,7 @@ int main(int argc, char **argv)
        bool all_children_started = false;
        int seen_children;
        int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
+       bool have_sme2;
        struct sigaction sa;
 
        while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
@@ -458,6 +472,13 @@ int main(int argc, char **argv)
                sme_vl_count = 0;
        }
 
+       if (getauxval(AT_HWCAP2) & HWCAP2_SME2) {
+               tests += cpus;
+               have_sme2 = true;
+       } else {
+               have_sme2 = false;
+       }
+
        /* Force context switching if we only have FPSIMD */
        if (!sve_vl_count && !sme_vl_count)
                fpsimd_per_cpu = 2;
@@ -468,8 +489,9 @@ int main(int argc, char **argv)
        ksft_print_header();
        ksft_set_plan(tests);
 
-       ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n",
-                      cpus, sve_vl_count, sme_vl_count);
+       ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs, SME2 %s\n",
+                      cpus, sve_vl_count, sme_vl_count,
+                      have_sme2 ? "present" : "absent");
 
        if (timeout > 0)
                ksft_print_msg("Will run for %ds\n", timeout);
@@ -527,6 +549,9 @@ int main(int argc, char **argv)
                        start_ssve(&children[num_children++], sme_vls[j], i);
                        start_za(&children[num_children++], sme_vls[j], i);
                }
+
+               if (have_sme2)
+                       start_zt(&children[num_children++], i);
        }
 
        /*
index 7191e53ca1c0463b18d23ba1a2648ae4721f09ae..9292bba5400bb81b8e34769fa3eb70811746d8b8 100644 (file)
                | ((\offset) & 7)
 .endm
 
+/*
+ * LDR (ZT0)
+ *
+ *     LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+       .inst   0xe11f8000                      \
+               | (((\nx) & 0x1f) << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ *     STR ZT0, nx
+ */
+.macro _str_zt nx
+       .inst   0xe13f8000                      \
+               | (((\nx) & 0x1f) << 5)
+.endm
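+// Use as, e.g., "_ldr_zt 0" / "_str_zt 0" to load/store ZT0 via the address in x0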
+
 #endif
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
new file mode 100644 (file)
index 0000000..996d961
--- /dev/null
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+#ifndef NT_ARM_ZT
+#define NT_ARM_ZT 0x40d
+#endif
+
+#define EXPECTED_TESTS 3
+
+static int sme_vl;
+
+static void fill_buf(char *buf, size_t size)
+{
+       int i;
+
+       for (i = 0; i < size; i++)
+               buf[i] = random();
+}
+
+static int do_child(void)
+{
+       if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+               ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+       if (raise(SIGSTOP))
+               ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+       return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+       struct user_za_header *za;
+       void *p;
+       size_t sz = sizeof(*za);
+       struct iovec iov;
+
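+       /* Loop, growing the buffer until the whole ZA regset fits */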
+       while (1) {
+               if (*size < sz) {
+                       p = realloc(*buf, sz);
+                       if (!p) {
+                               errno = ENOMEM;
+                               goto error;
+                       }
+
+                       *buf = p;
+                       *size = sz;
+               }
+
+               iov.iov_base = *buf;
+               iov.iov_len = sz;
+               if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+                       goto error;
+
+               za = *buf;
+               if (za->size <= sz)
+                       break;
+
+               sz = za->size;
+       }
+
+       return za;
+
+error:
+       return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+       struct iovec iov;
+
+       iov.iov_base = (void *)za;
+       iov.iov_len = za->size;
+       return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
+{
+       struct iovec iov;
+
+       iov.iov_base = zt;
+       iov.iov_len = ZT_SIG_REG_BYTES;
+       return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
+}
+
+
+static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
+{
+       struct iovec iov;
+
+       iov.iov_base = (void *)zt;
+       iov.iov_len = ZT_SIG_REG_BYTES;
+       return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZT, &iov);
+}
+
+/* Reading with ZA disabled returns all zeros */
+static void ptrace_za_disabled_read_zt(pid_t child)
+{
+       struct user_za_header za;
+       char zt[ZT_SIG_REG_BYTES];
+       int ret, i;
+       bool fail = false;
+
+       /* Disable PSTATE.ZA using the ZA interface */
+       memset(&za, 0, sizeof(za));
+       za.vl = sme_vl;
+       za.size = sizeof(za);
+
+       ret = set_za(child, &za);
+       if (ret != 0) {
+               ksft_print_msg("Failed to disable ZA\n");
+               fail = true;
+       }
+
+       /* Read back ZT */
+       ret = get_zt(child, zt);
+       if (ret != 0) {
+               ksft_print_msg("Failed to read ZT\n");
+               fail = true;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(zt); i++) {
+               if (zt[i]) {
+                       ksft_print_msg("zt[%d]: 0x%x != 0\n", i, zt[i]);
+                       fail = true;
+               }
+       }
+
+       ksft_test_result(!fail, "ptrace_za_disabled_read_zt\n");
+}
+
+/* Writing then reading ZT should return the data written */
+static void ptrace_set_get_zt(pid_t child)
+{
+       char zt_in[ZT_SIG_REG_BYTES];
+       char zt_out[ZT_SIG_REG_BYTES];
+       int ret, i;
+       bool fail = false;
+
+       fill_buf(zt_in, sizeof(zt_in));
+
+       ret = set_zt(child, zt_in);
+       if (ret != 0) {
+               ksft_print_msg("Failed to set ZT\n");
+               fail = true;
+       }
+
+       ret = get_zt(child, zt_out);
+       if (ret != 0) {
+               ksft_print_msg("Failed to read ZT\n");
+               fail = true;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(zt_in); i++) {
+               if (zt_in[i] != zt_out[i]) {
+                       ksft_print_msg("zt[%d]: 0x%x != 0x%x\n", i, 
+                                      zt_in[i], zt_out[i]);
+                       fail = true;
+               }
+       }
+
+       ksft_test_result(!fail, "ptrace_set_get_zt\n");
+}
+
+/* Writing ZT should set PSTATE.ZA */
+static void ptrace_enable_za_via_zt(pid_t child)
+{
+       struct user_za_header za_in;
+       struct user_za_header *za_out;
+       char zt[ZT_SIG_REG_BYTES];
+       char *za_data;
+       size_t za_out_size;
+       int ret, i, vq;
+       bool fail = false;
+
+       /* Disable PSTATE.ZA using the ZA interface */
+       memset(&za_in, 0, sizeof(za_in));
+       za_in.vl = sme_vl;
+       za_in.size = sizeof(za_in);
+
+       ret = set_za(child, &za_in);
+       if (ret != 0) {
+               ksft_print_msg("Failed to disable ZA\n");
+               fail = true;
+       }
+
+       /* Write ZT */
+       fill_buf(zt, sizeof(zt));
+       ret = set_zt(child, zt);
+       if (ret != 0) {
+               ksft_print_msg("Failed to set ZT\n");
+               fail = true;
+       }
+
+       /* Read back ZA and check for register data */
+       za_out = NULL;
+       za_out_size = 0;
+       if (get_za(child, (void **)&za_out, &za_out_size)) {
+               /* Should have an unchanged VL */
+               if (za_out->vl != sme_vl) {
+                       ksft_print_msg("VL changed from %d to %d\n",
+                                      sme_vl, za_out->vl);
+                       fail = true;
+               }
+               vq = __sve_vq_from_vl(za_out->vl);
+               za_data = (char *)za_out + ZA_PT_ZA_OFFSET;
+
+               /* Should have register data */
+               if (za_out->size < ZA_PT_SIZE(vq)) {
+                       ksft_print_msg("ZA data less than expected: %u < %u\n",
+                                      za_out->size, ZA_PT_SIZE(vq));
+                       fail = true;
+                       vq = 0;
+               }
+
+               /* The ZA data itself should be zero since only ZT was written */
+               for (i = 0; i < ZA_PT_ZA_SIZE(vq); i++) {
+                       if (za_data[i]) {
+                               ksft_print_msg("ZA byte %d is %x\n",
+                                              i, za_data[i]);
+                               fail = true;
+                       }
+               }
+       } else {
+               ksft_print_msg("Failed to read ZA\n");
+               fail = true;
+       }
+
+       ksft_test_result(!fail, "ptrace_enable_za_via_zt\n");
+}
+
+static int do_parent(pid_t child)
+{
+       int ret = EXIT_FAILURE;
+       pid_t pid;
+       int status;
+       siginfo_t si;
+
+       /* Attach to the child */
+       while (1) {
+               int sig;
+
+               pid = wait(&status);
+               if (pid == -1) {
+                       perror("wait");
+                       goto error;
+               }
+
+               /*
+                * This should never happen but it's hard to flag in
+                * the framework.
+                */
+               if (pid != child)
+                       continue;
+
+               if (WIFEXITED(status) || WIFSIGNALED(status))
+                       ksft_exit_fail_msg("Child died unexpectedly\n");
+
+               if (!WIFSTOPPED(status))
+                       goto error;
+
+               sig = WSTOPSIG(status);
+
+               if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+                       if (errno == ESRCH)
+                               goto disappeared;
+
+                       if (errno == EINVAL) {
+                               sig = 0; /* bust group-stop */
+                               goto cont;
+                       }
+
+                       ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+                                             strerror(errno));
+                       goto error;
+               }
+
+               if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+                   si.si_pid == pid)
+                       break;
+
+       cont:
+               if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+                       if (errno == ESRCH)
+                               goto disappeared;
+
+                       ksft_test_result_fail("PTRACE_CONT: %s\n",
+                                             strerror(errno));
+                       goto error;
+               }
+       }
+
+       ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+       ptrace_za_disabled_read_zt(child);
+       ptrace_set_get_zt(child);
+       ptrace_enable_za_via_zt(child);
+
+       ret = EXIT_SUCCESS;
+
+error:
+       kill(child, SIGKILL);
+
+disappeared:
+       return ret;
+}
+
+int main(void)
+{
+       int ret = EXIT_SUCCESS;
+       pid_t child;
+
+       srandom(getpid());
+
+       ksft_print_header();
+
+       if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) {
+               ksft_set_plan(1);
+               ksft_exit_skip("SME2 not available\n");
+       }
+
+       /* We need a valid SME VL to enable/disable ZA */
+       sme_vl = prctl(PR_SME_GET_VL);
+       if (sme_vl == -1) {
+               ksft_set_plan(1);
+               ksft_exit_skip("Failed to read SME VL: %d (%s)\n",
+                              errno, strerror(errno));
+       }
+
+       ksft_set_plan(EXPECTED_TESTS);
+
+       child = fork();
+       if (!child)
+               return do_child();
+
+       if (do_parent(child))
+               ret = EXIT_FAILURE;
+
+       ksft_print_cnts();
+
+       return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
new file mode 100644 (file)
index 0000000..d632863
--- /dev/null
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZT context switch test
+// Repeatedly writes unique test patterns into ZT0
+// and reads them back to verify integrity.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define ZT_SZ  512
+#define ZT_B   (ZT_SZ / 8)
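+// ZT0 is a single 512-bit register, so ZT_B (64) is its size in bytes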
+
+// Declare some storage space to shadow ZT register contents and a
+// scratch buffer.
+.pushsection .text
+.data
+.align 4
+ztref:
+       .space  ZT_B
+scratch:
+       .space  ZT_B
+.popsection
+
+
+// Generate a test pattern for storage in ZT
+// x0: pid
+// x1: generation
+
+// These values are used to construct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:24  generation number (increments once per test_loop)
+// bits 23: 8  pid
+// bits  7: 0  32-bit lane index
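+//
+// For example (hypothetical values), pid 0x1234 and generation 2 produce the
+// words 0x02123400, 0x02123401, ... for successive lanes.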
+
+function pattern
+       mov     w3, wzr
+       bfi     w3, w0, #8, #16         // PID
+       bfi     w3, w1, #24, #8         // Generation
+
+       ldr     x0, =scratch
+       mov     w1, #ZT_B / 4
+
+0:     str     w3, [x0], #4
+       add     w3, w3, #1              // Lane
+       subs    w1, w1, #1
+       b.ne    0b
+
+       ret
+endfunction
+
+// Set up test pattern in ZT0
+// x0: pid
+// x1: generation
+function setup_zt
+       mov     x4, x30
+
+       bl      pattern                 // Get pattern in scratch buffer
+       ldr     x0, =ztref
+       ldr     x1, =scratch
+       mov     x2, #ZT_B
+       bl      memcpy
+
+       ldr     x0, =ztref
+       _ldr_zt 0                       // load zt0 from pointer x0
+
+       ret     x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+       cbz     x2, 2f
+
+       stp     x0, x1, [sp, #-0x20]!
+       str     x2, [sp, #0x10]
+
+       mov     x5, #0
+0:     ldrb    w3, [x0, x5]
+       ldrb    w4, [x1, x5]
+       add     x5, x5, #1
+       cmp     w3, w4
+       b.ne    1f
+       subs    x2, x2, #1
+       b.ne    0b
+
+1:     ldr     x2, [sp, #0x10]
+       ldp     x0, x1, [sp], #0x20
+       b.ne    barf
+
+2:     ret
+endfunction
+
+// Verify that ZT0 matches its shadow in memory, else abort
+// Clobbers x0-x3
+function check_zt
+       mov     x3, x30
+
+       ldr     x0, =scratch            // Poison scratch
+       mov     x1, #ZT_B
+       bl      memfill_ae
+
+       ldr     x0, =scratch
+       _str_zt 0
+
+       ldr     x0, =ztref
+       ldr     x1, =scratch
+       mov     x2, #ZT_B
+       mov     x30, x3
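+       // Tail-call memcmp: it returns straight to check_zt's caller on a
+       // match, or branches to barf on a mismatch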
+       b       memcmp
+endfunction
+
+// Any SME register modified here can cause corruption in the main
+// thread -- but *only* the locations modified here.
+function irritator_handler
+       // Increment the irritation signal count (x23):
+       ldr     x0, [x2, #ucontext_regs + 8 * 23]
+       add     x0, x0, #1
+       str     x0, [x2, #ucontext_regs + 8 * 23]
+
+       // Corrupt some random ZT data
+#if 0
+       adr     x0, .text + (irritator_handler - .text) / 16 * 16
+       movi    v0.8b, #1
+       movi    v9.16b, #2
+       movi    v31.8b, #3
+#endif
+
+       ret
+endfunction
+
+function tickle_handler
+       // Increment the signal count (x23):
+       ldr     x0, [x2, #ucontext_regs + 8 * 23]
+       add     x0, x0, #1
+       str     x0, [x2, #ucontext_regs + 8 * 23]
+
+       ret
+endfunction
+
+function terminate_handler
+       mov     w21, w0
+       mov     x20, x2
+
+       puts    "Terminated by signal "
+       mov     w0, w21
+       bl      putdec
+       puts    ", no error, iterations="
+       ldr     x0, [x20, #ucontext_regs + 8 * 22]
+       bl      putdec
+       puts    ", signals="
+       ldr     x0, [x20, #ucontext_regs + 8 * 23]
+       bl      putdecn
+
+       mov     x0, #0
+       mov     x8, #__NR_exit
+       svc     #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+       str     x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+       mov     w4, w0
+       mov     x5, x1
+       mov     w6, w2
+
+       add     x0, sp, #16
+       mov     x1, #sa_sz
+       bl      memclr
+
+       mov     w0, w4
+       add     x1, sp, #16
+       str     w6, [x1, #sa_flags]
+       str     x5, [x1, #sa_handler]
+       mov     x2, #0
+       mov     x3, #sa_mask_sz
+       mov     x8, #__NR_rt_sigaction
+       svc     #0
+
+       cbz     w0, 1f
+
+       puts    "sigaction failure\n"
+       b       .Labort
+
+1:     ldr     x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+       ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+       mov     x23, #0         // signal count
+
+       mov     w0, #SIGINT
+       adr     x1, terminate_handler
+       mov     w2, #SA_SIGINFO
+       bl      setsignal
+
+       mov     w0, #SIGTERM
+       adr     x1, terminate_handler
+       mov     w2, #SA_SIGINFO
+       bl      setsignal
+
+       mov     w0, #SIGUSR1
+       adr     x1, irritator_handler
+       mov     w2, #SA_SIGINFO
+       orr     w2, w2, #SA_NODEFER
+       bl      setsignal
+
+       mov     w0, #SIGUSR2
+       adr     x1, tickle_handler
+       mov     w2, #SA_SIGINFO
+       orr     w2, w2, #SA_NODEFER
+       bl      setsignal
+
+       smstart_za
+
+       // Obtain our PID, to ensure test pattern uniqueness between processes
+       mov     x8, #__NR_getpid
+       svc     #0
+       mov     x20, x0
+
+       puts    "PID:\t"
+       mov     x0, x20
+       bl      putdecn
+
+       mov     x22, #0         // generation number, increments per iteration
+.Ltest_loop:
+       mov     x0, x20
+       mov     x1, x22
+       bl      setup_zt
+
+       mov     x8, #__NR_sched_yield   // Encourage preemption
+       svc     #0
+
+       mrs     x0, S3_3_C4_C2_2        // SVCR should have ZA=1,SM=0
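+       // SVCR.SM is bit 0 and SVCR.ZA is bit 1, so a value of 2 means ZA-only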
+       and     x1, x0, #3
+       cmp     x1, #2
+       b.ne    svcr_barf
+
+       bl      check_zt
+
+       add     x22, x22, #1    // Everything still working
+       b       .Ltest_loop
+
+.Labort:
+       mov     x0, #0
+       mov     x1, #SIGABRT
+       mov     x8, #__NR_kill
+       svc     #0
+endfunction
+
+function barf
+// fpsimd.c activity log dump hack
+//     ldr     w0, =0xdeadc0de
+//     mov     w8, #__NR_exit
+//     svc     #0
+// end hack
+       smstop
+       mov     x10, x0 // expected data
+       mov     x11, x1 // actual data
+       mov     x12, x2 // data size
+
+       puts    "Mismatch: PID="
+       mov     x0, x20
+       bl      putdec
+       puts    ", iteration="
+       mov     x0, x22
+       bl      putdec
+       puts    "\tExpected ["
+       mov     x0, x10
+       mov     x1, x12
+       bl      dumphex
+       puts    "]\n\tGot      ["
+       mov     x0, x11
+       mov     x1, x12
+       bl      dumphex
+       puts    "]\n"
+
+       mov     x8, #__NR_getpid
+       svc     #0
+// fpsimd.c activity log dump hack
+//     ldr     w0, =0xdeadc0de
+//     mov     w8, #__NR_exit
+//     svc     #0
+// ^ end of hack
+       mov     x1, #SIGABRT
+       mov     x8, #__NR_kill
+       svc     #0
+//     mov     x8, #__NR_exit
+//     mov     x1, #1
+//     svc     #0
+endfunction
+
+function svcr_barf
+       mov     x10, x0
+
+       puts    "Bad SVCR: "
+       mov     x0, x10
+       bl      putdecn
+
+       mov     x8, #__NR_exit
+       mov     x1, #1
+       svc     #0
+endfunction
index e8d2b57f73ec146e430ef3f1f480f7c24b11ad67..b7fbb65183e85e33cd23c91c19dd750dbc07a48b 100644 (file)
@@ -5,4 +5,5 @@ sme_*
 ssve_*
 sve_*
 za_*
+zt_*
 !*.[ch]
index 0c645834ddc302fd461c2e3e72f0e9c5d936bb6c..1e6273d815759d9c10978ae9af6c9285068b6bde 100644 (file)
@@ -34,6 +34,7 @@ enum {
        FSVE_BIT,
        FSME_BIT,
        FSME_FA64_BIT,
+       FSME2_BIT,
        FMAX_END
 };
 
@@ -41,6 +42,7 @@ enum {
 #define FEAT_SVE               (1UL << FSVE_BIT)
 #define FEAT_SME               (1UL << FSME_BIT)
 #define FEAT_SME_FA64          (1UL << FSME_FA64_BIT)
+#define FEAT_SME2              (1UL << FSME2_BIT)
 
 /*
  * A descriptor used to describe and configure a test case.
index 308e229e58abb00b469ad867c757228c87ce819f..07f518f0e58d347e72599cbc611c27bab6eb3475 100644 (file)
@@ -29,6 +29,7 @@ static char const *const feats_names[FMAX_END] = {
        " SVE ",
        " SME ",
        " FA64 ",
+       " SME2 ",
 };
 
 #define MAX_FEATS_SZ   128
@@ -323,6 +324,8 @@ int test_init(struct tdescr *td)
                        td->feats_supported |= FEAT_SME;
                if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
                        td->feats_supported |= FEAT_SME_FA64;
+               if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+                       td->feats_supported |= FEAT_SME2;
                if (feats_ok(td)) {
                        if (td->feats_required & td->feats_supported)
                                fprintf(stderr,
index d2eda7b5de26cdb74d1cf2ad6623d02534d9f057..27d495fa52f8e0645abdee606eaa0fc52b5acec6 100644 (file)
@@ -108,6 +108,26 @@ bool validate_za_context(struct za_context *za, char **err)
        return true;
 }
 
+bool validate_zt_context(struct zt_context *zt, char **err)
+{
+       if (!zt || !err)
+               return false;
+
+       /* If the context is present there should be at least one register */
+       if (zt->nregs == 0) {
+               *err = "no registers";
+               return false;
+       }
+
+       /* Size should agree with the number of registers */
+       if (zt->head.size != ZT_SIG_CONTEXT_SIZE(zt->nregs)) {
+               *err = "register count does not match size";
+               return false;
+       }
+
+       return true;
+}
+
 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 {
        bool terminated = false;
@@ -117,6 +137,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
        struct extra_context *extra = NULL;
        struct sve_context *sve = NULL;
        struct za_context *za = NULL;
+       struct zt_context *zt = NULL;
        struct _aarch64_ctx *head =
                (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
        void *extra_data = NULL;
@@ -177,6 +198,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
                        za = (struct za_context *)head;
                        new_flags |= ZA_CTX;
                        break;
+               case ZT_MAGIC:
+                       if (flags & ZT_CTX)
+                               *err = "Multiple ZT_MAGIC";
+                       /* Size is validated in validate_zt_context() */
+                       zt = (struct zt_context *)head;
+                       new_flags |= ZT_CTX;
+                       break;
                case EXTRA_MAGIC:
                        if (flags & EXTRA_CTX)
                                *err = "Multiple EXTRA_MAGIC";
@@ -234,6 +262,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
                if (new_flags & ZA_CTX)
                        if (!validate_za_context(za, err))
                                return false;
+               if (new_flags & ZT_CTX)
+                       if (!validate_zt_context(zt, err))
+                               return false;
 
                flags |= new_flags;
 
@@ -245,6 +276,11 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
                return false;
        }
 
+       if (terminated && (flags & ZT_CTX) && !(flags & ZA_CTX)) {
+               *err = "ZT context but no ZA context";
+               return false;
+       }
+
        return true;
 }
 
index 040afded0b76eaf8a22e2fd665a74ae4be6dc030..a08ab0d6207a3ef7b49402c8a0faad856f484b53 100644 (file)
@@ -18,6 +18,7 @@
 #define SVE_CTX                (1 << 1)
 #define ZA_CTX         (1 << 2)
 #define EXTRA_CTX      (1 << 3)
+#define ZT_CTX         (1 << 4)
 
 #define KSFT_BAD_MAGIC 0xdeadbeef
 
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c
new file mode 100644 (file)
index 0000000..34f69bc
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that no ZT register context is present in signal frames when
+ * ZA is disabled.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+       ucontext_t uc;
+       char buf[1024 * 128];
+} context;
+
+int zt_no_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       size_t offset;
+       struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+
+       /*
+        * Get a signal context which should not have a ZT frame and
+        * registers in it.
+        */
+       if (!get_current_context(td, &context.uc, sizeof(context)))
+               return 1;
+
+       head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+       if (head) {
+               fprintf(stderr, "Got unexpected ZT context\n");
+               return 1;
+       }
+
+       td->pass = 1;
+
+       return 0;
+}
+
+struct tdescr tde = {
+       .name = "ZT register data not present",
+       .descr = "Validate that ZT is not present when ZA is disabled",
+       .feats_required = FEAT_SME2,
+       .timeout = 3,
+       .sanity_disabled = true,
+       .run = zt_no_regs_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
new file mode 100644 (file)
index 0000000..e1eb4d5
--- /dev/null
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that a ZT register context is present in signal frames when
+ * ZA is enabled, and that its contents are as expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+       ucontext_t uc;
+       char buf[1024 * 128];
+} context;
+
+static void enable_za(void)
+{
+       /* smstart za; real data is TODO */
+       asm volatile(".inst 0xd503457f" : : : );
+}
+
+int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       size_t offset;
+       struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+       struct zt_context *zt;
+       char *zeros;
+
+       /*
+        * Get a signal context which should have a ZT frame and registers
+        * in it.
+        */
+       enable_za();
+       if (!get_current_context(td, &context.uc, sizeof(context)))
+               return 1;
+
+       head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+       if (!head) {
+               fprintf(stderr, "No ZT context\n");
+               return 1;
+       }
+
+       zt = (struct zt_context *)head;
+       if (zt->nregs == 0) {
+               fprintf(stderr, "Got context with no registers\n");
+               return 1;
+       }
+
+       fprintf(stderr, "Got expected size %u for %d registers\n",
+               head->size, zt->nregs);
+
+       /* We didn't load any data into ZT so it should be all zeros */
+       zeros = malloc(ZT_SIG_REGS_SIZE(zt->nregs));
+       if (!zeros) {
+               fprintf(stderr, "Out of memory, nregs=%u\n", zt->nregs);
+               return 1;
+       }
+       memset(zeros, 0, ZT_SIG_REGS_SIZE(zt->nregs));
+
+       if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
+                  ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
+               fprintf(stderr, "ZT data invalid\n");
+               return 1;
+       }
+
+       free(zeros);
+
+       td->pass = 1;
+
+       return 0;
+}
+
+struct tdescr tde = {
+       .name = "ZT register data",
+       .descr = "Validate that ZT is present and has data when ZA is enabled",
+       .feats_required = FEAT_SME2,
+       .timeout = 3,
+       .sanity_disabled = true,
+       .run = zt_regs_run,
+};
index 54680dc5887fae044306040c127e5c9ff099ea7b..df10f1ffa20d9b69fd9445f847669a3f1f101840 100644 (file)
@@ -1112,8 +1112,6 @@ int main(int argc, char *argv[])
        enum vm_mem_backing_src_type src_type;
        int opt;
 
-       setbuf(stdout, NULL);
-
        src_type = DEFAULT_VM_MEM_SRC;
 
        while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
index 63ed533f73d6e82419b281b512cf32cbec100eb2..d011b38e259eafbb9781f915b24de49ab0803c30 100644 (file)
@@ -1,3 +1,4 @@
 CONFIG_KVM=y
 CONFIG_KVM_INTEL=y
 CONFIG_KVM_AMD=y
+CONFIG_USERFAULTFD=y
index 37c61f712fd5cef5e0f0d000906419465381f11f..e334844d6e1d795473aba76a31133d8e73b35870 100644 (file)
@@ -26,9 +26,6 @@ int main(int argc, char *argv[])
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;
 
-       /* Tell stdout not to buffer its content */
-       setbuf(stdout, NULL);
-
        TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
 
        vm = vm_create_with_one_vcpu(&vcpu, guest_code);