Merge tag 'kvm-4.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Nov 2017 21:00:24 +0000 (13:00 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Nov 2017 21:00:24 +0000 (13:00 -0800)
Pull KVM updates from Radim Krčmář:
 "First batch of KVM changes for 4.15

  Common:
   - Python 3 support in kvm_stat
   - Accounting of slabs to kmemcg

  ARM:
   - Optimized arch timer handling for KVM/ARM
   - Improvements to the VGIC ITS code and introduction of an ITS reset
     ioctl (a userspace usage sketch follows the commit list below)
   - Unification of the 32-bit fault injection logic
   - More exact external abort matching logic

  PPC:
   - Support for running hashed page table (HPT) MMU mode on a host that
     is using the radix MMU mode; single-threaded mode on POWER9 is
     added as a prerequisite
   - Resolution of merge conflicts with the last-second 4.14 HPT fixes
   - Fixes and cleanups

  s390:
   - Some initial preparation patches for exitless interrupts and crypto
   - New capability for AIS migration (a capability-probe sketch follows
     this summary)
   - Fixes

  x86:
   - Improved emulation of LAPIC timer mode changes, MCi_STATUS MSRs,
     and after-reset state
   - Refined dependencies for VMX features
   - Fixes for nested SMI injection
   - A lot of cleanups"

* tag 'kvm-4.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (89 commits)
  KVM: s390: provide a capability for AIS state migration
  KVM: s390: clear_io_irq() requests are not expected for adapter interrupts
  KVM: s390: abstract conversion between isc and enum irq_types
  KVM: s390: vsie: use common code functions for pinning
  KVM: s390: SIE considerations for AP Queue virtualization
  KVM: s390: document memory ordering for kvm_s390_vcpu_wakeup
  KVM: PPC: Book3S HV: Cosmetic post-merge cleanups
  KVM: arm/arm64: fix the incompatible matching for external abort
  KVM: arm/arm64: Unify 32bit fault injection
  KVM: arm/arm64: vgic-its: Implement KVM_DEV_ARM_ITS_CTRL_RESET
  KVM: arm/arm64: Document KVM_DEV_ARM_ITS_CTRL_RESET
  KVM: arm/arm64: vgic-its: Free caches when GITS_BASER Valid bit is cleared
  KVM: arm/arm64: vgic-its: New helper functions to free the caches
  KVM: arm/arm64: vgic-its: Remove kvm_its_unmap_device
  arm/arm64: KVM: Load the timer state when enabling the timer
  KVM: arm/arm64: Rework kvm_timer_should_fire
  KVM: arm/arm64: Get rid of kvm_timer_flush_hwstate
  KVM: arm/arm64: Avoid phys timer emulation in vcpu entry/exit
  KVM: arm/arm64: Move phys_timer_emulate function
  KVM: arm/arm64: Use kvm_arm_timer_set/get_reg for guest register traps
  ...
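
The VGIC ITS reset mentioned in the summary is exposed through the device
control API rather than a VM ioctl; the hedged sketch below shows how
userspace might drive it. It is not code from this merge: its_fd is
assumed to come from an earlier KVM_CREATE_DEVICE call for
KVM_DEV_TYPE_ARM_VGIC_ITS, and the attribute value follows the
KVM_DEV_ARM_ITS_CTRL_RESET definition added here.

/*
 * Hedged sketch, not from this merge: ask the kernel to reset an ITS
 * device.  its_fd is assumed to be the fd returned by KVM_CREATE_DEVICE
 * with KVM_DEV_TYPE_ARM_VGIC_ITS on an arm/arm64 VM.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int vgic_its_reset(int its_fd)
{
        struct kvm_device_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.group = KVM_DEV_ARM_VGIC_GRP_CTRL;
        attr.attr  = KVM_DEV_ARM_ITS_CTRL_RESET;

        /* 0 on success; -1 with errno set (e.g. ENXIO on older kernels). */
        return ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr);
}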

22 files changed:
arch/arm/include/uapi/asm/kvm.h
arch/arm64/include/asm/arch_timer.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/sys_regs.c
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/book3s_hv.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_emulate.h
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/vmx.c
drivers/clocksource/arm_arch_timer.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
include/uapi/linux/kvm.h
virt/kvm/arm/arm.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/kvm_main.c

index 1f57bbe82b6fb8582c2a3a1617345266c22e33e8,b56895593c84007386a54ab9a2fbbba478d717c7..6edd177bb1c7c66e0ec32caf7ec8d2c3680ed2f3
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
  /*
   * Copyright (C) 2012 - Virtual Open Systems and Columbia University
   * Author: Christoffer Dall <c.dall@virtualopensystems.com>
@@@ -152,6 -151,12 +152,12 @@@ struct kvm_arch_memory_slot 
        (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
  #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
  
+ /* PL1 Physical Timer Registers */
+ #define KVM_REG_ARM_PTIMER_CTL                ARM_CP15_REG32(0, 14, 2, 1)
+ #define KVM_REG_ARM_PTIMER_CNT                ARM_CP15_REG64(0, 14)
+ #define KVM_REG_ARM_PTIMER_CVAL               ARM_CP15_REG64(2, 14)
+ /* Virtual Timer Registers */
  #define KVM_REG_ARM_TIMER_CTL         ARM_CP15_REG32(0, 14, 3, 1)
  #define KVM_REG_ARM_TIMER_CNT         ARM_CP15_REG64(1, 14)
  #define KVM_REG_ARM_TIMER_CVAL                ARM_CP15_REG64(3, 14)
  #define   KVM_DEV_ARM_ITS_SAVE_TABLES         1
  #define   KVM_DEV_ARM_ITS_RESTORE_TABLES      2
  #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES        3
+ #define   KVM_DEV_ARM_ITS_CTRL_RESET          4
  
  /* KVM_IRQ_LINE irq field index values */
  #define KVM_ARM_IRQ_TYPE_SHIFT                24
index bdedd8f748d17c42c36a033b80c26b86e48c3661,04275de614dbdff465738c132cef49fb68bd7789..f2a234d6516cf5b537b80134b137729dd9abb798
@@@ -52,6 -52,7 +52,7 @@@ struct arch_timer_erratum_workaround 
        const char *desc;
        u32 (*read_cntp_tval_el0)(void);
        u32 (*read_cntv_tval_el0)(void);
+       u64 (*read_cntpct_el0)(void);
        u64 (*read_cntvct_el0)(void);
        int (*set_next_event_phys)(unsigned long, struct clock_event_device *);
        int (*set_next_event_virt)(unsigned long, struct clock_event_device *);
@@@ -144,16 -145,12 +145,13 @@@ static inline u32 arch_timer_get_cntkct
  static inline void arch_timer_set_cntkctl(u32 cntkctl)
  {
        write_sysreg(cntkctl, cntkctl_el1);
 +      isb();
  }
  
  static inline u64 arch_counter_get_cntpct(void)
  {
-       /*
-        * AArch64 kernel and user space mandate the use of CNTVCT.
-        */
-       BUG();
-       return 0;
+       isb();
+       return arch_timer_reg_read_stable(cntpct_el0);
  }
  
  static inline u64 arch_counter_get_cntvct(void)
index 51149ec75fe480b324fd74d2697579a936438fdc,37ca7394549cf8191cd982c301f2662c8bd59dad..9abbf30446545a0668083b0891461f015563bcb1
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
  /*
   * Copyright (C) 2012,2013 - ARM Ltd
   * Author: Marc Zyngier <marc.zyngier@arm.com>
@@@ -196,6 -195,12 +196,12 @@@ struct kvm_arch_memory_slot 
  
  #define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
  
+ /* Physical Timer EL0 Registers */
+ #define KVM_REG_ARM_PTIMER_CTL                ARM64_SYS_REG(3, 3, 14, 2, 1)
+ #define KVM_REG_ARM_PTIMER_CVAL               ARM64_SYS_REG(3, 3, 14, 2, 2)
+ #define KVM_REG_ARM_PTIMER_CNT                ARM64_SYS_REG(3, 3, 14, 0, 1)
+ /* EL0 Virtual Timer Registers */
  #define KVM_REG_ARM_TIMER_CTL         ARM64_SYS_REG(3, 3, 14, 3, 1)
  #define KVM_REG_ARM_TIMER_CNT         ARM64_SYS_REG(3, 3, 14, 3, 2)
  #define KVM_REG_ARM_TIMER_CVAL                ARM64_SYS_REG(3, 3, 14, 0, 2)
  #define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
  #define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
  #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES        3
+ #define   KVM_DEV_ARM_ITS_CTRL_RESET          4
  
  /* Device Control API on vcpu fd */
  #define KVM_ARM_VCPU_PMU_V3_CTRL      0
index 951f3ebaff26e6df8e58a8af1e63548c587e0b4d,4994f4bdaca5d49f7bc31f6a229b46ce7f0257d6..525c01f48867808b6efa257063daaa4c8207252e
@@@ -48,7 -48,7 +48,7 @@@ static void __hyp_text __activate_traps
  
        val = read_sysreg(cpacr_el1);
        val |= CPACR_EL1_TTA;
 -      val &= ~CPACR_EL1_FPEN;
 +      val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
        write_sysreg(val, cpacr_el1);
  
        write_sysreg(__kvm_hyp_vector, vbar_el1);
@@@ -59,7 -59,7 +59,7 @@@ static void __hyp_text __activate_traps
        u64 val;
  
        val = CPTR_EL2_DEFAULT;
 -      val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
 +      val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
        write_sysreg(val, cptr_el2);
  }
  
@@@ -81,17 -81,11 +81,17 @@@ static void __hyp_text __activate_traps
         * it will cause an exception.
         */
        val = vcpu->arch.hcr_el2;
 +
        if (!(val & HCR_RW) && system_supports_fpsimd()) {
                write_sysreg(1 << 30, fpexc32_el2);
                isb();
        }
 +
 +      if (val & HCR_RW) /* for AArch64 only: */
 +              val |= HCR_TID3; /* TID3: trap feature register accesses */
 +
        write_sysreg(val, hcr_el2);
 +
        /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
        write_sysreg(1 << 15, hstr_el2);
        /*
@@@ -117,7 -111,7 +117,7 @@@ static void __hyp_text __deactivate_tra
  
        write_sysreg(mdcr_el2, mdcr_el2);
        write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 -      write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
 +      write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
        write_sysreg(vectors, vbar_el1);
  }
  
@@@ -304,7 -298,7 +304,7 @@@ int __hyp_text __kvm_vcpu_run(struct kv
        __activate_vm(vcpu);
  
        __vgic_restore_state(vcpu);
-       __timer_restore_state(vcpu);
+       __timer_enable_traps(vcpu);
  
        /*
         * We must restore the 32-bit state before the sysregs, thanks
@@@ -374,7 -368,7 +374,7 @@@ again
  
        __sysreg_save_guest_state(guest_ctxt);
        __sysreg32_save_state(vcpu);
-       __timer_save_state(vcpu);
+       __timer_disable_traps(vcpu);
        __vgic_save_state(vcpu);
  
        __deactivate_traps(vcpu);
@@@ -442,7 -436,7 +442,7 @@@ void __hyp_text __noreturn __hyp_panic(
  
                vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
                host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
-               __timer_save_state(vcpu);
+               __timer_disable_traps(vcpu);
                __deactivate_traps(vcpu);
                __deactivate_vm(vcpu);
                __sysreg_restore_host_state(host_ctxt);
index a0ee9b05e3d445b80011c71f016555c927609324,bb0e41b3154e619458a61abff97960842a198276..1830ebc227d18d7c5ad06e8f1858bbe8f33fa53a
@@@ -23,7 -23,6 +23,7 @@@
  #include <linux/bsearch.h>
  #include <linux/kvm_host.h>
  #include <linux/mm.h>
 +#include <linux/printk.h>
  #include <linux/uaccess.h>
  
  #include <asm/cacheflush.h>
@@@ -842,13 -841,16 +842,16 @@@ static bool access_cntp_tval(struct kvm
                struct sys_reg_params *p,
                const struct sys_reg_desc *r)
  {
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
        u64 now = kvm_phys_timer_read();
+       u64 cval;
  
-       if (p->is_write)
-               ptimer->cnt_cval = p->regval + now;
-       else
-               p->regval = ptimer->cnt_cval - now;
+       if (p->is_write) {
+               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL,
+                                     p->regval + now);
+       } else {
+               cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+               p->regval = cval - now;
+       }
  
        return true;
  }
@@@ -857,24 -859,10 +860,10 @@@ static bool access_cntp_ctl(struct kvm_
                struct sys_reg_params *p,
                const struct sys_reg_desc *r)
  {
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-       if (p->is_write) {
-               /* ISTATUS bit is read-only */
-               ptimer->cnt_ctl = p->regval & ~ARCH_TIMER_CTRL_IT_STAT;
-       } else {
-               u64 now = kvm_phys_timer_read();
-               p->regval = ptimer->cnt_ctl;
-               /*
-                * Set ISTATUS bit if it's expired.
-                * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
-                * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
-                * regardless of ENABLE bit for our implementation convenience.
-                */
-               if (ptimer->cnt_cval <= now)
-                       p->regval |= ARCH_TIMER_CTRL_IT_STAT;
-       }
+       if (p->is_write)
+               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval);
+       else
+               p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
  
        return true;
  }
@@@ -883,156 -871,14 +872,154 @@@ static bool access_cntp_cval(struct kvm
                struct sys_reg_params *p,
                const struct sys_reg_desc *r)
  {
-       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
        if (p->is_write)
-               ptimer->cnt_cval = p->regval;
+               kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval);
        else
-               p->regval = ptimer->cnt_cval;
+               p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
  
        return true;
  }
  
 +/* Read a sanitised cpufeature ID register by sys_reg_desc */
 +static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
 +{
 +      u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
 +                       (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
 +      u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
 +
 +      if (id == SYS_ID_AA64PFR0_EL1) {
 +              if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
 +                      pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n",
 +                                  task_pid_nr(current));
 +
 +              val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
 +      }
 +
 +      return val;
 +}
 +
 +/* cpufeature ID register access trap handlers */
 +
 +static bool __access_id_reg(struct kvm_vcpu *vcpu,
 +                          struct sys_reg_params *p,
 +                          const struct sys_reg_desc *r,
 +                          bool raz)
 +{
 +      if (p->is_write)
 +              return write_to_read_only(vcpu, p, r);
 +
 +      p->regval = read_id_reg(r, raz);
 +      return true;
 +}
 +
 +static bool access_id_reg(struct kvm_vcpu *vcpu,
 +                        struct sys_reg_params *p,
 +                        const struct sys_reg_desc *r)
 +{
 +      return __access_id_reg(vcpu, p, r, false);
 +}
 +
 +static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
 +                            struct sys_reg_params *p,
 +                            const struct sys_reg_desc *r)
 +{
 +      return __access_id_reg(vcpu, p, r, true);
 +}
 +
 +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
 +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
 +static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
 +
 +/*
 + * cpufeature ID register user accessors
 + *
 + * For now, these registers are immutable for userspace, so no values
 + * are stored, and for set_id_reg() we don't allow the effective value
 + * to be changed.
 + */
 +static int __get_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
 +                      bool raz)
 +{
 +      const u64 id = sys_reg_to_index(rd);
 +      const u64 val = read_id_reg(rd, raz);
 +
 +      return reg_to_user(uaddr, &val, id);
 +}
 +
 +static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
 +                      bool raz)
 +{
 +      const u64 id = sys_reg_to_index(rd);
 +      int err;
 +      u64 val;
 +
 +      err = reg_from_user(&val, uaddr, id);
 +      if (err)
 +              return err;
 +
 +      /* This is what we mean by invariant: you can't change it. */
 +      if (val != read_id_reg(rd, raz))
 +              return -EINVAL;
 +
 +      return 0;
 +}
 +
 +static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 +                    const struct kvm_one_reg *reg, void __user *uaddr)
 +{
 +      return __get_id_reg(rd, uaddr, false);
 +}
 +
 +static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 +                    const struct kvm_one_reg *reg, void __user *uaddr)
 +{
 +      return __set_id_reg(rd, uaddr, false);
 +}
 +
 +static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 +                        const struct kvm_one_reg *reg, void __user *uaddr)
 +{
 +      return __get_id_reg(rd, uaddr, true);
 +}
 +
 +static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 +                        const struct kvm_one_reg *reg, void __user *uaddr)
 +{
 +      return __set_id_reg(rd, uaddr, true);
 +}
 +
 +/* sys_reg_desc initialiser for known cpufeature ID registers */
 +#define ID_SANITISED(name) {                  \
 +      SYS_DESC(SYS_##name),                   \
 +      .access = access_id_reg,                \
 +      .get_user = get_id_reg,                 \
 +      .set_user = set_id_reg,                 \
 +}
 +
 +/*
 + * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
 + * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
 + * (1 <= crm < 8, 0 <= Op2 < 8).
 + */
 +#define ID_UNALLOCATED(crm, op2) {                    \
 +      Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2),     \
 +      .access = access_raz_id_reg,                    \
 +      .get_user = get_raz_id_reg,                     \
 +      .set_user = set_raz_id_reg,                     \
 +}
 +
 +/*
 + * sys_reg_desc initialiser for known ID registers that we hide from guests.
 + * For now, these are exposed just like unallocated ID regs: they appear
 + * RAZ for the guest.
 + */
 +#define ID_HIDDEN(name) {                     \
 +      SYS_DESC(SYS_##name),                   \
 +      .access = access_raz_id_reg,            \
 +      .get_user = get_raz_id_reg,             \
 +      .set_user = set_raz_id_reg,             \
 +}
 +
  /*
   * Architected system registers.
   * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@@ -1085,84 -931,6 +1072,84 @@@ static const struct sys_reg_desc sys_re
        { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
  
        { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
 +
 +      /*
 +       * ID regs: all ID_SANITISED() entries here must have corresponding
 +       * entries in arm64_ftr_regs[].
 +       */
 +
 +      /* AArch64 mappings of the AArch32 ID registers */
 +      /* CRm=1 */
 +      ID_SANITISED(ID_PFR0_EL1),
 +      ID_SANITISED(ID_PFR1_EL1),
 +      ID_SANITISED(ID_DFR0_EL1),
 +      ID_HIDDEN(ID_AFR0_EL1),
 +      ID_SANITISED(ID_MMFR0_EL1),
 +      ID_SANITISED(ID_MMFR1_EL1),
 +      ID_SANITISED(ID_MMFR2_EL1),
 +      ID_SANITISED(ID_MMFR3_EL1),
 +
 +      /* CRm=2 */
 +      ID_SANITISED(ID_ISAR0_EL1),
 +      ID_SANITISED(ID_ISAR1_EL1),
 +      ID_SANITISED(ID_ISAR2_EL1),
 +      ID_SANITISED(ID_ISAR3_EL1),
 +      ID_SANITISED(ID_ISAR4_EL1),
 +      ID_SANITISED(ID_ISAR5_EL1),
 +      ID_SANITISED(ID_MMFR4_EL1),
 +      ID_UNALLOCATED(2,7),
 +
 +      /* CRm=3 */
 +      ID_SANITISED(MVFR0_EL1),
 +      ID_SANITISED(MVFR1_EL1),
 +      ID_SANITISED(MVFR2_EL1),
 +      ID_UNALLOCATED(3,3),
 +      ID_UNALLOCATED(3,4),
 +      ID_UNALLOCATED(3,5),
 +      ID_UNALLOCATED(3,6),
 +      ID_UNALLOCATED(3,7),
 +
 +      /* AArch64 ID registers */
 +      /* CRm=4 */
 +      ID_SANITISED(ID_AA64PFR0_EL1),
 +      ID_SANITISED(ID_AA64PFR1_EL1),
 +      ID_UNALLOCATED(4,2),
 +      ID_UNALLOCATED(4,3),
 +      ID_UNALLOCATED(4,4),
 +      ID_UNALLOCATED(4,5),
 +      ID_UNALLOCATED(4,6),
 +      ID_UNALLOCATED(4,7),
 +
 +      /* CRm=5 */
 +      ID_SANITISED(ID_AA64DFR0_EL1),
 +      ID_SANITISED(ID_AA64DFR1_EL1),
 +      ID_UNALLOCATED(5,2),
 +      ID_UNALLOCATED(5,3),
 +      ID_HIDDEN(ID_AA64AFR0_EL1),
 +      ID_HIDDEN(ID_AA64AFR1_EL1),
 +      ID_UNALLOCATED(5,6),
 +      ID_UNALLOCATED(5,7),
 +
 +      /* CRm=6 */
 +      ID_SANITISED(ID_AA64ISAR0_EL1),
 +      ID_SANITISED(ID_AA64ISAR1_EL1),
 +      ID_UNALLOCATED(6,2),
 +      ID_UNALLOCATED(6,3),
 +      ID_UNALLOCATED(6,4),
 +      ID_UNALLOCATED(6,5),
 +      ID_UNALLOCATED(6,6),
 +      ID_UNALLOCATED(6,7),
 +
 +      /* CRm=7 */
 +      ID_SANITISED(ID_AA64MMFR0_EL1),
 +      ID_SANITISED(ID_AA64MMFR1_EL1),
 +      ID_SANITISED(ID_AA64MMFR2_EL1),
 +      ID_UNALLOCATED(7,3),
 +      ID_UNALLOCATED(7,4),
 +      ID_UNALLOCATED(7,5),
 +      ID_UNALLOCATED(7,6),
 +      ID_UNALLOCATED(7,7),
 +
        { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
        { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
        { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
@@@ -2009,8 -1777,8 +1996,8 @@@ static const struct sys_reg_desc *index
        if (!r)
                r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
  
 -      /* Not saved in the sys_reg array? */
 -      if (r && !r->reg)
 +      /* Not saved in the sys_reg array and not otherwise accessible? */
 +      if (r && !(r->reg || r->get_user))
                r = NULL;
  
        return r;
  FUNCTION_INVARIANT(midr_el1)
  FUNCTION_INVARIANT(ctr_el0)
  FUNCTION_INVARIANT(revidr_el1)
 -FUNCTION_INVARIANT(id_pfr0_el1)
 -FUNCTION_INVARIANT(id_pfr1_el1)
 -FUNCTION_INVARIANT(id_dfr0_el1)
 -FUNCTION_INVARIANT(id_afr0_el1)
 -FUNCTION_INVARIANT(id_mmfr0_el1)
 -FUNCTION_INVARIANT(id_mmfr1_el1)
 -FUNCTION_INVARIANT(id_mmfr2_el1)
 -FUNCTION_INVARIANT(id_mmfr3_el1)
 -FUNCTION_INVARIANT(id_isar0_el1)
 -FUNCTION_INVARIANT(id_isar1_el1)
 -FUNCTION_INVARIANT(id_isar2_el1)
 -FUNCTION_INVARIANT(id_isar3_el1)
 -FUNCTION_INVARIANT(id_isar4_el1)
 -FUNCTION_INVARIANT(id_isar5_el1)
  FUNCTION_INVARIANT(clidr_el1)
  FUNCTION_INVARIANT(aidr_el1)
  
  static struct sys_reg_desc invariant_sys_regs[] = {
        { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
        { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
 -      { SYS_DESC(SYS_ID_PFR0_EL1), NULL, get_id_pfr0_el1 },
 -      { SYS_DESC(SYS_ID_PFR1_EL1), NULL, get_id_pfr1_el1 },
 -      { SYS_DESC(SYS_ID_DFR0_EL1), NULL, get_id_dfr0_el1 },
 -      { SYS_DESC(SYS_ID_AFR0_EL1), NULL, get_id_afr0_el1 },
 -      { SYS_DESC(SYS_ID_MMFR0_EL1), NULL, get_id_mmfr0_el1 },
 -      { SYS_DESC(SYS_ID_MMFR1_EL1), NULL, get_id_mmfr1_el1 },
 -      { SYS_DESC(SYS_ID_MMFR2_EL1), NULL, get_id_mmfr2_el1 },
 -      { SYS_DESC(SYS_ID_MMFR3_EL1), NULL, get_id_mmfr3_el1 },
 -      { SYS_DESC(SYS_ID_ISAR0_EL1), NULL, get_id_isar0_el1 },
 -      { SYS_DESC(SYS_ID_ISAR1_EL1), NULL, get_id_isar1_el1 },
 -      { SYS_DESC(SYS_ID_ISAR2_EL1), NULL, get_id_isar2_el1 },
 -      { SYS_DESC(SYS_ID_ISAR3_EL1), NULL, get_id_isar3_el1 },
 -      { SYS_DESC(SYS_ID_ISAR4_EL1), NULL, get_id_isar4_el1 },
 -      { SYS_DESC(SYS_ID_ISAR5_EL1), NULL, get_id_isar5_el1 },
        { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
        { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
        { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
@@@ -2270,31 -2066,12 +2257,31 @@@ static bool copy_reg_to_user(const stru
        return true;
  }
  
 +static int walk_one_sys_reg(const struct sys_reg_desc *rd,
 +                          u64 __user **uind,
 +                          unsigned int *total)
 +{
 +      /*
 +       * Ignore registers we trap but don't save,
 +       * and for which no custom user accessor is provided.
 +       */
 +      if (!(rd->reg || rd->get_user))
 +              return 0;
 +
 +      if (!copy_reg_to_user(rd, uind))
 +              return -EFAULT;
 +
 +      (*total)++;
 +      return 0;
 +}
 +
  /* Assumed ordered tables, see kvm_sys_reg_table_init. */
  static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
  {
        const struct sys_reg_desc *i1, *i2, *end1, *end2;
        unsigned int total = 0;
        size_t num;
 +      int err;
  
        /* We check for duplicates here, to allow arch-specific overrides. */
        i1 = get_target_table(vcpu->arch.target, true, &num);
        while (i1 || i2) {
                int cmp = cmp_sys_reg(i1, i2);
                /* target-specific overrides generic entry. */
 -              if (cmp <= 0) {
 -                      /* Ignore registers we trap but don't save. */
 -                      if (i1->reg) {
 -                              if (!copy_reg_to_user(i1, &uind))
 -                                      return -EFAULT;
 -                              total++;
 -                      }
 -              } else {
 -                      /* Ignore registers we trap but don't save. */
 -                      if (i2->reg) {
 -                              if (!copy_reg_to_user(i2, &uind))
 -                                      return -EFAULT;
 -                              total++;
 -                      }
 -              }
 +              if (cmp <= 0)
 +                      err = walk_one_sys_reg(i1, &uind, &total);
 +              else
 +                      err = walk_one_sys_reg(i2, &uind, &total);
 +
 +              if (err)
 +                      return err;
  
                if (cmp <= 0 && ++i1 == end1)
                        i1 = NULL;
index 9aace433491adf30ebfaaad68fac9723f5585e5f,519fad556113196273fd38d13f4d18c7918d5ebd..6b958414b4e036ac1e4c97bceb61277ffab65e76
@@@ -185,7 -185,7 +185,7 @@@ int main(void
  #ifdef CONFIG_PPC_MM_SLICES
        OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
        OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
 -      DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
 +      OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
        DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
  #endif /* CONFIG_PPC_MM_SLICES */
  #endif
        OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
  #endif /* CONFIG_PPC_BOOK3E */
  
 -#ifdef CONFIG_PPC_STD_MMU_64
 +#ifdef CONFIG_PPC_BOOK3S_64
        OFFSET(PACASLBCACHE, paca_struct, slb_cache);
        OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
        OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
        OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
        OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
        OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
 -#endif /* CONFIG_PPC_STD_MMU_64 */
 +#endif /* CONFIG_PPC_BOOK3S_64 */
        OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
  #ifdef CONFIG_PPC_BOOK3S_64
        OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
        HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
        HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
        HSTATE_FIELD(HSTATE_PTID, ptid);
+       HSTATE_FIELD(HSTATE_TID, tid);
        HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
        HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
        HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
        OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
        OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
        OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
+       OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
+       OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  
  #ifdef CONFIG_PPC_BOOK3S_64
index 40e5857c4b1c44312dc9c74eab27d3b835a8be16,18b16c3957fcd745c0f65e532f75b878196e280b..79ea3d9269dbf568904e504d78cc56850c77860d
@@@ -19,6 -19,7 +19,7 @@@
   */
  
  #include <linux/kvm_host.h>
+ #include <linux/kernel.h>
  #include <linux/err.h>
  #include <linux/slab.h>
  #include <linux/preempt.h>
@@@ -47,7 -48,6 +48,7 @@@
  
  #include <asm/reg.h>
  #include <asm/ppc-opcode.h>
 +#include <asm/asm-prototypes.h>
  #include <asm/disassemble.h>
  #include <asm/cputable.h>
  #include <asm/cacheflush.h>
@@@ -98,6 -98,10 +99,10 @@@ static int target_smt_mode
  module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
  MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
  
+ static bool indep_threads_mode = true;
+ module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
+ MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
  #ifdef CONFIG_KVM_XICS
  static struct kernel_param_ops module_param_ops = {
        .set = param_set_int,
@@@ -115,6 -119,7 +120,7 @@@ MODULE_PARM_DESC(h_ipi_redirect, "Redir
  
  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+ static void kvmppc_setup_partition_table(struct kvm *kvm);
  
  static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
                int *ip)
@@@ -1090,10 -1095,9 +1096,10 @@@ static int kvmppc_handle_exit_hv(struc
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
 -      /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
 +      /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
        case BOOK3S_INTERRUPT_HMI:
        case BOOK3S_INTERRUPT_PERFMON:
 +      case BOOK3S_INTERRUPT_SYSTEM_RESET:
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_MACHINE_CHECK:
@@@ -1734,9 -1738,9 +1740,9 @@@ static int kvmppc_set_one_reg_hv(struc
   * MMU mode (radix or HPT), unfortunately, but since we only support
   * HPT guests on a HPT host so far, that isn't an impediment yet.
   */
- static int threads_per_vcore(void)
+ static int threads_per_vcore(struct kvm *kvm)
  {
-       if (cpu_has_feature(CPU_FTR_ARCH_300))
+       if (kvm->arch.threads_indep)
                return 1;
        return threads_per_subcore;
  }
@@@ -1774,7 -1778,7 +1780,7 @@@ static struct debugfs_timings_element 
        {"cede",        offsetof(struct kvm_vcpu, arch.cede_time)},
  };
  
- #define N_TIMINGS     (sizeof(timings) / sizeof(timings[0]))
+ #define N_TIMINGS     (ARRAY_SIZE(timings))
  
  struct debugfs_timings_state {
        struct kvm_vcpu *vcpu;
@@@ -2228,11 -2232,10 +2234,10 @@@ static void kvmppc_start_thread(struct 
                kvmppc_ipi_thread(cpu);
  }
  
- static void kvmppc_wait_for_nap(void)
+ static void kvmppc_wait_for_nap(int n_threads)
  {
        int cpu = smp_processor_id();
        int i, loops;
-       int n_threads = threads_per_vcore();
  
        if (n_threads <= 1)
                return;
@@@ -2319,7 -2322,7 +2324,7 @@@ static void kvmppc_vcore_preempt(struc
  
        vc->vcore_state = VCORE_PREEMPT;
        vc->pcpu = smp_processor_id();
-       if (vc->num_threads < threads_per_vcore()) {
+       if (vc->num_threads < threads_per_vcore(vc->kvm)) {
                spin_lock(&lp->lock);
                list_add_tail(&vc->preempt_list, &lp->list);
                spin_unlock(&lp->lock);
@@@ -2357,7 -2360,7 +2362,7 @@@ struct core_info 
  
  /*
   * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
-  * respectively in 2-way micro-threading (split-core) mode.
+  * respectively in 2-way micro-threading (split-core) mode on POWER8.
   */
  static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
  
@@@ -2373,7 -2376,14 +2378,14 @@@ static void init_core_info(struct core_
  
  static bool subcore_config_ok(int n_subcores, int n_threads)
  {
-       /* Can only dynamically split if unsplit to begin with */
+       /*
+        * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
+        * mode, with one thread per subcore.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return n_subcores <= 4 && n_threads == 1;
+       /* On POWER8, can only dynamically split if unsplit to begin with */
        if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
                return false;
        if (n_subcores > MAX_SUBCORES)
@@@ -2404,6 -2414,11 +2416,11 @@@ static bool can_dynamic_split(struct kv
        if (!cpu_has_feature(CPU_FTR_ARCH_207S))
                return false;
  
+       /* POWER9 currently requires all threads to be in the same MMU mode */
+       if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+           kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
+               return false;
        if (n_threads < cip->max_subcore_threads)
                n_threads = cip->max_subcore_threads;
        if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
@@@ -2606,9 -2621,6 +2623,9 @@@ static void set_irq_happened(int trap
        case BOOK3S_INTERRUPT_HMI:
                local_paca->irq_happened |= PACA_IRQ_HMI;
                break;
 +      case BOOK3S_INTERRUPT_SYSTEM_RESET:
 +              replay_system_reset();
 +              break;
        }
  }
  
@@@ -2632,6 -2644,8 +2649,8 @@@ static noinline void kvmppc_run_core(st
        int target_threads;
        int controlled_threads;
        int trap;
+       bool is_power8;
+       bool hpt_on_radix;
  
        /*
         * Remove from the list any threads that have a signal pending
         * the number of threads per subcore, except on POWER9,
         * where it's 1 because the threads are (mostly) independent.
         */
-       controlled_threads = threads_per_vcore();
+       controlled_threads = threads_per_vcore(vc->kvm);
  
        /*
         * Make sure we are running on primary threads, and that secondary
         * threads are offline.  Also check if the number of threads in this
         * guest are greater than the current system threads per guest.
+        * On POWER9, we need to be not in independent-threads mode if
+        * this is a HPT guest on a radix host.
         */
-       if ((controlled_threads > 1) &&
-           ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
+       hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+       if (((controlled_threads > 1) &&
+            ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
+           (hpt_on_radix && vc->kvm->arch.threads_indep)) {
                for_each_runnable_thread(i, vcpu, vc) {
                        vcpu->arch.ret = -EBUSY;
                        kvmppc_remove_runnable(vc, vcpu);
         * Hard-disable interrupts, and check resched flag and signals.
         * If we need to reschedule or deliver a signal, clean up
         * and return without going into the guest(s).
-        * If the hpte_setup_done flag has been cleared, don't go into the
+        * If the mmu_ready flag has been cleared, don't go into the
         * guest because that means a HPT resize operation is in progress.
         */
        local_irq_disable();
        hard_irq_disable();
        if (lazy_irq_pending() || need_resched() ||
-           recheck_signals(&core_info) ||
-           (!kvm_is_radix(vc->kvm) && !vc->kvm->arch.hpte_setup_done)) {
+           recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
                local_irq_enable();
                vc->vcore_state = VCORE_INACTIVE;
                /* Unlock all except the primary vcore */
        cmd_bit = stat_bit = 0;
        split = core_info.n_subcores;
        sip = NULL;
-       if (split > 1) {
-               /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
-               if (split == 2 && (dynamic_mt_modes & 2)) {
-                       cmd_bit = HID0_POWER8_1TO2LPAR;
-                       stat_bit = HID0_POWER8_2LPARMODE;
-               } else {
-                       split = 4;
-                       cmd_bit = HID0_POWER8_1TO4LPAR;
-                       stat_bit = HID0_POWER8_4LPARMODE;
-               }
-               subcore_size = MAX_SMT_THREADS / split;
+       is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
+               && !cpu_has_feature(CPU_FTR_ARCH_300);
+       if (split > 1 || hpt_on_radix) {
                sip = &split_info;
                memset(&split_info, 0, sizeof(split_info));
-               split_info.rpr = mfspr(SPRN_RPR);
-               split_info.pmmar = mfspr(SPRN_PMMAR);
-               split_info.ldbar = mfspr(SPRN_LDBAR);
-               split_info.subcore_size = subcore_size;
                for (sub = 0; sub < core_info.n_subcores; ++sub)
                        split_info.vc[sub] = core_info.vc[sub];
+               if (is_power8) {
+                       if (split == 2 && (dynamic_mt_modes & 2)) {
+                               cmd_bit = HID0_POWER8_1TO2LPAR;
+                               stat_bit = HID0_POWER8_2LPARMODE;
+                       } else {
+                               split = 4;
+                               cmd_bit = HID0_POWER8_1TO4LPAR;
+                               stat_bit = HID0_POWER8_4LPARMODE;
+                       }
+                       subcore_size = MAX_SMT_THREADS / split;
+                       split_info.rpr = mfspr(SPRN_RPR);
+                       split_info.pmmar = mfspr(SPRN_PMMAR);
+                       split_info.ldbar = mfspr(SPRN_LDBAR);
+                       split_info.subcore_size = subcore_size;
+               } else {
+                       split_info.subcore_size = 1;
+                       if (hpt_on_radix) {
+                               /* Use the split_info for LPCR/LPIDR changes */
+                               split_info.lpcr_req = vc->lpcr;
+                               split_info.lpidr_req = vc->kvm->arch.lpid;
+                               split_info.host_lpcr = vc->kvm->arch.host_lpcr;
+                               split_info.do_set = 1;
+                       }
+               }
                /* order writes to split_info before kvm_split_mode pointer */
                smp_wmb();
        }
-       for (thr = 0; thr < controlled_threads; ++thr)
+       for (thr = 0; thr < controlled_threads; ++thr) {
+               paca[pcpu + thr].kvm_hstate.tid = thr;
+               paca[pcpu + thr].kvm_hstate.napping = 0;
                paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+       }
  
-       /* Initiate micro-threading (split-core) if required */
+       /* Initiate micro-threading (split-core) on POWER8 if required */
        if (cmd_bit) {
                unsigned long hid0 = mfspr(SPRN_HID0);
  
        /* Start all the threads */
        active = 0;
        for (sub = 0; sub < core_info.n_subcores; ++sub) {
-               thr = subcore_thread_map[sub];
+               thr = is_power8 ? subcore_thread_map[sub] : sub;
                thr0_done = false;
                active |= 1 << thr;
                pvc = core_info.vc[sub];
         * the vcore pointer in the PACA of the secondaries.
         */
        smp_mb();
-       if (cmd_bit)
-               split_info.do_nap = 1;  /* ask secondaries to nap when done */
  
        /*
         * When doing micro-threading, poke the inactive threads as well.
         * This gets them to the nap instruction after kvm_do_nap,
         * which reduces the time taken to unsplit later.
+        * For POWER9 HPT guest on radix host, we need all the secondary
+        * threads woken up so they can do the LPCR/LPIDR change.
         */
-       if (split > 1)
+       if (cmd_bit || hpt_on_radix) {
+               split_info.do_nap = 1;  /* ask secondaries to nap when done */
                for (thr = 1; thr < threads_per_subcore; ++thr)
                        if (!(active & (1 << thr)))
                                kvmppc_ipi_thread(pcpu + thr);
+       }
  
        vc->vcore_state = VCORE_RUNNING;
        preempt_disable();
        vc->vcore_state = VCORE_EXITING;
  
        /* wait for secondary threads to finish writing their state to memory */
-       kvmppc_wait_for_nap();
+       kvmppc_wait_for_nap(controlled_threads);
  
        /* Return to whole-core mode if we split the core earlier */
-       if (split > 1) {
+       if (cmd_bit) {
                unsigned long hid0 = mfspr(SPRN_HID0);
                unsigned long loops = 0;
  
                        cpu_relax();
                        ++loops;
                }
-               split_info.do_nap = 0;
+       } else if (hpt_on_radix) {
+               /* Wait for all threads to have seen final sync */
+               for (thr = 1; thr < controlled_threads; ++thr) {
+                       while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+                               HMT_low();
+                               barrier();
+                       }
+                       HMT_medium();
+               }
        }
+       split_info.do_nap = 0;
  
        kvmppc_set_host_core(pcpu);
  
        trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
  }
  
+ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
+ {
+       int r = 0;
+       struct kvm *kvm = vcpu->kvm;
+       mutex_lock(&kvm->lock);
+       if (!kvm->arch.mmu_ready) {
+               if (!kvm_is_radix(kvm))
+                       r = kvmppc_hv_setup_htab_rma(vcpu);
+               if (!r) {
+                       if (cpu_has_feature(CPU_FTR_ARCH_300))
+                               kvmppc_setup_partition_table(kvm);
+                       kvm->arch.mmu_ready = 1;
+               }
+       }
+       mutex_unlock(&kvm->lock);
+       return r;
+ }
  static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
  {
        int n_ceded, i, r;
  
        while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
               !signal_pending(current)) {
-               /* See if the HPT and VRMA are ready to go */
-               if (!kvm_is_radix(vcpu->kvm) &&
-                   !vcpu->kvm->arch.hpte_setup_done) {
+               /* See if the MMU is ready to go */
+               if (!vcpu->kvm->arch.mmu_ready) {
                        spin_unlock(&vc->lock);
-                       r = kvmppc_hv_setup_htab_rma(vcpu);
+                       r = kvmhv_setup_mmu(vcpu);
                        spin_lock(&vc->lock);
                        if (r) {
                                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-                               kvm_run->fail_entry.hardware_entry_failure_reason = 0;
+                               kvm_run->fail_entry.
+                                       hardware_entry_failure_reason = 0;
                                vcpu->arch.ret = r;
                                break;
                        }
@@@ -3219,6 -3285,7 +3290,7 @@@ static int kvmppc_vcpu_run_hv(struct kv
        unsigned long ebb_regs[3] = {}; /* shut up GCC */
        unsigned long user_tar = 0;
        unsigned int user_vrsave;
+       struct kvm *kvm;
  
        if (!vcpu->arch.sane) {
                run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return -EINTR;
        }
  
-       atomic_inc(&vcpu->kvm->arch.vcpus_running);
-       /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
+       kvm = vcpu->kvm;
+       atomic_inc(&kvm->arch.vcpus_running);
+       /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
        smp_mb();
  
        flush_all_to_thread(current);
                        trace_kvm_hcall_exit(vcpu, r);
                        kvmppc_core_prepare_to_enter(vcpu);
                } else if (r == RESUME_PAGE_FAULT) {
-                       srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+                       srcu_idx = srcu_read_lock(&kvm->srcu);
                        r = kvmppc_book3s_hv_page_fault(run, vcpu,
                                vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
-                       srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+                       srcu_read_unlock(&kvm->srcu, srcu_idx);
                } else if (r == RESUME_PASSTHROUGH) {
                        if (WARN_ON(xive_enabled()))
                                r = H_SUCCESS;
        mtspr(SPRN_VRSAVE, user_vrsave);
  
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
-       atomic_dec(&vcpu->kvm->arch.vcpus_running);
+       atomic_dec(&kvm->arch.vcpus_running);
        return r;
  }
  
  static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
-                                    int linux_psize)
+                                    int shift, int sllp)
  {
-       struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
-       if (!def->shift)
-               return;
-       (*sps)->page_shift = def->shift;
-       (*sps)->slb_enc = def->sllp;
-       (*sps)->enc[0].page_shift = def->shift;
-       (*sps)->enc[0].pte_enc = def->penc[linux_psize];
+       (*sps)->page_shift = shift;
+       (*sps)->slb_enc = sllp;
+       (*sps)->enc[0].page_shift = shift;
+       (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
        /*
-        * Add 16MB MPSS support if host supports it
+        * Add 16MB MPSS support (may get filtered out by userspace)
         */
-       if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
-               (*sps)->enc[1].page_shift = 24;
-               (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+       if (shift != 24) {
+               int penc = kvmppc_pgsize_lp_encoding(shift, 24);
+               if (penc != -1) {
+                       (*sps)->enc[1].page_shift = 24;
+                       (*sps)->enc[1].pte_enc = penc;
+               }
        }
        (*sps)++;
  }
@@@ -3338,13 -3405,6 +3410,6 @@@ static int kvm_vm_ioctl_get_smmu_info_h
  {
        struct kvm_ppc_one_seg_page_size *sps;
  
-       /*
-        * Since we don't yet support HPT guests on a radix host,
-        * return an error if the host uses radix.
-        */
-       if (radix_enabled())
-               return -EINVAL;
        /*
         * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
         * POWER7 doesn't support keys for instruction accesses,
        info->data_keys = 32;
        info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
  
-       info->flags = KVM_PPC_PAGE_SIZES_REAL;
-       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
-               info->flags |= KVM_PPC_1T_SEGMENTS;
-       info->slb_size = mmu_slb_size;
+       /* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
+       info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
+       info->slb_size = 32;
  
        /* We only support these sizes for now, and no muti-size segments */
        sps = &info->sps[0];
-       kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
-       kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
-       kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+       kvmppc_add_seg_page_size(&sps, 12, 0);
+       kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
+       kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
  
        return 0;
  }
@@@ -3377,7 -3436,7 +3441,7 @@@ static int kvm_vm_ioctl_get_dirty_log_h
        struct kvm_memory_slot *memslot;
        int i, r;
        unsigned long n;
-       unsigned long *buf;
+       unsigned long *buf, *p;
        struct kvm_vcpu *vcpu;
  
        mutex_lock(&kvm->slots_lock);
                goto out;
  
        /*
-        * Use second half of bitmap area because radix accumulates
-        * bits in the first half.
+        * Use second half of bitmap area because both HPT and radix
+        * accumulate bits in the first half.
         */
        n = kvm_dirty_bitmap_bytes(memslot);
        buf = memslot->dirty_bitmap + n / sizeof(long);
        if (r)
                goto out;
  
+       /*
+        * We accumulate dirty bits in the first half of the
+        * memslot's dirty_bitmap area, for when pages are paged
+        * out or modified by the host directly.  Pick up these
+        * bits and add them to the map.
+        */
+       p = memslot->dirty_bitmap;
+       for (i = 0; i < n / sizeof(long); ++i)
+               buf[i] |= xchg(&p[i], 0);
        /* Harvest dirty bits from VPA and DTL updates */
        /* Note: we never modify the SLB shadow buffer areas */
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@@ -3438,15 -3507,6 +3512,6 @@@ static void kvmppc_core_free_memslot_hv
  static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
                                         unsigned long npages)
  {
-       /*
-        * For now, if radix_enabled() then we only support radix guests,
-        * and in that case we don't need the rmap array.
-        */
-       if (radix_enabled()) {
-               slot->arch.rmap = NULL;
-               return 0;
-       }
        slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
        if (!slot->arch.rmap)
                return -ENOMEM;
@@@ -3467,8 -3527,6 +3532,6 @@@ static void kvmppc_core_commit_memory_r
                                const struct kvm_memory_slot *new)
  {
        unsigned long npages = mem->memory_size >> PAGE_SHIFT;
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
  
        /*
         * If we are making a new memslot, it might make
         */
        if (npages)
                atomic64_inc(&kvm->arch.mmio_update);
-       if (npages && old->npages && !kvm_is_radix(kvm)) {
-               /*
-                * If modifying a memslot, reset all the rmap dirty bits.
-                * If this is a new memslot, we don't need to do anything
-                * since the rmap array starts out as all zeroes,
-                * i.e. no pages are dirty.
-                */
-               slots = kvm_memslots(kvm);
-               memslot = id_to_memslot(slots, mem->slot);
-               kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
-       }
  }
  
  /*
@@@ -3545,6 -3591,10 +3596,10 @@@ static void kvmppc_setup_partition_tabl
        mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
  }
  
+ /*
+  * Set up HPT (hashed page table) and RMA (real-mode area).
+  * Must be called with kvm->lock held.
+  */
  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  {
        int err = 0;
        unsigned long psize, porder;
        int srcu_idx;
  
-       mutex_lock(&kvm->lock);
-       if (kvm->arch.hpte_setup_done)
-               goto out;       /* another vcpu beat us to it */
        /* Allocate hashed page table (if not done already) and reset it */
        if (!kvm->arch.hpt.virt) {
                int order = KVM_DEFAULT_HPT_ORDER;
                /* the -4 is to account for senc values starting at 0x10 */
                lpcr = senc << (LPCR_VRMASD_SH - 4);
                kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
-       } else {
-               kvmppc_setup_partition_table(kvm);
        }
  
-       /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
+       /* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
        smp_wmb();
-       kvm->arch.hpte_setup_done = 1;
        err = 0;
   out_srcu:
        srcu_read_unlock(&kvm->srcu, srcu_idx);
   out:
-       mutex_unlock(&kvm->lock);
        return err;
  
   up_out:
        goto out_srcu;
  }
  
+ /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
+ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
+ {
+       kvmppc_free_radix(kvm);
+       kvmppc_update_lpcr(kvm, LPCR_VPM1,
+                          LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+       kvmppc_rmap_reset(kvm);
+       kvm->arch.radix = 0;
+       kvm->arch.process_table = 0;
+       return 0;
+ }
+ /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
+ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
+ {
+       int err;
+       err = kvmppc_init_vm_radix(kvm);
+       if (err)
+               return err;
+       kvmppc_free_hpt(&kvm->arch.hpt);
+       kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
+                          LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+       kvm->arch.radix = 1;
+       return 0;
+ }
  #ifdef CONFIG_KVM_XICS
  /*
   * Allocate a per-core structure for managing state about which cores are
@@@ -3780,10 -3850,11 +3855,11 @@@ static int kvmppc_core_init_vm_hv(struc
        }
  
        /*
-        * For now, if the host uses radix, the guest must be radix.
+        * If the host uses radix, the guest starts out as radix.
         */
        if (radix_enabled()) {
                kvm->arch.radix = 1;
+               kvm->arch.mmu_ready = 1;
                lpcr &= ~LPCR_VPM1;
                lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
                ret = kvmppc_init_vm_radix(kvm);
         * Work out how many sets the TLB has, for the use of
         * the TLB invalidation loop in book3s_hv_rmhandlers.S.
         */
-       if (kvm_is_radix(kvm))
+       if (radix_enabled())
                kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;     /* 128 */
        else if (cpu_has_feature(CPU_FTR_ARCH_300))
                kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;      /* 256 */
        /*
         * Track that we now have a HV mode VM active. This blocks secondary
         * CPU threads from coming online.
-        * On POWER9, we only need to do this for HPT guests on a radix
-        * host, which is not yet supported.
+        * On POWER9, we only need to do this if the "indep_threads_mode"
+        * module parameter has been set to N.
         */
-       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               kvm->arch.threads_indep = indep_threads_mode;
+       if (!kvm->arch.threads_indep)
                kvm_hv_vm_activated();
  
        /*
@@@ -3858,7 -3931,7 +3936,7 @@@ static void kvmppc_core_destroy_vm_hv(s
  {
        debugfs_remove_recursive(kvm->arch.debugfs_dir);
  
-       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+       if (!kvm->arch.threads_indep)
                kvm_hv_vm_deactivated();
  
        kvmppc_free_vcores(kvm);
@@@ -4193,6 -4266,7 +4271,7 @@@ static int kvmhv_configure_mmu(struct k
  {
        unsigned long lpcr;
        int radix;
+       int err;
  
        /* If not on a POWER9, reject it */
        if (!cpu_has_feature(CPU_FTR_ARCH_300))
        if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
                return -EINVAL;
  
-       /* We can't change a guest to/from radix yet */
-       radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
-       if (radix != kvm_is_radix(kvm))
-               return -EINVAL;
        /* GR (guest radix) bit in process_table field must match */
+       radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
        if (!!(cfg->process_table & PATB_GR) != radix)
                return -EINVAL;
  
        if ((cfg->process_table & PRTS_MASK) > 24)
                return -EINVAL;
  
+       /* We can change a guest to/from radix now, if the host is radix */
+       if (radix && !radix_enabled())
+               return -EINVAL;
        mutex_lock(&kvm->lock);
+       if (radix != kvm_is_radix(kvm)) {
+               if (kvm->arch.mmu_ready) {
+                       kvm->arch.mmu_ready = 0;
+                       /* order mmu_ready vs. vcpus_running */
+                       smp_mb();
+                       if (atomic_read(&kvm->arch.vcpus_running)) {
+                               kvm->arch.mmu_ready = 1;
+                               err = -EBUSY;
+                               goto out_unlock;
+                       }
+               }
+               if (radix)
+                       err = kvmppc_switch_mmu_to_radix(kvm);
+               else
+                       err = kvmppc_switch_mmu_to_hpt(kvm);
+               if (err)
+                       goto out_unlock;
+       }
        kvm->arch.process_table = cfg->process_table;
        kvmppc_setup_partition_table(kvm);
  
        lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
        kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
-       mutex_unlock(&kvm->lock);
+       err = 0;
  
-       return 0;
+  out_unlock:
+       mutex_unlock(&kvm->lock);
+       return err;
  }
  
  static struct kvmppc_ops kvm_ops_hv = {
@@@ -4365,4 -4460,3 +4465,3 @@@ module_exit(kvmppc_book3s_exit_hv)
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_MISCDEV(KVM_MINOR);
  MODULE_ALIAS("devname:kvm");
index 329b2843fee2161093f13ef55db77977f6c5449a,c8aacced23fb3d2fccec48290527b556620688e7..fa557372d600a0283663635ff198895cfad91709
@@@ -213,6 -213,16 +213,16 @@@ static inline unsigned long pending_irq
               vcpu->arch.local_int.pending_irqs;
  }
  
+ static inline int isc_to_irq_type(unsigned long isc)
+ {
+       return IRQ_PEND_IO_ISC_0 + isc;
+ }
+ static inline int irq_type_to_isc(unsigned long irq_type)
+ {
+       return irq_type - IRQ_PEND_IO_ISC_0;
+ }
  static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
                                   unsigned long active_mask)
  {
  
        for (i = 0; i <= MAX_ISC; i++)
                if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
-                       active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+                       active_mask &= ~(1UL << (isc_to_irq_type(i)));
  
        return active_mask;
  }
@@@ -901,7 -911,7 +911,7 @@@ static int __must_check __deliver_io(st
        fi = &vcpu->kvm->arch.float_int;
  
        spin_lock(&fi->lock);
-       isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+       isc_list = &fi->lists[irq_type_to_isc(irq_type)];
        inti = list_first_entry_or_null(isc_list,
                                        struct kvm_s390_interrupt_info,
                                        list);
@@@ -1074,6 -1084,12 +1084,12 @@@ void kvm_s390_vcpu_wakeup(struct kvm_vc
         * in kvm_vcpu_block without having the waitqueue set (polling)
         */
        vcpu->valid_wakeup = true;
+       /*
+        * This is mostly to document, that the read in swait_active could
+        * be moved before other stores, leading to subtle races.
+        * All current users do not store or use an atomic like update
+        */
+       smp_mb__after_atomic();
        if (swait_active(&vcpu->wq)) {
                /*
                 * The vcpu gave up the cpu voluntarily, mark it as a good
@@@ -1395,7 -1411,7 +1411,7 @@@ static struct kvm_s390_interrupt_info *
                list_del_init(&iter->list);
                fi->counters[FIRQ_CNTR_IO] -= 1;
                if (list_empty(isc_list))
-                       clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+                       clear_bit(isc_to_irq_type(isc), &fi->pending_irqs);
                spin_unlock(&fi->lock);
                return iter;
        }
@@@ -1522,7 -1538,7 +1538,7 @@@ static int __inject_io(struct kvm *kvm
        isc = int_word_to_isc(inti->io.io_int_word);
        list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
        list_add_tail(&inti->list, list);
-       set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+       set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
        spin_unlock(&fi->lock);
        return 0;
  }
@@@ -2175,6 -2191,8 +2191,8 @@@ static int clear_io_irq(struct kvm *kvm
                return -EINVAL;
        if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
                return -EFAULT;
+       if (!schid)
+               return -EINVAL;
        kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
        /*
         * If userspace is conforming to the architecture, we can have at most
@@@ -2483,11 -2501,11 +2501,11 @@@ void kvm_s390_reinject_machine_check(st
  
        mci.val = mcck_info->mcic;
        if (mci.sr)
 -              cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
 +              cr14 |= CR14_RECOVERY_SUBMASK;
        if (mci.dg)
 -              cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
 +              cr14 |= CR14_DEGRADATION_SUBMASK;
        if (mci.w)
 -              cr14 |= MCCK_CR14_WARN_SUB_MASK;
 +              cr14 |= CR14_WARNING_SUBMASK;
  
        mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
        mchk->cr14 = cr14;
diff --combined arch/s390/kvm/kvm-s390.c
index 4bc70afe0a104dcb2680cc7c8762983b952eb180,8f4b655f65d78f4d9bf0b57094f26a0b6c627d8b..98ad8b9e036093c8a784cfc0dfd3887e925c6357
@@@ -395,6 -395,7 +395,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
+       case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
@@@ -3281,7 -3282,7 +3282,7 @@@ static void sync_regs(struct kvm_vcpu *
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
            test_kvm_facility(vcpu->kvm, 64) &&
 -          riccb->valid &&
 +          riccb->v &&
            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
index ee23a43386a2908c140e96b8b85e8b82bc4fbd27,ad38c5e918ecc97d02301f3b69e667569e960d27..034caa1a084e360ff74c77e84116bb0de6e28dcd
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  /******************************************************************************
   * x86_emulate.h
   *
@@@ -226,6 -225,8 +226,8 @@@ struct x86_emulate_ops 
  
        unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
        void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
+       int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
  };
  
  typedef u32 __attribute__((vector_size(16))) sse128_t;
index 9d7d856b2d8965f605412d2716717e7069cefdb9,7233445a20bdf2e7b3a4691b33f6c95316876242..1bfb99770c34197b6c0627897753d282b3e5c378
@@@ -1061,6 -1061,11 +1061,11 @@@ struct kvm_x86_ops 
        void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
  
        void (*setup_mce)(struct kvm_vcpu *vcpu);
+       int (*smi_allowed)(struct kvm_vcpu *vcpu);
+       int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
+       int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
+       int (*enable_smi_window)(struct kvm_vcpu *vcpu);
  };
  
  struct kvm_arch_async_pf {
@@@ -1419,11 -1424,14 +1424,14 @@@ static inline void kvm_arch_vcpu_block_
  static inline int kvm_cpu_get_apicid(int mps_cpu)
  {
  #ifdef CONFIG_X86_LOCAL_APIC
 -      return __default_cpu_present_to_apicid(mps_cpu);
 +      return default_cpu_present_to_apicid(mps_cpu);
  #else
        WARN_ON_ONCE(1);
        return BAD_APICID;
  #endif
  }
  
+ #define put_smstate(type, buf, offset, val)                      \
+       *(type *)((buf) + (offset) - 0x7e00) = val
  #endif /* _ASM_X86_KVM_HOST_H */
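
The put_smstate() macro added above rebases an SMM state-save offset into the 512-byte buffer handled by the new pre_enter_smm/pre_leave_smm hooks; the subtraction suggests the buffer covers offsets 0x7e00-0x7fff of the state-save map. A stand-alone illustration, with a purely hypothetical field offset rather than one taken from the SDM:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same shape as the new macro: rebase a 0x7exx offset into a 512-byte buffer. */
#define put_smstate(type, buf, offset, val) \
	(*(type *)((buf) + (offset) - 0x7e00) = (val))

int main(void)
{
	_Alignas(8) char buf[512];

	memset(buf, 0, sizeof(buf));
	put_smstate(uint64_t, buf, 0x7f00, 0x1234u);	/* hypothetical field at 0x7f00 */
	printf("%#llx\n", (unsigned long long)*(uint64_t *)(buf + 0x100));
	return 0;
}
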
diff --combined arch/x86/kvm/lapic.c
index 36c90d631096d8c4eea10291d2958d6fb1393b4b,a778f1ae2927df889aa4416aba59ef676011ecbd..943acbf00c69d8f423289116bc363159144f883a
@@@ -1301,14 -1301,42 +1301,42 @@@ static void update_divide_count(struct 
                                   apic->divide_count);
  }
  
+ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
+ {
+       /*
+        * Do not allow the guest to program periodic timers with a small
+        * interval, since the hrtimers are not throttled by the host
+        * scheduler.
+        */
+       if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
+               s64 min_period = min_timer_period_us * 1000LL;
+               if (apic->lapic_timer.period < min_period) {
+                       pr_info_ratelimited(
+                           "kvm: vcpu %i: requested %lld ns "
+                           "lapic timer period limited to %lld ns\n",
+                           apic->vcpu->vcpu_id,
+                           apic->lapic_timer.period, min_period);
+                       apic->lapic_timer.period = min_period;
+               }
+       }
+ }
  static void apic_update_lvtt(struct kvm_lapic *apic)
  {
        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
                        apic->lapic_timer.timer_mode_mask;
  
        if (apic->lapic_timer.timer_mode != timer_mode) {
+               if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
+                               APIC_LVT_TIMER_TSCDEADLINE)) {
+                       hrtimer_cancel(&apic->lapic_timer.timer);
+                       kvm_lapic_set_reg(apic, APIC_TMICT, 0);
+                       apic->lapic_timer.period = 0;
+                       apic->lapic_timer.tscdeadline = 0;
+               }
                apic->lapic_timer.timer_mode = timer_mode;
-               hrtimer_cancel(&apic->lapic_timer.timer);
+               limit_periodic_timer_frequency(apic);
        }
  }
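
limit_periodic_timer_frequency() above simply clamps short periodic intervals to a floor derived from min_timer_period_us, so a guest cannot flood the host with hrtimer callbacks. A user-space sketch of that clamp, with an assumed default for the module parameter:

#include <stdint.h>
#include <stdio.h>

static uint32_t min_timer_period_us = 500;	/* assumed default, it is a module parameter */

static int64_t clamp_period_ns(int64_t period_ns)
{
	int64_t min_period = (int64_t)min_timer_period_us * 1000;

	/* a zero period means "timer not armed" and is left alone */
	return period_ns && period_ns < min_period ? min_period : period_ns;
}

int main(void)
{
	printf("%lld\n", (long long)clamp_period_ns(100000));	/* raised to 500000 */
	return 0;
}
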
  
@@@ -1430,6 -1458,30 +1458,30 @@@ static void start_sw_period(struct kvm_
                HRTIMER_MODE_ABS_PINNED);
  }
  
+ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
+ {
+       ktime_t now, remaining;
+       u64 ns_remaining_old, ns_remaining_new;
+       apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+               * APIC_BUS_CYCLE_NS * apic->divide_count;
+       limit_periodic_timer_frequency(apic);
+       now = ktime_get();
+       remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
+       if (ktime_to_ns(remaining) < 0)
+               remaining = 0;
+       ns_remaining_old = ktime_to_ns(remaining);
+       ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
+                                          apic->divide_count, old_divisor);
+       apic->lapic_timer.tscdeadline +=
+               nsec_to_cycles(apic->vcpu, ns_remaining_new) -
+               nsec_to_cycles(apic->vcpu, ns_remaining_old);
+       apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
+ }
  static bool set_target_expiration(struct kvm_lapic *apic)
  {
        ktime_t now;
        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
                * APIC_BUS_CYCLE_NS * apic->divide_count;
  
-       if (!apic->lapic_timer.period)
+       if (!apic->lapic_timer.period) {
+               apic->lapic_timer.tscdeadline = 0;
                return false;
-       /*
-        * Do not allow the guest to program periodic timers with small
-        * interval, since the hrtimers are not throttled by the host
-        * scheduler.
-        */
-       if (apic_lvtt_period(apic)) {
-               s64 min_period = min_timer_period_us * 1000LL;
-               if (apic->lapic_timer.period < min_period) {
-                       pr_info_ratelimited(
-                           "kvm: vcpu %i: requested %lld ns "
-                           "lapic timer period limited to %lld ns\n",
-                           apic->vcpu->vcpu_id,
-                           apic->lapic_timer.period, min_period);
-                       apic->lapic_timer.period = min_period;
-               }
        }
  
+       limit_periodic_timer_frequency(apic);
        apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
                   PRIx64 ", "
                   "timer initial count 0x%x, period %lldns, "
@@@ -1515,6 -1553,9 +1553,9 @@@ static bool start_hv_timer(struct kvm_l
        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
                return false;
  
+       if (!ktimer->tscdeadline)
+               return false;
        r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
        if (r < 0)
                return false;
@@@ -1738,13 -1779,21 +1779,21 @@@ int kvm_lapic_reg_write(struct kvm_lapi
                start_apic_timer(apic);
                break;
  
-       case APIC_TDCR:
+       case APIC_TDCR: {
+               uint32_t old_divisor = apic->divide_count;
                if (val & 4)
                        apic_debug("KVM_WRITE:TDCR %x\n", val);
                kvm_lapic_set_reg(apic, APIC_TDCR, val);
                update_divide_count(apic);
+               if (apic->divide_count != old_divisor &&
+                               apic->lapic_timer.period) {
+                       hrtimer_cancel(&apic->lapic_timer.timer);
+                       update_target_expiration(apic, old_divisor);
+                       restart_apic_timer(apic);
+               }
                break;
+       }
        case APIC_ESR:
                if (apic_x2apic_mode(apic) && val != 0) {
                        apic_debug("KVM_WRITE:ESR not zero %x\n", val);
@@@ -1992,11 -2041,6 +2041,11 @@@ void kvm_lapic_reset(struct kvm_vcpu *v
                                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
        vcpu->arch.pv_eoi.msr_val = 0;
        apic_update_ppr(apic);
 +      if (vcpu->arch.apicv_active) {
 +              kvm_x86_ops->apicv_post_state_restore(vcpu);
 +              kvm_x86_ops->hwapic_irr_update(vcpu, -1);
 +              kvm_x86_ops->hwapic_isr_update(vcpu, -1);
 +      }
  
        vcpu->arch.apic_arb_prio = 0;
        vcpu->arch.apic_attention = 0;
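
update_target_expiration() above rescales the time left on the timer when the guest changes APIC_TDCR mid-countdown: the remaining nanoseconds are multiplied by the new divide count and divided by the old one. A stand-alone sketch of that arithmetic, using a 128-bit intermediate in the same spirit as mul_u64_u32_div():

#include <stdint.h>
#include <stdio.h>

static uint64_t rescale_remaining_ns(uint64_t ns_remaining, uint32_t new_div,
				     uint32_t old_div)
{
	return (unsigned __int128)ns_remaining * new_div / old_div;
}

int main(void)
{
	/* 1 ms left at divide-by-2; switching to divide-by-16 leaves 8 ms */
	printf("%llu\n",
	       (unsigned long long)rescale_remaining_ns(1000000, 16, 2));
	return 0;
}
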
diff --combined arch/x86/kvm/mmu.c
index a119b361b8b7a9c916e4df7ecd9e69622e64c1b3,0b481cc9c72533d5eb5f9f43856d05354f39f880..e5e66e5c664057bb5cc5ad2660008ccbf19b69e5
@@@ -150,6 -150,20 +150,20 @@@ module_param(dbg, bool, 0644)
  /* make pte_list_desc fit well in cache line */
  #define PTE_LIST_EXT 3
  
+ /*
+  * Return values of handle_mmio_page_fault and mmu.page_fault:
+  * RET_PF_RETRY: let CPU fault again on the address.
+  * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+  *
+  * For handle_mmio_page_fault only:
+  * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
+  */
+ enum {
+       RET_PF_RETRY = 0,
+       RET_PF_EMULATE = 1,
+       RET_PF_INVALID = 2,
+ };
  struct pte_list_desc {
        u64 *sptes[PTE_LIST_EXT];
        struct pte_list_desc *more;
@@@ -443,7 -457,7 +457,7 @@@ static u64 __update_clear_spte_slow(u6
  
  static u64 __get_spte_lockless(u64 *sptep)
  {
 -      return ACCESS_ONCE(*sptep);
 +      return READ_ONCE(*sptep);
  }
  #else
  union split_spte {
@@@ -2424,7 -2438,7 +2438,7 @@@ static void __shadow_walk_next(struct k
  
  static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
  {
-       return __shadow_walk_next(iterator, *iterator->sptep);
+       __shadow_walk_next(iterator, *iterator->sptep);
  }
  
  static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
@@@ -2794,13 -2808,13 +2808,13 @@@ done
        return ret;
  }
  
- static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
-                        int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
-                        bool speculative, bool host_writable)
+ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+                       int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
+                       bool speculative, bool host_writable)
  {
        int was_rmapped = 0;
        int rmap_count;
-       bool emulate = false;
+       int ret = RET_PF_RETRY;
  
        pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
                 *sptep, write_fault, gfn);
        if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
              true, host_writable)) {
                if (write_fault)
-                       emulate = true;
+                       ret = RET_PF_EMULATE;
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        }
  
        if (unlikely(is_mmio_spte(*sptep)))
-               emulate = true;
+               ret = RET_PF_EMULATE;
  
        pgprintk("%s: setting spte %llx\n", __func__, *sptep);
        pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
  
        kvm_release_pfn_clean(pfn);
  
-       return emulate;
+       return ret;
  }
  
  static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
@@@ -2994,14 -3008,13 +3008,13 @@@ static int kvm_handle_bad_page(struct k
         * Do not cache the mmio info caused by writing the readonly gfn
         * into the spte otherwise read access on readonly gfn also can
         * caused mmio page fault and treat it as mmio access.
-        * Return 1 to tell kvm to emulate it.
         */
        if (pfn == KVM_PFN_ERR_RO_FAULT)
-               return 1;
+               return RET_PF_EMULATE;
  
        if (pfn == KVM_PFN_ERR_HWPOISON) {
                kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
-               return 0;
+               return RET_PF_RETRY;
        }
  
        return -EFAULT;
@@@ -3286,13 -3299,13 +3299,13 @@@ static int nonpaging_map(struct kvm_vcp
        }
  
        if (fast_page_fault(vcpu, v, level, error_code))
-               return 0;
+               return RET_PF_RETRY;
  
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
  
        if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
-               return 0;
+               return RET_PF_RETRY;
  
        if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
                return r;
  out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
-       return 0;
+       return RET_PF_RETRY;
  }
  
  
@@@ -3659,54 -3672,38 +3672,38 @@@ exit
        return reserved;
  }
  
- /*
-  * Return values of handle_mmio_page_fault:
-  * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
-  *                    directly.
-  * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
-  *                    fault path update the mmio spte.
-  * RET_MMIO_PF_RETRY: let CPU fault again on the address.
-  * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
-  */
- enum {
-       RET_MMIO_PF_EMULATE = 1,
-       RET_MMIO_PF_INVALID = 2,
-       RET_MMIO_PF_RETRY = 0,
-       RET_MMIO_PF_BUG = -1
- };
  static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
  {
        u64 spte;
        bool reserved;
  
        if (mmio_info_in_cache(vcpu, addr, direct))
-               return RET_MMIO_PF_EMULATE;
+               return RET_PF_EMULATE;
  
        reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
        if (WARN_ON(reserved))
-               return RET_MMIO_PF_BUG;
+               return -EINVAL;
  
        if (is_mmio_spte(spte)) {
                gfn_t gfn = get_mmio_spte_gfn(spte);
                unsigned access = get_mmio_spte_access(spte);
  
                if (!check_mmio_spte(vcpu, spte))
-                       return RET_MMIO_PF_INVALID;
+                       return RET_PF_INVALID;
  
                if (direct)
                        addr = 0;
  
                trace_handle_mmio_page_fault(addr, gfn, access);
                vcpu_cache_mmio_info(vcpu, addr, gfn, access);
-               return RET_MMIO_PF_EMULATE;
+               return RET_PF_EMULATE;
        }
  
        /*
         * If the page table is zapped by other cpus, let CPU fault again on
         * the address.
         */
-       return RET_MMIO_PF_RETRY;
+       return RET_PF_RETRY;
  }
  EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
  
@@@ -3756,7 -3753,7 +3753,7 @@@ static int nonpaging_page_fault(struct 
        pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
  
        if (page_fault_handle_page_track(vcpu, error_code, gfn))
-               return 1;
+               return RET_PF_EMULATE;
  
        r = mmu_topup_memory_caches(vcpu);
        if (r)
@@@ -3820,8 -3817,7 +3817,7 @@@ static bool try_async_pf(struct kvm_vcp
  }
  
  int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
-                               u64 fault_address, char *insn, int insn_len,
-                               bool need_unprotect)
+                               u64 fault_address, char *insn, int insn_len)
  {
        int r = 1;
  
        default:
                trace_kvm_page_fault(fault_address, error_code);
  
-               if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+               if (kvm_event_needs_reinjection(vcpu))
                        kvm_mmu_unprotect_page_virt(vcpu, fault_address);
                r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
                                insn_len);
@@@ -3876,7 -3872,7 +3872,7 @@@ static int tdp_page_fault(struct kvm_vc
        MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
  
        if (page_fault_handle_page_track(vcpu, error_code, gfn))
-               return 1;
+               return RET_PF_EMULATE;
  
        r = mmu_topup_memory_caches(vcpu);
        if (r)
        }
  
        if (fast_page_fault(vcpu, gpa, level, error_code))
-               return 0;
+               return RET_PF_RETRY;
  
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
  
        if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
-               return 0;
+               return RET_PF_RETRY;
  
        if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
                return r;
  out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
-       return 0;
+       return RET_PF_RETRY;
  }
  
  static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@@ -4819,7 -4815,7 +4815,7 @@@ static void kvm_mmu_pte_write(struct kv
         * If we don't have indirect shadow pages, it means no page is
         * write-protected, so we can exit simply.
         */
 -      if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
 +      if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
                return;
  
        remote_flush = local_flush = false;
@@@ -4918,25 -4914,25 +4914,25 @@@ int kvm_mmu_page_fault(struct kvm_vcpu 
                vcpu->arch.gpa_val = cr2;
        }
  
+       r = RET_PF_INVALID;
        if (unlikely(error_code & PFERR_RSVD_MASK)) {
                r = handle_mmio_page_fault(vcpu, cr2, direct);
-               if (r == RET_MMIO_PF_EMULATE) {
+               if (r == RET_PF_EMULATE) {
                        emulation_type = 0;
                        goto emulate;
                }
-               if (r == RET_MMIO_PF_RETRY)
-                       return 1;
-               if (r < 0)
-                       return r;
-               /* Must be RET_MMIO_PF_INVALID.  */
        }
  
-       r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
-                                     false);
+       if (r == RET_PF_INVALID) {
+               r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
+                                             false);
+               WARN_ON(r == RET_PF_INVALID);
+       }
+       if (r == RET_PF_RETRY)
+               return 1;
        if (r < 0)
                return r;
-       if (!r)
-               return 1;
  
        /*
         * Before emulating the instruction, check if the error code
@@@ -4993,8 -4989,7 +4989,7 @@@ EXPORT_SYMBOL_GPL(kvm_disable_tdp)
  static void free_mmu_pages(struct kvm_vcpu *vcpu)
  {
        free_page((unsigned long)vcpu->arch.mmu.pae_root);
-       if (vcpu->arch.mmu.lm_root != NULL)
-               free_page((unsigned long)vcpu->arch.mmu.lm_root);
+       free_page((unsigned long)vcpu->arch.mmu.lm_root);
  }
  
  static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@@ -5464,10 -5459,8 +5459,8 @@@ static struct shrinker mmu_shrinker = 
  
  static void mmu_destroy_caches(void)
  {
-       if (pte_list_desc_cache)
-               kmem_cache_destroy(pte_list_desc_cache);
-       if (mmu_page_header_cache)
-               kmem_cache_destroy(mmu_page_header_cache);
+       kmem_cache_destroy(pte_list_desc_cache);
+       kmem_cache_destroy(mmu_page_header_cache);
  }
  
  int kvm_mmu_module_init(void)
  
        pte_list_desc_cache = kmem_cache_create("pte_list_desc",
                                            sizeof(struct pte_list_desc),
-                                           0, 0, NULL);
+                                           0, SLAB_ACCOUNT, NULL);
        if (!pte_list_desc_cache)
                goto nomem;
  
        mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
                                                  sizeof(struct kvm_mmu_page),
-                                                 0, 0, NULL);
+                                                 0, SLAB_ACCOUNT, NULL);
        if (!mmu_page_header_cache)
                goto nomem;
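
The RET_PF_* values introduced above let kvm_mmu_page_fault() treat MMIO and regular faults uniformly: RETRY re-enters the guest, EMULATE goes to the instruction emulator, INVALID falls through to the normal fault path, and negative values are propagated as errors. A compressed user-space sketch of that dispatch (not the kernel function):

#include <stdio.h>

enum { RET_PF_RETRY = 0, RET_PF_EMULATE = 1, RET_PF_INVALID = 2 };

static int handle_fault_result(int r)
{
	if (r == RET_PF_INVALID)
		r = RET_PF_EMULATE;	/* stand-in for the regular page-fault path */
	if (r == RET_PF_RETRY)
		return 1;		/* re-enter the guest */
	if (r < 0)
		return r;		/* hard error */
	return 0;			/* RET_PF_EMULATE: go emulate the instruction */
}

int main(void)
{
	printf("%d %d %d\n", handle_fault_result(RET_PF_RETRY),
	       handle_fault_result(RET_PF_EMULATE),
	       handle_fault_result(RET_PF_INVALID));
	return 0;
}
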
  
diff --combined arch/x86/kvm/mmu.h
index efc857615d8ea48305be79ba4205371f70837540,1092302aa16a5404a9f8f9fabf0a669faa109343..5b408c0ad6121223db18caf757e18f7bebd38300
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef __KVM_X86_MMU_H
  #define __KVM_X86_MMU_H
  
@@@ -66,8 -65,7 +66,7 @@@ void kvm_init_shadow_ept_mmu(struct kvm
                             bool accessed_dirty);
  bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
  int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
-                               u64 fault_address, char *insn, int insn_len,
-                               bool need_unprotect);
+                               u64 fault_address, char *insn, int insn_len);
  
  static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
  {
diff --combined arch/x86/kvm/vmx.c
index a6f4f095f8f4eb4aa5b4bae2a21dd66cccd824e7,e6c8ffa849683161734ee25af2e14752542a3500..7c3522a989d0b37713a802be82ee1f265fe64c9a
@@@ -486,6 -486,14 +486,14 @@@ struct nested_vmx 
        u64 nested_vmx_cr4_fixed1;
        u64 nested_vmx_vmcs_enum;
        u64 nested_vmx_vmfunc_controls;
+       /* SMM related state */
+       struct {
+               /* in VMX operation on SMM entry? */
+               bool vmxon;
+               /* in guest mode on SMM entry? */
+               bool guest_mode;
+       } smm;
  };
  
  #define POSTED_INTR_ON  0
@@@ -900,16 -908,13 +908,13 @@@ static bool nested_ept_ad_enabled(struc
  static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
  static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
  static bool vmx_xsaves_supported(void);
- static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
  static void vmx_set_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
  static void vmx_get_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
  static bool guest_state_valid(struct kvm_vcpu *vcpu);
  static u32 vmx_segment_access_rights(struct kvm_segment *var);
- static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
  static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
- static int alloc_identity_pagetable(struct kvm *kvm);
  static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
@@@ -1598,18 -1603,15 +1603,15 @@@ static inline void vpid_sync_context(in
  
  static inline void ept_sync_global(void)
  {
-       if (cpu_has_vmx_invept_global())
-               __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
+       __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
  }
  
  static inline void ept_sync_context(u64 eptp)
  {
-       if (enable_ept) {
-               if (cpu_has_vmx_invept_context())
-                       __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
-               else
-                       ept_sync_global();
-       }
+       if (cpu_has_vmx_invept_context())
+               __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
+       else
+               ept_sync_global();
  }
  
  static __always_inline void vmcs_check16(unsigned long field)
@@@ -2831,8 -2833,7 +2833,7 @@@ static void nested_vmx_setup_ctls_msrs(
                                SECONDARY_EXEC_ENABLE_PML;
                        vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
                }
-       } else
-               vmx->nested.nested_vmx_ept_caps = 0;
+       }
  
        if (cpu_has_vmx_vmfunc()) {
                vmx->nested.nested_vmx_secondary_ctls_high |=
                 * Advertise EPTP switching unconditionally
                 * since we emulate it
                 */
-               vmx->nested.nested_vmx_vmfunc_controls =
-                       VMX_VMFUNC_EPTP_SWITCHING;
+               if (enable_ept)
+                       vmx->nested.nested_vmx_vmfunc_controls =
+                               VMX_VMFUNC_EPTP_SWITCHING;
        }
  
        /*
                        SECONDARY_EXEC_ENABLE_VPID;
                vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
                        VMX_VPID_EXTENT_SUPPORTED_MASK;
-       } else
-               vmx->nested.nested_vmx_vpid_caps = 0;
+       }
  
        if (enable_unrestricted_guest)
                vmx->nested.nested_vmx_secondary_ctls_high |=
@@@ -3544,7 -3545,8 +3545,8 @@@ static int hardware_enable(void
                wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
        }
        kvm_cpu_vmxon(phys_addr);
-       ept_sync_global();
+       if (enable_ept)
+               ept_sync_global();
  
        return 0;
  }
@@@ -3657,8 -3659,8 +3659,8 @@@ static __init int setup_vmcs_config(str
                        SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                        SECONDARY_EXEC_SHADOW_VMCS |
                        SECONDARY_EXEC_XSAVES |
-                       SECONDARY_EXEC_RDSEED |
-                       SECONDARY_EXEC_RDRAND |
+                       SECONDARY_EXEC_RDSEED_EXITING |
+                       SECONDARY_EXEC_RDRAND_EXITING |
                        SECONDARY_EXEC_ENABLE_PML |
                        SECONDARY_EXEC_TSC_SCALING |
                        SECONDARY_EXEC_ENABLE_VMFUNC;
                                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
  
+       rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
+               &vmx_capability.ept, &vmx_capability.vpid);
        if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
                /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
                   enabled */
                _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
                                             CPU_BASED_CR3_STORE_EXITING |
                                             CPU_BASED_INVLPG_EXITING);
-               rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
-                     vmx_capability.ept, vmx_capability.vpid);
+       } else if (vmx_capability.ept) {
+               vmx_capability.ept = 0;
+               pr_warn_once("EPT CAP should not be reported when the 1-setting of "
+                               "the enable EPT VM-execution control is not supported\n");
+       }
+       if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
+               vmx_capability.vpid) {
+               vmx_capability.vpid = 0;
+               pr_warn_once("VPID CAP should not be reported when the 1-setting of "
+                               "the enable VPID VM-execution control is not supported\n");
        }
  
        min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
@@@ -4781,18 -4794,18 +4794,18 @@@ static int init_rmode_identity_map(stru
        kvm_pfn_t identity_map_pfn;
        u32 tmp;
  
-       if (!enable_ept)
-               return 0;
        /* Protect kvm->arch.ept_identity_pagetable_done. */
        mutex_lock(&kvm->slots_lock);
  
        if (likely(kvm->arch.ept_identity_pagetable_done))
                goto out2;
  
+       if (!kvm->arch.ept_identity_map_addr)
+               kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
        identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
  
-       r = alloc_identity_pagetable(kvm);
+       r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+                                   kvm->arch.ept_identity_map_addr, PAGE_SIZE);
        if (r < 0)
                goto out2;
  
@@@ -4864,20 -4877,6 +4877,6 @@@ out
        return r;
  }
  
- static int alloc_identity_pagetable(struct kvm *kvm)
- {
-       /* Called with kvm->slots_lock held. */
-       int r = 0;
-       BUG_ON(kvm->arch.ept_identity_pagetable_done);
-       r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-                                   kvm->arch.ept_identity_map_addr, PAGE_SIZE);
-       return r;
- }
  static int allocate_vpid(void)
  {
        int vpid;
@@@ -5282,13 -5281,13 +5281,13 @@@ static u32 vmx_exec_control(struct vcpu
  static bool vmx_rdrand_supported(void)
  {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_RDRAND;
+               SECONDARY_EXEC_RDRAND_EXITING;
  }
  
  static bool vmx_rdseed_supported(void)
  {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_RDSEED;
+               SECONDARY_EXEC_RDSEED_EXITING;
  }
  
  static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
        if (vmx_rdrand_supported()) {
                bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
                if (rdrand_enabled)
-                       exec_control &= ~SECONDARY_EXEC_RDRAND;
+                       exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
  
                if (nested) {
                        if (rdrand_enabled)
                                vmx->nested.nested_vmx_secondary_ctls_high |=
-                                       SECONDARY_EXEC_RDRAND;
+                                       SECONDARY_EXEC_RDRAND_EXITING;
                        else
                                vmx->nested.nested_vmx_secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_RDRAND;
+                                       ~SECONDARY_EXEC_RDRAND_EXITING;
                }
        }
  
        if (vmx_rdseed_supported()) {
                bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
                if (rdseed_enabled)
-                       exec_control &= ~SECONDARY_EXEC_RDSEED;
+                       exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
  
                if (nested) {
                        if (rdseed_enabled)
                                vmx->nested.nested_vmx_secondary_ctls_high |=
-                                       SECONDARY_EXEC_RDSEED;
+                                       SECONDARY_EXEC_RDSEED_EXITING;
                        else
                                vmx->nested.nested_vmx_secondary_ctls_high &=
-                                       ~SECONDARY_EXEC_RDSEED;
+                                       ~SECONDARY_EXEC_RDSEED_EXITING;
                }
        }
  
@@@ -5426,7 -5425,7 +5425,7 @@@ static void ept_set_mmio_spte_mask(void
  /*
   * Sets up the vmcs for emulated real mode.
   */
- static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
  {
  #ifdef CONFIG_X86_64
        unsigned long a;
                vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
                vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
        }
-       return 0;
  }
  
  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
        vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+       if (kvm_mpx_supported())
+               vmcs_write64(GUEST_BNDCFGS, 0);
  
        setup_msrs(vmx);
  
  
        kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
  
 -      if (kvm_vcpu_apicv_active(vcpu))
 -              memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 -
        if (vmx->vpid != 0)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
  
@@@ -5912,8 -5914,7 +5911,7 @@@ static int handle_exception(struct kvm_
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
                /* EPT won't cause page fault directly */
                WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
-               return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
-                               true);
+               return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
        }
  
        ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@@ -6747,16 -6748,14 +6745,14 @@@ static __init int hardware_setup(void
  
        if (!cpu_has_vmx_ept() ||
            !cpu_has_vmx_ept_4levels() ||
-           !cpu_has_vmx_ept_mt_wb()) {
+           !cpu_has_vmx_ept_mt_wb() ||
+           !cpu_has_vmx_invept_global())
                enable_ept = 0;
-               enable_unrestricted_guest = 0;
-               enable_ept_ad_bits = 0;
-       }
  
        if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
                enable_ept_ad_bits = 0;
  
-       if (!cpu_has_vmx_unrestricted_guest())
+       if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
                enable_unrestricted_guest = 0;
  
        if (!cpu_has_vmx_flexpriority())
        if (enable_ept && !cpu_has_vmx_ept_2m_page())
                kvm_disable_largepages();
  
-       if (!cpu_has_vmx_ple())
+       if (!cpu_has_vmx_ple()) {
                ple_gap = 0;
+               ple_window = 0;
+               ple_window_grow = 0;
+               ple_window_max = 0;
+               ple_window_shrink = 0;
+       }
  
        if (!cpu_has_vmx_apicv()) {
                enable_apicv = 0;
@@@ -8415,9 -8419,9 +8416,9 @@@ static bool nested_vmx_exit_reflected(s
        case EXIT_REASON_RDPMC:
                return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
        case EXIT_REASON_RDRAND:
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
        case EXIT_REASON_RDSEED:
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
        case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
                return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
        case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@@ -9475,7 -9479,6 +9476,6 @@@ static void vmx_switch_vmcs(struct kvm_
        vmx->loaded_vmcs = vmcs;
        vmx_vcpu_put(vcpu);
        vmx_vcpu_load(vcpu, cpu);
-       vcpu->cpu = cpu;
        put_cpu();
  }
  
@@@ -9556,11 -9559,9 +9556,9 @@@ static struct kvm_vcpu *vmx_create_vcpu
        cpu = get_cpu();
        vmx_vcpu_load(&vmx->vcpu, cpu);
        vmx->vcpu.cpu = cpu;
-       err = vmx_vcpu_setup(vmx);
+       vmx_vcpu_setup(vmx);
        vmx_vcpu_put(&vmx->vcpu);
        put_cpu();
-       if (err)
-               goto free_vmcs;
        if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
                err = alloc_apic_access_page(kvm);
                if (err)
        }
  
        if (enable_ept) {
-               if (!kvm->arch.ept_identity_map_addr)
-                       kvm->arch.ept_identity_map_addr =
-                               VMX_EPT_IDENTITY_PAGETABLE_ADDR;
                err = init_rmode_identity_map(kvm);
                if (err)
                        goto free_vmcs;
@@@ -11325,6 -11323,8 +11320,8 @@@ static void load_vmcs12_host_state(stru
        vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
        vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
        vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+       vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
+       vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
  
        /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
        if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
@@@ -11421,8 -11421,11 +11418,11 @@@ static void nested_vmx_vmexit(struct kv
        leave_guest_mode(vcpu);
  
        if (likely(!vmx->fail)) {
-               prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
-                              exit_qualification);
+               if (exit_reason == -1)
+                       sync_vmcs12(vcpu, vmcs12);
+               else
+                       prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+                                      exit_qualification);
  
                if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
                                         vmcs12->vm_exit_msr_store_count))
         */
        kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
  
-       if (enable_shadow_vmcs)
+       if (enable_shadow_vmcs && exit_reason != -1)
                vmx->nested.sync_shadow_vmcs = true;
  
        /* in case we halted in L2 */
                                INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
                }
  
-               trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
-                                              vmcs12->exit_qualification,
-                                              vmcs12->idt_vectoring_info_field,
-                                              vmcs12->vm_exit_intr_info,
-                                              vmcs12->vm_exit_intr_error_code,
-                                              KVM_ISA_VMX);
+               if (exit_reason != -1)
+                       trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+                                                      vmcs12->exit_qualification,
+                                                      vmcs12->idt_vectoring_info_field,
+                                                      vmcs12->vm_exit_intr_info,
+                                                      vmcs12->vm_exit_intr_error_code,
+                                                      KVM_ISA_VMX);
  
                load_vmcs12_host_state(vcpu, vmcs12);
  
@@@ -11938,6 -11942,54 +11939,54 @@@ static void vmx_setup_mce(struct kvm_vc
                        ~FEATURE_CONTROL_LMCE;
  }
  
+ static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
+ {
+       /* we need a nested vmexit to enter SMM, postpone if run is pending */
+       if (to_vmx(vcpu)->nested.nested_run_pending)
+               return 0;
+       return 1;
+ }
+ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
+       if (vmx->nested.smm.guest_mode)
+               nested_vmx_vmexit(vcpu, -1, 0, 0);
+       vmx->nested.smm.vmxon = vmx->nested.vmxon;
+       vmx->nested.vmxon = false;
+       return 0;
+ }
+ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int ret;
+       if (vmx->nested.smm.vmxon) {
+               vmx->nested.vmxon = true;
+               vmx->nested.smm.vmxon = false;
+       }
+       if (vmx->nested.smm.guest_mode) {
+               vcpu->arch.hflags &= ~HF_SMM_MASK;
+               ret = enter_vmx_non_root_mode(vcpu, false);
+               vcpu->arch.hflags |= HF_SMM_MASK;
+               if (ret)
+                       return ret;
+               vmx->nested.smm.guest_mode = false;
+       }
+       return 0;
+ }
+ static int enable_smi_window(struct kvm_vcpu *vcpu)
+ {
+       return 0;
+ }
  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
  #endif
  
        .setup_mce = vmx_setup_mce,
+       .smi_allowed = vmx_smi_allowed,
+       .pre_enter_smm = vmx_pre_enter_smm,
+       .pre_leave_smm = vmx_pre_leave_smm,
+       .enable_smi_window = enable_smi_window,
  };
  
  static int __init vmx_init(void)
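
The SMM hooks added above stash the nested vmxon and guest-mode flags on SMM entry (after forcing a vmexit from L2) and restore them on RSM. A toy model of that hand-off, with a made-up state structure instead of the real struct vcpu_vmx:

#include <stdbool.h>
#include <stdio.h>

struct vmx_state {
	bool vmxon, guest_mode;
	struct { bool vmxon, guest_mode; } smm;
};

static void pre_enter_smm(struct vmx_state *v)
{
	v->smm.guest_mode = v->guest_mode;
	v->guest_mode = false;			/* models the forced nested vmexit */
	v->smm.vmxon = v->vmxon;
	v->vmxon = false;
}

static void pre_leave_smm(struct vmx_state *v)
{
	if (v->smm.vmxon) {
		v->vmxon = true;
		v->smm.vmxon = false;
	}
	if (v->smm.guest_mode) {
		v->guest_mode = true;		/* models re-entering non-root mode */
		v->smm.guest_mode = false;
	}
}

int main(void)
{
	struct vmx_state v = { .vmxon = true, .guest_mode = true };

	pre_enter_smm(&v);
	pre_leave_smm(&v);
	printf("vmxon=%d guest_mode=%d\n", v.vmxon, v.guest_mode);	/* 1 1 */
	return 0;
}
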
index 538bfa8ba9b4a321beb3157ad74cb41723eb44a0,061476e92db724f9fcce3acd61483260f365a5fd..57cb2f00fc07ce7f5ffb526bd9bb03ed11287626
@@@ -77,7 -77,6 +77,7 @@@ static bool arch_timer_mem_use_virtual
  static bool arch_counter_suspend_stop;
  static bool vdso_default = true;
  
 +static cpumask_t evtstrm_available = CPU_MASK_NONE;
  static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
  
  static int __init early_evtstrm_cfg(char *buf)
@@@ -159,6 -158,7 +159,7 @@@ u32 arch_timer_reg_read(int access, enu
   * if we don't have the cp15 accessors we won't have a problem.
   */
  u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct;
+ EXPORT_SYMBOL_GPL(arch_timer_read_counter);
  
  static u64 arch_counter_read(struct clocksource *cs)
  {
@@@ -218,6 -218,11 +219,11 @@@ static u32 notrace fsl_a008585_read_cnt
        return __fsl_a008585_read_reg(cntv_tval_el0);
  }
  
+ static u64 notrace fsl_a008585_read_cntpct_el0(void)
+ {
+       return __fsl_a008585_read_reg(cntpct_el0);
+ }
  static u64 notrace fsl_a008585_read_cntvct_el0(void)
  {
        return __fsl_a008585_read_reg(cntvct_el0);
@@@ -259,6 -264,11 +265,11 @@@ static u32 notrace hisi_161010101_read_
        return __hisi_161010101_read_reg(cntv_tval_el0);
  }
  
+ static u64 notrace hisi_161010101_read_cntpct_el0(void)
+ {
+       return __hisi_161010101_read_reg(cntpct_el0);
+ }
  static u64 notrace hisi_161010101_read_cntvct_el0(void)
  {
        return __hisi_161010101_read_reg(cntvct_el0);
@@@ -289,6 -299,15 +300,15 @@@ static struct ate_acpi_oem_info hisi_16
  #endif
  
  #ifdef CONFIG_ARM64_ERRATUM_858921
+ static u64 notrace arm64_858921_read_cntpct_el0(void)
+ {
+       u64 old, new;
+       old = read_sysreg(cntpct_el0);
+       new = read_sysreg(cntpct_el0);
+       return (((old ^ new) >> 32) & 1) ? old : new;
+ }
  static u64 notrace arm64_858921_read_cntvct_el0(void)
  {
        u64 old, new;
  #endif
  
  #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND
 -DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *,
 -             timer_unstable_counter_workaround);
 +DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround);
  EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround);
  
  DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
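
The new *_read_cntpct_el0 accessors reuse the double-read trick already applied to the virtual counter. For ARM erratum 858921 the counter is read twice and the older value is kept whenever bit 32 differs between the two reads, i.e. when a low-word rollover happened in between. A stand-alone sketch of that selection:

#include <stdint.h>
#include <stdio.h>

static uint64_t erratum_858921_pick(uint64_t old, uint64_t new)
{
	return (((old ^ new) >> 32) & 1) ? old : new;
}

int main(void)
{
	/* rollover between the two reads: the earlier value is kept */
	printf("%llx\n", (unsigned long long)
	       erratum_858921_pick(0xffffffffULL, 0x100000000ULL));
	return 0;
}
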
@@@ -310,16 -330,19 +330,19 @@@ static void erratum_set_next_event_tval
                                                struct clock_event_device *clk)
  {
        unsigned long ctrl;
-       u64 cval = evt + arch_counter_get_cntvct();
+       u64 cval;
  
        ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
        ctrl |= ARCH_TIMER_CTRL_ENABLE;
        ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
  
-       if (access == ARCH_TIMER_PHYS_ACCESS)
+       if (access == ARCH_TIMER_PHYS_ACCESS) {
+               cval = evt + arch_counter_get_cntpct();
                write_sysreg(cval, cntp_cval_el0);
-       else
+       } else {
+               cval = evt + arch_counter_get_cntvct();
                write_sysreg(cval, cntv_cval_el0);
+       }
  
        arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
  }
@@@ -346,6 -369,7 +369,7 @@@ static const struct arch_timer_erratum_
                .desc = "Freescale erratum a008585",
                .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0,
                .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0,
+               .read_cntpct_el0 = fsl_a008585_read_cntpct_el0,
                .read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
                .set_next_event_phys = erratum_set_next_event_tval_phys,
                .set_next_event_virt = erratum_set_next_event_tval_virt,
                .desc = "HiSilicon erratum 161010101",
                .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
                .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
+               .read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
                .read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
                .set_next_event_phys = erratum_set_next_event_tval_phys,
                .set_next_event_virt = erratum_set_next_event_tval_virt,
                .desc = "HiSilicon erratum 161010101",
                .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
                .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
+               .read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
                .read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
                .set_next_event_phys = erratum_set_next_event_tval_phys,
                .set_next_event_virt = erratum_set_next_event_tval_virt,
                .match_type = ate_match_local_cap_id,
                .id = (void *)ARM64_WORKAROUND_858921,
                .desc = "ARM erratum 858921",
+               .read_cntpct_el0 = arm64_858921_read_cntpct_el0,
                .read_cntvct_el0 = arm64_858921_read_cntvct_el0,
        },
  #endif
@@@ -740,7 -767,6 +767,7 @@@ static void arch_timer_evtstrm_enable(i
  #ifdef CONFIG_COMPAT
        compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
  #endif
 +      cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
  }
  
  static void arch_timer_configure_evtstream(void)
@@@ -865,16 -891,6 +892,16 @@@ u32 arch_timer_get_rate(void
        return arch_timer_rate;
  }
  
 +bool arch_timer_evtstrm_available(void)
 +{
 +      /*
 +       * We might get called from a preemptible context. This is fine
 +       * because availability of the event stream should always be the same
 +       * for a preemptible context and the context where we might resume a task.
 +       */
 +      return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
 +}
 +
  static u64 arch_counter_get_cntvct_mem(void)
  {
        u32 vct_lo, vct_hi, tmp_hi;
@@@ -901,7 -917,7 +928,7 @@@ static void __init arch_counter_registe
  
        /* Register the CP15 based counter if we have one */
        if (type & ARCH_TIMER_TYPE_CP15) {
-               if (IS_ENABLED(CONFIG_ARM64) ||
+               if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
                    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI)
                        arch_timer_read_counter = arch_counter_get_cntvct;
                else
@@@ -940,8 -956,6 +967,8 @@@ static int arch_timer_dying_cpu(unsigne
  {
        struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
  
 +      cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
 +
        arch_timer_stop(clk);
        return 0;
  }
@@@ -951,16 -965,10 +978,16 @@@ static DEFINE_PER_CPU(unsigned long, sa
  static int arch_timer_cpu_pm_notify(struct notifier_block *self,
                                    unsigned long action, void *hcpu)
  {
 -      if (action == CPU_PM_ENTER)
 +      if (action == CPU_PM_ENTER) {
                __this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
 -      else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT)
 +
 +              cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
 +      } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
                arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
 +
 +              if (elf_hwcap & HWCAP_EVTSTRM)
 +                      cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
 +      }
        return NOTIFY_OK;
  }
  
@@@ -1036,6 -1044,7 +1063,6 @@@ static int __init arch_timer_register(v
        if (err)
                goto out_unreg_notify;
  
 -
        /* Register and immediately configure the timer on the boot CPU */
        err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
                                "clockevents/arm/arch_timer:starting",
@@@ -1286,6 -1295,10 +1313,6 @@@ arch_timer_mem_find_best_frame(struct a
  
        iounmap(cntctlbase);
  
 -      if (!best_frame)
 -              pr_err("Unable to find a suitable frame in timer @ %pa\n",
 -                      &timer_mem->cntctlbase);
 -
        return best_frame;
  }
  
@@@ -1386,8 -1399,6 +1413,8 @@@ static int __init arch_timer_mem_of_ini
  
        frame = arch_timer_mem_find_best_frame(timer_mem);
        if (!frame) {
 +              pr_err("Unable to find a suitable frame in timer @ %pa\n",
 +                      &timer_mem->cntctlbase);
                ret = -EINVAL;
                goto out;
        }
@@@ -1436,7 -1447,7 +1463,7 @@@ arch_timer_mem_verify_cntfrq(struct arc
  static int __init arch_timer_mem_acpi_init(int platform_timer_count)
  {
        struct arch_timer_mem *timers, *timer;
 -      struct arch_timer_mem_frame *frame;
 +      struct arch_timer_mem_frame *frame, *best_frame = NULL;
        int timer_count, i, ret = 0;
  
        timers = kcalloc(platform_timer_count, sizeof(*timers),
        if (ret || !timer_count)
                goto out;
  
 -      for (i = 0; i < timer_count; i++) {
 -              ret = arch_timer_mem_verify_cntfrq(&timers[i]);
 -              if (ret) {
 -                      pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n");
 -                      goto out;
 -              }
 -      }
 -
        /*
         * While unlikely, it's theoretically possible that none of the frames
         * in a timer expose the combination of feature we want.
                timer = &timers[i];
  
                frame = arch_timer_mem_find_best_frame(timer);
 -              if (frame)
 -                      break;
 +              if (!best_frame)
 +                      best_frame = frame;
 +
 +              ret = arch_timer_mem_verify_cntfrq(timer);
 +              if (ret) {
 +                      pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n");
 +                      goto out;
 +              }
 +
 +              if (!best_frame) /* implies !frame */
 +                      /*
 +                       * Only complain about missing suitable frames if we
 +                       * haven't already found one in a previous iteration.
 +                       */
 +                      pr_err("Unable to find a suitable frame in timer @ %pa\n",
 +                              &timer->cntctlbase);
        }
  
 -      if (frame)
 -              ret = arch_timer_mem_frame_register(frame);
 +      if (best_frame)
 +              ret = arch_timer_mem_frame_register(best_frame);
  out:
        kfree(timers);
        return ret;
index b54b55597ffb9c8351ff98e97dc05535503ec493,854334a6f225488f2cd4dd89f47e61e1fd086a40..17221143f5057ce35f84f6021bf972f32b61bc48
@@@ -55,7 -55,6 +55,7 @@@ struct gic_chip_data 
        struct irq_domain       *domain;
        u64                     redist_stride;
        u32                     nr_redist_regions;
 +      bool                    has_rss;
        unsigned int            irq_nr;
        struct partition_desc   *ppi_descs[16];
  };
@@@ -64,9 -63,7 +64,9 @@@ static struct gic_chip_data gic_data __
  static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
  
  static struct gic_kvm_info gic_v3_kvm_info;
 +static DEFINE_PER_CPU(bool, has_rss);
  
 +#define MPIDR_RS(mpidr)                       (((mpidr) & 0xF0UL) >> 4)
  #define gic_data_rdist()              (this_cpu_ptr(gic_data.rdists.rdist))
  #define gic_data_rdist_rd_base()      (gic_data_rdist()->rd_base)
  #define gic_data_rdist_sgi_base()     (gic_data_rdist_rd_base() + SZ_64K)
@@@ -529,10 -526,6 +529,10 @@@ static void gic_update_vlpi_properties(
  
  static void gic_cpu_sys_reg_init(void)
  {
 +      int i, cpu = smp_processor_id();
 +      u64 mpidr = cpu_logical_map(cpu);
 +      u64 need_rss = MPIDR_RS(mpidr);
 +
        /*
         * Need to check that the SRE bit has actually been set. If
         * not, it means that SRE is disabled at EL2. We're going to
  
        /* ... and let's hit the road... */
        gic_write_grpen1(1);
 +
 +      /* Keep the RSS capability status in a per-CPU variable */
 +      per_cpu(has_rss, cpu) = !!(gic_read_ctlr() & ICC_CTLR_EL1_RSS);
 +
 +      /* Check that all CPUs are capable of sending SGIs to other CPUs */
 +      for_each_online_cpu(i) {
 +              bool have_rss = per_cpu(has_rss, i) && per_cpu(has_rss, cpu);
 +
 +              need_rss |= MPIDR_RS(cpu_logical_map(i));
 +              if (need_rss && (!have_rss))
 +                      pr_crit("CPU%d (%lx) can't SGI CPU%d (%lx), no RSS\n",
 +                              cpu, (unsigned long)mpidr,
 +                              i, (unsigned long)cpu_logical_map(i));
 +      }
 +
 +      /*
 +       * The GIC spec says that when ICC_CTLR_EL1.RSS==1 and GICD_TYPER.RSS==0,
 +       * writing the ICC_ASGI1R_EL1 register with RS != 0 is a CONSTRAINED
 +       * UNPREDICTABLE choice of:
 +       *   - The write is ignored.
 +       *   - The RS field is treated as 0.
 +       */
 +      if (need_rss && (!gic_data.has_rss))
 +              pr_crit_once("RSS is required but GICD doesn't support it\n");
  }
  
  static int gic_dist_supports_lpis(void)
@@@ -622,9 -591,6 +622,9 @@@ static void gic_cpu_init(void
  
  #ifdef CONFIG_SMP
  
 +#define MPIDR_TO_SGI_RS(mpidr)        (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT)
 +#define MPIDR_TO_SGI_CLUSTER_ID(mpidr)        ((mpidr) & ~0xFUL)
 +
  static int gic_starting_cpu(unsigned int cpu)
  {
        gic_cpu_init();
@@@ -639,6 -605,13 +639,6 @@@ static u16 gic_compute_target_list(int 
        u16 tlist = 0;
  
        while (cpu < nr_cpu_ids) {
 -              /*
 -               * If we ever get a cluster of more than 16 CPUs, just
 -               * scream and skip that CPU.
 -               */
 -              if (WARN_ON((mpidr & 0xff) >= 16))
 -                      goto out;
 -
                tlist |= 1 << (mpidr & 0xf);
  
                next_cpu = cpumask_next(cpu, mask);
  
                mpidr = cpu_logical_map(cpu);
  
 -              if (cluster_id != (mpidr & ~0xffUL)) {
 +              if (cluster_id != MPIDR_TO_SGI_CLUSTER_ID(mpidr)) {
                        cpu--;
                        goto out;
                }
@@@ -670,7 -643,6 +670,7 @@@ static void gic_send_sgi(u64 cluster_id
               MPIDR_TO_SGI_AFFINITY(cluster_id, 2)     |
               irq << ICC_SGI1R_SGI_ID_SHIFT            |
               MPIDR_TO_SGI_AFFINITY(cluster_id, 1)     |
 +             MPIDR_TO_SGI_RS(cluster_id)              |
               tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
  
        pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
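
With the RSS changes above, Aff0 is split in two: the low four bits select a bit in the 16-bit SGI target list, while the next four bits form the Range Selector that MPIDR_RS() extracts and gic_send_sgi() now ORs into ICC_SGI1R_EL1. A small sketch of that split, for a hypothetical MPIDR value:

#include <stdint.h>
#include <stdio.h>

#define MPIDR_RS(mpidr)		(((mpidr) & 0xF0UL) >> 4)

int main(void)
{
	unsigned long mpidr = 0x25;		/* hypothetical Aff0: range 2, CPU 5 */
	uint16_t tlist = 1u << (mpidr & 0xf);

	printf("rs=%lu tlist=%#x\n", MPIDR_RS(mpidr), (unsigned)tlist);	/* rs=2 tlist=0x20 */
	return 0;
}
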
@@@ -691,7 -663,7 +691,7 @@@ static void gic_raise_softirq(const str
        smp_wmb();
  
        for_each_cpu(cpu, mask) {
 -              unsigned long cluster_id = cpu_logical_map(cpu) & ~0xffUL;
 +              u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
                u16 tlist;
  
                tlist = gic_compute_target_list(&cpu, mask, cluster_id);
@@@ -1035,10 -1007,6 +1035,10 @@@ static int __init gic_init_bases(void _
                goto out_free;
        }
  
 +      gic_data.has_rss = !!(typer & GICD_TYPER_RSS);
 +      pr_info("Distributor has %sRange Selector support\n",
 +              gic_data.has_rss ? "" : "no ");
 +
        set_handle_irq(gic_handle_irq);
  
        gic_update_vlpi_properties();
@@@ -1260,7 -1228,9 +1260,9 @@@ static int __init gic_of_init(struct de
                goto out_unmap_rdist;
  
        gic_populate_ppi_partitions(node);
-       gic_of_setup_kvm_info(node);
+       if (static_key_true(&supports_deactivate))
+               gic_of_setup_kvm_info(node);
        return 0;
  
  out_unmap_rdist:
@@@ -1549,7 -1519,9 +1551,9 @@@ gic_acpi_init(struct acpi_subtable_head
                goto out_fwhandle_free;
  
        acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
-       gic_acpi_setup_kvm_info();
+       if (static_key_true(&supports_deactivate))
+               gic_acpi_setup_kvm_info();
  
        return 0;
  
index f641e8e2c78d1e7af926b808fa46d00285c86fff,cd9371b749c2e67902ec7938230bcf70fb7ab075..121af5cf688f957fa209ef29aa9656a79674c5ef
@@@ -1256,19 -1256,6 +1256,19 @@@ static void gic_teardown(struct gic_chi
  
  #ifdef CONFIG_OF
  static int gic_cnt __initdata;
 +static bool gicv2_force_probe;
 +
 +static int __init gicv2_force_probe_cfg(char *buf)
 +{
 +      return strtobool(buf, &gicv2_force_probe);
 +}
 +early_param("irqchip.gicv2_force_probe", gicv2_force_probe_cfg);
 +
 +static bool gic_check_gicv2(void __iomem *base)
 +{
 +      u32 val = readl_relaxed(base + GIC_CPU_IDENT);
 +      return (val & 0xff0fff) == 0x02043B;
 +}
  
  static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
  {
  
        if (!is_hyp_mode_available())
                return false;
 -      if (resource_size(&cpuif_res) < SZ_8K)
 -              return false;
 -      if (resource_size(&cpuif_res) == SZ_128K) {
 -              u32 val_low, val_high;
 +      if (resource_size(&cpuif_res) < SZ_8K) {
 +              void __iomem *alt;
 +              /*
 +               * Check for a stupid firmware that only exposes the
 +               * first page of a GICv2.
 +               */
 +              if (!gic_check_gicv2(*base))
 +                      return false;
 +
 +              if (!gicv2_force_probe) {
 +                      pr_warn("GIC: GICv2 detected, but range too small and irqchip.gicv2_force_probe not set\n");
 +                      return false;
 +              }
 +
 +              alt = ioremap(cpuif_res.start, SZ_8K);
 +              if (!alt)
 +                      return false;
 +              if (!gic_check_gicv2(alt + SZ_4K)) {
 +                      /*
 +                       * The first page was that of a GICv2, and
 +                       * the second was *something*. Let's trust it
 +                       * to be a GICv2, and update the mapping.
 +                       */
 +                      pr_warn("GIC: GICv2 at %pa, but range is too small (broken DT?), assuming 8kB\n",
 +                              &cpuif_res.start);
 +                      iounmap(*base);
 +                      *base = alt;
 +                      return true;
 +              }
  
                /*
 -               * Verify that we have the first 4kB of a GIC400
 +               * We detected *two* initial GICv2 pages in a
 +               * row. Could be a GICv2 aliased over two 64kB
 +               * pages. Update the resource, map the iospace, and
 +               * pray.
 +               */
 +              iounmap(alt);
 +              alt = ioremap(cpuif_res.start, SZ_128K);
 +              if (!alt)
 +                      return false;
 +              pr_warn("GIC: Aliased GICv2 at %pa, trying to find the canonical range over 128kB\n",
 +                      &cpuif_res.start);
 +              cpuif_res.end = cpuif_res.start + SZ_128K -1;
 +              iounmap(*base);
 +              *base = alt;
 +      }
 +      if (resource_size(&cpuif_res) == SZ_128K) {
 +              /*
 +               * Verify that we have the first 4kB of a GICv2
                 * aliased over the first 64kB by checking the
                 * GICC_IIDR register on both ends.
                 */
 -              val_low = readl_relaxed(*base + GIC_CPU_IDENT);
 -              val_high = readl_relaxed(*base + GIC_CPU_IDENT + 0xf000);
 -              if ((val_low & 0xffff0fff) != 0x0202043B ||
 -                  val_low != val_high)
 +              if (!gic_check_gicv2(*base) ||
 +                  !gic_check_gicv2(*base + 0xf000))
                        return false;
  
                /*
@@@ -1420,7 -1367,8 +1420,8 @@@ static void __init gic_of_setup_kvm_inf
        if (ret)
                return;
  
-       gic_set_kvm_info(&gic_v2_kvm_info);
+       if (static_key_true(&supports_deactivate))
+               gic_set_kvm_info(&gic_v2_kvm_info);
  }
  
  int __init
@@@ -1652,7 -1600,8 +1653,8 @@@ static int __init gic_v2_acpi_init(stru
        if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
                gicv2m_init(NULL, gic_data[0].domain);
  
-       gic_acpi_setup_kvm_info();
+       if (static_key_true(&supports_deactivate))
+               gic_acpi_setup_kvm_info();
  
        return 0;
  }
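
The irq-gic.c hunks work around firmware that describes too small a GICv2 CPU interface region, gated by the new irqchip.gicv2_force_probe command-line parameter. The identification test they rely on is a masked compare of GICC_IIDR; below is a minimal restatement of that check with the mask and value copied from the hunk, applied to a raw register value rather than an ioremapped region.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Same test as gic_check_gicv2() above: keep the Implementer field, the
 * architecture-version nibble and the low nibble of the ProductID of
 * GICC_IIDR, ignore the revision and the remaining ProductID bits, so the
 * check is not tied to one exact product revision.
 */
static bool looks_like_gicv2(uint32_t iidr)
{
	return (iidr & 0x00ff0fff) == 0x0002043B;
}

int main(void)
{
	/* 0x0202043B is the GICC_IIDR value the removed GIC-400-only check expected. */
	printf("0x0202043B -> %s\n", looks_like_gicv2(0x0202043B) ? "GICv2" : "not GICv2");
	printf("0x00000000 -> %s\n", looks_like_gicv2(0x00000000) ? "GICv2" : "not GICv2");
	return 0;
}
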
diff --combined include/uapi/linux/kvm.h
index 7e99999d6236fa2940fa2b565442e8b1b1331407,b605956968368ac86de08e7c0b4753d3940066f1..282d7613fce8788bc466913d7fcacc960dd1c6de
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
  #ifndef __LINUX_KVM_H
  #define __LINUX_KVM_H
  
@@@ -931,6 -930,7 +931,7 @@@ struct kvm_ppc_resize_hpt 
  #define KVM_CAP_PPC_SMT_POSSIBLE 147
  #define KVM_CAP_HYPERV_SYNIC2 148
  #define KVM_CAP_HYPERV_VP_INDEX 149
+ #define KVM_CAP_S390_AIS_MIGRATION 150
  
  #ifdef KVM_CAP_IRQ_ROUTING
  
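The uapi hunk only adds a capability number, KVM_CAP_S390_AIS_MIGRATION (150). Userspace discovers it the same way as any other capability, via KVM_CHECK_EXTENSION on the /dev/kvm file descriptor; a minimal sketch, with a fallback define for builds against older headers:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef KVM_CAP_S390_AIS_MIGRATION
#define KVM_CAP_S390_AIS_MIGRATION 150	/* value from the hunk above */
#endif

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int ret;

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* > 0 means the running kernel can migrate s390 AIS state. */
	ret = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_S390_AIS_MIGRATION);
	printf("KVM_CAP_S390_AIS_MIGRATION: %s\n",
	       ret > 0 ? "supported" : "not supported");
	return 0;
}
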
diff --combined virt/kvm/arm/arm.c
index 4cf9b91e6c9b28ab0d2c92f23701daa7f9ad24ce,bc126fb99a3d2ce1c4bc55781c1db51f4aada264..772bf74ac2e9ae8380e0ba2b87b385883eca6d4c
@@@ -307,8 -307,7 +307,7 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
  
  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
  {
-       return kvm_timer_should_fire(vcpu_vtimer(vcpu)) ||
-              kvm_timer_should_fire(vcpu_ptimer(vcpu));
+       return kvm_timer_is_pending(vcpu);
  }
  
  void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
@@@ -354,18 -353,18 +353,18 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
        vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
  
        kvm_arm_set_running_vcpu(vcpu);
        kvm_vgic_load(vcpu);
+       kvm_timer_vcpu_load(vcpu);
  }
  
  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
+       kvm_timer_vcpu_put(vcpu);
        kvm_vgic_put(vcpu);
  
        vcpu->cpu = -1;
  
        kvm_arm_set_running_vcpu(NULL);
-       kvm_timer_vcpu_put(vcpu);
  }
  
  static void vcpu_power_off(struct kvm_vcpu *vcpu)
@@@ -652,16 -651,12 +651,15 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                 */
                preempt_disable();
  
 +              /* Flush FP/SIMD state that can't survive guest entry/exit */
 +              kvm_fpsimd_flush_cpu_state();
 +
                kvm_pmu_flush_hwstate(vcpu);
  
-               kvm_timer_flush_hwstate(vcpu);
-               kvm_vgic_flush_hwstate(vcpu);
                local_irq_disable();
  
+               kvm_vgic_flush_hwstate(vcpu);
                /*
                 * If we have a signal pending, or need to notify a userspace
                 * irqchip about timer or PMU level changes, then we exit (and
                if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
                    kvm_request_pending(vcpu)) {
                        vcpu->mode = OUTSIDE_GUEST_MODE;
-                       local_irq_enable();
                        kvm_pmu_sync_hwstate(vcpu);
                        kvm_timer_sync_hwstate(vcpu);
                        kvm_vgic_sync_hwstate(vcpu);
+                       local_irq_enable();
                        preempt_enable();
                        continue;
                }
  
                kvm_arm_clear_debug(vcpu);
  
+               /*
+                * We must sync the PMU state before the vgic state so
+                * that the vgic can properly sample the updated state of the
+                * interrupt line.
+                */
+               kvm_pmu_sync_hwstate(vcpu);
+               /*
+                * Sync the vgic state before syncing the timer state because
+                * the timer code needs to know if the virtual timer
+                * interrupts are active.
+                */
+               kvm_vgic_sync_hwstate(vcpu);
+               /*
+                * Sync the timer hardware state before enabling interrupts as
+                * we don't want vtimer interrupts to race with syncing the
+                * timer virtual interrupt state.
+                */
+               kvm_timer_sync_hwstate(vcpu);
                /*
                 * We may have taken a host interrupt in HYP mode (ie
                 * while executing the guest). This interrupt is still
                guest_exit();
                trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
  
-               /*
-                * We must sync the PMU and timer state before the vgic state so
-                * that the vgic can properly sample the updated state of the
-                * interrupt line.
-                */
-               kvm_pmu_sync_hwstate(vcpu);
-               kvm_timer_sync_hwstate(vcpu);
-               kvm_vgic_sync_hwstate(vcpu);
                preempt_enable();
  
                ret = handle_exit(vcpu, run, ret);
@@@ -1329,12 -1335,21 +1338,12 @@@ static void teardown_hyp_mode(void
  {
        int cpu;
  
 -      if (is_kernel_in_hyp_mode())
 -              return;
 -
        free_hyp_pgds();
        for_each_possible_cpu(cpu)
                free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
        hyp_cpu_pm_exit();
  }
  
 -static int init_vhe_mode(void)
 -{
 -      kvm_info("VHE mode initialized successfully\n");
 -      return 0;
 -}
 -
  /**
   * Inits Hyp-mode on all online CPUs
   */
@@@ -1415,6 -1430,8 +1424,6 @@@ static int init_hyp_mode(void
                }
        }
  
 -      kvm_info("Hyp mode initialized successfully\n");
 -
        return 0;
  
  out_err:
@@@ -1448,7 -1465,6 +1457,7 @@@ int kvm_arch_init(void *opaque
  {
        int err;
        int ret, cpu;
 +      bool in_hyp_mode;
  
        if (!is_hyp_mode_available()) {
                kvm_err("HYP mode not available\n");
        if (err)
                return err;
  
 -      if (is_kernel_in_hyp_mode())
 -              err = init_vhe_mode();
 -      else
 +      in_hyp_mode = is_kernel_in_hyp_mode();
 +
 +      if (!in_hyp_mode) {
                err = init_hyp_mode();
 -      if (err)
 -              goto out_err;
 +              if (err)
 +                      goto out_err;
 +      }
  
        err = init_subsystems();
        if (err)
                goto out_hyp;
  
 +      if (in_hyp_mode)
 +              kvm_info("VHE mode initialized successfully\n");
 +      else
 +              kvm_info("Hyp mode initialized successfully\n");
 +
        return 0;
  
  out_hyp:
 -      teardown_hyp_mode();
 +      if (!in_hyp_mode)
 +              teardown_hyp_mode();
  out_err:
        teardown_common_resources();
        return err;
diff --combined virt/kvm/arm/vgic/vgic-its.c
index 547f12dc4d543bafd3b28c74352761aafc62f0a0,40791c12171059c8444928f7e03e5edacb057e68..d2a99ab0ade7a2a83a36466d3e76bff88b1e48a5
@@@ -278,6 -278,7 +278,7 @@@ static int update_lpi_config(struct kv
        u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
        u8 prop;
        int ret;
+       unsigned long flags;
  
        ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
                             &prop, 1);
        if (ret)
                return ret;
  
-       spin_lock(&irq->irq_lock);
+       spin_lock_irqsave(&irq->irq_lock, flags);
  
        if (!filter_vcpu || filter_vcpu == irq->target_vcpu) {
                irq->priority = LPI_PROP_PRIORITY(prop);
                irq->enabled = LPI_PROP_ENABLE_BIT(prop);
  
-               vgic_queue_irq_unlock(kvm, irq);
+               vgic_queue_irq_unlock(kvm, irq, flags);
        } else {
-               spin_unlock(&irq->irq_lock);
+               spin_unlock_irqrestore(&irq->irq_lock, flags);
        }
  
        return 0;
@@@ -393,6 -394,7 +394,7 @@@ static int its_sync_lpi_pending_table(s
        int ret = 0;
        u32 *intids;
        int nr_irqs, i;
+       unsigned long flags;
  
        nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
        if (nr_irqs < 0)
                }
  
                irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
-               spin_lock(&irq->irq_lock);
+               spin_lock_irqsave(&irq->irq_lock, flags);
                irq->pending_latch = pendmask & (1U << bit_nr);
-               vgic_queue_irq_unlock(vcpu->kvm, irq);
+               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
  
@@@ -515,6 -517,7 +517,7 @@@ static int vgic_its_trigger_msi(struct 
  {
        struct kvm_vcpu *vcpu;
        struct its_ite *ite;
+       unsigned long flags;
  
        if (!its->enabled)
                return -EBUSY;
        if (!vcpu->arch.vgic_cpu.lpis_enabled)
                return -EBUSY;
  
-       spin_lock(&ite->irq->irq_lock);
+       spin_lock_irqsave(&ite->irq->irq_lock, flags);
        ite->irq->pending_latch = true;
-       vgic_queue_irq_unlock(kvm, ite->irq);
+       vgic_queue_irq_unlock(kvm, ite->irq, flags);
  
        return 0;
  }
@@@ -894,7 -897,7 +897,7 @@@ static int vgic_its_cmd_handle_mapi(str
  }
  
  /* Requires the its_lock to be held. */
- static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
+ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
  {
        struct its_ite *ite, *temp;
  
        kfree(device);
  }
  
+ /* its lock must be held */
+ static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
+ {
+       struct its_device *cur, *temp;
+       list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
+               vgic_its_free_device(kvm, cur);
+ }
+ /* its lock must be held */
+ static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its)
+ {
+       struct its_collection *cur, *temp;
+       list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list)
+               vgic_its_free_collection(its, cur->collection_id);
+ }
  /* Must be called with its_lock mutex held */
  static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
                                                u32 device_id, gpa_t itt_addr,
@@@ -957,7 -978,7 +978,7 @@@ static int vgic_its_cmd_handle_mapd(str
         * by removing the mapping and re-establishing it.
         */
        if (device)
-               vgic_its_unmap_device(kvm, device);
+               vgic_its_free_device(kvm, device);
  
        /*
         * The spec does not say whether unmapping a not-mapped device
@@@ -1410,7 -1431,7 +1431,7 @@@ static void vgic_mmio_write_its_baser(s
                                      unsigned long val)
  {
        const struct vgic_its_abi *abi = vgic_its_get_abi(its);
-       u64 entry_size, device_type;
+       u64 entry_size, table_type;
        u64 reg, *regptr, clearbits = 0;
  
        /* When GITS_CTLR.Enable is 1, we ignore write accesses. */
        case 0:
                regptr = &its->baser_device_table;
                entry_size = abi->dte_esz;
-               device_type = GITS_BASER_TYPE_DEVICE;
+               table_type = GITS_BASER_TYPE_DEVICE;
                break;
        case 1:
                regptr = &its->baser_coll_table;
                entry_size = abi->cte_esz;
-               device_type = GITS_BASER_TYPE_COLLECTION;
+               table_type = GITS_BASER_TYPE_COLLECTION;
                clearbits = GITS_BASER_INDIRECT;
                break;
        default:
        reg &= ~clearbits;
  
        reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT;
-       reg |= device_type << GITS_BASER_TYPE_SHIFT;
+       reg |= table_type << GITS_BASER_TYPE_SHIFT;
        reg = vgic_sanitise_its_baser(reg);
  
        *regptr = reg;
+       if (!(reg & GITS_BASER_VALID)) {
+               /* Take the its_lock to prevent a race with a save/restore */
+               mutex_lock(&its->its_lock);
+               switch (table_type) {
+               case GITS_BASER_TYPE_DEVICE:
+                       vgic_its_free_device_list(kvm, its);
+                       break;
+               case GITS_BASER_TYPE_COLLECTION:
+                       vgic_its_free_collection_list(kvm, its);
+                       break;
+               }
+               mutex_unlock(&its->its_lock);
+       }
  }
  
  static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
@@@ -1466,16 -1501,6 +1501,16 @@@ static void vgic_mmio_write_its_ctlr(st
  {
        mutex_lock(&its->cmd_lock);
  
 +      /*
 +       * It is UNPREDICTABLE to enable the ITS if any of the CBASER or
 +       * device/collection BASER are invalid
 +       */
 +      if (!its->enabled && (val & GITS_CTLR_ENABLE) &&
 +              (!(its->baser_device_table & GITS_BASER_VALID) ||
 +               !(its->baser_coll_table & GITS_BASER_VALID) ||
 +               !(its->cbaser & GITS_CBASER_VALID)))
 +              goto out;
 +
        its->enabled = !!(val & GITS_CTLR_ENABLE);
  
        /*
         */
        vgic_its_process_commands(kvm, its);
  
 +out:
        mutex_unlock(&its->cmd_lock);
  }
  
@@@ -1623,46 -1647,17 +1658,17 @@@ static int vgic_its_create(struct kvm_d
        return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
  }
  
- static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev)
- {
-       struct its_ite *ite, *tmp;
-       list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list)
-               its_free_ite(kvm, ite);
-       list_del(&dev->dev_list);
-       kfree(dev);
- }
  static void vgic_its_destroy(struct kvm_device *kvm_dev)
  {
        struct kvm *kvm = kvm_dev->kvm;
        struct vgic_its *its = kvm_dev->private;
-       struct list_head *cur, *temp;
-       /*
-        * We may end up here without the lists ever having been initialized.
-        * Check this and bail out early to avoid dereferencing a NULL pointer.
-        */
-       if (!its->device_list.next)
-               return;
  
        mutex_lock(&its->its_lock);
-       list_for_each_safe(cur, temp, &its->device_list) {
-               struct its_device *dev;
  
-               dev = list_entry(cur, struct its_device, dev_list);
-               vgic_its_free_device(kvm, dev);
-       }
+       vgic_its_free_device_list(kvm, its);
+       vgic_its_free_collection_list(kvm, its);
  
-       list_for_each_safe(cur, temp, &its->collection_list) {
-               struct its_collection *coll;
-               coll = list_entry(cur, struct its_collection, coll_list);
-               list_del(cur);
-               kfree(coll);
-       }
        mutex_unlock(&its->its_lock);
        kfree(its);
  }
  
@@@ -1812,33 -1807,37 +1818,33 @@@ typedef int (*entry_fn_t)(struct vgic_i
  static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
                          int start_id, entry_fn_t fn, void *opaque)
  {
 -      void *entry = kzalloc(esz, GFP_KERNEL);
        struct kvm *kvm = its->dev->kvm;
        unsigned long len = size;
        int id = start_id;
        gpa_t gpa = base;
 +      char entry[esz];
        int ret;
  
 +      memset(entry, 0, esz);
 +
        while (len > 0) {
                int next_offset;
                size_t byte_offset;
  
                ret = kvm_read_guest(kvm, gpa, entry, esz);
                if (ret)
 -                      goto out;
 +                      return ret;
  
                next_offset = fn(its, id, entry, opaque);
 -              if (next_offset <= 0) {
 -                      ret = next_offset;
 -                      goto out;
 -              }
 +              if (next_offset <= 0)
 +                      return next_offset;
  
                byte_offset = next_offset * esz;
                id += next_offset;
                gpa += byte_offset;
                len -= byte_offset;
        }
 -      ret =  1;
 -
 -out:
 -      kfree(entry);
 -      return ret;
 +      return 1;
  }
  
  /**
@@@ -1947,14 -1946,6 +1953,14 @@@ static int vgic_its_save_itt(struct vgi
        return 0;
  }
  
 +/**
 + * vgic_its_restore_itt - restore the ITT of a device
 + *
 + * @its: its handle
 + * @dev: device handle
 + *
 + * Return 0 on success, < 0 on error
 + */
  static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
  {
        const struct vgic_its_abi *abi = vgic_its_get_abi(its);
        ret = scan_its_table(its, base, max_size, ite_esz, 0,
                             vgic_its_restore_ite, dev);
  
 +      /* scan_its_table returns +1 if all ITEs are invalid */
 +      if (ret > 0)
 +              ret = 0;
 +
        return ret;
  }
  
@@@ -2067,12 -2054,11 +2073,12 @@@ static int vgic_its_device_cmp(void *pr
  static int vgic_its_save_device_tables(struct vgic_its *its)
  {
        const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 +      u64 baser = its->baser_device_table;
        struct its_device *dev;
        int dte_esz = abi->dte_esz;
 -      u64 baser;
  
 -      baser = its->baser_device_table;
 +      if (!(baser & GITS_BASER_VALID))
 +              return 0;
  
        list_sort(NULL, &its->device_list, vgic_its_device_cmp);
  
@@@ -2127,7 -2113,10 +2133,7 @@@ static int handle_l1_dte(struct vgic_it
        ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
                             l2_start_id, vgic_its_restore_dte, NULL);
  
 -      if (ret <= 0)
 -              return ret;
 -
 -      return 1;
 +      return ret;
  }
  
  /**
@@@ -2157,9 -2146,8 +2163,9 @@@ static int vgic_its_restore_device_tabl
                                     vgic_its_restore_dte, NULL);
        }
  
 +      /* scan_its_table returns +1 if all entries are invalid */
        if (ret > 0)
 -              ret = -EINVAL;
 +              ret = 0;
  
        return ret;
  }
@@@ -2216,17 -2204,17 +2222,17 @@@ static int vgic_its_restore_cte(struct 
  static int vgic_its_save_collection_table(struct vgic_its *its)
  {
        const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 +      u64 baser = its->baser_coll_table;
 +      gpa_t gpa = BASER_ADDRESS(baser);
        struct its_collection *collection;
        u64 val;
 -      gpa_t gpa;
        size_t max_size, filled = 0;
        int ret, cte_esz = abi->cte_esz;
  
 -      gpa = BASER_ADDRESS(its->baser_coll_table);
 -      if (!gpa)
 +      if (!(baser & GITS_BASER_VALID))
                return 0;
  
 -      max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
 +      max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
  
        list_for_each_entry(collection, &its->collection_list, coll_list) {
                ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
  static int vgic_its_restore_collection_table(struct vgic_its *its)
  {
        const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 +      u64 baser = its->baser_coll_table;
        int cte_esz = abi->cte_esz;
        size_t max_size, read = 0;
        gpa_t gpa;
        int ret;
  
 -      if (!(its->baser_coll_table & GITS_BASER_VALID))
 +      if (!(baser & GITS_BASER_VALID))
                return 0;
  
 -      gpa = BASER_ADDRESS(its->baser_coll_table);
 +      gpa = BASER_ADDRESS(baser);
  
 -      max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
 +      max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
  
        while (read < max_size) {
                ret = vgic_its_restore_cte(its, gpa, cte_esz);
                gpa += cte_esz;
                read += cte_esz;
        }
 +
 +      if (ret > 0)
 +              return 0;
 +
        return ret;
  }
  
   */
  static int vgic_its_save_tables_v0(struct vgic_its *its)
  {
-       struct kvm *kvm = its->dev->kvm;
        int ret;
  
-       mutex_lock(&kvm->lock);
-       mutex_lock(&its->its_lock);
-       if (!lock_all_vcpus(kvm)) {
-               mutex_unlock(&its->its_lock);
-               mutex_unlock(&kvm->lock);
-               return -EBUSY;
-       }
        ret = vgic_its_save_device_tables(its);
        if (ret)
-               goto out;
-       ret = vgic_its_save_collection_table(its);
+               return ret;
  
- out:
-       unlock_all_vcpus(kvm);
-       mutex_unlock(&its->its_lock);
-       mutex_unlock(&kvm->lock);
-       return ret;
+       return vgic_its_save_collection_table(its);
  }
  
  /**
   */
  static int vgic_its_restore_tables_v0(struct vgic_its *its)
  {
-       struct kvm *kvm = its->dev->kvm;
        int ret;
  
-       mutex_lock(&kvm->lock);
-       mutex_lock(&its->its_lock);
-       if (!lock_all_vcpus(kvm)) {
-               mutex_unlock(&its->its_lock);
-               mutex_unlock(&kvm->lock);
-               return -EBUSY;
-       }
        ret = vgic_its_restore_collection_table(its);
        if (ret)
-               goto out;
-       ret = vgic_its_restore_device_tables(its);
- out:
-       unlock_all_vcpus(kvm);
-       mutex_unlock(&its->its_lock);
-       mutex_unlock(&kvm->lock);
+               return ret;
  
-       return ret;
+       return vgic_its_restore_device_tables(its);
  }
  
  static int vgic_its_commit_v0(struct vgic_its *its)
        return 0;
  }
  
+ static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its)
+ {
+       /* We need to keep the ABI specific field values */
+       its->baser_coll_table &= ~GITS_BASER_VALID;
+       its->baser_device_table &= ~GITS_BASER_VALID;
+       its->cbaser = 0;
+       its->creadr = 0;
+       its->cwriter = 0;
+       its->enabled = 0;
+       vgic_its_free_device_list(kvm, its);
+       vgic_its_free_collection_list(kvm, its);
+ }
  static int vgic_its_has_attr(struct kvm_device *dev,
                             struct kvm_device_attr *attr)
  {
                switch (attr->attr) {
                case KVM_DEV_ARM_VGIC_CTRL_INIT:
                        return 0;
+               case KVM_DEV_ARM_ITS_CTRL_RESET:
+                       return 0;
                case KVM_DEV_ARM_ITS_SAVE_TABLES:
                        return 0;
                case KVM_DEV_ARM_ITS_RESTORE_TABLES:
        return -ENXIO;
  }
  
+ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
+ {
+       const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+       int ret = 0;
+       if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
+               return 0;
+       mutex_lock(&kvm->lock);
+       mutex_lock(&its->its_lock);
+       if (!lock_all_vcpus(kvm)) {
+               mutex_unlock(&its->its_lock);
+               mutex_unlock(&kvm->lock);
+               return -EBUSY;
+       }
+       switch (attr) {
+       case KVM_DEV_ARM_ITS_CTRL_RESET:
+               vgic_its_reset(kvm, its);
+               break;
+       case KVM_DEV_ARM_ITS_SAVE_TABLES:
+               ret = abi->save_tables(its);
+               break;
+       case KVM_DEV_ARM_ITS_RESTORE_TABLES:
+               ret = abi->restore_tables(its);
+               break;
+       }
+       unlock_all_vcpus(kvm);
+       mutex_unlock(&its->its_lock);
+       mutex_unlock(&kvm->lock);
+       return ret;
+ }
  static int vgic_its_set_attr(struct kvm_device *dev,
                             struct kvm_device_attr *attr)
  {
  
                return vgic_register_its_iodev(dev->kvm, its, addr);
        }
-       case KVM_DEV_ARM_VGIC_GRP_CTRL: {
-               const struct vgic_its_abi *abi = vgic_its_get_abi(its);
-               switch (attr->attr) {
-               case KVM_DEV_ARM_VGIC_CTRL_INIT:
-                       /* Nothing to do */
-                       return 0;
-               case KVM_DEV_ARM_ITS_SAVE_TABLES:
-                       return abi->save_tables(its);
-               case KVM_DEV_ARM_ITS_RESTORE_TABLES:
-                       return abi->restore_tables(its);
-               }
-       }
+       case KVM_DEV_ARM_VGIC_GRP_CTRL:
+               return vgic_its_ctrl(dev->kvm, its, attr->attr);
        case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
                u64 __user *uaddr = (u64 __user *)(long)attr->addr;
                u64 reg;
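
The vgic-its.c changes culminate in vgic_its_ctrl(), which funnels INIT, SAVE_TABLES, RESTORE_TABLES and the new RESET control through one properly locked path. From userspace the reset is issued through the usual device-attribute interface on the ITS device fd; the sketch below assumes the fd came from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_ITS and a build against arm64 uapi headers, and the fallback value for the new attribute is an assumption based on this series.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef KVM_DEV_ARM_ITS_CTRL_RESET
#define KVM_DEV_ARM_ITS_CTRL_RESET 4	/* assumed value, added by this series */
#endif

/* its_fd: descriptor returned by KVM_CREATE_DEVICE(KVM_DEV_TYPE_ARM_VGIC_ITS). */
int its_reset(int its_fd)
{
	struct kvm_device_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.group = KVM_DEV_ARM_VGIC_GRP_CTRL;
	attr.attr  = KVM_DEV_ARM_ITS_CTRL_RESET;

	if (ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr) < 0) {
		perror("KVM_DEV_ARM_ITS_CTRL_RESET");
		return -1;
	}
	return 0;
}

The same locked path now also refuses to enable the ITS while CBASER or either BASER is invalid, per the UNPREDICTABLE note added to vgic_mmio_write_its_ctlr().
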
diff --combined virt/kvm/kvm_main.c
index ce507ae1d4f50e6af019c94baaa800a8aaaa695c,c114d7948743f8e24420e29bd5d7837c887dce44..2dd1a9ca459988f6101c952b07e09995577e4986
@@@ -122,7 -122,6 +122,6 @@@ static void hardware_disable_all(void)
  
  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
  
- static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
  static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
  
  __visible bool kvm_rebooting;
@@@ -1679,11 -1678,12 +1678,12 @@@ void kvm_release_page_dirty(struct pag
  }
  EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
  
- static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
+ void kvm_release_pfn_dirty(kvm_pfn_t pfn)
  {
        kvm_set_pfn_dirty(pfn);
        kvm_release_pfn_clean(pfn);
  }
+ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
  
  void kvm_set_pfn_dirty(kvm_pfn_t pfn)
  {
@@@ -2302,7 -2302,7 +2302,7 @@@ void kvm_vcpu_on_spin(struct kvm_vcpu *
                                continue;
                        } else if (pass && i > last_boosted_vcpu)
                                break;
 -                      if (!ACCESS_ONCE(vcpu->preempted))
 +                      if (!READ_ONCE(vcpu->preempted))
                                continue;
                        if (vcpu == me)
                                continue;
@@@ -4010,7 -4010,7 +4010,7 @@@ int kvm_init(void *opaque, unsigned vcp
        if (!vcpu_align)
                vcpu_align = __alignof__(struct kvm_vcpu);
        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
-                                          0, NULL);
+                                          SLAB_ACCOUNT, NULL);
        if (!kvm_vcpu_cache) {
                r = -ENOMEM;
                goto out_free_3;
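
The final hunk switches the kvm_vcpu cache to SLAB_ACCOUNT, so vCPU allocations are charged to the allocating task's memory cgroup. The same one-flag change applies to any cache whose objects should be accounted; a minimal module-style sketch with illustrative names:

#include <linux/module.h>
#include <linux/slab.h>

struct my_obj {
	long payload[64];
};

static struct kmem_cache *my_cache;

static int __init my_cache_init(void)
{
	/* SLAB_ACCOUNT charges each allocation to the allocating task's memcg. */
	my_cache = kmem_cache_create("my_obj", sizeof(struct my_obj),
				     __alignof__(struct my_obj),
				     SLAB_ACCOUNT, NULL);
	return my_cache ? 0 : -ENOMEM;
}

static void __exit my_cache_exit(void)
{
	kmem_cache_destroy(my_cache);
}

module_init(my_cache_init);
module_exit(my_cache_exit);
MODULE_LICENSE("GPL");
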