Merge branch 'x86-seves-for-paolo' of https://git.kernel.org/pub/scm/linux/kernel...
author Paolo Bonzini <pbonzini@redhat.com>
Tue, 22 Sep 2020 10:43:17 +0000 (06:43 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 22 Sep 2020 10:43:17 +0000 (06:43 -0400)
23 files changed:
Documentation/virt/kvm/api.rst
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pvtime.c
arch/arm64/kvm/trace_arm.h
arch/arm64/kvm/trace_handle_exit.h
arch/mips/kvm/mips.c
arch/x86/kernel/kvm.c
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
include/linux/kvm_host.h
include/uapi/linux/kvm.h
virt/kvm/kvm_main.c

index eb3a1316f03ec91fc1a310b29ad2191493bf102f..51191b56e61cbc1af31df42f76cca3df6eea1b43 100644 (file)
@@ -6130,7 +6130,7 @@ HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
 8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
 -----------------------------------
 
-:Architecture: x86
+:Architectures: x86
 
 This capability indicates that KVM running on top of Hyper-V hypervisor
 enables Direct TLB flush for its guests meaning that TLB flush
@@ -6143,19 +6143,53 @@ in CPUID and only exposes Hyper-V identification. In this case, guest
 thinks it's running on Hyper-V and only uses Hyper-V hypercalls.
 
 8.22 KVM_CAP_S390_VCPU_RESETS
+-----------------------------
 
-Architectures: s390
+:Architectures: s390
 
 This capability indicates that the KVM_S390_NORMAL_RESET and
 KVM_S390_CLEAR_RESET ioctls are available.
 
 8.23 KVM_CAP_S390_PROTECTED
+---------------------------
 
-Architecture: s390
-
+:Architectures: s390
 
 This capability indicates that the Ultravisor has been initialized and
 KVM can therefore start protected VMs.
 This capability governs the KVM_S390_PV_COMMAND ioctl and the
 KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected
 guests when the state change is invalid.
+
+8.24 KVM_CAP_STEAL_TIME
+-----------------------
+
+:Architectures: arm64, x86
+
+This capability indicates that KVM supports steal time accounting.
+When steal time accounting is supported it may be enabled with
+architecture-specific interfaces.  This capability and the
+architecture-specific interfaces must be consistent, i.e. if one says
+the feature is supported, then the other should as well and vice versa.
+For arm64 see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL".
+For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME".
+
+8.25 KVM_CAP_S390_DIAG318
+-------------------------
+
+:Architectures: s390
+
+This capability enables a guest to set information about its control program
+(i.e. guest kernel type and version). The information is helpful during
+system/firmware service events, providing additional data about the guest
+environments running on the machine.
+
+The information is associated with the DIAGNOSE 0x318 instruction, which sets
+an 8-byte value consisting of a one-byte Control Program Name Code (CPNC) and
+a 7-byte Control Program Version Code (CPVC). The CPNC determines what
+environment the control program is running in (e.g. Linux, z/VM...), and the
+CPVC is used for information specific to the OS (e.g. Linux version, Linux
+distribution...).
+
+If this capability is available, then the CPNC and CPVC can be synchronized
+between KVM and userspace via the sync regs mechanism (KVM_SYNC_DIAG318).
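
As a quick aside on the new 8.24 capability: like any other extension, KVM_CAP_STEAL_TIME is discoverable through KVM_CHECK_EXTENSION. A minimal userspace sketch, not part of this merge, assuming installed headers recent enough to define KVM_CAP_STEAL_TIME:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* KVM_CHECK_EXTENSION returns 0 if the capability is absent. */
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_STEAL_TIME) > 0)
		printf("steal time accounting supported\n");
	else
		printf("steal time accounting not supported\n");
	return 0;
}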
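The DIAG 0x318 layout described in 8.25 is easy to get wrong, so here is a hedged sketch of assembling the 8-byte value; the helper name and the placement of the CPNC in the high-order byte are illustrative assumptions, not taken from this merge:

#include <stdint.h>

/* Pack the one-byte CPNC and the 7-byte CPVC into a single 8-byte value,
 * assuming the CPNC occupies the most significant byte. */
static inline uint64_t diag318_pack(uint8_t cpnc, uint64_t cpvc)
{
	return ((uint64_t)cpnc << 56) | (cpvc & 0x00ffffffffffffffULL);
}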
index 49a55be2b9a20a260cd6fd2c0a31c7f875a8c692..1cc5f5f72d0bfe12e869d7f907c744613c8f6c71 100644 (file)
@@ -298,15 +298,15 @@ static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
        return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
 }
 
-static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu)
 {
        return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW);
 }
 
+/* Always check for S1PTW *before* using this. */
 static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
 {
-       return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) ||
-               kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
+       return kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR;
 }
 
 static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
@@ -335,6 +335,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
        return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
 }
 
+static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu)
+{
+       return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu);
+}
+
 static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
 {
        return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
@@ -372,6 +377,9 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 {
+       if (kvm_vcpu_abt_iss1tw(vcpu))
+               return true;
+
        if (kvm_vcpu_trap_is_iabt(vcpu))
                return false;
 
index e52c927aade53856a3d07622710891d23712be45..905c2b87e05acc8fb778b11a2220cccaa90041d4 100644 (file)
@@ -368,7 +368,6 @@ struct kvm_vcpu_arch {
 
        /* Guest PV state */
        struct {
-               u64 steal;
                u64 last_steal;
                gpa_t base;
        } steal;
@@ -544,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
 gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
 void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
 
+bool kvm_arm_pvtime_supported(void);
 int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
                            struct kvm_device_attr *attr);
 int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
index 46dc3d75cf1359f0d128dfef9fe98f82caccc82d..b588c3b5c2f07b580be180ff727b3ff2db7d7c78 100644 (file)
@@ -206,6 +206,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                 */
                r = 1;
                break;
+       case KVM_CAP_STEAL_TIME:
+               r = kvm_arm_pvtime_supported();
+               break;
        default:
                r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
                break;
index 5b6b8fa00f0af061156e0077caa9d60f335a5b09..0261308bf944ac85d316a6d208c80b3e918ac931 100644 (file)
@@ -449,7 +449,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
                        kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
                        kvm_vcpu_dabt_isvalid(vcpu) &&
                        !kvm_vcpu_abt_issea(vcpu) &&
-                       !kvm_vcpu_dabt_iss1tw(vcpu);
+                       !kvm_vcpu_abt_iss1tw(vcpu);
 
                if (valid) {
                        int ret = __vgic_v2_perform_cpuif_access(vcpu);
index ba00bcc0c88463b31ac11d7c1632fb6c86f710b2..3d26b47a1343080cfb502733390f5dc72f2970f4 100644 (file)
@@ -1849,7 +1849,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
 
        write_fault = kvm_is_write_fault(vcpu);
-       exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
+       exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
        VM_BUG_ON(write_fault && exec_fault);
 
        if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
@@ -1877,6 +1877,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
            !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
                force_pte = true;
                vma_pagesize = PAGE_SIZE;
+               vma_shift = PAGE_SHIFT;
        }
 
        /*
@@ -1970,7 +1971,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                (fault_status == FSC_PERM &&
                 stage2_is_exec(mmu, fault_ipa, vma_pagesize));
 
-       if (vma_pagesize == PUD_SIZE) {
+       /*
+        * If PUD_SIZE == PMD_SIZE, there is no real PUD level, and
+        * all we have is a 2-level page table. Trying to map a PUD in
+        * this case would be fatally wrong.
+        */
+       if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE) {
                pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
 
                new_pud = kvm_pud_mkhuge(new_pud);
@@ -2125,7 +2131,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
                        goto out;
                }
 
-               if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+               if (kvm_vcpu_abt_iss1tw(vcpu)) {
                        kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
                        ret = 1;
                        goto out_unlock;
index f7b52ce1557ec393635d1fb71aa4a76274627335..920ac43077ad3b48023c917bdd2eb8fb925e22dc 100644 (file)
 void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
 {
        struct kvm *kvm = vcpu->kvm;
-       u64 steal;
-       __le64 steal_le;
-       u64 offset;
-       int idx;
        u64 base = vcpu->arch.steal.base;
+       u64 last_steal = vcpu->arch.steal.last_steal;
+       u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+       u64 steal = 0;
+       int idx;
 
        if (base == GPA_INVALID)
                return;
 
-       /* Let's do the local bookkeeping */
-       steal = vcpu->arch.steal.steal;
-       steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
-       vcpu->arch.steal.last_steal = current->sched_info.run_delay;
-       vcpu->arch.steal.steal = steal;
-
-       steal_le = cpu_to_le64(steal);
        idx = srcu_read_lock(&kvm->srcu);
-       offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
-       kvm_put_guest(kvm, base + offset, steal_le, u64);
+       if (!kvm_get_guest(kvm, base + offset, steal)) {
+               steal = le64_to_cpu(steal);
+               vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay);
+               steal += vcpu->arch.steal.last_steal - last_steal;
+               kvm_put_guest(kvm, base + offset, cpu_to_le64(steal));
+       }
        srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -43,7 +40,8 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
        switch (feature) {
        case ARM_SMCCC_HV_PV_TIME_FEATURES:
        case ARM_SMCCC_HV_PV_TIME_ST:
-               val = SMCCC_RET_SUCCESS;
+               if (vcpu->arch.steal.base != GPA_INVALID)
+                       val = SMCCC_RET_SUCCESS;
                break;
        }
 
@@ -64,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
         * Start counting stolen time from the time the guest requests
         * the feature enabled.
         */
-       vcpu->arch.steal.steal = 0;
        vcpu->arch.steal.last_steal = current->sched_info.run_delay;
 
        idx = srcu_read_lock(&kvm->srcu);
@@ -74,7 +71,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
        return base;
 }
 
-static bool kvm_arm_pvtime_supported(void)
+bool kvm_arm_pvtime_supported(void)
 {
        return !!sched_info_on();
 }
index 4691053c5ee426dc8601f009ac73e41972a8788a..ff0444352bba3eea5dfe8f1634a3d615a552b0f3 100644 (file)
@@ -23,7 +23,7 @@ TRACE_EVENT(kvm_entry,
                __entry->vcpu_pc                = vcpu_pc;
        ),
 
-       TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+       TP_printk("PC: 0x%016lx", __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_exit,
@@ -42,7 +42,7 @@ TRACE_EVENT(kvm_exit,
                __entry->vcpu_pc                = vcpu_pc;
        ),
 
-       TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+       TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%016lx",
                  __print_symbolic(__entry->ret, kvm_arm_exception_type),
                  __entry->esr_ec,
                  __print_symbolic(__entry->esr_ec, kvm_arm_exception_class),
@@ -69,7 +69,7 @@ TRACE_EVENT(kvm_guest_fault,
                __entry->ipa                    = ipa;
        ),
 
-       TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+       TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#016lx",
                  __entry->ipa, __entry->hsr,
                  __entry->hxfar, __entry->vcpu_pc)
 );
@@ -131,7 +131,7 @@ TRACE_EVENT(kvm_mmio_emulate,
                __entry->cpsr                   = cpsr;
        ),
 
-       TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+       TP_printk("Emulate MMIO at: 0x%016lx (instr: %08lx, cpsr: %08lx)",
                  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
 );
 
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_unmap_hva_range,
                __entry->end            = end;
        ),
 
-       TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
+       TP_printk("mmu notifier unmap range: %#016lx -- %#016lx",
                  __entry->start, __entry->end)
 );
 
@@ -165,7 +165,7 @@ TRACE_EVENT(kvm_set_spte_hva,
                __entry->hva            = hva;
        ),
 
-       TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
+       TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
 );
 
 TRACE_EVENT(kvm_age_hva,
@@ -182,7 +182,7 @@ TRACE_EVENT(kvm_age_hva,
                __entry->end            = end;
        ),
 
-       TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+       TP_printk("mmu notifier age hva: %#016lx -- %#016lx",
                  __entry->start, __entry->end)
 );
 
@@ -198,7 +198,7 @@ TRACE_EVENT(kvm_test_age_hva,
                __entry->hva            = hva;
        ),
 
-       TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+       TP_printk("mmu notifier test age hva: %#016lx", __entry->hva)
 );
 
 TRACE_EVENT(kvm_set_way_flush,
index 2c56d1e0f5bd543060ea99185409cee510dd6407..8d78acc4fba7efd0837ea396c751373b787dde87 100644 (file)
@@ -22,7 +22,7 @@ TRACE_EVENT(kvm_wfx_arm64,
                __entry->is_wfe  = is_wfe;
        ),
 
-       TP_printk("guest executed wf%c at: 0x%08lx",
+       TP_printk("guest executed wf%c at: 0x%016lx",
                  __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
 );
 
@@ -42,7 +42,7 @@ TRACE_EVENT(kvm_hvc_arm64,
                __entry->imm = imm;
        ),
 
-       TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+       TP_printk("HVC at 0x%016lx (r0: 0x%016lx, imm: 0x%lx)",
                  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
@@ -135,7 +135,7 @@ TRACE_EVENT(trap_reg,
                __entry->write_value = write_value;
        ),
 
-       TP_printk("%s %s reg %d (0x%08llx)", __entry->fn,  __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+       TP_printk("%s %s reg %d (0x%016llx)", __entry->fn,  __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
 );
 
 TRACE_EVENT(kvm_handle_sys_reg,
index 7de85d2253ff5c436446e46358b8d7e7d5688710..0c50ac4442221f07814ed38d0e07d7dd67c8a2bc 100644 (file)
@@ -137,6 +137,8 @@ extern void kvm_init_loongson_ipi(struct kvm *kvm);
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
        switch (type) {
+       case KVM_VM_MIPS_AUTO:
+               break;
 #ifdef CONFIG_KVM_MIPS_VZ
        case KVM_VM_MIPS_VZ:
 #else
index 08320b0b2b276f0ceb82e28b07812d12dfdd0465..9663ba31347c20141e4d1806731f2e8512239c52 100644 (file)
@@ -270,9 +270,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
        u32 token;
-       irqentry_state_t state;
 
-       state = irqentry_enter(regs);
+       ack_APIC_irq();
 
        inc_irq_stat(irq_hv_callback_count);
 
@@ -283,7 +282,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
                wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
        }
 
-       irqentry_exit(regs, state);
        set_irq_regs(old_regs);
 }
 
index 5299ef5ff18d0bde48c9245a0f4caea7451dad36..2f6510de6b0c037ae969aa8f992b5814be694127 100644 (file)
@@ -2505,9 +2505,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
 
        val = GET_SMSTATE(u32, smstate, 0x7fcc);
-       ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+
+       if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
+               return X86EMUL_UNHANDLEABLE;
+
        val = GET_SMSTATE(u32, smstate, 0x7fc8);
-       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+       if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
+               return X86EMUL_UNHANDLEABLE;
 
        selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
        set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
@@ -2560,16 +2565,23 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
        ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
 
        val = GET_SMSTATE(u32, smstate, 0x7f68);
-       ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+
+       if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
+               return X86EMUL_UNHANDLEABLE;
+
        val = GET_SMSTATE(u32, smstate, 0x7f60);
-       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+       if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
+               return X86EMUL_UNHANDLEABLE;
 
        cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
        cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
        cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
        val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
-       ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
+
+       if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+               return X86EMUL_UNHANDLEABLE;
 
        selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
        rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
index 43fdb0c12a5dd5b794106831337b0ffcad757141..71aa3da2a0b7b015ad091e7b229c4dc0a20182c4 100644 (file)
@@ -2469,7 +2469,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                }
 
                if (sp->unsync_children)
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+                       kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
 
                __clear_sp_write_flooding_count(sp);
 
index 28036629abf8d7a4e655245081de4e5ffe51c3dc..598a769f19617a81baf6af473b1da6f79c41e847 100644 (file)
@@ -586,7 +586,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
        /* Give the current vmcb to the guest */
-       svm_set_gif(svm, false);
 
        nested_vmcb->save.es     = vmcb->save.es;
        nested_vmcb->save.cs     = vmcb->save.cs;
@@ -632,6 +631,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        /* Restore the original control entries */
        copy_vmcb_control_area(&vmcb->control, &hsave->control);
 
+       /* On vmexit the GIF is set to false */
+       svm_set_gif(svm, false);
+
        svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
                svm->vcpu.arch.l1_tsc_offset;
 
@@ -1145,6 +1147,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
        load_nested_vmcb_control(svm, ctl);
        nested_prepare_vmcb_control(svm);
 
+       if (!nested_svm_vmrun_msrpm(svm))
+               return -EINVAL;
+
 out_set_gif:
        svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
 
index 402dc4234e397861daef0be131a61118c8f2681a..7bf7bf734979488ac0f1cf10808f32ce3be882a9 100644 (file)
@@ -1106,6 +1106,7 @@ void sev_vm_destroy(struct kvm *kvm)
                list_for_each_safe(pos, q, head) {
                        __unregister_enc_region_locked(kvm,
                                list_entry(pos, struct enc_region, list));
+                       cond_resched();
                }
        }
 
index 1db4fdcb4906db32a5f2d957e9afc0e5584668be..c91acabf18d026f73b77332e8ba08095b2799b12 100644 (file)
@@ -2938,8 +2938,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        if (npt_enabled)
                vcpu->arch.cr3 = svm->vmcb->save.cr3;
 
-       svm_complete_interrupts(svm);
-
        if (is_guest_mode(vcpu)) {
                int vmexit;
 
@@ -3504,7 +3502,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        stgi();
 
        /* Any pending NMI will happen here */
-       exit_fastpath = svm_exit_handlers_fastpath(vcpu);
 
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
                kvm_after_interrupt(&svm->vcpu);
@@ -3518,6 +3515,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        }
 
        svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+       vmcb_mark_all_clean(svm->vmcb);
 
        /* if exit due to PF check for async PF */
        if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
@@ -3537,7 +3535,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                     SVM_EXIT_EXCP_BASE + MC_VECTOR))
                svm_handle_mce(svm);
 
-       vmcb_mark_all_clean(svm->vmcb);
+       svm_complete_interrupts(svm);
+       exit_fastpath = svm_exit_handlers_fastpath(vcpu);
        return exit_fastpath;
 }
 
@@ -3900,21 +3899,28 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct vmcb *nested_vmcb;
        struct kvm_host_map map;
-       u64 guest;
-       u64 vmcb;
        int ret = 0;
 
-       guest = GET_SMSTATE(u64, smstate, 0x7ed8);
-       vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
+       if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+               u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
+               u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
+               u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
 
-       if (guest) {
-               if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
-                       return 1;
-               nested_vmcb = map.hva;
-               ret = enter_svm_guest_mode(svm, vmcb, nested_vmcb);
-               kvm_vcpu_unmap(&svm->vcpu, &map, true);
+               if (guest) {
+                       if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+                               return 1;
+
+                       if (!(saved_efer & EFER_SVME))
+                               return 1;
+
+                       if (kvm_vcpu_map(&svm->vcpu,
+                                        gpa_to_gfn(vmcb), &map) == -EINVAL)
+                               return 1;
+
+                       ret = enter_svm_guest_mode(svm, vmcb, map.hva);
+                       kvm_vcpu_unmap(&svm->vcpu, &map, true);
+               }
        }
 
        return ret;
index 23b58c28a1c926f461cb87c9ee1f03a310f23e25..1bb6b31eb64666d57eb5095566f5579b1d26694c 100644 (file)
@@ -4404,6 +4404,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
        if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
                kvm_vcpu_flush_tlb_current(vcpu);
 
+       /*
+        * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
+        * now and the new vmentry.  Ensure that the VMCS02 PDPTR fields are
+        * up-to-date before switching to L1.
+        */
+       if (enable_ept && is_pae_paging(vcpu))
+               vmx_ept_load_pdptrs(vcpu);
+
        leave_guest_mode(vcpu);
 
        if (nested_cpu_has_preemption_timer(vmcs12))
@@ -4668,7 +4676,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
                vmx->nested.msrs.entry_ctls_high &=
                                ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
                vmx->nested.msrs.exit_ctls_high &=
-                               ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+                               ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
        }
 }
 
index 819c185adf09b37a62a05bd75b6368d348c92509..8646a797b7a838d3f484e868bf8ac762e3a8858f 100644 (file)
@@ -2971,7 +2971,7 @@ static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
        vpid_sync_context(to_vmx(vcpu)->vpid);
 }
 
-static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
+void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
 
@@ -3114,7 +3114,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
                        guest_cr3 = vcpu->arch.cr3;
                else /* vmcs01.GUEST_CR3 is already up-to-date. */
                        update_guest_cr3 = false;
-               ept_load_pdptrs(vcpu);
+               vmx_ept_load_pdptrs(vcpu);
        } else {
                guest_cr3 = pgd;
        }
@@ -6054,6 +6054,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                        (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
                        exit_reason != EXIT_REASON_EPT_VIOLATION &&
                        exit_reason != EXIT_REASON_PML_FULL &&
+                       exit_reason != EXIT_REASON_APIC_ACCESS &&
                        exit_reason != EXIT_REASON_TASK_SWITCH)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
index 26175a4759fa5f8eef34b443dcb3fd6415b4f6b6..a2f82127c1707ae71c0c2836bef6ed3898361f27 100644 (file)
@@ -356,6 +356,7 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
 int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
 int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
                              struct x86_exception *e);
+void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
index d39d6cf1d4737b177ca3d4860bc05730c0ad6a93..1994602a0851f2011bf237c329b18c0374e52bb1 100644 (file)
@@ -2731,7 +2731,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
                return 1;
 
        if (!lapic_in_kernel(vcpu))
-               return 1;
+               return data ? 1 : 0;
 
        vcpu->arch.apf.msr_en_val = data;
 
@@ -3578,6 +3578,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SMALLER_MAXPHYADDR:
                r = (int) allow_smaller_maxphyaddr;
                break;
+       case KVM_CAP_STEAL_TIME:
+               r = sched_info_on();
+               break;
        default:
                break;
        }
index a23076765b4cc26040bd6b3110f06c1d14724e7b..05e3c2fb3ef7828438c8456cd3553ecaa4561bc6 100644 (file)
@@ -749,25 +749,46 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                              gpa_t gpa, unsigned long len);
 
-#define __kvm_put_guest(kvm, gfn, offset, value, type)                 \
+#define __kvm_get_guest(kvm, gfn, offset, v)                           \
 ({                                                                     \
        unsigned long __addr = gfn_to_hva(kvm, gfn);                    \
-       type __user *__uaddr = (type __user *)(__addr + offset);        \
+       typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
        int __ret = -EFAULT;                                            \
                                                                        \
        if (!kvm_is_error_hva(__addr))                                  \
-               __ret = put_user(value, __uaddr);                       \
+               __ret = get_user(v, __uaddr);                           \
+       __ret;                                                          \
+})
+
+#define kvm_get_guest(kvm, gpa, v)                                     \
+({                                                                     \
+       gpa_t __gpa = gpa;                                              \
+       struct kvm *__kvm = kvm;                                        \
+                                                                       \
+       __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT,                     \
+                       offset_in_page(__gpa), v);                      \
+})
+
+#define __kvm_put_guest(kvm, gfn, offset, v)                           \
+({                                                                     \
+       unsigned long __addr = gfn_to_hva(kvm, gfn);                    \
+       typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \
+       int __ret = -EFAULT;                                            \
+                                                                       \
+       if (!kvm_is_error_hva(__addr))                                  \
+               __ret = put_user(v, __uaddr);                           \
        if (!__ret)                                                     \
                mark_page_dirty(kvm, gfn);                              \
        __ret;                                                          \
 })
 
-#define kvm_put_guest(kvm, gpa, value, type)                           \
+#define kvm_put_guest(kvm, gpa, v)                                     \
 ({                                                                     \
        gpa_t __gpa = gpa;                                              \
        struct kvm *__kvm = kvm;                                        \
+                                                                       \
        __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT,                     \
-                       offset_in_page(__gpa), (value), type);          \
+                       offset_in_page(__gpa), v);                      \
 })
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
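
The reworked kvm_get_guest()/kvm_put_guest() above infer the access width from the type of the value argument via typeof(), so the old explicit 'type' parameter is gone. A hedged sketch of a caller, mirroring the pvtime.c read-modify-write earlier in this merge; the function and variable names are hypothetical:

static int demo_bump_counter(struct kvm *kvm, gpa_t gpa)
{
	u64 counter = 0;

	/* typeof(counter) makes this an 8-byte guest read. */
	if (kvm_get_guest(kvm, gpa, counter))
		return -EFAULT;

	counter = le64_to_cpu(counter) + 1;

	/* 8-byte write; marks the page dirty on success. */
	return kvm_put_guest(kvm, gpa, cpu_to_le64(counter));
}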
index f6d86033c4fa3d21cc7c643c85d443b053c704c9..7d8eced6f459b065c445ba8dcbc107f785dc24fa 100644 (file)
@@ -790,9 +790,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_VM_PPC_HV 1
 #define KVM_VM_PPC_PR 2
 
-/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
-#define KVM_VM_MIPS_TE         0
+/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */
+#define KVM_VM_MIPS_AUTO       0
 #define KVM_VM_MIPS_VZ         1
+#define KVM_VM_MIPS_TE         2
 
 #define KVM_S390_SIE_PAGE_OFFSET 1
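
For context, the machine type is the argument userspace hands to KVM_CREATE_VM, so the renumbering keeps 0 working everywhere: KVM_VM_MIPS_AUTO is accepted whether the kernel was built for VZ or for trap & emulate, as the mips.c hunk above shows. A minimal sketch, an illustration rather than part of this merge:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int create_mips_vm(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0)
		return -1;

	/* 0 (KVM_VM_MIPS_AUTO) falls through to whichever of VZ or
	 * trap & emulate the running kernel supports. */
	return ioctl(kvm, KVM_CREATE_VM, KVM_VM_MIPS_AUTO);
}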
 
@@ -1035,6 +1036,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_LAST_CPU 184
 #define KVM_CAP_SMALLER_MAXPHYADDR 185
 #define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_STEAL_TIME 187
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index 67cd0b88a6b6ff91b0225eb705b84386370eb97c..cf88233b819a0803e7f47dd85552a9275b135a68 100644 (file)
@@ -4332,7 +4332,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                               struct kvm_io_device *dev)
 {
-       int i;
+       int i, j;
        struct kvm_io_bus *new_bus, *bus;
 
        bus = kvm_get_bus(kvm, bus_idx);
@@ -4349,17 +4349,20 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 
        new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
                          GFP_KERNEL_ACCOUNT);
-       if (!new_bus)  {
+       if (new_bus) {
+               memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+               new_bus->dev_count--;
+               memcpy(new_bus->range + i, bus->range + i + 1,
+                      (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+       } else {
                pr_err("kvm: failed to shrink bus, removing it completely\n");
-               goto broken;
+               for (j = 0; j < bus->dev_count; j++) {
+                       if (j == i)
+                               continue;
+                       kvm_iodevice_destructor(bus->range[j].dev);
+               }
        }
 
-       memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
-       new_bus->dev_count--;
-       memcpy(new_bus->range + i, bus->range + i + 1,
-              (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
-
-broken:
        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
        synchronize_srcu_expedited(&kvm->srcu);
        kfree(bus);