Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4253adef9044c10429094495e01074da46b0d864..06c0c6d0541e9bf95eabbcaa8d20c8ec45f19496 100644
@@ -5012,7 +5012,7 @@ static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
        }
 }
 
-static bool vmx_get_enable_apicv(void)
+static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
 {
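+       /*
+        * The new vcpu argument is unused by VMX: APICv is a VM-wide
+        * setting controlled by the enable_apicv module parameter.
+        * Presumably the parameter exists so that other implementations
+        * can make a per-vCPU decision.
+        */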
        return enable_apicv;
 }
@@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
        vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
        vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-       native_store_idt(&dt);
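+       /* Use the common store_idt() helper rather than the native_ variant. */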
+       store_idt(&dt);
        vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
        vmx->host_idt_base = dt.address;
 
@@ -8344,12 +8344,14 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-       trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
-                               vmcs_readl(EXIT_QUALIFICATION),
-                               vmx->idt_vectoring_info,
-                               intr_info,
-                               vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
-                               KVM_ISA_VMX);
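+       /*
+        * Bail out early: an exit cannot be reflected to L1 while a
+        * nested VM entry is still pending, and a failed VM entry leaves
+        * no meaningful exit state behind.
+        */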
+       if (vmx->nested.nested_run_pending)
+               return false;
+
+       if (unlikely(vmx->fail)) {
+               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
+                                   vmcs_read32(VM_INSTRUCTION_ERROR));
+               return true;
+       }
 
        /*
         * The host physical addresses of some pages of guest memory
@@ -8363,14 +8365,12 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
         */
        nested_mark_vmcs12_pages_dirty(vcpu);
 
-       if (vmx->nested.nested_run_pending)
-               return false;
-
-       if (unlikely(vmx->fail)) {
-               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
-                                   vmcs_read32(VM_INSTRUCTION_ERROR));
-               return true;
-       }
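+       /*
+        * Past the early returns above, this exit is a genuine
+        * candidate for reflection to L1, so trace it.
+        */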
+       trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
+                               vmcs_readl(EXIT_QUALIFICATION),
+                               vmx->idt_vectoring_info,
+                               intr_info,
+                               vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+                               KVM_ISA_VMX);
 
        switch (exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
@@ -9424,12 +9424,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                                  | (1 << VCPU_EXREG_CR3));
        vcpu->arch.regs_dirty = 0;
 
-       vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
-       vmx->loaded_vmcs->launched = 1;
-
-       vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-
        /*
         * eager fpu is enabled if PKEY is supported and CR4 is switched
         * back on host, so it is safe to read guest PKRU from current
@@ -9451,6 +9445,14 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                kvm_make_request(KVM_REQ_EVENT, vcpu);
 
        vmx->nested.nested_run_pending = 0;
+       vmx->idt_vectoring_info = 0;
+
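+       /*
+        * A failed VM entry leaves no exit reason to read; use the
+        * recognizably bogus value 0xdead instead, and skip the normal
+        * exit bookkeeping for failed or aborted entries.
+        */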
+       vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+       if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+               return;
+
+       vmx->loaded_vmcs->launched = 1;
+       vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
        vmx_complete_atomic_exit(vmx);
        vmx_recover_nmi_blocking(vmx);
@@ -10525,6 +10527,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        if (exec_control & CPU_BASED_TPR_SHADOW) {
                vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
                vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+       } else {
+#ifdef CONFIG_X86_64
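+               /*
+                * Without a TPR shadow for L2, CR8 accesses must exit so
+                * that L0 can keep the TPR virtualized.
+                */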
+               exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+                               CPU_BASED_CR8_STORE_EXITING;
+#endif
        }
 
        /*
@@ -11388,46 +11395,30 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-       u32 vm_inst_error = 0;
 
        /* trying to cancel vmlaunch/vmresume is a bug */
        WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
+       /*
+        * The only expected VM-instruction error is "VM entry with
+        * invalid control field(s)." Anything else indicates a
+        * problem with L0.
+        */
+       WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
+                                  VMXERR_ENTRY_INVALID_CONTROL_FIELD));
+
        leave_guest_mode(vcpu);
-       prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
-                      exit_qualification);
 
-       if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
-                                vmcs12->vm_exit_msr_store_count))
-               nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
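+       /*
+        * Only a successful VM entry has guest state worth saving back
+        * into vmcs12; after an early failure, vmcs12 remains as L1
+        * wrote it.
+        */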
+       if (likely(!vmx->fail)) {
+               prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+                              exit_qualification);
 
-       if (unlikely(vmx->fail))
-               vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+               if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+                                        vmcs12->vm_exit_msr_store_count))
+                       nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+       }
 
        vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-
-       /*
-        * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of
-        * the VM-exit interrupt information (valid interrupt) is always set to
-        * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need
-        * kvm_cpu_has_interrupt().  See the commit message for details.
-        */
-       if (nested_exit_intr_ack_set(vcpu) &&
-           exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
-           kvm_cpu_has_interrupt(vcpu)) {
-               int irq = kvm_cpu_get_interrupt(vcpu);
-               WARN_ON(irq < 0);
-               vmcs12->vm_exit_intr_info = irq |
-                       INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
-       }
-
-       trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
-                                      vmcs12->exit_qualification,
-                                      vmcs12->idt_vectoring_info_field,
-                                      vmcs12->vm_exit_intr_info,
-                                      vmcs12->vm_exit_intr_error_code,
-                                      KVM_ISA_VMX);
-
        vm_entry_controls_reset_shadow(vmx);
        vm_exit_controls_reset_shadow(vmx);
        vmx_segment_cache_clear(vmx);
@@ -11436,8 +11427,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        if (VMCS02_POOL_SIZE == 0)
                nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
 
-       load_vmcs12_host_state(vcpu, vmcs12);
-
        /* Update any VMCS fields that might have changed while L2 ran */
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
@@ -11486,21 +11475,57 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
         */
        kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-       /*
-        * Exiting from L2 to L1, we're now back to L1 which thinks it just
-        * finished a VMLAUNCH or VMRESUME instruction, so we need to set the
-        * success or failure flag accordingly.
-        */
-       if (unlikely(vmx->fail)) {
-               vmx->fail = 0;
-               nested_vmx_failValid(vcpu, vm_inst_error);
-       } else
-               nested_vmx_succeed(vcpu);
        if (enable_shadow_vmcs)
                vmx->nested.sync_shadow_vmcs = true;
 
        /* in case we halted in L2 */
        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+       if (likely(!vmx->fail)) {
+               /*
+                * TODO: SDM says that with acknowledge interrupt on
+                * exit, bit 31 of the VM-exit interrupt information
+                * (valid interrupt) is always set to 1 on
+                * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
+                * need kvm_cpu_has_interrupt().  See the commit
+                * message for details.
+                */
+               if (nested_exit_intr_ack_set(vcpu) &&
+                   exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+                   kvm_cpu_has_interrupt(vcpu)) {
+                       int irq = kvm_cpu_get_interrupt(vcpu);
+                       WARN_ON(irq < 0);
+                       vmcs12->vm_exit_intr_info = irq |
+                               INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
+               }
+
+               trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+                                              vmcs12->exit_qualification,
+                                              vmcs12->idt_vectoring_info_field,
+                                              vmcs12->vm_exit_intr_info,
+                                              vmcs12->vm_exit_intr_error_code,
+                                              KVM_ISA_VMX);
+
+               load_vmcs12_host_state(vcpu, vmcs12);
+
+               return;
+       }
+
+       /*
+        * After an early L2 VM-entry failure, we're now back
+        * in L1 which thinks it just finished a VMLAUNCH or
+        * VMRESUME instruction, so we need to set the failure
+        * flag and the VM-instruction error field of the VMCS
+        * accordingly.
+        */
+       nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+       /*
+        * The emulated instruction was already skipped in
+        * nested_vmx_run, but the updated RIP was never
+        * written back to the vmcs01.
+        */
+       skip_emulated_instruction(vcpu);
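+       /*
+        * Clear vmx->fail: the failure has been reported to L1 via the
+        * VM-instruction error field.
+        */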
+       vmx->fail = 0;
 }
 
 /*
@@ -11829,7 +11854,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
        struct kvm_lapic_irq irq;
        struct kvm_vcpu *vcpu;
        struct vcpu_data vcpu_info;
-       int idx, ret = -EINVAL;
+       int idx, ret = 0;
 
        if (!kvm_arch_has_assigned_device(kvm) ||
                !irq_remapping_cap(IRQ_POSTING_CAP) ||
@@ -11838,7 +11863,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 
        idx = srcu_read_lock(&kvm->irq_srcu);
        irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-       BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
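+       /*
+        * A stale or out-of-range guest_irq is a user-space bug; warn
+        * once and return success rather than BUG() on the host.
+        */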
+       if (guest_irq >= irq_rt->nr_rt_entries ||
+           hlist_empty(&irq_rt->map[guest_irq])) {
+               pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+                            guest_irq, irq_rt->nr_rt_entries);
+               goto out;
+       }
 
        hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
                if (e->type != KVM_IRQ_ROUTING_MSI)