diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06412ba46aa36eaca6cd1f111a6b8df6d795d969..e665aa7167cf9729aac82a075c358236d9f03aec 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
 
 #define MSR_BITMAP_MODE_X2APIC         1
 #define MSR_BITMAP_MODE_X2APIC_APICV   2
-#define MSR_BITMAP_MODE_LM             4
 
 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 
@@ -857,6 +856,7 @@ struct nested_vmx {
 
        /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
        u64 vmcs01_debugctl;
+       u64 vmcs01_guest_bndcfgs;
 
        u16 vpid02;
        u16 last_vpid;
@@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
                goto out;
        }
 
+       /*
+        * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the
+        * base of EPT PML4 table, strip off EPT configuration information.
+        */
        ret = hyperv_flush_guest_mapping(
-                       to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+                       to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
 
 out:
        spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
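
The EPT pointer written to the EPTP field packs configuration bits (memory type, page-walk length, A/D enable) into its low bits, while the Hyper-V flush hypercall wants only the physical address of the PML4 table, hence the new PAGE_MASK. A minimal user-space sketch of the masking; the EPTP bit layout in the comment is assumed from the Intel SDM, not taken from this patch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((uint64_t)((1ULL << PAGE_SHIFT) - 1)))

int main(void)
{
	/*
	 * Assumed EPTP layout (SDM): bits 2:0 memory type, bits 5:3
	 * page-walk length minus 1, bit 6 accessed/dirty enable.
	 */
	uint64_t eptp = 0x123456000ULL | 6 | (3 << 3) | (1 << 6);

	/* Strip the configuration bits; what remains is the PML4 base. */
	printf("PML4 base: %#llx\n", (unsigned long long)(eptp & PAGE_MASK));
	return 0;
}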
@@ -2899,8 +2903,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
                vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
        }
 
-       if (is_long_mode(&vmx->vcpu))
-               wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+       wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #else
        savesegment(fs, fs_sel);
        savesegment(gs, gs_sel);
@@ -2951,8 +2954,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
        vmx->loaded_cpu_state = NULL;
 
 #ifdef CONFIG_X86_64
-       if (is_long_mode(&vmx->vcpu))
-               rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+       rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #endif
        if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
                kvm_load_ldt(host_state->ldt_sel);
@@ -2980,24 +2982,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
 {
-       if (is_long_mode(&vmx->vcpu)) {
-               preempt_disable();
-               if (vmx->loaded_cpu_state)
-                       rdmsrl(MSR_KERNEL_GS_BASE,
-                              vmx->msr_guest_kernel_gs_base);
-               preempt_enable();
-       }
+       preempt_disable();
+       if (vmx->loaded_cpu_state)
+               rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+       preempt_enable();
        return vmx->msr_guest_kernel_gs_base;
 }
 
 static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 {
-       if (is_long_mode(&vmx->vcpu)) {
-               preempt_disable();
-               if (vmx->loaded_cpu_state)
-                       wrmsrl(MSR_KERNEL_GS_BASE, data);
-               preempt_enable();
-       }
+       preempt_disable();
+       if (vmx->loaded_cpu_state)
+               wrmsrl(MSR_KERNEL_GS_BASE, data);
+       preempt_enable();
        vmx->msr_guest_kernel_gs_base = data;
 }
 #endif
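
Taken together with dropping MSR_BITMAP_MODE_LM above and the vmx_set_efer()/vmx_update_msr_bitmap() hunks further down, the effect is that MSR_KERNEL_GS_BASE is now saved and restored on every guest/host state switch rather than only while the guest runs in 64-bit mode, so the is_long_mode() guards and the stale-cache sync on the EFER.LMA transition go away; the MSR presumably stays disintercepted at bitmap setup, which is not visible in these hunks.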
@@ -3533,9 +3530,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
                VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
                VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
 
-       if (kvm_mpx_supported())
-               msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
-
        /* We support free control of debug control saving. */
        msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
 
@@ -3552,8 +3546,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
                VM_ENTRY_LOAD_IA32_PAT;
        msrs->entry_ctls_high |=
                (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
-       if (kvm_mpx_supported())
-               msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
        /* We support free control of debug control loading. */
        msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -3601,12 +3593,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
                msrs->secondary_ctls_high);
        msrs->secondary_ctls_low = 0;
        msrs->secondary_ctls_high &=
-               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_DESC |
                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                SECONDARY_EXEC_WBINVD_EXITING;
+
        /*
         * We can emulate "VMCS shadowing," even if the hardware
         * doesn't support it.
@@ -3663,6 +3655,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
                msrs->secondary_ctls_high |=
                        SECONDARY_EXEC_UNRESTRICTED_GUEST;
 
+       if (flexpriority_enabled)
+               msrs->secondary_ctls_high |=
+                       SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
        /* miscellaneous data */
        rdmsr(MSR_IA32_VMX_MISC,
                msrs->misc_low,
@@ -5073,19 +5069,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
        if (!msr)
                return;
 
-       /*
-        * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
-        * 64-bit mode as a 64-bit kernel may frequently access the
-        * MSR.  This means we need to manually save/restore the MSR
-        * when switching between guest and host state, but only if
-        * the guest is in 64-bit mode.  Sync our cached value if the
-        * guest is transitioning to 32-bit mode and the CPU contains
-        * guest state, i.e. the cache is stale.
-        */
-#ifdef CONFIG_X86_64
-       if (!(efer & EFER_LMA))
-               (void)vmx_read_guest_kernel_gs_base(vmx);
-#endif
        vcpu->arch.efer = efer;
        if (efer & EFER_LMA) {
                vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@ -6078,9 +6061,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
                        mode |= MSR_BITMAP_MODE_X2APIC_APICV;
        }
 
-       if (is_long_mode(vcpu))
-               mode |= MSR_BITMAP_MODE_LM;
-
        return mode;
 }
 
@@ -6121,9 +6101,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
        if (!changed)
                return;
 
-       vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
-                                 !(mode & MSR_BITMAP_MODE_LM));
-
        if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
                vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
 
@@ -6189,6 +6166,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
        nested_mark_vmcs12_pages_dirty(vcpu);
 }
 
+static u8 vmx_get_rvi(void)
+{
+       return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+}
+
 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6201,7 +6183,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
                WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
                return false;
 
-       rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+       rvi = vmx_get_rvi();
 
        vapic_page = kmap(vmx->nested.virtual_apic_page);
        vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
@@ -10245,15 +10227,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
        if (!lapic_in_kernel(vcpu))
                return;
 
+       if (!flexpriority_enabled &&
+           !cpu_has_vmx_virtualize_x2apic_mode())
+               return;
+
        /* Postpone execution until vmcs01 is the current VMCS. */
        if (is_guest_mode(vcpu)) {
                to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
                return;
        }
 
-       if (!cpu_need_tpr_shadow(vcpu))
-               return;
-
        sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
        sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                              SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
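
Note the ordering change here as well: the capability check now runs before the is_guest_mode() deferral and replaces the cpu_need_tpr_shadow() early-out, so on hardware with neither flexpriority nor virtualize-x2apic support the secondary controls are simply left alone. This pairs with the earlier hunk that only advertises SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES to L1 when flexpriority_enabled.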
@@ -10375,6 +10358,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
        return max_irr;
 }
 
+static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
+{
+       u8 rvi = vmx_get_rvi();
+       u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
+
+       return ((rvi & 0xf0) > (vppr & 0xf0));
+}
+
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
        if (!kvm_vcpu_apicv_active(vcpu))
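
vmx_has_apicv_interrupt() compares only the upper nibbles: under APICv an interrupt is deliverable when the priority class of the highest pending vector (RVI) is strictly above the processor-priority class (VPPR). A tiny stand-alone illustration of that comparison; the sample vector/PPR values are made up:

#include <stdint.h>
#include <stdio.h>

/* Deliverable iff the pending vector's priority class beats the PPR class. */
static int apicv_interrupt_pending(uint8_t rvi, uint8_t vppr)
{
	return (rvi & 0xf0) > (vppr & 0xf0);
}

int main(void)
{
	printf("%d\n", apicv_interrupt_pending(0x51, 0x40)); /* 1: class 5 > class 4 */
	printf("%d\n", apicv_interrupt_pending(0x4f, 0x40)); /* 0: same class, masked */
	return 0;
}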
@@ -11264,6 +11255,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 #undef cr4_fixed1_update
 }
 
+static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (kvm_mpx_supported()) {
+               bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
+
+               if (mpx_enabled) {
+                       vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+                       vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+               } else {
+                       vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
+                       vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
+               }
+       }
+}
+
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -11280,8 +11288,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
                to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
                        ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
 
-       if (nested_vmx_allowed(vcpu))
+       if (nested_vmx_allowed(vcpu)) {
                nested_vmx_cr_fixed1_bits_update(vcpu);
+               nested_vmx_entry_exit_ctls_update(vcpu);
+       }
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
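
This is the flip side of removing VM_ENTRY_LOAD_BNDCFGS and VM_EXIT_CLEAR_BNDCFGS from the static nested_vmx_setup_ctls_msrs() values earlier in the patch: the controls are now advertised per vCPU from vmx_cpuid_update(), so an L1 whose CPUID does not report MPX no longer sees them in the VMX capability MSRs.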
@@ -12049,8 +12059,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        set_cr4_guest_host_mask(vmx);
 
-       if (vmx_mpx_supported())
-               vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+       if (kvm_mpx_supported()) {
+               if (vmx->nested.nested_run_pending &&
+                       (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+                       vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+               else
+                       vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+       }
 
        if (enable_vpid) {
                if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
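
The vmcs01_guest_bndcfgs field added to struct nested_vmx above is what feeds the else branch here: when L1 does not set VM_ENTRY_LOAD_BNDCFGS, L2 keeps running with L1's BNDCFGS, captured in enter_vmx_non_root_mode() below, instead of whatever happened to be in vmcs12->guest_bndcfgs.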
@@ -12595,15 +12610,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        bool from_vmentry = !!exit_qual;
        u32 dummy_exit_qual;
-       u32 vmcs01_cpu_exec_ctrl;
+       bool evaluate_pending_interrupts;
        int r = 0;
 
-       vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+               (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
+       if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+               evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
 
        enter_guest_mode(vcpu);
 
        if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
                vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+       if (kvm_mpx_supported() &&
+               !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+               vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
 
        vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
        vmx_segment_cache_clear(vmx);
@@ -12643,16 +12664,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
         * to L1 or delivered directly to L2 (e.g. In case L1 don't
         * intercept EXTERNAL_INTERRUPT).
         *
-        * Usually this would be handled by L0 requesting a
-        * IRQ/NMI window by setting VMCS accordingly. However,
-        * this setting was done on VMCS01 and now VMCS02 is active
-        * instead. Thus, we force L0 to perform pending event
-        * evaluation by requesting a KVM_REQ_EVENT.
-        */
-       if (vmcs01_cpu_exec_ctrl &
-               (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+        * Usually this would be handled by the processor noticing an
+        * IRQ/NMI window request, or checking RVI during evaluation of
+        * pending virtual interrupts.  However, this setting was done
+        * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
+        * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
+        */
+       if (unlikely(evaluate_pending_interrupts))
                kvm_make_request(KVM_REQ_EVENT, vcpu);
-       }
 
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point