Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9709c98d0d6c1059f08789d3c98bc05a74f120f0..2f32fd09e2598b24563e4dbfafb1e98174e95151 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -91,7 +91,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 static const struct svm_direct_access_msrs {
        u32 index;   /* Index of the MSR */
        bool always; /* True if intercept is always on */
-} direct_access_msrs[] = {
+} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
        { .index = MSR_STAR,                            .always = true  },
        { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
 #ifdef CONFIG_X86_64
@@ -263,9 +263,10 @@ static int get_max_npt_level(void)
 #endif
 }
 
-void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       u64 old_efer = vcpu->arch.efer;
        vcpu->arch.efer = efer;
 
        if (!npt_enabled) {
@@ -276,13 +277,32 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
                        efer &= ~EFER_LME;
        }
 
-       if (!(efer & EFER_SVME)) {
-               svm_leave_nested(svm);
-               svm_set_gif(svm, true);
+       if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+               if (!(efer & EFER_SVME)) {
+                       svm_leave_nested(svm);
+                       svm_set_gif(svm, true);
+
+                       /*
+                        * Free the nested guest state, unless we are in SMM.
+                        * In this case we will return to the nested guest
+                        * as soon as we leave SMM.
+                        */
+                       if (!is_smm(&svm->vcpu))
+                               svm_free_nested(svm);
+
+               } else {
+                       int ret = svm_allocate_nested(svm);
+
+                       if (ret) {
+                               vcpu->arch.efer = old_efer;
+                               return ret;
+                       }
+               }
        }
 
        svm->vmcb->save.efer = efer | EFER_SVME;
        vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+       return 0;
 }
 
 static int is_external_interrupt(u32 info)
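
svm_set_efer() now returns an error: nested state is allocated when the guest sets EFER.SVME and freed when it clears the bit, and the old EFER value is restored if the allocation fails. Below is a minimal user-space sketch of that flip-and-rollback pattern; the toy_ names are illustrative, not kernel APIs.

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define EFER_SVME (1ULL << 12)                   /* SVME is EFER bit 12 */

    struct toy_vcpu {
            uint64_t efer;
            bool nested_state;                       /* stand-in for svm->nested */
    };

    static int  toy_allocate_nested(struct toy_vcpu *v) { v->nested_state = true;  return 0; }
    static void toy_free_nested(struct toy_vcpu *v)     { v->nested_state = false; }

    static int toy_set_efer(struct toy_vcpu *v, uint64_t efer)
    {
            uint64_t old_efer = v->efer;

            v->efer = efer;
            if ((old_efer ^ efer) & EFER_SVME) {     /* SVME actually flipped */
                    if (!(efer & EFER_SVME)) {
                            toy_free_nested(v);      /* guest turned SVM off */
                    } else if (toy_allocate_nested(v)) {
                            v->efer = old_efer;      /* roll back on failure */
                            return -ENOMEM;
                    }
            }
            return 0;
    }

The XOR test acts only on a real SVME transition, so repeated writes of the same EFER value never allocate or free nested state.
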
@@ -553,18 +573,44 @@ free_cpu_data:
 
 }
 
-static bool valid_msr_intercept(u32 index)
+static int direct_access_msr_slot(u32 msr)
 {
-       int i;
+       u32 i;
 
        for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
-               if (direct_access_msrs[i].index == index)
-                       return true;
+               if (direct_access_msrs[i].index == msr)
+                       return i;
 
-       return false;
+       return -ENOENT;
+}
+
+static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
+                                    int write)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       int slot = direct_access_msr_slot(msr);
+
+       if (slot == -ENOENT)
+               return;
+
+       /* Set the shadow bitmaps to the desired intercept states */
+       if (read)
+               set_bit(slot, svm->shadow_msr_intercept.read);
+       else
+               clear_bit(slot, svm->shadow_msr_intercept.read);
+
+       if (write)
+               set_bit(slot, svm->shadow_msr_intercept.write);
+       else
+               clear_bit(slot, svm->shadow_msr_intercept.write);
 }
 
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
+static bool valid_msr_intercept(u32 index)
+{
+       return direct_access_msr_slot(index) != -ENOENT;
+}
+
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 {
        u8 bit_write;
        unsigned long tmp;
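
set_shadow_msr_intercept() records, per direct-access MSR slot, the pass-through state KVM itself wants; svm_msr_filter_changed(), added further down in this patch, replays that state through the user-space MSR filter to rebuild the effective permission bitmap. A compressed sketch of the idea, assuming a toy 16-slot table and illustrative toy_ names:

    #include <stdbool.h>
    #include <stdint.h>

    #define TOY_NR_MSRS 16                  /* stand-in for MAX_DIRECT_ACCESS_MSRS */

    struct toy_shadow {
            uint32_t read;                  /* bit n set: pass slot n's reads through */
            uint32_t write;
    };

    /* Remember what KVM itself wants for a slot, independent of any filter. */
    static void toy_set_shadow(struct toy_shadow *s, int slot, bool read, bool write)
    {
            uint32_t bit = 1u << slot;

            if (read)
                    s->read |= bit;
            else
                    s->read &= ~bit;

            if (write)
                    s->write |= bit;
            else
                    s->write &= ~bit;
    }

    /* On a filter change, replay the remembered state through the new filter. */
    static void toy_filter_changed(const struct toy_shadow *s,
                                   bool (*allowed)(int slot, bool write),
                                   struct toy_shadow *effective)
    {
            int slot;

            effective->read = effective->write = 0;
            for (slot = 0; slot < TOY_NR_MSRS; slot++) {
                    uint32_t bit = 1u << slot;

                    if ((s->read & bit) && allowed(slot, false))
                            effective->read |= bit;
                    if ((s->write & bit) && allowed(slot, true))
                            effective->write |= bit;
            }
    }

Because the shadow bits themselves are never filtered, a later filter change cannot lose the intercept state KVM asked for.
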
@@ -583,8 +629,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
        return !!test_bit(bit_write,  &tmp);
 }
 
-static void set_msr_interception(u32 *msrpm, unsigned msr,
-                                int read, int write)
+static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
+                                       u32 msr, int read, int write)
 {
        u8 bit_read, bit_write;
        unsigned long tmp;
@@ -596,6 +642,13 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
         */
        WARN_ON(!valid_msr_intercept(msr));
 
+       /* Force interception of MSRs that the filter does not allow */
+       if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
+               read = 0;
+
+       if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
+               write = 0;
+
        offset    = svm_msrpm_offset(msr);
        bit_read  = 2 * (msr & 0x0f);
        bit_write = 2 * (msr & 0x0f) + 1;
@@ -609,17 +662,60 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
        msrpm[offset] = tmp;
 }
 
-static void svm_vcpu_init_msrpm(u32 *msrpm)
+static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+                                int read, int write)
 {
-       int i;
+       set_shadow_msr_intercept(vcpu, msr, read, write);
+       set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
+}
+
+u32 *svm_vcpu_alloc_msrpm(void)
+{
+       struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
+       u32 *msrpm;
+
+       if (!pages)
+               return NULL;
 
+       msrpm = page_address(pages);
        memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
 
+       return msrpm;
+}
+
+void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
+{
+       int i;
+
        for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
                if (!direct_access_msrs[i].always)
                        continue;
+               set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
+       }
+}
 
-               set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
+
+void svm_vcpu_free_msrpm(u32 *msrpm)
+{
+       __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER);
+}
+
+static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u32 i;
+
+       /*
+        * Set intercept permissions for all direct access MSRs again. They
+        * will automatically get filtered through the MSR filter, so we are
+        * back in sync after this.
+        */
+       for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
+               u32 msr = direct_access_msrs[i].index;
+               u32 read = test_bit(i, svm->shadow_msr_intercept.read);
+               u32 write = test_bit(i, svm->shadow_msr_intercept.write);
+
+               set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write);
        }
 }
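
svm_vcpu_alloc_msrpm() returns the 8 KiB MSR permission map (an order-1, two-page allocation) preset to 0xff, i.e. every MSR intercepted, and set_msr_interception_bitmap() then clears individual bits to grant direct access. Each MSR owns two adjacent bits (even bit = read intercept, odd bit = write intercept), sixteen MSRs per 32-bit word, spread over the three architectural MSR ranges. A self-contained sketch of that addressing follows; toy_msrpm_word() stands in for svm_msrpm_offset(), which is not part of this hunk, and the range constants are taken from the AMD APM.

    #include <stdint.h>

    static const uint32_t toy_ranges[] = { 0x00000000, 0xc0000000, 0xc0010000 };
    #define TOY_MSRS_PER_RANGE   0x2000u
    #define TOY_BYTES_PER_RANGE  (TOY_MSRS_PER_RANGE * 2 / 8)      /* 2048 bytes per range */

    /* u32 index inside the 8 KiB bitmap, or -1 if the MSR is not covered. */
    static int toy_msrpm_word(uint32_t msr)
    {
            unsigned int i;

            for (i = 0; i < sizeof(toy_ranges) / sizeof(toy_ranges[0]); i++) {
                    if (msr - toy_ranges[i] < TOY_MSRS_PER_RANGE) {
                            uint32_t byte = (msr - toy_ranges[i]) / 4   /* 4 MSRs per byte */
                                          + i * TOY_BYTES_PER_RANGE;
                            return (int)(byte / 4);                     /* bytes -> u32 words */
                    }
            }
            return -1;
    }

    static void toy_set_passthrough(uint32_t *msrpm, uint32_t msr, int read, int write)
    {
            int word = toy_msrpm_word(msr);
            uint32_t bit_read  = 2 * (msr & 0x0f);      /* even bit: read intercept */
            uint32_t bit_write = bit_read + 1;          /* odd bit: write intercept */

            if (word < 0)
                    return;

            /* A set bit means "intercept"; pass-through clears it. */
            if (read)
                    msrpm[word] &= ~(1u << bit_read);
            else
                    msrpm[word] |= 1u << bit_read;

            if (write)
                    msrpm[word] &= ~(1u << bit_write);
            else
                    msrpm[word] |= 1u << bit_write;
    }
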
 
@@ -666,26 +762,26 @@ static void init_msrpm_offsets(void)
        }
 }
 
-static void svm_enable_lbrv(struct vcpu_svm *svm)
+static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 {
-       u32 *msrpm = svm->msrpm;
+       struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 }
 
-static void svm_disable_lbrv(struct vcpu_svm *svm)
+static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
 {
-       u32 *msrpm = svm->msrpm;
+       struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
-       set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
-       set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
 void disable_nmi_singlestep(struct vcpu_svm *svm)
@@ -813,6 +909,9 @@ static __init void svm_set_cpu_caps(void)
        if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
            boot_cpu_has(X86_FEATURE_AMD_SSBD))
                kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+       /* Enable INVPCID feature */
+       kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
 }
 
 static __init int svm_hardware_setup(void)
@@ -985,6 +1084,21 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        return svm->vmcb->control.tsc_offset;
 }
 
+static void svm_check_invpcid(struct vcpu_svm *svm)
+{
+       /*
+        * Intercept the INVPCID instruction only when shadow paging is in
+        * use. Interception is not required when nested paging (NPT) is
+        * enabled.
+        */
+       if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
+               if (!npt_enabled)
+                       svm_set_intercept(svm, INTERCEPT_INVPCID);
+               else
+                       svm_clr_intercept(svm, INTERCEPT_INVPCID);
+       }
+}
+
 static void init_vmcb(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
@@ -992,14 +1106,14 @@ static void init_vmcb(struct vcpu_svm *svm)
 
        svm->vcpu.arch.hflags = 0;
 
-       set_cr_intercept(svm, INTERCEPT_CR0_READ);
-       set_cr_intercept(svm, INTERCEPT_CR3_READ);
-       set_cr_intercept(svm, INTERCEPT_CR4_READ);
-       set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
-       set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
-       set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
+       svm_set_intercept(svm, INTERCEPT_CR0_READ);
+       svm_set_intercept(svm, INTERCEPT_CR3_READ);
+       svm_set_intercept(svm, INTERCEPT_CR4_READ);
+       svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
+       svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
+       svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
        if (!kvm_vcpu_apicv_active(&svm->vcpu))
-               set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+               svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
 
        set_dr_intercepts(svm);
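
The set_cr_intercept()/clr_cr_intercept() helpers disappear because the VMCB control area now keeps every intercept (CR, DR, exception, instruction) in one array of 32-bit words addressed by a single global bit number, so the generic svm_set_intercept()/svm_clr_intercept() can be used everywhere. A sketch of that representation with illustrative toy_ names; the kernel implements it with __set_bit() and friends, but the effect is the same.

    #include <stdbool.h>
    #include <stdint.h>

    #define TOY_MAX_INTERCEPT_WORDS 5       /* enough words for this sketch */

    struct toy_vmcb_control {
            uint32_t intercepts[TOY_MAX_INTERCEPT_WORDS];
    };

    static void toy_set_intercept(struct toy_vmcb_control *c, unsigned int bit)
    {
            c->intercepts[bit / 32] |= 1u << (bit % 32);
    }

    static void toy_clr_intercept(struct toy_vmcb_control *c, unsigned int bit)
    {
            c->intercepts[bit / 32] &= ~(1u << (bit % 32));
    }

    static bool toy_is_intercept(const struct toy_vmcb_control *c, unsigned int bit)
    {
            return c->intercepts[bit / 32] & (1u << (bit % 32));
    }

This is also why dump_vmcb() further down prints control->intercepts[INTERCEPT_WORD3] and [INTERCEPT_WORD4] instead of one 64-bit intercept field.
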
 
@@ -1094,15 +1208,15 @@ static void init_vmcb(struct vcpu_svm *svm)
                control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
                svm_clr_intercept(svm, INTERCEPT_INVLPG);
                clr_exception_intercept(svm, PF_VECTOR);
-               clr_cr_intercept(svm, INTERCEPT_CR3_READ);
-               clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
+               svm_clr_intercept(svm, INTERCEPT_CR3_READ);
+               svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
                save->g_pat = svm->vcpu.arch.pat;
                save->cr3 = 0;
                save->cr4 = 0;
        }
        svm->asid_generation = 0;
 
-       svm->nested.vmcb = 0;
+       svm->nested.vmcb12_gpa = 0;
        svm->vcpu.arch.hflags = 0;
 
        if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
@@ -1114,6 +1228,8 @@ static void init_vmcb(struct vcpu_svm *svm)
                svm_clr_intercept(svm, INTERCEPT_PAUSE);
        }
 
+       svm_check_invpcid(svm);
+
        if (kvm_vcpu_apicv_active(&svm->vcpu))
                avic_init_vmcb(svm);
 
@@ -1171,35 +1287,20 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm;
-       struct page *page;
-       struct page *msrpm_pages;
-       struct page *hsave_page;
-       struct page *nested_msrpm_pages;
+       struct page *vmcb_page;
        int err;
 
        BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
        svm = to_svm(vcpu);
 
        err = -ENOMEM;
-       page = alloc_page(GFP_KERNEL_ACCOUNT);
-       if (!page)
+       vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+       if (!vmcb_page)
                goto out;
 
-       msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
-       if (!msrpm_pages)
-               goto free_page1;
-
-       nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
-       if (!nested_msrpm_pages)
-               goto free_page2;
-
-       hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
-       if (!hsave_page)
-               goto free_page3;
-
        err = avic_init_vcpu(svm);
        if (err)
-               goto free_page4;
+               goto error_free_vmcb_page;
 
        /* We initialize this flag to true to make sure that the is_running
         * bit would be set the first time the vcpu is loaded.
@@ -1207,18 +1308,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
                svm->avic_is_running = true;
 
-       svm->nested.hsave = page_address(hsave_page);
-       clear_page(svm->nested.hsave);
-
-       svm->msrpm = page_address(msrpm_pages);
-       svm_vcpu_init_msrpm(svm->msrpm);
+       svm->msrpm = svm_vcpu_alloc_msrpm();
+       if (!svm->msrpm)
+               goto error_free_vmcb_page;
 
-       svm->nested.msrpm = page_address(nested_msrpm_pages);
-       svm_vcpu_init_msrpm(svm->nested.msrpm);
+       svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
-       svm->vmcb = page_address(page);
-       clear_page(svm->vmcb);
-       svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
+       svm->vmcb = page_address(vmcb_page);
+       svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
        svm->asid_generation = 0;
        init_vmcb(svm);
 
@@ -1227,14 +1324,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
        return 0;
 
-free_page4:
-       __free_page(hsave_page);
-free_page3:
-       __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
-free_page2:
-       __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
-free_page1:
-       __free_page(page);
+error_free_vmcb_page:
+       __free_page(vmcb_page);
 out:
        return err;
 }
@@ -1258,10 +1349,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
         */
        svm_clear_current_vmcb(svm->vmcb);
 
+       svm_free_nested(svm);
+
        __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
-       __free_page(virt_to_page(svm->nested.hsave));
-       __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1549,11 +1640,11 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
        vmcb_mark_dirty(svm->vmcb, VMCB_CR);
 
        if (gcr0 == *hcr0) {
-               clr_cr_intercept(svm, INTERCEPT_CR0_READ);
-               clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+               svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+               svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
        } else {
-               set_cr_intercept(svm, INTERCEPT_CR0_READ);
-               set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+               svm_set_intercept(svm, INTERCEPT_CR0_READ);
+               svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
        }
 }
 
@@ -2224,12 +2315,9 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
 {
        unsigned long cr0 = svm->vcpu.arch.cr0;
        bool ret = false;
-       u64 intercept;
-
-       intercept = svm->nested.ctl.intercept;
 
        if (!is_guest_mode(&svm->vcpu) ||
-           (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
+           (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
                return false;
 
        cr0 &= ~SVM_CR0_SELECTIVE_MASK;
@@ -2267,6 +2355,7 @@ static int cr_interception(struct vcpu_svm *svm)
        if (cr >= 16) { /* mov to cr */
                cr -= 16;
                val = kvm_register_read(&svm->vcpu, reg);
+               trace_kvm_cr_write(cr, val);
                switch (cr) {
                case 0:
                        if (!check_selective_cr0_intercepted(svm, val))
@@ -2312,6 +2401,7 @@ static int cr_interception(struct vcpu_svm *svm)
                        return 1;
                }
                kvm_register_write(&svm->vcpu, reg, val);
+               trace_kvm_cr_read(cr, val);
        }
        return kvm_complete_insn_gp(&svm->vcpu, err);
 }
@@ -2562,7 +2652,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 * We update the L1 MSR bit as well since it will end up
                 * touching the MSR anyway now.
                 */
-               set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr->host_initiated &&
@@ -2577,7 +2667,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                        break;
 
                wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-               set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
                break;
        case MSR_AMD64_VIRT_SPEC_CTRL:
                if (!msr->host_initiated &&
@@ -2641,9 +2731,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                svm->vmcb->save.dbgctl = data;
                vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
                if (data & (1ULL<<0))
-                       svm_enable_lbrv(svm);
+                       svm_enable_lbrv(vcpu);
                else
-                       svm_disable_lbrv(svm);
+                       svm_disable_lbrv(vcpu);
                break;
        case MSR_VM_HSAVE_PA:
                svm->nested.hsave_msr = data;
@@ -2739,6 +2829,33 @@ static int mwait_interception(struct vcpu_svm *svm)
        return nop_interception(svm);
 }
 
+static int invpcid_interception(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       unsigned long type;
+       gva_t gva;
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       /*
+        * For an INVPCID intercept:
+        * EXITINFO1 provides the linear address of the memory operand.
+        * EXITINFO2 provides the contents of the register operand.
+        */
+       type = svm->vmcb->control.exit_info_2;
+       gva = svm->vmcb->control.exit_info_1;
+
+       if (type > 3) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+
+       return kvm_handle_invpcid(vcpu, type, gva);
+}
+
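
invpcid_interception() takes the INVPCID type from the register operand (EXITINFO2) and the descriptor address from the memory operand (EXITINFO1), and injects #GP(0) for any type above 3 before handing off to kvm_handle_invpcid(). A short sketch of the four architectural types and the 16-byte descriptor, using illustrative toy_ names:

    #include <stdint.h>

    enum toy_invpcid_type {
            TOY_INVPCID_ADDR            = 0,    /* one linear address in one PCID      */
            TOY_INVPCID_SINGLE_CTX      = 1,    /* all entries for one PCID            */
            TOY_INVPCID_ALL_INCL_GLOBAL = 2,    /* all contexts, including globals     */
            TOY_INVPCID_ALL_NON_GLOBAL  = 3,    /* all contexts, excluding globals     */
    };

    struct toy_invpcid_desc {
            uint64_t pcid_and_rsvd;             /* bits 11:0 = PCID, rest must be zero */
            uint64_t linear_addr;               /* consulted only for type 0           */
    };

    /* Mirror of the range check above: anything past type 3 earns a #GP(0). */
    static int toy_check_invpcid_type(uint64_t type)
    {
            return type > TOY_INVPCID_ALL_NON_GLOBAL ? -1 : 0;
    }
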
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR0]                     = cr_interception,
        [SVM_EXIT_READ_CR3]                     = cr_interception,
@@ -2801,6 +2918,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_RDPRU]                        = rdpru_interception,
+       [SVM_EXIT_INVPCID]                      = invpcid_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
@@ -2819,12 +2937,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        }
 
        pr_err("VMCB Control Area:\n");
-       pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
-       pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
-       pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
-       pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
-       pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
-       pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
+       pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
+       pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
+       pr_err("%-20s%04x\n", "dr_read:", control->intercepts[INTERCEPT_DR] & 0xffff);
+       pr_err("%-20s%04x\n", "dr_write:", control->intercepts[INTERCEPT_DR] >> 16);
+       pr_err("%-20s%08x\n", "exceptions:", control->intercepts[INTERCEPT_EXCEPTION]);
+       pr_err("%-20s%08x %08x\n", "intercepts:",
+              control->intercepts[INTERCEPT_WORD3],
+              control->intercepts[INTERCEPT_WORD4]);
        pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
        pr_err("%-20s%d\n", "pause filter threshold:",
               control->pause_filter_thresh);
@@ -2923,12 +3043,19 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "excp_to:", save->last_excp_to);
 }
 
-static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+                             u32 *intr_info, u32 *error_code)
 {
        struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
 
        *info1 = control->exit_info_1;
        *info2 = control->exit_info_2;
+       *intr_info = control->exit_int_info;
+       if ((*intr_info & SVM_EXITINTINFO_VALID) &&
+           (*intr_info & SVM_EXITINTINFO_VALID_ERR))
+               *error_code = control->exit_int_info_err;
+       else
+               *error_code = 0;
 }
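
svm_get_exit_info() now also hands back the exit interrupt info, and reports an error code only when EXITINTINFO has both its valid bit and its error-code-valid bit set. A sketch of that decoding; the bit positions follow the AMD EVENTINJ/EXITINTINFO layout and the toy_ names are illustrative.

    #include <stdint.h>

    #define TOY_EXITINTINFO_VEC_MASK   0xffu
    #define TOY_EXITINTINFO_TYPE_SHIFT 8
    #define TOY_EXITINTINFO_TYPE_MASK  0x7u
    #define TOY_EXITINTINFO_VALID_ERR  (1u << 11)
    #define TOY_EXITINTINFO_VALID      (1u << 31)

    struct toy_exit_event {
            uint8_t  vector;
            uint8_t  type;
            uint32_t error_code;        /* zero unless the event carries one */
    };

    static struct toy_exit_event toy_decode_exitintinfo(uint32_t intr_info, uint32_t intr_err)
    {
            struct toy_exit_event ev = {
                    .vector = intr_info & TOY_EXITINTINFO_VEC_MASK,
                    .type   = (intr_info >> TOY_EXITINTINFO_TYPE_SHIFT) & TOY_EXITINTINFO_TYPE_MASK,
            };

            /* The error code field is only meaningful when both bits are set. */
            if ((intr_info & TOY_EXITINTINFO_VALID) &&
                (intr_info & TOY_EXITINTINFO_VALID_ERR))
                    ev.error_code = intr_err;

            return ev;
    }
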
 
 static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
@@ -2939,7 +3066,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
        trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-       if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
+       if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
                vcpu->arch.cr0 = svm->vmcb->save.cr0;
        if (npt_enabled)
                vcpu->arch.cr3 = svm->vmcb->save.cr3;
@@ -2947,12 +3074,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        if (is_guest_mode(vcpu)) {
                int vmexit;
 
-               trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
-                                       svm->vmcb->control.exit_info_1,
-                                       svm->vmcb->control.exit_info_2,
-                                       svm->vmcb->control.exit_int_info,
-                                       svm->vmcb->control.exit_int_info_err,
-                                       KVM_ISA_SVM);
+               trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM);
 
                vmexit = nested_svm_exit_special(svm);
 
@@ -3062,13 +3184,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
-       clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+       svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
 
        if (irr == -1)
                return;
 
        if (tpr >= irr)
-               set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+               svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
 bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
@@ -3256,7 +3378,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
-       if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
+       if (!svm_is_intercept(svm, INTERCEPT_CR8_WRITE)) {
                int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
                kvm_set_cr8(vcpu, cr8);
        }
@@ -3353,8 +3475,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
-       if (!is_guest_mode(vcpu) &&
-           to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+       if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
            to_svm(vcpu)->vmcb->control.exit_info_1)
                return handle_fastpath_set_msr_irqoff(vcpu);
 
@@ -3419,7 +3540,6 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
-       fastpath_t exit_fastpath;
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
@@ -3460,9 +3580,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        clgi();
        kvm_load_guest_xsave_state(vcpu);
 
-       if (lapic_in_kernel(vcpu) &&
-               vcpu->arch.apic->lapic_timer.timer_advance_ns)
-               kvm_wait_lapic_expire(vcpu);
+       kvm_wait_lapic_expire(vcpu);
 
        /*
         * If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -3542,8 +3660,11 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                svm_handle_mce(svm);
 
        svm_complete_interrupts(svm);
-       exit_fastpath = svm_exit_handlers_fastpath(vcpu);
-       return exit_fastpath;
+
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
+       return svm_exit_handlers_fastpath(vcpu);
 }
 
 static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
@@ -3629,6 +3750,9 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
        svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
                             guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
 
+       /* Check again whether INVPCID interception is required */
+       svm_check_invpcid(svm);
+
        if (!kvm_vcpu_apicv_active(vcpu))
                return;
 
@@ -3743,7 +3867,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                break;
        case SVM_EXIT_WRITE_CR0: {
                unsigned long cr0, val;
-               u64 intercept;
 
                if (info->intercept == x86_intercept_cr_write)
                        icpt_info.exit_code += info->modrm_reg;
@@ -3752,9 +3875,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                    info->intercept == x86_intercept_clts)
                        break;
 
-               intercept = svm->nested.ctl.intercept;
-
-               if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
+               if (!(vmcb_is_intercept(&svm->nested.ctl,
+                                       INTERCEPT_SELECTIVE_CR0)))
                        break;
 
                cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
@@ -3889,7 +4011,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
                /* FED8h - SVM Guest */
                put_smstate(u64, smstate, 0x7ed8, 1);
                /* FEE0h - SVM Guest VMCB Physical Address */
-               put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
+               put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
 
                svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
                svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -3911,7 +4033,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
                u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
-               u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
+               u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
 
                if (guest) {
                        if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -3921,10 +4043,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
                                return 1;
 
                        if (kvm_vcpu_map(&svm->vcpu,
-                                        gpa_to_gfn(vmcb), &map) == -EINVAL)
+                                        gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+                               return 1;
+
+                       if (svm_allocate_nested(svm))
                                return 1;
 
-                       ret = enter_svm_guest_mode(svm, vmcb, map.hva);
+                       ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva);
                        kvm_vcpu_unmap(&svm->vcpu, &map, true);
                }
        }
@@ -3945,19 +4070,10 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
        }
 }
 
-static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
 {
-       unsigned long cr4 = kvm_read_cr4(vcpu);
-       bool smep = cr4 & X86_CR4_SMEP;
-       bool smap = cr4 & X86_CR4_SMAP;
-       bool is_user = svm_get_cpl(vcpu) == 3;
-
-       /*
-        * If RIP is invalid, go ahead with emulation which will cause an
-        * internal error exit.
-        */
-       if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
-               return true;
+       bool smep, smap, is_user;
+       unsigned long cr4;
 
        /*
         * Detect and work around Erratum 1096 Fam_17h_00_0Fh.
@@ -3999,6 +4115,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
         * instruction pointer, so we will not be able to work around it.
         * Let's print the error and request to kill the guest.
         */
+       if (likely(!insn || insn_len))
+               return true;
+
+       /*
+        * If RIP is invalid, go ahead with emulation which will cause an
+        * internal error exit.
+        */
+       if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+               return true;
+
+       cr4 = kvm_read_cr4(vcpu);
+       smep = cr4 & X86_CR4_SMEP;
+       smap = cr4 & X86_CR4_SMAP;
+       is_user = svm_get_cpl(vcpu) == 3;
        if (smap && (!smep || is_user)) {
                if (!sev_guest(vcpu->kvm))
                        return true;
@@ -4022,7 +4152,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
         * if an INIT signal is pending.
         */
        return !gif_set(svm) ||
-                  (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
+                  (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
 static void svm_vm_destroy(struct kvm *kvm)
@@ -4160,9 +4290,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .mem_enc_reg_region = svm_register_enc_region,
        .mem_enc_unreg_region = svm_unregister_enc_region,
 
-       .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
+       .can_emulate_instruction = svm_can_emulate_instruction,
 
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+       .msr_filter_changed = svm_msr_filter_changed,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {