Staging: Merge staging-next into Linus's tree
arch/x86/kvm/vmx.c
index ee03679efe788d61e73d76f818931d8c1a45fad2..27a0222c29460d79e6e65992ac00fc2845f8f25b 100644
@@ -5,6 +5,7 @@
  * machines without emulation or binary translation.
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
@@ -36,6 +37,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -63,6 +66,9 @@ module_param_named(unrestricted_guest,
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
+/*
+ * With vmm_exclusive (the default), VMX operation is enabled once per CPU
+ * in hardware_enable() and stays on; with vmm_exclusive=0, VMXON/VMXOFF
+ * bracket each vcpu_load()/vcpu_put() so other VMX users can share the CPU.
+ */
+static int __read_mostly vmm_exclusive = 1;
+module_param(vmm_exclusive, bool, S_IRUGO);
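+/* e.g. "modprobe kvm-intel vmm_exclusive=0" to coexist with another VMX user. */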
+
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST                          \
        (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK                                             \
@@ -173,10 +179,13 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 
 static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
+static void kvm_cpu_vmxon(u64 addr);
+static void kvm_cpu_vmxoff(void);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
@@ -334,6 +343,11 @@ static inline bool cpu_has_vmx_ept_1g_page(void)
        return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
 }
 
+static inline bool cpu_has_vmx_ept_4levels(void)
+{
+       return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
+}
+
 static inline bool cpu_has_vmx_invept_individual_addr(void)
 {
        return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
@@ -349,6 +363,16 @@ static inline bool cpu_has_vmx_invept_global(void)
        return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
 }
 
+static inline bool cpu_has_vmx_invvpid_single(void)
+{
+       return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
+}
+
+static inline bool cpu_has_vmx_invvpid_global(void)
+{
+       return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+}
+
 static inline bool cpu_has_vmx_ept(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -389,6 +413,12 @@ static inline bool cpu_has_virtual_nmis(void)
        return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
 }
 
+static inline bool cpu_has_vmx_wbinvd_exit(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_WBINVD_EXITING;
+}
+
 static inline bool report_flexpriority(void)
 {
        return flexpriority_enabled;
@@ -453,6 +483,19 @@ static void vmcs_clear(struct vmcs *vmcs)
                       vmcs, phys_addr);
 }
 
+/*
+ * VMPTRLD: make @vmcs the current, active VMCS on this CPU, so that
+ * subsequent VMREAD/VMWRITE operations act on it.
+ */
+static void vmcs_load(struct vmcs *vmcs)
+{
+       u64 phys_addr = __pa(vmcs);
+       u8 error;
+
+       asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
+                       : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+                       : "cc", "memory");
+       if (error)
+               printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+                      vmcs, phys_addr);
+}
+
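+/* Runs on the CPU that owns this vcpu's VMCS (see vcpu_clear()) to VMCLEAR it. */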
 static void __vcpu_clear(void *arg)
 {
        struct vcpu_vmx *vmx = arg;
@@ -475,12 +518,27 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
        smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
 }
 
-static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
 {
        if (vmx->vpid == 0)
                return;
 
-       __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+       if (cpu_has_vmx_invvpid_single())
+               __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+}
+
+static inline void vpid_sync_vcpu_global(void)
+{
+       if (cpu_has_vmx_invvpid_global())
+               __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
+}
+
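+/*
+ * Flush VPID-tagged translations for one vcpu, falling back to a global
+ * flush when single-context INVVPID is not supported.
+ */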
+static inline void vpid_sync_context(struct vcpu_vmx *vmx)
+{
+       if (cpu_has_vmx_invvpid_single())
+               vpid_sync_vcpu_single(vmx);
+       else
+               vpid_sync_vcpu_global();
 }
 
 static inline void ept_sync_global(void)
@@ -812,6 +870,9 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
                wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
        }
 #endif
+       /* Every VM exit sets CR0.TS (see the HOST_CR0 setup); undo that if
+        * the host task was actually using the FPU. */
+       if (current_thread_info()->status & TS_USEDFPU)
+               clts();
+       /* VM exits load a GDTR limit of 0xffff; reload the real host GDT. */
+       load_gdt(&__get_cpu_var(host_gdt));
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -828,35 +889,30 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u64 phys_addr = __pa(vmx->vmcs);
        u64 tsc_this, delta, new_offset;
+       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 
-       if (vcpu->cpu != cpu) {
+       /*
+        * In non-exclusive mode, VMX operation is entered here and left
+        * again in vmx_vcpu_put().
+        */
+       if (!vmm_exclusive)
+               kvm_cpu_vmxon(phys_addr);
+       else if (vcpu->cpu != cpu)
                vcpu_clear(vmx);
-               kvm_migrate_timers(vcpu);
-               set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-               local_irq_disable();
-               list_add(&vmx->local_vcpus_link,
-                        &per_cpu(vcpus_on_cpu, cpu));
-               local_irq_enable();
-       }
 
        if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
-               u8 error;
-
                per_cpu(current_vmcs, cpu) = vmx->vmcs;
-               asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-                             : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
-                             : "cc");
-               if (error)
-                       printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-                              vmx->vmcs, phys_addr);
+               vmcs_load(vmx->vmcs);
        }
 
        if (vcpu->cpu != cpu) {
                struct desc_ptr dt;
                unsigned long sysenter_esp;
 
+               kvm_migrate_timers(vcpu);
+               kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+               local_irq_disable();
+               list_add(&vmx->local_vcpus_link,
+                        &per_cpu(vcpus_on_cpu, cpu));
+               local_irq_enable();
+
                vcpu->cpu = cpu;
                /*
                 * Linux uses per-cpu TSS and GDT, so set these when switching
@@ -884,6 +940,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
        __vmx_load_host_state(to_vmx(vcpu));
+       /* Non-exclusive mode: release the VMCS and leave VMX operation. */
+       if (!vmm_exclusive) {
+               __vcpu_clear(to_vmx(vcpu));
+               kvm_cpu_vmxoff();
+       }
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -1286,6 +1346,13 @@ static __init int vmx_disabled_by_bios(void)
        /* locked but not enabled */
 }
 
+/*
+ * Enter VMX root operation; @addr is the physical address of this CPU's
+ * VMXON region. The caller must already have set CR4.VMXE.
+ */
+static void kvm_cpu_vmxon(u64 addr)
+{
+       asm volatile (ASM_VMX_VMXON_RAX
+                       : : "a"(&addr), "m"(addr)
+                       : "memory", "cc");
+}
+
 static int hardware_enable(void *garbage)
 {
        int cpu = raw_smp_processor_id();
@@ -1308,11 +1375,13 @@ static int hardware_enable(void *garbage)
                wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
        }
        write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
-       asm volatile (ASM_VMX_VMXON_RAX
-                     : : "a"(&phys_addr), "m"(phys_addr)
-                     : "memory", "cc");
 
-       ept_sync_global();
+       if (vmm_exclusive) {
+               kvm_cpu_vmxon(phys_addr);
+               ept_sync_global();
+       }
+
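+       /* Save the GDT descriptor; VM exits reset GDTR.limit to 0xffff. */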
+       store_gdt(&__get_cpu_var(host_gdt));
 
        return 0;
 }
@@ -1334,13 +1403,15 @@ static void vmclear_local_vcpus(void)
 static void kvm_cpu_vmxoff(void)
 {
        asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
-       write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
 static void hardware_disable(void *garbage)
 {
-       vmclear_local_vcpus();
-       kvm_cpu_vmxoff();
+       if (vmm_exclusive) {
+               vmclear_local_vcpus();
+               kvm_cpu_vmxoff();
+       }
+       write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1539,7 +1610,8 @@ static __init int hardware_setup(void)
        if (!cpu_has_vmx_vpid())
                enable_vpid = 0;
 
-       if (!cpu_has_vmx_ept()) {
+       if (!cpu_has_vmx_ept() ||
+           !cpu_has_vmx_ept_4levels()) {
                enable_ept = 0;
                enable_unrestricted_guest = 0;
        }
@@ -1628,7 +1700,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
                gfn_t base_gfn;
 
                slots = kvm_memslots(kvm);
-               base_gfn = kvm->memslots->memslots[0].base_gfn +
-                                kvm->memslots->memslots[0].npages - 3;
+               base_gfn = slots->memslots[0].base_gfn +
+                                slots->memslots[0].npages - 3;
                return base_gfn << PAGE_SHIFT;
        }
@@ -1759,9 +1831,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
-       vpid_sync_vcpu_all(to_vmx(vcpu));
-       if (enable_ept)
+       vpid_sync_context(to_vmx(vcpu));
+       if (enable_ept) {
+               /* Nothing to flush before an EPT root is loaded. */
+               if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+                       return;
                ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+       }
 }
 
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -2507,7 +2582,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
        vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
 
-       vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
+       /* TS is set once here; __vmx_load_host_state() clears it as needed. */
+       vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
        vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
        vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 
@@ -2599,21 +2674,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 static int init_rmode(struct kvm *kvm)
 {
+       int idx, ret = 0;
+
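+       /* Accessing memslots needs the srcu read lock, taken here now that
+        * vmx_vcpu_reset() no longer takes it. */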
+       idx = srcu_read_lock(&kvm->srcu);
        if (!init_rmode_tss(kvm))
-               return 0;
+               goto exit;
        if (!init_rmode_identity_map(kvm))
-               return 0;
-       return 1;
+               goto exit;
+
+       ret = 1;
+exit:
+       srcu_read_unlock(&kvm->srcu, idx);
+       return ret;
 }
 
 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 msr;
-       int ret, idx;
+       int ret;
 
        vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
        if (!init_rmode(vmx->vcpu.kvm)) {
                ret = -ENOMEM;
                goto out;
@@ -2630,7 +2711,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                msr |= MSR_IA32_APICBASE_BSP;
        kvm_set_apic_base(&vmx->vcpu, msr);
 
-       fx_init(&vmx->vcpu);
+       ret = fx_init(&vmx->vcpu);
+       if (ret != 0)
+               goto out;
 
        seg_setup(VCPU_SREG_CS);
        /*
@@ -2713,7 +2796,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmx_fpu_activate(&vmx->vcpu);
        update_exception_bitmap(&vmx->vcpu);
 
-       vpid_sync_vcpu_all(vmx);
+       vpid_sync_context(vmx);
 
        ret = 0;
 
@@ -2721,7 +2804,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmx->emulation_required = 0;
 
 out:
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
        return ret;
 }
 
@@ -2826,9 +2908,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
        if (!cpu_has_virtual_nmis())
                return to_vmx(vcpu)->soft_vnmi_blocked;
-       else
-               return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-                         GUEST_INTR_STATE_NMI);
+       return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
 }
 
 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@ -3070,7 +3150,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
        ++vcpu->stat.io_exits;
 
        if (string || in)
-               return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
+               return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
@@ -3090,11 +3170,20 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
        hypercall[2] = 0xc1;
 }
 
+/* Common tail for emulated CR writes: inject #GP on failure, otherwise
+ * advance the guest RIP past the instruction. */
+static void complete_insn_gp(struct kvm_vcpu *vcpu, int err)
+{
+       if (err)
+               kvm_inject_gp(vcpu, 0);
+       else
+               skip_emulated_instruction(vcpu);
+}
+
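+/* Dispatch CR-access exits: mov to/from CRn, plus clts and lmsw. */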
 static int handle_cr(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification, val;
        int cr;
        int reg;
+       int err;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        cr = exit_qualification & 15;
@@ -3105,16 +3194,16 @@ static int handle_cr(struct kvm_vcpu *vcpu)
                trace_kvm_cr_write(cr, val);
                switch (cr) {
                case 0:
-                       kvm_set_cr0(vcpu, val);
-                       skip_emulated_instruction(vcpu);
+                       err = kvm_set_cr0(vcpu, val);
+                       complete_insn_gp(vcpu, err);
                        return 1;
                case 3:
-                       kvm_set_cr3(vcpu, val);
-                       skip_emulated_instruction(vcpu);
+                       err = kvm_set_cr3(vcpu, val);
+                       complete_insn_gp(vcpu, err);
                        return 1;
                case 4:
-                       kvm_set_cr4(vcpu, val);
-                       skip_emulated_instruction(vcpu);
+                       err = kvm_set_cr4(vcpu, val);
+                       complete_insn_gp(vcpu, err);
                        return 1;
                case 8: {
                                u8 cr8_prev = kvm_get_cr8(vcpu);
@@ -3321,30 +3410,25 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
        skip_emulated_instruction(vcpu);
-       /* TODO: Add support for VT-d/pass-through device */
+       kvm_emulate_wbinvd(vcpu);
        return 1;
 }
 
-static int handle_apic_access(struct kvm_vcpu *vcpu)
+/*
+ * XSETBV always causes a VM exit in VMX non-root operation; emulate it
+ * via kvm_set_xcr() and skip the instruction only on success.
+ */
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
 {
-       unsigned long exit_qualification;
-       enum emulation_result er;
-       unsigned long offset;
+       u64 new_bv = kvm_read_edx_eax(vcpu);
+       u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
 
-       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-       offset = exit_qualification & 0xffful;
-
-       er = emulate_instruction(vcpu, 0, 0, 0);
-
-       if (er !=  EMULATE_DONE) {
-               printk(KERN_ERR
-                      "Fail to handle apic access vmexit! Offset is 0x%lx\n",
-                      offset);
-               return -ENOEXEC;
-       }
+       if (kvm_set_xcr(vcpu, index, new_bv) == 0)
+               skip_emulated_instruction(vcpu);
        return 1;
 }
 
+static int handle_apic_access(struct kvm_vcpu *vcpu)
+{
+       return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
+}
+
 static int handle_task_switch(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3554,13 +3638,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                        goto out;
                }
 
-               if (err != EMULATE_DONE) {
-                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-                       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-                       vcpu->run->internal.ndata = 0;
-                       ret = 0;
-                       goto out;
-               }
+               if (err != EMULATE_DONE)
+                       return 0;
 
                if (signal_pending(current))
                        goto out;
@@ -3623,6 +3702,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
+       [EXIT_REASON_XSETBV]                  = handle_xsetbv,
        [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
        [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
        [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
@@ -3656,6 +3736,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
        if (enable_ept && is_paging(vcpu))
                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 
+       /* Bit 31 of the exit reason marks a VM entry that failed during
+        * guest-state checking or loading; report it to userspace. */
+       if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+               vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+               vcpu->run->fail_entry.hardware_entry_failure_reason
+                       = exit_reason;
+               return 0;
+       }
+
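+       /* vmx->fail covers the other case: VMLAUNCH/VMRESUME itself failed
+        * with a VM-instruction error, so the CPU never entered the guest. */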
        if (unlikely(vmx->fail)) {
                vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                vcpu->run->fail_entry.hardware_entry_failure_reason
@@ -3861,11 +3948,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                vmx_set_interrupt_shadow(vcpu, 0);
 
-       /*
-        * Loading guest fpu may have cleared host cr0.ts
-        */
-       vmcs_writel(HOST_CR0, read_cr0());
-
        asm(
                /* Store host registers */
                "push %%"R"dx; push %%"R"bp;"
@@ -4001,6 +4083,19 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
        kmem_cache_free(kvm_vcpu_cache, vmx);
 }
 
+/*
+ * VMCLEAR requires the CPU to be in VMX operation, so in non-exclusive
+ * mode enter and leave it around initializing a newly allocated VMCS.
+ */
+static inline void vmcs_init(struct vmcs *vmcs)
+{
+       u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id()));
+
+       if (!vmm_exclusive)
+               kvm_cpu_vmxon(phys_addr);
+
+       vmcs_clear(vmcs);
+
+       if (!vmm_exclusive)
+               kvm_cpu_vmxoff();
+}
+
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
        int err;
@@ -4026,7 +4121,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        if (!vmx->vmcs)
                goto free_msrs;
 
-       vmcs_clear(vmx->vmcs);
+       vmcs_init(vmx->vmcs);
 
        cpu = get_cpu();
        vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -4265,6 +4360,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .rdtscp_supported = vmx_rdtscp_supported,
 
        .set_supported_cpuid = vmx_set_supported_cpuid,
+
+       .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 };
 
 static int __init vmx_init(void)