Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dc621f73e96b3b0aad91a1ec0948cd22af13bfd5..536b78c4af6e6f3c067ed7f0497d8a29a01b2984 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
 static u32 __read_mostly tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 
-/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 1000;
-module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
+/*
+ * lapic timer advance (tscdeadline mode only) in nanoseconds.  '-1' enables
+ * adaptive tuning starting from default advancement of 1000ns.  '0' disables
+ * advancement entirely.  Any other value is used as-is and disables adaptive
+ * tuning, i.e. allows privileged userspace to set an exact advancement time.
+ */
+static int __read_mostly lapic_timer_advance_ns = -1;
+module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
 
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
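The tri-state is consumed at vCPU creation time (see the kvm_create_lapic() hunk at the end of this patch). A rough sketch of the intended consumer in lapic.c, which is not part of this file; the LAPIC_TIMER_ADVANCE_ADJUST_INIT name and the timer_advance_adjust_done flag are assumptions here:

        /* Illustrative only; the real logic lives in kvm_create_lapic(). */
        if (timer_advance_ns == -1) {
                /* -1: start from the 1000ns default and tune adaptively. */
                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
                apic->lapic_timer.timer_advance_adjust_done = false;
        } else {
                /* 0 (disabled) or an explicit value: use as-is, no tuning. */
                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
                apic->lapic_timer.timer_advance_adjust_done = true;
        }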
@@ -3717,15 +3721,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
         */
        valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
        while (valid) {
-               u64 feature = valid & -valid;
-               int index = fls64(feature) - 1;
-               void *src = get_xsave_addr(xsave, feature);
+               u64 xfeature_mask = valid & -valid;
+               int xfeature_nr = fls64(xfeature_mask) - 1;
+               void *src = get_xsave_addr(xsave, xfeature_nr);
 
                if (src) {
                        u32 size, offset, ecx, edx;
-                       cpuid_count(XSTATE_CPUID, index,
+                       cpuid_count(XSTATE_CPUID, xfeature_nr,
                                    &size, &offset, &ecx, &edx);
-                       if (feature == XFEATURE_MASK_PKRU)
+                       if (xfeature_nr == XFEATURE_PKRU)
                                memcpy(dest + offset, &vcpu->arch.pkru,
                                       sizeof(vcpu->arch.pkru));
                        else
@@ -3733,7 +3737,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 
                }
 
-               valid -= feature;
+               valid -= xfeature_mask;
        }
 }
 
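The rename clarifies a dense idiom: valid & -valid isolates the lowest set bit of the bitmap (a single-bit mask), fls64(mask) - 1 turns that mask into the feature number that CPUID leaf 0xD and the reworked get_xsave_addr() expect, and subtracting the mask clears the bit for the next pass. A standalone userspace illustration of the idiom, independent of KVM:

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                uint64_t valid = 0x240; /* bits 6 and 9: ZMM_Hi256 and PKRU */

                while (valid) {
                        uint64_t xfeature_mask = valid & -valid; /* lowest set bit */
                        /* 63 - clzll() on a single-bit mask == fls64() - 1 */
                        int xfeature_nr = 63 - __builtin_clzll(xfeature_mask);

                        printf("mask %#llx -> xfeature_nr %d\n",
                               (unsigned long long)xfeature_mask, xfeature_nr);
                        valid -= xfeature_mask; /* clear the bit, advance */
                }
                return 0; /* prints 0x40 -> 6, then 0x200 -> 9 */
        }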
@@ -3760,22 +3764,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
         */
        valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
        while (valid) {
-               u64 feature = valid & -valid;
-               int index = fls64(feature) - 1;
-               void *dest = get_xsave_addr(xsave, feature);
+               u64 xfeature_mask = valid & -valid;
+               int xfeature_nr = fls64(xfeature_mask) - 1;
+               void *dest = get_xsave_addr(xsave, xfeature_nr);
 
                if (dest) {
                        u32 size, offset, ecx, edx;
-                       cpuid_count(XSTATE_CPUID, index,
+                       cpuid_count(XSTATE_CPUID, xfeature_nr,
                                    &size, &offset, &ecx, &edx);
-                       if (feature == XFEATURE_MASK_PKRU)
+                       if (xfeature_nr == XFEATURE_PKRU)
                                memcpy(&vcpu->arch.pkru, src + offset,
                                       sizeof(vcpu->arch.pkru));
                        else
                                memcpy(dest, src + offset, size);
                }
 
-               valid -= feature;
+               valid -= xfeature_mask;
        }
 }
 
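Note the PKRU special case in both helpers: the guest's PKRU value is kept in vcpu->arch.pkru rather than read from or written to the xsave image being copied, consistent with the comment in kvm_load_guest_fpu() further down that PKRU is restored separately around kvm_x86_ops->run.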
@@ -6573,6 +6577,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
 
+static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.pio.count = 0;
+       return 1;
+}
+
 static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.pio.count = 0;
@@ -6589,12 +6599,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
        unsigned long val = kvm_rax_read(vcpu);
        int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
                                            size, port, &val, 1);
+       if (ret)
+               return ret;
 
-       if (!ret) {
+       /*
+        * Work around userspace that relies on the old KVM behavior of %rip
+        * being incremented prior to exiting to userspace to handle "OUT 0x7e".
+        */
+       if (port == 0x7e &&
+           kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
+               vcpu->arch.complete_userspace_io =
+                       complete_fast_pio_out_port_0x7e;
+               kvm_skip_emulated_instruction(vcpu);
+       } else {
                vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
                vcpu->arch.complete_userspace_io = complete_fast_pio_out;
        }
-       return ret;
+       return 0;
 }
 
 static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
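The quirk defaults to on, i.e. the old behavior is preserved; userspace that wants the architecturally correct RIP handling opts out per-VM. A sketch of how a VMM might do that, assuming the existing KVM_ENABLE_CAP/KVM_CAP_DISABLE_QUIRKS plumbing, which this patch only extends with a new quirk bit:

        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* vm_fd comes from KVM_CREATE_VM; returns 0 on success. */
        static int disable_out_7e_quirk(int vm_fd)
        {
                struct kvm_enable_cap cap;

                memset(&cap, 0, sizeof(cap));
                cap.cap = KVM_CAP_DISABLE_QUIRKS;
                cap.args[0] = KVM_X86_QUIRK_OUT_7E_INC_RIP; /* bitmask of quirks */

                return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
        }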
@@ -6714,10 +6735,8 @@ static void kvm_hyperv_tsc_notifier(void)
 }
 #endif
 
-static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-                                    void *data)
+static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
 {
-       struct cpufreq_freqs *freq = data;
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i, send_ipi = 0;
@@ -6761,17 +6780,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
         *
         */
 
-       if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
-               return 0;
-       if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
-               return 0;
-
-       smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+       smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
-                       if (vcpu->cpu != freq->cpu)
+                       if (vcpu->cpu != cpu)
                                continue;
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                        if (vcpu->cpu != smp_processor_id())
@@ -6793,8 +6807,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                 * guest context is entered kvmclock will be updated,
                 * so the guest will not see stale values.
                 */
-               smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+               smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
        }
+}
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+                                    void *data)
+{
+       struct cpufreq_freqs *freq = data;
+       int cpu;
+
+       if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+               return 0;
+       if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+               return 0;
+
+       for_each_cpu(cpu, freq->policy->cpus)
+               __kvmclock_cpufreq_notifier(freq, cpu);
+
        return 0;
 }
 
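This split tracks the cpufreq core change that invokes transition notifiers once per policy instead of once per affected CPU; the per-CPU body is factored out and fanned out over freq->policy->cpus. For context, the notifier itself is registered elsewhere in this file, unchanged by this patch, roughly as:

        static struct notifier_block kvmclock_cpufreq_notifier_block = {
                .notifier_call = kvmclock_cpufreq_notifier,
        };

        /* in kvm_timer_init(), only when the TSC actually scales with cpufreq */
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                          CPUFREQ_TRANSITION_NOTIFIER);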
@@ -7920,10 +7950,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
-       if (lapic_timer_advance_ns)
+       if (lapic_in_kernel(vcpu) &&
+           vcpu->arch.apic->lapic_timer.timer_advance_ns)
                wait_lapic_expire(vcpu);
        guest_enter_irqoff();
 
+       fpregs_assert_state_consistent();
+       if (test_thread_flag(TIF_NEED_FPU_LOAD))
+               switch_fpu_return();
+
        if (unlikely(vcpu->arch.switch_db_regs)) {
                set_debugreg(0, 7);
                set_debugreg(vcpu->arch.eff_db[0], 0);
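Two independent changes meet in this hunk. The wait_lapic_expire() gate now reads the per-vCPU timer_advance_ns seeded by kvm_create_lapic() (last hunk) instead of the module parameter, which is presumably why the EXPORT_SYMBOL_GPL could be dropped above. The new fpregs calls enforce the deferred-FPU-restore contract: if the vCPU task was scheduled out after kvm_load_guest_fpu(), the scheduler saved the live registers into current->thread.fpu and set TIF_NEED_FPU_LOAD, so the saved state must be put back before VM-entry. A hypothetical wrapper, not in this patch, just to name the rule:

        /* Illustrative helper; VM-entry must run with current's fpstate live. */
        static inline void kvm_ensure_fpregs_loaded(void)
        {
                fpregs_assert_state_consistent(); /* WARN if flag and regs disagree */
                if (test_thread_flag(TIF_NEED_FPU_LOAD))
                        switch_fpu_return(); /* reload current->thread.fpu */
        }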
@@ -8182,22 +8217,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 /* Swap (qemu) user FPU context for the guest FPU context. */
 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       preempt_disable();
+       fpregs_lock();
+
        copy_fpregs_to_fpstate(&current->thread.fpu);
        /* PKRU is separately restored in kvm_x86_ops->run.  */
        __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
                                ~XFEATURE_MASK_PKRU);
-       preempt_enable();
+
+       fpregs_mark_activate();
+       fpregs_unlock();
+
        trace_kvm_fpu(1);
 }
 
 /* When vcpu_run ends, restore user space FPU context. */
 static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       preempt_disable();
+       fpregs_lock();
+
        copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
        copy_kernel_to_fpregs(&current->thread.fpu.state);
-       preempt_enable();
+
+       fpregs_mark_activate();
+       fpregs_unlock();
+
        ++vcpu->stat.fpu_reload;
        trace_kvm_fpu(0);
 }
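With FPU restore deferred, preempt_disable() alone is no longer a sufficient guard: softirqs may use the FPU through kernel_fpu_begin(), so fpregs manipulation must also keep softirqs out, and fpregs_mark_activate() records that the live registers, not the memory image, are now the authoritative copy, clearing TIF_NEED_FPU_LOAD. The lock pair's assumed implementation at the time of this series, from asm/fpu/api.h:

        static inline void fpregs_lock(void)
        {
                preempt_disable();
                local_bh_disable();
        }

        static inline void fpregs_unlock(void)
        {
                local_bh_enable();
                preempt_enable();
        }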
@@ -8895,11 +8938,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                if (init_event)
                        kvm_put_guest_fpu(vcpu);
                mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-                                       XFEATURE_MASK_BNDREGS);
+                                       XFEATURE_BNDREGS);
                if (mpx_state_buffer)
                        memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
                mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-                                       XFEATURE_MASK_BNDCSR);
+                                       XFEATURE_BNDCSR);
                if (mpx_state_buffer)
                        memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
                if (init_event)
@@ -9108,7 +9151,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        if (irqchip_in_kernel(vcpu->kvm)) {
                vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
-               r = kvm_create_lapic(vcpu);
+               r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
                if (r < 0)
                        goto fail_mmu_destroy;
        } else
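Note that the now-static module parameter is sampled only here, at vCPU creation: later writes to /sys/module/kvm/parameters/lapic_timer_advance_ns should affect only vCPUs created afterwards, while existing vCPUs keep (and, in the -1 case, keep tuning) their private lapic_timer.timer_advance_ns.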