Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19ef3115f65c9dffc23a655be2763babcaff..2b2dd030ea3be3b7d5599be94fdddded0077db9e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
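+/*
+ * Copy the architectural DR0-DR3 values into the effective debug registers
+ * and flag them for reload on the next VM entry.  This is a no-op while
+ * userspace owns the hardware breakpoints via KVM_GUESTDBG_USE_HW_BP.
+ */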
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+               for (i = 0; i < KVM_NR_DB_REGS; i++)
+                       vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+               vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+       }
+}
+
 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
 {
        if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -3149,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
@@ -4114,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+                   && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                handled += n;
                addr += n;
@@ -4134,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+                                        addr, n, v))
+                   && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
                handled += n;
@@ -4475,7 +4488,8 @@ mmio:
        return X86EMUL_CONTINUE;
 }
 
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+                       unsigned long addr,
                        void *val, unsigned int bytes,
                        struct x86_exception *exception,
                        const struct read_write_emulator_ops *ops)
@@ -4538,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
                                   exception, &read_emultor);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
                            unsigned long addr,
                            const void *val,
                            unsigned int bytes,
@@ -4629,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
        int r;
 
        if (vcpu->arch.pio.in)
-               r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+               r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
                                    vcpu->arch.pio.size, pd);
        else
-               r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+               r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
                                     vcpu->arch.pio.port, vcpu->arch.pio.size,
                                     pd);
        return r;
@@ -4705,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
        kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
        if (!need_emulate_wbinvd(vcpu))
                return X86EMUL_CONTINUE;
@@ -4722,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
                wbinvd();
        return X86EMUL_CONTINUE;
 }
+
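+/* As kvm_emulate_wbinvd_noskip(), but also advance RIP past the instruction. */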
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-       kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+       kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long *dest)
 {
        return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long value)
 {
 
        return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5816,7 +5840,7 @@ void kvm_arch_exit(void)
        free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
        if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5827,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
                return 0;
        }
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
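+/* As kvm_vcpu_halt(), but also advance RIP past the emulated HLT. */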
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_vcpu_halt(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5903,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
        lapic_irq.dest_id = apicid;
 
        lapic_irq.delivery_mode = APIC_DM_REMRD;
-       kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+       kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5911,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        unsigned long nr, a0, a1, a2, a3, ret;
        int op_64_bit, r = 1;
 
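+       /* Advance RIP past the VMCALL/VMMCALL before handling the hypercall. */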
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+
        if (kvm_hv_hypercall_enabled(vcpu->kvm))
                return kvm_hv_hypercall(vcpu);
 
@@ -6164,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 }
 
 /*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to userspace.  Otherwise, the value will be returned to
  * userspace.
  */
@@ -6301,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                set_debugreg(vcpu->arch.eff_db[2], 2);
                set_debugreg(vcpu->arch.eff_db[3], 3);
                set_debugreg(vcpu->arch.dr6, 6);
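+               /* eff_db is now live in hardware: the pending reload is done. */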
+               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
@@ -6382,42 +6416,47 @@ out:
        return r;
 }
 
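+/*
+ * Wait until the vcpu is runnable again, then map the resulting mp_state
+ * back onto the run loop: 1 keeps the loop going, -EINTR drops back to
+ * userspace.
+ */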
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+       if (!kvm_arch_vcpu_runnable(vcpu)) {
+               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_block(vcpu);
+               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+                       return 1;
+       }
+
+       kvm_apic_accept_events(vcpu);
+       switch (vcpu->arch.mp_state) {
+       case KVM_MP_STATE_HALTED:
+               vcpu->arch.pv.pv_unhalted = false;
+               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+               /* fall through */
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.apf.halted = false;
+               break;
+       case KVM_MP_STATE_INIT_RECEIVED:
+               break;
+       default:
+               return -EINTR;
+       }
+       return 1;
+}
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
        struct kvm *kvm = vcpu->kvm;
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       r = 1;
-       while (r > 0) {
+       for (;;) {
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                    !vcpu->arch.apf.halted)
                        r = vcpu_enter_guest(vcpu);
-               else {
-                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-                       kvm_vcpu_block(vcpu);
-                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-                       if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
-                               kvm_apic_accept_events(vcpu);
-                               switch(vcpu->arch.mp_state) {
-                               case KVM_MP_STATE_HALTED:
-                                       vcpu->arch.pv.pv_unhalted = false;
-                                       vcpu->arch.mp_state =
-                                               KVM_MP_STATE_RUNNABLE;
-                               case KVM_MP_STATE_RUNNABLE:
-                                       vcpu->arch.apf.halted = false;
-                                       break;
-                               case KVM_MP_STATE_INIT_RECEIVED:
-                                       break;
-                               default:
-                                       r = -EINTR;
-                                       break;
-                               }
-                       }
-               }
-
+               else
+                       r = vcpu_block(kvm, vcpu);
                if (r <= 0)
                        break;
 
@@ -6429,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
+                       break;
                }
 
                kvm_check_async_pf_completion(vcpu);
@@ -6437,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
+                       break;
                }
                if (need_resched()) {
                        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6568,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-       r = __vcpu_run(vcpu);
+       r = vcpu_run(vcpu);
 
 out:
        post_kvm_run_save(vcpu);
@@ -7075,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
        kvm_clear_exception_queue(vcpu);
 
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = DR6_INIT;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);
 
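+       /* CR2 is architecturally cleared on reset as well. */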
+       vcpu->arch.cr2 = 0;
+
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        vcpu->arch.apf.msr_val = 0;
        vcpu->arch.st.msr_val = 0;
@@ -7240,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.pv.pv_unhalted = false;
        vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        else
                vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7288,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        vcpu->arch.guest_supported_xcr0 = 0;
        vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
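+       /* Cache the guest's MAXPHYADDR instead of querying CPUID each time. */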
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -7428,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
                if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
-                       kvm_kvfree(free->arch.rmap[i]);
+                       kvfree(free->arch.rmap[i]);
                        free->arch.rmap[i] = NULL;
                }
                if (i == 0)
@@ -7436,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
                if (!dont || free->arch.lpage_info[i - 1] !=
                             dont->arch.lpage_info[i - 1]) {
-                       kvm_kvfree(free->arch.lpage_info[i - 1]);
+                       kvfree(free->arch.lpage_info[i - 1]);
                        free->arch.lpage_info[i - 1] = NULL;
                }
        }
@@ -7490,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
 out_free:
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-               kvm_kvfree(slot->arch.rmap[i]);
+               kvfree(slot->arch.rmap[i]);
                slot->arch.rmap[i] = NULL;
                if (i == 0)
                        continue;
 
-               kvm_kvfree(slot->arch.lpage_info[i - 1]);
+               kvfree(slot->arch.lpage_info[i - 1]);
                slot->arch.lpage_info[i - 1] = NULL;
        }
        return -ENOMEM;
@@ -7617,6 +7663,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
        /* It's OK to get 'new' slot here as it has already been installed */
        new = id_to_memslot(kvm->memslots, mem->slot);
 
+       /*
+        * Dirty logging tracks sptes at 4k granularity, meaning that large
+        * sptes have to be split.  If live migration is successful, the guest
+        * in the source machine will be destroyed and large sptes will be
+        * created in the destination.  However, if the guest continues to run
+        * in the source machine (for example if live migration fails), small
+        * sptes will remain and cause bad performance.
+        *
+        * Scan sptes if dirty logging has been stopped, dropping those
+        * which can be collapsed into a single large-page spte.  Later
+        * page faults will create the large-page sptes.
+        */
+       if ((change != KVM_MR_DELETE) &&
+               (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+               !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+
        /*
         * Set up write protection and/or dirty logging for the new slot.
         *