Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19ef3115f65c9dffc23a655be2763babcaff..2b2dd030ea3be3b7d5599be94fdddded0077db9e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
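+/*
+ * Copy the architectural DR0-DR3 values into the effective debug registers
+ * and flag them for reload on the next VM entry.  This is a no-op while
+ * userspace owns the hardware breakpoints via KVM_GUESTDBG_USE_HW_BP.
+ */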
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+               for (i = 0; i < KVM_NR_DB_REGS; i++)
+                       vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+               vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+       }
+}
+
 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
 {
        if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -3149,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
@@ -4114,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+                   && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                handled += n;
                addr += n;
@@ -4134,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+                                        addr, n, v))
+                   && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
                handled += n;
@@ -4475,7 +4488,8 @@ mmio:
        return X86EMUL_CONTINUE;
 }
 
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+                       unsigned long addr,
                        void *val, unsigned int bytes,
                        struct x86_exception *exception,
                        const struct read_write_emulator_ops *ops)
@@ -4538,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
                                   exception, &read_emultor);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
                            unsigned long addr,
                            const void *val,
                            unsigned int bytes,
@@ -4629,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
        int r;
 
        if (vcpu->arch.pio.in)
-               r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+               r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
                                    vcpu->arch.pio.size, pd);
        else
-               r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+               r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
                                     vcpu->arch.pio.port, vcpu->arch.pio.size,
                                     pd);
        return r;
@@ -4705,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
        kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
        if (!need_emulate_wbinvd(vcpu))
                return X86EMUL_CONTINUE;
@@ -4722,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
                wbinvd();
        return X86EMUL_CONTINUE;
 }
+
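+/* As kvm_emulate_wbinvd_noskip(), but also advance RIP past the instruction. */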
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-       kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+       kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long *dest)
 {
        return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long value)
 {
 
        return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5816,7 +5840,7 @@ void kvm_arch_exit(void)
        free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
        if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5827,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
                return 0;
        }
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
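+/* As kvm_vcpu_halt(), but also advance RIP past the emulated HLT. */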
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_vcpu_halt(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5903,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
        lapic_irq.dest_id = apicid;
 
        lapic_irq.delivery_mode = APIC_DM_REMRD;
-       kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+       kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5911,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        unsigned long nr, a0, a1, a2, a3, ret;
        int op_64_bit, r = 1;
 
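+       /* Advance RIP past the VMCALL/VMMCALL before handling the hypercall. */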
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+
        if (kvm_hv_hypercall_enabled(vcpu->kvm))
                return kvm_hv_hypercall(vcpu);
 
@@ -6164,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 }
 
 /*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to userspace.  Otherwise, the value will be returned to
  * userspace.
  */
@@ -6301,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                set_debugreg(vcpu->arch.eff_db[2], 2);
                set_debugreg(vcpu->arch.eff_db[3], 3);
                set_debugreg(vcpu->arch.dr6, 6);
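+               /* eff_db is now live in hardware: the pending reload is done. */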
+               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
@@ -6382,42 +6416,47 @@ out:
        return r;
 }
 
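+/*
+ * Wait until the vcpu is runnable again, then map the resulting mp_state
+ * back onto the run loop: 1 keeps the loop going, -EINTR drops back to
+ * userspace.
+ */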
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+       if (!kvm_arch_vcpu_runnable(vcpu)) {
+               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_block(vcpu);
+               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+                       return 1;
+       }
+
+       kvm_apic_accept_events(vcpu);
+       switch (vcpu->arch.mp_state) {
+       case KVM_MP_STATE_HALTED:
+               vcpu->arch.pv.pv_unhalted = false;
+               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+               /* fall through */
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.apf.halted = false;
+               break;
+       case KVM_MP_STATE_INIT_RECEIVED:
+               break;
+       default:
+               return -EINTR;
+       }
+       return 1;
+}
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
        struct kvm *kvm = vcpu->kvm;
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       r = 1;
-       while (r > 0) {
+       for (;;) {
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                    !vcpu->arch.apf.halted)
                        r = vcpu_enter_guest(vcpu);
-               else {
-                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-                       kvm_vcpu_block(vcpu);
-                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-                       if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
-                               kvm_apic_accept_events(vcpu);
-                               switch(vcpu->arch.mp_state) {
-                               case KVM_MP_STATE_HALTED:
-                                       vcpu->arch.pv.pv_unhalted = false;
-                                       vcpu->arch.mp_state =
-                                               KVM_MP_STATE_RUNNABLE;
-                               case KVM_MP_STATE_RUNNABLE:
-                                       vcpu->arch.apf.halted = false;
-                                       break;
-                               case KVM_MP_STATE_INIT_RECEIVED:
-                                       break;
-                               default:
-                                       r = -EINTR;
-                                       break;
-                               }
-                       }
-               }
-
+               else
+                       r = vcpu_block(kvm, vcpu);
                if (r <= 0)
                        break;
 
@@ -6429,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
+                       break;
                }
 
                kvm_check_async_pf_completion(vcpu);
@@ -6437,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
+                       break;
                }
                if (need_resched()) {
                        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6568,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-       r = __vcpu_run(vcpu);
+       r = vcpu_run(vcpu);
 
 out:
        post_kvm_run_save(vcpu);
@@ -7075,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
        kvm_clear_exception_queue(vcpu);
 
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = DR6_INIT;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);
 
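+       /* CR2 is architecturally cleared on reset as well. */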
+       vcpu->arch.cr2 = 0;
+
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        vcpu->arch.apf.msr_val = 0;
        vcpu->arch.st.msr_val = 0;
@@ -7240,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.pv.pv_unhalted = false;
        vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        else
                vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7288,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        vcpu->arch.guest_supported_xcr0 = 0;
        vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
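+       /* Cache the guest's MAXPHYADDR instead of querying CPUID each time. */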
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -7428,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
                if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
-                       kvm_kvfree(free->arch.rmap[i]);
+                       kvfree(free->arch.rmap[i]);
                        free->arch.rmap[i] = NULL;
                }
                if (i == 0)
@@ -7436,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
                if (!dont || free->arch.lpage_info[i - 1] !=
                             dont->arch.lpage_info[i - 1]) {
-                       kvm_kvfree(free->arch.lpage_info[i - 1]);
+                       kvfree(free->arch.lpage_info[i - 1]);
                        free->arch.lpage_info[i - 1] = NULL;
                }
        }
@@ -7490,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
 out_free:
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-               kvm_kvfree(slot->arch.rmap[i]);
+               kvfree(slot->arch.rmap[i]);
                slot->arch.rmap[i] = NULL;
                if (i == 0)
                        continue;
 
-               kvm_kvfree(slot->arch.lpage_info[i - 1]);
+               kvfree(slot->arch.lpage_info[i - 1]);
                slot->arch.lpage_info[i - 1] = NULL;
        }
        return -ENOMEM;
@@ -7617,6 +7663,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
        /* It's OK to get 'new' slot here as it has already been installed */
        new = id_to_memslot(kvm->memslots, mem->slot);
 
+       /*
+        * Dirty logging tracks sptes at 4k granularity, meaning that large
+        * sptes have to be split.  If live migration is successful, the guest
+        * in the source machine will be destroyed and large sptes will be
+        * created in the destination.  However, if the guest continues to run
+        * in the source machine (for example if live migration fails), small
+        * sptes will remain and cause bad performance.
+        *
+        * Scan sptes if dirty logging has been stopped, dropping those
+        * which can be collapsed into a single large-page spte.  Later
+        * page faults will create the large-page sptes.
+        */
+       if ((change != KVM_MR_DELETE) &&
+               (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+               !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+
        /*
         * Set up write protection and/or dirty logging for the new slot.
         *