KVM: x86: improve the usability of the 'kvm_pio' tracepoint
[sfrench/cifs-2.6.git] / arch / x86 / kvm / x86.c
index d1c55f8722c66cf6139222c3544c97700169c523..de0931cb3f58c01a447e80f7fd7a7f797140b8bf 100644 (file)
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
                return 1;
 
+       if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+               return 1;
+
        if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
                return 1;
 
@@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
            (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
                kvm_mmu_reset_context(vcpu);
 
+       if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+               update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
        if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
                kvm_update_cpuid(vcpu);
 
@@ -695,26 +701,11 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                return 0;
        }
 
-       if (is_long_mode(vcpu)) {
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
-                       if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
-                               return 1;
-               } else
-                       if (cr3 & CR3_L_MODE_RESERVED_BITS)
-                               return 1;
-       } else {
-               if (is_pae(vcpu)) {
-                       if (cr3 & CR3_PAE_RESERVED_BITS)
-                               return 1;
-                       if (is_paging(vcpu) &&
-                           !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
-                               return 1;
-               }
-               /*
-                * We don't check reserved bits in nonpae mode, because
-                * this isn't enforced, and VMware depends on this.
-                */
-       }
+       if (is_long_mode(vcpu) && (cr3 & CR3_L_MODE_RESERVED_BITS))
+               return 1;
+       if (is_pae(vcpu) && is_paging(vcpu) &&
+           !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+               return 1;
 
        vcpu->arch.cr3 = cr3;
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
@@ -1117,7 +1108,6 @@ static inline u64 get_kernel_ns(void)
 {
        struct timespec ts;
 
-       WARN_ON(preemptible());
        ktime_get_ts(&ts);
        monotonic_to_bootbased(&ts);
        return timespec_to_ns(&ts);
@@ -2639,6 +2629,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IRQ_INJECT_STATUS:
        case KVM_CAP_IRQFD:
        case KVM_CAP_IOEVENTFD:
+       case KVM_CAP_IOEVENTFD_NO_LENGTH:
        case KVM_CAP_PIT2:
        case KVM_CAP_PIT_STATE2:
        case KVM_CAP_SET_IDENTITY_MAP_ADDR:
@@ -3641,11 +3632,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
                offset = i * BITS_PER_LONG;
                kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
        }
-       if (is_dirty)
-               kvm_flush_remote_tlbs(kvm);
 
        spin_unlock(&kvm->mmu_lock);
 
+       /* See the comments in kvm_mmu_slot_remove_write_access(). */
+       lockdep_assert_held(&kvm->slots_lock);
+
+       /*
+        * All the TLBs can be flushed outside of mmu_lock; see the comments in
+        * kvm_mmu_slot_remove_write_access().
+        */
+       if (is_dirty)
+               kvm_flush_remote_tlbs(kvm);
+
        r = -EFAULT;
        if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
                goto out;
@@ -4164,7 +4163,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                | (write ? PFERR_WRITE_MASK : 0);
 
        if (vcpu_match_mmio_gva(vcpu, gva)
-           && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+           && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+                                vcpu->arch.access, access)) {
                *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
                                        (gva & (PAGE_SIZE - 1));
                trace_vcpu_match_mmio(gva, *gpa, write, false);
@@ -4480,8 +4480,6 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
                               unsigned short port, void *val,
                               unsigned int count, bool in)
 {
-       trace_kvm_pio(!in, port, size, count);
-
        vcpu->arch.pio.port = port;
        vcpu->arch.pio.in = in;
        vcpu->arch.pio.count  = count;
@@ -4516,6 +4514,7 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
        if (ret) {
 data_avail:
                memcpy(val, vcpu->arch.pio_data, size * count);
+               trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
                vcpu->arch.pio.count = 0;
                return 1;
        }
@@ -4530,6 +4529,7 @@ static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 
        memcpy(vcpu->arch.pio_data, val, size * count);
+       trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
        return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
 }
 
@@ -4896,7 +4896,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        ctxt->eip = kvm_rip_read(vcpu);
        ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                     (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
-                    cs_l                               ? X86EMUL_MODE_PROT64 :
+                    (cs_l && is_long_mode(vcpu))       ? X86EMUL_MODE_PROT64 :
                     cs_db                              ? X86EMUL_MODE_PROT32 :
                                                          X86EMUL_MODE_PROT16;
        ctxt->guest_mode = is_guest_mode(vcpu);
@@ -5422,7 +5422,8 @@ static void kvm_timer_init(void)
        int cpu;
 
        max_tsc_khz = tsc_khz;
-       register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+
+       cpu_notifier_register_begin();
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_CPU_FREQ
                struct cpufreq_policy policy;
@@ -5439,6 +5440,10 @@ static void kvm_timer_init(void)
        pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
        for_each_online_cpu(cpu)
                smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+
+       __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+       cpu_notifier_register_done();
+
 }
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -7318,8 +7323,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
        /*
         * Write protect all pages for dirty logging.
-        * Existing largepage mappings are destroyed here and new ones will
-        * not be created until the end of the logging.
+        *
+        * All the sptes, including the large sptes, which point to this
+        * slot are set to read-only. We cannot create any new large
+        * spte on this slot until the end of the logging.
+        *
+        * See the comments in fast_page_fault().
         */
        if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
                kvm_mmu_slot_remove_write_access(kvm, mem->slot);