KVM: PPC: Book3S HV: Avoid touching arch.mmu_ready in XIVE release functions
author Paul Mackerras <paulus@ozlabs.org>
Thu, 23 May 2019 06:35:07 +0000 (16:35 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Wed, 29 May 2019 03:44:36 +0000 (13:44 +1000)
Currently, kvmppc_xive_release() and kvmppc_xive_native_release() clear
kvm->arch.mmu_ready and call kick_all_cpus_sync() as a way of ensuring
that no vcpus are executing in the guest.  However, future patches will
change the mutex associated with kvm->arch.mmu_ready to a new mutex that
nests inside the vcpu mutexes, making it difficult to continue to use
this method.

In fact, taking the vcpu mutex for a vcpu excludes execution of that
vcpu, and we already take the vcpu mutex around the call to
kvmppc_xive_[native_]cleanup_vcpu().  Once the cleanup function is
done and we release the vcpu mutex, the vcpu can execute once again,
but because we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type,
vcpu->arch.xive_esc_vaddr and vcpu->arch.xive_esc_raddr, that vcpu will
not be going into XIVE code any more.  Thus, once we have cleaned up
all of the vcpus, we are safe to clean up the rest of the XIVE state,
and we don't need to use kvm->arch.mmu_ready to hold off vcpu execution.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/book3s_xive_native.c

index 4953957333b7812b2c154c5140b6f07776a95f11..f623451ec0a3b684444c7d4f2c201c8d2eb2ea91 100644 (file)
@@ -1859,21 +1859,10 @@ static void kvmppc_xive_release(struct kvm_device *dev)
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        int i;
-       int was_ready;
 
        pr_devel("Releasing xive device\n");
 
-       debugfs_remove(xive->dentry);
-
        /*
-        * Clearing mmu_ready temporarily while holding kvm->lock
-        * is a way of ensuring that no vcpus can enter the guest
-        * until we drop kvm->lock.  Doing kick_all_cpus_sync()
-        * ensures that any vcpu executing inside the guest has
-        * exited the guest.  Once kick_all_cpus_sync() has finished,
-        * we know that no vcpu can be executing the XIVE push or
-        * pull code, or executing a XICS hcall.
-        *
         * Since this is the device release function, we know that
         * userspace does not have any open fd referring to the
         * device.  Therefore there can not be any of the device
@@ -1881,9 +1870,8 @@ static void kvmppc_xive_release(struct kvm_device *dev)
         * and similarly, the connect_vcpu and set/clr_mapped
         * functions also cannot be being executed.
         */
-       was_ready = kvm->arch.mmu_ready;
-       kvm->arch.mmu_ready = 0;
-       kick_all_cpus_sync();
+
+       debugfs_remove(xive->dentry);
 
        /*
         * We should clean up the vCPU interrupt presenters first.
@@ -1892,12 +1880,22 @@ static void kvmppc_xive_release(struct kvm_device *dev)
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+                * Holding the vcpu->mutex also means that the vcpu cannot
+                * be executing the KVM_RUN ioctl, and therefore it cannot
+                * be executing the XIVE push or pull code or accessing
+                * the XIVE MMIO regions.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }
 
+       /*
+        * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+        * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+        * against xive code getting called during vcpu execution or
+        * set/get one_reg operations.
+        */
        kvm->arch.xive = NULL;
 
        /* Mask and free interrupts */
@@ -1911,8 +1909,6 @@ static void kvmppc_xive_release(struct kvm_device *dev)
        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);
 
-       kvm->arch.mmu_ready = was_ready;
-
        /*
         * A reference of the kvmppc_xive pointer is now kept under
         * the xive_devices struct of the machine for reuse. It is
index 6a8e698c4b6ee4e501d5ac99788a8b2f0bf0ff22..da31dd05fd72fcb346fb6bf4987c780d5d435812 100644 (file)
@@ -973,21 +973,10 @@ static void kvmppc_xive_native_release(struct kvm_device *dev)
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        int i;
-       int was_ready;
-
-       debugfs_remove(xive->dentry);
 
        pr_devel("Releasing xive native device\n");
 
        /*
-        * Clearing mmu_ready temporarily while holding kvm->lock
-        * is a way of ensuring that no vcpus can enter the guest
-        * until we drop kvm->lock.  Doing kick_all_cpus_sync()
-        * ensures that any vcpu executing inside the guest has
-        * exited the guest.  Once kick_all_cpus_sync() has finished,
-        * we know that no vcpu can be executing the XIVE push or
-        * pull code or accessing the XIVE MMIO regions.
-        *
         * Since this is the device release function, we know that
         * userspace does not have any open fd or mmap referring to
         * the device.  Therefore there can not be any of the
@@ -996,9 +985,8 @@ static void kvmppc_xive_native_release(struct kvm_device *dev)
         * connect_vcpu and set/clr_mapped functions also cannot
         * be being executed.
         */
-       was_ready = kvm->arch.mmu_ready;
-       kvm->arch.mmu_ready = 0;
-       kick_all_cpus_sync();
+
+       debugfs_remove(xive->dentry);
 
        /*
         * We should clean up the vCPU interrupt presenters first.
@@ -1007,12 +995,22 @@ static void kvmppc_xive_native_release(struct kvm_device *dev)
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
+                * Holding the vcpu->mutex also means that the vcpu cannot
+                * be executing the KVM_RUN ioctl, and therefore it cannot
+                * be executing the XIVE push or pull code or accessing
+                * the XIVE MMIO regions.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_native_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }
 
+       /*
+        * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+        * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+        * against xive code getting called during vcpu execution or
+        * set/get one_reg operations.
+        */
        kvm->arch.xive = NULL;
 
        for (i = 0; i <= xive->max_sbid; i++) {
@@ -1025,8 +1023,6 @@ static void kvmppc_xive_native_release(struct kvm_device *dev)
        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);
 
-       kvm->arch.mmu_ready = was_ready;
-
        /*
         * A reference of the kvmppc_xive pointer is now kept under
         * the xive_devices struct of the machine for reuse. It is