KVM: arm/arm64: Fix unintended stage 2 PMD mappings

author Christoffer Dall <christoffer.dall@arm.com>

Fri, 2 Nov 2018 07:53:22 +0000 (08:53 +0100)

committer Marc Zyngier <marc.zyngier@arm.com>

Wed, 19 Dec 2018 17:47:52 +0000 (17:47 +0000)
author Christoffer Dall <christoffer.dall@arm.com>
Fri, 2 Nov 2018 07:53:22 +0000 (08:53 +0100)
committer Marc Zyngier <marc.zyngier@arm.com>
Wed, 19 Dec 2018 17:47:52 +0000 (17:47 +0000)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c

index f605514395a1e81d539a8165da5845e451bdd9e3..dee3dbd98712450ce387bdac5a521c3d7d05bb09 100644 (file)
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1595,6 +1595,63 @@ static void kvm_send_hwpoison_signal(unsigned long address,
         send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
  }
  
+static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
+                                              unsigned long hva)
+{
+       gpa_t gpa_start, gpa_end;
+       hva_t uaddr_start, uaddr_end;
+       size_t size;
+
+       size = memslot->npages * PAGE_SIZE;
+
+       gpa_start = memslot->base_gfn << PAGE_SHIFT;
+       gpa_end = gpa_start + size;
+
+       uaddr_start = memslot->userspace_addr;
+       uaddr_end = uaddr_start + size;
+
+       /*
+        * Pages belonging to memslots that don't have the same alignment
+        * within a PMD for userspace and IPA cannot be mapped with stage-2
+        * PMD entries, because we'll end up mapping the wrong pages.
+        *
+        * Consider a layout like the following:
+        *
+        *    memslot->userspace_addr:
+        *    +-----+--------------------+--------------------+---+
+        *    |abcde|fgh  Stage-1 PMD    |    Stage-1 PMD   tv|xyz|
+        *    +-----+--------------------+--------------------+---+
+        *
+        *    memslot->base_gfn << PAGE_SIZE:
+        *      +---+--------------------+--------------------+-----+
+        *      |abc|def  Stage-2 PMD    |    Stage-2 PMD     |tvxyz|
+        *      +---+--------------------+--------------------+-----+
+        *
+        * If we create those stage-2 PMDs, we'll end up with this incorrect
+        * mapping:
+        *   d -> f
+        *   e -> g
+        *   f -> h
+        */
+       if ((gpa_start & ~S2_PMD_MASK) != (uaddr_start & ~S2_PMD_MASK))
+               return false;
+
+       /*
+        * Next, let's make sure we're not trying to map anything not covered
+        * by the memslot. This means we have to prohibit PMD size mappings
+        * for the beginning and end of a non-PMD aligned and non-PMD sized
+        * memory slot (illustrated by the head and tail parts of the
+        * userspace view above containing pages 'abcde' and 'xyz',
+        * respectively).
+        *
+        * Note that it doesn't matter if we do the check using the
+        * userspace_addr or the base_gfn, as both are equally aligned (per
+        * the check above) and equally sized.
+        */
+       return (hva & S2_PMD_MASK) >= uaddr_start &&
+              (hva & S2_PMD_MASK) + S2_PMD_SIZE <= uaddr_end;
+}
+
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                           struct kvm_memory_slot *memslot, unsigned long hva,
                           unsigned long fault_status)
@@ -1621,6 +1678,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 return -EFAULT;
         }
  
+       if (!fault_supports_stage2_pmd_mappings(memslot, hva))
+               force_pte = true;
+
+       if (logging_active)
+               force_pte = true;
+
         /* Let's check if we will get back a huge page backed by hugetlbfs */
         down_read(&current->mm->mmap_sem);
         vma = find_vma_intersection(current->mm, hva, hva + 1);
@@ -1637,28 +1700,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
          */
         if ((vma_pagesize == PMD_SIZE ||
              (vma_pagesize == PUD_SIZE && kvm_stage2_has_pud(kvm))) &&
-           !logging_active) {
+           !force_pte) {
                 gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
-       } else {
-               /*
-                * Fallback to PTE if it's not one of the Stage 2
-                * supported hugepage sizes or the corresponding level
-                * doesn't exist
-                */
-               vma_pagesize = PAGE_SIZE;
-
-               /*
-                * Pages belonging to memslots that don't have the same
-                * alignment for userspace and IPA cannot be mapped using
-                * block descriptors even if the pages belong to a THP for
-                * the process, because the stage-2 block descriptor will
-                * cover more than a single THP and we loose atomicity for
-                * unmapping, updates, and splits of the THP or other pages
-                * in the stage-2 block range.
-                */
-               if ((memslot->userspace_addr & ~PMD_MASK) !=
-                   ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
-                       force_pte = true;
         }
         up_read(&current->mm->mmap_sem);
  
@@ -1697,7 +1740,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                  * should not be mapped with huge pages (it introduces churn
                  * and performance degradation), so force a pte mapping.
                  */
-               force_pte = true;
                 flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
  
                 /*
author	Christoffer Dall <christoffer.dall@arm.com>
	Fri, 2 Nov 2018 07:53:22 +0000 (08:53 +0100)
committer	Marc Zyngier <marc.zyngier@arm.com>
	Wed, 19 Dec 2018 17:47:52 +0000 (17:47 +0000)