Merge tag 'nfs-for-4.20-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

[sfrench/cifs-2.6.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 21a5e6e4758b9810e80f1824fd1029681e9b5500..4ad2d293ddc2605d2ae44ce075de68beb93841e0 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1520,19 +1520,16 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
  }
  EXPORT_SYMBOL(vm_insert_page);
  
-static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                         pfn_t pfn, pgprot_t prot, bool mkwrite)
  {
         struct mm_struct *mm = vma->vm_mm;
-       int retval;
         pte_t *pte, entry;
         spinlock_t *ptl;
  
-       retval = -ENOMEM;
         pte = get_locked_pte(mm, addr, &ptl);
         if (!pte)
-               goto out;
-       retval = -EBUSY;
+               return VM_FAULT_OOM;
         if (!pte_none(*pte)) {
                 if (mkwrite) {
                         /*
@@ -1540,10 +1537,15 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                          * in may not match the PFN we have mapped if the
                          * mapped PFN is a writeable COW page.  In the mkwrite
                          * case we are creating a writable PTE for a shared
-                        * mapping and we expect the PFNs to match.
+                        * mapping and we expect the PFNs to match. If they
+                        * don't match, we are likely racing with block
+                        * allocation and mapping invalidation so just skip the
+                        * update.
                          */
-                       if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn)))
+                       if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) {
+                               WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte)));
                                 goto out_unlock;
+                       }
                         entry = *pte;
                         goto out_mkwrite;
                 } else
@@ -1565,56 +1567,32 @@ out_mkwrite:
         set_pte_at(mm, addr, pte, entry);
         update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
  
-       retval = 0;
  out_unlock:
         pte_unmap_unlock(pte, ptl);
-out:
-       return retval;
-}
-
-/**
- * vm_insert_pfn - insert single pfn into user vma
- * @vma: user vma to map to
- * @addr: target user address of this page
- * @pfn: source kernel pfn
- *
- * Similar to vm_insert_page, this allows drivers to insert individual pages
- * they've allocated into a user vma. Same comments apply.
- *
- * This function should only be called from a vm_ops->fault handler, and
- * in that case the handler should return NULL.
- *
- * vma cannot be a COW mapping.
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- */
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-                       unsigned long pfn)
-{
-       return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+       return VM_FAULT_NOPAGE;
  }
-EXPORT_SYMBOL(vm_insert_pfn);
  
  /**
- * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
   * @vma: user vma to map to
   * @addr: target user address of this page
   * @pfn: source kernel pfn
   * @pgprot: pgprot flags for the inserted page
   *
- * This is exactly like vm_insert_pfn, except that it allows drivers to
+ * This is exactly like vmf_insert_pfn(), except that it allows drivers
   * to override pgprot on a per-page basis.
   *
   * This only makes sense for IO mappings, and it makes no sense for
- * cow mappings.  In general, using multiple vmas is preferable;
- * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * COW mappings.  In general, using multiple vmas is preferable;
+ * vmf_insert_pfn_prot should only be used if using multiple VMAs is
   * impractical.
+ *
+ * Context: Process context.  May allocate using %GFP_KERNEL.
+ * Return: vm_fault_t value.
   */
-int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
                         unsigned long pfn, pgprot_t pgprot)
  {
-       int ret;
         /*
          * Technically, architectures with pte_special can avoid all these
          * restrictions (same for remap_pfn_range).  However we would like
@@ -1628,19 +1606,44 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
         BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
  
         if (addr < vma->vm_start || addr >= vma->vm_end)
-               return -EFAULT;
+               return VM_FAULT_SIGBUS;
  
         if (!pfn_modify_allowed(pfn, pgprot))
-               return -EACCES;
+               return VM_FAULT_SIGBUS;
  
         track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
  
-       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
+       return insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
                         false);
+}
+EXPORT_SYMBOL(vmf_insert_pfn_prot);
  
-       return ret;
+/**
+ * vmf_insert_pfn - insert single pfn into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ *
+ * Similar to vm_insert_page, this allows drivers to insert individual pages
+ * they've allocated into a user vma. Same comments apply.
+ *
+ * This function should only be called from a vm_ops->fault handler, and
+ * in that case the handler should return the result of this function.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * Context: Process context.  May allocate using %GFP_KERNEL.
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+                       unsigned long pfn)
+{
+       return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
  }
-EXPORT_SYMBOL(vm_insert_pfn_prot);
+EXPORT_SYMBOL(vmf_insert_pfn);
  
  static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
  {
@@ -1656,20 +1659,21 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
         return false;
  }
  
-static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-                       pfn_t pfn, bool mkwrite)
+static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
+               unsigned long addr, pfn_t pfn, bool mkwrite)
  {
         pgprot_t pgprot = vma->vm_page_prot;
+       int err;
  
         BUG_ON(!vm_mixed_ok(vma, pfn));
  
         if (addr < vma->vm_start || addr >= vma->vm_end)
-               return -EFAULT;
+               return VM_FAULT_SIGBUS;
  
         track_pfn_insert(vma, &pgprot, pfn);
  
         if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
-               return -EACCES;
+               return VM_FAULT_SIGBUS;
  
         /*
          * If we don't have pte special, then we have to use the pfn_valid()
@@ -1688,36 +1692,35 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                  * result in pfn_t_has_page() == false.
                  */
                 page = pfn_to_page(pfn_t_to_pfn(pfn));
-               return insert_page(vma, addr, page, pgprot);
+               err = insert_page(vma, addr, page, pgprot);
+       } else {
+               return insert_pfn(vma, addr, pfn, pgprot, mkwrite);
         }
-       return insert_pfn(vma, addr, pfn, pgprot, mkwrite);
+
+       if (err == -ENOMEM)
+               return VM_FAULT_OOM;
+       if (err < 0 && err != -EBUSY)
+               return VM_FAULT_SIGBUS;
+
+       return VM_FAULT_NOPAGE;
  }
  
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-                       pfn_t pfn)
+vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+               pfn_t pfn)
  {
         return __vm_insert_mixed(vma, addr, pfn, false);
-
  }
-EXPORT_SYMBOL(vm_insert_mixed);
+EXPORT_SYMBOL(vmf_insert_mixed);
  
  /*
   *  If the insertion of PTE failed because someone else already added a
   *  different entry in the mean time, we treat that as success as we assume
   *  the same entry was actually inserted.
   */
-
  vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
                 unsigned long addr, pfn_t pfn)
  {
-       int err;
-
-       err =  __vm_insert_mixed(vma, addr, pfn, true);
-       if (err == -ENOMEM)
-               return VM_FAULT_OOM;
-       if (err < 0 && err != -EBUSY)
-               return VM_FAULT_SIGBUS;
-       return VM_FAULT_NOPAGE;
+       return __vm_insert_mixed(vma, addr, pfn, true);
  }
  EXPORT_SYMBOL(vmf_insert_mixed_mkwrite);
  
@@ -3498,10 +3501,36 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
         struct vm_area_struct *vma = vmf->vma;
         vm_fault_t ret;
  
-       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
-       if (!vma->vm_ops->fault)
-               ret = VM_FAULT_SIGBUS;
-       else if (!(vmf->flags & FAULT_FLAG_WRITE))
+       /*
+        * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
+        */
+       if (!vma->vm_ops->fault) {
+               /*
+                * If we find a migration pmd entry or a none pmd entry, which
+                * should never happen, return SIGBUS
+                */
+               if (unlikely(!pmd_present(*vmf->pmd)))
+                       ret = VM_FAULT_SIGBUS;
+               else {
+                       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm,
+                                                      vmf->pmd,
+                                                      vmf->address,
+                                                      &vmf->ptl);
+                       /*
+                        * Make sure this is not a temporary clearing of pte
+                        * by holding ptl and checking again. A R/M/W update
+                        * of pte involves: take ptl, clearing the pte so that
+                        * we don't have concurrent modification by hardware
+                        * followed by an update.
+                        */
+                       if (unlikely(pte_none(*vmf->pte)))
+                               ret = VM_FAULT_SIGBUS;
+                       else
+                               ret = VM_FAULT_NOPAGE;
+
+                       pte_unmap_unlock(vmf->pte, vmf->ptl);
+               }
+       } else if (!(vmf->flags & FAULT_FLAG_WRITE))
                 ret = do_read_fault(vmf);
         else if (!(vma->vm_flags & VM_SHARED))
                 ret = do_cow_fault(vmf);