Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa...

[sfrench/cifs-2.6.git] / mm / mempolicy.c
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index f48693f75b37a0ece6ba8bf9c7ebb9a25ce3a7fd..65e0874fce1736a65f8ced74f2a94d3e23d718c9 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -403,7 +403,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
         },
  };
  
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
                                 unsigned long flags);
  
  struct queue_pages {
@@ -429,11 +429,14 @@ static inline bool queue_pages_required(struct page *page,
  }
  
  /*
- * queue_pages_pmd() has three possible return values:
- * 1 - pages are placed on the right node or queued successfully.
- * 0 - THP was split.
- * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
- *        page was already on a node that does not follow the policy.
+ * queue_pages_pmd() has four possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 2 - THP was split.
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ *        existing page was already on a node that does not follow the
+ *        policy.
   */
  static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
                                 unsigned long end, struct mm_walk *walk)
@@ -451,23 +454,20 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
         if (is_huge_zero_page(page)) {
                 spin_unlock(ptl);
                 __split_huge_pmd(walk->vma, pmd, addr, false, NULL);
+               ret = 2;
                 goto out;
         }
-       if (!queue_pages_required(page, qp)) {
-               ret = 1;
+       if (!queue_pages_required(page, qp))
                 goto unlock;
-       }
  
-       ret = 1;
         flags = qp->flags;
         /* go to thp migration */
         if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-               if (!vma_migratable(walk->vma)) {
-                       ret = -EIO;
+               if (!vma_migratable(walk->vma) ||
+                   migrate_page_add(page, qp->pagelist, flags)) {
+                       ret = 1;
                         goto unlock;
                 }
-
-               migrate_page_add(page, qp->pagelist, flags);
         } else
                 ret = -EIO;
  unlock:
@@ -479,6 +479,13 @@ out:
  /*
   * Scan through pages checking if pages follow certain conditions,
   * and move them to the pagelist if they do.
+ *
+ * queue_pages_pte_range() has three possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ *        on a node that does not follow the policy.
   */
  static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
                         unsigned long end, struct mm_walk *walk)
@@ -488,17 +495,17 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
         struct queue_pages *qp = walk->private;
         unsigned long flags = qp->flags;
         int ret;
+       bool has_unmovable = false;
         pte_t *pte;
         spinlock_t *ptl;
  
         ptl = pmd_trans_huge_lock(pmd, vma);
         if (ptl) {
                 ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-               if (ret > 0)
-                       return 0;
-               else if (ret < 0)
+               if (ret != 2)
                         return ret;
         }
+       /* THP was split, fall through to pte walk */
  
         if (pmd_trans_unstable(pmd))
                 return 0;
@@ -519,14 +526,28 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
                 if (!queue_pages_required(page, qp))
                         continue;
                 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-                       if (!vma_migratable(vma))
+                       /* MPOL_MF_STRICT must be specified if we get here */
+                       if (!vma_migratable(vma)) {
+                               has_unmovable = true;
                                 break;
-                       migrate_page_add(page, qp->pagelist, flags);
+                       }
+
+                       /*
+                        * Do not abort immediately since there may be
+                        * temporary off LRU pages in the range.  Still
+                        * need migrate other LRU pages.
+                        */
+                       if (migrate_page_add(page, qp->pagelist, flags))
+                               has_unmovable = true;
                 } else
                         break;
         }
         pte_unmap_unlock(pte - 1, ptl);
         cond_resched();
+
+       if (has_unmovable)
+               return 1;
+
         return addr != end ? -EIO : 0;
  }
  
@@ -639,7 +660,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
   *
   * If pages found in a given range are on a set of nodes (determined by
   * @nodes and @flags,) it's isolated and queued to the pagelist which is
- * passed via @private.)
+ * passed via @private.
+ *
+ * queue_pages_range() has three possible return values:
+ * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 0 - queue pages successfully or no misplaced page.
+ * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
   */
  static int
  queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -940,7 +967,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
  /*
   * page migration, thp tail pages can be passed.
   */
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
                                 unsigned long flags)
  {
         struct page *head = compound_head(page);
@@ -953,8 +980,19 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
                         mod_node_page_state(page_pgdat(head),
                                 NR_ISOLATED_ANON + page_is_file_cache(head),
                                 hpage_nr_pages(head));
+               } else if (flags & MPOL_MF_STRICT) {
+                       /*
+                        * Non-movable page may reach here.  And, there may be
+                        * temporary off LRU pages or non-LRU movable pages.
+                        * Treat them as unmovable pages since they can't be
+                        * isolated, so they can't be moved at the moment.  It
+                        * should return -EIO for this case too.
+                        */
+                       return -EIO;
                 }
         }
+
+       return 0;
  }
  
  /* page allocation callback for NUMA node migration */
@@ -1142,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
         } else if (PageTransHuge(page)) {
                 struct page *thp;
  
-               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-                                        HPAGE_PMD_ORDER);
+               thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+                               address, numa_node_id());
                 if (!thp)
                         return NULL;
                 prep_transhuge_page(thp);
@@ -1157,9 +1195,10 @@ static struct page *new_page(struct page *page, unsigned long start)
  }
  #else
  
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
                                 unsigned long flags)
  {
+       return -EIO;
  }
  
  int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
@@ -1182,6 +1221,7 @@ static long do_mbind(unsigned long start, unsigned long len,
         struct mempolicy *new;
         unsigned long end;
         int err;
+       int ret;
         LIST_HEAD(pagelist);
  
         if (flags & ~(unsigned long)MPOL_MF_VALID)
@@ -1243,10 +1283,15 @@ static long do_mbind(unsigned long start, unsigned long len,
         if (err)
                 goto mpol_out;
  
-       err = queue_pages_range(mm, start, end, nmask,
+       ret = queue_pages_range(mm, start, end, nmask,
                           flags | MPOL_MF_INVERT, &pagelist);
-       if (!err)
-               err = mbind_range(mm, start, end, new);
+
+       if (ret < 0) {
+               err = -EIO;
+               goto up_out;
+       }
+
+       err = mbind_range(mm, start, end, new);
  
         if (!err) {
                 int nr_failed = 0;
@@ -1259,13 +1304,14 @@ static long do_mbind(unsigned long start, unsigned long len,
                                 putback_movable_pages(&pagelist);
                 }
  
-               if (nr_failed && (flags & MPOL_MF_STRICT))
+               if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
                         err = -EIO;
         } else
                 putback_movable_pages(&pagelist);
  
+up_out:
         up_write(&mm->mmap_sem);
- mpol_out:
+mpol_out:
         mpol_put(new);
         return err;
  }
@@ -1688,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
   * freeing by another task.  It is the caller's responsibility to free the
   * extra reference for shared policies.
   */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
                                                 unsigned long addr)
  {
         struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2037,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
   *     @vma:  Pointer to VMA or NULL if not available.
   *     @addr: Virtual Address of the allocation. Must be inside the VMA.
   *     @node: Which node to prefer for allocation (modulo policy).
- *     @hugepage: for hugepages try only the preferred node if possible
   *
   *     This function allocates a page from the kernel page pool and applies
   *     a NUMA policy associated with the VMA or the current process.
@@ -2048,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
   */
  struct page *
  alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node, bool hugepage)
+               unsigned long addr, int node)
  {
         struct mempolicy *pol;
         struct page *page;
@@ -2066,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
                 goto out;
         }
  
-       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-               int hpage_node = node;
-
-               /*
-                * For hugepage allocation and non-interleave policy which
-                * allows the current node (or other explicitly preferred
-                * node) we only try to allocate from the current/preferred
-                * node and don't fall back to other nodes, as the cost of
-                * remote accesses would likely offset THP benefits.
-                *
-                * If the policy is interleave, or does not allow the current
-                * node in its nodemask, we allocate the standard way.
-                */
-               if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-                       hpage_node = pol->v.preferred_node;
-
-               nmask = policy_nodemask(gfp, pol);
-               if (!nmask || node_isset(hpage_node, *nmask)) {
-                       mpol_cond_put(pol);
-                       page = __alloc_pages_node(hpage_node,
-                                               gfp | __GFP_THISNODE, order);
-                       goto out;
-               }
-       }
-
         nmask = policy_nodemask(gfp, pol);
         preferred_nid = policy_node(gfp, pol, node);
         page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);