mm, hugetlb: unclutter hugetlb allocation layers

author Michal Hocko <mhocko@suse.com>

Mon, 10 Jul 2017 22:49:08 +0000 (15:49 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 10 Jul 2017 23:32:32 +0000 (16:32 -0700)
author Michal Hocko <mhocko@suse.com>
Mon, 10 Jul 2017 22:49:08 +0000 (15:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Jul 2017 23:32:32 +0000 (16:32 -0700)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 8fd0725d3f304caf3658e11732b698b3a9a856ee..66b621469f52fcea02441d693af7fb3aaaf96e19 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,7 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
  struct page *alloc_huge_page_node(struct hstate *h, int nid);
  struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
                                 unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask);
+struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask);
  int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                         pgoff_t idx);
  
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 9077865818121e4c5ed8bf9400ac2f8dede0d1ef..fd6e0c50f949b5e87b851aa9ccfdeed7a2f17389 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1521,82 +1521,19 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
         return rc;
  }
  
-/*
- * There are 3 ways this can get called:
- * 1. With vma+addr: we use the VMA's memory policy
- * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
- *    page from any node, and let the buddy allocator itself figure
- *    it out.
- * 3. With !vma, but nid!=NUMA_NO_NODE.  We allocate a huge page
- *    strictly from 'nid'
- */
  static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
-               struct vm_area_struct *vma, unsigned long addr, int nid)
+               gfp_t gfp_mask, int nid, nodemask_t *nmask)
  {
         int order = huge_page_order(h);
-       gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
-       unsigned int cpuset_mems_cookie;
  
-       /*
-        * We need a VMA to get a memory policy.  If we do not
-        * have one, we use the 'nid' argument.
-        *
-        * The mempolicy stuff below has some non-inlined bits
-        * and calls ->vm_ops.  That makes it hard to optimize at
-        * compile-time, even when NUMA is off and it does
-        * nothing.  This helps the compiler optimize it out.
-        */
-       if (!IS_ENABLED(CONFIG_NUMA) || !vma) {
-               /*
-                * If a specific node is requested, make sure to
-                * get memory from there, but only when a node
-                * is explicitly specified.
-                */
-               if (nid != NUMA_NO_NODE)
-                       gfp |= __GFP_THISNODE;
-               /*
-                * Make sure to call something that can handle
-                * nid=NUMA_NO_NODE
-                */
-               return alloc_pages_node(nid, gfp, order);
-       }
-
-       /*
-        * OK, so we have a VMA.  Fetch the mempolicy and try to
-        * allocate a huge page with it.  We will only reach this
-        * when CONFIG_NUMA=y.
-        */
-       do {
-               struct page *page;
-               struct mempolicy *mpol;
-               int nid;
-               nodemask_t *nodemask;
-
-               cpuset_mems_cookie = read_mems_allowed_begin();
-               nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
-               mpol_cond_put(mpol);
-               page = __alloc_pages_nodemask(gfp, order, nid, nodemask);
-               if (page)
-                       return page;
-       } while (read_mems_allowed_retry(cpuset_mems_cookie));
-
-       return NULL;
+       gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+       if (nid == NUMA_NO_NODE)
+               nid = numa_mem_id();
+       return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
  }
  
-/*
- * There are two ways to allocate a huge page:
- * 1. When you have a VMA and an address (like a fault)
- * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
- *
- * 'vma' and 'addr' are only for (1).  'nid' is always NUMA_NO_NODE in
- * this case which signifies that the allocation should be done with
- * respect for the VMA's memory policy.
- *
- * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
- * implies that memory policies will not be taken in to account.
- */
-static struct page *__alloc_buddy_huge_page(struct hstate *h,
-               struct vm_area_struct *vma, unsigned long addr, int nid)
+static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
+               int nid, nodemask_t *nmask)
  {
         struct page *page;
         unsigned int r_nid;
@@ -1604,15 +1541,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
         if (hstate_is_gigantic(h))
                 return NULL;
  
-       /*
-        * Make sure that anyone specifying 'nid' is not also specifying a VMA.
-        * This makes sure the caller is picking _one_ of the modes with which
-        * we can call this function, not both.
-        */
-       if (vma || (addr != -1)) {
-               VM_WARN_ON_ONCE(addr == -1);
-               VM_WARN_ON_ONCE(nid != NUMA_NO_NODE);
-       }
         /*
          * Assume we will successfully allocate the surplus page to
          * prevent racing processes from causing the surplus to exceed
@@ -1646,7 +1574,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
         }
         spin_unlock(&hugetlb_lock);
  
-       page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
+       page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
  
         spin_lock(&hugetlb_lock);
         if (page) {
@@ -1670,19 +1598,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
         return page;
  }
  
-/*
- * Allocate a huge page from 'nid'.  Note, 'nid' may be
- * NUMA_NO_NODE, which means that it may be allocated
- * anywhere.
- */
-static
-struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
-{
-       unsigned long addr = -1;
-
-       return __alloc_buddy_huge_page(h, NULL, addr, nid);
-}
-
  /*
   * Use the VMA's mpolicy to allocate a huge page from the buddy.
   */
@@ -1690,7 +1605,17 @@ static
  struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
                 struct vm_area_struct *vma, unsigned long addr)
  {
-       return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+       struct page *page;
+       struct mempolicy *mpol;
+       gfp_t gfp_mask = htlb_alloc_mask(h);
+       int nid;
+       nodemask_t *nodemask;
+
+       nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
+       page = __alloc_buddy_huge_page(h, gfp_mask, nid, nodemask);
+       mpol_cond_put(mpol);
+
+       return page;
  }
  
  /*
@@ -1700,21 +1625,26 @@ struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
   */
  struct page *alloc_huge_page_node(struct hstate *h, int nid)
  {
+       gfp_t gfp_mask = htlb_alloc_mask(h);
         struct page *page = NULL;
  
+       if (nid != NUMA_NO_NODE)
+               gfp_mask |= __GFP_THISNODE;
+
         spin_lock(&hugetlb_lock);
         if (h->free_huge_pages - h->resv_huge_pages > 0)
                 page = dequeue_huge_page_node(h, nid);
         spin_unlock(&hugetlb_lock);
  
         if (!page)
-               page = __alloc_buddy_huge_page_no_mpol(h, nid);
+               page = __alloc_buddy_huge_page(h, gfp_mask, nid, NULL);
  
         return page;
  }
  
-struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
+struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask)
  {
+       gfp_t gfp_mask = htlb_alloc_mask(h);
         struct page *page = NULL;
         int node;
  
@@ -1731,13 +1661,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
                 return page;
  
         /* No reservations, try to overcommit */
-       for_each_node_mask(node, *nmask) {
-               page = __alloc_buddy_huge_page_no_mpol(h, node);
-               if (page)
-                       return page;
-       }
-
-       return NULL;
+       return __alloc_buddy_huge_page(h, gfp_mask, NUMA_NO_NODE, nmask);
  }
  
  /*
@@ -1765,7 +1689,8 @@ static int gather_surplus_pages(struct hstate *h, int delta)
  retry:
         spin_unlock(&hugetlb_lock);
         for (i = 0; i < needed; i++) {
-               page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
+               page = __alloc_buddy_huge_page(h, htlb_alloc_mask(h),
+                               NUMA_NO_NODE, NULL);
                 if (!page) {
                         alloc_ok = false;
                         break;
author	Michal Hocko <mhocko@suse.com>
	Mon, 10 Jul 2017 22:49:08 +0000 (15:49 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 10 Jul 2017 23:32:32 +0000 (16:32 -0700)
include/linux/hugetlb.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history