Revert "Revert "Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask""

author David Rientjes <rientjes@google.com>

Wed, 4 Sep 2019 19:54:20 +0000 (12:54 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 28 Sep 2019 21:05:38 +0000 (14:05 -0700)
author David Rientjes <rientjes@google.com>
Wed, 4 Sep 2019 19:54:20 +0000 (12:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 21:05:38 +0000 (14:05 -0700)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h

index f33881688f42b2d6605d4f868546fbc06b1cd778..fb07b503dc453ddfe16c5f0f959d46be01ad55ba 100644 (file)
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
  }
  extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                         struct vm_area_struct *vma, unsigned long addr,
-                       int node);
+                       int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+       alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
  #else
  #define alloc_pages(gfp_mask, order) \
                 alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+       alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
         alloc_pages(gfp_mask, order)
  #endif
  #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
  #define alloc_page_vma(gfp_mask, vma, addr)                    \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
  #define alloc_page_vma_node(gfp_mask, vma, addr, node)         \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+       alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
  
  extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
  extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 62f0d8e9d76b3e5415a108fd8370344fda6bc4f1..aec462cc5d4632e01301f55eb7687a1419fea351 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -645,30 +645,30 @@ release:
   *         available
   * never: never stall for any thp allocation
   */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
  {
         const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-       const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
  
         /* Always do synchronous compaction */
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE | __GFP_THISNODE |
-                      (vma_madvised ? 0 : __GFP_NORETRY);
+               return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
  
         /* Kick kcompactd and fail quickly */
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | __GFP_KSWAPD_RECLAIM;
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
  
         /* Synchronous compaction if madvised, otherwise kick kcompactd */
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                 __GFP_KSWAPD_RECLAIM);
+               return GFP_TRANSHUGE_LIGHT |
+                       (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                       __GFP_KSWAPD_RECLAIM);
  
         /* Only do synchronous compaction if madvised */
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+               return GFP_TRANSHUGE_LIGHT |
+                      (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
  
-       return gfp_mask;
+       return GFP_TRANSHUGE_LIGHT;
  }
  
  /* Caller must hold page table lock. */
@@ -740,8 +740,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                         pte_free(vma->vm_mm, pgtable);
                 return ret;
         }
-       gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+       gfp = alloc_hugepage_direct_gfpmask(vma);
+       page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
         if (unlikely(!page)) {
                 count_vm_event(THP_FAULT_FALLBACK);
                 return VM_FAULT_FALLBACK;
@@ -1348,9 +1348,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
  alloc:
         if (__transparent_hugepage_enabled(vma) &&
             !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-               new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-                               haddr, numa_node_id());
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
         } else
                 new_page = NULL;
  
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 9c9877a43d5832eb93c5694f36faf3afc0d8663e..547cd403ed020eb17f260829942be7b0c35d5e47 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
         } else if (PageTransHuge(page)) {
                 struct page *thp;
  
-               thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-                               address, numa_node_id());
+               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+                                        HPAGE_PMD_ORDER);
                 if (!thp)
                         return NULL;
                 prep_transhuge_page(thp);
@@ -2083,6 +2083,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
   *     @vma:  Pointer to VMA or NULL if not available.
   *     @addr: Virtual Address of the allocation. Must be inside the VMA.
   *     @node: Which node to prefer for allocation (modulo policy).
+ *     @hugepage: for hugepages try only the preferred node if possible
   *
   *     This function allocates a page from the kernel page pool and applies
   *     a NUMA policy associated with the VMA or the current process.
@@ -2093,7 +2094,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
   */
  struct page *
  alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node)
+               unsigned long addr, int node, bool hugepage)
  {
         struct mempolicy *pol;
         struct page *page;
@@ -2111,6 +2112,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
                 goto out;
         }
  
+       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+               int hpage_node = node;
+
+               /*
+                * For hugepage allocation and non-interleave policy which
+                * allows the current node (or other explicitly preferred
+                * node) we only try to allocate from the current/preferred
+                * node and don't fall back to other nodes, as the cost of
+                * remote accesses would likely offset THP benefits.
+                *
+                * If the policy is interleave, or does not allow the current
+                * node in its nodemask, we allocate the standard way.
+                */
+               if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+                       hpage_node = pol->v.preferred_node;
+
+               nmask = policy_nodemask(gfp, pol);
+               if (!nmask || node_isset(hpage_node, *nmask)) {
+                       mpol_cond_put(pol);
+                       page = __alloc_pages_node(hpage_node,
+                                               gfp | __GFP_THISNODE, order);
+                       goto out;
+               }
+       }
+
         nmask = policy_nodemask(gfp, pol);
         preferred_nid = policy_node(gfp, pol, node);
         page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c

index 2bed4761f2795695b2e970c304f8985aeacef9f6..626d8c74b973f173d3062ee118580b649d35073a 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
  
         shmem_pseudo_vma_init(&pvma, info, hindex);
         page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
         shmem_pseudo_vma_destroy(&pvma);
         if (page)
                 prep_transhuge_page(page);
author	David Rientjes <rientjes@google.com>
	Wed, 4 Sep 2019 19:54:20 +0000 (12:54 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 28 Sep 2019 21:05:38 +0000 (14:05 -0700)
include/linux/gfp.h		patch \| blob \| history
mm/huge_memory.c		patch \| blob \| history
mm/mempolicy.c		patch \| blob \| history
mm/shmem.c		patch \| blob \| history