mm, page_owner: handle THP splits correctly
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7e388b8662d83a03b965be80f8aa3ceaea0d92a..de1f15969e2782edd648a43fc5e7c6d7edacb38b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -32,6 +32,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/page_owner.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -647,27 +648,37 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-       const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
+       gfp_t this_node = 0;
 
-       /* Always do synchronous compaction */
-       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE | __GFP_THISNODE |
-                      (vma_madvised ? 0 : __GFP_NORETRY);
+#ifdef CONFIG_NUMA
+       struct mempolicy *pol;
+       /*
+        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+        * specified, to express a general desire to stay on the current
+        * node for optimistic allocation attempts. If the defrag mode
+        * and/or madvise hint requires direct reclaim, we prefer to fall
+        * back to other nodes rather than do node reclaim, because node
+        * reclaim can lead to excessive reclaim even though there is
+        * free memory on other nodes. We expect NUMA preferences to be
+        * expressed by memory policies.
+        */
+       pol = get_vma_policy(vma, addr);
+       if (pol->mode != MPOL_BIND)
+               this_node = __GFP_THISNODE;
+       mpol_cond_put(pol);
+#endif
 
-       /* Kick kcompactd and fail quickly */
+       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
+               return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | __GFP_KSWAPD_RECLAIM;
-
-       /* Synchronous compaction if madvised, otherwise kick kcompactd */
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                 __GFP_KSWAPD_RECLAIM);
-
-       /* Only do synchronous compaction if madvised */
+               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                                            __GFP_KSWAPD_RECLAIM | this_node);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
-
-       return gfp_mask;
+               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                                            this_node);
+       return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
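
For reference, here is a minimal userspace model of the decision table the
hunk above produces. This is a sketch only: the M_* constants, thp_gfpmask()
and mpol_is_bind are illustrative stand-ins rather than kernel symbols, and
only the combination logic mirrors the patched
alloc_hugepage_direct_gfpmask().

/* Compile with: cc -o thp_gfp thp_gfp.c */
#include <stdbool.h>
#include <stdio.h>

#define M_KSWAPD_RECLAIM (1u << 0)  /* stand-in for __GFP_KSWAPD_RECLAIM */
#define M_DIRECT_RECLAIM (1u << 1)  /* stand-in for __GFP_DIRECT_RECLAIM */
#define M_NORETRY        (1u << 2)  /* stand-in for __GFP_NORETRY */
#define M_THISNODE       (1u << 3)  /* stand-in for __GFP_THISNODE */

/* Mirrors the sysfs defrag modes: always, defer, defer+madvise, madvise, never */
enum defrag { D_ALWAYS, D_DEFER, D_DEFER_MADVISE, D_MADVISE, D_NEVER };

/*
 * mpol_is_bind models "pol->mode == MPOL_BIND": a bind policy already
 * constrains the nodemask, so __GFP_THISNODE is not stacked on top.
 * Every branch that implies direct reclaim deliberately omits
 * M_THISNODE, which is the point of the hunk above.
 */
static unsigned int thp_gfpmask(enum defrag mode, bool madvised, bool mpol_is_bind)
{
	unsigned int this_node = mpol_is_bind ? 0 : M_THISNODE;

	switch (mode) {
	case D_ALWAYS:		/* synchronous compaction, always */
		return M_DIRECT_RECLAIM | (madvised ? 0 : M_NORETRY);
	case D_DEFER:		/* kick kcompactd and fail quickly */
		return M_KSWAPD_RECLAIM | this_node;
	case D_DEFER_MADVISE:	/* synchronous compaction only if madvised */
		return madvised ? M_DIRECT_RECLAIM : (M_KSWAPD_RECLAIM | this_node);
	case D_MADVISE:		/* synchronous if madvised, else optimistic */
		return madvised ? M_DIRECT_RECLAIM : this_node;
	default:		/* never: purely optimistic, stay local */
		return this_node;
	}
}

int main(void)
{
	static const char * const names[] =
		{ "always", "defer", "defer+madvise", "madvise", "never" };

	for (int m = D_ALWAYS; m <= D_NEVER; m++)
		printf("%-14s madvised=0 -> %#04x   madvised=1 -> %#04x\n",
		       names[m], thp_gfpmask(m, false, false),
		       thp_gfpmask(m, true, false));
	return 0;
}

Note that every branch taken with direct reclaim in hand omits the
local-node bias; only the optimistic and kswapd-backed attempts keep it,
and a bind memory policy suppresses it entirely.
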
@@ -2506,6 +2517,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        }
 
        ClearPageCompound(head);
+
+       split_page_owner(head, HPAGE_PMD_ORDER);
+
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
                /* Additional pin to swap cache */
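
Finally, a toy model of the bookkeeping split_page_owner() has to do at
this point. This is an illustration, not the page_owner implementation:
owner_rec, split_owner() and PMD_ORDER are invented names. The idea is
that without this call the head page would keep claiming an order-9
allocation while the tail pages carry stale or missing owner records
after the split.

#include <assert.h>
#include <stdio.h>

#define PMD_ORDER 9                /* stand-in for HPAGE_PMD_ORDER */
#define NR_PAGES  (1 << PMD_ORDER) /* 512 base pages per THP */

/* One owner record per base page, as page_owner conceptually keeps. */
struct owner_rec {
	int order;  /* allocation order recorded at allocation time */
	int valid;  /* whether this record describes a live allocation */
};

/*
 * On a THP split, rewrite the single order-9 record at the head into
 * 1 << order independent order-0 records, one per resulting page.
 */
static void split_owner(struct owner_rec *head, unsigned int order)
{
	for (unsigned int i = 0; i < (1u << order); i++) {
		head[i].order = 0;
		head[i].valid = 1;
	}
}

int main(void)
{
	static struct owner_rec recs[NR_PAGES];

	recs[0] = (struct owner_rec){ .order = PMD_ORDER, .valid = 1 };
	split_owner(recs, PMD_ORDER);

	assert(recs[0].order == 0);
	assert(recs[NR_PAGES - 1].valid);
	printf("all %d pages now have their own order-0 record\n", NR_PAGES);
	return 0;
}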