mm, page_owner: handle THP splits correctly
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7e388b8662d83a03b965be80f8aa3ceaea0d92a..de1f15969e2782edd648a43fc5e7c6d7edacb38b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -32,6 +32,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/page_owner.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -647,27 +648,37 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-       const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
+       gfp_t this_node = 0;
 
-       /* Always do synchronous compaction */
-       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE | __GFP_THISNODE |
-                      (vma_madvised ? 0 : __GFP_NORETRY);
+#ifdef CONFIG_NUMA
+       struct mempolicy *pol;
+       /*
+        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+        * specified, to express a general desire to stay on the current
+        * node for optimistic allocation attempts. If the defrag mode
+        * and/or madvise hint requires direct reclaim, we prefer to fall
+        * back to other nodes rather than do node reclaim, because node
+        * reclaim can lead to excessive reclaim even though there is
+        * free memory on other nodes. We expect NUMA preferences to be
+        * expressed by memory policies.
+        */
+       pol = get_vma_policy(vma, addr);
+       if (pol->mode != MPOL_BIND)
+               this_node = __GFP_THISNODE;
+       mpol_cond_put(pol);
+#endif
 
-       /* Kick kcompactd and fail quickly */
+       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
+               return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | __GFP_KSWAPD_RECLAIM;
-
-       /* Synchronous compaction if madvised, otherwise kick kcompactd */
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                 __GFP_KSWAPD_RECLAIM);
-
-       /* Only do synchronous compaction if madvised */
+               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                                            __GFP_KSWAPD_RECLAIM | this_node);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
-
-       return gfp_mask;
+               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                                            this_node);
+       return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
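
For reference, here is a minimal userspace model of the decision table the
hunk above produces. This is a sketch only: the M_* constants, thp_gfpmask()
and mpol_is_bind are illustrative stand-ins rather than kernel symbols, and
only the combination logic mirrors the patched
alloc_hugepage_direct_gfpmask().

/* Compile with: cc -o thp_gfp thp_gfp.c */
#include <stdbool.h>
#include <stdio.h>

#define M_KSWAPD_RECLAIM (1u << 0)  /* stand-in for __GFP_KSWAPD_RECLAIM */
#define M_DIRECT_RECLAIM (1u << 1)  /* stand-in for __GFP_DIRECT_RECLAIM */
#define M_NORETRY        (1u << 2)  /* stand-in for __GFP_NORETRY */
#define M_THISNODE       (1u << 3)  /* stand-in for __GFP_THISNODE */

/* Mirrors the sysfs defrag modes: always, defer, defer+madvise, madvise, never */
enum defrag { D_ALWAYS, D_DEFER, D_DEFER_MADVISE, D_MADVISE, D_NEVER };

/*
 * mpol_is_bind models "pol->mode == MPOL_BIND": a bind policy already
 * constrains the nodemask, so __GFP_THISNODE is not stacked on top.
 * Every branch that implies direct reclaim deliberately omits
 * M_THISNODE, which is the point of the hunk above.
 */
static unsigned int thp_gfpmask(enum defrag mode, bool madvised, bool mpol_is_bind)
{
	unsigned int this_node = mpol_is_bind ? 0 : M_THISNODE;

	switch (mode) {
	case D_ALWAYS:		/* synchronous compaction, always */
		return M_DIRECT_RECLAIM | (madvised ? 0 : M_NORETRY);
	case D_DEFER:		/* kick kcompactd and fail quickly */
		return M_KSWAPD_RECLAIM | this_node;
	case D_DEFER_MADVISE:	/* synchronous compaction only if madvised */
		return madvised ? M_DIRECT_RECLAIM : (M_KSWAPD_RECLAIM | this_node);
	case D_MADVISE:		/* synchronous if madvised, else optimistic */
		return madvised ? M_DIRECT_RECLAIM : this_node;
	default:		/* never: purely optimistic, stay local */
		return this_node;
	}
}

int main(void)
{
	static const char * const names[] =
		{ "always", "defer", "defer+madvise", "madvise", "never" };

	for (int m = D_ALWAYS; m <= D_NEVER; m++)
		printf("%-14s madvised=0 -> %#04x   madvised=1 -> %#04x\n",
		       names[m], thp_gfpmask(m, false, false),
		       thp_gfpmask(m, true, false));
	return 0;
}

Note that every branch taken with direct reclaim in hand omits the
local-node bias; only the optimistic and kswapd-backed attempts keep it,
and a bind memory policy suppresses it entirely.
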
@@ -2506,6 +2517,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        }
 
        ClearPageCompound(head);
+
+       split_page_owner(head, HPAGE_PMD_ORDER);
+
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
                /* Additional pin to swap cache */
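
Finally, a toy model of the bookkeeping split_page_owner() has to do at
this point. This is an illustration, not the page_owner implementation:
owner_rec, split_owner() and PMD_ORDER are invented names. The idea is
that without this call the head page would keep claiming an order-9
allocation while the tail pages carry stale or missing owner records
after the split.

#include <assert.h>
#include <stdio.h>

#define PMD_ORDER 9                /* stand-in for HPAGE_PMD_ORDER */
#define NR_PAGES  (1 << PMD_ORDER) /* 512 base pages per THP */

/* One owner record per base page, as page_owner conceptually keeps. */
struct owner_rec {
	int order;  /* allocation order recorded at allocation time */
	int valid;  /* whether this record describes a live allocation */
};

/*
 * On a THP split, rewrite the single order-9 record at the head into
 * 1 << order independent order-0 records, one per resulting page.
 */
static void split_owner(struct owner_rec *head, unsigned int order)
{
	for (unsigned int i = 0; i < (1u << order); i++) {
		head[i].order = 0;
		head[i].valid = 1;
	}
}

int main(void)
{
	static struct owner_rec recs[NR_PAGES];

	recs[0] = (struct owner_rec){ .order = PMD_ORDER, .valid = 1 };
	split_owner(recs, PMD_ORDER);

	assert(recs[0].order == 0);
	assert(recs[NR_PAGES - 1].valid);
	printf("all %d pages now have their own order-0 record\n", NR_PAGES);
	return 0;
}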