Merge branch 'fixes' of git://git.armlinux.org.uk/~rmk/linux-arm
[sfrench/cifs-2.6.git] / mm / huge_memory.c
index 55478ab3c83be372f9fa4d654f16e32ffdeb1e29..f2d19e4fe854f3941bbc24dd1e1cc76d0622b7eb 100644 (file)
@@ -632,37 +632,27 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-       gfp_t this_node = 0;
-
-#ifdef CONFIG_NUMA
-       struct mempolicy *pol;
-       /*
-        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
-        * specified, to express a general desire to stay on the current
-        * node for optimistic allocation attempts. If the defrag mode
-        * and/or madvise hint requires the direct reclaim then we prefer
-        * to fallback to other node rather than node reclaim because that
-        * can lead to excessive reclaim even though there is free memory
-        * on other nodes. We expect that NUMA preferences are specified
-        * by memory policies.
-        */
-       pol = get_vma_policy(vma, addr);
-       if (pol->mode != MPOL_BIND)
-               this_node = __GFP_THISNODE;
-       mpol_cond_put(pol);
-#endif
+       const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
+       /* Always do synchronous compaction */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+               return GFP_TRANSHUGE | __GFP_THISNODE |
+                      (vma_madvised ? 0 : __GFP_NORETRY);
+
+       /* Kick kcompactd and fail quickly */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+               return gfp_mask | __GFP_KSWAPD_RECLAIM;
+
+       /* Synchronous compaction if madvised, otherwise kick kcompactd */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            __GFP_KSWAPD_RECLAIM | this_node);
+               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                                 __GFP_KSWAPD_RECLAIM);
+
+       /* Only do synchronous compaction if madvised */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            this_node);
-       return GFP_TRANSHUGE_LIGHT | this_node;
+               return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+
+       return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
@@ -2350,7 +2340,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
        }
 }
 
-static void freeze_page(struct page *page)
+static void unmap_page(struct page *page)
 {
        enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
                TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
@@ -2365,7 +2355,7 @@ static void freeze_page(struct page *page)
        VM_BUG_ON_PAGE(!unmap_success, page);
 }
 
-static void unfreeze_page(struct page *page)
+static void remap_page(struct page *page)
 {
        int i;
        if (PageTransHuge(page)) {
@@ -2402,6 +2392,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
                         (1L << PG_unevictable) |
                         (1L << PG_dirty)));
 
+       /* ->mapping in first tail page is compound_mapcount */
+       VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+                       page_tail);
+       page_tail->mapping = head->mapping;
+       page_tail->index = head->index + tail;
+
        /* Page flags must be visible before we make the page non-compound. */
        smp_wmb();
 
@@ -2422,12 +2418,6 @@ static void __split_huge_page_tail(struct page *head, int tail,
        if (page_is_idle(head))
                set_page_idle(page_tail);
 
-       /* ->mapping in first tail page is compound_mapcount */
-       VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
-                       page_tail);
-       page_tail->mapping = head->mapping;
-
-       page_tail->index = head->index + tail;
        page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
 
        /*
@@ -2439,12 +2429,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-               unsigned long flags)
+               pgoff_t end, unsigned long flags)
 {
        struct page *head = compound_head(page);
        struct zone *zone = page_zone(head);
        struct lruvec *lruvec;
-       pgoff_t end = -1;
        int i;
 
        lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2452,9 +2441,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        /* complete memcg works before add pages to LRU */
        mem_cgroup_split_huge_fixup(head);
 
-       if (!PageAnon(page))
-               end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
        for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
@@ -2483,7 +2469,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
        spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
 
-       unfreeze_page(head);
+       remap_page(head);
 
        for (i = 0; i < HPAGE_PMD_NR; i++) {
                struct page *subpage = head + i;
@@ -2626,6 +2612,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        int count, mapcount, extra_pins, ret;
        bool mlocked;
        unsigned long flags;
+       pgoff_t end;
 
        VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
        VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2648,6 +2635,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        ret = -EBUSY;
                        goto out;
                }
+               end = -1;
                mapping = NULL;
                anon_vma_lock_write(anon_vma);
        } else {
@@ -2661,10 +2649,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
                anon_vma = NULL;
                i_mmap_lock_read(mapping);
+
+               /*
+                *__split_huge_page() may need to trim off pages beyond EOF:
+                * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+                * which cannot be nested inside the page tree lock. So note
+                * end now: i_size itself may be changed at any moment, but
+                * head page lock is good enough to serialize the trimming.
+                */
+               end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
        }
 
        /*
-        * Racy check if we can split the page, before freeze_page() will
+        * Racy check if we can split the page, before unmap_page() will
         * split PMDs
         */
        if (!can_split_huge_page(head, &extra_pins)) {
@@ -2673,7 +2670,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        mlocked = PageMlocked(page);
-       freeze_page(head);
+       unmap_page(head);
        VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
        /* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2707,7 +2704,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                if (mapping)
                        __dec_node_page_state(page, NR_SHMEM_THPS);
                spin_unlock(&pgdata->split_queue_lock);
-               __split_huge_page(page, list, flags);
+               __split_huge_page(page, list, end, flags);
                if (PageSwapCache(head)) {
                        swp_entry_t entry = { .val = page_private(head) };
 
@@ -2727,7 +2724,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 fail:          if (mapping)
                        xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
-               unfreeze_page(head);
+               remap_page(head);
                ret = -EBUSY;
        }