thp: split_huge_page paging
authorAndrea Arcangeli <aarcange@redhat.com>
Thu, 13 Jan 2011 23:46:47 +0000 (15:46 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Jan 2011 01:32:41 +0000 (17:32 -0800)
Paging logic that splits the page before it is unmapped and added to swap
to ensure backwards compatibility with the legacy swap code.  Eventually
swap should natively pageout the hugepages to increase performance and
decrease seeking and fragmentation of swap space.  swapoff can just skip
over huge pmd as they cannot be part of swap yet.  In add_to_swap be
careful to split the page only if we got a valid swap entry so we don't
split hugepages with a full swap.

In theory we could split pages before isolating them during the lru scan,
but for khugepaged to be safe, I'm relying on either mmap_sem write mode,
or PG_lock taken, so split_huge_page has to run either with mmap_sem
read/write mode or PG_lock taken.  Calling it from isolate_lru_page would
make locking more complicated, in addition to that split_huge_page would
deadlock if called by __isolate_lru_page because it has to take the lru
lock to add the tail pages.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/memory-failure.c
mm/rmap.c
mm/swap_state.c
mm/swapfile.c

index 2323a8039a9872e40985a226984a8e21b385284e..6a283cc9317c52053e16a2bcf8ebaabb23634ad8 100644 (file)
@@ -386,6 +386,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
        struct task_struct *tsk;
        struct anon_vma *av;
 
+       if (unlikely(split_huge_page(page)))
+               return;
        read_lock(&tasklist_lock);
        av = page_lock_anon_vma(page);
        if (av == NULL) /* Not actually mapped anymore */
index c95d2ba27a0b104a50f7560740b741678c29227c..a3197a8a295b8ea515c6aa85c11512cb6d2bb06e 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1400,6 +1400,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
        int ret;
 
        BUG_ON(!PageLocked(page));
+       BUG_ON(PageTransHuge(page));
 
        if (unlikely(PageKsm(page)))
                ret = try_to_unmap_ksm(page, flags);
index e10f5833167f6d5b564a534484985dc3c7dee745..5c8cfabbc9bc3abdbf7f342656ea8c58b727aae8 100644 (file)
@@ -157,6 +157,12 @@ int add_to_swap(struct page *page)
        if (!entry.val)
                return 0;
 
+       if (unlikely(PageTransHuge(page)))
+               if (unlikely(split_huge_page(page))) {
+                       swapcache_free(entry, NULL);
+                       return 0;
+               }
+
        /*
         * Radix-tree node allocations from PF_MEMALLOC contexts could
         * completely exhaust the page allocator. __GFP_NOMEMALLOC
index b6adcfbf6f485e34b7fee53cfcb17679ef8be270..07a458d72fa880f5adc366b8acf03e610841880e 100644 (file)
@@ -964,6 +964,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
+               if (unlikely(pmd_trans_huge(*pmd)))
+                       continue;
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                ret = unuse_pte_range(vma, pmd, addr, next, entry, page);