mm, proc: adjust PSS calculation
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Sat, 16 Jan 2016 00:52:13 +0000 (16:52 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Jan 2016 01:56:32 +0000 (17:56 -0800)
The goal of this patchset is to make refcounting on THP pages cheaper
with simpler semantics and allow the same THP compound page to be mapped
with PMD and PTEs.  This is required to get reasonable THP-pagecache
implementation.

With the new refcounting design it's much easier to protect against
split_huge_page(): simple reference on a page will make you the deal.
It makes gup_fast() implementation simpler and doesn't require
special-case in futex code to handle tail THP pages.

It should improve THP utilization over the system since splitting THP in
one process doesn't necessary lead to splitting the page in all other
processes have the page mapped.

The patchset drastically lower complexity of get_page()/put_page()
codepaths.  I encourage people look on this code before-and-after to
justify time budget on reviewing this patchset.

This patch (of 37):

With new refcounting all subpages of the compound page are not necessary
have the same mapcount.  We need to take into account mapcount of every
sub-page.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/proc/task_mmu.c

index a353b4c6e86e5d24007ac4f9ead1d80d577037d6..b74e7dec37dd4137b643f759c465078b9f98ed1a 100644 (file)
@@ -466,9 +466,10 @@ struct mem_size_stats {
 };
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-               unsigned long size, bool young, bool dirty)
+               bool compound, bool young, bool dirty)
 {
-       int mapcount;
+       int i, nr = compound ? HPAGE_PMD_NR : 1;
+       unsigned long size = nr * PAGE_SIZE;
 
        if (PageAnon(page))
                mss->anonymous += size;
@@ -477,23 +478,37 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
        /* Accumulate the size in pages that have been accessed. */
        if (young || page_is_young(page) || PageReferenced(page))
                mss->referenced += size;
-       mapcount = page_mapcount(page);
-       if (mapcount >= 2) {
-               u64 pss_delta;
 
-               if (dirty || PageDirty(page))
-                       mss->shared_dirty += size;
-               else
-                       mss->shared_clean += size;
-               pss_delta = (u64)size << PSS_SHIFT;
-               do_div(pss_delta, mapcount);
-               mss->pss += pss_delta;
-       } else {
+       /*
+        * page_count(page) == 1 guarantees the page is mapped exactly once.
+        * If any subpage of the compound page mapped with PTE it would elevate
+        * page_count().
+        */
+       if (page_count(page) == 1) {
                if (dirty || PageDirty(page))
                        mss->private_dirty += size;
                else
                        mss->private_clean += size;
                mss->pss += (u64)size << PSS_SHIFT;
+               return;
+       }
+
+       for (i = 0; i < nr; i++, page++) {
+               int mapcount = page_mapcount(page);
+
+               if (mapcount >= 2) {
+                       if (dirty || PageDirty(page))
+                               mss->shared_dirty += PAGE_SIZE;
+                       else
+                               mss->shared_clean += PAGE_SIZE;
+                       mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+               } else {
+                       if (dirty || PageDirty(page))
+                               mss->private_dirty += PAGE_SIZE;
+                       else
+                               mss->private_clean += PAGE_SIZE;
+                       mss->pss += PAGE_SIZE << PSS_SHIFT;
+               }
        }
 }
 
@@ -554,7 +569,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 
        if (!page)
                return;
-       smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
+
+       smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -570,8 +586,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        if (IS_ERR_OR_NULL(page))
                return;
        mss->anonymous_thp += HPAGE_PMD_SIZE;
-       smaps_account(mss, page, HPAGE_PMD_SIZE,
-                       pmd_young(*pmd), pmd_dirty(*pmd));
+       smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,