mm, thp: Do not make pmd/pud dirty without a reason
author      Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
            Mon, 27 Nov 2017 03:21:26 +0000 (06:21 +0300)
committer   Linus Torvalds <torvalds@linux-foundation.org>
            Mon, 27 Nov 2017 20:26:29 +0000 (12:26 -0800)
Currently we make page table entries dirty all the time regardless of
access type, and we don't even consider whether the mapping is
write-protected.  The reasoning is that we don't really need dirty
tracking on THP, and making the entry dirty upfront may save some time
on the first write to the page.

Unfortunately, such an approach may result in a false-positive
can_follow_write_pmd() for the huge zero page or a read-only shmem
file.
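
For context, can_follow_write_pmd() uses the dirty bit as evidence that
a COW cycle has already happened before it lets FOLL_FORCE write
through a write-protected mapping.  A sketch of the check as it looks
around this kernel version (mm/huge_memory.c):

	static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
	{
		/* FOLL_FORCE may write to unwritable pmds, but only after COW */
		return pmd_write(pmd) ||
		       ((flags & FOLL_FORCE) && (flags & FOLL_COW) &&
			pmd_dirty(pmd));
	}

An entry that was made dirty at fault time therefore looks as if it has
already been through COW, even though it never was.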

Let's make the page dirty only if we are about to write to the page
anyway (as we do for small pages).

I've restructured the code so that the entry is made dirty inside
maybe_p[mu]d_mkwrite(), which also takes into account whether the vma
is write-protected.
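
The resulting calling convention, illustrated with two call sites from
the patch below: paths that are about to write pass dirty == true,
while paths that merely re-establish a mapping (page migration,
khugepaged collapse) pass dirty == false:

	/* write path, e.g. __do_huge_pmd_anonymous_page() */
	entry = maybe_pmd_mkwrite(entry, vma, true);

	/* non-write path, e.g. remove_migration_pmd() */
	pmde = maybe_pmd_mkwrite(pmde, vma, false);

Either way, the write and dirty bits are only set when the vma has
VM_WRITE.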

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/huge_memory.c
mm/internal.h
mm/khugepaged.c
mm/memory.c
mm/migrate.c

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0e7ded98d114d184877d2fc9bd0f02c3187f2ed5..f22401fd83b5cd53da62498a76d75e96a69317b8 100644
@@ -474,10 +474,13 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty)
 {
-       if (likely(vma->vm_flags & VM_WRITE))
+       if (likely(vma->vm_flags & VM_WRITE)) {
                pmd = pmd_mkwrite(pmd);
+               if (dirty)
+                       pmd = pmd_mkdirty(pmd);
+       }
        return pmd;
 }
 
@@ -599,7 +602,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
                }
 
                entry = mk_huge_pmd(page, vma->vm_page_prot);
-               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+               entry = maybe_pmd_mkwrite(entry, vma, true);
                page_add_new_anon_rmap(page, vma, haddr, true);
                mem_cgroup_commit_charge(page, memcg, false, true);
                lru_cache_add_active_or_unevictable(page, vma);
@@ -741,8 +744,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
        if (pfn_t_devmap(pfn))
                entry = pmd_mkdevmap(entry);
        if (write) {
-               entry = pmd_mkyoung(pmd_mkdirty(entry));
-               entry = maybe_pmd_mkwrite(entry, vma);
+               entry = pmd_mkyoung(entry);
+               entry = maybe_pmd_mkwrite(entry, vma, true);
        }
 
        if (pgtable) {
@@ -788,10 +791,14 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma,
+               bool dirty)
 {
-       if (likely(vma->vm_flags & VM_WRITE))
+       if (likely(vma->vm_flags & VM_WRITE)) {
                pud = pud_mkwrite(pud);
+               if (dirty)
+                       pud = pud_mkdirty(pud);
+       }
        return pud;
 }
 
@@ -807,8 +814,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
        if (pfn_t_devmap(pfn))
                entry = pud_mkdevmap(entry);
        if (write) {
-               entry = pud_mkyoung(pud_mkdirty(entry));
-               entry = maybe_pud_mkwrite(entry, vma);
+               entry = pud_mkyoung(entry);
+               entry = maybe_pud_mkwrite(entry, vma, true);
        }
        set_pud_at(mm, addr, pud, entry);
        update_mmu_cache_pud(vma, addr, pud);
@@ -1279,7 +1286,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
        if (reuse_swap_page(page, NULL)) {
                pmd_t entry;
                entry = pmd_mkyoung(orig_pmd);
-               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+               entry = maybe_pmd_mkwrite(entry, vma, true);
                if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry,  1))
                        update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
                ret |= VM_FAULT_WRITE;
@@ -1349,7 +1356,7 @@ alloc:
        } else {
                pmd_t entry;
                entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+               entry = maybe_pmd_mkwrite(entry, vma, true);
                pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
                page_add_new_anon_rmap(new_page, vma, haddr, true);
                mem_cgroup_commit_charge(new_page, memcg, false, true);
@@ -2928,7 +2935,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
        if (pmd_swp_soft_dirty(*pvmw->pmd))
                pmde = pmd_mksoft_dirty(pmde);
        if (is_write_migration_entry(entry))
-               pmde = maybe_pmd_mkwrite(pmde, vma);
+               pmde = maybe_pmd_mkwrite(pmde, vma, false);
 
        flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
        page_add_anon_rmap(new, vma, mmun_start, true);
diff --git a/mm/internal.h b/mm/internal.h
index e6bd35182daee1226b684464fd202df86762c636..b35cdebda0cef459ed55bb559b0412f7c0a53ddb 100644
@@ -328,7 +328,8 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
        }
 }
 
-extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma,
+               bool dirty);
 
 /*
  * At what user virtual address is page expected in @vma?
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ea4ff259b67195b32d3335795d4de80c4e6e5bcf..db43dc8a8ae619c0fc58f8efeecf7d2c99654938 100644
@@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        pgtable = pmd_pgtable(_pmd);
 
        _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
-       _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
+       _pmd = maybe_pmd_mkwrite(_pmd, vma, false);
 
        /*
         * spin_lock() below is not the equivalent of smp_wmb(), so
diff --git a/mm/memory.c b/mm/memory.c
index 85e7a87da79fe4a5487e1f3f6216e61b9827515c..b10c1d26f675be98aca602142084cdf35621719a 100644
@@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 
        entry = mk_huge_pmd(page, vma->vm_page_prot);
        if (write)
-               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+               entry = maybe_pmd_mkwrite(entry, vma, true);
 
        add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
        page_add_file_rmap(page, true);
diff --git a/mm/migrate.c b/mm/migrate.c
index 4d0be47a322a8a33491c94072100ebcec829becd..57865fc8cfe332282f35b25770bb72f75d7152f0 100644
@@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        }
 
        entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+       entry = maybe_pmd_mkwrite(entry, vma, false);
 
        /*
         * Clear the old entry under pagetable lock and establish the new PTE.