mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd
author Dan Williams <dan.j.williams@intel.com>
Sat, 16 Jan 2016 00:56:52 +0000 (16:56 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Jan 2016 01:56:32 +0000 (17:56 -0800)
A dax-huge-page mapping, while it uses some thp helpers, is ultimately not
a transparent huge page.  The distinction is especially important in the
get_user_pages() path.  pmd_devmap() is used to distinguish dax-pmds
from pmd_huge() and pmd_trans_huge(), which have slightly different
semantics.

Explicitly mark the pmd_trans_huge() helpers that dax needs by adding
pmd_devmap() checks.

[kirill.shutemov@linux.intel.com: fix regression in handling mlocked pages in __split_huge_pmd()]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/x86/include/asm/pgtable.h
include/linux/huge_mm.h
include/linux/mm.h
mm/huge_memory.c
mm/memory.c
mm/mprotect.c
mm/pgtable-generic.c

index 6585a8b10fea5d393a25ef1ec0b3b3ce3aff7e90..6a0ad82c8d0f442d61105c081d469c4699ad6e19 100644 (file)
@@ -164,13 +164,20 @@ static inline int pmd_large(pmd_t pte)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_trans_huge(pmd_t pmd)
 {
-       return pmd_val(pmd) & _PAGE_PSE;
+       return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
 }
 
 static inline int has_transparent_hugepage(void)
 {
        return cpu_has_pse;
 }
+
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static inline int pmd_devmap(pmd_t pmd)
+{
+       return !!(pmd_val(pmd) & _PAGE_DEVMAP);
+}
+#endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
index 8ca35a131904db4a6fa39dca298d5636e24f7585..d39fa60bd6bfe297e1fc2edc0a02fee89fb96680 100644 (file)
@@ -104,7 +104,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 #define split_huge_pmd(__vma, __pmd, __address)                                \
        do {                                                            \
                pmd_t *____pmd = (__pmd);                               \
-               if (pmd_trans_huge(*____pmd))                           \
+               if (pmd_trans_huge(*____pmd)                            \
+                                       || pmd_devmap(*____pmd))        \
                        __split_huge_pmd(__vma, __pmd, __address);      \
        }  while (0)
 
@@ -124,7 +125,7 @@ static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
                spinlock_t **ptl)
 {
        VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
-       if (pmd_trans_huge(*pmd))
+       if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
                return __pmd_trans_huge_lock(pmd, vma, ptl);
        else
                return false;
index a9902152449f0e8a30b72b037568269eae98ee6e..cd123272d28d803649bdd7fc7f0b441387030886 100644 (file)
@@ -329,6 +329,13 @@ struct inode;
 #define page_private(page)             ((page)->private)
 #define set_page_private(page, v)      ((page)->private = (v))
 
+#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static inline int pmd_devmap(pmd_t pmd)
+{
+       return 0;
+}
+#endif
+
 /*
  * FIXME: take this include out, include page-flags.h in
  * files which need it (119 of them)
index d93706013a5564a1fca71c005a598a1b24188c48..82bed2bec3ed8afdf787d78b0ad14f7b70df6cb1 100644 (file)
@@ -995,7 +995,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
        ret = -EAGAIN;
        pmd = *src_pmd;
-       if (unlikely(!pmd_trans_huge(pmd))) {
+       if (unlikely(!pmd_trans_huge(pmd) && !pmd_devmap(pmd))) {
                pte_free(dst_mm, pgtable);
                goto out_unlock;
        }
@@ -1018,17 +1018,20 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                goto out_unlock;
        }
 
-       src_page = pmd_page(pmd);
-       VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
-       get_page(src_page);
-       page_dup_rmap(src_page, true);
-       add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+       if (pmd_trans_huge(pmd)) {
+               /* thp accounting separate from pmd_devmap accounting */
+               src_page = pmd_page(pmd);
+               VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
+               get_page(src_page);
+               page_dup_rmap(src_page, true);
+               add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+               atomic_long_inc(&dst_mm->nr_ptes);
+               pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+       }
 
        pmdp_set_wrprotect(src_mm, addr, src_pmd);
        pmd = pmd_mkold(pmd_wrprotect(pmd));
-       pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
        set_pmd_at(dst_mm, addr, dst_pmd, pmd);
-       atomic_long_inc(&dst_mm->nr_ptes);
 
        ret = 0;
 out_unlock:
@@ -1716,7 +1719,7 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
                spinlock_t **ptl)
 {
        *ptl = pmd_lock(vma->vm_mm, pmd);
-       if (likely(pmd_trans_huge(*pmd)))
+       if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
                return true;
        spin_unlock(*ptl);
        return false;
@@ -2788,7 +2791,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
        VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
        VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
-       VM_BUG_ON(!pmd_trans_huge(*pmd));
+       VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd));
 
        count_vm_event(THP_SPLIT_PMD);
 
@@ -2901,14 +2904,15 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
        mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
        ptl = pmd_lock(mm, pmd);
-       if (unlikely(!pmd_trans_huge(*pmd)))
+       if (pmd_trans_huge(*pmd)) {
+               page = pmd_page(*pmd);
+               if (PageMlocked(page))
+                       get_page(page);
+               else
+                       page = NULL;
+       } else if (!pmd_devmap(*pmd))
                goto out;
-       page = pmd_page(*pmd);
        __split_huge_pmd_locked(vma, pmd, haddr, false);
-       if (PageMlocked(page))
-               get_page(page);
-       else
-               page = NULL;
 out:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
@@ -2938,7 +2942,7 @@ static void split_huge_pmd_address(struct vm_area_struct *vma,
                return;
 
        pmd = pmd_offset(pud, address);
-       if (!pmd_present(*pmd) || !pmd_trans_huge(*pmd))
+       if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
                return;
        /*
         * Caller holds the mmap_sem write mode, so a huge pmd cannot
index 552ae3d694354f14eefb60fd347f8ea9c377c45e..ff17850a52d92c37817dd3dd45aba37b6bb4da7f 100644 (file)
@@ -950,7 +950,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
        src_pmd = pmd_offset(src_pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_trans_huge(*src_pmd)) {
+               if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
                        int err;
                        VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
                        err = copy_huge_pmd(dst_mm, src_mm,
@@ -1177,7 +1177,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_trans_huge(*pmd)) {
+               if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE) {
 #ifdef CONFIG_DEBUG_VM
                                if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -3375,7 +3375,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                int ret;
 
                barrier();
-               if (pmd_trans_huge(orig_pmd)) {
+               if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
                        unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
                        if (pmd_protnone(orig_pmd))
@@ -3404,7 +3404,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
            unlikely(__pte_alloc(mm, vma, pmd, address)))
                return VM_FAULT_OOM;
        /* if an huge pmd materialized from under us just retry later */
-       if (unlikely(pmd_trans_huge(*pmd)))
+       if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
                return 0;
        /*
         * A regular pmd is established and it can't morph into a huge pmd
index 6047707085c11b9ca385bad8f26b3cebf07f7a22..8eb7bb40dc40b6e8e89d05fbb7e05f8c836e05da 100644 (file)
@@ -149,7 +149,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                unsigned long this_pages;
 
                next = pmd_addr_end(addr, end);
-               if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
+               if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
+                               && pmd_none_or_clear_bad(pmd))
                        continue;
 
                /* invoke the mmu notifier if the pmd is populated */
@@ -158,7 +159,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                        mmu_notifier_invalidate_range_start(mm, mni_start, end);
                }
 
-               if (pmd_trans_huge(*pmd)) {
+               if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE)
                                split_huge_pmd(vma, pmd, addr);
                        else {
index c311a2ec6fea49bbb5acd1887be303c97e570133..9d4767698a1cd6988d4f71b37ef3f384eff5b3b1 100644 (file)
@@ -132,7 +132,7 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
        pmd_t pmd;
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-       VM_BUG_ON(!pmd_trans_huge(*pmdp));
+       VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
        pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
        flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
        return pmd;