Revert "mm: replace p??_write with pte_access_permitted in fault + gup paths"
author     Linus Torvalds <torvalds@linux-foundation.org>    Sat, 16 Dec 2017 02:53:22 +0000 (18:53 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>    Sat, 16 Dec 2017 02:53:22 +0000 (18:53 -0800)
This reverts commits 5c9d2d5c269c, c7da82b894e9, and e7fe7b5cae90.

We'll probably need to revisit this, but basically we should not
complicate the get_user_pages_fast() case, and checking the actual page
table protection key bits will require more care anyway, since the
protection keys depend on the exact state of the VM in question.

Particularly when doing a "remote" page lookup (ie in somebody else's VM,
not your own), you need to be much more careful than this was.  Dave
Hansen says:

 "So, the underlying bug here is that we now a get_user_pages_remote()
  and then go ahead and do the p*_access_permitted() checks against the
  current PKRU. This was introduced recently with the addition of the
  new p??_access_permitted() calls.

  We have checks in the VMA path for the "remote" gups and we avoid
  consulting PKRU for them. This got missed in the pkeys selftests
  because I did a ptrace read, but not a *write*. I also didn't
  explicitly test it against something where a COW needed to be done"
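
To see why that check is inherently tied to the current task: on x86,
the p??_access_permitted() helpers eventually consult the PKRU
register, roughly like this (a simplified sketch of the arch/x86 pkeys
code, not the literal source):

  static inline bool __pkru_allows_pkey(u16 pkey, bool write)
  {
          u32 pkru = read_pkru();  /* hardware register of *current* */

          if (!__pkru_allows_read(pkru, pkey))
                  return false;
          if (write && !__pkru_allows_write(pkru, pkey))
                  return false;

          return true;
  }

Since read_pkru() reads the hardware register, a "remote" gup gets
judged by the tracer's pkey rights rather than the tracee's.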

It's also not entirely clear that it makes sense to check the protection
key bits at this level at all.  But one possible eventual solution is to
make the get_user_pages_fast() case just abort if it sees protection key
bits set, which makes us fall back to the regular get_user_pages() case,
which then has a vma and can do the check there if we want to.

We'll see.
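
A minimal sketch of what that bail-out could look like in the generic
gup_pte_range() -- pte_pkey_bits() and its placement are purely
illustrative here, not existing kernel API:

  /*
   * Sketch only: refuse the lockless fast path whenever any
   * protection key bits are set, so the caller falls back to the
   * regular get_user_pages() slow path, which has the vma and can
   * do a proper, context-aware permission check.
   */
  static inline bool gup_fast_pte_allowed(pte_t pte, bool write)
  {
          if (pte_pkey_bits(pte))  /* hypothetical helper */
                  return false;
          return !write || pte_write(pte);
  }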

Somewhat related to this all: what we _do_ want to do some day is to
check the PAGE_USER bit - it should obviously always be set for user
pages, but it would be a good check to have back.  Because we have no
generic way to test for it, we lost it as part of moving over from the
architecture-specific x86 GUP implementation to the generic one in
commit e585513b76f7 ("x86/mm/gup: Switch GUP to the generic
get_user_page_fast() implementation").

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/s390/include/asm/pgtable.h
arch/sparc/mm/gup.c
fs/dax.c
mm/gup.c
mm/hmm.c
mm/huge_memory.c
mm/memory.c

index 57d7bc92e0b8a766d24520ea5234fca56971b646..0a6b0286c32e9e0a7283d9cfb66dc357fe2e36fa 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
        return pud;
 }
 
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-       return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
 static inline pud_t pud_mkclean(pud_t pud)
 {
        if (pud_large(pud)) {
index 33c0f8bb0f33de0c6beadd3dd8a9bef6253bcb10..5335ba3c850ed3acdc074ffe639d3ddac101f2ad 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
        if (!(pmd_val(pmd) & _PAGE_VALID))
                return 0;
 
-       if (!pmd_access_permitted(pmd, write))
+       if (write && !pmd_write(pmd))
                return 0;
 
        refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
        if (!(pud_val(pud) & _PAGE_VALID))
                return 0;
 
-       if (!pud_access_permitted(pud, write))
+       if (write && !pud_write(pud))
                return 0;
 
        refs = 0;
index 78b72c48374e5eed09587292f3b7eee62059e18b..95981591977a04d08f300c0795fcd96a4211adc1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
                        if (pfn != pmd_pfn(*pmdp))
                                goto unlock_pmd;
-                       if (!pmd_dirty(*pmdp)
-                                       && !pmd_access_permitted(*pmdp, WRITE))
+                       if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
                                goto unlock_pmd;
 
                        flush_cache_page(vma, address, pfn);
index d3fb60e5bfacd4c733957dc526c28c41bd2321d1..e0d82b6706d72d82637bca5eaef1e35e15a1abdf 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
  */
 static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 {
-       return pte_access_permitted(pte, WRITE) ||
+       return pte_write(pte) ||
                ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
 }
 
index 3a5c172af56039bb26007ea4cc5ec4ca0a9bf659..ea19742a5d60b1a6270629a024d88a13b9c5f3c1 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ again:
                if (pmd_protnone(pmd))
                        return hmm_vma_walk_clear(start, end, walk);
 
-               if (!pmd_access_permitted(pmd, write_fault))
+               if (write_fault && !pmd_write(pmd))
                        return hmm_vma_walk_clear(start, end, walk);
 
                pfn = pmd_pfn(pmd) + pte_index(addr);
-               flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
+               flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
                for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
                        pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
                return 0;
@@ -456,11 +456,11 @@ again:
                        continue;
                }
 
-               if (!pte_access_permitted(pte, write_fault))
+               if (write_fault && !pte_write(pte))
                        goto fault;
 
                pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-               pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
+               pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
                continue;
 
 fault:
index 2f2f5e77490278f58c6e9a923899255efff77551..0e7ded98d114d184877d2fc9bd0f02c3187f2ed5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
         */
        WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
 
-       if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
+       if (flags & FOLL_WRITE && !pmd_write(*pmd))
                return NULL;
 
        if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 
        assert_spin_locked(pud_lockptr(mm, pud));
 
-       if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
+       if (flags & FOLL_WRITE && !pud_write(*pud))
                return NULL;
 
        if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
  */
 static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
 {
-       return pmd_access_permitted(pmd, WRITE) ||
+       return pmd_write(pmd) ||
               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
 }
 
index cfaba62877029a4173c3e04a8abc12d490ae7869..ca5674cbaff2b65c4e51086e5922fbbd274f2cfa 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3949,7 +3949,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
        if (unlikely(!pte_same(*vmf->pte, entry)))
                goto unlock;
        if (vmf->flags & FAULT_FLAG_WRITE) {
-               if (!pte_access_permitted(entry, WRITE))
+               if (!pte_write(entry))
                        return do_wp_page(vmf);
                entry = pte_mkdirty(entry);
        }
@@ -4014,7 +4014,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
                        /* NUMA case for anonymous PUDs would go here */
 
-                       if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
+                       if (dirty && !pud_write(orig_pud)) {
                                ret = wp_huge_pud(&vmf, orig_pud);
                                if (!(ret & VM_FAULT_FALLBACK))
                                        return ret;
@@ -4047,7 +4047,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
                        if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
                                return do_huge_pmd_numa_page(&vmf, orig_pmd);
 
-                       if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
+                       if (dirty && !pmd_write(orig_pmd)) {
                                ret = wp_huge_pmd(&vmf, orig_pmd);
                                if (!(ret & VM_FAULT_FALLBACK))
                                        return ret;
@@ -4337,7 +4337,7 @@ int follow_phys(struct vm_area_struct *vma,
                goto out;
        pte = *ptep;
 
-       if (!pte_access_permitted(pte, flags & FOLL_WRITE))
+       if ((flags & FOLL_WRITE) && !pte_write(pte))
                goto unlock;
 
        *prot = pgprot_val(pte_pgprot(pte));