Merge tag 'linux-kselftest-4.14-rc1-update' of git://git.kernel.org/pub/scm/linux...
mm/memory.c
index 56e48e4593cb76b1f89be8d2afd72762ebbfff4d..13ee83b4387872b325414bf46b8f44710c2230f2 100644 (file)
@@ -1513,8 +1513,20 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
        tlb_gather_mmu(&tlb, mm, start, end);
        update_hiwater_rss(mm);
        mmu_notifier_invalidate_range_start(mm, start, end);
-       for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
+       for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
                unmap_single_vma(&tlb, vma, start, end, NULL);
+
+               /*
+                * zap_page_range does not specify whether mmap_sem should be
+                * held for read or write. That allows parallel zap_page_range
+                * operations to unmap a PTE and defer a flush meaning that
+                * this call observes pte_none and fails to flush the TLB.
+                * Rather than adding a complex API, ensure that no stale
+                * TLB entries exist when this call returns.
+                */
+               flush_tlb_range(vma, start, end);
+       }
+
        mmu_notifier_invalidate_range_end(mm, start, end);
        tlb_finish_mmu(&tlb, start, end);
 }
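
The hunk above makes zap_page_range() flush the TLB for each VMA it unmaps, because callers hold mmap_sem for read as well as for write, and two concurrent zaps of overlapping ranges could otherwise leave a stale TLB entry behind (one side unmaps the PTE and defers its flush, the other sees pte_none and flushes nothing). One userspace path that reaches zap_page_range() with mmap_sem held for read is madvise(MADV_DONTNEED); the sketch below is not part of this patch, it just exercises that path on an anonymous mapping.

/*
 * Minimal userspace sketch (not from this patch): madvise(MADV_DONTNEED)
 * ends up in zap_page_range() with mmap_sem held for read, so two threads
 * issuing it over overlapping ranges hit the race described in the loop
 * comment above.
 */
#define _DEFAULT_SOURCE
#include <assert.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1 << 20;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	assert(p != MAP_FAILED);
	memset(p, 0xaa, len);

	/* Drops the pages; the PTEs are zapped via zap_page_range(). */
	assert(madvise(p, len, MADV_DONTNEED) == 0);

	/* Anonymous private memory reads back as zero after DONTNEED. */
	assert(p[0] == 0);
	return 0;
}
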
@@ -1676,7 +1688,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL(vm_insert_page);
 
 static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-                       pfn_t pfn, pgprot_t prot)
+                       pfn_t pfn, pgprot_t prot, bool mkwrite)
 {
        struct mm_struct *mm = vma->vm_mm;
        int retval;
@@ -1688,14 +1700,35 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
        if (!pte)
                goto out;
        retval = -EBUSY;
-       if (!pte_none(*pte))
-               goto out_unlock;
+       if (!pte_none(*pte)) {
+               if (mkwrite) {
+                       /*
+                        * For read faults on private mappings the PFN passed
+                        * in may not match the PFN we have mapped if the
+                        * mapped PFN is a writeable COW page.  In the mkwrite
+                        * case we are creating a writable PTE for a shared
+                        * mapping and we expect the PFNs to match.
+                        */
+                       if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn)))
+                               goto out_unlock;
+                       entry = *pte;
+                       goto out_mkwrite;
+               } else
+                       goto out_unlock;
+       }
 
        /* Ok, finally just insert the thing.. */
        if (pfn_t_devmap(pfn))
                entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
        else
                entry = pte_mkspecial(pfn_t_pte(pfn, prot));
+
+out_mkwrite:
+       if (mkwrite) {
+               entry = pte_mkyoung(entry);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+       }
+
        set_pte_at(mm, addr, pte, entry);
        update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
@@ -1766,14 +1799,15 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 
        track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
-       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
+       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
+                       false);
 
        return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn_prot);
 
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-                       pfn_t pfn)
+static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+                       pfn_t pfn, bool mkwrite)
 {
        pgprot_t pgprot = vma->vm_page_prot;
 
@@ -1802,10 +1836,24 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                page = pfn_to_page(pfn_t_to_pfn(pfn));
                return insert_page(vma, addr, page, pgprot);
        }
-       return insert_pfn(vma, addr, pfn, pgprot);
+       return insert_pfn(vma, addr, pfn, pgprot, mkwrite);
+}
+
+int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+                       pfn_t pfn)
+{
+       return __vm_insert_mixed(vma, addr, pfn, false);
+
 }
 EXPORT_SYMBOL(vm_insert_mixed);
 
+int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr,
+                       pfn_t pfn)
+{
+       return __vm_insert_mixed(vma, addr, pfn, true);
+}
+EXPORT_SYMBOL(vm_insert_mixed_mkwrite);
+
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results
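
The insert_pfn()/__vm_insert_mixed() changes above add a mkwrite mode so a pfn-backed fault handler can install (or upgrade to) a PTE that is already young, dirty and writable, instead of bouncing through a second write fault. The sketch below is hedged: foo_dax_fault(), struct foo_dev and its phys_base member are hypothetical, only vm_insert_mixed() and vm_insert_mixed_mkwrite() come from this file.

/*
 * Hypothetical caller sketch (foo_dax_fault, struct foo_dev and phys_base
 * are illustrative, not from this patch).  A write fault uses
 * vm_insert_mixed_mkwrite() so the PTE is installed dirty and writable in
 * one step; read faults keep using vm_insert_mixed().
 */
#include <linux/mm.h>
#include <linux/pfn_t.h>

struct foo_dev {			/* hypothetical driver state */
	phys_addr_t phys_base;		/* start of the device memory */
};

static int foo_dax_fault(struct vm_fault *vmf)
{
	struct foo_dev *fd = vmf->vma->vm_file->private_data;
	phys_addr_t phys = fd->phys_base +
			   ((phys_addr_t)vmf->pgoff << PAGE_SHIFT);
	pfn_t pfn = phys_to_pfn_t(phys, PFN_DEV | PFN_MAP);
	int rc;

	if (vmf->flags & FAULT_FLAG_WRITE)
		rc = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
	else
		rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);

	if (rc == -ENOMEM)
		return VM_FAULT_OOM;
	/* -EBUSY: a PTE is already present; retrying the access is enough. */
	if (rc < 0 && rc != -EBUSY)
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;
}
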
@@ -2571,7 +2619,7 @@ static int do_wp_page(struct vm_fault *vmf)
         * not dirty accountable.
         */
        if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
-               int total_mapcount;
+               int total_map_swapcount;
                if (!trylock_page(vmf->page)) {
                        get_page(vmf->page);
                        pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2586,8 +2634,8 @@ static int do_wp_page(struct vm_fault *vmf)
                        }
                        put_page(vmf->page);
                }
-               if (reuse_swap_page(vmf->page, &total_mapcount)) {
-                       if (total_mapcount == 1) {
+               if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
+                       if (total_map_swapcount == 1) {
                                /*
                                 * The page is all ours. Move it to
                                 * our anon_vma so the rmap code will
@@ -2704,16 +2752,23 @@ EXPORT_SYMBOL(unmap_mapping_range);
 int do_swap_page(struct vm_fault *vmf)
 {
        struct vm_area_struct *vma = vmf->vma;
-       struct page *page, *swapcache;
+       struct page *page = NULL, *swapcache;
        struct mem_cgroup *memcg;
+       struct vma_swap_readahead swap_ra;
        swp_entry_t entry;
        pte_t pte;
        int locked;
        int exclusive = 0;
        int ret = 0;
+       bool vma_readahead = swap_use_vma_readahead();
 
-       if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
+       if (vma_readahead)
+               page = swap_readahead_detect(vmf, &swap_ra);
+       if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
+               if (page)
+                       put_page(page);
                goto out;
+       }
 
        entry = pte_to_swp_entry(vmf->orig_pte);
        if (unlikely(non_swap_entry(entry))) {
@@ -2729,10 +2784,16 @@ int do_swap_page(struct vm_fault *vmf)
                goto out;
        }
        delayacct_set_flag(DELAYACCT_PF_SWAPIN);
-       page = lookup_swap_cache(entry);
+       if (!page)
+               page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
+                                        vmf->address);
        if (!page) {
-               page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma,
-                                       vmf->address);
+               if (vma_readahead)
+                       page = do_swap_page_readahead(entry,
+                               GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
+               else
+                       page = swapin_readahead(entry,
+                               GFP_HIGHUSER_MOVABLE, vma, vmf->address);
                if (!page) {
                        /*
                         * Back out if somebody else faulted in this pte
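
The do_swap_page() hunks wire up VMA-based swap readahead: swap_readahead_detect() runs before pte_unmap_same() (hence the extra put_page() when the PTE has changed underneath), lookup_swap_cache() takes the vma/address hint, and do_swap_page_readahead() replaces swapin_readahead() when the feature is active. Assuming the companion patches in this series expose the toggle at /sys/kernel/mm/swap/vma_ra_enabled (an assumption, not visible in this hunk), a userspace helper to switch swap_use_vma_readahead() off might look like:

/*
 * Hedged helper (not from this patch): assumes the toggle lives at
 * /sys/kernel/mm/swap/vma_ra_enabled and accepts "true"/"false".
 */
#include <stdio.h>

static int set_vma_readahead(int enable)
{
	FILE *f = fopen("/sys/kernel/mm/swap/vma_ra_enabled", "w");

	if (!f)
		return -1;
	fputs(enable ? "true\n" : "false\n", f);
	return fclose(f);
}

int main(void)
{
	if (set_vma_readahead(0))
		perror("vma_ra_enabled");
	return 0;
}
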
@@ -4356,19 +4417,53 @@ static void clear_gigantic_page(struct page *page,
        }
 }
 void clear_huge_page(struct page *page,
-                    unsigned long addr, unsigned int pages_per_huge_page)
+                    unsigned long addr_hint, unsigned int pages_per_huge_page)
 {
-       int i;
+       int i, n, base, l;
+       unsigned long addr = addr_hint &
+               ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
 
        if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
                clear_gigantic_page(page, addr, pages_per_huge_page);
                return;
        }
 
+       /* Clear the sub-page at addr_hint last so its cache lines stay hot */
        might_sleep();
-       for (i = 0; i < pages_per_huge_page; i++) {
+       n = (addr_hint - addr) / PAGE_SIZE;
+       if (2 * n <= pages_per_huge_page) {
+               /* Sub-page to access is in the first half of the huge page */
+               base = 0;
+               l = n;
+               /* Clear sub-pages at the end of the huge page */
+               for (i = pages_per_huge_page - 1; i >= 2 * n; i--) {
+                       cond_resched();
+                       clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+               }
+       } else {
+               /* Sub-page to access is in the second half of the huge page */
+               base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
+               l = pages_per_huge_page - n;
+               /* Clear sub-pages at the beginning of the huge page */
+               for (i = 0; i < base; i++) {
+                       cond_resched();
+                       clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+               }
+       }
+       /*
+        * Clear remaining sub-pages in left-right-left-right pattern
+        * towards the sub-page to access
+        */
+       for (i = 0; i < l; i++) {
+               int left_idx = base + i;
+               int right_idx = base + 2 * l - 1 - i;
+
+               cond_resched();
+               clear_user_highpage(page + left_idx,
+                                   addr + left_idx * PAGE_SIZE);
                cond_resched();
-               clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+               clear_user_highpage(page + right_idx,
+                                   addr + right_idx * PAGE_SIZE);
        }
 }
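
clear_huge_page() now takes the faulting address as a hint: it clears every other sub-page first and converges on the target sub-page from both sides, so the target's cache lines are still hot when the faulting thread touches them. The standalone sketch below (plain userspace C; PAGES and the target indices are made-up inputs, no kernel interfaces) reproduces the index arithmetic and prints the resulting clearing order; the target sub-page always comes out last.

/*
 * Standalone sketch of the clearing order used above.  PAGES stands in for
 * pages_per_huge_page (512 for a 2MB huge page on x86) and n is the index
 * of the sub-page containing addr_hint.
 */
#include <stdio.h>

#define PAGES 8

static void show_order(int n)
{
	int i, base, l;

	printf("target %d:", n);
	if (2 * n <= PAGES) {
		/* Target in the first half: clear the tail end first. */
		base = 0;
		l = n;
		for (i = PAGES - 1; i >= 2 * n; i--)
			printf(" %d", i);
	} else {
		/* Target in the second half: clear the head first. */
		base = PAGES - 2 * (PAGES - n);
		l = PAGES - n;
		for (i = 0; i < base; i++)
			printf(" %d", i);
	}
	/* Converge on the target from both sides, left-right-left-right. */
	for (i = 0; i < l; i++) {
		printf(" %d", base + i);
		printf(" %d", base + 2 * l - 1 - i);
	}
	printf("\n");
}

int main(void)
{
	show_order(2);	/* prints: target 2: 7 6 5 4 0 3 1 2 */
	show_order(6);	/* prints: target 6: 0 1 2 3 4 7 5 6 */
	return 0;
}
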