diff --git a/mm/filemap.c b/mm/filemap.c
index 594d73fef8b43bae852f4f7ace1e8cfc46b23690..ee83baaf855d555eac2f06a8adbd333836b3e6f7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
+#include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include "internal.h"
 
@@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
                        *shadowp = p;
        }
        __radix_tree_replace(&mapping->page_tree, node, slot, page,
-                            workingset_update_node, mapping);
+                            workingset_lookup_update(mapping));
        mapping->nrpages++;
        return 0;
 }
@@ -162,9 +163,12 @@ static void page_cache_tree_delete(struct address_space *mapping,
 
                radix_tree_clear_tags(&mapping->page_tree, node, slot);
                __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
-                                    workingset_update_node, mapping);
+                               workingset_lookup_update(mapping));
        }
 
+       page->mapping = NULL;
+       /* Leave page->index set: truncation lookup relies upon it */
+
        if (shadow) {
                mapping->nrexceptional += nr;
                /*
@@ -178,17 +182,11 @@ static void page_cache_tree_delete(struct address_space *mapping,
        mapping->nrpages -= nr;
 }
 
-/*
- * Delete a page from the page cache and free it. Caller has to make
- * sure the page is locked and that nobody else uses it - or that usage
- * is safe.  The caller must hold the mapping's tree_lock.
- */
-void __delete_from_page_cache(struct page *page, void *shadow)
+static void unaccount_page_cache_page(struct address_space *mapping,
+                                     struct page *page)
 {
-       struct address_space *mapping = page->mapping;
-       int nr = hpage_nr_pages(page);
+       int nr;
 
-       trace_mm_filemap_delete_from_page_cache(page);
        /*
         * if we're uptodate, flush out into the cleancache, otherwise
         * invalidate any existing cleancache entries.  We can't leave
@@ -224,15 +222,12 @@ void __delete_from_page_cache(struct page *page, void *shadow)
                }
        }
 
-       page_cache_tree_delete(mapping, page, shadow);
-
-       page->mapping = NULL;
-       /* Leave page->index set: truncation lookup relies upon it */
-
        /* hugetlb pages do not participate in page cache accounting. */
        if (PageHuge(page))
                return;
 
+       nr = hpage_nr_pages(page);
+
        __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
        if (PageSwapBacked(page)) {
                __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
@@ -243,17 +238,51 @@ void __delete_from_page_cache(struct page *page, void *shadow)
        }
 
        /*
-        * At this point page must be either written or cleaned by truncate.
-        * Dirty page here signals a bug and loss of unwritten data.
+        * At this point page must be either written or cleaned by
+        * truncate.  Dirty page here signals a bug and loss of
+        * unwritten data.
         *
-        * This fixes dirty accounting after removing the page entirely but
-        * leaves PageDirty set: it has no effect for truncated page and
-        * anyway will be cleared before returning page into buddy allocator.
+        * This fixes dirty accounting after removing the page entirely
+        * but leaves PageDirty set: it has no effect for truncated
+        * page and anyway will be cleared before returning page into
+        * buddy allocator.
         */
        if (WARN_ON_ONCE(PageDirty(page)))
                account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
 }
 
+/*
+ * Delete a page from the page cache and free it. Caller has to make
+ * sure the page is locked and that nobody else uses it - or that usage
+ * is safe.  The caller must hold the mapping's tree_lock.
+ */
+void __delete_from_page_cache(struct page *page, void *shadow)
+{
+       struct address_space *mapping = page->mapping;
+
+       trace_mm_filemap_delete_from_page_cache(page);
+
+       unaccount_page_cache_page(mapping, page);
+       page_cache_tree_delete(mapping, page, shadow);
+}
+
+static void page_cache_free_page(struct address_space *mapping,
+                               struct page *page)
+{
+       void (*freepage)(struct page *);
+
+       freepage = mapping->a_ops->freepage;
+       if (freepage)
+               freepage(page);
+
+       if (PageTransHuge(page) && !PageHuge(page)) {
+               page_ref_sub(page, HPAGE_PMD_NR);
+               VM_BUG_ON_PAGE(page_count(page) <= 0, page);
+       } else {
+               put_page(page);
+       }
+}
+
 /**
  * delete_from_page_cache - delete page from page cache
  * @page: the page which the kernel is trying to remove from page cache
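The deletion path is now split into unaccounting, radix-tree removal and freeing, but from a caller's point of view nothing changes for the single-page case. A minimal caller sketch (the helper name is hypothetical, and it assumes the caller holds its own reference from a prior lookup):

static void example_remove_one_page(struct page *page)
{
        lock_page(page);
        /*
         * delete_from_page_cache() takes mapping->tree_lock itself and
         * drops the page cache's reference via page_cache_free_page().
         */
        if (page->mapping)
                delete_from_page_cache(page);
        unlock_page(page);
        put_page(page);         /* drop the caller's own lookup reference */
}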
@@ -266,27 +295,98 @@ void delete_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
        unsigned long flags;
-       void (*freepage)(struct page *);
 
        BUG_ON(!PageLocked(page));
-
-       freepage = mapping->a_ops->freepage;
-
        spin_lock_irqsave(&mapping->tree_lock, flags);
        __delete_from_page_cache(page, NULL);
        spin_unlock_irqrestore(&mapping->tree_lock, flags);
 
-       if (freepage)
-               freepage(page);
+       page_cache_free_page(mapping, page);
+}
+EXPORT_SYMBOL(delete_from_page_cache);
 
-       if (PageTransHuge(page) && !PageHuge(page)) {
-               page_ref_sub(page, HPAGE_PMD_NR);
-               VM_BUG_ON_PAGE(page_count(page) <= 0, page);
-       } else {
-               put_page(page);
+/*
+ * page_cache_tree_delete_batch - delete several pages from page cache
+ * @mapping: the mapping to which pages belong
+ * @pvec: pagevec with pages to delete
+ *
+ * The function walks over mapping->page_tree and removes pages passed in @pvec
+ * from the radix tree. The function expects @pvec to be sorted by page index.
+ * It tolerates holes in @pvec (radix tree entries at those indices are not
+ * modified). The function expects only THP head pages to be present in the
+ * @pvec and takes care to delete all corresponding tail pages from the radix
+ * tree as well.
+ *
+ * The function expects mapping->tree_lock to be held.
+ */
+static void
+page_cache_tree_delete_batch(struct address_space *mapping,
+                            struct pagevec *pvec)
+{
+       struct radix_tree_iter iter;
+       void **slot;
+       int total_pages = 0;
+       int i = 0, tail_pages = 0;
+       struct page *page;
+       pgoff_t start;
+
+       start = pvec->pages[0]->index;
+       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+               if (i >= pagevec_count(pvec) && !tail_pages)
+                       break;
+               page = radix_tree_deref_slot_protected(slot,
+                                                      &mapping->tree_lock);
+               if (radix_tree_exceptional_entry(page))
+                       continue;
+               if (!tail_pages) {
+                       /*
+                        * Some page got inserted in our range? Skip it. We
+                        * have our pages locked so they are protected from
+                        * being removed.
+                        */
+                       if (page != pvec->pages[i])
+                               continue;
+                       WARN_ON_ONCE(!PageLocked(page));
+                       if (PageTransHuge(page) && !PageHuge(page))
+                               tail_pages = HPAGE_PMD_NR - 1;
+                       page->mapping = NULL;
+                       /*
+                        * Leave page->index set: truncation lookup relies
+                        * upon it
+                        */
+                       i++;
+               } else {
+                       tail_pages--;
+               }
+               radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
+               __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+                               workingset_lookup_update(mapping));
+               total_pages++;
        }
+       mapping->nrpages -= total_pages;
+}
+
+void delete_from_page_cache_batch(struct address_space *mapping,
+                                 struct pagevec *pvec)
+{
+       int i;
+       unsigned long flags;
+
+       if (!pagevec_count(pvec))
+               return;
+
+       spin_lock_irqsave(&mapping->tree_lock, flags);
+       for (i = 0; i < pagevec_count(pvec); i++) {
+               trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+
+               unaccount_page_cache_page(mapping, pvec->pages[i]);
+       }
+       page_cache_tree_delete_batch(mapping, pvec);
+       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
+       for (i = 0; i < pagevec_count(pvec); i++)
+               page_cache_free_page(mapping, pvec->pages[i]);
 }
-EXPORT_SYMBOL(delete_from_page_cache);
 
 int filemap_check_errors(struct address_space *mapping)
 {
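delete_from_page_cache_batch() exists so that callers like truncation can drop many pages under a single tree_lock acquisition. A hedged usage sketch (helper name hypothetical) following the contract documented above: pages locked, sorted by index, only THP head pages in the pagevec, and the pagevec holding its own references from a prior lookup:

static void example_remove_locked_pages(struct address_space *mapping,
                                        struct pagevec *pvec)
{
        int i;

        /* Unaccounts, unlinks from the radix tree, then frees each page. */
        delete_from_page_cache_batch(mapping, pvec);

        for (i = 0; i < pagevec_count(pvec); i++)
                unlock_page(pvec->pages[i]);
        /* Drop the references taken by the lookup that filled @pvec. */
        pagevec_release(pvec);
}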
@@ -419,20 +519,18 @@ static void __filemap_fdatawait_range(struct address_space *mapping,
        if (end_byte < start_byte)
                return;
 
-       pagevec_init(&pvec, 0);
-       while ((index <= end) &&
-                       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                       PAGECACHE_TAG_WRITEBACK,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+       pagevec_init(&pvec);
+       while (index <= end) {
                unsigned i;
 
+               nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+                               end, PAGECACHE_TAG_WRITEBACK);
+               if (!nr_pages)
+                       break;
+
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 
-                       /* until radix tree lookup accepts end_index */
-                       if (page->index > end)
-                               continue;
-
                        wait_on_page_writeback(page);
                        ClearPageError(page);
                }
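The conversion to pagevec_lookup_range_tag() removes the manual end-of-range check from the loop body because the helper never returns pages past @end. The same pattern applies to any tag; a hedged sketch (hypothetical helper) that counts dirty pages in a range:

static unsigned example_count_dirty_range(struct address_space *mapping,
                                          pgoff_t index, pgoff_t end)
{
        struct pagevec pvec;
        unsigned nr_pages, count = 0;

        pagevec_init(&pvec);
        while (index <= end) {
                nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
                                end, PAGECACHE_TAG_DIRTY);
                if (!nr_pages)
                        break;
                count += nr_pages;
                pagevec_release(&pvec);
                cond_resched();
        }
        return count;
}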
@@ -1041,6 +1139,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
        wait_queue_head_t *q = page_waitqueue(page);
        return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
 }
+EXPORT_SYMBOL(wait_on_page_bit_killable);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
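Exporting wait_on_page_bit_killable() lets modules (presumably callers such as the AFS client pulled in by this merge) sleep killably on an arbitrary page flag. A hedged illustration; the flag chosen here is only an example:

static int example_wait_for_writeback_killable(struct page *page)
{
        if (!PageWriteback(page))
                return 0;
        /* Returns 0 once PG_writeback clears, -EINTR on a fatal signal. */
        return wait_on_page_bit_killable(page, PG_writeback);
}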
@@ -1754,9 +1853,10 @@ repeat:
 EXPORT_SYMBOL(find_get_pages_contig);
 
 /**
- * find_get_pages_tag - find and return pages that match @tag
+ * find_get_pages_range_tag - find and return pages in given range matching @tag
  * @mapping:   the address_space to search
  * @index:     the starting page index
+ * @end:       The final page index (inclusive)
  * @tag:       the tag index
  * @nr_pages:  the maximum number of pages
  * @pages:     where the resulting pages are placed
@@ -1764,8 +1864,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
  * Like find_get_pages, except we only return pages which are tagged with
  * @tag.   We update @index to index the next page for the traversal.
  */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-                       int tag, unsigned int nr_pages, struct page **pages)
+unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
+                       pgoff_t end, int tag, unsigned int nr_pages,
+                       struct page **pages)
 {
        struct radix_tree_iter iter;
        void **slot;
@@ -1778,6 +1879,9 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
        radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                   &iter, *index, tag) {
                struct page *head, *page;
+
+               if (iter.index > end)
+                       break;
 repeat:
                page = radix_tree_deref_slot(slot);
                if (unlikely(!page))
@@ -1819,18 +1923,28 @@ repeat:
                }
 
                pages[ret] = page;
-               if (++ret == nr_pages)
-                       break;
+               if (++ret == nr_pages) {
+                       *index = pages[ret - 1]->index + 1;
+                       goto out;
+               }
        }
 
+       /*
+        * We come here when we reached @end. We take care not to overflow
+        * @index, as doing so confuses some of the callers. This breaks the
+        * iteration when there is a page at index -1, but that is already
+        * broken anyway.
+        */
+       if (end == (pgoff_t)-1)
+               *index = (pgoff_t)-1;
+       else
+               *index = end + 1;
+out:
        rcu_read_unlock();
 
-       if (ret)
-               *index = pages[ret - 1]->index + 1;
-
        return ret;
 }
-EXPORT_SYMBOL(find_get_pages_tag);
+EXPORT_SYMBOL(find_get_pages_range_tag);
 
 /**
  * find_get_entries_tag - find and return entries that match @tag
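Callers that want the raw array interface rather than a pagevec can use find_get_pages_range_tag() directly; since @index is now always advanced past the processed range, the loop needs no per-page end check. A hedged sketch (helper name and batch size are illustrative):

static void example_walk_tagged_range(struct address_space *mapping,
                                      pgoff_t start, pgoff_t end, int tag)
{
        struct page *pages[16];         /* illustrative batch size */
        pgoff_t index = start;
        unsigned nr, i;

        while (index <= end) {
                nr = find_get_pages_range_tag(mapping, &index, end, tag,
                                              ARRAY_SIZE(pages), pages);
                if (!nr)
                        break;
                for (i = 0; i < nr; i++) {
                        /* ... inspect pages[i] ... */
                        put_page(pages[i]);
                }
                /* @index already points past the last returned page. */
        }
}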
@@ -2159,7 +2273,7 @@ no_cached_page:
                 * Ok, it wasn't cached, so we need to create a new
                 * page..
                 */
-               page = page_cache_alloc_cold(mapping);
+               page = page_cache_alloc(mapping);
                if (!page) {
                        error = -ENOMEM;
                        goto out;
@@ -2271,7 +2385,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
        int ret;
 
        do {
-               page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+               page = __page_cache_alloc(gfp_mask);
                if (!page)
                        return -ENOMEM;
 
@@ -2675,7 +2789,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 repeat:
        page = find_get_page(mapping, index);
        if (!page) {
-               page = __page_cache_alloc(gfp | __GFP_COLD);
+               page = __page_cache_alloc(gfp);
                if (!page)
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, gfp);