afs: Set up the iov_iter before calling afs_extract_data()
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index d8825ce63eba3880094556332ff02ac2ad795248..8c093bfff8b6a89072aae714f8e2d77dcb47d690 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
        struct afs_fid          fids[50];
 };
 
+/*
+ * Drop the refs that we're holding on the pages we were reading into.  We've
+ * got refs on the first nr_pages pages.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+       struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
+       struct page *page;
+       pgoff_t last = req->nr_pages - 1;
+
+       XA_STATE(xas, &mapping->i_pages, 0);
+
+       if (unlikely(!req->nr_pages))
+               return;
+
+       rcu_read_lock();
+       xas_for_each(&xas, page, last) {
+               if (xas_retry(&xas, page))
+                       continue;
+               BUG_ON(xa_is_value(page));
+               BUG_ON(PageCompound(page));
+               ASSERTCMP(page->mapping, ==, mapping);
+
+               put_page(page);
+       }
+
+       rcu_read_unlock();
+}
+
 /*
  * check that a directory page is valid
  */
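
The cleanup handler is installed as req->cleanup, on the assumption that
afs_put_read() invokes it when the last reference to the request is
dropped, so the page refs live exactly as long as the request does.  A
minimal sketch of that release path (not part of this diff):

	void afs_put_read(struct afs_read *req)
	{
		if (refcount_dec_and_test(&req->usage)) {
			if (req->cleanup)
				req->cleanup(req); /* drops the page refs */
			key_put(req->key);
			kfree(req);
		}
	}
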
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
        qty /= sizeof(union afs_xdr_dir_block);
 
        /* check them */
-       dbuf = kmap(page);
+       dbuf = kmap_atomic(page);
        for (tmp = 0; tmp < qty; tmp++) {
                if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
                        printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
                ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
        }
 
-       kunmap(page);
+       kunmap_atomic(dbuf);
 
 checked:
        afs_stat_v(dvnode, n_read_dir);
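
The kmap()/kunmap() pairs become kmap_atomic()/kunmap_atomic().  Note
that kunmap_atomic() takes the mapped address rather than the page, and
the code between the two calls must not sleep.  The general pattern, as
a sketch (buf and len are illustrative):

	u8 *p = kmap_atomic(page);
	memcpy(buf, p, len);	/* must not sleep while the mapping is held */
	kunmap_atomic(p);
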
@@ -157,35 +186,74 @@ error:
 }
 
 /*
- * Check the contents of a directory that we've just read.
+ * Dump the contents of a directory.
  */
-static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
 {
        struct afs_xdr_dir_page *dbuf;
-       unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+       struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+       struct page *page;
+       unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+       pgoff_t last = req->nr_pages - 1;
 
-       for (i = 0; i < req->nr_pages; i++)
-               if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
-                       goto bad;
-       return true;
+       XA_STATE(xas, &mapping->i_pages, 0);
 
-bad:
-       pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+       pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
                dvnode->fid.vid, dvnode->fid.vnode,
-               req->file_size, req->len, req->actual_len, req->remain);
-       pr_warn("DIR %llx %x %x %x\n",
-               req->pos, req->index, req->nr_pages, req->offset);
+               req->file_size, req->len, req->actual_len);
+       pr_warn("DIR %llx %x %zx %zx\n",
+               req->pos, req->nr_pages,
+                       req->iter->iov_offset, iov_iter_count(req->iter));
 
-       for (i = 0; i < req->nr_pages; i++) {
-               dbuf = kmap(req->pages[i]);
-               for (j = 0; j < qty; j++) {
-                       union afs_xdr_dir_block *block = &dbuf->blocks[j];
+       xas_for_each(&xas, page, last) {
+               if (xas_retry(&xas, page))
+                       continue;
+
+               BUG_ON(PageCompound(page));
+               BUG_ON(page->mapping != mapping);
+
+               dbuf = kmap_atomic(page);
+               for (i = 0; i < qty; i++) {
+                       union afs_xdr_dir_block *block = &dbuf->blocks[i];
 
-                       pr_warn("[%02x] %32phN\n", i * qty + j, block);
+                       pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
                }
-               kunmap(req->pages[i]);
+               kunmap_atomic(dbuf);
        }
-       return false;
+}
+
+/*
+ * Check all the pages in a directory.  All the pages are held pinned.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+{
+       struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+       struct page *page;
+       pgoff_t last = req->nr_pages - 1;
+       int ret = 0;
+
+       XA_STATE(xas, &mapping->i_pages, 0);
+
+       if (unlikely(!req->nr_pages))
+               return 0;
+
+       rcu_read_lock();
+       xas_for_each(&xas, page, last) {
+               if (xas_retry(&xas, page))
+                       continue;
+
+               BUG_ON(PageCompound(page));
+               BUG_ON(page->mapping != mapping);
+
+               if (!afs_dir_check_page(dvnode, page, req->file_size)) {
+                       afs_dir_dump(dvnode, req);
+                       ret = -EIO;
+                       break;
+               }
+       }
+
+       rcu_read_unlock();
+       return ret;
 }
 
 /*
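
The dump and check paths share the same RCU-protected XArray walk; no
extra page refs are taken inside the loop because the request already
pins every page.  The shape of such a walk, for reference:

	XA_STATE(xas, &mapping->i_pages, 0);

	rcu_read_lock();
	xas_for_each(&xas, page, last) {
		if (xas_retry(&xas, page))
			continue;	/* raced with a tree modification */
		/* use page */
	}
	rcu_read_unlock();
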
@@ -214,58 +282,57 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 {
        struct afs_read *req;
        loff_t i_size;
-       int nr_pages, nr_inline, i, n;
-       int ret = -ENOMEM;
+       int nr_pages, i, n;
+       int ret;
+
+       _enter("");
 
-retry:
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       if (!req)
+               return ERR_PTR(-ENOMEM);
+
+       refcount_set(&req->usage, 1);
+       req->vnode = dvnode;
+       req->key = key_get(key);
+       req->cleanup = afs_dir_read_cleanup;
+
+expand:
        i_size = i_size_read(&dvnode->vfs_inode);
-       if (i_size < 2048)
-               return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+       if (i_size < 2048) {
+               ret = afs_bad(dvnode, afs_file_error_dir_small);
+               goto error;
+       }
        if (i_size > 2048 * 1024) {
                trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
-               return ERR_PTR(-EFBIG);
+               ret = -EFBIG;
+               goto error;
        }
 
        _enter("%llu", i_size);
 
-       /* Get a request record to hold the page list.  We want to hold it
-        * inline if we can, but we don't want to make an order 1 allocation.
-        */
        nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
-       nr_inline = nr_pages;
-       if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
-               nr_inline = 0;
 
-       req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
-       if (!req)
-               return ERR_PTR(-ENOMEM);
-
-       refcount_set(&req->usage, 1);
-       req->key = key_get(key);
-       req->nr_pages = nr_pages;
        req->actual_len = i_size; /* May change */
        req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
        req->data_version = dvnode->status.data_version; /* May change */
-       if (nr_inline > 0) {
-               req->pages = req->array;
-       } else {
-               req->pages = kcalloc(nr_pages, sizeof(struct page *),
-                                    GFP_KERNEL);
-               if (!req->pages)
-                       goto error;
-       }
+       iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
+                       0, i_size);
+       req->iter = &req->def_iter;
 
-       /* Get a list of all the pages that hold or will hold the directory
-        * content.  We need to fill in any gaps that we might find where the
-        * memory reclaimer has been at work.  If there are any gaps, we will
+       /* Fill in any gaps that we might find where the memory reclaimer has
+        * been at work and pin all the pages.  If there are any gaps, we will
         * need to reread the entire directory contents.
         */
-       i = 0;
-       do {
+       i = req->nr_pages;
+       while (i < nr_pages) {
+               struct page *pages[8], *page;
+
                n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-                                         req->nr_pages - i,
-                                         req->pages + i);
-               _debug("find %u at %u/%u", n, i, req->nr_pages);
+                                         min_t(unsigned int, nr_pages - i,
+                                               ARRAY_SIZE(pages)),
+                                         pages);
+               _debug("find %u at %u/%u", n, i, nr_pages);
+
                if (n == 0) {
                        gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
 
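
iov_iter_xarray() describes a byte range of the page cache's xarray
directly, so the fetch path can land received data in the pages without
consulting a page array.  A sketch of building and filling such an
iterator (mapping, buf and len are illustrative):

	struct iov_iter iter;
	size_t copied;

	iov_iter_xarray(&iter, READ, &mapping->i_pages, 0, i_size);
	copied = copy_to_iter(buf, len, &iter);	/* advances the iterator */
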
@@ -273,22 +340,24 @@ retry:
                                afs_stat_v(dvnode, n_inval);
 
                        ret = -ENOMEM;
-                       req->pages[i] = __page_cache_alloc(gfp);
-                       if (!req->pages[i])
+                       page = __page_cache_alloc(gfp);
+                       if (!page)
                                goto error;
-                       ret = add_to_page_cache_lru(req->pages[i],
+                       ret = add_to_page_cache_lru(page,
                                                    dvnode->vfs_inode.i_mapping,
                                                    i, gfp);
                        if (ret < 0)
                                goto error;
 
-                       attach_page_private(req->pages[i], (void *)1);
-                       unlock_page(req->pages[i]);
+                       attach_page_private(page, (void *)1);
+                       unlock_page(page);
+                       req->nr_pages++;
                        i++;
                } else {
+                       req->nr_pages += n;
                        i += n;
                }
-       } while (i < req->nr_pages);
+       }
 
        /* If we're going to reload, we need to lock all the pages to prevent
         * races.
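
find_get_pages_contig() takes a reference on up to the requested number
of contiguous pages starting at the given index and returns how many it
found, so a short count reveals a hole left by the memory reclaimer.  In
outline:

	struct page *pages[8];
	unsigned int n;

	n = find_get_pages_contig(mapping, index, ARRAY_SIZE(pages), pages);
	/* pages[0..n-1] each carry a ref that the request keeps; n == 0
	 * means the page at index is gone and must be allocated and added
	 * back to the page cache. */
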
@@ -312,12 +381,17 @@ retry:
 
                task_io_account_read(PAGE_SIZE * req->nr_pages);
 
-               if (req->len < req->file_size)
-                       goto content_has_grown;
+               if (req->len < req->file_size) {
+                       /* The content has grown, so we need to expand the
+                        * buffer.
+                        */
+                       up_write(&dvnode->validate_lock);
+                       goto expand;
+               }
 
                /* Validate the data we just read. */
-               ret = -EIO;
-               if (!afs_dir_check_pages(dvnode, req))
+               ret = afs_dir_check(dvnode, req);
+               if (ret < 0)
                        goto error_unlock;
 
                // TODO: Trim excess pages
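
For example, if i_size was 8192 at setup time (two 4KiB pages, so
req->len = 8192) but the server reports file_size = 12288 during the
fetch, req->len < req->file_size triggers the expand path, which
re-reads i_size, pins a third page and retries the fetch.
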
@@ -335,11 +409,6 @@ error:
        afs_put_read(req);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
-
-content_has_grown:
-       up_write(&dvnode->validate_lock);
-       afs_put_read(req);
-       goto retry;
 }
 
 /*
@@ -449,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
        struct afs_read *req;
        struct page *page;
        unsigned blkoff, limit;
+       void __rcu **slot;
        int ret;
 
        _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -473,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
 
-               /* Fetch the appropriate page from the directory and re-add it
-                * to the LRU.
+               /* Fetch the appropriate page from the directory.  We have all
+                * the pages pinned with an extra ref.
                 */
-               page = req->pages[blkoff / PAGE_SIZE];
+               rcu_read_lock();
+               page = NULL;
+               slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
+                                             blkoff / PAGE_SIZE);
+               if (slot)
+                       page = radix_tree_deref_slot(slot);
+               rcu_read_unlock();
                if (!page) {
                        ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
                        break;
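
The slot-based lookup above uses the legacy radix-tree wrappers over the
XArray; an equivalent lookup with the native API would be xa_load(),
which takes the RCU read lock itself.  For comparison, a sketch (not
part of this patch):

	page = xa_load(&dvnode->vfs_inode.i_mapping->i_pages,
		       blkoff / PAGE_SIZE);
	if (!page) {
		ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
		break;
	}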