fuse: add max_pages to init_out
author Constantine Shulyupin <const@MakeLinux.com>
Thu, 6 Sep 2018 12:37:06 +0000 (15:37 +0300)
committer Miklos Szeredi <mszeredi@redhat.com>
Mon, 1 Oct 2018 08:07:06 +0000 (10:07 +0200)
Replace FUSE_MAX_PAGES_PER_REQ with the configurable parameter max_pages to
improve performance.

Old RFC with detailed description of the problem and many fixes by Mitsuo
Hayasaka (mitsuo.hayasaka.hu@hitachi.com):
 - https://lkml.org/lkml/2012/7/5/136

We encountered this performance degradation in a large, complex virtual
environment and fixed it there.

Environment to reproduce degradation and improvement:

1. Add lag to user mode FUSE
Add nanosleep(&(struct timespec){ 0, 1000 }, NULL); to xmp_write_buf in
passthrough_fh.c

2. Patch user-mode (UM) FUSE with a configurable max_pages parameter. The
patch will be provided later.

3. Run the test script below and perform the test on tmpfs:
fuse_test()
{

       cd /tmp
       mkdir -p fusemnt
       passthrough_fh -o max_pages=$1 /tmp/fusemnt
       grep fuse /proc/self/mounts
       dd conv=fdatasync oflag=dsync if=/dev/zero of=fusemnt/tmp/tmp \
count=1K bs=1M 2>&1 | grep -v records
       rm fusemnt/tmp/tmp
       killall passthrough_fh
}

Test results:

passthrough_fh /tmp/fusemnt fuse.passthrough_fh \
rw,nosuid,nodev,relatime,user_id=0,group_id=0 0 0
1073741824 bytes (1.1 GB) copied, 1.73867 s, 618 MB/s

passthrough_fh /tmp/fusemnt fuse.passthrough_fh \
rw,nosuid,nodev,relatime,user_id=0,group_id=0,max_pages=256 0 0
1073741824 bytes (1.1 GB) copied, 1.15643 s, 928 MB/s

Obviously, with a bigger lag the difference between 'before' and 'after'
will be more significant.

Mitsuo Hayasaka, in 2012 (https://lkml.org/lkml/2012/7/5/136),
observed an improvement from 400-550 to 520-740.

Signed-off-by: Constantine Shulyupin <const@MakeLinux.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
fs/fuse/dev.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
include/uapi/linux/fuse.h

index fefb9dd8a2f479b8f43b7a146e3ebb8f1cb07cb8..69d4df78a417c92337e141a115b6c3fc85fe64d4 100644 (file)
@@ -61,6 +61,7 @@ static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
                struct page **pages = NULL;
                struct fuse_page_desc *page_descs = NULL;
 
+               WARN_ON(npages > FUSE_MAX_MAX_PAGES);
                if (npages > FUSE_REQ_INLINE_PAGES) {
                        pages = kzalloc(npages * (sizeof(*pages) +
                                                  sizeof(*page_descs)), flags);
@@ -1674,7 +1675,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        unsigned int num;
        unsigned int offset;
        size_t total_len = 0;
-       int num_pages;
+       unsigned int num_pages;
 
        offset = outarg->offset & ~PAGE_MASK;
        file_size = i_size_read(inode);
@@ -1686,7 +1687,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
                num = file_size - outarg->offset;
 
        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+       num_pages = min(num_pages, fc->max_pages);
 
        req = fuse_get_req(fc, num_pages);
        if (IS_ERR(req))
index b10d14baeb1f0711ee4005e1321f411817556af0..035843b501fe5b19bdd682ae6950d3bb30bc7144 100644 (file)
@@ -850,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page)
        fuse_wait_on_page_writeback(inode, page->index);
 
        if (req->num_pages &&
-           (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+           (req->num_pages == fc->max_pages ||
             (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
             req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-               int nr_alloc = min_t(unsigned, data->nr_pages,
-                                    FUSE_MAX_PAGES_PER_REQ);
+               unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
+                                             fc->max_pages);
                fuse_send_readpages(req, data->file);
                if (fc->async_read)
                        req = fuse_get_req_for_background(fc, nr_alloc);
@@ -889,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_fill_data data;
        int err;
-       int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
+       unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
 
        err = -EIO;
        if (is_bad_inode(inode))
@@ -1104,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
        return count > 0 ? count : err;
 }
 
-static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
+static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
+                                    unsigned int max_pages)
 {
-       return min_t(unsigned,
+       return min_t(unsigned int,
                     ((pos + len - 1) >> PAGE_SHIFT) -
                     (pos >> PAGE_SHIFT) + 1,
-                    FUSE_MAX_PAGES_PER_REQ);
+                    max_pages);
 }
 
 static ssize_t fuse_perform_write(struct kiocb *iocb,
@@ -1131,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
        do {
                struct fuse_req *req;
                ssize_t count;
-               unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
+               unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
+                                                     fc->max_pages);
 
                req = fuse_get_req(fc, nr_pages);
                if (IS_ERR(req)) {
@@ -1321,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
        return ret < 0 ? ret : 0;
 }
 
-static inline int fuse_iter_npages(const struct iov_iter *ii_p)
-{
-       return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
-}
-
 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
                       loff_t *ppos, int flags)
 {
@@ -1345,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        int err = 0;
 
        if (io->async)
-               req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
+               req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
+                                                               fc->max_pages));
        else
-               req = fuse_get_req(fc, fuse_iter_npages(iter));
+               req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -1392,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
                        fuse_put_request(fc, req);
                        if (io->async)
                                req = fuse_get_req_for_background(fc,
-                                       fuse_iter_npages(iter));
+                                       iov_iter_npages(iter, fc->max_pages));
                        else
-                               req = fuse_get_req(fc, fuse_iter_npages(iter));
+                               req = fuse_get_req(fc, iov_iter_npages(iter,
+                                                               fc->max_pages));
                        if (IS_ERR(req))
                                break;
                }
@@ -1823,7 +1822,7 @@ static int fuse_writepages_fill(struct page *page,
        is_writeback = fuse_page_is_writeback(inode, page->index);
 
        if (req && req->num_pages &&
-           (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+           (is_writeback || req->num_pages == fc->max_pages ||
             (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
             data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
                fuse_writepages_send(data);
@@ -1851,7 +1850,7 @@ static int fuse_writepages_fill(struct page *page,
                struct fuse_inode *fi = get_fuse_inode(inode);
 
                err = -ENOMEM;
-               req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+               req = fuse_request_alloc_nofs(fc->max_pages);
                if (!req) {
                        __free_page(tmp_page);
                        goto out_unlock;
@@ -1908,6 +1907,7 @@ static int fuse_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
 {
        struct inode *inode = mapping->host;
+       struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_fill_wb_data data;
        int err;
 
@@ -1920,7 +1920,7 @@ static int fuse_writepages(struct address_space *mapping,
        data.ff = NULL;
 
        err = -ENOMEM;
-       data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+       data.orig_pages = kcalloc(fc->max_pages,
                                  sizeof(struct page *),
                                  GFP_NOFS);
        if (!data.orig_pages)
@@ -2391,10 +2391,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
 }
 
 /* Make sure iov_length() won't overflow */
-static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
+                                size_t count)
 {
        size_t n;
-       u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+       u32 max = fc->max_pages << PAGE_SHIFT;
 
        for (n = 0; n < count; n++, iov++) {
                if (iov->iov_len > (size_t) max)
@@ -2518,7 +2519,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
        BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
        err = -ENOMEM;
-       pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
+       pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
        iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
        if (!pages || !iov_page)
                goto out;
@@ -2557,7 +2558,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 
        /* make sure there are enough buffer pages and init request with them */
        err = -ENOMEM;
-       if (max_pages > FUSE_MAX_PAGES_PER_REQ)
+       if (max_pages > fc->max_pages)
                goto out;
        while (num_pages < max_pages) {
                pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2644,11 +2645,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
                in_iov = iov_page;
                out_iov = in_iov + in_iovs;
 
-               err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+               err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
                if (err)
                        goto out;
 
-               err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+               err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
                if (err)
                        goto out;
 
@@ -2839,9 +2840,9 @@ static void fuse_do_truncate(struct file *file)
        fuse_do_setattr(file_dentry(file), &attr, file);
 }
 
-static inline loff_t fuse_round_up(loff_t off)
+static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
 {
-       return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+       return round_up(off, fc->max_pages << PAGE_SHIFT);
 }
 
 static ssize_t
@@ -2870,7 +2871,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
                if (offset >= i_size)
                        return 0;
-               iov_iter_truncate(iter, fuse_round_up(i_size - offset));
+               iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
                count = iov_iter_count(iter);
        }
 
index f5bdce84e76673a20d21a32be0efce8ffaa92587..3d578745c852497eca2846910b975fa2e9acce22 100644 (file)
 #include <linux/refcount.h>
 #include <linux/user_namespace.h>
 
-/** Max number of pages that can be used in a single read request */
-#define FUSE_MAX_PAGES_PER_REQ 32
+/** Default max number of pages that can be used in a single read request */
+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
+
+/** Maximum of max_pages received in init_out */
+#define FUSE_MAX_MAX_PAGES 256
 
 /** Bias for fi->writectr, meaning new writepages must not be sent */
 #define FUSE_NOWRITE INT_MIN
@@ -525,6 +528,9 @@ struct fuse_conn {
        /** Maximum write size */
        unsigned max_write;
 
+       /** Maximum number of pages that can be used in a single request */
+       unsigned int max_pages;
+
        /** Input queue */
        struct fuse_iqueue iq;
 
index 82db1ab53420955c146955482fac3d5b58ceb1b2..8cebf4d5f51b3cc2af62716a36be4a00b52d2ce2 100644 (file)
@@ -928,6 +928,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                        }
                        if (arg->flags & FUSE_ABORT_ERROR)
                                fc->abort_err = 1;
+                       if (arg->flags & FUSE_MAX_PAGES) {
+                               fc->max_pages =
+                                       min_t(unsigned int, FUSE_MAX_MAX_PAGES,
+                                       max_t(unsigned int, arg->max_pages, 1));
+                       }
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
                        fc->no_lock = 1;
@@ -959,7 +964,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
                FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
                FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
                FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
-               FUSE_ABORT_ERROR;
+               FUSE_ABORT_ERROR | FUSE_MAX_PAGES;
        req->in.h.opcode = FUSE_INIT;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(*arg);
@@ -1152,6 +1157,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        fc->user_id = d.user_id;
        fc->group_id = d.group_id;
        fc->max_read = max_t(unsigned, 4096, d.max_read);
+       fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
 
        /* Used by get_root_inode() */
        sb->s_fs_info = fc;
index 31a504f1ee605255117d0871a0e04fa638fae160..76f46f159992c9fb9133bd4fd9ca1ecf2c8179f2 100644 (file)
  *  7.28
  *  - add FUSE_COPY_FILE_RANGE
  *  - add FOPEN_CACHE_DIR
+ *  - add FUSE_MAX_PAGES, add max_pages to init_out
  */
 
 #ifndef _LINUX_FUSE_H
@@ -255,6 +256,7 @@ struct fuse_file_lock {
  * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
  * FUSE_POSIX_ACL: filesystem supports posix acls
  * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -278,6 +280,7 @@ struct fuse_file_lock {
 #define FUSE_HANDLE_KILLPRIV   (1 << 19)
 #define FUSE_POSIX_ACL         (1 << 20)
 #define FUSE_ABORT_ERROR       (1 << 21)
+#define FUSE_MAX_PAGES         (1 << 22)
 
 /**
  * CUSE INIT request/reply flags
@@ -617,7 +620,9 @@ struct fuse_init_out {
        uint16_t        congestion_threshold;
        uint32_t        max_write;
        uint32_t        time_gran;
-       uint32_t        unused[9];
+       uint16_t        max_pages;
+       uint16_t        padding;
+       uint32_t        unused[8];
 };
 
 #define CUSE_INIT_INFO_MAX 4096