io_uring: retain iov_iter state over io_read/io_write calls
author	Jens Axboe <axboe@kernel.dk>
Thu, 13 Aug 2020 15:47:43 +0000 (09:47 -0600)
committer	Jens Axboe <axboe@kernel.dk>
Thu, 13 Aug 2020 20:53:34 +0000 (13:53 -0700)
Instead of maintaining (and setting/remembering) iov_iter size and
segment counts, just put the iov_iter in the async part of the IO
structure.

This is mostly a preparation patch for doing appropriate internal retries
for short reads, but it also cleans up the state handling nicely and
simplifies it quite a bit.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
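
As background for the change below, a minimal userspace sketch of the state-retention idea; it is not kernel code, and struct iter, struct req_state and iter_advance() are invented stand-ins for struct iov_iter, struct io_async_rw and iov_iter_advance(). The point is that copying the whole iterator into persistent per-request state lets a retry resume exactly where a short transfer stopped, with nothing to re-derive from a remembered size and segment count:

/*
 * Toy model of retaining iterator state across a short read.
 * Build with: cc -Wall sketch.c
 */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

struct iter {				/* stand-in for struct iov_iter */
	const struct iovec *iov;	/* current segment */
	size_t iov_offset;		/* progress within current segment */
	size_t count;			/* total bytes left */
	unsigned long nr_segs;
};

struct req_state {			/* stand-in for io_async_rw */
	struct iovec fast_iov[8];
	struct iter iter;		/* the retained iterator */
};

/* advance by 'bytes', like iov_iter_advance() */
static void iter_advance(struct iter *i, size_t bytes)
{
	i->count -= bytes;
	while (bytes) {
		size_t seg = i->iov->iov_len - i->iov_offset;

		if (bytes < seg) {
			i->iov_offset += bytes;
			break;
		}
		bytes -= seg;
		i->iov++;
		i->nr_segs--;
		i->iov_offset = 0;
	}
}

int main(void)
{
	char a[4], b[4];
	struct iovec vecs[2] = {
		{ .iov_base = a, .iov_len = sizeof(a) },
		{ .iov_base = b, .iov_len = sizeof(b) },
	};
	struct req_state state;
	struct iter live = { .iov = vecs, .count = 8, .nr_segs = 2 };

	iter_advance(&live, 6);			/* a short "read" of 6 bytes */
	memcpy(&state.iter, &live, sizeof(live));	/* retain full state */

	/* a later retry resumes mid-segment, no re-import needed */
	printf("resume: %zu bytes left, offset %zu into segment\n",
	       state.iter.count, state.iter.iov_offset);
	return 0;
}

This mirrors what the new io_req_map_rw() does; the kernel version additionally copies the iovec array itself into rw->fast_iov, or records a heap-allocated one in free_iovec, because the caller's array may live on the stack.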
fs/io_uring.c

index 1ec25ee71372915fe868310dbc5806783ce8d342..584f861786718d0f6812118e52dcc0cf11466caa 100644
@@ -508,9 +508,8 @@ struct io_async_msghdr {
 
 struct io_async_rw {
        struct iovec                    fast_iov[UIO_FASTIOV];
-       struct iovec                    *iov;
-       ssize_t                         nr_segs;
-       ssize_t                         size;
+       const struct iovec              *free_iovec;
+       struct iov_iter                 iter;
        struct wait_page_queue          wpq;
 };
 
@@ -915,8 +914,8 @@ static void io_file_put_work(struct work_struct *work);
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
                               struct iovec **iovec, struct iov_iter *iter,
                               bool needs_lock);
-static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
-                            struct iovec *iovec, struct iovec *fast_iov,
+static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
+                            const struct iovec *fast_iov,
                             struct iov_iter *iter);
 
 static struct kmem_cache *req_cachep;
@@ -2299,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
        ret = io_import_iovec(rw, req, &iovec, &iter, false);
        if (ret < 0)
                goto end_req;
-       ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);
+       ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
        if (!ret)
                return true;
        kfree(iovec);
@@ -2820,6 +2819,13 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
        ssize_t ret;
        u8 opcode;
 
+       if (req->io) {
+               struct io_async_rw *iorw = &req->io->rw;
+
+               *iovec = NULL;
+               return iov_iter_count(&iorw->iter);
+       }
+
        opcode = req->opcode;
        if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
                *iovec = NULL;
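
Note that the req->io check is hoisted to the very top of io_import_iovec(): a request that already carries async state reuses the retained iterator untouched, *iovec stays NULL so the caller does not free memory owned by the async context, and the returned byte count comes from the live iterator via iov_iter_count() rather than from the cached iorw->size that the next hunk removes.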
@@ -2845,14 +2851,6 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
                return ret < 0 ? ret : sqe_len;
        }
 
-       if (req->io) {
-               struct io_async_rw *iorw = &req->io->rw;
-
-               iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
-               *iovec = NULL;
-               return iorw->size;
-       }
-
        if (req->flags & REQ_F_BUFFER_SELECT) {
                ret = io_iov_buffer_select(req, *iovec, needs_lock);
                if (!ret) {
@@ -2930,21 +2928,29 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
        return ret;
 }
 
-static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
-                         struct iovec *iovec, struct iovec *fast_iov,
-                         struct iov_iter *iter)
+static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
+                         const struct iovec *fast_iov, struct iov_iter *iter)
 {
        struct io_async_rw *rw = &req->io->rw;
 
-       rw->nr_segs = iter->nr_segs;
-       rw->size = io_size;
+       memcpy(&rw->iter, iter, sizeof(*iter));
+       rw->free_iovec = NULL;
+       /* can only be fixed buffers, no need to do anything */
+       if (iter->type == ITER_BVEC)
+               return;
        if (!iovec) {
-               rw->iov = rw->fast_iov;
-               if (rw->iov != fast_iov)
-                       memcpy(rw->iov, fast_iov,
+               unsigned iov_off = 0;
+
+               rw->iter.iov = rw->fast_iov;
+               if (iter->iov != fast_iov) {
+                       iov_off = iter->iov - fast_iov;
+                       rw->iter.iov += iov_off;
+               }
+               if (rw->fast_iov != fast_iov)
+                       memcpy(rw->fast_iov + iov_off, fast_iov + iov_off,
                               sizeof(struct iovec) * iter->nr_segs);
        } else {
-               rw->iov = iovec;
+               rw->free_iovec = iovec;
                req->flags |= REQ_F_NEED_CLEANUP;
        }
 }
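
A subtlety in io_req_map_rw() above: by the time state is saved, iter->iov may already have advanced past some entries of the caller's on-stack fast_iov array, so the copy preserves that offset. If iter->iov == fast_iov + 2 with three segments left, iov_off is 2 and entries 2..4 are copied to rw->fast_iov + 2, leaving the retained iterator pointing at the same relative position inside the persistent array. The rw->fast_iov != fast_iov test skips the copy when the source already is the persistent array, as when io_rw_prep_async() below maps its own iorw->fast_iov; the early return for ITER_BVEC covers fixed buffers, whose bvec array is long-lived and needs no copy.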
@@ -2963,8 +2969,8 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
        return  __io_alloc_async_ctx(req);
 }
 
-static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
-                            struct iovec *iovec, struct iovec *fast_iov,
+static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
+                            const struct iovec *fast_iov,
                             struct iov_iter *iter)
 {
        if (!io_op_defs[req->opcode].async_ctx)
@@ -2973,7 +2979,7 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
                if (__io_alloc_async_ctx(req))
                        return -ENOMEM;
 
-               io_req_map_rw(req, io_size, iovec, fast_iov, iter);
+               io_req_map_rw(req, iovec, fast_iov, iter);
        }
        return 0;
 }
@@ -2981,18 +2987,19 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
                                   bool force_nonblock)
 {
-       struct io_async_ctx *io = req->io;
-       struct iov_iter iter;
+       struct io_async_rw *iorw = &req->io->rw;
        ssize_t ret;
 
-       io->rw.iov = io->rw.fast_iov;
+       iorw->iter.iov = iorw->fast_iov;
+       /* reset ->io around the iovec import, we don't want to use it */
        req->io = NULL;
-       ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
-       req->io = io;
+       ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov,
+                               &iorw->iter, !force_nonblock);
+       req->io = container_of(iorw, struct io_async_ctx, rw);
        if (unlikely(ret < 0))
                return ret;
 
-       io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+       io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter);
        return 0;
 }
 
@@ -3090,7 +3097,8 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req)
+static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
+                              struct iovec *fast_iov, struct iov_iter *iter)
 {
        struct kiocb *kiocb = &req->rw.kiocb;
        int ret;
@@ -3113,8 +3121,11 @@ static bool io_rw_should_retry(struct io_kiocb *req)
         * If request type doesn't require req->io to defer in general,
         * we need to allocate it here
         */
-       if (!req->io && __io_alloc_async_ctx(req))
-               return false;
+       if (!req->io) {
+               if (__io_alloc_async_ctx(req))
+                       return false;
+               io_req_map_rw(req, iovec, fast_iov, iter);
+       }
 
        ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
                                                io_async_buf_func, req);
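
io_rw_should_retry() now takes the iovec and iterator so that, when it must allocate req->io itself, it can persist the current state via io_req_map_rw() before arming the wait-page callback; when io_async_buf_func later triggers the retry, the request finds its iterator already retained in req->io->rw.iter.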
@@ -3141,12 +3152,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 {
        struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
        struct kiocb *kiocb = &req->rw.kiocb;
-       struct iov_iter iter;
+       struct iov_iter __iter, *iter = &__iter;
        size_t iov_count;
-       ssize_t io_size, ret, ret2;
-       unsigned long nr_segs;
+       ssize_t io_size, ret, ret2 = 0;
+
+       if (req->io)
+               iter = &req->io->rw.iter;
 
-       ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
+       ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
        if (ret < 0)
                return ret;
        io_size = ret;
@@ -3160,30 +3173,26 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
        if (force_nonblock && !io_file_supports_async(req->file, READ))
                goto copy_iov;
 
-       iov_count = iov_iter_count(&iter);
-       nr_segs = iter.nr_segs;
+       iov_count = iov_iter_count(iter);
        ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
        if (unlikely(ret))
                goto out_free;
 
-       ret2 = io_iter_do_read(req, &iter);
+       ret2 = io_iter_do_read(req, iter);
 
        /* Catch -EAGAIN return for forced non-blocking submission */
        if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
                kiocb_done(kiocb, ret2, cs);
        } else {
-               iter.count = iov_count;
-               iter.nr_segs = nr_segs;
 copy_iov:
-               ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
-                                       &iter);
+               ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
                if (ret)
                        goto out_free;
                /* it's copied and will be cleaned with ->io */
                iovec = NULL;
                /* if we can retry, do so with the callbacks armed */
-               if (io_rw_should_retry(req)) {
-                       ret2 = io_iter_do_read(req, &iter);
+               if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
+                       ret2 = io_iter_do_read(req, iter);
                        if (ret2 == -EIOCBQUEUED) {
                                goto out_free;
                        } else if (ret2 != -EAGAIN) {
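
With iter aliased to req->io->rw.iter whenever async state exists, io_read() (and io_write() below) no longer needs the manual save/restore of iter.count and iter.nr_segs around the -EAGAIN fallback: the retained iterator is the authoritative state for any retry.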
@@ -3223,12 +3232,14 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 {
        struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
        struct kiocb *kiocb = &req->rw.kiocb;
-       struct iov_iter iter;
+       struct iov_iter __iter, *iter = &__iter;
        size_t iov_count;
        ssize_t ret, ret2, io_size;
-       unsigned long nr_segs;
 
-       ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock);
+       if (req->io)
+               iter = &req->io->rw.iter;
+
+       ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
        if (ret < 0)
                return ret;
        io_size = ret;
@@ -3247,8 +3258,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
            (req->flags & REQ_F_ISREG))
                goto copy_iov;
 
-       iov_count = iov_iter_count(&iter);
-       nr_segs = iter.nr_segs;
+       iov_count = iov_iter_count(iter);
        ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
        if (unlikely(ret))
                goto out_free;
@@ -3269,9 +3279,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
        kiocb->ki_flags |= IOCB_WRITE;
 
        if (req->file->f_op->write_iter)
-               ret2 = call_write_iter(req->file, kiocb, &iter);
+               ret2 = call_write_iter(req->file, kiocb, iter);
        else if (req->file->f_op->write)
-               ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+               ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter);
        else
                ret2 = -EINVAL;
 
@@ -3284,16 +3294,10 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
        if (!force_nonblock || ret2 != -EAGAIN) {
                kiocb_done(kiocb, ret2, cs);
        } else {
-               iter.count = iov_count;
-               iter.nr_segs = nr_segs;
 copy_iov:
-               ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
-                                       &iter);
-               if (ret)
-                       goto out_free;
-               /* it's copied and will be cleaned with ->io */
-               iovec = NULL;
-               return -EAGAIN;
+               ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+               if (!ret)
+                       return -EAGAIN;
        }
 out_free:
        if (iovec)
@@ -5583,8 +5587,8 @@ static void __io_clean_op(struct io_kiocb *req)
                case IORING_OP_WRITEV:
                case IORING_OP_WRITE_FIXED:
                case IORING_OP_WRITE:
-                       if (io->rw.iov != io->rw.fast_iov)
-                               kfree(io->rw.iov);
+                       if (io->rw.free_iovec)
+                               kfree(io->rw.free_iovec);
                        break;
                case IORING_OP_RECVMSG:
                case IORING_OP_SENDMSG: