vfs_io_uring: retry after a short writes in vfs_io_uring_pwrite_completion()
[vlendec/samba-autobuild/.git] / source3 / modules / vfs_io_uring.c
index f94453d99956e2b2fc86a716008b2234b8bfca4d..4625e16c37eece723f0a16cd0cd24100b544bbde 100644 (file)
@@ -26,6 +26,7 @@
 #include "smbd/globals.h"
 #include "lib/util/tevent_unix.h"
 #include "lib/util/sys_rw.h"
+#include "lib/util/iov_buf.h"
 #include "smbprofile.h"
 #include <liburing.h>
 
@@ -371,6 +372,17 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
        config->busy = false;
 }
 
+static void vfs_io_uring_request_submit(struct vfs_io_uring_request *cur)
+{
+       struct vfs_io_uring_config *config = cur->config;
+
+       io_uring_sqe_set_data(&cur->sqe, cur);
+       DLIST_ADD_END(config->queue, cur);
+       cur->list_head = &config->queue;
+
+       vfs_io_uring_queue_run(config);
+}
+
 static void vfs_io_uring_fd_handler(struct tevent_context *ev,
                                    struct tevent_fd *fde,
                                    uint16_t flags,
@@ -388,10 +400,13 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev,
 
 struct vfs_io_uring_pread_state {
        struct vfs_io_uring_request ur;
+       struct files_struct *fsp;
+       off_t offset;
        struct iovec iov;
        size_t nread;
 };
 
+static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state);
 static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
                                          const char *location);
 
@@ -430,17 +445,11 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
                return tevent_req_post(req, ev);
        }
 
+       state->fsp = fsp;
+       state->offset = offset;
        state->iov.iov_base = (void *)data;
        state->iov.iov_len = n;
-       io_uring_prep_readv(&state->ur.sqe,
-                           fsp->fh->fd,
-                           &state->iov, 1,
-                           offset);
-       io_uring_sqe_set_data(&state->ur.sqe, &state->ur);
-       DLIST_ADD_END(config->queue, &state->ur);
-       state->ur.list_head = &config->queue;
-
-       vfs_io_uring_queue_run(config);
+       vfs_io_uring_pread_submit(state);
 
        if (!tevent_req_is_in_progress(req)) {
                return tevent_req_post(req, ev);
@@ -450,11 +459,23 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
        return req;
 }
 
+static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state)
+{
+       io_uring_prep_readv(&state->ur.sqe,
+                           state->fsp->fh->fd,
+                           &state->iov, 1,
+                           state->offset);
+       vfs_io_uring_request_submit(&state->ur);
+}
+
 static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
                                          const char *location)
 {
        struct vfs_io_uring_pread_state *state = tevent_req_data(
                cur->req, struct vfs_io_uring_pread_state);
+       struct iovec *iov = &state->iov;
+       int num_iov = 1;
+       bool ok;
 
        /*
         * We rely on being inside the _send() function
@@ -468,8 +489,38 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
                return;
        }
 
-       state->nread = state->ur.cqe.res;
-       tevent_req_done(cur->req);
+       if (cur->cqe.res == 0) {
+               /*
+                * We reached EOF, we're done
+                */
+               tevent_req_done(cur->req);
+               return;
+       }
+
+       ok = iov_advance(&iov, &num_iov, cur->cqe.res);
+       if (!ok) {
+               /* This is not expected! */
+               DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n",
+                       (int)cur->cqe.res,
+                       (int)state->iov.iov_len);
+               tevent_req_error(cur->req, EIO);
+               return;
+       }
+
+       /* sys_valid_io_range() already checked the boundaries */
+       state->nread += state->ur.cqe.res;
+       if (num_iov == 0) {
+               /* We're done */
+               tevent_req_done(cur->req);
+               return;
+       }
+
+       /*
+        * sys_valid_io_range() already checked the boundaries
+        * now try to get the rest.
+        */
+       state->offset += state->ur.cqe.res;
+       vfs_io_uring_pread_submit(state);
 }
 
 static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
@@ -497,10 +548,13 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
 
 struct vfs_io_uring_pwrite_state {
        struct vfs_io_uring_request ur;
+       struct files_struct *fsp;
+       off_t offset;
        struct iovec iov;
        size_t nwritten;
 };
 
+static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state);
 static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
                                           const char *location);
 
@@ -539,17 +593,11 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
                return tevent_req_post(req, ev);
        }
 
+       state->fsp = fsp;
+       state->offset = offset;
        state->iov.iov_base = discard_const(data);
        state->iov.iov_len = n;
-       io_uring_prep_writev(&state->ur.sqe,
-                            fsp->fh->fd,
-                            &state->iov, 1,
-                            offset);
-       io_uring_sqe_set_data(&state->ur.sqe, &state->ur);
-       DLIST_ADD_END(config->queue, &state->ur);
-       state->ur.list_head = &config->queue;
-
-       vfs_io_uring_queue_run(config);
+       vfs_io_uring_pwrite_submit(state);
 
        if (!tevent_req_is_in_progress(req)) {
                return tevent_req_post(req, ev);
@@ -559,11 +607,23 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
        return req;
 }
 
+static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state)
+{
+       io_uring_prep_writev(&state->ur.sqe,
+                            state->fsp->fh->fd,
+                            &state->iov, 1,
+                            state->offset);
+       vfs_io_uring_request_submit(&state->ur);
+}
+
 static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
                                           const char *location)
 {
        struct vfs_io_uring_pwrite_state *state = tevent_req_data(
                cur->req, struct vfs_io_uring_pwrite_state);
+       struct iovec *iov = &state->iov;
+       int num_iov = 1;
+       bool ok;
 
        /*
         * We rely on being inside the _send() function
@@ -577,8 +637,38 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
                return;
        }
 
-       state->nwritten = state->ur.cqe.res;
-       tevent_req_done(cur->req);
+       if (cur->cqe.res == 0) {
+               /*
+                * Ensure we can never spin.
+                */
+               tevent_req_error(cur->req, ENOSPC);
+               return;
+       }
+
+       ok = iov_advance(&iov, &num_iov, cur->cqe.res);
+       if (!ok) {
+               /* This is not expected! */
+               DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n",
+                       (int)cur->cqe.res,
+                       (int)state->iov.iov_len);
+               tevent_req_error(cur->req, EIO);
+               return;
+       }
+
+       /* sys_valid_io_range() already checked the boundaries */
+       state->nwritten += state->ur.cqe.res;
+       if (num_iov == 0) {
+               /* We're done */
+               tevent_req_done(cur->req);
+               return;
+       }
+
+       /*
+        * sys_valid_io_range() already checked the boundaries
+        * now try to write the rest.
+        */
+       state->offset += state->ur.cqe.res;
+       vfs_io_uring_pwrite_submit(state);
 }
 
 static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req,
@@ -640,11 +730,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand
        io_uring_prep_fsync(&state->ur.sqe,
                            fsp->fh->fd,
                            0); /* fsync_flags */
-       io_uring_sqe_set_data(&state->ur.sqe, &state->ur);
-       DLIST_ADD_END(config->queue, &state->ur);
-       state->ur.list_head = &config->queue;
-
-       vfs_io_uring_queue_run(config);
+       vfs_io_uring_request_submit(&state->ur);
 
        if (!tevent_req_is_in_progress(req)) {
                return tevent_req_post(req, ev);
@@ -669,6 +755,13 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
                return;
        }
 
+       if (cur->cqe.res > 0) {
+               /* This is not expected! */
+               DBG_ERR("got cur->cqe.res=%d\n", (int)cur->cqe.res);
+               tevent_req_error(cur->req, EIO);
+               return;
+       }
+
        tevent_req_done(cur->req);
 }