vfs_aio_fork: Drop "volatile" from the mmap area in aio_fork
[nivanova/samba-autobuild/.git] / source3 / modules / vfs_aio_fork.c
index 4f6574bb475e05c83da08a29dda09bf62c0b5fc8..4069d935d2471917fae339c6f3d3765e60b0c5a8 100644 (file)
 #include "smbd/globals.h"
 #include "lib/async_req/async_sock.h"
 #include "lib/util/tevent_unix.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/sys_rw_data.h"
+#include "lib/util/msghdr.h"
+#include "smbprofile.h"
+
+#if !defined(HAVE_STRUCT_MSGHDR_MSG_CONTROL) && !defined(HAVE_STRUCT_MSGHDR_MSG_ACCRIGHTS)
+# error Can not pass file descriptors
+#endif
 
 #undef recvmsg
 
@@ -39,12 +47,12 @@ struct aio_fork_config {
 
 struct mmap_area {
        size_t size;
-       volatile void *ptr;
+       void *ptr;
 };
 
 static int mmap_area_destructor(struct mmap_area *area)
 {
-       munmap((void *)area->ptr, area->size);
+       munmap(discard_const(area->ptr), area->size);
        return 0;
 }
 
@@ -68,13 +76,12 @@ static struct mmap_area *mmap_area_init(TALLOC_CTX *mem_ctx, size_t size)
 
        result->ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
                           MAP_SHARED|MAP_FILE, fd, 0);
+       close(fd);
        if (result->ptr == MAP_FAILED) {
                DEBUG(1, ("mmap failed: %s\n", strerror(errno)));
                goto fail;
        }
 
-       close(fd);
-
        result->size = size;
        talloc_set_destructor(result, mmap_area_destructor);
 
@@ -122,6 +129,7 @@ struct rw_cmd {
 struct rw_ret {
        ssize_t size;
        int ret_errno;
+       uint64_t duration;
 };
 
 struct aio_child_list;
@@ -138,7 +146,7 @@ struct aio_child {
 
 struct aio_child_list {
        struct aio_child *children;
-       struct timed_event *cleanup_event;
+       struct tevent_timer *cleanup_event;
 };
 
 static void free_aio_children(void **p)
@@ -148,116 +156,72 @@ static void free_aio_children(void **p)
 
 static ssize_t read_fd(int fd, void *ptr, size_t nbytes, int *recvfd)
 {
-       struct msghdr msg;
        struct iovec iov[1];
+       struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1 };
        ssize_t n;
-#ifndef HAVE_MSGHDR_MSG_CONTROL
-       int newfd;
-#endif
-
-#ifdef HAVE_MSGHDR_MSG_CONTROL
-       union {
-         struct cmsghdr        cm;
-         char                          control[CMSG_SPACE(sizeof(int))];
-       } control_un;
-       struct cmsghdr  *cmptr;
-
-       msg.msg_control = control_un.control;
-       msg.msg_controllen = sizeof(control_un.control);
-#else
-#if HAVE_MSGHDR_MSG_ACCTRIGHTS
-       msg.msg_accrights = (caddr_t) &newfd;
-       msg.msg_accrightslen = sizeof(int);
-#else
-#error Can not pass file descriptors
-#endif
-#endif
+       size_t bufsize = msghdr_prep_recv_fds(NULL, NULL, 0, 1);
+       uint8_t buf[bufsize];
 
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
-       msg.msg_flags = 0;
+       msghdr_prep_recv_fds(&msg, buf, bufsize, 1);
 
        iov[0].iov_base = (void *)ptr;
        iov[0].iov_len = nbytes;
-       msg.msg_iov = iov;
-       msg.msg_iovlen = 1;
 
-       if ( (n = recvmsg(fd, &msg, 0)) <= 0) {
-               return(n);
+       do {
+               n = recvmsg(fd, &msg, 0);
+       } while ((n == -1) && (errno == EINTR));
+
+       if (n <= 0) {
+               return n;
        }
 
-#ifdef HAVE_MSGHDR_MSG_CONTROL
-       if ((cmptr = CMSG_FIRSTHDR(&msg)) != NULL
-           && cmptr->cmsg_len == CMSG_LEN(sizeof(int))) {
-               if (cmptr->cmsg_level != SOL_SOCKET) {
-                       DEBUG(10, ("control level != SOL_SOCKET"));
-                       errno = EINVAL;
-                       return -1;
-               }
-               if (cmptr->cmsg_type != SCM_RIGHTS) {
-                       DEBUG(10, ("control type != SCM_RIGHTS"));
-                       errno = EINVAL;
-                       return -1;
+       {
+               size_t num_fds = msghdr_extract_fds(&msg, NULL, 0);
+               int fds[num_fds];
+
+               msghdr_extract_fds(&msg, fds, num_fds);
+
+               if (num_fds != 1) {
+                       size_t i;
+
+                       for (i=0; i<num_fds; i++) {
+                               close(fds[i]);
+                       }
+
+                       *recvfd = -1;
+                       return n;
                }
-               memcpy(recvfd, CMSG_DATA(cmptr), sizeof(*recvfd));
-       } else {
-               *recvfd = -1;           /* descriptor was not passed */
-       }
-#else
-       if (msg.msg_accrightslen == sizeof(int)) {
-               *recvfd = newfd;
-       }
-       else {
-               *recvfd = -1;           /* descriptor was not passed */
+
+               *recvfd = fds[0];
        }
-#endif
 
        return(n);
 }
 
 static ssize_t write_fd(int fd, void *ptr, size_t nbytes, int sendfd)
 {
-       struct msghdr   msg;
-       struct iovec    iov[1];
-
-#ifdef HAVE_MSGHDR_MSG_CONTROL
-       union {
-               struct cmsghdr  cm;
-               char control[CMSG_SPACE(sizeof(int))];
-       } control_un;
-       struct cmsghdr  *cmptr;
-
-       ZERO_STRUCT(msg);
-       ZERO_STRUCT(control_un);
-
-       msg.msg_control = control_un.control;
-       msg.msg_controllen = sizeof(control_un.control);
-
-       cmptr = CMSG_FIRSTHDR(&msg);
-       cmptr->cmsg_len = CMSG_LEN(sizeof(int));
-       cmptr->cmsg_level = SOL_SOCKET;
-       cmptr->cmsg_type = SCM_RIGHTS;
-       memcpy(CMSG_DATA(cmptr), &sendfd, sizeof(sendfd));
-#else
-       ZERO_STRUCT(msg);
-       msg.msg_accrights = (caddr_t) &sendfd;
-       msg.msg_accrightslen = sizeof(int);
-#endif
+       struct msghdr msg = {0};
+       size_t bufsize = msghdr_prep_fds(NULL, NULL, 0, &sendfd, 1);
+       uint8_t buf[bufsize];
+       struct iovec iov;
+       ssize_t sent;
 
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
+       msghdr_prep_fds(&msg, buf, bufsize, &sendfd, 1);
 
-       ZERO_STRUCT(iov);
-       iov[0].iov_base = (void *)ptr;
-       iov[0].iov_len = nbytes;
-       msg.msg_iov = iov;
+       iov.iov_base = (void *)ptr;
+       iov.iov_len = nbytes;
+       msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
 
-       return (sendmsg(fd, &msg, 0));
+       do {
+               sent = sendmsg(fd, &msg, 0);
+       } while ((sent == -1) && (errno == EINTR));
+
+       return sent;
 }
 
 static void aio_child_cleanup(struct tevent_context *event_ctx,
-                             struct timed_event *te,
+                             struct tevent_timer *te,
                              struct timeval now,
                              void *private_data)
 {
@@ -294,7 +258,7 @@ static void aio_child_cleanup(struct tevent_context *event_ctx,
                /*
                 * Re-schedule the next cleanup round
                 */
-               list->cleanup_event = event_add_timed(server_event_context(), list,
+               list->cleanup_event = tevent_add_timer(server_event_context(), list,
                                                      timeval_add(&now, 30, 0),
                                                      aio_child_cleanup, list);
 
@@ -324,7 +288,7 @@ static struct aio_child_list *init_aio_children(struct vfs_handle_struct *handle
         */
 
        if (data->cleanup_event == NULL) {
-               data->cleanup_event = event_add_timed(server_event_context(), data,
+               data->cleanup_event = tevent_add_timer(server_event_context(), data,
                                                      timeval_current_ofs(30, 0),
                                                      aio_child_cleanup, data);
                if (data->cleanup_event == NULL) {
@@ -348,6 +312,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map)
                ssize_t ret;
                struct rw_cmd cmd_struct;
                struct rw_ret ret_struct;
+               struct timespec start, end;
 
                ret = read_fd(sockfd, &cmd_struct, sizeof(cmd_struct), &fd);
                if (ret != sizeof(cmd_struct)) {
@@ -379,10 +344,12 @@ static void aio_child_loop(int sockfd, struct mmap_area *map)
 
                ZERO_STRUCT(ret_struct);
 
+               PROFILE_TIMESTAMP(&start);
+
                switch (cmd_struct.cmd) {
                case READ_CMD:
                        ret_struct.size = sys_pread(
-                               fd, (void *)map->ptr, cmd_struct.n,
+                               fd, discard_const(map->ptr), cmd_struct.n,
                                cmd_struct.offset);
 #if 0
 /* This breaks "make test" when run with aio_fork module. */
@@ -393,7 +360,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map)
                        break;
                case WRITE_CMD:
                        ret_struct.size = sys_pwrite(
-                               fd, (void *)map->ptr, cmd_struct.n,
+                               fd, discard_const(map->ptr), cmd_struct.n,
                                cmd_struct.offset);
                        break;
                case FSYNC_CMD:
@@ -404,6 +371,8 @@ static void aio_child_loop(int sockfd, struct mmap_area *map)
                        errno = EINVAL;
                }
 
+               PROFILE_TIMESTAMP(&end);
+               ret_struct.duration = nsec_time_diff(&end, &start);
                DEBUG(10, ("aio_child_loop: syscall returned %d\n",
                           (int)ret_struct.size));
 
@@ -436,13 +405,13 @@ static int aio_child_destructor(struct aio_child *child)
        SMB_ASSERT(!child->busy);
 
        DEBUG(10, ("aio_child_destructor: removing child %d on fd %d\n",
-                       child->pid, child->sockfd));
+                  (int)child->pid, child->sockfd));
 
        /*
         * closing the sockfd makes the child not return from recvmsg() on RHEL
         * 5.5 so instead force the child to exit by writing bad data to it
         */
-       write(child->sockfd, &c, sizeof(c));
+       sys_write_v(child->sockfd, &c, sizeof(c));
        close(child->sockfd);
        DLIST_REMOVE(child->list->children, child);
        return 0;
@@ -509,7 +478,7 @@ static int create_aio_child(struct smbd_server_connection *sconn,
        }
 
        DEBUG(10, ("Child %d created with sockfd %d\n",
-                       result->pid, fdpair[0]));
+                  (int)result->pid, fdpair[0]));
 
        result->sockfd = fdpair[0];
        close(fdpair[1]);
@@ -572,7 +541,7 @@ static int get_idle_child(struct vfs_handle_struct *handle,
 struct aio_fork_pread_state {
        struct aio_child *child;
        ssize_t ret;
-       int err;
+       struct vfs_aio_state vfs_aio_state;
 };
 
 static void aio_fork_pread_done(struct tevent_req *subreq);
@@ -670,28 +639,28 @@ static void aio_fork_pread_done(struct tevent_req *subreq)
 
        retbuf = (struct rw_ret *)buf;
        state->ret = retbuf->size;
-       state->err = retbuf->ret_errno;
+       state->vfs_aio_state.error = retbuf->ret_errno;
+       state->vfs_aio_state.duration = retbuf->duration;
        tevent_req_done(req);
 }
 
-static ssize_t aio_fork_pread_recv(struct tevent_req *req, int *err)
+static ssize_t aio_fork_pread_recv(struct tevent_req *req,
+                                  struct vfs_aio_state *vfs_aio_state)
 {
        struct aio_fork_pread_state *state = tevent_req_data(
                req, struct aio_fork_pread_state);
 
-       if (tevent_req_is_unix_error(req, err)) {
+       if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
                return -1;
        }
-       if (state->ret == -1) {
-               *err = state->err;
-       }
+       *vfs_aio_state = state->vfs_aio_state;
        return state->ret;
 }
 
 struct aio_fork_pwrite_state {
        struct aio_child *child;
        ssize_t ret;
-       int err;
+       struct vfs_aio_state vfs_aio_state;
 };
 
 static void aio_fork_pwrite_done(struct tevent_req *subreq);
@@ -786,28 +755,28 @@ static void aio_fork_pwrite_done(struct tevent_req *subreq)
 
        retbuf = (struct rw_ret *)buf;
        state->ret = retbuf->size;
-       state->err = retbuf->ret_errno;
+       state->vfs_aio_state.error = retbuf->ret_errno;
+       state->vfs_aio_state.duration = retbuf->duration;
        tevent_req_done(req);
 }
 
-static ssize_t aio_fork_pwrite_recv(struct tevent_req *req, int *err)
+static ssize_t aio_fork_pwrite_recv(struct tevent_req *req,
+                                   struct vfs_aio_state *vfs_aio_state)
 {
        struct aio_fork_pwrite_state *state = tevent_req_data(
                req, struct aio_fork_pwrite_state);
 
-       if (tevent_req_is_unix_error(req, err)) {
+       if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
                return -1;
        }
-       if (state->ret == -1) {
-               *err = state->err;
-       }
+       *vfs_aio_state = state->vfs_aio_state;
        return state->ret;
 }
 
 struct aio_fork_fsync_state {
        struct aio_child *child;
        ssize_t ret;
-       int err;
+       struct vfs_aio_state vfs_aio_state;
 };
 
 static void aio_fork_fsync_done(struct tevent_req *subreq);
@@ -894,21 +863,21 @@ static void aio_fork_fsync_done(struct tevent_req *subreq)
 
        retbuf = (struct rw_ret *)buf;
        state->ret = retbuf->size;
-       state->err = retbuf->ret_errno;
+       state->vfs_aio_state.error = retbuf->ret_errno;
+       state->vfs_aio_state.duration = retbuf->duration;
        tevent_req_done(req);
 }
 
-static int aio_fork_fsync_recv(struct tevent_req *req, int *err)
+static int aio_fork_fsync_recv(struct tevent_req *req,
+                              struct vfs_aio_state *vfs_aio_state)
 {
        struct aio_fork_fsync_state *state = tevent_req_data(
                req, struct aio_fork_fsync_state);
 
-       if (tevent_req_is_unix_error(req, err)) {
+       if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
                return -1;
        }
-       if (state->ret == -1) {
-               *err = state->err;
-       }
+       *vfs_aio_state = state->vfs_aio_state;
        return state->ret;
 }
 
@@ -937,17 +906,6 @@ static int aio_fork_connect(vfs_handle_struct *handle, const char *service,
                                NULL, struct aio_fork_config,
                                return -1);
 
-       /*********************************************************************
-        * How many threads to initialize ?
-        * 100 per process seems insane as a default until you realize that
-        * (a) Threads terminate after 1 second when idle.
-        * (b) Throttling is done in SMB2 via the crediting algorithm.
-        * (c) SMB1 clients are limited to max_mux (50) outstanding
-        *     requests and Windows clients don't use this anyway.
-        * Essentially we want this to be unlimited unless smb.conf
-        * says different.
-        *********************************************************************/
-       aio_pending_size = 100;
        return 0;
 }
 
@@ -961,8 +919,8 @@ static struct vfs_fn_pointers vfs_aio_fork_fns = {
        .fsync_recv_fn = aio_fork_fsync_recv,
 };
 
-NTSTATUS vfs_aio_fork_init(void);
-NTSTATUS vfs_aio_fork_init(void)
+NTSTATUS vfs_aio_fork_init(TALLOC_CTX *);
+NTSTATUS vfs_aio_fork_init(TALLOC_CTX *ctx)
 {
        return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
                                "aio_fork", &vfs_aio_fork_fns);