#include "smbd/globals.h"
#include "lib/async_req/async_sock.h"
#include "lib/util/tevent_unix.h"
-#include "lib/sys_rw.h"
-#include "lib/sys_rw_data.h"
-#include "lib/msghdr.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/sys_rw_data.h"
+#include "lib/util/msghdr.h"
+#include "smbprofile.h"
#if !defined(HAVE_STRUCT_MSGHDR_MSG_CONTROL) && !defined(HAVE_STRUCT_MSGHDR_MSG_ACCRIGHTS)
# error Can not pass file descriptors
#define MAP_FILE 0
#endif
+struct aio_child_list;
+
/*
 * Module configuration. init_aio_children() fetches it from the VFS
 * handle with SMB_VFS_HANDLE_GET_DATA().
 */
struct aio_fork_config {
bool erratic_testing_mode;
/*
 * Added by this change: the aio child list, allocated on first use by
 * init_aio_children() with this config as its talloc parent.
 */
+ struct aio_child_list *children;
};
/*
 * A memory region described by its length and start address; these are
 * the two values mmap_area_destructor() hands to munmap().
 */
struct mmap_area {
size_t size;
/* This change drops the volatile qualifier from ptr. */
- volatile void *ptr;
+ void *ptr;
};
/*
 * Unmap the region described by area. The result of munmap() is not
 * checked and 0 is always returned.
 * NOTE(review): presumably installed as a talloc destructor; the
 * registration is not visible in this excerpt -- confirm.
 */
static int mmap_area_destructor(struct mmap_area *area)
{
- munmap((void *)area->ptr, area->size);
+ munmap(discard_const(area->ptr), area->size);
return 0;
}
/*
 * Result record filled in by aio_child_loop() and later read back from
 * the reply buffer by the requesting side.
 */
struct rw_ret {
/* For READ_CMD/WRITE_CMD: return value of sys_pread()/sys_pwrite(). */
ssize_t size;
/* Copied into vfs_aio_state.error when the reply is processed. */
int ret_errno;
/*
 * Added by this change: nsec_time_diff() between the two
 * PROFILE_TIMESTAMP() samples taken around the command switch.
 */
+ uint64_t duration;
};
struct aio_child_list;
struct tevent_timer *cleanup_event;
};
-static void free_aio_children(void **p)
-{
- TALLOC_FREE(*p);
-}
-
/*
 * Receive up to nbytes into ptr from fd with recvmsg(), together with a
 * file descriptor that may have been passed alongside the data.
 *
 * New version (the "+" lines):
 *  - msghdr_prep_recv_fds() is first called with NULL arguments and its
 *    return value sizes the on-stack control buffer (presumably the
 *    space needed for one descriptor -- confirm in lib/util/msghdr.h).
 *  - recvmsg() is retried for as long as it fails with EINTR.
 *  - If recvmsg() returns <= 0, that value is returned and *recvfd is
 *    left untouched.
 *  - Otherwise the descriptors are pulled out with msghdr_extract_fds().
 *    Exactly one descriptor is stored in *recvfd; for any other count
 *    every received descriptor is closed and *recvfd is set to -1.
 *  - The byte count from recvmsg() is returned.
 *
 * NOTE(review): when no descriptor was received, num_fds is 0 and
 * "int fds[num_fds]" declares a zero-length VLA -- confirm this is
 * acceptable on all supported compilers.
 *
 * Old version (the "-" lines): parsed the control message by hand, with
 * separate code paths for msg_control and msg_accrights.
 */
static ssize_t read_fd(int fd, void *ptr, size_t nbytes, int *recvfd)
{
- struct msghdr msg;
struct iovec iov[1];
+ struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1 };
ssize_t n;
-#ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
- int newfd;
+ size_t bufsize = msghdr_prep_recv_fds(NULL, NULL, 0, 1);
+ uint8_t buf[bufsize];
- ZERO_STRUCT(msg);
- msg.msg_accrights = (caddr_t) &newfd;
- msg.msg_accrightslen = sizeof(int);
-#else
+ msghdr_prep_recv_fds(&msg, buf, bufsize, 1);
- union {
- struct cmsghdr cm;
- char control[CMSG_SPACE(sizeof(int))];
- } control_un;
- struct cmsghdr *cmptr;
+ iov[0].iov_base = (void *)ptr;
+ iov[0].iov_len = nbytes;
- ZERO_STRUCT(msg);
- ZERO_STRUCT(control_un);
+ do {
+ n = recvmsg(fd, &msg, 0);
+ } while ((n == -1) && (errno == EINTR));
- msg.msg_control = control_un.control;
- msg.msg_controllen = sizeof(control_un.control);
-#endif
+ if (n <= 0) {
+ return n;
+ }
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
+ {
+ size_t num_fds = msghdr_extract_fds(&msg, NULL, 0);
+ int fds[num_fds];
- iov[0].iov_base = (void *)ptr;
- iov[0].iov_len = nbytes;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
+ msghdr_extract_fds(&msg, fds, num_fds);
- if ( (n = recvmsg(fd, &msg, 0)) <= 0) {
- return(n);
- }
+ if (num_fds != 1) {
+ size_t i;
-#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
- if ((cmptr = CMSG_FIRSTHDR(&msg)) != NULL
- && cmptr->cmsg_len == CMSG_LEN(sizeof(int))) {
- if (cmptr->cmsg_level != SOL_SOCKET) {
- DEBUG(10, ("control level != SOL_SOCKET"));
- errno = EINVAL;
- return -1;
- }
- if (cmptr->cmsg_type != SCM_RIGHTS) {
- DEBUG(10, ("control type != SCM_RIGHTS"));
- errno = EINVAL;
- return -1;
+ for (i=0; i<num_fds; i++) {
+ close(fds[i]);
+ }
+
+ *recvfd = -1;
+ return n;
}
- memcpy(recvfd, CMSG_DATA(cmptr), sizeof(*recvfd));
- } else {
- *recvfd = -1; /* descriptor was not passed */
- }
-#else
- if (msg.msg_accrightslen == sizeof(int)) {
- *recvfd = newfd;
- }
- else {
- *recvfd = -1; /* descriptor was not passed */
+
+ *recvfd = fds[0];
}
-#endif
return(n);
}
/*
 * Send nbytes from ptr over fd with sendmsg(), attaching sendfd through
 * the control data set up by msghdr_prep_fds().
 *
 * msghdr_prep_fds() is called twice: once with NULL arguments, where its
 * return value sizes the on-stack buffer, and once to fill in msg.
 *
 * New version (the "+" lines): msg is zero-initialised at declaration,
 * sendmsg() is retried for as long as it fails with EINTR, and the final
 * sendmsg() return value is handed back to the caller.
 */
static ssize_t write_fd(int fd, void *ptr, size_t nbytes, int sendfd)
{
- struct msghdr msg;
+ struct msghdr msg = {0};
size_t bufsize = msghdr_prep_fds(NULL, NULL, 0, &sendfd, 1);
uint8_t buf[bufsize];
struct iovec iov;
+ ssize_t sent;
msghdr_prep_fds(&msg, buf, bufsize, &sendfd, 1);
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
iov.iov_base = (void *)ptr;
iov.iov_len = nbytes;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
- return (sendmsg(fd, &msg, 0));
+ do {
+ sent = sendmsg(fd, &msg, 0);
+ } while ((sent == -1) && (errno == EINTR));
+
+ return sent;
}
static void aio_child_cleanup(struct tevent_context *event_ctx,
/*
 * Return the aio child list belonging to this VFS handle, creating it
 * when it does not exist yet.
 *
 * New version (the "+" lines): the list is stored in the handle's
 * struct aio_fork_config (config->children) and is allocated as a talloc
 * child of config on first use. If children->cleanup_event is NULL, a
 * timer running aio_child_cleanup is added at timeval_current_ofs(30, 0);
 * if adding it fails, config->children is freed again.
 *
 * Returns NULL when the config cannot be fetched, when the allocation
 * fails, or when the timer cannot be added.
 *
 * Old version (the "-" lines): the list itself was the handle data and
 * was attached with SMB_VFS_HANDLE_SET_DATA() using free_aio_children.
 */
static struct aio_child_list *init_aio_children(struct vfs_handle_struct *handle)
{
- struct aio_child_list *data = NULL;
+ struct aio_fork_config *config;
+ struct aio_child_list *children;
- if (SMB_VFS_HANDLE_TEST_DATA(handle)) {
- SMB_VFS_HANDLE_GET_DATA(handle, data, struct aio_child_list,
- return NULL);
- }
+ SMB_VFS_HANDLE_GET_DATA(handle, config, struct aio_fork_config,
+ return NULL);
- if (data == NULL) {
- data = talloc_zero(NULL, struct aio_child_list);
- if (data == NULL) {
+ if (config->children == NULL) {
+ config->children = talloc_zero(config, struct aio_child_list);
+ if (config->children == NULL) {
return NULL;
}
}
+ children = config->children;
/*
 * Regardless of whether the child_list had been around or not, make
 * sure a cleanup timer event is registered.
 * NOTE(review): this comment is truncated in the excerpt; its surviving
 * tail says the event will delete itself when it finds that no children
 * are around anymore -- confirm against aio_child_cleanup().
 */
- if (data->cleanup_event == NULL) {
- data->cleanup_event = tevent_add_timer(server_event_context(), data,
- timeval_current_ofs(30, 0),
- aio_child_cleanup, data);
- if (data->cleanup_event == NULL) {
- TALLOC_FREE(data);
+ if (children->cleanup_event == NULL) {
+ children->cleanup_event =
+ tevent_add_timer(server_event_context(), children,
+ timeval_current_ofs(30, 0),
+ aio_child_cleanup, children);
+ if (children->cleanup_event == NULL) {
+ TALLOC_FREE(config->children);
return NULL;
}
}
- if (!SMB_VFS_HANDLE_TEST_DATA(handle)) {
- SMB_VFS_HANDLE_SET_DATA(handle, data, free_aio_children,
- struct aio_child_list, return False);
- }
-
- return data;
+ return children;
}
static void aio_child_loop(int sockfd, struct mmap_area *map)
ssize_t ret;
struct rw_cmd cmd_struct;
struct rw_ret ret_struct;
+ struct timespec start, end;
ret = read_fd(sockfd, &cmd_struct, sizeof(cmd_struct), &fd);
if (ret != sizeof(cmd_struct)) {
ZERO_STRUCT(ret_struct);
+ PROFILE_TIMESTAMP(&start);
+
switch (cmd_struct.cmd) {
case READ_CMD:
ret_struct.size = sys_pread(
- fd, (void *)map->ptr, cmd_struct.n,
+ fd, discard_const(map->ptr), cmd_struct.n,
cmd_struct.offset);
#if 0
/* This breaks "make test" when run with aio_fork module. */
break;
case WRITE_CMD:
ret_struct.size = sys_pwrite(
- fd, (void *)map->ptr, cmd_struct.n,
+ fd, discard_const(map->ptr), cmd_struct.n,
cmd_struct.offset);
break;
case FSYNC_CMD:
errno = EINVAL;
}
+ PROFILE_TIMESTAMP(&end);
+ ret_struct.duration = nsec_time_diff(&end, &start);
DEBUG(10, ("aio_child_loop: syscall returned %d\n",
(int)ret_struct.size));
SMB_ASSERT(!child->busy);
DEBUG(10, ("aio_child_destructor: removing child %d on fd %d\n",
- child->pid, child->sockfd));
+ (int)child->pid, child->sockfd));
/*
* closing the sockfd makes the child not return from recvmsg() on RHEL
* 5.5 so instead force the child to exit by writing bad data to it
*/
- write(child->sockfd, &c, sizeof(c));
+ sys_write_v(child->sockfd, &c, sizeof(c));
close(child->sockfd);
DLIST_REMOVE(child->list->children, child);
return 0;
}
DEBUG(10, ("Child %d created with sockfd %d\n",
- result->pid, fdpair[0]));
+ (int)result->pid, fdpair[0]));
result->sockfd = fdpair[0];
close(fdpair[1]);
/*
 * Per-request state of an aio_fork pread request.
 */
struct aio_fork_pread_state {
/* Child handling the request; its busy flag is cleared on completion. */
struct aio_child *child;
/*
 * Added by this change: requested length and destination buffer. On
 * completion the result size is checked against n and the bytes are
 * copied from the child's map into data.
 */
+ size_t n;
+ void *data;
/* Size reported by the child (rw_ret.size). */
ssize_t ret;
/* This change replaces the plain errno with error plus duration. */
- int err;
+ struct vfs_aio_state vfs_aio_state;
};
static void aio_fork_pread_done(struct tevent_req *subreq);
if (req == NULL) {
return NULL;
}
+ state->n = n;
+ state->data = data;
if (n > 128*1024) {
/* TODO: support variable buffers */
return;
}
- state->child->busy = false;
-
retbuf = (struct rw_ret *)buf;
state->ret = retbuf->size;
- state->err = retbuf->ret_errno;
+ state->vfs_aio_state.error = retbuf->ret_errno;
+ state->vfs_aio_state.duration = retbuf->duration;
+
+ if ((size_t)state->ret > state->n) {
+ tevent_req_error(req, EIO);
+ state->child->busy = false;
+ return;
+ }
+ memcpy(state->data, state->child->map->ptr, state->ret);
+
+ state->child->busy = false;
+
tevent_req_done(req);
}
/*
 * Collect the result of an aio_fork pread request.
 *
 * New version (the "+" lines): tevent_req_is_unix_error() is handed
 * &vfs_aio_state->error; when it reports a failure, -1 is returned.
 * Otherwise the request's stored vfs_aio_state is copied to the caller
 * and state->ret is returned.
 *
 * Old version (the "-" lines): state->err was reported through *err only
 * when state->ret was -1.
 */
-static ssize_t aio_fork_pread_recv(struct tevent_req *req, int *err)
+static ssize_t aio_fork_pread_recv(struct tevent_req *req,
+ struct vfs_aio_state *vfs_aio_state)
{
struct aio_fork_pread_state *state = tevent_req_data(
req, struct aio_fork_pread_state);
- if (tevent_req_is_unix_error(req, err)) {
+ if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
return -1;
}
- if (state->ret == -1) {
- *err = state->err;
- }
+ *vfs_aio_state = state->vfs_aio_state;
return state->ret;
}
/*
 * Per-request state of an aio_fork pwrite request.
 */
struct aio_fork_pwrite_state {
struct aio_child *child;
/* Value handed back by aio_fork_pwrite_recv() on success. */
ssize_t ret;
/* This change replaces the plain errno with a struct vfs_aio_state. */
- int err;
+ struct vfs_aio_state vfs_aio_state;
};
static void aio_fork_pwrite_done(struct tevent_req *subreq);
retbuf = (struct rw_ret *)buf;
state->ret = retbuf->size;
- state->err = retbuf->ret_errno;
+ state->vfs_aio_state.error = retbuf->ret_errno;
+ state->vfs_aio_state.duration = retbuf->duration;
tevent_req_done(req);
}
/*
 * Collect the result of an aio_fork pwrite request.
 *
 * New version (the "+" lines): tevent_req_is_unix_error() is handed
 * &vfs_aio_state->error; when it reports a failure, -1 is returned.
 * Otherwise the request's stored vfs_aio_state is copied to the caller
 * and state->ret is returned.
 *
 * Old version (the "-" lines): state->err was reported through *err only
 * when state->ret was -1.
 */
-static ssize_t aio_fork_pwrite_recv(struct tevent_req *req, int *err)
+static ssize_t aio_fork_pwrite_recv(struct tevent_req *req,
+ struct vfs_aio_state *vfs_aio_state)
{
struct aio_fork_pwrite_state *state = tevent_req_data(
req, struct aio_fork_pwrite_state);
- if (tevent_req_is_unix_error(req, err)) {
+ if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
return -1;
}
- if (state->ret == -1) {
- *err = state->err;
- }
+ *vfs_aio_state = state->vfs_aio_state;
return state->ret;
}
/*
 * Per-request state of an aio_fork fsync request.
 */
struct aio_fork_fsync_state {
struct aio_child *child;
/* Value handed back by aio_fork_fsync_recv() on success. */
ssize_t ret;
/* This change replaces the plain errno with a struct vfs_aio_state. */
- int err;
+ struct vfs_aio_state vfs_aio_state;
};
static void aio_fork_fsync_done(struct tevent_req *subreq);
retbuf = (struct rw_ret *)buf;
state->ret = retbuf->size;
- state->err = retbuf->ret_errno;
+ state->vfs_aio_state.error = retbuf->ret_errno;
+ state->vfs_aio_state.duration = retbuf->duration;
tevent_req_done(req);
}
/*
 * Collect the result of an aio_fork fsync request.
 *
 * New version (the "+" lines): tevent_req_is_unix_error() is handed
 * &vfs_aio_state->error; when it reports a failure, -1 is returned.
 * Otherwise the request's stored vfs_aio_state is copied to the caller
 * and state->ret is returned (converted from ssize_t to int).
 *
 * Old version (the "-" lines): state->err was reported through *err only
 * when state->ret was -1.
 */
-static int aio_fork_fsync_recv(struct tevent_req *req, int *err)
+static int aio_fork_fsync_recv(struct tevent_req *req,
+ struct vfs_aio_state *vfs_aio_state)
{
struct aio_fork_fsync_state *state = tevent_req_data(
req, struct aio_fork_fsync_state);
- if (tevent_req_is_unix_error(req, err)) {
+ if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
return -1;
}
- if (state->ret == -1) {
- *err = state->err;
- }
+ *vfs_aio_state = state->vfs_aio_state;
return state->ret;
}
NULL, struct aio_fork_config,
return -1);
- /*********************************************************************
- * How many threads to initialize ?
- * 100 per process seems insane as a default until you realize that
- * (a) Threads terminate after 1 second when idle.
- * (b) Throttling is done in SMB2 via the crediting algorithm.
- * (c) SMB1 clients are limited to max_mux (50) outstanding
- * requests and Windows clients don't use this anyway.
- * Essentially we want this to be unlimited unless smb.conf
- * says different.
- *********************************************************************/
- aio_pending_size = 100;
return 0;
}
.fsync_recv_fn = aio_fork_fsync_recv,
};
-NTSTATUS vfs_aio_fork_init(void);
-NTSTATUS vfs_aio_fork_init(void)
+NTSTATUS vfs_aio_fork_init(TALLOC_CTX *);
+NTSTATUS vfs_aio_fork_init(TALLOC_CTX *ctx)
{
return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
"aio_fork", &vfs_aio_fork_fns);