* Simulate the Posix AIO using mmap/fork
*
* Copyright (C) Volker Lendecke 2008
+ * Copyright (C) Jeremy Allison 2010
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*/
#include "includes.h"
+#include "system/filesys.h"
+#include "system/shmem.h"
+#include "smbd/smbd.h"
+#include "smbd/globals.h"
+#include "lib/async_req/async_sock.h"
+#include "lib/util/tevent_unix.h"
+
+#undef recvmsg
+
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
struct mmap_area {
size_t size;
goto fail;
}
+ close(fd);
+
result->size = size;
talloc_set_destructor(result, mmap_area_destructor);
return NULL;
}
+enum cmd_type {
+ READ_CMD,
+ WRITE_CMD,
+ FSYNC_CMD
+};
+
+static const char *cmd_type_str(enum cmd_type cmd)
+{
+ const char *result;
+
+ switch (cmd) {
+ case READ_CMD:
+ result = "READ";
+ break;
+ case WRITE_CMD:
+ result = "WRITE";
+ break;
+ case FSYNC_CMD:
+ result = "FSYNC";
+ break;
+ default:
+ result = "<UNKNOWN>";
+ break;
+ }
+ return result;
+}
+
struct rw_cmd {
size_t n;
- SMB_OFF_T offset;
- bool read_cmd;
+ off_t offset;
+ enum cmd_type cmd;
};
struct rw_ret {
struct aio_child {
struct aio_child *prev, *next;
struct aio_child_list *list;
- SMB_STRUCT_AIOCB *aiocb;
pid_t pid;
int sockfd;
- struct fd_event *sock_event;
- struct rw_ret retval;
- struct mmap_area *map; /* ==NULL means write request */
+ struct mmap_area *map;
bool dont_delete; /* Marked as in use since last cleanup */
- bool cancelled;
- bool read_cmd;
+ bool busy;
};
struct aio_child_list {
msg.msg_name = NULL;
msg.msg_namelen = 0;
+ msg.msg_flags = 0;
- iov[0].iov_base = ptr;
+ iov[0].iov_base = (void *)ptr;
iov[0].iov_len = nbytes;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
errno = EINVAL;
return -1;
}
- *recvfd = *((int *) CMSG_DATA(cmptr));
+ memcpy(recvfd, CMSG_DATA(cmptr), sizeof(*recvfd));
} else {
*recvfd = -1; /* descriptor was not passed */
}
cmptr->cmsg_len = CMSG_LEN(sizeof(int));
cmptr->cmsg_level = SOL_SOCKET;
cmptr->cmsg_type = SCM_RIGHTS;
- *((int *) CMSG_DATA(cmptr)) = sendfd;
+ memcpy(CMSG_DATA(cmptr), &sendfd, sizeof(sendfd));
#else
ZERO_STRUCT(msg);
msg.msg_accrights = (caddr_t) &sendfd;
msg.msg_namelen = 0;
ZERO_STRUCT(iov);
- iov[0].iov_base = ptr;
+ iov[0].iov_base = (void *)ptr;
iov[0].iov_len = nbytes;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
static void aio_child_cleanup(struct event_context *event_ctx,
struct timed_event *te,
- const struct timeval *now,
+ struct timeval now,
void *private_data)
{
struct aio_child_list *list = talloc_get_type_abort(
for (child = list->children; child != NULL; child = next) {
next = child->next;
- if (child->aiocb != NULL) {
+ if (child->busy) {
DEBUG(10, ("child %d currently active\n",
(int)child->pid));
continue;
"deleting\n", (int)child->pid));
TALLOC_FREE(child);
+ child = next;
}
if (list->children != NULL) {
/*
* Re-schedule the next cleanup round
*/
- list->cleanup_event = event_add_timed(smbd_event_context(), list,
- timeval_add(now, 30, 0),
- "aio_child_cleanup",
+ list->cleanup_event = event_add_timed(server_event_context(), list,
+ timeval_add(&now, 30, 0),
aio_child_cleanup, list);
}
}
if (data == NULL) {
- data = TALLOC_ZERO_P(NULL, struct aio_child_list);
+ data = talloc_zero(NULL, struct aio_child_list);
if (data == NULL) {
return NULL;
}
*/
if (data->cleanup_event == NULL) {
- data->cleanup_event = event_add_timed(smbd_event_context(), data,
+ data->cleanup_event = event_add_timed(server_event_context(), data,
timeval_current_ofs(30, 0),
- "aio_child_cleanup",
aio_child_cleanup, data);
if (data->cleanup_event == NULL) {
TALLOC_FREE(data);
}
DEBUG(10, ("aio_child_loop: %s %d bytes at %d from fd %d\n",
- cmd_struct.read_cmd ? "read" : "write",
+ cmd_type_str(cmd_struct.cmd),
(int)cmd_struct.n, (int)cmd_struct.offset, fd));
-#ifdef ENABLE_BUILD_FARM_HACKS
+#ifdef DEVELOPER
{
/*
- * In the build farm, we want erratic behaviour for
+ * For developer testing, we want erratic behaviour for
* async I/O times
*/
uint8_t randval;
ZERO_STRUCT(ret_struct);
- if (cmd_struct.read_cmd) {
+ switch (cmd_struct.cmd) {
+ case READ_CMD:
ret_struct.size = sys_pread(
fd, (void *)map->ptr, cmd_struct.n,
cmd_struct.offset);
- }
- else {
+#if 0
+/* This breaks "make test" when run with aio_fork module. */
+#ifdef DEVELOPER
+ ret_struct.size = MAX(1, ret_struct.size * 0.9);
+#endif
+#endif
+ break;
+ case WRITE_CMD:
ret_struct.size = sys_pwrite(
fd, (void *)map->ptr, cmd_struct.n,
cmd_struct.offset);
+ break;
+ case FSYNC_CMD:
+ ret_struct.size = fsync(fd);
+ break;
+ default:
+ ret_struct.size = -1;
+ errno = EINVAL;
}
DEBUG(10, ("aio_child_loop: syscall returned %d\n",
ret_struct.ret_errno = errno;
}
+ /*
+ * Close the fd before telling our parent we're done. The
+ * parent might close and re-open the file very quickly, and
+ * with system-level share modes (GPFS) we would get an
+ * unjustified SHARING_VIOLATION.
+ */
+ close(fd);
+
ret = write_data(sockfd, (char *)&ret_struct,
sizeof(ret_struct));
if (ret != sizeof(ret_struct)) {
strerror(errno)));
exit(2);
}
-
- close(fd);
}
}
-static void handle_aio_completion(struct event_context *event_ctx,
- struct fd_event *event, uint16 flags,
- void *p)
+static int aio_child_destructor(struct aio_child *child)
{
- struct aio_child *child = (struct aio_child *)p;
- uint16 mid;
+ char c=0;
- DEBUG(10, ("handle_aio_completion called with flags=%d\n", flags));
+ SMB_ASSERT(!child->busy);
- if ((flags & EVENT_FD_READ) == 0) {
- return;
- }
-
- if (!NT_STATUS_IS_OK(read_data(child->sockfd,
- (char *)&child->retval,
- sizeof(child->retval)))) {
- DEBUG(0, ("aio child %d died\n", (int)child->pid));
- child->retval.size = -1;
- child->retval.ret_errno = EIO;
- }
-
- if (child->cancelled) {
- child->aiocb = NULL;
- child->cancelled = false;
- return;
- }
+ DEBUG(10, ("aio_child_destructor: removing child %d on fd %d\n",
+ child->pid, child->sockfd));
- if (child->read_cmd && (child->retval.size > 0)) {
- SMB_ASSERT(child->retval.size <= child->aiocb->aio_nbytes);
- memcpy((void *)child->aiocb->aio_buf, (void *)child->map->ptr,
- child->retval.size);
- }
-
- mid = child->aiocb->aio_sigevent.sigev_value.sival_int;
-
- DEBUG(10, ("mid %d finished\n", (int)mid));
-
- aio_request_done(mid);
- process_aio_queue();
-}
-
-static int aio_child_destructor(struct aio_child *child)
-{
- SMB_ASSERT((child->aiocb == NULL) || child->cancelled);
+ /*
+ * closing the sockfd makes the child not return from recvmsg() on RHEL
+ * 5.5 so instead force the child to exit by writing bad data to it
+ */
+ write(child->sockfd, &c, sizeof(c));
close(child->sockfd);
DLIST_REMOVE(child->list->children, child);
return 0;
}
-static NTSTATUS create_aio_child(struct aio_child_list *children,
- size_t map_size,
- struct aio_child **presult)
+/*
+ * We have to close all fd's in open files, we might incorrectly hold a system
+ * level share mode on a file.
+ */
+
+static struct files_struct *close_fsp_fd(struct files_struct *fsp,
+ void *private_data)
+{
+ if ((fsp->fh != NULL) && (fsp->fh->fd != -1)) {
+ close(fsp->fh->fd);
+ fsp->fh->fd = -1;
+ }
+ return NULL;
+}
+
+static int create_aio_child(struct smbd_server_connection *sconn,
+ struct aio_child_list *children,
+ size_t map_size,
+ struct aio_child **presult)
{
struct aio_child *result;
int fdpair[2];
- NTSTATUS status;
+ int ret;
fdpair[0] = fdpair[1] = -1;
- result = TALLOC_ZERO_P(children, struct aio_child);
- NT_STATUS_HAVE_NO_MEMORY(result);
+ result = talloc_zero(children, struct aio_child);
+ if (result == NULL) {
+ return ENOMEM;
+ }
if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) == -1) {
- status = map_nt_error_from_unix(errno);
+ ret = errno;
DEBUG(10, ("socketpair() failed: %s\n", strerror(errno)));
- TALLOC_FREE(result);
goto fail;
}
result->map = mmap_area_init(result, map_size);
if (result->map == NULL) {
- status = map_nt_error_from_unix(errno);
+ ret = errno;
DEBUG(0, ("Could not create mmap area\n"));
goto fail;
}
- result->pid = sys_fork();
+ result->pid = fork();
if (result->pid == -1) {
- status = map_nt_error_from_unix(errno);
+ ret = errno;
DEBUG(0, ("fork failed: %s\n", strerror(errno)));
goto fail;
}
if (result->pid == 0) {
close(fdpair[0]);
result->sockfd = fdpair[1];
+ files_forall(sconn, close_fsp_fd, NULL);
aio_child_loop(result->sockfd, result->map);
}
- DEBUG(10, ("Child %d created\n", result->pid));
+ DEBUG(10, ("Child %d created with sockfd %d\n",
+ result->pid, fdpair[0]));
result->sockfd = fdpair[0];
close(fdpair[1]);
- result->sock_event = event_add_fd(smbd_event_context(), result,
- result->sockfd, EVENT_FD_READ,
- handle_aio_completion,
- result);
- if (result->sock_event == NULL) {
- status = NT_STATUS_NO_MEMORY;
- DEBUG(0, ("event_add_fd failed\n"));
- goto fail;
- }
-
result->list = children;
DLIST_ADD(children->children, result);
*presult = result;
- return NT_STATUS_OK;
+ return 0;
fail:
if (fdpair[0] != -1) close(fdpair[0]);
if (fdpair[1] != -1) close(fdpair[1]);
TALLOC_FREE(result);
- return status;
+ return ret;
}
-static NTSTATUS get_idle_child(struct vfs_handle_struct *handle,
- struct aio_child **pchild)
+static int get_idle_child(struct vfs_handle_struct *handle,
+ struct aio_child **pchild)
{
struct aio_child_list *children;
struct aio_child *child;
- NTSTATUS status;
children = init_aio_children(handle);
if (children == NULL) {
- return NT_STATUS_NO_MEMORY;
+ return ENOMEM;
}
for (child = children->children; child != NULL; child = child->next) {
- if (child->aiocb == NULL) {
- /* idle */
+ if (!child->busy) {
break;
}
}
if (child == NULL) {
+ int ret;
+
DEBUG(10, ("no idle child found, creating new one\n"));
- status = create_aio_child(children, 128*1024, &child);
- if (!NT_STATUS_IS_OK(status)) {
+ ret = create_aio_child(handle->conn->sconn, children,
+ 128*1024, &child);
+ if (ret != 0) {
DEBUG(10, ("create_aio_child failed: %s\n",
- nt_errstr(status)));
- return status;
+ strerror(errno)));
+ return ret;
}
}
child->dont_delete = true;
+ child->busy = true;
*pchild = child;
- return NT_STATUS_OK;
+ return 0;
}
-static int aio_fork_read(struct vfs_handle_struct *handle,
- struct files_struct *fsp, SMB_STRUCT_AIOCB *aiocb)
-{
+struct aio_fork_pread_state {
struct aio_child *child;
- struct rw_cmd cmd;
ssize_t ret;
- NTSTATUS status;
+ int err;
+};
- if (aiocb->aio_nbytes > 128*1024) {
- /* TODO: support variable buffers */
- errno = EINVAL;
- return -1;
+static void aio_fork_pread_done(struct tevent_req *subreq);
+
+static struct tevent_req *aio_fork_pread_send(struct vfs_handle_struct *handle,
+ TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct files_struct *fsp,
+ void *data,
+ size_t n, off_t offset)
+{
+ struct tevent_req *req, *subreq;
+ struct aio_fork_pread_state *state;
+ struct rw_cmd cmd;
+ ssize_t written;
+ int err;
+
+ req = tevent_req_create(mem_ctx, &state, struct aio_fork_pread_state);
+ if (req == NULL) {
+ return NULL;
}
- status = get_idle_child(handle, &child);
- if (!NT_STATUS_IS_OK(status)) {
- DEBUG(10, ("Could not get an idle child\n"));
- return -1;
+ if (n > 128*1024) {
+ /* TODO: support variable buffers */
+ tevent_req_error(req, EINVAL);
+ return tevent_req_post(req, ev);
}
- child->read_cmd = true;
- child->aiocb = aiocb;
- child->retval.ret_errno = EINPROGRESS;
+ err = get_idle_child(handle, &state->child);
+ if (err != 0) {
+ tevent_req_error(req, err);
+ return tevent_req_post(req, ev);
+ }
ZERO_STRUCT(cmd);
- cmd.n = aiocb->aio_nbytes;
- cmd.offset = aiocb->aio_offset;
- cmd.read_cmd = child->read_cmd;
+ cmd.n = n;
+ cmd.offset = offset;
+ cmd.cmd = READ_CMD;
DEBUG(10, ("sending fd %d to child %d\n", fsp->fh->fd,
- (int)child->pid));
+ (int)state->child->pid));
- ret = write_fd(child->sockfd, &cmd, sizeof(cmd), fsp->fh->fd);
- if (ret == -1) {
- DEBUG(10, ("write_fd failed: %s\n", strerror(errno)));
- return -1;
+ /*
+ * Not making this async. We're writing into an empty unix
+ * domain socket. This should never block.
+ */
+ written = write_fd(state->child->sockfd, &cmd, sizeof(cmd),
+ fsp->fh->fd);
+ if (written == -1) {
+ err = errno;
+
+ TALLOC_FREE(state->child);
+
+ DEBUG(10, ("write_fd failed: %s\n", strerror(err)));
+ tevent_req_error(req, err);
+ return tevent_req_post(req, ev);
}
- return 0;
+ subreq = read_packet_send(state, ev, state->child->sockfd,
+ sizeof(struct rw_ret), NULL, NULL);
+ if (tevent_req_nomem(subreq, req)) {
+ TALLOC_FREE(state->child); /* we sent sth down */
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, aio_fork_pread_done, req);
+ return req;
}
-static int aio_fork_write(struct vfs_handle_struct *handle,
- struct files_struct *fsp, SMB_STRUCT_AIOCB *aiocb)
+static void aio_fork_pread_done(struct tevent_req *subreq)
{
- struct aio_child *child;
- struct rw_cmd cmd;
- ssize_t ret;
- NTSTATUS status;
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct aio_fork_pread_state *state = tevent_req_data(
+ req, struct aio_fork_pread_state);
+ ssize_t nread;
+ uint8_t *buf;
+ int err;
+ struct rw_ret *retbuf;
+
+ nread = read_packet_recv(subreq, talloc_tos(), &buf, &err);
+ TALLOC_FREE(subreq);
+ if (nread == -1) {
+ TALLOC_FREE(state->child);
+ tevent_req_error(req, err);
+ return;
+ }
- if (aiocb->aio_nbytes > 128*1024) {
- /* TODO: support variable buffers */
- errno = EINVAL;
+ state->child->busy = false;
+
+ retbuf = (struct rw_ret *)buf;
+ state->ret = retbuf->size;
+ state->err = retbuf->ret_errno;
+ tevent_req_done(req);
+}
+
+static ssize_t aio_fork_pread_recv(struct tevent_req *req, int *err)
+{
+ struct aio_fork_pread_state *state = tevent_req_data(
+ req, struct aio_fork_pread_state);
+
+ if (tevent_req_is_unix_error(req, err)) {
return -1;
}
+ if (state->ret == -1) {
+ *err = state->err;
+ }
+ return state->ret;
+}
- status = get_idle_child(handle, &child);
- if (!NT_STATUS_IS_OK(status)) {
- DEBUG(10, ("Could not get an idle child\n"));
- return -1;
+struct aio_fork_pwrite_state {
+ struct aio_child *child;
+ ssize_t ret;
+ int err;
+};
+
+static void aio_fork_pwrite_done(struct tevent_req *subreq);
+
+static struct tevent_req *aio_fork_pwrite_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp,
+ const void *data, size_t n, off_t offset)
+{
+ struct tevent_req *req, *subreq;
+ struct aio_fork_pwrite_state *state;
+ struct rw_cmd cmd;
+ ssize_t written;
+ int err;
+
+ req = tevent_req_create(mem_ctx, &state, struct aio_fork_pwrite_state);
+ if (req == NULL) {
+ return NULL;
}
- child->read_cmd = false;
- child->aiocb = aiocb;
- child->retval.ret_errno = EINPROGRESS;
+ if (n > 128*1024) {
+ /* TODO: support variable buffers */
+ tevent_req_error(req, EINVAL);
+ return tevent_req_post(req, ev);
+ }
- memcpy((void *)child->map->ptr, (void *)aiocb->aio_buf,
- aiocb->aio_nbytes);
+ err = get_idle_child(handle, &state->child);
+ if (err != 0) {
+ tevent_req_error(req, err);
+ return tevent_req_post(req, ev);
+ }
ZERO_STRUCT(cmd);
- cmd.n = aiocb->aio_nbytes;
- cmd.offset = aiocb->aio_offset;
- cmd.read_cmd = child->read_cmd;
+ cmd.n = n;
+ cmd.offset = offset;
+ cmd.cmd = WRITE_CMD;
DEBUG(10, ("sending fd %d to child %d\n", fsp->fh->fd,
- (int)child->pid));
+ (int)state->child->pid));
- ret = write_fd(child->sockfd, &cmd, sizeof(cmd), fsp->fh->fd);
- if (ret == -1) {
- DEBUG(10, ("write_fd failed: %s\n", strerror(errno)));
- return -1;
+ /*
+ * Not making this async. We're writing into an empty unix
+ * domain socket. This should never block.
+ */
+ written = write_fd(state->child->sockfd, &cmd, sizeof(cmd),
+ fsp->fh->fd);
+ if (written == -1) {
+ err = errno;
+
+ TALLOC_FREE(state->child);
+
+ DEBUG(10, ("write_fd failed: %s\n", strerror(err)));
+ tevent_req_error(req, err);
+ return tevent_req_post(req, ev);
}
- return 0;
+ subreq = read_packet_send(state, ev, state->child->sockfd,
+ sizeof(struct rw_ret), NULL, NULL);
+ if (tevent_req_nomem(subreq, req)) {
+ TALLOC_FREE(state->child); /* we sent sth down */
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, aio_fork_pwrite_done, req);
+ return req;
}
-static struct aio_child *aio_fork_find_child(struct vfs_handle_struct *handle,
- SMB_STRUCT_AIOCB *aiocb)
+static void aio_fork_pwrite_done(struct tevent_req *subreq)
{
- struct aio_child_list *children;
- struct aio_child *child;
-
- children = init_aio_children(handle);
- if (children == NULL) {
- return NULL;
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct aio_fork_pwrite_state *state = tevent_req_data(
+ req, struct aio_fork_pwrite_state);
+ ssize_t nread;
+ uint8_t *buf;
+ int err;
+ struct rw_ret *retbuf;
+
+ nread = read_packet_recv(subreq, talloc_tos(), &buf, &err);
+ TALLOC_FREE(subreq);
+ if (nread == -1) {
+ TALLOC_FREE(state->child);
+ tevent_req_error(req, err);
+ return;
}
- for (child = children->children; child != NULL; child = child->next) {
- if (child->aiocb == aiocb) {
- return child;
- }
- }
+ state->child->busy = false;
- return NULL;
+ retbuf = (struct rw_ret *)buf;
+ state->ret = retbuf->size;
+ state->err = retbuf->ret_errno;
+ tevent_req_done(req);
}
-static ssize_t aio_fork_return_fn(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
+static ssize_t aio_fork_pwrite_recv(struct tevent_req *req, int *err)
{
- struct aio_child *child = aio_fork_find_child(handle, aiocb);
+ struct aio_fork_pwrite_state *state = tevent_req_data(
+ req, struct aio_fork_pwrite_state);
- if (child == NULL) {
- errno = EINVAL;
- DEBUG(0, ("returning EINVAL\n"));
+ if (tevent_req_is_unix_error(req, err)) {
return -1;
}
-
- child->aiocb = NULL;
-
- if (child->retval.size == -1) {
- errno = child->retval.ret_errno;
+ if (state->ret == -1) {
+ *err = state->err;
}
-
- return child->retval.size;
+ return state->ret;
}
-static int aio_fork_cancel(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
-{
- struct aio_child_list *children;
+struct aio_fork_fsync_state {
struct aio_child *child;
+ ssize_t ret;
+ int err;
+};
- children = init_aio_children(handle);
- if (children == NULL) {
- errno = EINVAL;
- return -1;
+static void aio_fork_fsync_done(struct tevent_req *subreq);
+
+static struct tevent_req *aio_fork_fsync_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp)
+{
+ struct tevent_req *req, *subreq;
+ struct aio_fork_fsync_state *state;
+ struct rw_cmd cmd;
+ ssize_t written;
+ int err;
+
+ req = tevent_req_create(mem_ctx, &state, struct aio_fork_fsync_state);
+ if (req == NULL) {
+ return NULL;
}
- for (child = children->children; child != NULL; child = child->next) {
- if (child->aiocb == NULL) {
- continue;
- }
- if (child->aiocb->aio_fildes != fsp->fh->fd) {
- continue;
- }
- if ((aiocb != NULL) && (child->aiocb != aiocb)) {
- continue;
- }
+ err = get_idle_child(handle, &state->child);
+ if (err != 0) {
+ tevent_req_error(req, err);
+ return tevent_req_post(req, ev);
+ }
- /*
- * We let the child do its job, but we discard the result when
- * it's finished.
- */
+ ZERO_STRUCT(cmd);
+ cmd.cmd = FSYNC_CMD;
- child->cancelled = true;
+ DEBUG(10, ("sending fd %d to child %d\n", fsp->fh->fd,
+ (int)state->child->pid));
+
+ /*
+ * Not making this async. We're writing into an empty unix
+ * domain socket. This should never block.
+ */
+ written = write_fd(state->child->sockfd, &cmd, sizeof(cmd),
+ fsp->fh->fd);
+ if (written == -1) {
+ err = errno;
+
+ TALLOC_FREE(state->child);
+
+ DEBUG(10, ("write_fd failed: %s\n", strerror(err)));
+ tevent_req_error(req, err);
+ return tevent_req_post(req, ev);
}
- return AIO_CANCELED;
+ subreq = read_packet_send(state, ev, state->child->sockfd,
+ sizeof(struct rw_ret), NULL, NULL);
+ if (tevent_req_nomem(subreq, req)) {
+ TALLOC_FREE(state->child); /* we sent sth down */
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, aio_fork_fsync_done, req);
+ return req;
}
-static int aio_fork_error_fn(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
+static void aio_fork_fsync_done(struct tevent_req *subreq)
{
- struct aio_child *child = aio_fork_find_child(handle, aiocb);
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct aio_fork_fsync_state *state = tevent_req_data(
+ req, struct aio_fork_fsync_state);
+ ssize_t nread;
+ uint8_t *buf;
+ int err;
+ struct rw_ret *retbuf;
+
+ nread = read_packet_recv(subreq, talloc_tos(), &buf, &err);
+ TALLOC_FREE(subreq);
+ if (nread == -1) {
+ TALLOC_FREE(state->child);
+ tevent_req_error(req, err);
+ return;
+ }
- if (child == NULL) {
- errno = EINVAL;
+ state->child->busy = false;
+
+ retbuf = (struct rw_ret *)buf;
+ state->ret = retbuf->size;
+ state->err = retbuf->ret_errno;
+ tevent_req_done(req);
+}
+
+static int aio_fork_fsync_recv(struct tevent_req *req, int *err)
+{
+ struct aio_fork_fsync_state *state = tevent_req_data(
+ req, struct aio_fork_fsync_state);
+
+ if (tevent_req_is_unix_error(req, err)) {
return -1;
}
+ if (state->ret == -1) {
+ *err = state->err;
+ }
+ return state->ret;
+}
- return child->retval.ret_errno;
+static int aio_fork_connect(vfs_handle_struct *handle, const char *service,
+ const char *user)
+{
+ /*********************************************************************
+ * How many threads to initialize ?
+ * 100 per process seems insane as a default until you realize that
+ * (a) Threads terminate after 1 second when idle.
+ * (b) Throttling is done in SMB2 via the crediting algorithm.
+ * (c) SMB1 clients are limited to max_mux (50) outstanding
+ * requests and Windows clients don't use this anyway.
+ * Essentially we want this to be unlimited unless smb.conf
+ * says different.
+ *********************************************************************/
+ aio_pending_size = 100;
+ return SMB_VFS_NEXT_CONNECT(handle, service, user);
}
-/* VFS operations structure */
-
-static vfs_op_tuple aio_fork_ops[] = {
- {SMB_VFS_OP(aio_fork_read), SMB_VFS_OP_AIO_READ,
- SMB_VFS_LAYER_TRANSPARENT},
- {SMB_VFS_OP(aio_fork_write), SMB_VFS_OP_AIO_WRITE,
- SMB_VFS_LAYER_TRANSPARENT},
- {SMB_VFS_OP(aio_fork_return_fn), SMB_VFS_OP_AIO_RETURN,
- SMB_VFS_LAYER_TRANSPARENT},
- {SMB_VFS_OP(aio_fork_cancel), SMB_VFS_OP_AIO_CANCEL,
- SMB_VFS_LAYER_TRANSPARENT},
- {SMB_VFS_OP(aio_fork_error_fn), SMB_VFS_OP_AIO_ERROR,
- SMB_VFS_LAYER_TRANSPARENT},
- {SMB_VFS_OP(NULL), SMB_VFS_OP_NOOP,
- SMB_VFS_LAYER_NOOP}
+static struct vfs_fn_pointers vfs_aio_fork_fns = {
+ .connect_fn = aio_fork_connect,
+ .pread_send_fn = aio_fork_pread_send,
+ .pread_recv_fn = aio_fork_pread_recv,
+ .pwrite_send_fn = aio_fork_pwrite_send,
+ .pwrite_recv_fn = aio_fork_pwrite_recv,
+ .fsync_send_fn = aio_fork_fsync_send,
+ .fsync_recv_fn = aio_fork_fsync_recv,
};
NTSTATUS vfs_aio_fork_init(void);
NTSTATUS vfs_aio_fork_init(void)
{
return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
- "aio_fork", aio_fork_ops);
+ "aio_fork", &vfs_aio_fork_fns);
}