2 * Simulate Posix AIO using Linux kernel AIO.
4 * Copyright (C) Jeremy Allison 2012
5 * Copyright (C) Volker Lendecke 2012
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "system/filesys.h"
24 #include "smbd/smbd.h"
25 #include "smbd/globals.h"
26 #include "lib/util/tevent_unix.h"
27 #include "lib/util/sys_rw.h"
28 #include <sys/eventfd.h>
30 #include "smbprofile.h"
/*
 * Module-wide state shared by every request handled in this file.
 *
 * NOTE(review): the housekeeping code also tests a flag named `used`;
 * its declaration is not present in this view.
 */
/* eventfd descriptor; -1 while no eventfd has been created. */
32 static int event_fd = -1;
/* libaio context set up with io_queue_init() and torn down with io_queue_release(). */
33 static io_context_t io_ctx;
/* tevent fd handler obtained from tevent_add_fd(); released with TALLOC_FREE(). */
34 static struct tevent_fd *aio_read_event;
/*
 * Tested by the housekeeping timer to decide whether the module is idle.
 * Presumably the number of requests in flight; the code updating it is
 * not visible here -- TODO confirm.
 */
36 static unsigned num_busy;
/*
 * Forward declaration of the completion handler defined further down;
 * it is declared ahead of the initialization code that precedes its
 * definition.
 */
38 static void aio_linux_done(struct tevent_context *event_ctx,
39 struct tevent_fd *event,
40 uint16_t flags, void *private_data);
42 /************************************************************************
43 Housekeeping. Cleanup if no activity for 30 seconds.
44 ***********************************************************************/
/*
 * Timer callback (handed to tevent_add_timer()) that releases the kernel
 * AIO resources once the module is idle.
 *
 * While num_busy is non-zero or the `used` flag is set, the callback
 * re-arms itself to run again 30 seconds later. Otherwise it releases
 * the io queue, zeroes io_ctx and frees the aio_read_event fd handler.
 *
 * NOTE(review): the rest of the parameter list, the braces and several
 * statements between the visible lines are not present in this view
 * (for example whatever happens to event_fd and to `used`).
 */
46 static void aio_linux_housekeeping(struct tevent_context *event_ctx,
47 struct tevent_timer *te,
51 /* Remove this timed event handler. */
/* Either condition keeps the kernel resources alive for another period. */
54 if ((num_busy != 0) || used) {
57 /* Still busy. Look again in 30 seconds. */
/* The returned timer handle is deliberately discarded via the (void) cast. */
58 (void)tevent_add_timer(event_ctx,
60 timeval_current_ofs(30, 0),
61 aio_linux_housekeeping,
66 /* No activity for 30 seconds. Close out kernel resources. */
67 io_queue_release(io_ctx);
/* Clear the released handle so no stale value remains in io_ctx. */
68 memset(&io_ctx, '\0', sizeof(io_ctx));
/* Drop the tevent fd handler that was watching for completions. */
75 TALLOC_FREE(aio_read_event);
78 /************************************************************************
79 Ensure event fd and aio context are initialized.
80 ***********************************************************************/
/*
 * Set up the shared AIO state on demand: a housekeeping timer, the
 * eventfd stored in event_fd, the tevent fd handler stored in
 * aio_read_event and the libaio context io_ctx.
 *
 * handle: VFS handle; only handle->conn->sconn->ev_ctx is used in the
 *         visible code, as the event context for the timer.
 *
 * Returns a bool. The callers in this file treat a false result as
 * failure and fail their request with EIO.
 *
 * NOTE(review): the "already initialized" test, the bodies of the error
 * branches and the return statements are not present in this view.
 */
82 static bool init_aio_linux(struct vfs_handle_struct *handle)
84 struct tevent_timer *te = NULL;
87 /* Already initialized. */
91 /* Schedule a shutdown event for 30 seconds from now. */
92 te = tevent_add_timer(handle->conn->sconn->ev_ctx,
94 timeval_current_ofs(30, 0),
95 aio_linux_housekeeping,
/* Counter starts at 0; the descriptor is non-blocking and close-on-exec. */
102 event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
103 if (event_fd == -1) {
/*
 * NOTE(review): the fd handler is added on server_event_context(), while
 * the timer above is added on handle->conn->sconn->ev_ctx -- confirm both
 * refer to the same event context.
 */
107 aio_read_event = tevent_add_fd(server_event_context(),
113 if (aio_read_event == NULL) {
/*
 * The queue is sized with lp_aio_max_threads(). A non-zero result enters
 * this branch, presumably the failure path -- branch body not visible.
 */
117 if (io_queue_init(lp_aio_max_threads(), &io_ctx)) {
121 DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
122 (int)lp_aio_max_threads()));
/* Failure path: report, then undo whatever was set up so far. */
128 DEBUG(10,("init_aio_linux: initialization failed\n"));
131 TALLOC_FREE(aio_read_event);
/* Only act on the descriptor if eventfd() had succeeded. */
132 if (event_fd != -1) {
136 memset(&io_ctx, '\0', sizeof(io_ctx));
/*
 * Per-request state attached to each tevent_req created by the
 * *_send functions in this file.
 *
 * NOTE(review): the completion handler also writes state->ret; that
 * member is not present in this view of the struct.
 */
140 struct aio_linux_state {
/* Control block prepared with io_prep_*() and handed to io_submit(). */
141 struct iocb event_iocb;
/* Error and duration filled in on completion; copied out by aio_linux_recv(). */
143 struct vfs_aio_state vfs_aio_state;
/* Timestamp taken with PROFILE_TIMESTAMP() right before io_submit(). */
144 struct timespec start;
/*
 * Start an asynchronous read on fsp->fh->fd using kernel AIO.
 *
 * handle:  VFS handle, passed on to init_aio_linux().
 * mem_ctx: talloc context the tevent_req is created on.
 * ev:      event context used when the request is posted on an error path.
 * fsp:     file to read from; fsp->fh->fd is the descriptor used.
 * data:    destination buffer.
 * n:       number of bytes requested.
 * offset:  file offset to read from.
 *
 * Returns the tevent_req. If init_aio_linux() fails the request is failed
 * with EIO; on the io_submit() error path it is failed with -ret. Both
 * error paths return via tevent_req_post().
 *
 * NOTE(review): the declarations of `piocb` and `ret`, the guards around
 * the error paths and the success return are not present in this view.
 */
147 static struct tevent_req *aio_linux_pread_send(
148 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
149 struct tevent_context *ev, struct files_struct *fsp,
150 void *data, size_t n, off_t offset)
152 struct tevent_req *req;
153 struct aio_linux_state *state;
157 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
161 if (!init_aio_linux(handle)) {
162 tevent_req_error(req, EIO);
163 return tevent_req_post(req, ev);
166 io_prep_pread(&state->event_iocb, fsp->fh->fd, data, n, offset);
/* Tie the iocb to the module's eventfd. */
167 io_set_eventfd(&state->event_iocb, event_fd);
/* The completion handler recovers the request from this data pointer. */
168 state->event_iocb.data = req;
/* io_submit() is given the address of this single iocb pointer. */
170 piocb = &state->event_iocb;
/* Start stamp used later to compute the request duration. */
172 PROFILE_TIMESTAMP(&state->start);
173 ret = io_submit(io_ctx, 1, &piocb);
/* The negated io_submit() result is used as the error value. */
175 tevent_req_error(req, -ret);
176 return tevent_req_post(req, ev);
/*
 * Start an asynchronous write on fsp->fh->fd using kernel AIO.
 *
 * handle:  VFS handle, passed on to init_aio_linux().
 * mem_ctx: talloc context the tevent_req is created on.
 * ev:      event context used when the request is posted on an error path.
 * fsp:     file to write to; fsp->fh->fd is the descriptor used.
 * data:    source buffer (const for the caller).
 * n:       number of bytes to write.
 * offset:  file offset to write at.
 *
 * Returns the tevent_req. If init_aio_linux() fails the request is failed
 * with EIO; on the io_submit() error path it is failed with -ret. Both
 * error paths return via tevent_req_post().
 *
 * NOTE(review): the declarations of `piocb` and `ret`, the remaining
 * io_prep_pwrite() arguments, the guards around the error paths and the
 * success return are not present in this view.
 */
183 static struct tevent_req *aio_linux_pwrite_send(
184 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
185 struct tevent_context *ev, struct files_struct *fsp,
186 const void *data, size_t n, off_t offset)
188 struct tevent_req *req;
189 struct aio_linux_state *state;
193 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
197 if (!init_aio_linux(handle)) {
198 tevent_req_error(req, EIO);
199 return tevent_req_post(req, ev);
/*
 * discard_const() strips the const qualifier from the caller's buffer,
 * presumably because io_prep_pwrite() takes a non-const pointer.
 */
202 io_prep_pwrite(&state->event_iocb, fsp->fh->fd, discard_const(data),
/* Tie the iocb to the module's eventfd. */
204 io_set_eventfd(&state->event_iocb, event_fd);
/* The completion handler recovers the request from this data pointer. */
205 state->event_iocb.data = req;
/* io_submit() is given the address of this single iocb pointer. */
207 piocb = &state->event_iocb;
/* Start stamp used later to compute the request duration. */
209 PROFILE_TIMESTAMP(&state->start);
210 ret = io_submit(io_ctx, 1, &piocb);
/* The negated io_submit() result is used as the error value. */
212 tevent_req_error(req, -ret);
213 return tevent_req_post(req, ev);
/*
 * Start an asynchronous fsync on fsp->fh->fd using kernel AIO.
 *
 * handle:  VFS handle, passed on to init_aio_linux().
 * mem_ctx: talloc context the tevent_req is created on.
 * ev:      event context used when the request is posted on an error path.
 * fsp:     file to sync; fsp->fh->fd is the descriptor used.
 *
 * Returns the tevent_req. If init_aio_linux() fails the request is failed
 * with EIO; on the io_submit() error path it is failed with -ret. Both
 * error paths return via tevent_req_post().
 *
 * NOTE(review): the declarations of `piocb` and `ret`, the guards around
 * the error paths and the success return are not present in this view.
 */
220 static struct tevent_req *aio_linux_fsync_send(
221 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
222 struct tevent_context *ev, struct files_struct *fsp)
224 struct tevent_req *req;
225 struct aio_linux_state *state;
229 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
233 if (!init_aio_linux(handle)) {
234 tevent_req_error(req, EIO);
235 return tevent_req_post(req, ev);
238 io_prep_fsync(&state->event_iocb, fsp->fh->fd);
/* Tie the iocb to the module's eventfd. */
239 io_set_eventfd(&state->event_iocb, event_fd);
/* The completion handler recovers the request from this data pointer. */
240 state->event_iocb.data = req;
/* io_submit() is given the address of this single iocb pointer. */
242 piocb = &state->event_iocb;
/* Start stamp used later to compute the request duration. */
244 PROFILE_TIMESTAMP(&state->start);
245 ret = io_submit(io_ctx, 1, &piocb);
/* The negated io_submit() result is used as the error value. */
247 tevent_req_error(req, -ret);
248 return tevent_req_post(req, ev);
/*
 * Completion handler: collects finished kernel AIO requests and completes
 * the matching tevent requests.
 *
 * It reads a uint64_t counter from event_fd and panics if the read does
 * not return exactly sizeof(uint64_t) bytes. While that counter is
 * positive it fetches one completion at a time with io_getevents(), using
 * a zeroed timespec as the timeout. For every completion the tevent_req
 * is recovered from finished.data, the result is stored in the request
 * state, the duration is computed and tevent_req_done() is called.
 *
 * The `end` timestamp is taken once, before the loop, so every request
 * completed in one invocation is measured against the same end time.
 *
 * NOTE(review): the declarations of `end` and `ret`, the handling of the
 * io_getevents() result, the update of num_events and the branch layout
 * around the result assignment are not present in this view.
 */
255 static void aio_linux_done(struct tevent_context *event_ctx,
256 struct tevent_fd *event,
257 uint16_t flags, void *private_data)
259 uint64_t num_events = 0;
262 DEBUG(10, ("aio_linux_done called with flags=%d\n",
265 PROFILE_TIMESTAMP(&end);
267 /* Read the number of events available. */
268 if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
269 sizeof(num_events)) {
/* NOTE(review): the message names aio_linux_handle_completion, not this function. */
270 smb_panic("aio_linux_handle_completion: invalid read");
273 while (num_events > 0) {
/* Zeroed timeout passed to io_getevents() below. */
274 struct timespec ts = { 0, };
275 struct io_event finished;
276 struct tevent_req *req;
277 struct aio_linux_state *state;
/* Ask for exactly one completion (min 1, max 1). */
280 ret = io_getevents(io_ctx, 1, 1, &finished, &ts);
282 DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
/* NOTE(review): "io_getvents" in the message below looks like a typo for io_getevents. */
287 DEBUG(10, ("aio_linux_done: io_getvents returned "
/* finished.data is the request pointer stored in the iocb by the *_send functions. */
294 req = talloc_get_type_abort(finished.data,
296 state = tevent_req_data(req, struct aio_linux_state);
/* A negative res is negated and stored as the error value. */
298 if (finished.res < 0) {
300 state->vfs_aio_state.error = -finished.res;
302 state->ret = finished.res;
304 state->vfs_aio_state.duration = nsec_time_diff(&end, &state->start);
305 tevent_req_done(req);
/*
 * Receive function used for both pread and pwrite requests.
 *
 * req:           the request returned by one of the *_send functions.
 * vfs_aio_state: output; receives the error from
 *                tevent_req_is_unix_error() when the request failed,
 *                otherwise a copy of the vfs_aio_state stored in the
 *                request state.
 *
 * Returns an ssize_t.
 *
 * NOTE(review): the return statements are not present in this view;
 * presumably -1 on error and state->ret otherwise -- TODO confirm.
 */
310 static ssize_t aio_linux_recv(struct tevent_req *req,
311 struct vfs_aio_state *vfs_aio_state)
313 struct aio_linux_state *state = tevent_req_data(
314 req, struct aio_linux_state);
316 if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
319 *vfs_aio_state = state->vfs_aio_state;
/*
 * Receive function with an int return type, used for the fsync slot of
 * the VFS function table. It forwards to aio_linux_recv() and returns
 * its ssize_t result converted to int.
 */
323 static int aio_linux_int_recv(struct tevent_req *req,
324 struct vfs_aio_state *vfs_aio_state)
327 * Use implicit conversion ssize_t->int
329 return aio_linux_recv(req, vfs_aio_state);
/*
 * VFS operations supplied by this module: the asynchronous send/recv
 * pairs for pread, pwrite and fsync. pread and pwrite share
 * aio_linux_recv; fsync uses the int-returning wrapper.
 */
332 static struct vfs_fn_pointers vfs_aio_linux_fns = {
333 .pread_send_fn = aio_linux_pread_send,
334 .pread_recv_fn = aio_linux_recv,
335 .pwrite_send_fn = aio_linux_pwrite_send,
336 .pwrite_recv_fn = aio_linux_recv,
337 .fsync_send_fn = aio_linux_fsync_send,
338 .fsync_recv_fn = aio_linux_int_recv,
/*
 * Module entry point: registers the function table above under the name
 * "aio_linux" for SMB_VFS_INTERFACE_VERSION and returns the NTSTATUS
 * produced by smb_register_vfs().
 */
342 NTSTATUS vfs_aio_linux_init(void)
344 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
345 "aio_linux", &vfs_aio_linux_fns);