2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "smbd/globals.h"
29 #include "../lib/pthreadpool/pthreadpool_tevent.h"
30 #ifdef HAVE_LINUX_FALLOC_H
31 #include <linux/falloc.h>
#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
37 * We must have openat() to do any thread-based
38 * asynchronous opens. We also must be using
39 * thread-specific credentials (Linux-only
/*
 * Per-request bookkeeping for one asynchronous open.  One of these lives
 * on the global open_pd_list from open_async() until find_completed_open()
 * (or the completion handler's cancel path) disposes of it.
 *
 * NOTE(review): this extract is missing the fields originally on lines
 * 45-52 (mid, in_progress, dname, dir_fd, fname, flags, mode, ret_fd,
 * ret_errno are referenced elsewhere in this file) and the closing brace.
 * Only the fields below are visible here.
 */
43 struct aio_open_private_data {
/* Linkage for the global open_pd_list (DLIST_ADD_END / DLIST_REMOVE). */
44 struct aio_open_private_data *prev, *next;
/* Connection the open arrived on; used to re-become the user on completion. */
53 connection_struct *conn;
/* Credentials snapshot taken at submit time; applied on the worker thread. */
54 const struct security_unix_token *ux_tok;
/* Requested initial allocation; handed to fallocate() after a successful create. */
55 uint64_t initial_allocation_size;
61 /* List of outstanding requests we have. */
/* Global singly-owned list head; entries are added in open_async() and
 * removed by opd_destructor().  No locking visible here — presumably only
 * touched from the main tevent thread (TODO confirm). */
62 static struct aio_open_private_data *open_pd_list;
/* Forward declaration: aio_open_do() is the sync core, also used as the
 * EAGAIN fallback in the completion handler. */
64 static void aio_open_do(struct aio_open_private_data *opd);
66 /************************************************************************
67 Find the open private data by mid.
68 ***********************************************************************/
70 static struct aio_open_private_data *find_open_private_data_by_mid(uint64_t mid)
72 struct aio_open_private_data *opd;
74 for (opd = open_pd_list; opd != NULL; opd = opd->next) {
75 if (opd->mid == mid) {
83 /************************************************************************
84 Callback when an open completes.
85 ***********************************************************************/
87 static void aio_open_handle_completion(struct tevent_req *subreq)
89 struct aio_open_private_data *opd =
90 tevent_req_callback_data(subreq,
91 struct aio_open_private_data);
93 struct smbXsrv_connection *xconn;
95 ret = pthreadpool_tevent_job_recv(subreq);
101 smb_panic("aio_open_handle_completion");
106 * Make sure we run as the user again
108 ok = change_to_user(opd->conn, opd->conn->vuid);
110 smb_panic("Can't change to user");
114 * If we get EAGAIN from pthreadpool_tevent_job_recv() this
115 * means the lower level pthreadpool failed to create a new
116 * thread. Fallback to sync processing in that case to allow
117 * some progress for the client.
122 DEBUG(10,("aio_open_handle_completion: mid %llu "
123 "for file %s/%s completed\n",
124 (unsigned long long)opd->mid,
128 opd->in_progress = false;
131 * TODO: In future we need a proper algorithm
132 * to find the correct connection for a fsp.
133 * For now we only have one connection, so this is correct...
135 xconn = opd->conn->sconn->client->connections;
137 /* Find outstanding event and reschedule. */
138 if (!schedule_deferred_open_message_smb(xconn, opd->mid)) {
140 * Outstanding event didn't exist or was
141 * cancelled. Free up the fd and throw
144 if (opd->ret_fd != -1) {
152 /*****************************************************************
153 The core of the async open code - the worker function. Note we
154 use the new openat() system call to avoid any problems with
155 current working directory changes plus we change credentials
156 on the thread to prevent any security race conditions.
157 *****************************************************************/
159 static void aio_open_worker(void *private_data)
161 struct aio_open_private_data *opd =
162 (struct aio_open_private_data *)private_data;
164 /* Become the correct credential on this thread. */
165 if (set_thread_credentials(opd->ux_tok->uid,
167 (size_t)opd->ux_tok->ngroups,
168 opd->ux_tok->groups) != 0) {
170 opd->ret_errno = errno;
/*
 * The actual open, relative to the pre-opened parent directory fd so we
 * are immune to cwd changes in other threads.  Records the resulting fd
 * (or -1 + errno) in the opd for the completion path to pick up.
 *
 * NOTE(review): openat()'s fname/flags/mode arguments (original lines
 * 180-183) and the closing braces are missing from this extract.
 */
177 static void aio_open_do(struct aio_open_private_data *opd)
179 opd->ret_fd = openat(opd->dir_fd,
184 if (opd->ret_fd == -1) {
/* Save errno immediately — later library calls may clobber it. */
185 opd->ret_errno = errno;
187 /* Create was successful. */
190 #if defined(HAVE_LINUX_FALLOCATE)
192 * See if we can set the initial
193 * allocation size. We don't record
194 * the return for this as it's an
195 * optimization - the upper layer
196 * will also do this for us once
/* Best effort only: return value deliberately ignored (see comment above). */
199 if (opd->initial_allocation_size) {
200 (void)fallocate(opd->ret_fd,
203 (off_t)opd->initial_allocation_size);
209 /************************************************************************
210 Open private data destructor.
211 ***********************************************************************/
213 static int opd_destructor(struct aio_open_private_data *opd)
215 if (opd->dir_fd != -1) {
218 DLIST_REMOVE(open_pd_list, opd);
222 /************************************************************************
223 Create and initialize a private data struct for async open.
224 ***********************************************************************/
226 static struct aio_open_private_data *create_private_open_data(const files_struct *fsp,
230 struct aio_open_private_data *opd = talloc_zero(NULL,
231 struct aio_open_private_data);
232 const char *fname = NULL;
238 *opd = (struct aio_open_private_data) {
241 .ret_errno = EINPROGRESS,
247 .initial_allocation_size = fsp->initial_allocation_size,
250 /* Copy our current credentials. */
251 opd->ux_tok = copy_unix_token(opd, get_current_utok(fsp->conn));
252 if (opd->ux_tok == NULL) {
258 * Copy the parent directory name and the
259 * relative path within it.
261 if (parent_dirname(opd,
262 fsp->fsp_name->base_name,
268 opd->fname = talloc_strdup(opd, fname);
269 if (opd->fname == NULL) {
274 #if defined(O_DIRECTORY)
275 opd->dir_fd = open(opd->dname, O_RDONLY|O_DIRECTORY);
277 opd->dir_fd = open(opd->dname, O_RDONLY);
279 if (opd->dir_fd == -1) {
284 talloc_set_destructor(opd, opd_destructor);
285 DLIST_ADD_END(open_pd_list, opd);
289 /*****************************************************************
291 *****************************************************************/
293 static int open_async(const files_struct *fsp,
297 struct aio_open_private_data *opd = NULL;
298 struct tevent_req *subreq = NULL;
300 opd = create_private_open_data(fsp, flags, mode);
302 DEBUG(10, ("open_async: Could not create private data.\n"));
306 subreq = pthreadpool_tevent_job_send(opd,
307 fsp->conn->sconn->ev_ctx,
308 fsp->conn->sconn->pool,
309 aio_open_worker, opd);
310 if (subreq == NULL) {
313 tevent_req_set_callback(subreq, aio_open_handle_completion, opd);
315 DEBUG(5,("open_async: mid %llu created for file %s/%s\n",
316 (unsigned long long)opd->mid,
320 /* Cause the calling code to reschedule us. */
321 errno = EINTR; /* Maps to NT_STATUS_RETRY. */
325 /*****************************************************************
326 Look for a matching SMB2 mid. If we find it we're rescheduled,
327 just return the completed open.
328 *****************************************************************/
330 static bool find_completed_open(files_struct *fsp,
334 struct aio_open_private_data *opd;
336 opd = find_open_private_data_by_mid(fsp->mid);
341 if (opd->in_progress) {
342 DEBUG(0,("find_completed_open: mid %llu "
343 "still in progress for "
344 "file %s/%s. PANIC !\n",
345 (unsigned long long)opd->mid,
348 /* Disaster ! This is an open timeout. Just panic. */
349 smb_panic("find_completed_open - in_progress\n");
355 *p_errno = opd->ret_errno;
357 DEBUG(5,("find_completed_open: mid %llu returning "
358 "fd = %d, errno = %d (%s) "
360 (unsigned long long)opd->mid,
363 strerror(opd->ret_errno),
364 smb_fname_str_dbg(fsp->fsp_name)));
366 /* Now we can free the opd. */
371 /*****************************************************************
372 The core open function. Only go async on O_CREAT|O_EXCL
373 opens to prevent any race conditions.
374 *****************************************************************/
376 static int aio_pthread_open_fn(vfs_handle_struct *handle,
377 struct smb_filename *smb_fname,
384 bool aio_allow_open = lp_parm_bool(
385 SNUM(handle->conn), "aio_pthread", "aio open", false);
387 if (smb_fname->stream_name) {
388 /* Don't handle stream opens. */
393 if (!aio_allow_open) {
394 /* aio opens turned off. */
395 return open(smb_fname->base_name, flags, mode);
398 if (!(flags & O_CREAT)) {
399 /* Only creates matter. */
400 return open(smb_fname->base_name, flags, mode);
403 if (!(flags & O_EXCL)) {
404 /* Only creates with O_EXCL matter. */
405 return open(smb_fname->base_name, flags, mode);
409 * See if this is a reentrant call - i.e. is this a
410 * restart of an existing open that just completed.
413 if (find_completed_open(fsp,
420 /* Ok, it's a create exclusive call - pass it to a thread helper. */
421 return open_async(fsp, flags, mode);
/* VFS dispatch table: only the open hook is overridden, and only when the
 * platform supports openat() + per-thread credentials; everything else
 * falls through to the next module in the chain.
 * NOTE(review): the closing "#endif" and "};" are missing from this extract. */
425 static struct vfs_fn_pointers vfs_aio_pthread_fns = {
426 #if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
427 .open_fn = aio_pthread_open_fn,
/* Module entry point: registers this VFS module under the name
 * "aio_pthread".  NOTE(review): closing brace missing from this extract. */
432 NTSTATUS vfs_aio_pthread_init(TALLOC_CTX *ctx)
434 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
435 "aio_pthread", &vfs_aio_pthread_fns);