Revert "smbd: explain that/why we use the raw tevent_context for lease_timeout_handler()"
[samba.git] / source3 / modules / vfs_aio_pthread.c
1 /*
2  * Simulate Posix AIO using pthreads.
3  *
4  * Based on the aio_fork work from Volker and Volker's pthreadpool library.
5  *
6  * Copyright (C) Volker Lendecke 2008
7  * Copyright (C) Jeremy Allison 2012
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "smbd/globals.h"
29 #include "../lib/pthreadpool/pthreadpool_tevent.h"
30 #ifdef HAVE_LINUX_FALLOC_H
31 #include <linux/falloc.h>
32 #endif
33
34 #if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
35
36 /*
37  * We must have openat() to do any thread-based
38  * asynchronous opens. We also must be using
39  * thread-specific credentials (Linux-only
40  * for now).
41  */
42
/*
 * State for one in-flight asynchronous open. Entries live on
 * open_pd_list and are looked up again by SMB2 mid when the
 * deferred open is rescheduled.
 */
struct aio_open_private_data {
	struct aio_open_private_data *prev, *next;
	/* Inputs. */
	int dir_fd;		/* fd on the parent directory, used by openat(). */
	int flags;		/* open(2) flags requested. */
	mode_t mode;		/* create mode. */
	uint64_t mid;		/* SMB2 message-id; the list lookup key. */
	bool in_progress;	/* true while the worker thread is still running. */
	const char *fname;	/* filename relative to dname. */
	char *dname;		/* parent directory path. */
	struct smbd_server_connection *sconn;
	const struct security_unix_token *ux_tok; /* creds adopted by the worker thread. */
	uint64_t initial_allocation_size;	/* hint for fallocate() after create. */
	/* Returns. */
	int ret_fd;		/* resulting fd on success, -1 otherwise. */
	int ret_errno;		/* errno on failure; EINPROGRESS until the worker finishes. */
};

/* List of outstanding requests we have. */
static struct aio_open_private_data *open_pd_list;
63
64 /************************************************************************
65  Find the open private data by mid.
66 ***********************************************************************/
67
68 static struct aio_open_private_data *find_open_private_data_by_mid(uint64_t mid)
69 {
70         struct aio_open_private_data *opd;
71
72         for (opd = open_pd_list; opd != NULL; opd = opd->next) {
73                 if (opd->mid == mid) {
74                         return opd;
75                 }
76         }
77
78         return NULL;
79 }
80
81 /************************************************************************
82  Callback when an open completes.
83 ***********************************************************************/
84
85 static void aio_open_handle_completion(struct tevent_req *subreq)
86 {
87         struct aio_open_private_data *opd =
88                 tevent_req_callback_data(subreq,
89                 struct aio_open_private_data);
90         int ret;
91         struct smbXsrv_connection *xconn;
92
93         ret = pthreadpool_tevent_job_recv(subreq);
94         TALLOC_FREE(subreq);
95         if (ret != 0) {
96                 smb_panic("aio_open_handle_completion");
97                 /* notreached. */
98                 return;
99         }
100
101         DEBUG(10,("aio_open_handle_completion: mid %llu "
102                 "for file %s/%s completed\n",
103                 (unsigned long long)opd->mid,
104                 opd->dname,
105                 opd->fname));
106
107         opd->in_progress = false;
108
109         /*
110          * TODO: In future we need a proper algorithm
111          * to find the correct connection for a fsp.
112          * For now we only have one connection, so this is correct...
113          */
114         xconn = opd->sconn->client->connections;
115
116         /* Find outstanding event and reschedule. */
117         if (!schedule_deferred_open_message_smb(xconn, opd->mid)) {
118                 /*
119                  * Outstanding event didn't exist or was
120                  * cancelled. Free up the fd and throw
121                  * away the result.
122                  */
123                 if (opd->ret_fd != -1) {
124                         close(opd->ret_fd);
125                         opd->ret_fd = -1;
126                 }
127                 TALLOC_FREE(opd);
128         }
129 }
130
131 /*****************************************************************
132  The core of the async open code - the worker function. Note we
133  use the new openat() system call to avoid any problems with
134  current working directory changes plus we change credentials
135  on the thread to prevent any security race conditions.
136 *****************************************************************/
137
138 static void aio_open_worker(void *private_data)
139 {
140         struct aio_open_private_data *opd =
141                 (struct aio_open_private_data *)private_data;
142
143         /* Become the correct credential on this thread. */
144         if (set_thread_credentials(opd->ux_tok->uid,
145                                 opd->ux_tok->gid,
146                                 (size_t)opd->ux_tok->ngroups,
147                                 opd->ux_tok->groups) != 0) {
148                 opd->ret_fd = -1;
149                 opd->ret_errno = errno;
150                 return;
151         }
152
153         opd->ret_fd = openat(opd->dir_fd,
154                         opd->fname,
155                         opd->flags,
156                         opd->mode);
157
158         if (opd->ret_fd == -1) {
159                 opd->ret_errno = errno;
160         } else {
161                 /* Create was successful. */
162                 opd->ret_errno = 0;
163
164 #if defined(HAVE_LINUX_FALLOCATE)
165                 /*
166                  * See if we can set the initial
167                  * allocation size. We don't record
168                  * the return for this as it's an
169                  * optimization - the upper layer
170                  * will also do this for us once
171                  * the open returns.
172                  */
173                 if (opd->initial_allocation_size) {
174                         (void)fallocate(opd->ret_fd,
175                                         FALLOC_FL_KEEP_SIZE,
176                                         0,
177                                         (off_t)opd->initial_allocation_size);
178                 }
179 #endif
180         }
181 }
182
183 /************************************************************************
184  Open private data destructor.
185 ***********************************************************************/
186
187 static int opd_destructor(struct aio_open_private_data *opd)
188 {
189         if (opd->dir_fd != -1) {
190                 close(opd->dir_fd);
191         }
192         DLIST_REMOVE(open_pd_list, opd);
193         return 0;
194 }
195
196 /************************************************************************
197  Create and initialize a private data struct for async open.
198 ***********************************************************************/
199
200 static struct aio_open_private_data *create_private_open_data(const files_struct *fsp,
201                                         int flags,
202                                         mode_t mode)
203 {
204         struct aio_open_private_data *opd = talloc_zero(NULL,
205                                         struct aio_open_private_data);
206         const char *fname = NULL;
207
208         if (!opd) {
209                 return NULL;
210         }
211
212         opd->dir_fd = -1;
213         opd->ret_fd = -1;
214         opd->ret_errno = EINPROGRESS;
215         opd->flags = flags;
216         opd->mode = mode;
217         opd->mid = fsp->mid;
218         opd->in_progress = true;
219         opd->sconn = fsp->conn->sconn;
220         opd->initial_allocation_size = fsp->initial_allocation_size;
221
222         /* Copy our current credentials. */
223         opd->ux_tok = copy_unix_token(opd, get_current_utok(fsp->conn));
224         if (opd->ux_tok == NULL) {
225                 TALLOC_FREE(opd);
226                 return NULL;
227         }
228
229         /*
230          * Copy the parent directory name and the
231          * relative path within it.
232          */
233         if (parent_dirname(opd,
234                         fsp->fsp_name->base_name,
235                         &opd->dname,
236                         &fname) == false) {
237                 TALLOC_FREE(opd);
238                 return NULL;
239         }
240         opd->fname = talloc_strdup(opd, fname);
241         if (opd->fname == NULL) {
242                 TALLOC_FREE(opd);
243                 return NULL;
244         }
245
246 #if defined(O_DIRECTORY)
247         opd->dir_fd = open(opd->dname, O_RDONLY|O_DIRECTORY);
248 #else
249         opd->dir_fd = open(opd->dname, O_RDONLY);
250 #endif
251         if (opd->dir_fd == -1) {
252                 TALLOC_FREE(opd);
253                 return NULL;
254         }
255
256         talloc_set_destructor(opd, opd_destructor);
257         DLIST_ADD_END(open_pd_list, opd);
258         return opd;
259 }
260
261 /*****************************************************************
262  Setup an async open.
263 *****************************************************************/
264
265 static int open_async(const files_struct *fsp,
266                         int flags,
267                         mode_t mode)
268 {
269         struct aio_open_private_data *opd = NULL;
270         struct tevent_req *subreq = NULL;
271
272         opd = create_private_open_data(fsp, flags, mode);
273         if (opd == NULL) {
274                 DEBUG(10, ("open_async: Could not create private data.\n"));
275                 return -1;
276         }
277
278         subreq = pthreadpool_tevent_job_send(opd,
279                                              fsp->conn->sconn->ev_ctx,
280                                              fsp->conn->sconn->pool,
281                                              aio_open_worker, opd);
282         if (subreq == NULL) {
283                 return -1;
284         }
285         tevent_req_set_callback(subreq, aio_open_handle_completion, opd);
286
287         DEBUG(5,("open_async: mid %llu created for file %s/%s\n",
288                 (unsigned long long)opd->mid,
289                 opd->dname,
290                 opd->fname));
291
292         /* Cause the calling code to reschedule us. */
293         errno = EINTR; /* Maps to NT_STATUS_RETRY. */
294         return -1;
295 }
296
297 /*****************************************************************
298  Look for a matching SMB2 mid. If we find it we're rescheduled,
299  just return the completed open.
300 *****************************************************************/
301
302 static bool find_completed_open(files_struct *fsp,
303                                 int *p_fd,
304                                 int *p_errno)
305 {
306         struct aio_open_private_data *opd;
307
308         opd = find_open_private_data_by_mid(fsp->mid);
309         if (!opd) {
310                 return false;
311         }
312
313         if (opd->in_progress) {
314                 DEBUG(0,("find_completed_open: mid %llu "
315                         "still in progress for "
316                         "file %s/%s. PANIC !\n",
317                         (unsigned long long)opd->mid,
318                         opd->dname,
319                         opd->fname));
320                 /* Disaster ! This is an open timeout. Just panic. */
321                 smb_panic("find_completed_open - in_progress\n");
322                 /* notreached. */
323                 return false;
324         }
325
326         *p_fd = opd->ret_fd;
327         *p_errno = opd->ret_errno;
328
329         DEBUG(5,("find_completed_open: mid %llu returning "
330                 "fd = %d, errno = %d (%s) "
331                 "for file %s\n",
332                 (unsigned long long)opd->mid,
333                 opd->ret_fd,
334                 opd->ret_errno,
335                 strerror(opd->ret_errno),
336                 smb_fname_str_dbg(fsp->fsp_name)));
337
338         /* Now we can free the opd. */
339         TALLOC_FREE(opd);
340         return true;
341 }
342
343 /*****************************************************************
344  The core open function. Only go async on O_CREAT|O_EXCL
345  opens to prevent any race conditions.
346 *****************************************************************/
347
348 static int aio_pthread_open_fn(vfs_handle_struct *handle,
349                         struct smb_filename *smb_fname,
350                         files_struct *fsp,
351                         int flags,
352                         mode_t mode)
353 {
354         int my_errno = 0;
355         int fd = -1;
356         bool aio_allow_open = lp_parm_bool(
357                 SNUM(handle->conn), "aio_pthread", "aio open", false);
358
359         if (smb_fname->stream_name) {
360                 /* Don't handle stream opens. */
361                 errno = ENOENT;
362                 return -1;
363         }
364
365         if (!aio_allow_open) {
366                 /* aio opens turned off. */
367                 return open(smb_fname->base_name, flags, mode);
368         }
369
370         if (!(flags & O_CREAT)) {
371                 /* Only creates matter. */
372                 return open(smb_fname->base_name, flags, mode);
373         }
374
375         if (!(flags & O_EXCL)) {
376                 /* Only creates with O_EXCL matter. */
377                 return open(smb_fname->base_name, flags, mode);
378         }
379
380         /*
381          * See if this is a reentrant call - i.e. is this a
382          * restart of an existing open that just completed.
383          */
384
385         if (find_completed_open(fsp,
386                                 &fd,
387                                 &my_errno)) {
388                 errno = my_errno;
389                 return fd;
390         }
391
392         /* Ok, it's a create exclusive call - pass it to a thread helper. */
393         return open_async(fsp, flags, mode);
394 }
395 #endif
396
/*
 * VFS operations table. Only open is overridden, and only when the
 * platform supports openat() and per-thread credentials; every other
 * operation falls through to the next VFS module.
 */
static struct vfs_fn_pointers vfs_aio_pthread_fns = {
#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
	.open_fn = aio_pthread_open_fn,
#endif
};
402
static_decl_vfs;
/* Module entry point: register this VFS backend as "aio_pthread". */
NTSTATUS vfs_aio_pthread_init(TALLOC_CTX *ctx)
{
	return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
				"aio_pthread", &vfs_aio_pthread_fns);
}