2 Unix SMB/Netbios implementation.
4 async_io read handling using POSIX async io.
5 Copyright (C) Jeremy Allison 2005.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "smbd/globals.h"
26 /* The signal we'll use to signify aio done. */
/* RT_SIGNAL_AIO is the value stored in aio_sigevent.sigev_signo when a read
   or write is scheduled in this file, and the signal number registered with
   tevent_add_signal() in initialize_async_io_handler(). */
28 #define RT_SIGNAL_AIO (SIGRTMIN+3)
/* Member-name compatibility: when the ..._SIVAL_PTR feature macro is not
   defined but the ..._SIGVAL_PTR one is, the sival_int / sival_ptr names used
   in this file are mapped onto sigval_int / sigval_ptr.
   NOTE(review): the matching #endif lines are not visible here. */
31 #ifndef HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIVAL_PTR
32 #ifdef HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIGVAL_PTR
33 #define sival_int sigval_int
34 #define sival_ptr sigval_ptr
38 /****************************************************************************
39 The buffer we keep around whilst an aio request is in process.
40 *****************************************************************************/
/* Visible members of struct aio_extra.
   NOTE(review): the struct header and the members acb, fsp and outbuf (all
   referenced elsewhere in this file) are not visible here. */
/* List links: records are chained on aio_list_head with DLIST_ADD and
   DLIST_REMOVE and walked through ->next. */
43 struct aio_extra *next, *prev;
/* The SMB request being serviced; it is moved under this record with
   talloc_move() once the aio call has been issued successfully. */
46 struct smb_request *req;
/* Completion callback, set to handle_aio_read_complete or
   handle_aio_write_complete when the request is scheduled and invoked from
   handle_aio_completed() with the aio error code. */
48 int (*handle_completion)(struct aio_extra *ex, int errcode);
/* Forward declarations: the schedule_aio_*_and_X() functions store these
   handlers in aio_extra.handle_completion before the definitions appear
   later in the file. */
51 static int handle_aio_read_complete(struct aio_extra *aio_ex, int errcode);
52 static int handle_aio_write_complete(struct aio_extra *aio_ex, int errcode);
/* talloc destructor for an aio_extra record (registered by
   create_aio_extra()): unlinks the record from the aio_list_head list.
   NOTE(review): the return statement is not visible here. */
54 static int aio_extra_destructor(struct aio_extra *aio_ex)
56 DLIST_REMOVE(aio_list_head, aio_ex);
60 /****************************************************************************
61 Create the extended aio struct we must keep around for the lifetime
63 *****************************************************************************/
/* Allocate a zero-filled aio_extra (talloc parent NULL) together with a
   buflen-byte reply buffer hung off it, link it onto aio_list_head and
   register aio_extra_destructor so that freeing the record unlinks it.

   fsp:    not used in the visible lines - presumably stored in aio_ex->fsp;
           TODO confirm.
   buflen: size in bytes of the outbuf array to allocate.

   Callers test the result against NULL, so allocation failure is presumably
   reported by returning NULL; the return statements and the cleanup taken
   when the outbuf allocation fails are not visible here. */
65 static struct aio_extra *create_aio_extra(files_struct *fsp, size_t buflen)
67 struct aio_extra *aio_ex = TALLOC_ZERO_P(NULL, struct aio_extra);
73 /* The output buffer stored in the aio_ex is the start of
74 the smb return buffer. The buffer used in the acb
75 is the start of the reply data portion of that buffer. */
/* outbuf is a talloc child of aio_ex. */
77 aio_ex->outbuf = TALLOC_ARRAY(aio_ex, char, buflen);
78 if (!aio_ex->outbuf) {
/* Publish the record on the global list; the destructor undoes this. */
82 DLIST_ADD(aio_list_head, aio_ex);
83 talloc_set_destructor(aio_ex, aio_extra_destructor);
88 /****************************************************************************
89 Given the mid find the extended aio struct containing it.
90 *****************************************************************************/
/* Linear search of aio_list_head for the record whose request mid equals
   the given mid. Callers handle a NULL result, so presumably the match is
   returned or NULL when there is none; the return statements are not visible
   here. */
92 static struct aio_extra *find_aio_ex(uint16 mid)
96 for( p = aio_list_head; p; p = p->next) {
97 if (mid == p->req->mid) {
104 /****************************************************************************
105 We can have this many aio buffers in flight.
106 *****************************************************************************/
108 /****************************************************************************
109 Set up an aio request from a SMBreadX call.
110 *****************************************************************************/
/* Try to service an SMBreadX request with an asynchronous read.

   Visible eligibility checks, in order:
     - fsp must not be a stream handle (fsp->base_fsp != NULL is rejected);
     - smb_maxcnt must reach lp_aio_read_size() (a zero setting disables
       aio), unless SMB_VFS_AIO_FORCE(fsp) is set;
     - the request must not be part of a chain and the share's write cache
       size must be zero;
     - outstanding_aio_calls must be below aio_pending_size.

   When eligible, an aio_extra with a reply buffer of
   smb_size + 12*2 + smb_maxcnt bytes is created, the reply header is built
   in it, and the aiocb is set up to read smb_maxcnt bytes at startpos into
   the data portion of that buffer, with completion signalled through
   RT_SIGNAL_AIO carrying req->mid. If SMB_VFS_AIO_READ is accepted,
   outstanding_aio_calls is incremented and ownership of req is moved to the
   aio_extra record.

   NOTE(review): the return statements are not visible here; presumably
   false tells the caller to fall back to a synchronous read and true means
   the read was queued - TODO confirm. */
112 bool schedule_aio_read_and_X(connection_struct *conn,
113 struct smb_request *req,
114 files_struct *fsp, SMB_OFF_T startpos,
117 struct aio_extra *aio_ex;
120 size_t min_aio_read_size = lp_aio_read_size(SNUM(conn));
123 if (fsp->base_fsp != NULL) {
124 /* No AIO on streams yet */
125 DEBUG(10, ("AIO on streams not yet supported\n"));
129 if ((!min_aio_read_size || (smb_maxcnt < min_aio_read_size))
130 && !SMB_VFS_AIO_FORCE(fsp)) {
131 /* Too small a read for aio request. */
132 DEBUG(10,("schedule_aio_read_and_X: read size (%u) too small "
133 "for minimum aio_read of %u\n",
134 (unsigned int)smb_maxcnt,
135 (unsigned int)min_aio_read_size ));
139 /* Only do this on non-chained and non-chaining reads not using the
141 if (req_is_in_chain(req) || (lp_write_cache_size(SNUM(conn)) != 0)) {
145 if (outstanding_aio_calls >= aio_pending_size) {
146 DEBUG(10,("schedule_aio_read_and_X: Already have %d aio "
147 "activities outstanding.\n",
148 outstanding_aio_calls ));
152 /* The following is safe from integer wrap as we've already checked
153 smb_maxcnt is 128k or less. Wct is 12 for read replies */
155 bufsize = smb_size + 12 * 2 + smb_maxcnt;
157 if ((aio_ex = create_aio_extra(fsp, bufsize)) == NULL) {
158 DEBUG(10,("schedule_aio_read_and_X: malloc fail.\n"));
/* Dispatch target used by handle_aio_completed() when this read finishes. */
161 aio_ex->handle_completion = handle_aio_read_complete;
/* Pre-build the fixed part of the reply: 12 parameter words, no data yet. */
163 construct_reply_common_req(req, aio_ex->outbuf);
164 srv_set_message(aio_ex->outbuf, 12, 0, True);
165 SCVAL(aio_ex->outbuf,smb_vwv0,0xFF); /* Never a chained reply. */
169 /* Now set up the aio record for the read call. */
171 a->aio_fildes = fsp->fh->fd;
172 a->aio_buf = smb_buf(aio_ex->outbuf);
173 a->aio_nbytes = smb_maxcnt;
174 a->aio_offset = startpos;
175 a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
176 a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
/* The mid travels with the signal so the handler can find this record. */
177 a->aio_sigevent.sigev_value.sival_int = req->mid;
179 ret = SMB_VFS_AIO_READ(fsp, a);
181 DEBUG(0,("schedule_aio_read_and_X: aio_read failed. "
182 "Error %s\n", strerror(errno) ));
/* Queued: account for it and take ownership of the request. */
187 outstanding_aio_calls++;
188 aio_ex->req = talloc_move(aio_ex, &req);
190 DEBUG(10,("schedule_aio_read_and_X: scheduled aio_read for file %s, "
191 "offset %.0f, len = %u (mid = %u)\n",
192 fsp_str_dbg(fsp), (double)startpos, (unsigned int)smb_maxcnt,
193 (unsigned int)aio_ex->req->mid ));
198 /****************************************************************************
199 Set up an aio request from a SMBwriteX call.
200 *****************************************************************************/
/* Try to service an SMBwriteX request with an asynchronous write.

   Visible eligibility checks mirror the read path: no stream handles,
   numtowrite must reach lp_aio_write_size() (zero disables aio) unless
   SMB_VFS_AIO_FORCE(fsp) is set, the request must not be chained and the
   write cache size must be zero, and outstanding_aio_calls must be below
   aio_pending_size.

   When eligible, an aio_extra with a 6-word reply buffer is created and the
   aiocb is set up to write numtowrite bytes at startpos, with completion
   signalled through RT_SIGNAL_AIO carrying req->mid. After a successful
   SMB_VFS_AIO_WRITE the request is moved to the aio_extra record. If the
   client did not ask for write-through, "sync always" is off and
   fsp->aio_write_behind is set, the success reply is sent immediately,
   before the write has completed.

   data: not used in the visible lines - presumably assigned to a->aio_buf;
         TODO confirm.

   NOTE(review): the return statements are not visible here; presumably
   false tells the caller to fall back to a synchronous write and true means
   the write was queued - TODO confirm. */
202 bool schedule_aio_write_and_X(connection_struct *conn,
203 struct smb_request *req,
204 files_struct *fsp, char *data,
208 struct aio_extra *aio_ex;
/* Write-through flag: bit 0 of request parameter word 7. */
211 bool write_through = BITSETW(req->vwv+7,0);
212 size_t min_aio_write_size = lp_aio_write_size(SNUM(conn));
215 if (fsp->base_fsp != NULL) {
216 /* No AIO on streams yet */
217 DEBUG(10, ("AIO on streams not yet supported\n"));
221 if ((!min_aio_write_size || (numtowrite < min_aio_write_size))
222 && !SMB_VFS_AIO_FORCE(fsp)) {
223 /* Too small a write for aio request. */
224 DEBUG(10,("schedule_aio_write_and_X: write size (%u) too "
225 "small for minimum aio_write of %u\n",
226 (unsigned int)numtowrite,
227 (unsigned int)min_aio_write_size ));
231 /* Only do this on non-chained and non-chaining writes not using the
233 if (req_is_in_chain(req) || (lp_write_cache_size(SNUM(conn)) != 0)) {
237 if (outstanding_aio_calls >= aio_pending_size) {
238 DEBUG(3,("schedule_aio_write_and_X: Already have %d aio "
239 "activities outstanding.\n",
240 outstanding_aio_calls ));
241 DEBUG(10,("schedule_aio_write_and_X: failed to schedule "
242 "aio_write for file %s, offset %.0f, len = %u "
244 fsp_str_dbg(fsp), (double)startpos,
245 (unsigned int)numtowrite,
246 (unsigned int)req->mid ));
250 bufsize = smb_size + 6*2;
252 if (!(aio_ex = create_aio_extra(fsp, bufsize))) {
253 DEBUG(0,("schedule_aio_write_and_X: malloc fail.\n"));
256 aio_ex->handle_completion = handle_aio_write_complete;
258 construct_reply_common_req(req, aio_ex->outbuf);
259 srv_set_message(aio_ex->outbuf, 6, 0, True);
260 SCVAL(aio_ex->outbuf,smb_vwv0,0xFF); /* Never a chained reply. */
264 /* Now set up the aio record for the write call. */
266 a->aio_fildes = fsp->fh->fd;
268 a->aio_nbytes = numtowrite;
269 a->aio_offset = startpos;
270 a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
271 a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
272 a->aio_sigevent.sigev_value.sival_int = req->mid;
274 ret = SMB_VFS_AIO_WRITE(fsp, a);
/* NOTE(review): the log text below says "wrote" in the function name -
   looks like a typo for "write". */
276 DEBUG(3,("schedule_aio_wrote_and_X: aio_write failed. "
277 "Error %s\n", strerror(errno) ));
/* Queued: account for it and take ownership of the request. */
282 outstanding_aio_calls++;
283 aio_ex->req = talloc_move(aio_ex, &req);
285 /* This should actually be improved to span the write. */
286 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
287 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
/* Write-behind: reply now with the full byte count, ahead of completion. */
289 if (!write_through && !lp_syncalways(SNUM(fsp->conn))
290 && fsp->aio_write_behind) {
291 /* Lie to the client and immediately claim we finished the
293 SSVAL(aio_ex->outbuf,smb_vwv2,numtowrite);
294 SSVAL(aio_ex->outbuf,smb_vwv4,(numtowrite>>16)&1);
295 show_msg(aio_ex->outbuf);
296 if (!srv_send_smb(smbd_server_fd(),aio_ex->outbuf,
297 true, aio_ex->req->seqnum+1,
298 IS_CONN_ENCRYPTED(fsp->conn),
299 &aio_ex->req->pcd)) {
300 exit_server_cleanly("handle_aio_write: srv_send_smb "
303 DEBUG(10,("schedule_aio_write_and_X: scheduled aio_write "
304 "behind for file %s\n", fsp_str_dbg(fsp)));
307 DEBUG(10,("schedule_aio_write_and_X: scheduled aio_write for file "
308 "%s, offset %.0f, len = %u (mid = %u) "
309 "outstanding_aio_calls = %d\n",
310 fsp_str_dbg(fsp), (double)startpos, (unsigned int)numtowrite,
311 (unsigned int)aio_ex->req->mid, outstanding_aio_calls ));
317 /****************************************************************************
318 Complete the read and return the data or error back to the client.
319 Returns errno or zero if all ok.
320 *****************************************************************************/
/* Completion handler for a read queued by schedule_aio_read_and_X().

   Collects the transferred byte count with SMB_VFS_AIO_RETURN and then
   either turns the reply into an error (ERROR_NT plus an empty message) or
   completes the 12-word read reply: data length in vwv5 and vwv7, data
   offset in vwv6 and the byte count field just before the data. On the
   success path the file position and position_information are set to
   aio_offset + nread. The reply is sent with srv_send_smb(); a failed send
   exits the server.

   errcode: the aio error code; used for strerror() in the failure log.

   NOTE(review): the condition selecting the error branch and the return
   statement are not visible here. The first DEBUG passes nread (ssize_t) to
   a %d conversion without a cast, unlike the later DEBUG which casts to
   int - worth confirming. */
322 static int handle_aio_read_complete(struct aio_extra *aio_ex, int errcode)
326 char *outbuf = aio_ex->outbuf;
327 char *data = smb_buf(outbuf);
328 ssize_t nread = SMB_VFS_AIO_RETURN(aio_ex->fsp,&aio_ex->acb);
331 /* We're relying here on the fact that if the fd is
332 closed then the aio will complete and aio_return
333 will return an error. Hopefully this is
336 DEBUG( 3,( "handle_aio_read_complete: file %s nread == %d. "
338 fsp_str_dbg(aio_ex->fsp), nread, strerror(errcode)));
341 ERROR_NT(map_nt_error_from_unix(ret));
342 outsize = srv_set_message(outbuf,0,0,true);
344 outsize = srv_set_message(outbuf,12,nread,False);
345 SSVAL(outbuf,smb_vwv2,0xFFFF); /* Remaining - must be * -1. */
346 SSVAL(outbuf,smb_vwv5,nread);
347 SSVAL(outbuf,smb_vwv6,smb_offset(data,outbuf));
348 SSVAL(outbuf,smb_vwv7,((nread >> 16) & 1));
349 SSVAL(smb_buf(outbuf),-2,nread);
/* Advance the file position past the bytes just read. */
351 aio_ex->fsp->fh->pos = aio_ex->acb.aio_offset + nread;
352 aio_ex->fsp->fh->position_information = aio_ex->fsp->fh->pos;
354 DEBUG( 3, ( "handle_aio_read_complete file %s max=%d "
356 fsp_str_dbg(aio_ex->fsp),
357 (int)aio_ex->acb.aio_nbytes, (int)nread ) );
/* The length passed to smb_setlen is outsize minus 4. */
360 smb_setlen(outbuf,outsize - 4);
362 if (!srv_send_smb(smbd_server_fd(),outbuf,
363 true, aio_ex->req->seqnum+1,
364 IS_CONN_ENCRYPTED(aio_ex->fsp->conn), NULL)) {
365 exit_server_cleanly("handle_aio_read_complete: srv_send_smb "
369 DEBUG(10,("handle_aio_read_complete: scheduled aio_read completed "
370 "for file %s, offset %.0f, len = %u\n",
371 fsp_str_dbg(aio_ex->fsp), (double)aio_ex->acb.aio_offset,
372 (unsigned int)nread ));
377 /****************************************************************************
378 Complete the write and return the data or error back to the client.
379 Returns error code or zero if all ok.
380 *****************************************************************************/
/* Completion handler for a write queued by schedule_aio_write_and_X().

   Collects the written byte count with SMB_VFS_AIO_RETURN. When
   fsp->aio_write_behind is set, only the outcome is logged (a failed or
   short write is reported as file corruption); schedule_aio_write_and_X()
   has a path that already sent the reply in write-behind mode. Otherwise
   the reply is finished here: on failure it becomes an NT error; on success
   the count goes into vwv2/vwv4, a short write is flagged as
   ERRHRD/ERRdiskfull, sync_file() is called and the file position is set to
   aio_offset + nwritten. The reply is then sent with srv_send_smb(); a
   failed send exits the server.

   errcode: the aio error code; used for strerror() in the failure logs.

   NOTE(review): the branch conditions around the error path and the return
   statements are not visible here. */
382 static int handle_aio_write_complete(struct aio_extra *aio_ex, int errcode)
385 files_struct *fsp = aio_ex->fsp;
386 char *outbuf = aio_ex->outbuf;
387 ssize_t numtowrite = aio_ex->acb.aio_nbytes;
388 ssize_t nwritten = SMB_VFS_AIO_RETURN(fsp,&aio_ex->acb);
/* Write-behind: log the outcome only. */
390 if (fsp->aio_write_behind) {
391 if (nwritten != numtowrite) {
392 if (nwritten == -1) {
393 DEBUG(5,("handle_aio_write_complete: "
394 "aio_write_behind failed ! File %s "
395 "is corrupt ! Error %s\n",
396 fsp_str_dbg(fsp), strerror(errcode)));
399 DEBUG(0,("handle_aio_write_complete: "
400 "aio_write_behind failed ! File %s "
401 "is corrupt ! Wanted %u bytes but "
402 "only wrote %d\n", fsp_str_dbg(fsp),
403 (unsigned int)numtowrite,
408 DEBUG(10,("handle_aio_write_complete: "
409 "aio_write_behind completed for file %s\n",
412 /* TODO: should not return 0 in case of an error !!! */
416 /* We don't need outsize or set_message here as we've already set the
417 fixed size length when we set up the aio call. */
420 DEBUG( 3,( "handle_aio_write: file %s wanted %u bytes. "
421 "nwritten == %d. Error = %s\n",
422 fsp_str_dbg(fsp), (unsigned int)numtowrite,
423 (int)nwritten, strerror(errcode) ));
426 ERROR_NT(map_nt_error_from_unix(ret));
427 srv_set_message(outbuf,0,0,true);
/* Write-through flag: bit 0 of request parameter word 7. */
429 bool write_through = BITSETW(aio_ex->req->vwv+7,0);
432 SSVAL(outbuf,smb_vwv2,nwritten);
433 SSVAL(outbuf,smb_vwv4,(nwritten>>16)&1);
/* A short write is reported to the client as a disk-full hard error. */
434 if (nwritten < (ssize_t)numtowrite) {
435 SCVAL(outbuf,smb_rcls,ERRHRD);
436 SSVAL(outbuf,smb_err,ERRdiskfull);
439 DEBUG(3,("handle_aio_write: fnum=%d num=%d wrote=%d\n",
440 fsp->fnum, (int)numtowrite, (int)nwritten));
/* A sync_file() failure replaces the reply with an error. */
441 status = sync_file(fsp->conn,fsp, write_through);
442 if (!NT_STATUS_IS_OK(status)) {
444 ERROR_BOTH(map_nt_error_from_unix(ret),
445 ERRHRD, ERRdiskfull);
446 srv_set_message(outbuf,0,0,true);
447 DEBUG(5,("handle_aio_write: sync_file for %s returned %s\n",
448 fsp_str_dbg(fsp), nt_errstr(status)));
/* Advance the file position past the bytes just written. */
451 aio_ex->fsp->fh->pos = aio_ex->acb.aio_offset + nwritten;
455 if (!srv_send_smb(smbd_server_fd(),outbuf,
456 true, aio_ex->req->seqnum+1,
457 IS_CONN_ENCRYPTED(fsp->conn),
459 exit_server_cleanly("handle_aio_write: srv_send_smb failed.");
462 DEBUG(10,("handle_aio_write_complete: scheduled aio_write completed "
463 "for file %s, offset %.0f, requested %u, written = %u\n",
464 fsp_str_dbg(fsp), (double)aio_ex->acb.aio_offset,
465 (unsigned int)numtowrite, (unsigned int)nwritten ));
470 /****************************************************************************
471 Handle any aio completion. Returns True if finished (and sets *perr if err
472 was non-zero), False if not.
473 *****************************************************************************/
/* Check one aio record and, if its operation has finished, run its
   completion callback.

   SMB_VFS_AIO_ERROR is consulted first: EINPROGRESS means the operation is
   still running; ECANCELED means it was cancelled and, per the in-line
   comment, nothing is returned to the client. For any other status the
   record's handle_completion callback is invoked with that status, and its
   result is stored through perr (per the in-line comment, only when
   non-zero).

   NOTE(review): the NULL-argument check that guards the first DEBUG and all
   return statements are not visible here. */
475 static bool handle_aio_completed(struct aio_extra *aio_ex, int *perr)
480 DEBUG(3, ("handle_aio_completed: Non-existing aio_ex passed\n"));
484 /* Ensure the operation has really completed. */
485 err = SMB_VFS_AIO_ERROR(aio_ex->fsp, &aio_ex->acb);
486 if (err == EINPROGRESS) {
487 DEBUG(10,( "handle_aio_completed: operation mid %u still in "
488 "process for file %s\n",
489 aio_ex->req->mid, fsp_str_dbg(aio_ex->fsp)));
491 } else if (err == ECANCELED) {
492 /* If error is ECANCELED then don't return anything to the
494 DEBUG(10,( "handle_aio_completed: operation mid %u"
495 " canceled\n", aio_ex->req->mid));
499 err = aio_ex->handle_completion(aio_ex, err);
501 *perr = err; /* Only save non-zero errors. */
507 /****************************************************************************
508 Handle any aio completion inline.
509 *****************************************************************************/
/* Process the completion of the aio request identified by mid.

   Looks the record up with find_aio_ex(), decrements outstanding_aio_calls
   and, when a record is available, hands it to handle_aio_completed(). The
   log messages show two early-exit situations: no record matches the mid,
   and the file was closed while the I/O was outstanding.

   NOTE(review): in the visible lines the counter is decremented before the
   lookup result is examined. The conditions guarding the two log messages
   and whatever follows handle_aio_completed() are not visible here. */
511 void smbd_aio_complete_mid(unsigned int mid)
513 files_struct *fsp = NULL;
514 struct aio_extra *aio_ex = find_aio_ex(mid);
517 outstanding_aio_calls--;
519 DEBUG(10,("smbd_aio_complete_mid: mid[%u]\n", mid));
522 DEBUG(3,("smbd_aio_complete_mid: Can't find record to "
523 "match mid %u.\n", mid));
529 /* file was closed whilst I/O was outstanding. Just
531 DEBUG( 3,( "smbd_aio_complete_mid: file closed whilst "
532 "aio outstanding (mid[%u]).\n", mid));
536 if (!handle_aio_completed(aio_ex, &ret)) {
/* tevent signal callback registered for RT_SIGNAL_AIO by
   initialize_async_io_handler(). Treats _info as a siginfo_t, takes the mid
   from si_value.sival_int (the value stored in the aiocb's sigevent when
   the request was scheduled) and passes it to smbd_aio_complete_mid().
   ev_ctx, se, signum, count and private_data are not used in the visible
   lines. */
543 static void smbd_aio_signal_handler(struct tevent_context *ev_ctx,
544 struct tevent_signal *se,
545 int signum, int count,
546 void *_info, void *private_data)
548 siginfo_t *info = (siginfo_t *)_info;
549 unsigned int mid = (unsigned int)info->si_value.sival_int;
551 smbd_aio_complete_mid(mid);
554 /****************************************************************************
555 We're doing write behind and the client closed the file. Wait up to 30
556 seconds (my arbitrary choice) for the aio to complete. Return 0 if all writes
557 completed, errno to return if not.
558 *****************************************************************************/
/* Wait for all aio requests outstanding on fsp to finish.

   The loop runs while seconds_left, measured against
   SMB_TIME_FOR_AIO_COMPLETE_WAIT from start_time, is non-negative. Each
   pass:
     1. counts the aio_extra records on aio_list_head that belong to fsp;
     2. collects pointers to their aiocbs into a malloc'ed array;
     3. calls SMB_VFS_AIO_SUSPEND on that array with a timeout of
        seconds_left;
     4. if that fails with EAGAIN (timeout), cancels everything for fsp via
        cancel_aio_by_fsp() and frees the array;
     5. otherwise looks up each listed request by the mid stored in its
        sigevent and passes it to handle_aio_completed();
     6. frees the array and recomputes seconds_left.

   NOTE(review): the return statements, the handling of a zero count and of
   an allocation failure, and the tv_nsec setup are not visible here. */
560 #define SMB_TIME_FOR_AIO_COMPLETE_WAIT 29
562 int wait_for_aio_completion(files_struct *fsp)
564 struct aio_extra *aio_ex;
565 const SMB_STRUCT_AIOCB **aiocb_list;
566 int aio_completion_count = 0;
567 time_t start_time = time(NULL);
570 for (seconds_left = SMB_TIME_FOR_AIO_COMPLETE_WAIT;
571 seconds_left >= 0;) {
/* Count the requests still pending on this file. */
576 aio_completion_count = 0;
577 for( aio_ex = aio_list_head; aio_ex; aio_ex = aio_ex->next) {
578 if (aio_ex->fsp == fsp) {
579 aio_completion_count++;
583 if (!aio_completion_count) {
587 DEBUG(3,("wait_for_aio_completion: waiting for %d aio events "
588 "to complete.\n", aio_completion_count ));
/* Build the aiocb pointer array handed to SMB_VFS_AIO_SUSPEND. */
590 aiocb_list = SMB_MALLOC_ARRAY(const SMB_STRUCT_AIOCB *,
591 aio_completion_count);
596 for( i = 0, aio_ex = aio_list_head;
598 aio_ex = aio_ex->next) {
599 if (aio_ex->fsp == fsp) {
600 aiocb_list[i++] = &aio_ex->acb;
604 /* Now wait up to seconds_left for completion. */
605 ts.tv_sec = seconds_left;
608 DEBUG(10,("wait_for_aio_completion: %d events, doing a wait "
610 aio_completion_count, seconds_left ));
612 err = SMB_VFS_AIO_SUSPEND(fsp, aiocb_list,
613 aio_completion_count, &ts);
615 DEBUG(10,("wait_for_aio_completion: returned err = %d, "
616 "errno = %s\n", err, strerror(errno) ));
/* Timed out: cancel whatever is still pending for this file. */
618 if (err == -1 && errno == EAGAIN) {
619 DEBUG(0,("wait_for_aio_completion: aio_suspend timed "
620 "out waiting for %d events after a wait of "
621 "%d seconds\n", aio_completion_count,
624 cancel_aio_by_fsp(fsp);
625 SAFE_FREE(aiocb_list);
629 /* One or more events might have completed - process them if
631 for( i = 0; i < aio_completion_count; i++) {
632 uint16 mid = aiocb_list[i]->aio_sigevent.sigev_value.sival_int;
634 aio_ex = find_aio_ex(mid);
637 DEBUG(0, ("wait_for_aio_completion: mid %u "
638 "doesn't match an aio record\n",
639 (unsigned int)mid ));
643 if (!handle_aio_completed(aio_ex, &err)) {
649 SAFE_FREE(aiocb_list);
650 seconds_left = SMB_TIME_FOR_AIO_COMPLETE_WAIT
651 - (time(NULL) - start_time);
654 /* We timed out - we don't know why. Return ret if already an error,
656 DEBUG(10,("wait_for_aio_completion: aio_suspend timed out waiting "
658 aio_completion_count));
663 /****************************************************************************
664 Cancel any outstanding aio requests. The client doesn't care about the reply.
665 *****************************************************************************/
/* Cancel every outstanding aio request that belongs to fsp.

   For each matching record on aio_list_head, SMB_VFS_AIO_CANCEL is issued
   and the record's fsp pointer is cleared; the record itself is left on the
   list (see the in-line comment). The result of SMB_VFS_AIO_CANCEL is not
   examined. */
667 void cancel_aio_by_fsp(files_struct *fsp)
669 struct aio_extra *aio_ex;
671 for( aio_ex = aio_list_head; aio_ex; aio_ex = aio_ex->next) {
672 if (aio_ex->fsp == fsp) {
673 /* Don't delete the aio_extra record as we may have
674 completed and don't yet know it. Just do the
675 aio_cancel call and return. */
676 SMB_VFS_AIO_CANCEL(fsp, &aio_ex->acb);
677 aio_ex->fsp = NULL; /* fsp will be closed when we
683 /****************************************************************************
684 Initialize the signal handler for aio read/write.
685 *****************************************************************************/
/* Register smbd_aio_signal_handler for RT_SIGNAL_AIO (with SA_SIGINFO) on
   smbd_event_context() and keep the registration in aio_signal_event. A
   failed registration exits the server. Afterwards aio_pending_size, the
   limit compared against outstanding_aio_calls when scheduling, is set to
   100.
   NOTE(review): the final argument of tevent_add_signal() is not visible
   here. */
687 void initialize_async_io_handler(void)
689 aio_signal_event = tevent_add_signal(smbd_event_context(),
690 smbd_event_context(),
691 RT_SIGNAL_AIO, SA_SIGINFO,
692 smbd_aio_signal_handler,
694 if (!aio_signal_event) {
695 exit_server("Failed to setup RT_SIGNAL_AIO handler");
698 /* tevent supports 100 signal with SA_SIGINFO */
699 aio_pending_size = 100;
/* Second set of the public entry points defined earlier in this file, with
   the same names and leading parameters.
   NOTE(review): presumably these are the fallback definitions compiled when
   aio support is disabled; the enclosing preprocessor conditional and the
   function bodies are not visible here, so their behaviour cannot be stated
   - TODO confirm. */
703 void initialize_async_io_handler(void)
707 bool schedule_aio_read_and_X(connection_struct *conn,
708 struct smb_request *req,
709 files_struct *fsp, SMB_OFF_T startpos,
715 bool schedule_aio_write_and_X(connection_struct *conn,
716 struct smb_request *req,
717 files_struct *fsp, char *data,
724 void cancel_aio_by_fsp(files_struct *fsp)
728 int wait_for_aio_completion(files_struct *fsp)
733 void smbd_aio_complete_mid(unsigned int mid);