2 Unix SMB/CIFS implementation.
3 main select loop and event handling
4 Copyright (C) Andrew Tridgell 2003-2005
5 Copyright (C) Stefan Metzmacher 2005
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 This is SAMBA's default event loop code
25 - we try to use epoll if configure detected support for it
26 otherwise we use select()
27 - if epoll is broken on the system or the kernel doesn't support it
28 at runtime we fallback to select()
32 #include "system/filesys.h"
33 #include "system/select.h" /* needed for WITH_EPOLL */
34 #include "lib/util/dlinklist.h"
35 #include "lib/events/events.h"
36 #include "lib/events/events_internal.h"
/* private state for the standard (select/epoll) event backend */
struct std_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* list of filedescriptor events */
	struct fd_event *fd_events;

	/* list of timed events, kept sorted by next_event */
	struct timed_event *timed_events;

	/* the maximum file descriptor number in fd_events */
	int maxfd;

	/* information for exiting from the event loop */
	int exit_code;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create,
	   or -1 when epoll is unavailable */
	int epoll_fd;
};
66 static void std_event_loop_timer(struct std_event_context *std_ev);
68 /* use epoll if it is available */
71 called when a epoll call fails, and we should fallback
74 static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason)
76 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
77 close(std_ev->epoll_fd);
78 std_ev->epoll_fd = -1;
79 talloc_set_destructor(std_ev, NULL);
83 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
85 static uint32_t epoll_map_flags(uint16_t flags)
88 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
89 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
96 static int epoll_ctx_destructor(struct std_event_context *std_ev)
98 close(std_ev->epoll_fd);
99 std_ev->epoll_fd = -1;
106 static void epoll_init_ctx(struct std_event_context *std_ev, BOOL try_epoll)
108 if (!try_epoll) return;
109 std_ev->epoll_fd = epoll_create(64);
110 talloc_set_destructor(std_ev, epoll_ctx_destructor);
113 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
114 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
115 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
118 add the epoll event to the given fd_event
120 static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde)
122 struct epoll_event event;
123 if (std_ev->epoll_fd == -1) return;
125 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
127 /* if we don't want events yet, don't add an epoll_event */
128 if (fde->flags == 0) return;
131 event.events = epoll_map_flags(fde->flags);
132 event.data.ptr = fde;
133 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
134 epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed");
136 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
138 /* only if we want to read we want to tell the event handler about errors */
139 if (fde->flags & EVENT_FD_READ) {
140 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
145 delete the epoll event for given fd_event
147 static void epoll_del_event(struct std_event_context *std_ev, struct fd_event *fde)
149 struct epoll_event event;
150 if (std_ev->epoll_fd == -1) return;
152 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
154 /* if there's no epoll_event, we don't need to delete it */
155 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
158 event.events = epoll_map_flags(fde->flags);
159 event.data.ptr = fde;
160 epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
161 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
165 change the epoll event to the given fd_event
167 static void epoll_mod_event(struct std_event_context *std_ev, struct fd_event *fde)
169 struct epoll_event event;
170 if (std_ev->epoll_fd == -1) return;
172 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
175 event.events = epoll_map_flags(fde->flags);
176 event.data.ptr = fde;
177 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
178 epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed");
181 /* only if we want to read we want to tell the event handler about errors */
182 if (fde->flags & EVENT_FD_READ) {
183 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
187 static void epoll_change_event(struct std_event_context *std_ev, struct fd_event *fde)
189 BOOL got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
190 BOOL want_read = (fde->flags & EVENT_FD_READ);
191 BOOL want_write= (fde->flags & EVENT_FD_WRITE);
193 if (std_ev->epoll_fd == -1) return;
195 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
197 /* there's already an event */
198 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
199 if (want_read || (want_write && !got_error)) {
200 epoll_mod_event(std_ev, fde);
204 * if we want to match the select behavior, we need to remove the epoll_event
205 * when the caller isn't interested in events.
207 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
209 epoll_del_event(std_ev, fde);
213 /* there's no epoll_event attached to the fde */
214 if (want_read || (want_write && !got_error)) {
215 epoll_add_event(std_ev, fde);
221 event loop handling using epoll
223 static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp)
227 struct epoll_event events[MAXEVENTS];
228 uint32_t destruction_count = std_ev->destruction_count;
231 if (std_ev->epoll_fd == -1) return -1;
234 /* it's better to trigger timed events a bit later than to early */
235 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
238 ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);
240 if (ret == -1 && errno != EINTR) {
241 epoll_fallback_to_select(std_ev, "epoll_wait() failed");
245 if (ret == 0 && tvalp) {
246 std_event_loop_timer(std_ev);
250 for (i=0;i<ret;i++) {
251 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
256 epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data");
259 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
260 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
262 * if we only wait for EVENT_FD_WRITE, we should not tell the
263 * event handler about it, and remove the epoll_event,
264 * as we only report errors when waiting for read events,
265 * to match the select() behavior
267 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
268 epoll_del_event(std_ev, fde);
271 flags |= EVENT_FD_READ;
273 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
274 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
276 fde->handler(std_ev->ev, fde, flags, fde->private_data);
277 if (destruction_count != std_ev->destruction_count) {
286 #define epoll_init_ctx(std_ev,try_epoll) if (try_epoll) {/* fix unused variable warning*/}
287 #define epoll_add_event(std_ev,fde)
288 #define epoll_del_event(std_ev,fde)
289 #define epoll_change_event(std_ev,fde)
290 #define epoll_event_loop(std_ev,tvalp) (-1)
294 create a std_event_context structure.
296 static int std_event_context_init(struct event_context *ev, void *private_data)
298 struct std_event_context *std_ev;
299 BOOL *_try_epoll = private_data;
300 BOOL try_epoll = (_try_epoll == NULL ? True : *_try_epoll);
302 std_ev = talloc_zero(ev, struct std_event_context);
303 if (!std_ev) return -1;
305 std_ev->epoll_fd = -1;
307 epoll_init_ctx(std_ev, try_epoll);
309 ev->additional_data = std_ev;
314 recalculate the maxfd
316 static void calc_maxfd(struct std_event_context *std_ev)
318 struct fd_event *fde;
321 for (fde = std_ev->fd_events; fde; fde = fde->next) {
322 if (fde->fd > std_ev->maxfd) {
323 std_ev->maxfd = fde->fd;
329 /* to mark the ev->maxfd invalid
330 * this means we need to recalculate it
332 #define EVENT_INVALID_MAXFD (-1)
337 static int std_event_fd_destructor(struct fd_event *fde)
339 struct event_context *ev = fde->event_ctx;
340 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
341 struct std_event_context);
343 if (std_ev->maxfd == fde->fd) {
344 std_ev->maxfd = EVENT_INVALID_MAXFD;
347 DLIST_REMOVE(std_ev->fd_events, fde);
348 std_ev->destruction_count++;
350 epoll_del_event(std_ev, fde);
357 return NULL on failure (memory allocation error)
359 static struct fd_event *std_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
360 int fd, uint16_t flags,
361 event_fd_handler_t handler,
364 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
365 struct std_event_context);
366 struct fd_event *fde;
368 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
369 if (!fde) return NULL;
374 fde->handler = handler;
375 fde->private_data = private_data;
376 fde->additional_flags = 0;
377 fde->additional_data = NULL;
379 DLIST_ADD(std_ev->fd_events, fde);
380 if (fde->fd > std_ev->maxfd) {
381 std_ev->maxfd = fde->fd;
383 talloc_set_destructor(fde, std_event_fd_destructor);
385 epoll_add_event(std_ev, fde);
392 return the fd event flags
394 static uint16_t std_event_get_fd_flags(struct fd_event *fde)
400 set the fd event flags
402 static void std_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
404 struct event_context *ev;
405 struct std_event_context *std_ev;
407 if (fde->flags == flags) return;
410 std_ev = talloc_get_type(ev->additional_data, struct std_event_context);
414 epoll_change_event(std_ev, fde);
418 destroy a timed event
420 static int std_event_timed_destructor(struct timed_event *te)
422 struct std_event_context *std_ev = talloc_get_type(te->event_ctx->additional_data,
423 struct std_event_context);
424 DLIST_REMOVE(std_ev->timed_events, te);
428 static int std_event_timed_deny_destructor(struct timed_event *te)
435 return NULL on failure (memory allocation error)
437 static struct timed_event *std_event_add_timed(struct event_context *ev, TALLOC_CTX *mem_ctx,
438 struct timeval next_event,
439 event_timed_handler_t handler,
442 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
443 struct std_event_context);
444 struct timed_event *te, *last_te, *cur_te;
446 te = talloc(mem_ctx?mem_ctx:ev, struct timed_event);
447 if (te == NULL) return NULL;
450 te->next_event = next_event;
451 te->handler = handler;
452 te->private_data = private_data;
453 te->additional_data = NULL;
455 /* keep the list ordered */
457 for (cur_te = std_ev->timed_events; cur_te; cur_te = cur_te->next) {
458 /* if the new event comes before the current one break */
459 if (!timeval_is_zero(&cur_te->next_event) &&
460 timeval_compare(&te->next_event,
461 &cur_te->next_event) < 0) {
468 DLIST_ADD_AFTER(std_ev->timed_events, te, last_te);
470 talloc_set_destructor(te, std_event_timed_destructor);
476 a timer has gone off - call it
478 static void std_event_loop_timer(struct std_event_context *std_ev)
480 struct timeval t = timeval_current();
481 struct timed_event *te = std_ev->timed_events;
487 /* deny the handler to free the event */
488 talloc_set_destructor(te, std_event_timed_deny_destructor);
490 /* We need to remove the timer from the list before calling the
491 * handler because in a semi-async inner event loop called from the
492 * handler we don't want to come across this event again -- vl */
493 DLIST_REMOVE(std_ev->timed_events, te);
495 te->handler(std_ev->ev, te, t, te->private_data);
497 /* The destructor isn't necessary anymore, we've already removed the
498 * event from the list. */
499 talloc_set_destructor(te, NULL);
505 event loop handling using select()
507 static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp)
510 struct fd_event *fde;
512 uint32_t destruction_count = std_ev->destruction_count;
514 /* we maybe need to recalculate the maxfd */
515 if (std_ev->maxfd == EVENT_INVALID_MAXFD) {
522 /* setup any fd events */
523 for (fde = std_ev->fd_events; fde; fde = fde->next) {
524 if (fde->flags & EVENT_FD_READ) {
525 FD_SET(fde->fd, &r_fds);
527 if (fde->flags & EVENT_FD_WRITE) {
528 FD_SET(fde->fd, &w_fds);
532 selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
534 if (selrtn == -1 && errno == EBADF) {
535 /* the socket is dead! this should never
536 happen as the socket should have first been
537 made readable and that should have removed
538 the event, so this must be a bug. This is a
540 DEBUG(0,("ERROR: EBADF on std_event_loop_once\n"));
541 std_ev->exit_code = EBADF;
545 if (selrtn == 0 && tvalp) {
546 std_event_loop_timer(std_ev);
551 /* at least one file descriptor is ready - check
552 which ones and call the handler, being careful to allow
553 the handler to remove itself when called */
554 for (fde = std_ev->fd_events; fde; fde = fde->next) {
557 if (FD_ISSET(fde->fd, &r_fds)) flags |= EVENT_FD_READ;
558 if (FD_ISSET(fde->fd, &w_fds)) flags |= EVENT_FD_WRITE;
560 fde->handler(std_ev->ev, fde, flags, fde->private_data);
561 if (destruction_count != std_ev->destruction_count) {
572 do a single event loop using the events defined in ev
574 static int std_event_loop_once(struct event_context *ev)
576 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
577 struct std_event_context);
580 /* work out the right timeout for all timed events */
581 if (std_ev->timed_events) {
582 struct timeval t = timeval_current();
583 tval = timeval_until(&t, &std_ev->timed_events->next_event);
584 if (timeval_is_zero(&tval)) {
585 std_event_loop_timer(std_ev);
589 /* have a default tick time of 30 seconds. This guarantees
590 that code that uses its own timeout checking will be
591 able to proceeed eventually */
592 tval = timeval_set(30, 0);
595 if (epoll_event_loop(std_ev, &tval) == 0) {
599 return std_event_loop_select(std_ev, &tval);
603 return on failure or (with 0) if all fd events are removed
605 static int std_event_loop_wait(struct event_context *ev)
607 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
608 struct std_event_context);
609 std_ev->exit_code = 0;
611 while (std_ev->fd_events && std_ev->exit_code == 0) {
612 if (std_event_loop_once(ev) != 0) {
617 return std_ev->exit_code;
620 static const struct event_ops std_event_ops = {
621 .context_init = std_event_context_init,
622 .add_fd = std_event_add_fd,
623 .get_fd_flags = std_event_get_fd_flags,
624 .set_fd_flags = std_event_set_fd_flags,
625 .add_timed = std_event_add_timed,
626 .loop_once = std_event_loop_once,
627 .loop_wait = std_event_loop_wait,
630 const struct event_ops *event_standard_get_ops(void)
632 return &std_event_ops;