/*
   Unix SMB/CIFS implementation.
   main select loop and event handling
   Copyright (C) Andrew Tridgell 2003-2005
   Copyright (C) Stefan Metzmacher 2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
  This is SAMBA's default event loop code

  - we try to use epoll if configure detected support for it,
    otherwise we use select()
  - if epoll is broken on the system or the kernel doesn't support it
    at runtime we fall back to select()
*/
31 #include "system/filesys.h"
32 #include "system/select.h"
34 #include "tevent_util.h"
35 #include "tevent_internal.h"
/*
  private state of the standard event backend (epoll with a
  select() fallback)
*/
struct std_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/* list of filedescriptor events */
	struct tevent_fd *fd_events;

	/* the maximum file descriptor number in fd_events */
	int maxfd;

	/* information for exiting from the event loop */
	int exit_code;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* our pid at the time the epoll_fd was created */
	pid_t pid;
};
65 /* use epoll if it is available */
68 called when a epoll call fails, and we should fallback
71 static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason)
73 tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL,
74 "%s (%s) - falling back to select()\n",
75 reason, strerror(errno));
76 close(std_ev->epoll_fd);
77 std_ev->epoll_fd = -1;
78 talloc_set_destructor(std_ev, NULL);
82 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
84 static uint32_t epoll_map_flags(uint16_t flags)
87 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
88 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
95 static int epoll_ctx_destructor(struct std_event_context *std_ev)
97 if (std_ev->epoll_fd != -1) {
98 close(std_ev->epoll_fd);
100 std_ev->epoll_fd = -1;
107 static void epoll_init_ctx(struct std_event_context *std_ev)
109 std_ev->epoll_fd = epoll_create(64);
110 std_ev->pid = getpid();
111 talloc_set_destructor(std_ev, epoll_ctx_destructor);
114 static void epoll_add_event(struct std_event_context *std_ev, struct tevent_fd *fde);
117 reopen the epoll handle when our pid changes
118 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
119 demonstration of why this is needed
121 static void epoll_check_reopen(struct std_event_context *std_ev)
123 struct tevent_fd *fde;
125 if (std_ev->pid == getpid()) {
129 close(std_ev->epoll_fd);
130 std_ev->epoll_fd = epoll_create(64);
131 if (std_ev->epoll_fd == -1) {
132 tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL,
133 "Failed to recreate epoll handle after fork\n");
136 std_ev->pid = getpid();
137 for (fde=std_ev->fd_events;fde;fde=fde->next) {
138 epoll_add_event(std_ev, fde);
/*
  bookkeeping bits the epoll code keeps in tevent_fd->additional_flags
*/
/* an epoll_event has been registered for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* set while TEVENT_FD_READ is wanted: EPOLLERR/EPOLLHUP are passed on to the handler */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* epoll_wait() has reported EPOLLERR or EPOLLHUP for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
147 add the epoll event to the given fd_event
149 static void epoll_add_event(struct std_event_context *std_ev, struct tevent_fd *fde)
151 struct epoll_event event;
152 if (std_ev->epoll_fd == -1) return;
154 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
156 /* if we don't want events yet, don't add an epoll_event */
157 if (fde->flags == 0) return;
160 event.events = epoll_map_flags(fde->flags);
161 event.data.ptr = fde;
162 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
163 epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed");
165 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
167 /* only if we want to read we want to tell the event handler about errors */
168 if (fde->flags & TEVENT_FD_READ) {
169 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
174 delete the epoll event for given fd_event
176 static void epoll_del_event(struct std_event_context *std_ev, struct tevent_fd *fde)
178 struct epoll_event event;
179 if (std_ev->epoll_fd == -1) return;
181 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
183 /* if there's no epoll_event, we don't need to delete it */
184 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
187 event.events = epoll_map_flags(fde->flags);
188 event.data.ptr = fde;
189 epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
190 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
194 change the epoll event to the given fd_event
196 static void epoll_mod_event(struct std_event_context *std_ev, struct tevent_fd *fde)
198 struct epoll_event event;
199 if (std_ev->epoll_fd == -1) return;
201 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
204 event.events = epoll_map_flags(fde->flags);
205 event.data.ptr = fde;
206 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
207 epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed");
210 /* only if we want to read we want to tell the event handler about errors */
211 if (fde->flags & TEVENT_FD_READ) {
212 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
216 static void epoll_change_event(struct std_event_context *std_ev, struct tevent_fd *fde)
218 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
219 bool want_read = (fde->flags & TEVENT_FD_READ);
220 bool want_write= (fde->flags & TEVENT_FD_WRITE);
222 if (std_ev->epoll_fd == -1) return;
224 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
226 /* there's already an event */
227 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
228 if (want_read || (want_write && !got_error)) {
229 epoll_mod_event(std_ev, fde);
233 * if we want to match the select behavior, we need to remove the epoll_event
234 * when the caller isn't interested in events.
236 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
238 epoll_del_event(std_ev, fde);
242 /* there's no epoll_event attached to the fde */
243 if (want_read || (want_write && !got_error)) {
244 epoll_add_event(std_ev, fde);
250 event loop handling using epoll
252 static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp)
256 struct epoll_event events[MAXEVENTS];
257 uint32_t destruction_count = ++std_ev->destruction_count;
260 if (std_ev->epoll_fd == -1) return -1;
263 /* it's better to trigger timed events a bit later than to early */
264 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
267 if (std_ev->ev->signal_events &&
268 tevent_common_check_signal(std_ev->ev)) {
272 ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);
274 if (ret == -1 && errno == EINTR && std_ev->ev->signal_events) {
275 if (tevent_common_check_signal(std_ev->ev)) {
280 if (ret == -1 && errno != EINTR) {
281 epoll_fallback_to_select(std_ev, "epoll_wait() failed");
285 if (ret == 0 && tvalp) {
286 /* we don't care about a possible delay here */
287 tevent_common_loop_timer_delay(std_ev->ev);
291 for (i=0;i<ret;i++) {
292 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
297 epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data");
300 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
301 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
303 * if we only wait for TEVENT_FD_WRITE, we should not tell the
304 * event handler about it, and remove the epoll_event,
305 * as we only report errors when waiting for read events,
306 * to match the select() behavior
308 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
309 epoll_del_event(std_ev, fde);
312 flags |= TEVENT_FD_READ;
314 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
315 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
317 fde->handler(std_ev->ev, fde, flags, fde->private_data);
318 if (destruction_count != std_ev->destruction_count) {
/*
  without epoll support the epoll hooks become no-ops, and
  epoll_event_loop() always reports failure (-1) so that
  std_event_loop_once() goes straight to the select() loop

  NOTE(review): the conditional around these stubs was lost from this
  listing. Without one they shadow the real epoll functions for every
  caller below. HAVE_EPOLL is assumed to be the configure macro -
  confirm, and confirm the epoll implementation itself is guarded by
  the matching #if.
*/
#if !HAVE_EPOLL
#define epoll_init_ctx(std_ev)
#define epoll_add_event(std_ev,fde)
#define epoll_del_event(std_ev,fde)
#define epoll_change_event(std_ev,fde)
#define epoll_event_loop(std_ev,tvalp) (-1)
#define epoll_check_reopen(std_ev)
#endif
336 create a std_event_context structure.
338 static int std_event_context_init(struct tevent_context *ev)
340 struct std_event_context *std_ev;
342 std_ev = talloc_zero(ev, struct std_event_context);
343 if (!std_ev) return -1;
345 std_ev->epoll_fd = -1;
347 epoll_init_ctx(std_ev);
349 ev->additional_data = std_ev;
354 recalculate the maxfd
356 static void calc_maxfd(struct std_event_context *std_ev)
358 struct tevent_fd *fde;
361 for (fde = std_ev->fd_events; fde; fde = fde->next) {
362 if (fde->fd > std_ev->maxfd) {
363 std_ev->maxfd = fde->fd;
/* to mark the ev->maxfd invalid
 * this means we need to recalculate it
 */
#define EVENT_INVALID_MAXFD (-1)
377 static int std_event_fd_destructor(struct tevent_fd *fde)
379 struct tevent_context *ev = fde->event_ctx;
380 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
381 struct std_event_context);
383 epoll_check_reopen(std_ev);
385 if (std_ev->maxfd == fde->fd) {
386 std_ev->maxfd = EVENT_INVALID_MAXFD;
389 DLIST_REMOVE(std_ev->fd_events, fde);
390 std_ev->destruction_count++;
392 epoll_del_event(std_ev, fde);
395 fde->close_fn(ev, fde, fde->fd, fde->private_data);
404 return NULL on failure (memory allocation error)
406 static struct tevent_fd *std_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
407 int fd, uint16_t flags,
408 tevent_fd_handler_t handler,
410 const char *handler_name,
411 const char *location)
413 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
414 struct std_event_context);
415 struct tevent_fd *fde;
417 epoll_check_reopen(std_ev);
419 fde = talloc(mem_ctx?mem_ctx:ev, struct tevent_fd);
420 if (!fde) return NULL;
425 fde->handler = handler;
426 fde->close_fn = NULL;
427 fde->private_data = private_data;
428 fde->handler_name = handler_name;
429 fde->location = location;
430 fde->additional_flags = 0;
431 fde->additional_data = NULL;
433 DLIST_ADD(std_ev->fd_events, fde);
434 if ((std_ev->maxfd != EVENT_INVALID_MAXFD)
435 && (fde->fd > std_ev->maxfd)) {
436 std_ev->maxfd = fde->fd;
438 talloc_set_destructor(fde, std_event_fd_destructor);
440 epoll_add_event(std_ev, fde);
446 set the fd event flags
448 static void std_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
450 struct tevent_context *ev;
451 struct std_event_context *std_ev;
453 if (fde->flags == flags) return;
456 std_ev = talloc_get_type(ev->additional_data, struct std_event_context);
460 epoll_check_reopen(std_ev);
462 epoll_change_event(std_ev, fde);
466 event loop handling using select()
468 static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp)
471 struct tevent_fd *fde;
473 uint32_t destruction_count = ++std_ev->destruction_count;
475 /* we maybe need to recalculate the maxfd */
476 if (std_ev->maxfd == EVENT_INVALID_MAXFD) {
483 /* setup any fd events */
484 for (fde = std_ev->fd_events; fde; fde = fde->next) {
485 if (fde->flags & TEVENT_FD_READ) {
486 FD_SET(fde->fd, &r_fds);
488 if (fde->flags & TEVENT_FD_WRITE) {
489 FD_SET(fde->fd, &w_fds);
493 if (std_ev->ev->signal_events &&
494 tevent_common_check_signal(std_ev->ev)) {
498 selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
500 if (selrtn == -1 && errno == EINTR &&
501 std_ev->ev->signal_events) {
502 tevent_common_check_signal(std_ev->ev);
506 if (selrtn == -1 && errno == EBADF) {
507 /* the socket is dead! this should never
508 happen as the socket should have first been
509 made readable and that should have removed
510 the event, so this must be a bug. This is a
512 tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL,
513 "ERROR: EBADF on std_event_loop_once\n");
514 std_ev->exit_code = EBADF;
518 if (selrtn == 0 && tvalp) {
519 /* we don't care about a possible delay here */
520 tevent_common_loop_timer_delay(std_ev->ev);
525 /* at least one file descriptor is ready - check
526 which ones and call the handler, being careful to allow
527 the handler to remove itself when called */
528 for (fde = std_ev->fd_events; fde; fde = fde->next) {
531 if (FD_ISSET(fde->fd, &r_fds)) flags |= TEVENT_FD_READ;
532 if (FD_ISSET(fde->fd, &w_fds)) flags |= TEVENT_FD_WRITE;
534 fde->handler(std_ev->ev, fde, flags, fde->private_data);
535 if (destruction_count != std_ev->destruction_count) {
546 do a single event loop using the events defined in ev
548 static int std_event_loop_once(struct tevent_context *ev)
550 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
551 struct std_event_context);
554 tval = tevent_common_loop_timer_delay(ev);
555 if (ev_timeval_is_zero(&tval)) {
559 epoll_check_reopen(std_ev);
561 if (epoll_event_loop(std_ev, &tval) == 0) {
565 return std_event_loop_select(std_ev, &tval);
569 return on failure or (with 0) if all fd events are removed
571 static int std_event_loop_wait(struct tevent_context *ev)
573 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
574 struct std_event_context);
575 std_ev->exit_code = 0;
577 while (std_ev->fd_events && std_ev->exit_code == 0) {
578 if (std_event_loop_once(ev) != 0) {
583 return std_ev->exit_code;
586 static const struct tevent_ops std_event_ops = {
587 .context_init = std_event_context_init,
588 .add_fd = std_event_add_fd,
589 .set_fd_close_fn= tevent_common_fd_set_close_fn,
590 .get_fd_flags = tevent_common_fd_get_flags,
591 .set_fd_flags = std_event_set_fd_flags,
592 .add_timer = tevent_common_add_timer,
593 .add_signal = tevent_common_add_signal,
594 .loop_once = std_event_loop_once,
595 .loop_wait = std_event_loop_wait,
599 bool tevent_standard_init(void)
601 return tevent_register_backend("standard", &std_event_ops);