2 Unix SMB/CIFS implementation.
3 main select loop and event handling
4 Copyright (C) Andrew Tridgell 2003-2005
5 Copyright (C) Stefan Metzmacher 2005
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 This is SAMBA's default event loop code
24 - we try to use epoll if configure detected support for it
25 otherwise we use select()
26 - if epoll is broken on the system or the kernel doesn't support it
27 at runtime we fall back to select()
32 #include "lib/util/dlinklist.h"
35 #include "events_util.h"
37 #include "system/filesys.h"
38 #include "system/select.h" /* needed for HAVE_EVENTS_EPOLL */
40 #include "events_internal.h"
// Backend-private state of the standard event loop. An instance is
// allocated by std_event_context_init() and stored in ev->additional_data.
// NOTE(review): the member declarations for maxfd, exit_code, epoll_fd and
// pid (all used elsewhere in this file) are not visible in this listing.
42 struct std_event_context {
43 /* a pointer back to the generic event_context */
44 struct event_context *ev;
46 /* list of filedescriptor events */
47 struct fd_event *fd_events;
49 /* the maximum file descriptor number in fd_events */
52 /* information for exiting from the event loop */
55 /* this is changed by the destructors for the fd event
56 type. It is used to detect event destruction by event
57 handlers, which means the code that is calling the event
58 handler needs to assume that the linked list is no longer
61 uint32_t destruction_count;
63 /* when using epoll this is the handle from epoll_create */
66 /* our pid at the time the epoll_fd was created */
70 /* use epoll if it is available */
73 called when an epoll call fails, and we should fall back
// Stop using epoll for this context.
// Logs reason together with strerror(errno), so errno must still hold the
// value of the failed call when this is invoked. Then closes the epoll
// handle, sets epoll_fd to -1 (the value the other epoll_* helpers test
// before doing any work) and clears the talloc destructor on std_ev.
// reason: short text placed at the start of the log message.
76 static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason)
78 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
79 close(std_ev->epoll_fd);
80 std_ev->epoll_fd = -1;
81 talloc_set_destructor(std_ev, NULL);
85 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
// Translate EVENT_FD_* flags into an epoll event mask.
// EVENT_FD_READ contributes EPOLLIN and EVENT_FD_WRITE contributes EPOLLOUT;
// either of them also contributes EPOLLERR and EPOLLHUP.
// NOTE(review): the declaration and the return of ret are not visible here;
// presumably ret starts at 0 and is returned - confirm.
87 static uint32_t epoll_map_flags(uint16_t flags)
90 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
91 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
// talloc destructor for std_ev, installed by epoll_init_ctx().
// Closes the epoll handle when one is open and leaves epoll_fd at -1.
// NOTE(review): the return statement is not visible here.
98 static int epoll_ctx_destructor(struct std_event_context *std_ev)
100 if (std_ev->epoll_fd != -1) {
101 close(std_ev->epoll_fd);
103 std_ev->epoll_fd = -1;
// Create the epoll handle for std_ev, record the pid that created it and
// install epoll_ctx_destructor on std_ev.
// The result of epoll_create() is not checked in the visible lines; the
// other epoll_* helpers return early when epoll_fd is -1.
110 static void epoll_init_ctx(struct std_event_context *std_ev)
112 std_ev->epoll_fd = epoll_create(64);
// remembered so that epoll_check_reopen() can compare it with getpid()
113 std_ev->pid = getpid();
114 talloc_set_destructor(std_ev, epoll_ctx_destructor);
117 static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde);
120 reopen the epoll handle when our pid changes
121 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for a
122 demonstration of why this is needed
// Re-create the epoll handle after the pid of the process has changed.
// Compares the pid stored in std_ev with getpid(), closes the old handle,
// creates a new one, records the current pid and registers every entry of
// std_ev->fd_events again through epoll_add_event().
// NOTE(review): the body of the pid comparison is not visible here;
// presumably it returns early when the pid is unchanged - confirm.
124 static void epoll_check_reopen(struct std_event_context *std_ev)
126 struct fd_event *fde;
128 if (std_ev->pid == getpid()) {
132 close(std_ev->epoll_fd);
133 std_ev->epoll_fd = epoll_create(64);
134 if (std_ev->epoll_fd == -1) {
// NOTE(review): the exit of this failure branch is not visible here
135 DEBUG(0,("Failed to recreate epoll handle after fork\n"));
138 std_ev->pid = getpid();
139 for (fde=std_ev->fd_events;fde;fde=fde->next) {
140 epoll_add_event(std_ev, fde);
// Bits kept in fde->additional_flags by the epoll code in this file.
// HAS_EVENT: set by epoll_add_event() after its EPOLL_CTL_ADD call and
// cleared by epoll_del_event().
// REPORT_ERROR: set when the fde is registered or modified while
// EVENT_FD_READ is in fde->flags; cleared at the start of each
// add, del, mod and change helper.
// GOT_ERROR: set by epoll_event_loop() when epoll_wait() reported
// EPOLLHUP or EPOLLERR for the fde.
144 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
145 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
146 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
149 add the epoll event to the given fd_event
// Register fde with the epoll handle of std_ev.
// Does nothing when epoll is not in use (epoll_fd == -1). REPORT_ERROR is
// cleared first; an fde whose flags are 0 is then left unregistered.
// Otherwise EPOLL_CTL_ADD is issued with the mask from epoll_map_flags()
// and fde itself as the user data pointer.
151 static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde)
153 struct epoll_event event;
154 if (std_ev->epoll_fd == -1) return;
156 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
158 /* if we don't want events yet, don't add an epoll_event */
159 if (fde->flags == 0) return;
162 event.events = epoll_map_flags(fde->flags);
// the fde pointer comes back in events[i].data.ptr from epoll_wait()
163 event.data.ptr = fde;
164 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
165 epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed");
167 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
169 /* only if we want to read we want to tell the event handler about errors */
170 if (fde->flags & EVENT_FD_READ) {
171 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
176 delete the epoll event for given fd_event
// Remove the epoll registration of fde.
// Does nothing when epoll is not in use (epoll_fd == -1). REPORT_ERROR is
// cleared first; when HAS_EVENT is not set there is nothing to remove.
// The result of the EPOLL_CTL_DEL call is ignored and HAS_EVENT is cleared
// afterwards.
178 static void epoll_del_event(struct std_event_context *std_ev, struct fd_event *fde)
180 struct epoll_event event;
181 if (std_ev->epoll_fd == -1) return;
183 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
185 /* if there's no epoll_event, we don't need to delete it */
186 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
189 event.events = epoll_map_flags(fde->flags);
190 event.data.ptr = fde;
191 epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
192 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
196 change the epoll event to the given fd_event
// Update the epoll registration of fde to match the current fde->flags.
// Does nothing when epoll is not in use (epoll_fd == -1). REPORT_ERROR is
// cleared, EPOLL_CTL_MOD is issued with the mask from epoll_map_flags(),
// and REPORT_ERROR is set again when EVENT_FD_READ is requested.
// A failing EPOLL_CTL_MOD triggers epoll_fallback_to_select().
198 static void epoll_mod_event(struct std_event_context *std_ev, struct fd_event *fde)
200 struct epoll_event event;
201 if (std_ev->epoll_fd == -1) return;
203 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
206 event.events = epoll_map_flags(fde->flags);
207 event.data.ptr = fde;
208 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
209 epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed");
212 /* only if we want to read we want to tell the event handler about errors */
213 if (fde->flags & EVENT_FD_READ) {
214 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
// Bring the epoll registration of fde in line with fde->flags; called from
// std_event_set_fd_flags().
// An fde is considered interesting when it wants read events, or wants
// write events and has not yet been marked with GOT_ERROR.
// The visible lines use epoll_mod_event() or epoll_del_event() for an fde
// that already has HAS_EVENT, and epoll_add_event() for one that does not.
// NOTE(review): the exits of the individual branches are not visible here;
// presumably each branch returns after its call - confirm.
218 static void epoll_change_event(struct std_event_context *std_ev, struct fd_event *fde)
220 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
221 bool want_read = (fde->flags & EVENT_FD_READ);
222 bool want_write= (fde->flags & EVENT_FD_WRITE);
224 if (std_ev->epoll_fd == -1) return;
226 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
228 /* there's already an event */
229 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
230 if (want_read || (want_write && !got_error)) {
231 epoll_mod_event(std_ev, fde);
235 * if we want to match the select behavior, we need to remove the epoll_event
236 * when the caller isn't interested in events.
238 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
240 epoll_del_event(std_ev, fde);
244 /* there's no epoll_event attached to the fde */
245 if (want_read || (want_write && !got_error)) {
246 epoll_add_event(std_ev, fde);
252 event loop handling using epoll
// Run one iteration of the event loop using epoll_wait().
// tvalp: maximum time to wait; converted to milliseconds, rounded up.
// Returns -1 at once when epoll is not in use (epoll_fd == -1).
// std_event_loop_once() treats a return of 0 as handled and otherwise goes
// on to the select() implementation.
// NOTE(review): the remaining return statements and several declarations
// (timeout, ret, i, flags) are not visible in this listing.
254 static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp)
258 struct epoll_event events[MAXEVENTS];
// bump the counter and keep the new value; a later mismatch means an fd
// event destructor ran while a handler was executing
259 uint32_t destruction_count = ++std_ev->destruction_count;
262 if (std_ev->epoll_fd == -1) return -1;
265 /* it's better to trigger timed events a bit later than too early */
266 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
// give pending signals a chance before blocking in epoll_wait()
269 if (std_ev->ev->num_signal_handlers &&
270 common_event_check_signal(std_ev->ev)) {
274 ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);
// interrupted by a signal while signal handlers are registered
276 if (ret == -1 && errno == EINTR && std_ev->ev->num_signal_handlers) {
277 if (common_event_check_signal(std_ev->ev)) {
// any failure other than EINTR disables epoll for this context
282 if (ret == -1 && errno != EINTR) {
283 epoll_fallback_to_select(std_ev, "epoll_wait() failed");
// timeout without fd activity: let the common code handle timed events
287 if (ret == 0 && tvalp) {
288 /* we don't care about a possible delay here */
289 common_event_loop_timer_delay(std_ev->ev);
293 for (i=0;i<ret;i++) {
// data.ptr was set to the fde by epoll_add_event() and epoll_mod_event()
294 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
299 epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data");
302 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
303 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
305 * if we only wait for EVENT_FD_WRITE, we should not tell the
306 * event handler about it, and remove the epoll_event,
307 * as we only report errors when waiting for read events,
308 * to match the select() behavior
310 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
311 epoll_del_event(std_ev, fde);
// NOTE(review): presumably reached only when REPORT_ERROR is set, so the
// error is delivered to the handler as a read event - confirm
314 flags |= EVENT_FD_READ;
316 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
317 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
319 fde->handler(std_ev->ev, fde, flags, fde->private_data);
// the handler destroyed an fd event; NOTE(review): the body of this check
// is not visible here, presumably it stops processing this batch
320 if (destruction_count != std_ev->destruction_count) {
// Stand-ins for the epoll helpers: every call expands to nothing, and
// epoll_event_loop() evaluates to -1 so that std_event_loop_once() always
// continues with the select() implementation.
// NOTE(review): the surrounding preprocessor conditional is not visible
// here; presumably this is the branch used when HAVE_EVENTS_EPOLL is not
// set - confirm.
329 #define epoll_init_ctx(std_ev)
330 #define epoll_add_event(std_ev,fde)
331 #define epoll_del_event(std_ev,fde)
332 #define epoll_change_event(std_ev,fde)
333 #define epoll_event_loop(std_ev,tvalp) (-1)
334 #define epoll_check_reopen(std_ev)
338 create a std_event_context structure.
// Backend context_init hook (see std_event_ops).
// Allocates a zeroed std_event_context with ev as talloc context, marks
// epoll as unused (-1), lets epoll_init_ctx() set it up and stores the
// result in ev->additional_data.
// Returns -1 when the allocation fails.
// NOTE(review): the assignment of std_ev->ev and the success return are
// not visible in this listing.
340 static int std_event_context_init(struct event_context *ev)
342 struct std_event_context *std_ev;
344 std_ev = talloc_zero(ev, struct std_event_context);
345 if (!std_ev) return -1;
347 std_ev->epoll_fd = -1;
349 epoll_init_ctx(std_ev);
351 ev->additional_data = std_ev;
356 recalculate the maxfd
// Walk std_ev->fd_events and raise std_ev->maxfd to the largest fd found.
// NOTE(review): the statement that resets maxfd before the loop is not
// visible here - confirm.
358 static void calc_maxfd(struct std_event_context *std_ev)
360 struct fd_event *fde;
363 for (fde = std_ev->fd_events; fde; fde = fde->next) {
364 if (fde->fd > std_ev->maxfd) {
365 std_ev->maxfd = fde->fd;
371 /* to mark the ev->maxfd invalid
372 * this means we need to recalculate it
374 #define EVENT_INVALID_MAXFD (-1)
// talloc destructor for an fd_event created by std_event_add_fd().
// Marks maxfd invalid when this fde held it, unlinks the fde from
// std_ev->fd_events, increments destruction_count so that a running event
// loop notices the removal, and drops the epoll registration.
// NOTE(review): the body of the EVENT_FD_AUTOCLOSE branch and the return
// statement are not visible here; presumably the branch closes fde->fd.
379 static int std_event_fd_destructor(struct fd_event *fde)
381 struct event_context *ev = fde->event_ctx;
382 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
383 struct std_event_context);
385 epoll_check_reopen(std_ev);
387 if (std_ev->maxfd == fde->fd) {
388 std_ev->maxfd = EVENT_INVALID_MAXFD;
391 DLIST_REMOVE(std_ev->fd_events, fde);
392 std_ev->destruction_count++;
394 epoll_del_event(std_ev, fde);
396 if (fde->flags & EVENT_FD_AUTOCLOSE) {
406 return NULL on failure (memory allocation error)
// Backend add_fd hook (see std_event_ops): create an fd_event.
// mem_ctx: talloc context for the new fde; ev is used when mem_ctx is NULL.
// handler and private_data are stored in the fde and used by the event
// loops when the fd becomes ready.
// Returns NULL when the allocation fails.
// NOTE(review): the last parameter line, the assignments of the remaining
// fde members (fd, flags, event_ctx are read elsewhere) and the final
// return are not visible in this listing.
408 static struct fd_event *std_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
409 int fd, uint16_t flags,
410 event_fd_handler_t handler,
413 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
414 struct std_event_context);
415 struct fd_event *fde;
417 epoll_check_reopen(std_ev);
419 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
420 if (!fde) return NULL;
425 fde->handler = handler;
426 fde->private_data = private_data;
427 fde->additional_flags = 0;
428 fde->additional_data = NULL;
430 DLIST_ADD(std_ev->fd_events, fde);
// an invalid maxfd is left alone; std_event_loop_select() checks for it
431 if ((std_ev->maxfd != EVENT_INVALID_MAXFD)
432 && (fde->fd > std_ev->maxfd)) {
433 std_ev->maxfd = fde->fd;
435 talloc_set_destructor(fde, std_event_fd_destructor);
437 epoll_add_event(std_ev, fde);
444 return the fd event flags
// Backend get_fd_flags hook (see std_event_ops).
// NOTE(review): the body is not visible here; presumably it returns
// fde->flags - confirm.
446 static uint16_t std_event_get_fd_flags(struct fd_event *fde)
452 set the fd event flags
// Backend set_fd_flags hook (see std_event_ops).
// Returns at once when the requested flags equal the current ones.
// Otherwise it makes sure the epoll handle belongs to this process and
// updates the epoll registration through epoll_change_event().
// NOTE(review): the assignments of ev and of fde->flags are not visible
// here; presumably fde->flags is set before epoll_change_event() - confirm.
454 static void std_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
456 struct event_context *ev;
457 struct std_event_context *std_ev;
459 if (fde->flags == flags) return;
462 std_ev = talloc_get_type(ev->additional_data, struct std_event_context);
466 epoll_check_reopen(std_ev);
468 epoll_change_event(std_ev, fde);
472 event loop handling using select()
// Run one iteration of the event loop using select().
// Builds the read and write fd sets from std_ev->fd_events, waits for at
// most tvalp, and calls the handler of an fde with the EVENT_FD_* flags
// derived from the fd sets.
// On EBADF an error is logged and EBADF is stored in std_ev->exit_code.
// NOTE(review): the declarations of r_fds, w_fds, selrtn and flags, the
// body of the maxfd check and all return statements are not visible here.
474 static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp)
477 struct fd_event *fde;
// bump the counter and keep the new value; a later mismatch means an fd
// event destructor ran while a handler was executing
479 uint32_t destruction_count = ++std_ev->destruction_count;
481 /* we maybe need to recalculate the maxfd */
482 if (std_ev->maxfd == EVENT_INVALID_MAXFD) {
489 /* setup any fd events */
490 for (fde = std_ev->fd_events; fde; fde = fde->next) {
491 if (fde->flags & EVENT_FD_READ) {
492 FD_SET(fde->fd, &r_fds);
494 if (fde->flags & EVENT_FD_WRITE) {
495 FD_SET(fde->fd, &w_fds);
// give pending signals a chance before blocking in select()
499 if (std_ev->ev->num_signal_handlers &&
500 common_event_check_signal(std_ev->ev)) {
504 selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
// interrupted by a signal while signal handlers are registered
506 if (selrtn == -1 && errno == EINTR &&
507 std_ev->ev->num_signal_handlers) {
508 common_event_check_signal(std_ev->ev);
512 if (selrtn == -1 && errno == EBADF) {
513 /* the socket is dead! this should never
514 happen as the socket should have first been
515 made readable and that should have removed
516 the event, so this must be a bug. This is a
518 DEBUG(0,("ERROR: EBADF on std_event_loop_once\n"));
519 std_ev->exit_code = EBADF;
523 if (selrtn == 0 && tvalp) {
524 /* we don't care about a possible delay here */
525 common_event_loop_timer_delay(std_ev->ev);
530 /* at least one file descriptor is ready - check
531 which ones and call the handler, being careful to allow
532 the handler to remove itself when called */
533 for (fde = std_ev->fd_events; fde; fde = fde->next) {
536 if (FD_ISSET(fde->fd, &r_fds)) flags |= EVENT_FD_READ;
537 if (FD_ISSET(fde->fd, &w_fds)) flags |= EVENT_FD_WRITE;
539 fde->handler(std_ev->ev, fde, flags, fde->private_data);
// the handler destroyed an fd event; NOTE(review): the body of this check
// is not visible here, presumably it stops walking the list
540 if (destruction_count != std_ev->destruction_count) {
551 do a single event loop using the events defined in ev
// Backend loop_once hook (see std_event_ops): one pass of the event loop.
// Asks the common timer code for the delay until the next timed event,
// makes sure the epoll handle belongs to this process, then tries the
// epoll loop and uses the select() loop when epoll_event_loop() does not
// return 0.
// NOTE(review): the declaration of tval and the bodies of the two
// conditionals are not visible here; presumably both return 0 - confirm.
553 static int std_event_loop_once(struct event_context *ev)
555 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
556 struct std_event_context);
559 tval = common_event_loop_timer_delay(ev);
560 if (timeval_is_zero(&tval)) {
564 epoll_check_reopen(std_ev);
566 if (epoll_event_loop(std_ev, &tval) == 0) {
570 return std_event_loop_select(std_ev, &tval);
574 return on failure or (with 0) if all fd events are removed
// Backend loop_wait hook (see std_event_ops).
// Resets exit_code to 0 and calls std_event_loop_once() for as long as fd
// events are registered and exit_code stays 0.
// Returns std_ev->exit_code.
// NOTE(review): the body of the failure check is not visible here;
// presumably it leaves the loop when std_event_loop_once() fails - confirm.
576 static int std_event_loop_wait(struct event_context *ev)
578 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
579 struct std_event_context);
580 std_ev->exit_code = 0;
582 while (std_ev->fd_events && std_ev->exit_code == 0) {
583 if (std_event_loop_once(ev) != 0) {
588 return std_ev->exit_code;
// Operations table of the standard backend, handed to
// event_register_backend() by events_standard_init().
// Context setup, fd events and the loop entry points are implemented in
// this file; timed and signal events use the common_event_* functions.
591 static const struct event_ops std_event_ops = {
592 .context_init = std_event_context_init,
593 .add_fd = std_event_add_fd,
594 .get_fd_flags = std_event_get_fd_flags,
595 .set_fd_flags = std_event_set_fd_flags,
596 .add_timed = common_event_add_timed,
597 .add_signal = common_event_add_signal,
598 .loop_once = std_event_loop_once,
599 .loop_wait = std_event_loop_wait,
// Register this backend with the events core under the name standard.
// Returns the result of event_register_backend().
603 bool events_standard_init(void)
605 return event_register_backend("standard", &std_event_ops);
609 _PUBLIC_ NTSTATUS s4_events_standard_init(void)
611 if (!events_standard_init()) {
612 return NT_STATUS_INTERNAL_ERROR;