2 Unix SMB/CIFS implementation.
3 main select loop and event handling
4 Copyright (C) Andrew Tridgell 2003-2005
5 Copyright (C) Stefan Metzmacher 2005
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
/*
  This is SAMBA's default event loop code

  - we try to use epoll if configure detected support for it,
    otherwise we use select()
  - if epoll is broken on the system or the kernel doesn't support it
    at runtime, we fall back to select()
*/
31 #include "system/filesys.h"
32 #include "system/network.h"
33 #include "system/select.h" /* needed for HAVE_EVENTS_EPOLL */
34 #include "lib/util/dlinklist.h"
35 #include "lib/events/events.h"
36 #include "lib/events/events_internal.h"
/*
  private state of the "standard" (epoll with select() fallback)
  event backend
*/
struct std_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* list of file descriptor events */
	struct fd_event *fd_events;

	/* the maximum file descriptor number in fd_events, or
	   EVENT_INVALID_MAXFD when it has to be recalculated */
	int maxfd;

	/* information for exiting from the event loop */
	int exit_code;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create,
	   -1 when epoll is not (or no longer) in use */
	int epoll_fd;

	/* our pid at the time the epoll_fd was created */
	pid_t pid;
};
66 /* use epoll if it is available */
#if HAVE_EVENTS_EPOLL
/*
  called when an epoll call fails, and we should fall back
  to using select()

  Logs the reason together with the current errno, closes the epoll
  handle, marks it as unused (-1) and removes the talloc destructor
  from the context.
*/
static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason)
{
	/* log first: close() below may overwrite errno */
	DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
	close(std_ev->epoll_fd);
	std_ev->epoll_fd = -1;
	talloc_set_destructor(std_ev, NULL);
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  map from EVENT_FD_* to EPOLLIN/EPOLLOUT

  EPOLLERR and EPOLLHUP are requested for both directions.
  Returns 0 when neither EVENT_FD_READ nor EVENT_FD_WRITE is set.
*/
static uint32_t epoll_map_flags(uint16_t flags)
{
	uint32_t ret = 0;
	if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
	if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
	return ret;
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  talloc destructor of the std_event_context: free the epoll fd

  Always returns 0.
*/
static int epoll_ctx_destructor(struct std_event_context *std_ev)
{
	if (std_ev->epoll_fd != -1) {
		close(std_ev->epoll_fd);
	}
	std_ev->epoll_fd = -1;
	return 0;
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  init the epoll fd

  Creates the epoll handle, remembers the pid that created it and
  installs the destructor that closes it again. A failed
  epoll_create() leaves epoll_fd at -1, which the other epoll_*
  helpers in this file treat as "epoll not in use".
*/
static void epoll_init_ctx(struct std_event_context *std_ev)
{
	std_ev->epoll_fd = epoll_create(64);
	std_ev->pid = getpid();
	talloc_set_destructor(std_ev, epoll_ctx_destructor);
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde);

/*
  reopen the epoll handle when our pid changes
  see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for a
  demonstration of why this is needed

  Does nothing while the pid matches the one recorded when the handle
  was created. Otherwise a fresh epoll handle is created and every
  fd_event in the list is registered with it again.
*/
static void epoll_check_reopen(struct std_event_context *std_ev)
{
	struct fd_event *fde;

	if (std_ev->pid == getpid()) {
		return;
	}

	/* the handle may already be gone after a fallback to select() */
	if (std_ev->epoll_fd != -1) {
		close(std_ev->epoll_fd);
	}
	std_ev->epoll_fd = epoll_create(64);
	if (std_ev->epoll_fd == -1) {
		DEBUG(0,("Failed to recreate epoll handle after fork\n"));
		return;
	}
	/* the fallback path removes the destructor; make sure the new
	   handle is closed when the context is freed */
	talloc_set_destructor(std_ev, epoll_ctx_destructor);
	std_ev->pid = getpid();
	for (fde=std_ev->fd_events;fde;fde=fde->next) {
		epoll_add_event(std_ev, fde);
	}
}
#endif /* HAVE_EVENTS_EPOLL */
/* bits kept in fd_event->additional_flags by the epoll code */
/* an epoll_event is currently registered for the fd */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* EPOLLERR/EPOLLHUP should be passed on to the handler as a read event */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* epoll has already reported EPOLLERR/EPOLLHUP for the fd */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
#if HAVE_EVENTS_EPOLL
/*
  add the epoll event to the given fd_event

  No-op when epoll is not in use or when the fd_event has no flags
  set. On EPOLL_CTL_ADD failure the backend falls back to select()
  and the fd_event is not marked as registered.
*/
static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde)
{
	struct epoll_event event;
	if (std_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	/* if we don't want events yet, don't add an epoll_event */
	if (fde->flags == 0) return;

	memset(&event, 0, sizeof(event));
	event.events = epoll_map_flags(fde->flags);
	event.data.ptr = fde;
	if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
		epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed");
		/* nothing was registered, so don't claim HAS_EVENT */
		return;
	}
	fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;

	/* only if we want to read we want to tell the event handler about errors */
	if (fde->flags & EVENT_FD_READ) {
		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  delete the epoll event for given fd_event

  No-op when epoll is not in use or when no epoll_event is registered
  for the fd_event.
*/
static void epoll_del_event(struct std_event_context *std_ev, struct fd_event *fde)
{
	struct epoll_event event;
	if (std_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	/* if there's no epoll_event, we don't need to delete it */
	if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;

	memset(&event, 0, sizeof(event));
	event.events = epoll_map_flags(fde->flags);
	event.data.ptr = fde;
	/* best effort: the result is deliberately not checked */
	epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  change the epoll event to the given fd_event

  Re-registers the fd with the event mask derived from the current
  fde->flags. On EPOLL_CTL_MOD failure the backend falls back to
  select().
*/
static void epoll_mod_event(struct std_event_context *std_ev, struct fd_event *fde)
{
	struct epoll_event event;
	if (std_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	memset(&event, 0, sizeof(event));
	event.events = epoll_map_flags(fde->flags);
	event.data.ptr = fde;
	if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
		epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed");
		/* epoll is no longer in use, the epoll flags don't matter */
		return;
	}

	/* only if we want to read we want to tell the event handler about errors */
	if (fde->flags & EVENT_FD_READ) {
		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  bring the epoll registration of an fd_event in line with its
  current flags: modify or delete an existing epoll_event, or add a
  new one

  Write-only interest is not (re)registered once an error has been
  seen on the fd.
*/
static void epoll_change_event(struct std_event_context *std_ev, struct fd_event *fde)
{
	bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
	bool want_read = (fde->flags & EVENT_FD_READ);
	bool want_write= (fde->flags & EVENT_FD_WRITE);

	if (std_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	/* there's already an event */
	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
		if (want_read || (want_write && !got_error)) {
			epoll_mod_event(std_ev, fde);
			return;
		}
		/*
		 * if we want to match the select behavior, we need to remove the epoll_event
		 * when the caller isn't interested in events.
		 *
		 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
		 */
		epoll_del_event(std_ev, fde);
		return;
	}

	/* there's no epoll_event attached to the fde */
	if (want_read || (want_write && !got_error)) {
		epoll_add_event(std_ev, fde);
		return;
	}
}
#endif /* HAVE_EVENTS_EPOLL */
#if HAVE_EVENTS_EPOLL
/*
  event loop handling using epoll

  tvalp is the maximum time to wait, NULL means wait without timeout.

  Returns 0 when this iteration was handled (events dispatched, a
  signal processed or the timeout expired). Returns -1 when epoll is
  not usable, in which case the caller is expected to use select().
*/
static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp)
{
	int ret, i;
/* NOTE(review): the line defining MAXEVENTS is not visible in this
   listing; 8 is used as the number of events fetched per
   epoll_wait() call - confirm against the original */
#ifndef MAXEVENTS
#define MAXEVENTS 8
#endif
	struct epoll_event events[MAXEVENTS];
	uint32_t destruction_count = ++std_ev->destruction_count;
	int timeout = -1;

	if (std_ev->epoll_fd == -1) return -1;

	if (tvalp) {
		/* it's better to trigger timed events a bit later than too early */
		timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
	}

	if (std_ev->ev->num_signal_handlers &&
	    common_event_check_signal(std_ev->ev)) {
		return 0;
	}

	ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);

	if (ret == -1 && errno == EINTR && std_ev->ev->num_signal_handlers) {
		if (common_event_check_signal(std_ev->ev)) {
			return 0;
		}
	}

	if (ret == -1 && errno != EINTR) {
		epoll_fallback_to_select(std_ev, "epoll_wait() failed");
		return -1;
	}

	if (ret == 0 && tvalp) {
		/* we don't care about a possible delay here */
		common_event_loop_timer_delay(std_ev->ev);
		return 0;
	}

	/* ret is -1 here after an unhandled EINTR, so nothing is dispatched */
	for (i=0;i<ret;i++) {
		struct fd_event *fde = talloc_get_type(events[i].data.ptr,
						       struct fd_event);
		uint16_t flags = 0;

		if (fde == NULL) {
			epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data");
			return -1;
		}
		if (events[i].events & (EPOLLHUP|EPOLLERR)) {
			fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
			/*
			 * if we only wait for EVENT_FD_WRITE, we should not tell the
			 * event handler about it, and remove the epoll_event,
			 * as we only report errors when waiting for read events,
			 * to match the select() behavior
			 */
			if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
				epoll_del_event(std_ev, fde);
				continue;
			}
			flags |= EVENT_FD_READ;
		}
		if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
		if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
		if (flags) {
			fde->handler(std_ev->ev, fde, flags, fde->private_data);
			/* a handler destroyed an fd_event: the remaining
			   entries in events[] may point to freed memory */
			if (destruction_count != std_ev->destruction_count) {
				break;
			}
		}
	}

	return 0;
}
#endif /* HAVE_EVENTS_EPOLL */
#if !HAVE_EVENTS_EPOLL
/*
  without epoll support the epoll helpers compile to nothing and
  epoll_event_loop() always reports failure (-1), so that the caller
  uses select()
*/
#define epoll_init_ctx(std_ev)
#define epoll_add_event(std_ev,fde)
#define epoll_del_event(std_ev,fde)
#define epoll_change_event(std_ev,fde)
#define epoll_event_loop(std_ev,tvalp) (-1)
#define epoll_check_reopen(std_ev)
#endif /* !HAVE_EVENTS_EPOLL */
334 create a std_event_context structure.
336 static int std_event_context_init(struct event_context *ev)
338 struct std_event_context *std_ev;
340 std_ev = talloc_zero(ev, struct std_event_context);
341 if (!std_ev) return -1;
343 std_ev->epoll_fd = -1;
345 epoll_init_ctx(std_ev);
347 ev->additional_data = std_ev;
352 recalculate the maxfd
354 static void calc_maxfd(struct std_event_context *std_ev)
356 struct fd_event *fde;
359 for (fde = std_ev->fd_events; fde; fde = fde->next) {
360 if (fde->fd > std_ev->maxfd) {
361 std_ev->maxfd = fde->fd;
/* to mark the std_ev->maxfd invalid
 * this means we need to recalculate it
 */
#define EVENT_INVALID_MAXFD (-1)
375 static int std_event_fd_destructor(struct fd_event *fde)
377 struct event_context *ev = fde->event_ctx;
378 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
379 struct std_event_context);
381 epoll_check_reopen(std_ev);
383 if (std_ev->maxfd == fde->fd) {
384 std_ev->maxfd = EVENT_INVALID_MAXFD;
387 DLIST_REMOVE(std_ev->fd_events, fde);
388 std_ev->destruction_count++;
390 epoll_del_event(std_ev, fde);
392 if (fde->flags & EVENT_FD_AUTOCLOSE) {
402 return NULL on failure (memory allocation error)
404 static struct fd_event *std_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
405 int fd, uint16_t flags,
406 event_fd_handler_t handler,
409 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
410 struct std_event_context);
411 struct fd_event *fde;
413 epoll_check_reopen(std_ev);
415 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
416 if (!fde) return NULL;
421 fde->handler = handler;
422 fde->private_data = private_data;
423 fde->additional_flags = 0;
424 fde->additional_data = NULL;
426 DLIST_ADD(std_ev->fd_events, fde);
427 if ((std_ev->maxfd != EVENT_INVALID_MAXFD)
428 && (fde->fd > std_ev->maxfd)) {
429 std_ev->maxfd = fde->fd;
431 talloc_set_destructor(fde, std_event_fd_destructor);
433 epoll_add_event(std_ev, fde);
440 return the fd event flags
442 static uint16_t std_event_get_fd_flags(struct fd_event *fde)
448 set the fd event flags
450 static void std_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
452 struct event_context *ev;
453 struct std_event_context *std_ev;
455 if (fde->flags == flags) return;
458 std_ev = talloc_get_type(ev->additional_data, struct std_event_context);
462 epoll_check_reopen(std_ev);
464 epoll_change_event(std_ev, fde);
468 event loop handling using select()
470 static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp)
473 struct fd_event *fde;
475 uint32_t destruction_count = ++std_ev->destruction_count;
477 /* we maybe need to recalculate the maxfd */
478 if (std_ev->maxfd == EVENT_INVALID_MAXFD) {
485 /* setup any fd events */
486 for (fde = std_ev->fd_events; fde; fde = fde->next) {
487 if (fde->flags & EVENT_FD_READ) {
488 FD_SET(fde->fd, &r_fds);
490 if (fde->flags & EVENT_FD_WRITE) {
491 FD_SET(fde->fd, &w_fds);
495 if (std_ev->ev->num_signal_handlers &&
496 common_event_check_signal(std_ev->ev)) {
500 selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
502 if (selrtn == -1 && errno == EINTR &&
503 std_ev->ev->num_signal_handlers) {
504 common_event_check_signal(std_ev->ev);
508 if (selrtn == -1 && errno == EBADF) {
509 /* the socket is dead! this should never
510 happen as the socket should have first been
511 made readable and that should have removed
512 the event, so this must be a bug. This is a
514 DEBUG(0,("ERROR: EBADF on std_event_loop_once\n"));
515 std_ev->exit_code = EBADF;
519 if (selrtn == 0 && tvalp) {
520 /* we don't care about a possible delay here */
521 common_event_loop_timer_delay(std_ev->ev);
526 /* at least one file descriptor is ready - check
527 which ones and call the handler, being careful to allow
528 the handler to remove itself when called */
529 for (fde = std_ev->fd_events; fde; fde = fde->next) {
532 if (FD_ISSET(fde->fd, &r_fds)) flags |= EVENT_FD_READ;
533 if (FD_ISSET(fde->fd, &w_fds)) flags |= EVENT_FD_WRITE;
535 fde->handler(std_ev->ev, fde, flags, fde->private_data);
536 if (destruction_count != std_ev->destruction_count) {
547 do a single event loop using the events defined in ev
549 static int std_event_loop_once(struct event_context *ev)
551 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
552 struct std_event_context);
555 tval = common_event_loop_timer_delay(ev);
556 if (timeval_is_zero(&tval)) {
560 epoll_check_reopen(std_ev);
562 if (epoll_event_loop(std_ev, &tval) == 0) {
566 return std_event_loop_select(std_ev, &tval);
570 return on failure or (with 0) if all fd events are removed
572 static int std_event_loop_wait(struct event_context *ev)
574 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
575 struct std_event_context);
576 std_ev->exit_code = 0;
578 while (std_ev->fd_events && std_ev->exit_code == 0) {
579 if (std_event_loop_once(ev) != 0) {
584 return std_ev->exit_code;
587 static const struct event_ops std_event_ops = {
588 .context_init = std_event_context_init,
589 .add_fd = std_event_add_fd,
590 .get_fd_flags = std_event_get_fd_flags,
591 .set_fd_flags = std_event_set_fd_flags,
592 .add_timed = common_event_add_timed,
593 .add_signal = common_event_add_signal,
594 .loop_once = std_event_loop_once,
595 .loop_wait = std_event_loop_wait,
599 bool events_standard_init(void)
601 return event_register_backend("standard", &std_event_ops);
605 NTSTATUS s4_events_standard_init(void)
607 if (!events_standard_init()) {
608 return NT_STATUS_INTERNAL_ERROR;