2 Unix SMB/CIFS implementation.
3 main select loop and event handling
4 Copyright (C) Andrew Tridgell 2003-2005
5 Copyright (C) Stefan Metzmacher 2005
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
/*
  This is SAMBA's default event loop code

  - we try to use epoll if configure detected support for it,
    otherwise we use select()
  - if epoll is broken on the system or the kernel doesn't support it
    at runtime we fall back to select()
*/
32 #include "lib/util/dlinklist.h"
35 #include "events_util.h"
37 #include "system/filesys.h"
38 #include "system/network.h"
39 #include "system/select.h" /* needed for HAVE_EVENTS_EPOLL */
41 #include "events_internal.h"
/*
  private state of the standard backend, one instance per event_context
*/
struct std_event_context {
	/* back-reference to the generic event_context */
	struct event_context *ev;

	/* head of the list of registered file descriptor events */
	struct fd_event *fd_events;

	/* highest file descriptor number found in fd_events */
	int maxfd;

	/* value returned when the event loop is left */
	int exit_code;

	/* incremented by the fd event destructor and on every loop
	   entry. A loop that sees the value change after calling a
	   handler knows an fd event was destroyed and must stop
	   walking the list, as it can no longer be trusted
	*/
	uint32_t destruction_count;

	/* handle from epoll_create(), -1 while epoll is not in use */
	int epoll_fd;

	/* pid of the process at the time epoll_fd was created */
	pid_t pid;
};
71 /* use epoll if it is available */
74 called when a epoll call fails, and we should fallback
77 static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason)
79 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
80 close(std_ev->epoll_fd);
81 std_ev->epoll_fd = -1;
82 talloc_set_destructor(std_ev, NULL);
86 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
88 static uint32_t epoll_map_flags(uint16_t flags)
91 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
92 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
99 static int epoll_ctx_destructor(struct std_event_context *std_ev)
101 if (std_ev->epoll_fd != -1) {
102 close(std_ev->epoll_fd);
104 std_ev->epoll_fd = -1;
111 static void epoll_init_ctx(struct std_event_context *std_ev)
113 std_ev->epoll_fd = epoll_create(64);
114 std_ev->pid = getpid();
115 talloc_set_destructor(std_ev, epoll_ctx_destructor);
118 static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde);
121 reopen the epoll handle when our pid changes
122 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
123 demonstration of why this is needed
125 static void epoll_check_reopen(struct std_event_context *std_ev)
127 struct fd_event *fde;
129 if (std_ev->pid == getpid()) {
133 close(std_ev->epoll_fd);
134 std_ev->epoll_fd = epoll_create(64);
135 if (std_ev->epoll_fd == -1) {
136 DEBUG(0,("Failed to recreate epoll handle after fork\n"));
139 std_ev->pid = getpid();
140 for (fde=std_ev->fd_events;fde;fde=fde->next) {
141 epoll_add_event(std_ev, fde);
/* bits kept in fde->additional_flags by the epoll code */

/* an epoll_event is currently registered for the fde */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	0x01
/* error/hangup conditions are passed on to the handler (set while waiting for read) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	0x02
/* epoll has reported EPOLLERR or EPOLLHUP for the fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	0x04
150 add the epoll event to the given fd_event
152 static void epoll_add_event(struct std_event_context *std_ev, struct fd_event *fde)
154 struct epoll_event event;
155 if (std_ev->epoll_fd == -1) return;
157 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
159 /* if we don't want events yet, don't add an epoll_event */
160 if (fde->flags == 0) return;
163 event.events = epoll_map_flags(fde->flags);
164 event.data.ptr = fde;
165 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
166 epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed");
168 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
170 /* only if we want to read we want to tell the event handler about errors */
171 if (fde->flags & EVENT_FD_READ) {
172 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
177 delete the epoll event for given fd_event
179 static void epoll_del_event(struct std_event_context *std_ev, struct fd_event *fde)
181 struct epoll_event event;
182 if (std_ev->epoll_fd == -1) return;
184 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
186 /* if there's no epoll_event, we don't need to delete it */
187 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
190 event.events = epoll_map_flags(fde->flags);
191 event.data.ptr = fde;
192 epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
193 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
197 change the epoll event to the given fd_event
199 static void epoll_mod_event(struct std_event_context *std_ev, struct fd_event *fde)
201 struct epoll_event event;
202 if (std_ev->epoll_fd == -1) return;
204 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
207 event.events = epoll_map_flags(fde->flags);
208 event.data.ptr = fde;
209 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
210 epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed");
213 /* only if we want to read we want to tell the event handler about errors */
214 if (fde->flags & EVENT_FD_READ) {
215 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
219 static void epoll_change_event(struct std_event_context *std_ev, struct fd_event *fde)
221 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
222 bool want_read = (fde->flags & EVENT_FD_READ);
223 bool want_write= (fde->flags & EVENT_FD_WRITE);
225 if (std_ev->epoll_fd == -1) return;
227 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
229 /* there's already an event */
230 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
231 if (want_read || (want_write && !got_error)) {
232 epoll_mod_event(std_ev, fde);
236 * if we want to match the select behavior, we need to remove the epoll_event
237 * when the caller isn't interested in events.
239 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
241 epoll_del_event(std_ev, fde);
245 /* there's no epoll_event attached to the fde */
246 if (want_read || (want_write && !got_error)) {
247 epoll_add_event(std_ev, fde);
253 event loop handling using epoll
255 static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp)
259 struct epoll_event events[MAXEVENTS];
260 uint32_t destruction_count = ++std_ev->destruction_count;
263 if (std_ev->epoll_fd == -1) return -1;
266 /* it's better to trigger timed events a bit later than to early */
267 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
270 if (std_ev->ev->num_signal_handlers &&
271 common_event_check_signal(std_ev->ev)) {
275 ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);
277 if (ret == -1 && errno == EINTR && std_ev->ev->num_signal_handlers) {
278 if (common_event_check_signal(std_ev->ev)) {
283 if (ret == -1 && errno != EINTR) {
284 epoll_fallback_to_select(std_ev, "epoll_wait() failed");
288 if (ret == 0 && tvalp) {
289 /* we don't care about a possible delay here */
290 common_event_loop_timer_delay(std_ev->ev);
294 for (i=0;i<ret;i++) {
295 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
300 epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data");
303 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
304 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
306 * if we only wait for EVENT_FD_WRITE, we should not tell the
307 * event handler about it, and remove the epoll_event,
308 * as we only report errors when waiting for read events,
309 * to match the select() behavior
311 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
312 epoll_del_event(std_ev, fde);
315 flags |= EVENT_FD_READ;
317 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
318 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
320 fde->handler(std_ev->ev, fde, flags, fde->private_data);
321 if (destruction_count != std_ev->destruction_count) {
/*
  no-op replacements for the epoll helpers; epoll_event_loop() always
  reports failure (-1) so the caller uses the select() based loop.
  NOTE(review): presumably the non-epoll branch of a preprocessor
  conditional whose directives are not visible in this view - confirm
*/
#define epoll_init_ctx(std_ev) do { } while (0)
#define epoll_add_event(std_ev,fde) do { } while (0)
#define epoll_del_event(std_ev,fde) do { } while (0)
#define epoll_change_event(std_ev,fde) do { } while (0)
#define epoll_event_loop(std_ev,tvalp) (-1)
#define epoll_check_reopen(std_ev) do { } while (0)
339 create a std_event_context structure.
341 static int std_event_context_init(struct event_context *ev)
343 struct std_event_context *std_ev;
345 std_ev = talloc_zero(ev, struct std_event_context);
346 if (!std_ev) return -1;
348 std_ev->epoll_fd = -1;
350 epoll_init_ctx(std_ev);
352 ev->additional_data = std_ev;
357 recalculate the maxfd
359 static void calc_maxfd(struct std_event_context *std_ev)
361 struct fd_event *fde;
364 for (fde = std_ev->fd_events; fde; fde = fde->next) {
365 if (fde->fd > std_ev->maxfd) {
366 std_ev->maxfd = fde->fd;
/*
 * Sentinel stored in std_ev->maxfd to mark it invalid;
 * this means it has to be recalculated before it is used again.
 */
#define EVENT_INVALID_MAXFD (-1)
380 static int std_event_fd_destructor(struct fd_event *fde)
382 struct event_context *ev = fde->event_ctx;
383 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
384 struct std_event_context);
386 epoll_check_reopen(std_ev);
388 if (std_ev->maxfd == fde->fd) {
389 std_ev->maxfd = EVENT_INVALID_MAXFD;
392 DLIST_REMOVE(std_ev->fd_events, fde);
393 std_ev->destruction_count++;
395 epoll_del_event(std_ev, fde);
397 if (fde->flags & EVENT_FD_AUTOCLOSE) {
407 return NULL on failure (memory allocation error)
409 static struct fd_event *std_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
410 int fd, uint16_t flags,
411 event_fd_handler_t handler,
414 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
415 struct std_event_context);
416 struct fd_event *fde;
418 epoll_check_reopen(std_ev);
420 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
421 if (!fde) return NULL;
426 fde->handler = handler;
427 fde->private_data = private_data;
428 fde->additional_flags = 0;
429 fde->additional_data = NULL;
431 DLIST_ADD(std_ev->fd_events, fde);
432 if ((std_ev->maxfd != EVENT_INVALID_MAXFD)
433 && (fde->fd > std_ev->maxfd)) {
434 std_ev->maxfd = fde->fd;
436 talloc_set_destructor(fde, std_event_fd_destructor);
438 epoll_add_event(std_ev, fde);
445 return the fd event flags
447 static uint16_t std_event_get_fd_flags(struct fd_event *fde)
453 set the fd event flags
455 static void std_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
457 struct event_context *ev;
458 struct std_event_context *std_ev;
460 if (fde->flags == flags) return;
463 std_ev = talloc_get_type(ev->additional_data, struct std_event_context);
467 epoll_check_reopen(std_ev);
469 epoll_change_event(std_ev, fde);
473 event loop handling using select()
475 static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp)
478 struct fd_event *fde;
480 uint32_t destruction_count = ++std_ev->destruction_count;
482 /* we maybe need to recalculate the maxfd */
483 if (std_ev->maxfd == EVENT_INVALID_MAXFD) {
490 /* setup any fd events */
491 for (fde = std_ev->fd_events; fde; fde = fde->next) {
492 if (fde->flags & EVENT_FD_READ) {
493 FD_SET(fde->fd, &r_fds);
495 if (fde->flags & EVENT_FD_WRITE) {
496 FD_SET(fde->fd, &w_fds);
500 if (std_ev->ev->num_signal_handlers &&
501 common_event_check_signal(std_ev->ev)) {
505 selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
507 if (selrtn == -1 && errno == EINTR &&
508 std_ev->ev->num_signal_handlers) {
509 common_event_check_signal(std_ev->ev);
513 if (selrtn == -1 && errno == EBADF) {
514 /* the socket is dead! this should never
515 happen as the socket should have first been
516 made readable and that should have removed
517 the event, so this must be a bug. This is a
519 DEBUG(0,("ERROR: EBADF on std_event_loop_once\n"));
520 std_ev->exit_code = EBADF;
524 if (selrtn == 0 && tvalp) {
525 /* we don't care about a possible delay here */
526 common_event_loop_timer_delay(std_ev->ev);
531 /* at least one file descriptor is ready - check
532 which ones and call the handler, being careful to allow
533 the handler to remove itself when called */
534 for (fde = std_ev->fd_events; fde; fde = fde->next) {
537 if (FD_ISSET(fde->fd, &r_fds)) flags |= EVENT_FD_READ;
538 if (FD_ISSET(fde->fd, &w_fds)) flags |= EVENT_FD_WRITE;
540 fde->handler(std_ev->ev, fde, flags, fde->private_data);
541 if (destruction_count != std_ev->destruction_count) {
552 do a single event loop using the events defined in ev
554 static int std_event_loop_once(struct event_context *ev)
556 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
557 struct std_event_context);
560 tval = common_event_loop_timer_delay(ev);
561 if (timeval_is_zero(&tval)) {
565 epoll_check_reopen(std_ev);
567 if (epoll_event_loop(std_ev, &tval) == 0) {
571 return std_event_loop_select(std_ev, &tval);
575 return on failure or (with 0) if all fd events are removed
577 static int std_event_loop_wait(struct event_context *ev)
579 struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
580 struct std_event_context);
581 std_ev->exit_code = 0;
583 while (std_ev->fd_events && std_ev->exit_code == 0) {
584 if (std_event_loop_once(ev) != 0) {
589 return std_ev->exit_code;
592 static const struct event_ops std_event_ops = {
593 .context_init = std_event_context_init,
594 .add_fd = std_event_add_fd,
595 .get_fd_flags = std_event_get_fd_flags,
596 .set_fd_flags = std_event_set_fd_flags,
597 .add_timed = common_event_add_timed,
598 .add_signal = common_event_add_signal,
599 .loop_once = std_event_loop_once,
600 .loop_wait = std_event_loop_wait,
604 bool events_standard_init(void)
606 return event_register_backend("standard", &std_event_ops);
610 _PUBLIC_ NTSTATUS s4_events_standard_init(void)
612 if (!events_standard_init()) {
613 return NT_STATUS_INTERNAL_ERROR;