2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "lib/util/dlinklist.h"
28 #include "lib/events/events.h"
29 #include "lib/events/events_internal.h"
30 #include <sys/epoll.h>
/*
 * Per-backend private state for the epoll implementation, hung off the
 * generic event_context via ev->additional_data.
 * NOTE(review): this chunk is missing several member declarations that the
 * rest of the file uses (num_fd_events, epoll_fd, pid) — confirm against
 * the full file.
 */
32 struct epoll_event_context {
33 /* a pointer back to the generic event_context */
34 struct event_context *ev;
36 /* list of filedescriptor events */
37 struct fd_event *fd_events;
39 /* number of registered fd event handlers */
42 /* this is changed by the destructors for the fd event
43 type. It is used to detect event destruction by event
44 handlers, which means the code that is calling the event
45 handler needs to assume that the linked list is no longer
48 uint32_t destruction_count;
50 /* when using epoll this is the handle from epoll_create */
57 called when a epoll call fails, and we should fallback
/*
 * Abandon the epoll backend after an unrecoverable epoll failure:
 * log the reason together with errno, close the epoll handle and mark
 * it invalid (-1).  All other functions in this file check
 * epoll_ev->epoll_fd == -1 and become no-ops, which effectively hands
 * the work over to the select() fallback.  The talloc destructor is
 * cleared because the fd it would close has just been closed here —
 * leaving it set would cause a double close on talloc_free().
 */
60 static void epoll_fallback_to_select(struct epoll_event_context *epoll_ev, const char *reason)
62 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
63 close(epoll_ev->epoll_fd);
64 epoll_ev->epoll_fd = -1;
65 talloc_set_destructor(epoll_ev, NULL);
69 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
/*
 * Translate the backend-independent EVENT_FD_READ/EVENT_FD_WRITE flag
 * bits into an epoll event mask.  EPOLLERR and EPOLLHUP are ORed in for
 * both directions so error/hangup conditions are delivered whichever
 * direction the caller registered for.
 * NOTE(review): the declaration/initialisation of `ret` and the final
 * `return ret;` are elided from this view of the file.
 */
71 static uint32_t epoll_map_flags(uint16_t flags)
74 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
75 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
/*
 * talloc destructor for struct epoll_event_context: releases the epoll
 * handle when the context is freed and marks it invalid so any late
 * callers see epoll_fd == -1.
 * NOTE(review): the `return 0;` talloc destructors require is elided
 * from this view.
 */
82 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
84 close(epoll_ev->epoll_fd);
85 epoll_ev->epoll_fd = -1;
/*
 * Create the epoll handle for a fresh context and remember the creating
 * pid (used by epoll_check_reopen() to detect fork()).  The size hint
 * 64 passed to epoll_create() is only advisory.  A failed epoll_create()
 * leaves epoll_fd == -1, which the other functions treat as "epoll
 * unavailable".
 */
92 static void epoll_init_ctx(struct epoll_event_context *epoll_ev)
94 epoll_ev->epoll_fd = epoll_create(64);
95 epoll_ev->pid = getpid();
96 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
/* forward declaration: epoll_check_reopen() below needs to re-add events */
99 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde);
102 reopen the epoll handle when our pid changes
103 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
104 demonstration of why this is needed
/*
 * An epoll fd is shared with the parent after fork(), so a forked child
 * must not keep using it: close it, create a fresh handle, record the
 * new pid, and re-register every fd_event on the list.  If the pid is
 * unchanged (the common case) this returns immediately.
 * NOTE(review): the early-return body of the pid check and the error
 * return after the failed epoll_create() are elided from this view.
 */
106 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
108 struct fd_event *fde;
110 if (epoll_ev->pid == getpid()) {
114 close(epoll_ev->epoll_fd);
115 epoll_ev->epoll_fd = epoll_create(64);
116 if (epoll_ev->epoll_fd == -1) {
117 DEBUG(0,("Failed to recreate epoll handle after fork\n"));
120 epoll_ev->pid = getpid();
121 for (fde=epoll_ev->fd_events;fde;fde=fde->next) {
122 epoll_add_event(epoll_ev, fde);
/*
 * Per-fd_event state bits kept in fde->additional_flags:
 *   HAS_EVENT    - an epoll_event is currently registered for this fde
 *   REPORT_ERROR - EPOLLERR/EPOLLHUP should be passed to the handler
 *                  (only set while EVENT_FD_READ is wanted, to match
 *                  select() semantics)
 *   GOT_ERROR    - an EPOLLERR/EPOLLHUP was seen for this fde
 */
126 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
127 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
128 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
131 add the epoll event to the given fd_event
/*
 * Register fde with the epoll handle (EPOLL_CTL_ADD).  No-op when epoll
 * has already been abandoned (epoll_fd == -1) or when the caller does
 * not want any events yet (fde->flags == 0).  On EPOLL_CTL_ADD failure
 * the whole backend falls back to select().  The fde itself is stored
 * in event.data.ptr so epoll_wait() results map straight back to it.
 */
133 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
135 struct epoll_event event;
137 if (epoll_ev->epoll_fd == -1) return;
/* adding resets error reporting; it is re-enabled below if reads are wanted */
139 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
141 /* if we don't want events yet, don't add an epoll_event */
142 if (fde->flags == 0) return;
145 event.events = epoll_map_flags(fde->flags);
146 event.data.ptr = fde;
147 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
148 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_ADD failed");
150 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
152 /* only if we want to read we want to tell the event handler about errors */
153 if (fde->flags & EVENT_FD_READ) {
154 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
159 delete the epoll event for given fd_event
/*
 * Unregister fde: remove it from the context's fd_events list and, if an
 * epoll_event is attached (HAS_EVENT), issue EPOLL_CTL_DEL.  The list
 * removal happens unconditionally so the fde is gone even when epoll is
 * disabled.  A DEL failure is only logged — it usually means the fd was
 * closed before the event was removed ("early close bug").
 * NOTE(review): the event struct is filled in even though the kernel
 * ignores it for EPOLL_CTL_DEL — presumably for older-kernel
 * compatibility; confirm against epoll_ctl(2).
 */
161 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
163 struct epoll_event event;
165 DLIST_REMOVE(epoll_ev->fd_events, fde);
167 if (epoll_ev->epoll_fd == -1) return;
169 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
171 /* if there's no epoll_event, we don't need to delete it */
172 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
175 event.events = epoll_map_flags(fde->flags);
176 event.data.ptr = fde;
177 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
178 DEBUG(0,("epoll_del_event failed! probable early close bug (%s)\n", strerror(errno)));
180 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
184 change the epoll event to the given fd_event
/*
 * Update the event mask of an already-registered fde (EPOLL_CTL_MOD),
 * e.g. after its EVENT_FD_* flags changed.  On MOD failure the backend
 * falls back to select().  REPORT_ERROR is recomputed: cleared first,
 * then re-set only when reads are wanted, matching epoll_add_event().
 */
186 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
188 struct epoll_event event;
189 if (epoll_ev->epoll_fd == -1) return;
191 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
194 event.events = epoll_map_flags(fde->flags);
195 event.data.ptr = fde;
196 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
197 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_MOD failed");
200 /* only if we want to read we want to tell the event handler about errors */
201 if (fde->flags & EVENT_FD_READ) {
202 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
/*
 * Reconcile the epoll registration with the fde's current flags:
 * decide between MOD (still interested), DEL (no longer interested, or
 * write-only after an error), or ADD (newly interested).  A write-only
 * fde that already GOT_ERROR is deliberately not (re)registered —
 * epoll always reports EPOLLERR/EPOLLHUP, and select() would not, so
 * keeping it registered would busy-loop on the error.
 */
206 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
208 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
209 bool want_read = (fde->flags & EVENT_FD_READ);
210 bool want_write= (fde->flags & EVENT_FD_WRITE);
212 if (epoll_ev->epoll_fd == -1) return;
214 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
216 /* there's already an event */
217 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
218 if (want_read || (want_write && !got_error)) {
219 epoll_mod_event(epoll_ev, fde);
223 * if we want to match the select behavior, we need to remove the epoll_event
224 * when the caller isn't interested in events.
226 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
228 epoll_del_event(epoll_ev, fde);
232 /* there's no epoll_event attached to the fde */
233 if (want_read || (want_write && !got_error)) {
234 DLIST_ADD(epoll_ev->fd_events, fde);
235 epoll_add_event(epoll_ev, fde);
241 event loop handling using epoll
/*
 * Run one iteration of the event loop: wait (with a timeout derived from
 * tvalp) for fd events and dispatch them.  Pending signals are checked
 * both before the wait and when epoll_wait() is interrupted by EINTR.
 * The destruction_count snapshot detects fd_event destruction from
 * inside a handler; when it changes, dispatching the remaining results
 * is abandoned because events[] may hold dangling fde pointers.
 * NOTE(review): local declarations (ret, i, timeout, flags), MAXEVENTS,
 * and several closing braces/returns are elided from this view.
 */
243 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
247 struct epoll_event events[MAXEVENTS];
248 uint32_t destruction_count = ++epoll_ev->destruction_count;
251 if (epoll_ev->epoll_fd == -1) return -1;
254 /* it's better to trigger timed events a bit later than to early */
255 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
/* give signals priority over fd events */
258 if (epoll_ev->ev->num_signal_handlers &&
259 common_event_check_signal(epoll_ev->ev)) {
263 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
265 if (ret == -1 && errno == EINTR && epoll_ev->ev->num_signal_handlers) {
266 if (common_event_check_signal(epoll_ev->ev)) {
/* any error other than EINTR disables the epoll backend */
271 if (ret == -1 && errno != EINTR) {
272 epoll_fallback_to_select(epoll_ev, "epoll_wait() failed");
/* timeout expired with no fd activity: fire due timed events */
276 if (ret == 0 && tvalp) {
277 /* we don't care about a possible delay here */
278 common_event_loop_timer_delay(epoll_ev->ev);
282 for (i=0;i<ret;i++) {
/* data.ptr was set to the fde in epoll_add_event(); verify the talloc type */
283 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
288 epoll_fallback_to_select(epoll_ev, "epoll_wait() gave bad data");
291 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
292 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
294 * if we only wait for EVENT_FD_WRITE, we should not tell the
295 * event handler about it, and remove the epoll_event,
296 * as we only report errors when waiting for read events,
297 * to match the select() behavior
299 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
300 epoll_del_event(epoll_ev, fde);
/* errors are surfaced to the handler as a readable condition */
303 flags |= EVENT_FD_READ;
305 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
306 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
308 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
/* a handler destroyed an fde: stop, events[] may now be stale */
309 if (destruction_count != epoll_ev->destruction_count) {
319 create a epoll_event_context structure.
/*
 * event_ops.context_init hook: allocate the zeroed backend context as a
 * talloc child of ev, create the epoll handle, and publish the context
 * via ev->additional_data.  Returns -1 on allocation failure.
 * NOTE(review): the success return is elided from this view.
 */
321 static int epoll_event_context_init(struct event_context *ev)
323 struct epoll_event_context *epoll_ev;
325 epoll_ev = talloc_zero(ev, struct epoll_event_context);
326 if (!epoll_ev) return -1;
328 epoll_ev->epoll_fd = -1;
330 epoll_init_ctx(epoll_ev);
332 ev->additional_data = epoll_ev;
/*
 * talloc destructor for an fd_event: unregisters it from epoll and the
 * fd_events list, updates the fd count, and bumps destruction_count so
 * an in-progress epoll_event_loop() knows not to touch this fde again.
 * EVENT_FD_AUTOCLOSE means the event owns the fd and closes it here.
 * NOTE(review): the close() call inside the AUTOCLOSE branch and the
 * destructor's return are elided from this view.
 */
339 static int epoll_event_fd_destructor(struct fd_event *fde)
341 struct event_context *ev = fde->event_ctx;
342 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
343 struct epoll_event_context);
345 epoll_ev->num_fd_events--;
346 epoll_ev->destruction_count++;
348 epoll_del_event(epoll_ev, fde);
350 if (fde->flags & EVENT_FD_AUTOCLOSE) {
360 return NULL on failure (memory allocation error)
/*
 * event_ops.add_fd hook: create an fd_event watching `fd` for `flags`
 * (EVENT_FD_*), owned by mem_ctx (or by ev when mem_ctx is NULL).  The
 * talloc destructor handles all cleanup, so callers remove the event by
 * freeing it.  epoll_check_reopen() runs first in case we forked since
 * the last call.
 * NOTE(review): the private_data parameter in the signature and the
 * assignments of fde->fd, fde->flags, fde->event_ctx (orig lines
 * 365, 375-378) plus the final `return fde;` are elided from this view.
 */
362 static struct fd_event *epoll_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
363 int fd, uint16_t flags,
364 event_fd_handler_t handler,
367 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
368 struct epoll_event_context);
369 struct fd_event *fde;
371 epoll_check_reopen(epoll_ev);
373 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
374 if (!fde) return NULL;
379 fde->handler = handler;
380 fde->private_data = private_data;
381 fde->additional_flags = 0;
382 fde->additional_data = NULL;
384 epoll_ev->num_fd_events++;
385 talloc_set_destructor(fde, epoll_event_fd_destructor);
387 DLIST_ADD(epoll_ev->fd_events, fde);
388 epoll_add_event(epoll_ev, fde);
395 return the fd event flags
/*
 * event_ops.get_fd_flags hook.
 * NOTE(review): the body (presumably `return fde->flags;`) is elided
 * from this view — confirm against the full file.
 */
397 static uint16_t epoll_event_get_fd_flags(struct fd_event *fde)
403 set the fd event flags
/*
 * event_ops.set_fd_flags hook: change which EVENT_FD_* conditions an
 * existing fde watches.  Returns early when nothing changes; otherwise
 * re-checks for fork and lets epoll_change_event() pick ADD/MOD/DEL.
 * NOTE(review): the initialisation of `ev` and the assignment of
 * fde->flags (orig lines ~411-415) are elided from this view.
 */
405 static void epoll_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
407 struct event_context *ev;
408 struct epoll_event_context *epoll_ev;
410 if (fde->flags == flags) return;
413 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
417 epoll_check_reopen(epoll_ev);
419 epoll_change_event(epoll_ev, fde);
423 do a single event loop using the events defined in ev
/*
 * event_ops.loop_once hook: compute the delay until the next timed
 * event; if a timed event is already due (zero delay) it is handled
 * immediately, otherwise wait for fd events for that long.  Also
 * re-checks for fork before blocking.
 * NOTE(review): the declaration of `tval` and the body of the
 * timeval_is_zero() branch are elided from this view.
 */
425 static int epoll_event_loop_once(struct event_context *ev)
427 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
428 struct epoll_event_context);
431 tval = common_event_loop_timer_delay(ev);
432 if (timeval_is_zero(&tval)) {
436 epoll_check_reopen(epoll_ev);
438 return epoll_event_loop(epoll_ev, &tval);
442 return on failure or (with 0) if all fd events are removed
/*
 * event_ops.loop_wait hook: keep running single loop iterations while
 * any fd events remain registered; stop on the first iteration error.
 * NOTE(review): the loop's error-return body and the final return are
 * elided from this view.
 */
444 static int epoll_event_loop_wait(struct event_context *ev)
446 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
447 struct epoll_event_context);
448 while (epoll_ev->num_fd_events) {
449 if (epoll_event_loop_once(ev) != 0) {
/*
 * Backend vtable registered under the name "epoll".  fd handling is
 * epoll-specific; timed and signal events reuse the common
 * implementations shared by all backends.
 */
457 static const struct event_ops epoll_event_ops = {
458 .context_init = epoll_event_context_init,
459 .add_fd = epoll_event_add_fd,
460 .get_fd_flags = epoll_event_get_fd_flags,
461 .set_fd_flags = epoll_event_set_fd_flags,
462 .add_timed = common_event_add_timed,
463 .add_signal = common_event_add_signal,
464 .loop_once = epoll_event_loop_once,
465 .loop_wait = epoll_event_loop_wait,
/*
 * Register the epoll backend with the generic event system under the
 * name "epoll".  Returns what event_register_backend() reports.
 */
468 bool events_epoll_init(void)
470 return event_register_backend("epoll", &epoll_event_ops);
474 NTSTATUS s4_events_epoll_init(void)
476 if (!events_epoll_init()) {
477 return NT_STATUS_INTERNAL_ERROR;