2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "lib/util/dlinklist.h"
27 #include "lib/events/events.h"
28 #include "lib/events/events_internal.h"
29 #include <sys/epoll.h>
/*
  Private state of the epoll event backend. One instance is allocated per
  generic event_context and stored in its additional_data pointer.
*/
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* number of registered fd event handlers */
	int num_fd_events;

	/* bumped by the destructor of every fd event. The dispatch loop
	   compares it with a snapshot taken beforehand to find out that a
	   handler destroyed an fd event, in which case the events it is
	   currently walking can no longer be trusted
	*/
	uint32_t destruction_count;

	/* the handle returned by epoll_create, or -1 when epoll is not
	   (or no longer) in use */
	int epoll_fd;
};
51 called when a epoll call fails, and we should fallback
54 static void epoll_fallback_to_select(struct epoll_event_context *epoll_ev, const char *reason)
56 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
57 close(epoll_ev->epoll_fd);
58 epoll_ev->epoll_fd = -1;
59 talloc_set_destructor(epoll_ev, NULL);
63 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
65 static uint32_t epoll_map_flags(uint16_t flags)
68 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
69 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
76 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
78 close(epoll_ev->epoll_fd);
79 epoll_ev->epoll_fd = -1;
86 static void epoll_init_ctx(struct epoll_event_context *epoll_ev)
88 epoll_ev->epoll_fd = epoll_create(64);
89 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
/* bits kept in fde->additional_flags by the epoll backend */

/* an epoll_event is currently registered for the fd */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	0x01
/* EPOLLERR/EPOLLHUP are to be passed on to the handler (set while EVENT_FD_READ is wanted) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	0x02
/* epoll reported EPOLLERR or EPOLLHUP for the fd */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	0x04
97 add the epoll event to the given fd_event
99 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
101 struct epoll_event event;
102 if (epoll_ev->epoll_fd == -1) return;
104 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
106 /* if we don't want events yet, don't add an epoll_event */
107 if (fde->flags == 0) return;
110 event.events = epoll_map_flags(fde->flags);
111 event.data.ptr = fde;
112 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
113 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_ADD failed");
115 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
117 /* only if we want to read we want to tell the event handler about errors */
118 if (fde->flags & EVENT_FD_READ) {
119 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
124 delete the epoll event for given fd_event
126 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
128 struct epoll_event event;
129 if (epoll_ev->epoll_fd == -1) return;
131 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
133 /* if there's no epoll_event, we don't need to delete it */
134 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
137 event.events = epoll_map_flags(fde->flags);
138 event.data.ptr = fde;
139 epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
140 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
144 change the epoll event to the given fd_event
146 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
148 struct epoll_event event;
149 if (epoll_ev->epoll_fd == -1) return;
151 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
154 event.events = epoll_map_flags(fde->flags);
155 event.data.ptr = fde;
156 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
157 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_MOD failed");
160 /* only if we want to read we want to tell the event handler about errors */
161 if (fde->flags & EVENT_FD_READ) {
162 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
166 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
168 BOOL got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
169 BOOL want_read = (fde->flags & EVENT_FD_READ);
170 BOOL want_write= (fde->flags & EVENT_FD_WRITE);
172 if (epoll_ev->epoll_fd == -1) return;
174 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
176 /* there's already an event */
177 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
178 if (want_read || (want_write && !got_error)) {
179 epoll_mod_event(epoll_ev, fde);
183 * if we want to match the select behavior, we need to remove the epoll_event
184 * when the caller isn't interested in events.
186 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
188 epoll_del_event(epoll_ev, fde);
192 /* there's no epoll_event attached to the fde */
193 if (want_read || (want_write && !got_error)) {
194 epoll_add_event(epoll_ev, fde);
200 event loop handling using epoll
/*
  Wait for epoll events and pass them to the handlers of the fd events
  they belong to.

  epoll_ev: the epoll backend context; -1 is returned straight away when
            it has no epoll handle
  tvalp:    time to wait, converted to milliseconds and rounded up

  NOTE(review): several short lines of this function (local declarations,
  the MAXEVENTS definition, braces and some return statements) are not
  present in this copy of the file, so the return values of the remaining
  paths cannot be confirmed from here.
*/
202 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
/* at most MAXEVENTS events are fetched by one epoll_wait() call */
206 struct epoll_event events[MAXEVENTS];
/* snapshot; compared after each handler call to notice destroyed fd events */
207 uint32_t destruction_count = epoll_ev->destruction_count;
/* epoll is not (or no longer) in use */
210 if (epoll_ev->epoll_fd == -1) return -1;
213 /* it's better to trigger timed events a bit later than too early */
214 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
217 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
/* an interrupted wait (EINTR) is tolerated; any other error gives up on epoll */
219 if (ret == -1 && errno != EINTR) {
220 epoll_fallback_to_select(epoll_ev, "epoll_wait() failed");
/* no fd event was reported and a timeout was given: run the timer handling */
224 if (ret == 0 && tvalp) {
225 common_event_loop_timer(epoll_ev->ev);
/* data.ptr of every event was set to its fd_event when it was registered */
229 for (i=0;i<ret;i++) {
230 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
235 epoll_fallback_to_select(epoll_ev, "epoll_wait() gave bad data");
/* remember on the fd_event that the fd reported an error or hangup */
238 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
239 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
241 * if we only wait for EVENT_FD_WRITE, we should not tell the
242 * event handler about it, and remove the epoll_event,
243 * as we only report errors when waiting for read events,
244 * to match the select() behavior
246 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
247 epoll_del_event(epoll_ev, fde);
/* an error that is to be reported reaches the handler as a read event */
250 flags |= EVENT_FD_READ;
252 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
253 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
255 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
/* the counter moved, so an fd event was destroyed while the handler ran */
256 if (destruction_count != epoll_ev->destruction_count) {
266 create a epoll_event_context structure.
268 static int epoll_event_context_init(struct event_context *ev)
270 struct epoll_event_context *epoll_ev;
272 epoll_ev = talloc_zero(ev, struct epoll_event_context);
273 if (!epoll_ev) return -1;
275 epoll_ev->epoll_fd = -1;
277 epoll_init_ctx(epoll_ev);
279 ev->additional_data = epoll_ev;
286 static int epoll_event_fd_destructor(struct fd_event *fde)
288 struct event_context *ev = fde->event_ctx;
289 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
290 struct epoll_event_context);
292 epoll_ev->num_fd_events--;
293 epoll_ev->destruction_count++;
295 epoll_del_event(epoll_ev, fde);
302 return NULL on failure (memory allocation error)
/*
  Allocate a new fd_event and hand it to the epoll backend.

  The fd_event is allocated on mem_ctx, or on ev when mem_ctx is NULL.
  The backend's count of registered fd events is incremented and a talloc
  destructor is installed on the new fd_event.

  NOTE(review): the end of the parameter list and a few short statements
  are not present in this copy of the file. private_data is presumably the
  last parameter - confirm against the declaration of the add_fd
  operation.
*/
304 static struct fd_event *epoll_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
305 int fd, uint16_t flags,
306 event_fd_handler_t handler,
309 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
310 struct epoll_event_context);
311 struct fd_event *fde;
/* without an explicit memory context the event context becomes the parent */
313 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
314 if (!fde) return NULL;
319 fde->handler = handler;
320 fde->private_data = private_data;
/* no EPOLL_ADDITIONAL_FD_FLAG_* bit is set yet, i.e. nothing is registered with epoll */
321 fde->additional_flags = 0;
322 fde->additional_data = NULL;
324 epoll_ev->num_fd_events++;
325 talloc_set_destructor(fde, epoll_event_fd_destructor);
/* registers the fd with epoll unless epoll is unused or no flags are wanted yet */
327 epoll_add_event(epoll_ev, fde);
334 return the fd event flags
336 static uint16_t epoll_event_get_fd_flags(struct fd_event *fde)
342 set the fd event flags
344 static void epoll_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
346 struct event_context *ev;
347 struct epoll_event_context *epoll_ev;
349 if (fde->flags == flags) return;
352 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
356 epoll_change_event(epoll_ev, fde);
360 do a single event loop using the events defined in ev
362 static int epoll_event_loop_once(struct event_context *ev)
364 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
365 struct epoll_event_context);
368 tval = common_event_loop_delay(ev);
370 if (timeval_is_zero(&tval)) {
371 common_event_loop_timer(ev);
375 return epoll_event_loop(epoll_ev, &tval);
379 return on failure or (with 0) if all fd events are removed
/*
  Run epoll_event_loop_once() again and again for as long as fd events
  are registered with the backend.

  NOTE(review): the statements that follow the failure check, and the
  final return, are not present in this copy of the file; what is
  returned when an iteration reports a non-zero result cannot be
  confirmed from here.
*/
381 static int epoll_event_loop_wait(struct event_context *ev)
383 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
384 struct epoll_event_context);
/* num_fd_events goes down whenever an fd event is destroyed */
385 while (epoll_ev->num_fd_events) {
386 if (epoll_event_loop_once(ev) != 0) {
394 static const struct event_ops epoll_event_ops = {
395 .context_init = epoll_event_context_init,
396 .add_fd = epoll_event_add_fd,
397 .get_fd_flags = epoll_event_get_fd_flags,
398 .set_fd_flags = epoll_event_set_fd_flags,
399 .add_timed = common_event_add_timed,
400 .loop_once = epoll_event_loop_once,
401 .loop_wait = epoll_event_loop_wait,
404 NTSTATUS events_epoll_init(void)
406 return event_register_backend("epoll", &epoll_event_ops);