2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "lib/util/dlinklist.h"
27 #include "lib/events/events.h"
28 #include "lib/events/events_internal.h"
29 #include <sys/epoll.h>
/*
  Private state of the epoll backend. epoll_event_context_init stores one
  of these in event_context->additional_data; the functions in this file
  access its ev, num_fd_events, destruction_count and epoll_fd members.
  NOTE(review): not every member declaration is visible in this listing.
*/
31 struct epoll_event_context {
32 /* a pointer back to the generic event_context */
33 struct event_context *ev;
35 /* number of registered fd event handlers */
38 /* this is changed by the destructors for the fd event
39 type. It is used to detect event destruction by event
40 handlers, which means the code that is calling the event
41 handler needs to assume that the linked list is no longer
44 uint32_t destruction_count;
46 /* when using epoll this is the handle from epoll_create */
51 called when an epoll call fails, and we should fall back
/*
  Give up on epoll: log the reason together with strerror(errno), close the
  epoll handle, mark it invalid with -1 and clear the talloc destructor of
  epoll_ev (presumably so epoll_ctx_destructor does not run on the handle
  that was already closed here).
  NOTE(review): nothing visible in this function installs a select() based
  replacement; the other helpers in this file simply return early once
  epoll_fd is -1.
*/
54 static void epoll_fallback_to_select(struct epoll_event_context *epoll_ev, const char *reason)
/* errno is read by the log line before close() gets a chance to change it */
56 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
57 close(epoll_ev->epoll_fd);
58 epoll_ev->epoll_fd = -1;
59 talloc_set_destructor(epoll_ev, NULL);
63 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
/*
  Translate the EVENT_FD_READ and EVENT_FD_WRITE bits of flags into an epoll
  event mask. Either bit also requests EPOLLERR and EPOLLHUP.
  NOTE(review): the declaration of ret and the return statement are not
  visible in this listing; presumably ret starts at 0 and is returned.
*/
65 static uint32_t epoll_map_flags(uint16_t flags)
68 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
69 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
/*
  talloc destructor for the epoll context (installed by epoll_init_ctx):
  closes the epoll handle and resets epoll_fd to -1.
  NOTE(review): the return statement is not visible in this listing.
*/
76 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
78 close(epoll_ev->epoll_fd);
79 epoll_ev->epoll_fd = -1;
/*
  Create the epoll handle and register epoll_ctx_destructor on epoll_ev.
  NOTE(review): the result of epoll_create() is not checked here. On failure
  it returns -1, which the other helpers in this file treat as epoll being
  unavailable (they return early when epoll_fd is -1).
*/
86 static void epoll_init_ctx(struct epoll_event_context *epoll_ev)
88 epoll_ev->epoll_fd = epoll_create(64);
89 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
/*
  Backend private bits kept in fd_event->additional_flags:
  HAS_EVENT    - set by epoll_add_event after its EPOLL_CTL_ADD call,
                 cleared by epoll_del_event
  REPORT_ERROR - set by the add and mod helpers while the fde asks for
                 EVENT_FD_READ, cleared at the start of each helper
  GOT_ERROR    - set by epoll_event_loop once EPOLLHUP or EPOLLERR was
                 reported for the fde
*/
92 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
93 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
94 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
97 add the epoll event to the given fd_event
/*
  Register fde with the epoll handle using EPOLL_CTL_ADD. Does nothing when
  epoll is disabled (epoll_fd == -1) or when the fde currently wants no
  events. A failing epoll_ctl() triggers epoll_fallback_to_select.
*/
99 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
101 struct epoll_event event;
102 if (epoll_ev->epoll_fd == -1) return;
/* REPORT_ERROR is recomputed from the current flags further down */
104 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
106 /* if we don't want events yet, don't add an epoll_event */
107 if (fde->flags == 0) return;
/* the fde pointer travels as epoll user data; epoll_event_loop reads it back */
110 event.events = epoll_map_flags(fde->flags);
111 event.data.ptr = fde;
112 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
113 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_ADD failed");
115 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
117 /* only if we want to read we want to tell the event handler about errors */
118 if (fde->flags & EVENT_FD_READ) {
119 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
124 delete the epoll event for given fd_event
/*
  Remove the fde from the epoll handle if it is currently registered
  (HAS_EVENT set). A failing EPOLL_CTL_DEL is only logged, it does not
  trigger the fallback. REPORT_ERROR and HAS_EVENT are cleared.
*/
126 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
128 struct epoll_event event;
129 if (epoll_ev->epoll_fd == -1) return;
131 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
133 /* if there's no epoll_event, we don't need to delete it */
134 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
/*
  epoll_ctl(2) ignores the event argument for EPOLL_CTL_DEL, but kernels
  before 2.6.9 require it to be a non-NULL pointer, so a filled-in struct
  is passed anyway
*/
137 event.events = epoll_map_flags(fde->flags);
138 event.data.ptr = fde;
139 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
140 DEBUG(0,("epoll_del_event failed! probable early close bug (%s)\n", strerror(errno)));
142 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
146 change the epoll event to the given fd_event
/*
  Update the existing epoll registration of fde to its current flags using
  EPOLL_CTL_MOD, then recompute REPORT_ERROR. A failing epoll_ctl() triggers
  epoll_fallback_to_select. Does nothing when epoll is disabled.
*/
148 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
150 struct epoll_event event;
151 if (epoll_ev->epoll_fd == -1) return;
153 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
156 event.events = epoll_map_flags(fde->flags);
157 event.data.ptr = fde;
158 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
159 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_MOD failed");
162 /* only if we want to read we want to tell the event handler about errors */
163 if (fde->flags & EVENT_FD_READ) {
164 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
/*
  Bring the epoll registration of fde in line with fde->flags. With an
  existing registration (HAS_EVENT) it is modified when the fde is still
  interesting, otherwise deleted; without one it is added when interesting.
  Interesting means: wants read, or wants write and no error has been
  recorded yet (GOT_ERROR clear).
*/
168 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
170 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
171 bool want_read = (fde->flags & EVENT_FD_READ);
172 bool want_write= (fde->flags & EVENT_FD_WRITE);
174 if (epoll_ev->epoll_fd == -1) return;
176 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
178 /* there's already an event */
179 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
180 if (want_read || (want_write && !got_error)) {
181 epoll_mod_event(epoll_ev, fde);
185 * if we want to match the select behavior, we need to remove the epoll_event
186 * when the caller isn't interested in events.
188 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
190 epoll_del_event(epoll_ev, fde);
194 /* there's no epoll_event attached to the fde */
195 if (want_read || (want_write && !got_error)) {
196 epoll_add_event(epoll_ev, fde);
202 event loop handling using epoll
/*
  Wait once on the epoll handle and dispatch the reported fd events to
  their handlers.

  tvalp limits the wait; it is converted to milliseconds with the
  microsecond part rounded up. Returns -1 immediately when epoll is
  disabled (epoll_fd == -1).
  NOTE(review): several lines of this function are not visible in this
  listing (the declarations of ret, i, timeout and flags, the definition of
  MAXEVENTS, and most return statements), so the remaining return values
  and the handling of a NULL tvalp are not documented here.
*/
204 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
208 struct epoll_event events[MAXEVENTS];
/*
  bump the counter and remember the new value; epoll_event_fd_destructor
  bumps it again, which is how the dispatch loop below notices that a
  handler destroyed an fde
*/
209 uint32_t destruction_count = ++epoll_ev->destruction_count;
212 if (epoll_ev->epoll_fd == -1) return -1;
215 /* it's better to trigger timed events a bit later than too early */
216 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
/* signal check before blocking in epoll_wait() */
219 if (epoll_ev->ev->num_signal_handlers &&
220 common_event_check_signal(epoll_ev->ev)) {
224 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
/* an interrupted wait is only a reason to re-check signals, not an error */
226 if (ret == -1 && errno == EINTR && epoll_ev->ev->num_signal_handlers) {
227 if (common_event_check_signal(epoll_ev->ev)) {
232 if (ret == -1 && errno != EINTR) {
233 epoll_fallback_to_select(epoll_ev, "epoll_wait() failed");
/* no fd became ready within the timeout */
237 if (ret == 0 && tvalp) {
238 /* we don't care about a possible delay here */
239 common_event_loop_timer_delay(epoll_ev->ev);
243 for (i=0;i<ret;i++) {
/* data.ptr is the fde pointer stored by the add and mod helpers */
244 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
249 epoll_fallback_to_select(epoll_ev, "epoll_wait() gave bad data");
/* remember the error on the fde; epoll_change_event consults GOT_ERROR */
252 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
253 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
255 * if we only wait for EVENT_FD_WRITE, we should not tell the
256 * event handler about it, and remove the epoll_event,
257 * as we only report errors when waiting for read events,
258 * to match the select() behavior
260 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
261 epoll_del_event(epoll_ev, fde);
/* the error is delivered to the handler as a read event */
264 flags |= EVENT_FD_READ;
266 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
267 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
269 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
/* the counter moved: an fde was destroyed while the handler ran */
270 if (destruction_count != epoll_ev->destruction_count) {
280 create an epoll_event_context structure.
/*
  context_init hook of epoll_event_ops: allocate the zeroed backend state
  with ev as talloc parent, create the epoll handle and attach the state to
  ev->additional_data. Returns -1 if the allocation fails.
  NOTE(review): the success return is not visible in this listing.
*/
282 static int epoll_event_context_init(struct event_context *ev)
284 struct epoll_event_context *epoll_ev;
286 epoll_ev = talloc_zero(ev, struct epoll_event_context);
287 if (!epoll_ev) return -1;
/* start out as disabled; epoll_init_ctx overwrites this with the real handle */
289 epoll_ev->epoll_fd = -1;
291 epoll_init_ctx(epoll_ev);
293 ev->additional_data = epoll_ev;
/*
  talloc destructor for an fd_event created by epoll_event_add_fd: updates
  the bookkeeping counters of the backend and removes the fde from epoll.
  NOTE(review): the body of the EVENT_FD_AUTOCLOSE branch and the return
  statement are not visible in this listing.
*/
300 static int epoll_event_fd_destructor(struct fd_event *fde)
302 struct event_context *ev = fde->event_ctx;
303 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
304 struct epoll_event_context);
306 epoll_ev->num_fd_events--;
/* lets a running epoll_event_loop detect that an fde went away */
307 epoll_ev->destruction_count++;
309 epoll_del_event(epoll_ev, fde);
311 if (fde->flags & EVENT_FD_AUTOCLOSE) {
321 return NULL on failure (memory allocation error)
/*
  add_fd hook of epoll_event_ops: allocate an fd_event (talloc parent is
  mem_ctx, or ev when mem_ctx is NULL), count it in num_fd_events, install
  epoll_event_fd_destructor and register it with epoll.
  Returns NULL if the allocation fails.
  NOTE(review): the last parameter, the assignments of the remaining
  fd_event members and the final return are not visible in this listing.
*/
323 static struct fd_event *epoll_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
324 int fd, uint16_t flags,
325 event_fd_handler_t handler,
328 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
329 struct epoll_event_context);
330 struct fd_event *fde;
332 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
333 if (!fde) return NULL;
338 fde->handler = handler;
339 fde->private_data = private_data;
/* no backend flags yet; epoll_add_event sets HAS_EVENT and REPORT_ERROR */
340 fde->additional_flags = 0;
341 fde->additional_data = NULL;
343 epoll_ev->num_fd_events++;
344 talloc_set_destructor(fde, epoll_event_fd_destructor);
346 epoll_add_event(epoll_ev, fde);
353 return the fd event flags
/*
  get_fd_flags hook of epoll_event_ops.
  NOTE(review): the function body is not visible in this listing.
*/
355 static uint16_t epoll_event_get_fd_flags(struct fd_event *fde)
361 set the fd event flags
/*
  set_fd_flags hook of epoll_event_ops: returns without doing anything when
  the flags are unchanged, otherwise resynchronises the epoll registration
  through epoll_change_event.
  NOTE(review): the lines that assign ev and store the new flags in the fde
  are not visible in this listing.
*/
363 static void epoll_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
365 struct event_context *ev;
366 struct epoll_event_context *epoll_ev;
368 if (fde->flags == flags) return;
371 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
375 epoll_change_event(epoll_ev, fde);
379 do a single event loop using the events defined in ev
/*
  loop_once hook of epoll_event_ops: obtain a timeval from
  common_event_loop_timer_delay() and pass it as the wait limit to a single
  epoll_event_loop pass, whose result is returned.
  NOTE(review): the declaration of tval and the action taken when the
  returned timeval is zero are not visible in this listing.
*/
381 static int epoll_event_loop_once(struct event_context *ev)
383 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
384 struct epoll_event_context);
387 tval = common_event_loop_timer_delay(ev);
388 if (timeval_is_zero(&tval)) {
392 return epoll_event_loop(epoll_ev, &tval);
396 return on failure or (with 0) if all fd events are removed
/*
  loop_wait hook of epoll_event_ops: keeps calling epoll_event_loop_once
  while fd events are registered (num_fd_events is non-zero).
  NOTE(review): the reaction to a non-zero result of epoll_event_loop_once
  and the final return are not visible in this listing.
*/
398 static int epoll_event_loop_wait(struct event_context *ev)
400 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
401 struct epoll_event_context);
402 while (epoll_ev->num_fd_events) {
403 if (epoll_event_loop_once(ev) != 0) {
/*
  Operations table of the epoll backend, handed to event_register_backend
  by events_epoll_init. The fd related hooks are implemented in this file;
  timed and signal events use the common_event_* implementations.
*/
411 static const struct event_ops epoll_event_ops = {
412 .context_init = epoll_event_context_init,
413 .add_fd = epoll_event_add_fd,
414 .get_fd_flags = epoll_event_get_fd_flags,
415 .set_fd_flags = epoll_event_set_fd_flags,
416 .add_timed = common_event_add_timed,
417 .add_signal = common_event_add_signal,
418 .loop_once = epoll_event_loop_once,
419 .loop_wait = epoll_event_loop_wait,
/*
  Register epoll_event_ops under the backend name epoll and return the
  result of event_register_backend().
*/
422 bool events_epoll_init(void)
424 return event_register_backend("epoll", &epoll_event_ops);
/*
  NTSTATUS flavoured wrapper around events_epoll_init: a failed
  registration is reported as NT_STATUS_INTERNAL_ERROR.
  NOTE(review): the success return is not visible in this listing.
*/
428 NTSTATUS s4_events_epoll_init(void)
430 if (!events_epoll_init()) {
431 return NT_STATUS_INTERNAL_ERROR;