2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005-2009
9 ** NOTE! The following LGPL license applies to the tevent
10 ** library. This does NOT imply that all of Samba is released
13 This library is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 3 of the License, or (at your option) any later version.
18 This library is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
23 You should have received a copy of the GNU Lesser General Public
24 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "system/filesys.h"
29 #include "system/select.h"
31 #include "tevent_internal.h"
32 #include "tevent_util.h"
/*
 * Per-context state of the epoll backend.
 * Besides the members visible here, other code in this file also reads and
 * writes epoll_ev->epoll_fd and epoll_ev->pid.
 */
34 struct epoll_event_context {
35 /* a pointer back to the generic event_context */
36 struct tevent_context *ev;
38 /* this is changed by the destructors for the fd event
39 type. It is used to detect event destruction by event
40 handlers, which means the code that is calling the event
41 handler needs to assume that the linked list is no longer
44 uint32_t destruction_count;
46 /* when using epoll this is the handle from epoll_create */
53 called when an epoll call fails, and we should fall back
/*
 * Report a fatal epoll failure: logs 'reason' together with strerror(errno)
 * at TEVENT_DEBUG_FATAL level. The log text announces an abort().
 * NOTE(review): the abort() call itself is not among the visible lines - confirm.
 */
56 static void epoll_panic(struct epoll_event_context *epoll_ev, const char *reason)
58 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
59 "%s (%s) - calling abort()\n", reason, strerror(errno));
64 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
/*
 * Translate a TEVENT_FD_* mask into an epoll event mask accumulated in 'ret'.
 * Both the read and the write case also request EPOLLERR and EPOLLHUP.
 */
66 static uint32_t epoll_map_flags(uint16_t flags)
/* read interest -> EPOLLIN plus error/hangup notification */
69 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
/* write interest -> EPOLLOUT plus error/hangup notification */
70 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
/*
 * talloc destructor for the backend context (installed by epoll_init_ctx()):
 * closes the epoll handle and marks it invalid by storing -1.
 */
77 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
79 close(epoll_ev->epoll_fd);
80 epoll_ev->epoll_fd = -1;
/*
 * Create the epoll handle with epoll_create(64), remember the creating
 * process id and install epoll_ctx_destructor() on the context.
 * A handle of -1 is treated as a failure.
 * NOTE(review): the destructor is installed before the failure check, so on
 * the failure path it will be run with epoll_fd == -1 - confirm that is intended.
 */
87 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
89 epoll_ev->epoll_fd = epoll_create(64);
/* the pid is compared against getpid() later in epoll_check_reopen() */
90 epoll_ev->pid = getpid();
91 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
92 if (epoll_ev->epoll_fd == -1) {
/* forward declaration: epoll_check_reopen() calls this before its definition */
98 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
101 reopen the epoll handle when our pid changes
102 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for a
103 demonstration of why this is needed
/*
 * Compare the recorded pid with getpid(). When they differ, the old epoll
 * handle is closed, a new one is created with epoll_create(64), the new pid
 * is recorded and every fd event on ev->fd_events is registered again through
 * epoll_add_event().
 * NOTE(review): the early exit for the "same pid" case and the handling after
 * a failed epoll_create() are presumably on lines not visible here.
 */
105 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
107 struct tevent_fd *fde;
109 if (epoll_ev->pid == getpid()) {
113 close(epoll_ev->epoll_fd);
114 epoll_ev->epoll_fd = epoll_create(64);
115 if (epoll_ev->epoll_fd == -1) {
116 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
117 "Failed to recreate epoll handle after fork\n");
120 epoll_ev->pid = getpid();
/* re-register all known fd events with the new handle */
121 for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
122 epoll_add_event(epoll_ev, fde);
/*
 * Bits kept in fde->additional_flags by this backend:
 *  HAS_EVENT    - set after EPOLL_CTL_ADD in epoll_add_event(), cleared in
 *                 epoll_del_event()
 *  REPORT_ERROR - set while TEVENT_FD_READ is requested for the fde
 *  GOT_ERROR    - set by epoll_event_loop() when EPOLLHUP or EPOLLERR is seen
 */
126 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
127 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
128 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
131 add the epoll event to the given fd_event
/*
 * Register fde->fd with the epoll handle via EPOLL_CTL_ADD.
 * Nothing is done when there is no epoll handle (epoll_fd == -1) or when
 * fde->flags is 0. A failing epoll_ctl() is handed to epoll_panic().
 * Afterwards HAS_EVENT is set, and REPORT_ERROR too if TEVENT_FD_READ is
 * requested.
 */
133 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
135 struct epoll_event event;
137 if (epoll_ev->epoll_fd == -1) return;
/* cleared first; set again below only when reading is requested */
139 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
141 /* if we don't want events yet, don't add an epoll_event */
142 if (fde->flags == 0) return;
145 event.events = epoll_map_flags(fde->flags);
/* the fde pointer is handed back by epoll_wait() in epoll_event_loop() */
146 event.data.ptr = fde;
147 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
148 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed");
150 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
152 /* only if we want to read we want to tell the event handler about errors */
153 if (fde->flags & TEVENT_FD_READ) {
154 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
159 delete the epoll event for given fd_event
/*
 * Remove fde->fd from the epoll handle via EPOLL_CTL_DEL.
 * Nothing is done when there is no epoll handle or when HAS_EVENT is not set.
 * Unlike add/mod, a failing epoll_ctl() is only logged at TEVENT_DEBUG_FATAL
 * level and not passed to epoll_panic(). HAS_EVENT is cleared afterwards.
 */
161 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
163 struct epoll_event event;
165 if (epoll_ev->epoll_fd == -1) return;
167 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
169 /* if there's no epoll_event, we don't need to delete it */
170 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
173 event.events = epoll_map_flags(fde->flags);
174 event.data.ptr = fde;
175 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
176 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
177 "epoll_del_event failed! probable early close bug (%s)\n",
180 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
184 change the epoll event to the given fd_event
/*
 * Update the epoll registration of fde->fd to the mask derived from the
 * current fde->flags via EPOLL_CTL_MOD. Nothing is done when there is no
 * epoll handle. A failing epoll_ctl() is handed to epoll_panic().
 * REPORT_ERROR is cleared and then set again if TEVENT_FD_READ is requested.
 */
186 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
188 struct epoll_event event;
189 if (epoll_ev->epoll_fd == -1) return;
191 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
194 event.events = epoll_map_flags(fde->flags);
195 event.data.ptr = fde;
196 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
197 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed");
200 /* only if we want to read we want to tell the event handler about errors */
201 if (fde->flags & TEVENT_FD_READ) {
202 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
/*
 * Bring the epoll registration of fde in line with the current fde->flags.
 * An fde counts as "wanted" when reading is requested, or when writing is
 * requested and no error (GOT_ERROR) has been recorded for it.
 *  - already registered (HAS_EVENT) and wanted: epoll_mod_event()
 *  - already registered, not wanted: epoll_del_event()
 *  - not registered and wanted: epoll_add_event()
 * NOTE(review): the returns separating these cases are presumably on lines
 * not visible here.
 */
206 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
208 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
209 bool want_read = (fde->flags & TEVENT_FD_READ);
210 bool want_write= (fde->flags & TEVENT_FD_WRITE);
212 if (epoll_ev->epoll_fd == -1) return;
214 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
216 /* there's already an event */
217 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
218 if (want_read || (want_write && !got_error)) {
219 epoll_mod_event(epoll_ev, fde);
223 * if we want to match the select behavior, we need to remove the epoll_event
224 * when the caller isn't interested in events.
226 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
228 epoll_del_event(epoll_ev, fde);
232 /* there's no epoll_event attached to the fde */
233 if (want_read || (want_write && !got_error)) {
234 epoll_add_event(epoll_ev, fde);
240 event loop handling using epoll
/*
 * Wait for fd events with epoll_wait() (at most MAXEVENTS per call) and
 * dispatch them to the fd handlers.
 *
 * tvalp is converted to a millisecond timeout, rounding microseconds up.
 * Pending signals are checked before the wait and again after an EINTR.
 * Any other epoll_wait() failure is handed to epoll_panic(). When no event
 * arrived and tvalp is set, the timer code is run instead.
 *
 * For each returned event the fde is recovered from data.ptr. On
 * EPOLLHUP/EPOLLERR the fde is marked GOT_ERROR; if errors are not to be
 * reported for it (REPORT_ERROR unset) its epoll event is removed, otherwise
 * the error is delivered as TEVENT_FD_READ. EPOLLIN/EPOLLOUT map to
 * TEVENT_FD_READ/TEVENT_FD_WRITE and the handler is invoked.
 *
 * Returns -1 when there is no epoll handle; other return values are on lines
 * not visible here.
 */
242 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
246 struct epoll_event events[MAXEVENTS];
/* snapshot taken after incrementing; compared again after each handler call */
247 uint32_t destruction_count = ++epoll_ev->destruction_count;
250 if (epoll_ev->epoll_fd == -1) return -1;
253 /* it's better to trigger timed events a bit later than too early */
254 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
257 if (epoll_ev->ev->signal_events &&
258 tevent_common_check_signal(epoll_ev->ev)) {
262 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
264 if (ret == -1 && errno == EINTR && epoll_ev->ev->signal_events) {
265 if (tevent_common_check_signal(epoll_ev->ev)) {
270 if (ret == -1 && errno != EINTR) {
271 epoll_panic(epoll_ev, "epoll_wait() failed");
275 if (ret == 0 && tvalp) {
276 /* we don't care about a possible delay here */
277 tevent_common_loop_timer_delay(epoll_ev->ev);
281 for (i=0;i<ret;i++) {
282 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
287 epoll_panic(epoll_ev, "epoll_wait() gave bad data");
290 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
291 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
293 * if we only wait for TEVENT_FD_WRITE, we should not tell the
294 * event handler about it, and remove the epoll_event,
295 * as we only report errors when waiting for read events,
296 * to match the select() behavior
298 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
299 epoll_del_event(epoll_ev, fde);
302 flags |= TEVENT_FD_READ;
304 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
305 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
307 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
/* a changed counter means an fd event was destroyed while the handler ran */
308 if (destruction_count != epoll_ev->destruction_count) {
318 create an epoll_event_context structure.
/*
 * Backend context_init hook (see epoll_event_ops).
 * Allocates a zeroed epoll_event_context as a talloc child of ev (returning
 * -1 if that fails), marks the epoll handle invalid, then calls
 * epoll_init_ctx() and stores the context in ev->additional_data.
 * NOTE(review): the talloc_free() is presumably guarded by a check of 'ret'
 * on a line not visible here.
 */
320 static int epoll_event_context_init(struct tevent_context *ev)
323 struct epoll_event_context *epoll_ev;
325 epoll_ev = talloc_zero(ev, struct epoll_event_context);
326 if (!epoll_ev) return -1;
328 epoll_ev->epoll_fd = -1;
330 ret = epoll_init_ctx(epoll_ev);
332 talloc_free(epoll_ev);
336 ev->additional_data = epoll_ev;
/*
 * talloc destructor for fd events created by epoll_event_add_fd().
 * Looks up the backend context from ev->additional_data, re-creates the epoll
 * handle if the pid changed, bumps destruction_count so a running
 * epoll_event_loop() can notice the destruction, removes the epoll
 * registration and finally chains to tevent_common_fd_destructor().
 */
343 static int epoll_event_fd_destructor(struct tevent_fd *fde)
345 struct tevent_context *ev = fde->event_ctx;
346 struct epoll_event_context *epoll_ev = NULL;
349 epoll_ev = talloc_get_type(ev->additional_data,
350 struct epoll_event_context);
352 epoll_check_reopen(epoll_ev);
354 epoll_ev->destruction_count++;
356 epoll_del_event(epoll_ev, fde);
359 return tevent_common_fd_destructor(fde);
364 return NULL on failure (memory allocation error)
/*
 * Backend add_fd hook (see epoll_event_ops).
 * Re-creates the epoll handle if the pid changed, creates the fd event with
 * tevent_common_add_fd() (returning NULL if that fails), installs
 * epoll_event_fd_destructor() on it and registers it with epoll.
 */
366 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
367 int fd, uint16_t flags,
368 tevent_fd_handler_t handler,
370 const char *handler_name,
371 const char *location)
373 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
374 struct epoll_event_context);
375 struct tevent_fd *fde;
377 epoll_check_reopen(epoll_ev);
379 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
380 handler, private_data,
381 handler_name, location);
382 if (!fde) return NULL;
384 talloc_set_destructor(fde, epoll_event_fd_destructor);
386 epoll_add_event(epoll_ev, fde);
392 set the fd event flags
/*
 * Backend set_fd_flags hook (see epoll_event_ops).
 * Returns immediately when the requested flags equal the current ones.
 * Otherwise the backend context is looked up, the epoll handle is re-created
 * if the pid changed, and epoll_change_event() updates the registration.
 * NOTE(review): the assignments to 'ev' and to fde->flags are not among the
 * visible lines - confirm they precede their uses.
 */
394 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
396 struct tevent_context *ev;
397 struct epoll_event_context *epoll_ev;
399 if (fde->flags == flags) return;
402 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
406 epoll_check_reopen(epoll_ev);
408 epoll_change_event(epoll_ev, fde);
412 do a single event loop using the events defined in ev
/*
 * Backend loop_once hook (see epoll_event_ops).
 * Obtains the delay until the next timer from
 * tevent_common_loop_timer_delay(), tests it for zero, re-creates the epoll
 * handle if the pid changed and then runs epoll_event_loop() with that delay
 * as timeout.
 * NOTE(review): what happens for a zero delay is on a line not visible here.
 */
414 static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
416 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
417 struct epoll_event_context);
420 tval = tevent_common_loop_timer_delay(ev);
421 if (tevent_timeval_is_zero(&tval)) {
425 epoll_check_reopen(epoll_ev);
427 return epoll_event_loop(epoll_ev, &tval);
431 return on failure or (with 0) if all fd events are removed
/*
 * Backend loop_wait hook (see epoll_event_ops).
 * Keeps calling epoll_event_loop_once() for as long as the context still has
 * fd events; a non-zero result from a single iteration is treated specially
 * (the handling itself is on a line not visible here).
 */
433 static int epoll_event_loop_wait(struct tevent_context *ev, const char *location)
435 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
436 struct epoll_event_context);
437 while (epoll_ev->ev->fd_events) {
438 if (epoll_event_loop_once(ev, location) != 0) {
/*
 * Operations table of the epoll backend. Context setup, adding fds, changing
 * fd flags and the loop functions are implemented in this file; close-fn
 * handling, flag queries, timers and signals use the tevent_common_* helpers.
 */
446 static const struct tevent_ops epoll_event_ops = {
447 .context_init = epoll_event_context_init,
448 .add_fd = epoll_event_add_fd,
449 .set_fd_close_fn= tevent_common_fd_set_close_fn,
450 .get_fd_flags = tevent_common_fd_get_flags,
451 .set_fd_flags = epoll_event_set_fd_flags,
452 .add_timer = tevent_common_add_timer,
453 .add_signal = tevent_common_add_signal,
454 .loop_once = epoll_event_loop_once,
455 .loop_wait = epoll_event_loop_wait,
/*
 * Register this backend with tevent under the name "epoll" and return the
 * result of tevent_register_backend().
 */
458 bool tevent_epoll_init(void)
460 return tevent_register_backend("epoll", &epoll_event_ops);