/*
   Unix SMB/CIFS implementation.

   main select loop and event handling - epoll implementation

   Copyright (C) Andrew Tridgell	2003-2005
   Copyright (C) Stefan Metzmacher	2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
24 #include "system/filesys.h"
25 #include "system/select.h"
27 #include "tevent_internal.h"
28 #include "tevent_util.h"
/*
  per-context private state of the epoll backend
*/
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/* list of filedescriptor events */
	struct tevent_fd *fd_events;

	/* number of registered fd event handlers */
	int num_fd_events;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* pid recorded when epoll_fd was created; compared with getpid()
	   to notice that the handle has to be reopened */
	pid_t pid;
};
55 called when a epoll call fails, and we should fallback
58 static void epoll_panic(struct epoll_event_context *epoll_ev, const char *reason)
60 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
61 "%s (%s) - calling abort()\n", reason, strerror(errno));
66 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
68 static uint32_t epoll_map_flags(uint16_t flags)
71 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
72 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
79 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
81 close(epoll_ev->epoll_fd);
82 epoll_ev->epoll_fd = -1;
89 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
91 epoll_ev->epoll_fd = epoll_create(64);
92 epoll_ev->pid = getpid();
93 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
94 if (epoll_ev->epoll_fd == -1) {
100 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
103 reopen the epoll handle when our pid changes
104 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
105 demonstration of why this is needed
107 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
109 struct tevent_fd *fde;
111 if (epoll_ev->pid == getpid()) {
115 close(epoll_ev->epoll_fd);
116 epoll_ev->epoll_fd = epoll_create(64);
117 if (epoll_ev->epoll_fd == -1) {
118 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
119 "Failed to recreate epoll handle after fork\n");
122 epoll_ev->pid = getpid();
123 for (fde=epoll_ev->fd_events;fde;fde=fde->next) {
124 epoll_add_event(epoll_ev, fde);
/* bits kept in tevent_fd.additional_flags by this backend */
/* the fd is currently registered with the epoll handle */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* EPOLLHUP/EPOLLERR should be passed on to the handler (set when reading) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* epoll_wait() has reported EPOLLHUP/EPOLLERR for this fd */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
133 add the epoll event to the given fd_event
135 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
137 struct epoll_event event;
139 if (epoll_ev->epoll_fd == -1) return;
141 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
143 /* if we don't want events yet, don't add an epoll_event */
144 if (fde->flags == 0) return;
147 event.events = epoll_map_flags(fde->flags);
148 event.data.ptr = fde;
149 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
150 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed");
152 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
154 /* only if we want to read we want to tell the event handler about errors */
155 if (fde->flags & TEVENT_FD_READ) {
156 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
161 delete the epoll event for given fd_event
163 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
165 struct epoll_event event;
167 if (epoll_ev->epoll_fd == -1) return;
169 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
171 /* if there's no epoll_event, we don't need to delete it */
172 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
175 event.events = epoll_map_flags(fde->flags);
176 event.data.ptr = fde;
177 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
178 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
179 "epoll_del_event failed! probable early close bug (%s)\n",
182 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
186 change the epoll event to the given fd_event
188 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
190 struct epoll_event event;
191 if (epoll_ev->epoll_fd == -1) return;
193 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
196 event.events = epoll_map_flags(fde->flags);
197 event.data.ptr = fde;
198 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
199 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed");
202 /* only if we want to read we want to tell the event handler about errors */
203 if (fde->flags & TEVENT_FD_READ) {
204 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
208 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
210 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
211 bool want_read = (fde->flags & TEVENT_FD_READ);
212 bool want_write= (fde->flags & TEVENT_FD_WRITE);
214 if (epoll_ev->epoll_fd == -1) return;
216 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
218 /* there's already an event */
219 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
220 if (want_read || (want_write && !got_error)) {
221 epoll_mod_event(epoll_ev, fde);
225 * if we want to match the select behavior, we need to remove the epoll_event
226 * when the caller isn't interested in events.
228 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
230 epoll_del_event(epoll_ev, fde);
234 /* there's no epoll_event attached to the fde */
235 if (want_read || (want_write && !got_error)) {
236 epoll_add_event(epoll_ev, fde);
242 event loop handling using epoll
244 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
248 struct epoll_event events[MAXEVENTS];
249 uint32_t destruction_count = ++epoll_ev->destruction_count;
252 if (epoll_ev->epoll_fd == -1) return -1;
255 /* it's better to trigger timed events a bit later than to early */
256 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
259 if (epoll_ev->ev->signal_events &&
260 tevent_common_check_signal(epoll_ev->ev)) {
264 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
266 if (ret == -1 && errno == EINTR && epoll_ev->ev->signal_events) {
267 if (tevent_common_check_signal(epoll_ev->ev)) {
272 if (ret == -1 && errno != EINTR) {
273 epoll_panic(epoll_ev, "epoll_wait() failed");
277 if (ret == 0 && tvalp) {
278 /* we don't care about a possible delay here */
279 tevent_common_loop_timer_delay(epoll_ev->ev);
283 for (i=0;i<ret;i++) {
284 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
289 epoll_panic(epoll_ev, "epoll_wait() gave bad data");
292 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
293 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
295 * if we only wait for TEVENT_FD_WRITE, we should not tell the
296 * event handler about it, and remove the epoll_event,
297 * as we only report errors when waiting for read events,
298 * to match the select() behavior
300 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
301 epoll_del_event(epoll_ev, fde);
304 flags |= TEVENT_FD_READ;
306 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
307 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
309 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
310 if (destruction_count != epoll_ev->destruction_count) {
320 create a epoll_event_context structure.
322 static int epoll_event_context_init(struct tevent_context *ev)
325 struct epoll_event_context *epoll_ev;
327 epoll_ev = talloc_zero(ev, struct epoll_event_context);
328 if (!epoll_ev) return -1;
330 epoll_ev->epoll_fd = -1;
332 ret = epoll_init_ctx(epoll_ev);
334 talloc_free(epoll_ev);
338 ev->additional_data = epoll_ev;
345 static int epoll_event_fd_destructor(struct tevent_fd *fde)
347 struct tevent_context *ev = fde->event_ctx;
348 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
349 struct epoll_event_context);
351 epoll_check_reopen(epoll_ev);
353 epoll_ev->num_fd_events--;
354 epoll_ev->destruction_count++;
356 DLIST_REMOVE(epoll_ev->fd_events, fde);
358 epoll_del_event(epoll_ev, fde);
361 fde->close_fn(ev, fde, fde->fd, fde->private_data);
370 return NULL on failure (memory allocation error)
372 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
373 int fd, uint16_t flags,
374 tevent_fd_handler_t handler,
376 const char *handler_name,
377 const char *location)
379 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
380 struct epoll_event_context);
381 struct tevent_fd *fde;
383 epoll_check_reopen(epoll_ev);
385 fde = talloc(mem_ctx?mem_ctx:ev, struct tevent_fd);
386 if (!fde) return NULL;
391 fde->handler = handler;
392 fde->close_fn = NULL;
393 fde->private_data = private_data;
394 fde->handler_name = handler_name;
395 fde->location = location;
396 fde->additional_flags = 0;
397 fde->additional_data = NULL;
399 epoll_ev->num_fd_events++;
400 talloc_set_destructor(fde, epoll_event_fd_destructor);
402 DLIST_ADD(epoll_ev->fd_events, fde);
403 epoll_add_event(epoll_ev, fde);
409 set the fd event flags
411 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
413 struct tevent_context *ev;
414 struct epoll_event_context *epoll_ev;
416 if (fde->flags == flags) return;
419 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
423 epoll_check_reopen(epoll_ev);
425 epoll_change_event(epoll_ev, fde);
429 do a single event loop using the events defined in ev
431 static int epoll_event_loop_once(struct tevent_context *ev)
433 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
434 struct epoll_event_context);
437 tval = tevent_common_loop_timer_delay(ev);
438 if (ev_timeval_is_zero(&tval)) {
442 epoll_check_reopen(epoll_ev);
444 return epoll_event_loop(epoll_ev, &tval);
448 return on failure or (with 0) if all fd events are removed
450 static int epoll_event_loop_wait(struct tevent_context *ev)
452 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
453 struct epoll_event_context);
454 while (epoll_ev->num_fd_events) {
455 if (epoll_event_loop_once(ev) != 0) {
463 static const struct tevent_ops epoll_event_ops = {
464 .context_init = epoll_event_context_init,
465 .add_fd = epoll_event_add_fd,
466 .set_fd_close_fn= tevent_common_fd_set_close_fn,
467 .get_fd_flags = tevent_common_fd_get_flags,
468 .set_fd_flags = epoll_event_set_fd_flags,
469 .add_timer = tevent_common_add_timer,
470 .add_signal = tevent_common_add_signal,
471 .loop_once = epoll_event_loop_once,
472 .loop_wait = epoll_event_loop_wait,
475 bool tevent_epoll_init(void)
477 return tevent_register_backend("epoll", &epoll_event_ops);