tevent: rewrite/simplify tevent_poll and maintain ev->fd_events correctly
[nivanova/samba-autobuild/.git] / lib / tevent / tevent_poll.c
index f63173744fae431cf64857b3525a99b3156f5977..282f3cf3082c32dfb0ba1359289554a34d242940 100644 (file)
@@ -34,57 +34,32 @@ struct poll_event_context {
        struct tevent_context *ev;
 
        /*
-        * A DLIST for fresh fde's added by poll_event_add_fd but not
-        * picked up yet by poll_event_loop_once
+        * one or more events were deleted or disabled
         */
-       struct tevent_fd *fresh;
+       bool deleted;
 
        /*
         * These two arrays are maintained together.
+        *
+        * The following is always true:
+        * num_fds <= num_fdes
+        *
+        * new 'fresh' elements are added at the end
+        * of the 'fdes' array and picked up later
+        * to the 'fds' array in poll_event_sync_arrays()
+        * before the poll() syscall.
         */
        struct pollfd *fds;
+       size_t num_fds;
        struct tevent_fd **fdes;
-       unsigned num_fds;
+       size_t num_fdes;
 
        /*
-        * Signal fd to wake the poll() thread
+        * use tevent_common_wakeup(ev) to wake the poll() thread
         */
-       int signal_fd;
-
-       /* information for exiting from the event loop */
-       int exit_code;
+       bool use_mt_mode;
 };
 
-static int poll_event_context_destructor(struct poll_event_context *poll_ev)
-{
-       struct tevent_fd *fd, *fn;
-
-       for (fd = poll_ev->fresh; fd; fd = fn) {
-               fn = fd->next;
-               fd->event_ctx = NULL;
-               DLIST_REMOVE(poll_ev->fresh, fd);
-       }
-
-       if (poll_ev->signal_fd == -1) {
-               /*
-                * Non-threaded, no signal pipe
-                */
-               return 0;
-       }
-
-       close(poll_ev->signal_fd);
-       poll_ev->signal_fd = -1;
-
-       if (poll_ev->num_fds == 0) {
-               return 0;
-       }
-       if (poll_ev->fds[0].fd != -1) {
-               close(poll_ev->fds[0].fd);
-               poll_ev->fds[0].fd = -1;
-       }
-       return 0;
-}
-
 /*
   create a poll_event_context structure.
 */
@@ -92,35 +67,26 @@ static int poll_event_context_init(struct tevent_context *ev)
 {
        struct poll_event_context *poll_ev;
 
+       /*
+        * we might be called during tevent_re_initialise()
+        * which means we need to free our old additional_data
+        * in order to detach old fd events from the
+        * poll_ev->fresh list
+        */
+       TALLOC_FREE(ev->additional_data);
+
        poll_ev = talloc_zero(ev, struct poll_event_context);
        if (poll_ev == NULL) {
                return -1;
        }
        poll_ev->ev = ev;
-       poll_ev->signal_fd = -1;
        ev->additional_data = poll_ev;
-       talloc_set_destructor(poll_ev, poll_event_context_destructor);
        return 0;
 }
 
-static bool set_nonblock(int fd)
-{
-       int val;
-
-       val = fcntl(fd, F_GETFL, 0);
-       if (val == -1) {
-               return false;
-       }
-       val |= O_NONBLOCK;
-
-       return (fcntl(fd, F_SETFL, val) != -1);
-}
-
 static int poll_event_context_init_mt(struct tevent_context *ev)
 {
        struct poll_event_context *poll_ev;
-       struct pollfd *pfd;
-       int fds[2];
        int ret;
 
        ret = poll_event_context_init(ev);
@@ -131,67 +97,22 @@ static int poll_event_context_init_mt(struct tevent_context *ev)
        poll_ev = talloc_get_type_abort(
                ev->additional_data, struct poll_event_context);
 
-       poll_ev->fds = talloc_zero(poll_ev, struct pollfd);
-       if (poll_ev->fds == NULL) {
-               return -1;
-       }
-
-       ret = pipe(fds);
-       if (ret == -1) {
-               return -1;
-       }
-
-       if (!set_nonblock(fds[0]) || !set_nonblock(fds[1])) {
-               close(fds[0]);
-               close(fds[1]);
-               return -1;
+       ret = tevent_common_wakeup_init(ev);
+       if (ret != 0) {
+               return ret;
        }
 
-       poll_ev->signal_fd = fds[1];
-
-       pfd = &poll_ev->fds[0];
-       pfd->fd = fds[0];
-       pfd->events = (POLLIN|POLLHUP);
-
-       poll_ev->num_fds = 1;
-
-       talloc_set_destructor(poll_ev, poll_event_context_destructor);
+       poll_ev->use_mt_mode = true;
 
        return 0;
 }
 
 static void poll_event_wake_pollthread(struct poll_event_context *poll_ev)
 {
-       char c;
-       ssize_t ret;
-
-       if (poll_ev->signal_fd == -1) {
-               return;
-       }
-       c = 0;
-       do {
-               ret = write(poll_ev->signal_fd, &c, sizeof(c));
-       } while ((ret == -1) && (errno == EINTR));
-}
-
-static void poll_event_drain_signal_fd(struct poll_event_context *poll_ev)
-{
-       char buf[16];
-       ssize_t ret;
-       int fd;
-
-       if (poll_ev->signal_fd == -1) {
-               return;
-       }
-
-       if (poll_ev->num_fds < 1) {
+       if (!poll_ev->use_mt_mode) {
                return;
        }
-       fd = poll_ev->fds[0].fd;
-
-       do {
-               ret = read(fd, buf, sizeof(buf));
-       } while (ret == sizeof(buf));
+       tevent_common_wakeup(poll_ev->ev);
 }
 
 /*
@@ -210,27 +131,17 @@ static int poll_event_fd_destructor(struct tevent_fd *fde)
        poll_ev = talloc_get_type_abort(
                ev->additional_data, struct poll_event_context);
 
+       if (del_idx == UINT64_MAX) {
+               goto done;
+       }
+
        poll_ev->fdes[del_idx] = NULL;
+       poll_ev->deleted = true;
        poll_event_wake_pollthread(poll_ev);
 done:
        return tevent_common_fd_destructor(fde);
 }
 
-static int poll_fresh_fde_destructor(struct tevent_fd *fde)
-{
-       struct tevent_context *ev = fde->event_ctx;
-       struct poll_event_context *poll_ev;
-
-       if (ev == NULL) {
-               return 0;
-       }
-       poll_ev = talloc_get_type_abort(
-               ev->additional_data, struct poll_event_context);
-
-       DLIST_REMOVE(poll_ev->fresh, fde);
-       return 0;
-}
-
 static void poll_event_schedule_immediate(struct tevent_immediate *im,
                                          struct tevent_context *ev,
                                          tevent_immediate_handler_t handler,
@@ -246,6 +157,56 @@ static void poll_event_schedule_immediate(struct tevent_immediate *im,
        poll_event_wake_pollthread(poll_ev);
 }
 
+/*
+  Private function called by "standard" backend fallback.
+  Note this only allows fallback to "poll" backend, not "poll-mt".
+*/
+_PRIVATE_ bool tevent_poll_event_add_fd_internal(struct tevent_context *ev,
+                                                struct tevent_fd *fde)
+{
+       struct poll_event_context *poll_ev = talloc_get_type_abort(
+               ev->additional_data, struct poll_event_context);
+       uint64_t fde_idx = UINT64_MAX;
+       size_t num_fdes;
+
+       fde->additional_flags = UINT64_MAX;
+       talloc_set_destructor(fde, poll_event_fd_destructor);
+
+       if (fde->flags == 0) {
+               /*
+                * Nothing more to do...
+                */
+               return true;
+       }
+
+       /*
+        * We need to add it to the end of the 'fdes' array.
+        */
+       num_fdes = poll_ev->num_fdes + 1;
+       if (num_fdes > talloc_array_length(poll_ev->fdes)) {
+               struct tevent_fd **tmp_fdes = NULL;
+               size_t array_length;
+
+               array_length = (num_fdes + 15) & ~15; /* round up to 16 */
+
+               tmp_fdes = talloc_realloc(poll_ev,
+                                         poll_ev->fdes,
+                                         struct tevent_fd *,
+                                         array_length);
+               if (tmp_fdes == NULL) {
+                       return false;
+               }
+               poll_ev->fdes = tmp_fdes;
+       }
+
+       fde_idx = poll_ev->num_fdes;
+       fde->additional_flags = fde_idx;
+       poll_ev->fdes[fde_idx] = fde;
+       poll_ev->num_fdes++;
+
+       return true;
+}
+
 /*
   add a fd based event
   return NULL on failure (memory allocation error)
@@ -261,28 +222,29 @@ static struct tevent_fd *poll_event_add_fd(struct tevent_context *ev,
        struct poll_event_context *poll_ev = talloc_get_type_abort(
                ev->additional_data, struct poll_event_context);
        struct tevent_fd *fde;
+       bool ok;
 
        if (fd < 0) {
                return NULL;
        }
 
-       fde = talloc(mem_ctx ? mem_ctx : ev, struct tevent_fd);
+       fde = tevent_common_add_fd(ev,
+                                  mem_ctx,
+                                  fd,
+                                  flags,
+                                  handler,
+                                  private_data,
+                                  handler_name,
+                                  location);
        if (fde == NULL) {
                return NULL;
        }
-       fde->event_ctx          = ev;
-       fde->fd                 = fd;
-       fde->flags              = flags;
-       fde->handler            = handler;
-       fde->close_fn           = NULL;
-       fde->private_data       = private_data;
-       fde->handler_name       = handler_name;
-       fde->location           = location;
-       fde->additional_flags   = UINT64_MAX;
-       fde->additional_data    = NULL;
-
-       DLIST_ADD(poll_ev->fresh, fde);
-       talloc_set_destructor(fde, poll_fresh_fde_destructor);
+
+       ok = tevent_poll_event_add_fd_internal(ev, fde);
+       if (!ok) {
+               TALLOC_FREE(fde);
+               return NULL;
+       }
        poll_event_wake_pollthread(poll_ev);
 
        /*
@@ -297,19 +259,51 @@ static struct tevent_fd *poll_event_add_fd(struct tevent_context *ev,
 */
 static void poll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
 {
-       struct poll_event_context *poll_ev = talloc_get_type_abort(
-               fde->event_ctx->additional_data, struct poll_event_context);
+       struct tevent_context *ev = fde->event_ctx;
+       struct poll_event_context *poll_ev;
        uint64_t idx = fde->additional_flags;
        uint16_t pollflags;
 
+       if (ev == NULL) {
+               return;
+       }
+
+       if (fde->flags == flags) {
+               return;
+       }
+
+       poll_ev = talloc_get_type_abort(
+               ev->additional_data, struct poll_event_context);
+
        fde->flags = flags;
 
        if (idx == UINT64_MAX) {
                /*
-                * poll_event_setup_fresh not yet called after this fde was
-                * added. We don't have to do anything to transfer the changed
-                * flags to the array passed to poll(2)
+                * We move it between the fresh and disabled lists.
+                */
+               tevent_poll_event_add_fd_internal(ev, fde);
+               poll_event_wake_pollthread(poll_ev);
+               return;
+       }
+
+       if (fde->flags == 0) {
+               /*
+                * We need to remove it from the array
+                * and move it to the disabled list.
                 */
+               poll_ev->fdes[idx] = NULL;
+               poll_ev->deleted = true;
+               fde->additional_flags = UINT64_MAX;
+               poll_event_wake_pollthread(poll_ev);
+               return;
+       }
+
+       if (idx >= poll_ev->num_fds) {
+               /*
+                * Not yet added to the
+                * poll_ev->fds array.
+                */
+               poll_event_wake_pollthread(poll_ev);
                return;
        }
 
@@ -326,55 +320,85 @@ static void poll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
        poll_event_wake_pollthread(poll_ev);
 }
 
-static bool poll_event_setup_fresh(struct tevent_context *ev,
+static bool poll_event_sync_arrays(struct tevent_context *ev,
                                   struct poll_event_context *poll_ev)
 {
-       struct tevent_fd *fde, *next;
-       unsigned num_fresh, num_fds;
+       size_t i;
+       size_t array_length;
 
-       if (poll_ev->fresh == NULL) {
-               return true;
+       if (poll_ev->deleted) {
+
+               for (i=0; i < poll_ev->num_fds;) {
+                       struct tevent_fd *fde = poll_ev->fdes[i];
+                       size_t ci;
+
+                       if (fde != NULL) {
+                               i++;
+                               continue;
+                       }
+
+                       /*
+                        * This fde was talloc_free()'ed. Delete it
+                        * from the arrays
+                        */
+                       poll_ev->num_fds -= 1;
+                       ci = poll_ev->num_fds;
+                       if (ci > i) {
+                               poll_ev->fds[i] = poll_ev->fds[ci];
+                               poll_ev->fdes[i] = poll_ev->fdes[ci];
+                               if (poll_ev->fdes[i] != NULL) {
+                                       poll_ev->fdes[i]->additional_flags = i;
+                               }
+                       }
+                       poll_ev->fds[ci] = (struct pollfd) { .fd = -1 };
+                       poll_ev->fdes[ci] = NULL;
+               }
+               poll_ev->deleted = false;
        }
 
-       num_fresh = 0;
-       for (fde = poll_ev->fresh; fde; fde = fde->next) {
-               num_fresh += 1;
+       if (poll_ev->num_fds == poll_ev->num_fdes) {
+               return true;
        }
-       num_fds = poll_ev->num_fds + num_fresh;
 
        /*
-        * We check the length of fdes here. It is the last one
-        * enlarged, so if the realloc for poll_fd->fdes fails,
-        * poll_fd->fds will have at least the size of poll_fd->fdes
+        * Recheck the size of both arrays and make sure
+        * poll_fd->fds array has at least the size of the
+        * in use poll_ev->fdes array.
         */
+       if (poll_ev->num_fdes > talloc_array_length(poll_ev->fds)) {
+               struct pollfd *tmp_fds = NULL;
 
-       if (num_fds >= talloc_array_length(poll_ev->fdes)) {
-               struct pollfd *tmp_fds;
-               struct tevent_fd **tmp_fdes;
-               unsigned array_length;
-
-               array_length = (num_fds + 15) & ~15; /* round up to 16 */
+               /*
+                * Make sure both allocated the same length.
+                */
+               array_length = talloc_array_length(poll_ev->fdes);
 
-               tmp_fds = talloc_realloc(
-                       poll_ev, poll_ev->fds, struct pollfd, array_length);
+               tmp_fds = talloc_realloc(poll_ev,
+                                        poll_ev->fds,
+                                        struct pollfd,
+                                        array_length);
                if (tmp_fds == NULL) {
                        return false;
                }
                poll_ev->fds = tmp_fds;
-
-               tmp_fdes = talloc_realloc(
-                       poll_ev, poll_ev->fdes, struct tevent_fd *,
-                       array_length);
-               if (tmp_fdes == NULL) {
-                       return false;
-               }
-               poll_ev->fdes = tmp_fdes;
        }
 
-       for (fde = poll_ev->fresh; fde; fde = next) {
-               struct pollfd *pfd;
+       /*
+        * Now setup the new elements.
+        */
+       for (i = poll_ev->num_fds; i < poll_ev->num_fdes; i++) {
+               struct tevent_fd *fde = poll_ev->fdes[i];
+               struct pollfd *pfd = &poll_ev->fds[poll_ev->num_fds];
+
+               if (fde == NULL) {
+                       continue;
+               }
 
-               pfd = &poll_ev->fds[poll_ev->num_fds];
+               if (i > poll_ev->num_fds) {
+                       poll_ev->fdes[poll_ev->num_fds] = fde;
+                       fde->additional_flags = poll_ev->num_fds;
+                       poll_ev->fdes[i] = NULL;
+               }
 
                pfd->fd = fde->fd;
                pfd->events = 0;
@@ -387,17 +411,41 @@ static bool poll_event_setup_fresh(struct tevent_context *ev,
                        pfd->events |= (POLLOUT);
                }
 
-               fde->additional_flags = poll_ev->num_fds;
-               poll_ev->fdes[poll_ev->num_fds] = fde;
+               poll_ev->num_fds += 1;
+       }
+       /* Both are in sync again */
+       poll_ev->num_fdes = poll_ev->num_fds;
 
-               next = fde->next;
-               DLIST_REMOVE(poll_ev->fresh, fde);
-               DLIST_ADD(ev->fd_events, fde);
+       /*
+        * Check if we should shrink the arrays
+        * But keep at least 16 elements.
+        */
 
-               talloc_set_destructor(fde, poll_event_fd_destructor);
+       array_length = (poll_ev->num_fds + 15) & ~15; /* round up to 16 */
+       array_length = MAX(array_length, 16);
+       if (array_length < talloc_array_length(poll_ev->fdes)) {
+               struct tevent_fd **tmp_fdes = NULL;
+               struct pollfd *tmp_fds = NULL;
 
-               poll_ev->num_fds += 1;
+               tmp_fdes = talloc_realloc(poll_ev,
+                                         poll_ev->fdes,
+                                         struct tevent_fd *,
+                                         array_length);
+               if (tmp_fdes == NULL) {
+                       return false;
+               }
+               poll_ev->fdes = tmp_fdes;
+
+               tmp_fds = talloc_realloc(poll_ev,
+                                        poll_ev->fds,
+                                        struct pollfd,
+                                        array_length);
+               if (tmp_fds == NULL) {
+                       return false;
+               }
+               poll_ev->fds = tmp_fds;
        }
+
        return true;
 }
 
@@ -411,8 +459,11 @@ static int poll_event_loop_poll(struct tevent_context *ev,
                ev->additional_data, struct poll_event_context);
        int pollrtn;
        int timeout = -1;
-       unsigned first_fd;
+       int poll_errno;
+       struct tevent_fd *fde = NULL;
+       struct tevent_fd *next = NULL;
        unsigned i;
+       bool ok;
 
        if (ev->signal_events && tevent_common_check_signal(ev)) {
                return 0;
@@ -423,17 +474,17 @@ static int poll_event_loop_poll(struct tevent_context *ev,
                timeout += (tvalp->tv_usec + 999) / 1000;
        }
 
-       poll_event_drain_signal_fd(poll_ev);
-
-       if (!poll_event_setup_fresh(ev, poll_ev)) {
+       ok = poll_event_sync_arrays(ev, poll_ev);
+       if (!ok) {
                return -1;
        }
 
        tevent_trace_point_callback(poll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
        pollrtn = poll(poll_ev->fds, poll_ev->num_fds, timeout);
+       poll_errno = errno;
        tevent_trace_point_callback(poll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
 
-       if (pollrtn == -1 && errno == EINTR && ev->signal_events) {
+       if (pollrtn == -1 && poll_errno == EINTR && ev->signal_events) {
                tevent_common_check_signal(ev);
                return 0;
        }
@@ -451,34 +502,43 @@ static int poll_event_loop_poll(struct tevent_context *ev,
                return 0;
        }
 
-       first_fd = (poll_ev->signal_fd != -1) ? 1 : 0;
-
        /* at least one file descriptor is ready - check
           which ones and call the handler, being careful to allow
           the handler to remove itself when called */
 
-       for (i=first_fd; i<poll_ev->num_fds; i++) {
+       for (fde = ev->fd_events; fde; fde = next) {
+               uint64_t idx = fde->additional_flags;
                struct pollfd *pfd;
-               struct tevent_fd *fde;
                uint16_t flags = 0;
 
-               fde = poll_ev->fdes[i];
-               if (fde == NULL) {
+               next = fde->next;
+
+               if (idx == UINT64_MAX) {
+                       continue;
+               }
+
+               pfd = &poll_ev->fds[idx];
+
+               if (pfd->revents & POLLNVAL) {
                        /*
-                        * This fde was talloc_free()'ed. Delete it
-                        * from the arrays
+                        * the socket is dead! this should never
+                        * happen as the socket should have first been
+                        * made readable and that should have removed
+                        * the event, so this must be a bug.
+                        *
+                        * We ignore it here to match the epoll
+                        * behavior.
                         */
-                       poll_ev->num_fds -= 1;
-                       poll_ev->fds[i] = poll_ev->fds[poll_ev->num_fds];
-                       poll_ev->fdes[i] = poll_ev->fdes[poll_ev->num_fds];
-                       if (poll_ev->fdes[i] != NULL) {
-                               poll_ev->fdes[i]->additional_flags = i;
-                       }
+                       tevent_debug(ev, TEVENT_DEBUG_ERROR,
+                                    "POLLNVAL on fde[%p] fd[%d] - disabling\n",
+                                    fde, pfd->fd);
+                       poll_ev->fdes[idx] = NULL;
+                       poll_ev->deleted = true;
+                       DLIST_REMOVE(ev->fd_events, fde);
+                       fde->event_ctx = NULL;
                        continue;
                }
 
-               pfd = &poll_ev->fds[i];
-
                if (pfd->revents & (POLLHUP|POLLERR)) {
                        /* If we only wait for TEVENT_FD_WRITE, we
                           should not tell the event handler about it,
@@ -497,9 +557,38 @@ static int poll_event_loop_poll(struct tevent_context *ev,
                if (pfd->revents & POLLOUT) {
                        flags |= TEVENT_FD_WRITE;
                }
+               /*
+                * Note that fde->flags could be changed when using
+                * the poll_mt backend together with threads,
+                * that why we need to check pfd->revents and fde->flags
+                */
+               flags &= fde->flags;
                if (flags != 0) {
+                       DLIST_DEMOTE(ev->fd_events, fde);
                        fde->handler(ev, fde, flags, fde->private_data);
-                       break;
+                       return 0;
+               }
+       }
+
+       for (i = 0; i < poll_ev->num_fds; i++) {
+               if (poll_ev->fds[i].revents & POLLNVAL) {
+                       /*
+                        * the socket is dead! this should never
+                        * happen as the socket should have first been
+                        * made readable and that should have removed
+                        * the event, so this must be a bug or
+                        * a race in the poll_mt usage.
+                        */
+                       fde = poll_ev->fdes[i];
+                       tevent_debug(ev, TEVENT_DEBUG_WARNING,
+                                    "POLLNVAL on dangling fd[%d] fde[%p] - disabling\n",
+                                    poll_ev->fds[i].fd, fde);
+                       poll_ev->fdes[i] = NULL;
+                       poll_ev->deleted = true;
+                       if (fde != NULL) {
+                               DLIST_REMOVE(ev->fd_events, fde);
+                               fde->event_ctx = NULL;
+                       }
                }
        }
 
@@ -519,6 +608,10 @@ static int poll_event_loop_once(struct tevent_context *ev,
                return 0;
        }
 
+       if (ev->threaded_contexts != NULL) {
+               tevent_common_threaded_activate_immediate(ev);
+       }
+
        if (ev->immediate_events &&
            tevent_common_loop_immediate(ev)) {
                return 0;
@@ -538,7 +631,7 @@ static const struct tevent_ops poll_event_ops = {
        .set_fd_close_fn        = tevent_common_fd_set_close_fn,
        .get_fd_flags           = tevent_common_fd_get_flags,
        .set_fd_flags           = poll_event_set_fd_flags,
-       .add_timer              = tevent_common_add_timer,
+       .add_timer              = tevent_common_add_timer_v2,
        .schedule_immediate     = tevent_common_schedule_immediate,
        .add_signal             = tevent_common_add_signal,
        .loop_once              = poll_event_loop_once,
@@ -556,7 +649,7 @@ static const struct tevent_ops poll_event_mt_ops = {
        .set_fd_close_fn        = tevent_common_fd_set_close_fn,
        .get_fd_flags           = tevent_common_fd_get_flags,
        .set_fd_flags           = poll_event_set_fd_flags,
-       .add_timer              = tevent_common_add_timer,
+       .add_timer              = tevent_common_add_timer_v2,
        .schedule_immediate     = poll_event_schedule_immediate,
        .add_signal             = tevent_common_add_signal,
        .loop_once              = poll_event_loop_once,