tsocket/bsd: fix bug #7115 FreeBSD includes the UDP header in FIONREAD
[ira/wip.git] / lib / tsocket / tsocket_bsd.c
index 52cc5cc1cfba5e7b15a51f7178e270e9d92aee47..9027bc97114f0b10286ede18b416f2ead919bc96 100644 (file)
@@ -3,7 +3,7 @@
 
    Copyright (C) Stefan Metzmacher 2009
 
-     ** NOTE! The following LGPL license applies to the tevent
+     ** NOTE! The following LGPL license applies to the tsocket
      ** library. This does NOT imply that all of Samba is released
      ** under the LGPL
 
@@ -149,40 +149,42 @@ static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
 
 static ssize_t tsocket_bsd_pending(int fd)
 {
-       int ret;
+       int ret, error;
        int value = 0;
+       socklen_t len;
 
        ret = ioctl(fd, FIONREAD, &value);
        if (ret == -1) {
                return ret;
        }
 
-       if (ret == 0) {
-               if (value == 0) {
-                       int error=0;
-                       socklen_t len = sizeof(error);
-                       /*
-                        * if no data is available check if the socket
-                        * is in error state. For dgram sockets
-                        * it's the way to return ICMP error messages
-                        * of connected sockets to the caller.
-                        */
-                       ret = getsockopt(fd, SOL_SOCKET, SO_ERROR,
-                                        &error, &len);
-                       if (ret == -1) {
-                               return ret;
-                       }
-                       if (error != 0) {
-                               errno = error;
-                               return -1;
-                       }
-               }
+       if (ret != 0) {
+               /* this should not be reached */
+               errno = EIO;
+               return -1;
+       }
+
+       if (value != 0) {
                return value;
        }
 
-       /* this should not be reached */
-       errno = EIO;
-       return -1;
+       error = 0;
+       len = sizeof(error);
+
+       /*
+        * if no data is available check if the socket is in error state. For
+        * dgram sockets it's the way to return ICMP error messages of
+        * connected sockets to the caller.
+        */
+       ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len);
+       if (ret == -1) {
+               return ret;
+       }
+       if (error != 0) {
+               errno = error;
+               return -1;
+       }
+       return 0;
 }
 
 static const struct tsocket_address_ops tsocket_address_bsd_ops;
@@ -199,34 +201,40 @@ struct tsocket_address_bsd {
        } u;
 };
 
-static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
-                                             struct sockaddr *sa,
-                                             socklen_t sa_len,
-                                             struct tsocket_address **_addr,
-                                             const char *location)
+int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
+                                      struct sockaddr *sa,
+                                      size_t sa_socklen,
+                                      struct tsocket_address **_addr,
+                                      const char *location)
 {
        struct tsocket_address *addr;
        struct tsocket_address_bsd *bsda;
 
+       if (sa_socklen < sizeof(sa->sa_family)) {
+               errno = EINVAL;
+               return -1;
+       }
+
        switch (sa->sa_family) {
        case AF_UNIX:
-               if (sa_len < sizeof(struct sockaddr_un)) {
-                       errno = EINVAL;
-                       return -1;
+               if (sa_socklen > sizeof(struct sockaddr_un)) {
+                       sa_socklen = sizeof(struct sockaddr_un);
                }
                break;
        case AF_INET:
-               if (sa_len < sizeof(struct sockaddr_in)) {
+               if (sa_socklen < sizeof(struct sockaddr_in)) {
                        errno = EINVAL;
                        return -1;
                }
+               sa_socklen = sizeof(struct sockaddr_in);
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
-               if (sa_len < sizeof(struct sockaddr_in6)) {
+               if (sa_socklen < sizeof(struct sockaddr_in6)) {
                        errno = EINVAL;
                        return -1;
                }
+               sa_socklen = sizeof(struct sockaddr_in6);
                break;
 #endif
        default:
@@ -234,7 +242,7 @@ static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
                return -1;
        }
 
-       if (sa_len > sizeof(struct sockaddr_storage)) {
+       if (sa_socklen > sizeof(struct sockaddr_storage)) {
                errno = EINVAL;
                return -1;
        }
@@ -251,12 +259,56 @@ static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
 
        ZERO_STRUCTP(bsda);
 
-       memcpy(&bsda->u.ss, sa, sa_len);
+       memcpy(&bsda->u.ss, sa, sa_socklen);
 
        *_addr = addr;
        return 0;
 }
 
+ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
+                                    struct sockaddr *sa,
+                                    size_t sa_socklen)
+{
+       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
+                                          struct tsocket_address_bsd);
+       ssize_t rlen = 0;
+
+       if (!bsda) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       switch (bsda->u.sa.sa_family) {
+       case AF_UNIX:
+               rlen = sizeof(struct sockaddr_un);
+               break;
+       case AF_INET:
+               rlen = sizeof(struct sockaddr_in);
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               rlen = sizeof(struct sockaddr_in6);
+               break;
+#endif
+       default:
+               errno = EAFNOSUPPORT;
+               return -1;
+       }
+
+       if (sa_socklen < rlen) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (sa_socklen > sizeof(struct sockaddr_storage)) {
+               memset(sa, 0, sa_socklen);
+               sa_socklen = sizeof(struct sockaddr_storage);
+       }
+
+       memcpy(sa, &bsda->u.ss, sa_socklen);
+       return rlen;
+}
+
 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
                                       const char *fam,
                                       const char *addr,
@@ -442,9 +494,14 @@ int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
                path = "";
        }
 
+       if (strlen(path) > sizeof(un.sun_path)-1) {
+               errno = ENAMETOOLONG;
+               return -1;
+       }
+
        ZERO_STRUCT(un);
        un.sun_family = AF_UNIX;
-       strncpy(un.sun_path, path, sizeof(un.sun_path));
+       strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
 
        ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
                                                 (struct sockaddr *)p,
@@ -557,9 +614,6 @@ struct tdgram_bsd {
        void (*readable_handler)(void *private_data);
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
-
-       struct tevent_req *read_req;
-       struct tevent_req *write_req;
 };
 
 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
@@ -613,12 +667,15 @@ static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
                TALLOC_FREE(bsds->fde);
        }
 
-       if (bsds->fde == NULL) {
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
                bsds->fde = tevent_add_fd(ev, bsds,
                                          bsds->fd, TEVENT_FD_READ,
                                          tdgram_bsd_fde_handler,
                                          bsds);
                if (!bsds->fde) {
+                       errno = ENOMEM;
                        return -1;
                }
 
@@ -664,12 +721,15 @@ static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
                TALLOC_FREE(bsds->fde);
        }
 
-       if (bsds->fde == NULL) {
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
                bsds->fde = tevent_add_fd(ev, bsds,
                                          bsds->fd, TEVENT_FD_WRITE,
                                          tdgram_bsd_fde_handler,
                                          bsds);
                if (!bsds->fde) {
+                       errno = ENOMEM;
                        return -1;
                }
 
@@ -698,7 +758,6 @@ static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *stat
        struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
                                  struct tdgram_bsd);
 
-       bsds->read_req = NULL;
        tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
 
        return 0;
@@ -726,12 +785,6 @@ static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
        state->len      = 0;
        state->src      = NULL;
 
-       if (bsds->read_req) {
-               tevent_req_error(req, EBUSY);
-               goto post;
-       }
-       bsds->read_req = req;
-
        talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
 
        if (bsds->fd == -1) {
@@ -775,7 +828,7 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
        struct tsocket_address_bsd *bsda;
        ssize_t ret;
        struct sockaddr *sa = NULL;
-       socklen_t sa_len = 0;
+       socklen_t sa_socklen = 0;
        int err;
        bool retry;
 
@@ -811,16 +864,16 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
        ZERO_STRUCTP(bsda);
 
        sa = &bsda->u.sa;
-       sa_len = sizeof(bsda->u.ss);
+       sa_socklen = sizeof(bsda->u.ss);
        /*
         * for unix sockets we can't use the size of sockaddr_storage
         * we would get EINVAL
         */
        if (bsda->u.sa.sa_family == AF_UNIX) {
-               sa_len = sizeof(bsda->u.un);
+               sa_socklen = sizeof(bsda->u.un);
        }
 
-       ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_len);
+       ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
@@ -830,10 +883,12 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
                return;
        }
 
-       if (ret != state->len) {
-               tevent_req_error(req, EIO);
-               return;
-       }
+       /*
+        * some systems report too many bytes in tsocket_bsd_pending():
+        * the return value includes some IP/UDP header bytes
+        */
+       state->len = ret;
+       talloc_realloc(state, state->buf, uint8_t, ret);
 
        tevent_req_done(req);
 }
@@ -876,8 +931,8 @@ static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
        struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
                                  struct tdgram_bsd);
 
-       bsds->write_req = NULL;
        tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
+
        return 0;
 }
 
@@ -907,12 +962,6 @@ static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
        state->dst      = dst;
        state->ret      = -1;
 
-       if (bsds->write_req) {
-               tevent_req_error(req, EBUSY);
-               goto post;
-       }
-       bsds->write_req = req;
-
        talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
 
        if (bsds->fd == -1) {
@@ -954,7 +1003,7 @@ static void tdgram_bsd_sendto_handler(void *private_data)
        struct tdgram_context *dgram = state->dgram;
        struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
        struct sockaddr *sa = NULL;
-       socklen_t sa_len = 0;
+       socklen_t sa_socklen = 0;
        ssize_t ret;
        int err;
        bool retry;
@@ -965,17 +1014,17 @@ static void tdgram_bsd_sendto_handler(void *private_data)
                        struct tsocket_address_bsd);
 
                sa = &bsda->u.sa;
-               sa_len = sizeof(bsda->u.ss);
+               sa_socklen = sizeof(bsda->u.ss);
                /*
                 * for unix sockets we can't use the size of sockaddr_storage
                 * we would get EINVAL
                 */
                if (bsda->u.sa.sa_family == AF_UNIX) {
-                       sa_len = sizeof(bsda->u.un);
+                       sa_socklen = sizeof(bsda->u.un);
                }
        }
 
-       ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_len);
+       ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
@@ -1026,11 +1075,6 @@ static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
                return NULL;
        }
 
-       if (bsds->read_req || bsds->write_req) {
-               tevent_req_error(req, EBUSY);
-               goto post;
-       }
-
        if (bsds->fd == -1) {
                tevent_req_error(req, ENOTCONN);
                goto post;
@@ -1100,7 +1144,10 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        int ret;
        bool do_bind = false;
        bool do_reuseaddr = false;
-       socklen_t sa_len = sizeof(lbsda->u.ss);
+       bool do_ipv6only = false;
+       bool is_inet = false;
+       int sa_fam = lbsda->u.sa.sa_family;
+       socklen_t sa_socklen = sizeof(lbsda->u.ss);
 
        if (remote) {
                rbsda = talloc_get_type_abort(remote->private_data,
@@ -1121,16 +1168,18 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                 * for unix sockets we can't use the size of sockaddr_storage
                 * we would get EINVAL
                 */
-               sa_len = sizeof(lbsda->u.un);
+               sa_socklen = sizeof(lbsda->u.un);
                break;
        case AF_INET:
                if (lbsda->u.in.sin_port != 0) {
                        do_reuseaddr = true;
                        do_bind = true;
                }
-               if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
+               if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
                        do_bind = true;
                }
+               is_inet = true;
+               sa_socklen = sizeof(rbsda->u.in);
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
@@ -1143,6 +1192,9 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                           sizeof(in6addr_any)) != 0) {
                        do_bind = true;
                }
+               is_inet = true;
+               sa_socklen = sizeof(rbsda->u.in6);
+               do_ipv6only = true;
                break;
 #endif
        default:
@@ -1150,7 +1202,23 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                return -1;
        }
 
-       fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
+       if (!do_bind && is_inet && rbsda) {
+               sa_fam = rbsda->u.sa.sa_family;
+               switch (sa_fam) {
+               case AF_INET:
+                       sa_socklen = sizeof(rbsda->u.in);
+                       do_ipv6only = false;
+                       break;
+#ifdef HAVE_IPV6
+               case AF_INET6:
+                       sa_socklen = sizeof(rbsda->u.in6);
+                       do_ipv6only = true;
+                       break;
+#endif
+               }
+       }
+
+       fd = socket(sa_fam, SOCK_DGRAM, 0);
        if (fd < 0) {
                return fd;
        }
@@ -1175,6 +1243,21 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        bsds->fd = fd;
        talloc_set_destructor(bsds, tdgram_bsd_destructor);
 
+#ifdef HAVE_IPV6
+       if (do_ipv6only) {
+               int val = 1;
+
+               ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return ret;
+               }
+       }
+#endif
+
        if (broadcast) {
                int val = 1;
 
@@ -1202,7 +1285,7 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        }
 
        if (do_bind) {
-               ret = bind(fd, &lbsda->u.sa, sa_len);
+               ret = bind(fd, &lbsda->u.sa, sa_socklen);
                if (ret == -1) {
                        int saved_errno = errno;
                        talloc_free(dgram);
@@ -1212,7 +1295,13 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        }
 
        if (rbsda) {
-               ret = connect(fd, &rbsda->u.sa, sa_len);
+               if (rbsda->u.sa.sa_family != sa_fam) {
+                       talloc_free(dgram);
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               ret = connect(fd, &rbsda->u.sa, sa_socklen);
                if (ret == -1) {
                        int saved_errno = errno;
                        talloc_free(dgram);
@@ -1254,11 +1343,11 @@ int _tdgram_inet_udp_socket(const struct tsocket_address *local,
        return ret;
 }
 
-int _tdgram_unix_dgram_socket(const struct tsocket_address *local,
-                             const struct tsocket_address *remote,
-                             TALLOC_CTX *mem_ctx,
-                             struct tdgram_context **dgram,
-                             const char *location)
+int _tdgram_unix_socket(const struct tsocket_address *local,
+                       const struct tsocket_address *remote,
+                       TALLOC_CTX *mem_ctx,
+                       struct tdgram_context **dgram,
+                       const char *location)
 {
        struct tsocket_address_bsd *lbsda =
                talloc_get_type_abort(local->private_data,
@@ -1279,3 +1368,984 @@ int _tdgram_unix_dgram_socket(const struct tsocket_address *local,
        return ret;
 }
 
+struct tstream_bsd {
+       int fd;
+
+       void *event_ptr;
+       struct tevent_fd *fde;
+
+       void *readable_private;
+       void (*readable_handler)(void *private_data);
+       void *writeable_private;
+       void (*writeable_handler)(void *private_data);
+};
+
+static void tstream_bsd_fde_handler(struct tevent_context *ev,
+                                   struct tevent_fd *fde,
+                                   uint16_t flags,
+                                   void *private_data)
+{
+       struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
+                                  struct tstream_bsd);
+
+       if (flags & TEVENT_FD_WRITE) {
+               bsds->writeable_handler(bsds->writeable_private);
+               return;
+       }
+       if (flags & TEVENT_FD_READ) {
+               if (!bsds->readable_handler) {
+                       if (bsds->writeable_handler) {
+                               bsds->writeable_handler(bsds->writeable_private);
+                               return;
+                       }
+                       TEVENT_FD_NOT_READABLE(bsds->fde);
+                       return;
+               }
+               bsds->readable_handler(bsds->readable_private);
+               return;
+       }
+}
+
+static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
+                                           struct tevent_context *ev,
+                                           void (*handler)(void *private_data),
+                                           void *private_data)
+{
+       if (ev == NULL) {
+               if (handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               if (!bsds->readable_handler) {
+                       return 0;
+               }
+               bsds->readable_handler = NULL;
+               bsds->readable_private = NULL;
+
+               return 0;
+       }
+
+       /* read and write must use the same tevent_context */
+       if (bsds->event_ptr != ev) {
+               if (bsds->readable_handler || bsds->writeable_handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               bsds->event_ptr = NULL;
+               TALLOC_FREE(bsds->fde);
+       }
+
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
+               bsds->fde = tevent_add_fd(ev, bsds,
+                                         bsds->fd, TEVENT_FD_READ,
+                                         tstream_bsd_fde_handler,
+                                         bsds);
+               if (!bsds->fde) {
+                       errno = ENOMEM;
+                       return -1;
+               }
+
+               /* cache the event context we're running on */
+               bsds->event_ptr = ev;
+       } else if (!bsds->readable_handler) {
+               TEVENT_FD_READABLE(bsds->fde);
+       }
+
+       bsds->readable_handler = handler;
+       bsds->readable_private = private_data;
+
+       return 0;
+}
+
+static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
+                                            struct tevent_context *ev,
+                                            void (*handler)(void *private_data),
+                                            void *private_data)
+{
+       if (ev == NULL) {
+               if (handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               if (!bsds->writeable_handler) {
+                       return 0;
+               }
+               bsds->writeable_handler = NULL;
+               bsds->writeable_private = NULL;
+               TEVENT_FD_NOT_WRITEABLE(bsds->fde);
+
+               return 0;
+       }
+
+       /* read and write must use the same tevent_context */
+       if (bsds->event_ptr != ev) {
+               if (bsds->readable_handler || bsds->writeable_handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               bsds->event_ptr = NULL;
+               TALLOC_FREE(bsds->fde);
+       }
+
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
+               bsds->fde = tevent_add_fd(ev, bsds,
+                                         bsds->fd,
+                                         TEVENT_FD_READ | TEVENT_FD_WRITE,
+                                         tstream_bsd_fde_handler,
+                                         bsds);
+               if (!bsds->fde) {
+                       errno = ENOMEM;
+                       return -1;
+               }
+
+               /* cache the event context we're running on */
+               bsds->event_ptr = ev;
+       } else if (!bsds->writeable_handler) {
+               uint16_t flags = tevent_fd_get_flags(bsds->fde);
+               flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
+               tevent_fd_set_flags(bsds->fde, flags);
+       }
+
+       bsds->writeable_handler = handler;
+       bsds->writeable_private = private_data;
+
+       return 0;
+}
+
+static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
+{
+       struct tstream_bsd *bsds = tstream_context_data(stream,
+                                  struct tstream_bsd);
+       ssize_t ret;
+
+       if (bsds->fd == -1) {
+               errno = ENOTCONN;
+               return -1;
+       }
+
+       ret = tsocket_bsd_pending(bsds->fd);
+
+       return ret;
+}
+
+struct tstream_bsd_readv_state {
+       struct tstream_context *stream;
+
+       struct iovec *vector;
+       size_t count;
+
+       int ret;
+};
+
+static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
+{
+       struct tstream_bsd *bsds = tstream_context_data(state->stream,
+                                  struct tstream_bsd);
+
+       tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
+
+       return 0;
+}
+
+static void tstream_bsd_readv_handler(void *private_data);
+
+static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       struct tstream_context *stream,
+                                       struct iovec *vector,
+                                       size_t count)
+{
+       struct tevent_req *req;
+       struct tstream_bsd_readv_state *state;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       int ret;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_readv_state);
+       if (!req) {
+               return NULL;
+       }
+
+       state->stream   = stream;
+       /* we make a copy of the vector so that we can modify it */
+       state->vector   = talloc_array(state, struct iovec, count);
+       if (tevent_req_nomem(state->vector, req)) {
+               goto post;
+       }
+       memcpy(state->vector, vector, sizeof(struct iovec)*count);
+       state->count    = count;
+       state->ret      = 0;
+
+       talloc_set_destructor(state, tstream_bsd_readv_destructor);
+
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
+       }
+
+       /*
+        * this is a fast path, not waiting for the
+        * socket to become explicitly readable gains
+        * about 10%-20% performance in benchmark tests.
+        */
+       tstream_bsd_readv_handler(req);
+       if (!tevent_req_is_in_progress(req)) {
+               goto post;
+       }
+
+       ret = tstream_bsd_set_readable_handler(bsds, ev,
+                                             tstream_bsd_readv_handler,
+                                             req);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+static void tstream_bsd_readv_handler(void *private_data)
+{
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tstream_bsd_readv_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_readv_state);
+       struct tstream_context *stream = state->stream;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       int ret;
+       int err;
+       bool retry;
+
+       ret = readv(bsds->fd, state->vector, state->count);
+       if (ret == 0) {
+               /* propagate end of file */
+               tevent_req_error(req, EPIPE);
+               return;
+       }
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
+       }
+       if (tevent_req_error(req, err)) {
+               return;
+       }
+
+       state->ret += ret;
+
+       while (ret > 0) {
+               if (ret < state->vector[0].iov_len) {
+                       uint8_t *base;
+                       base = (uint8_t *)state->vector[0].iov_base;
+                       base += ret;
+                       state->vector[0].iov_base = base;
+                       state->vector[0].iov_len -= ret;
+                       break;
+               }
+               ret -= state->vector[0].iov_len;
+               state->vector += 1;
+               state->count -= 1;
+       }
+
+       /*
+        * there may be some empty vectors at the end
+        * which we need to skip, otherwise we would get
+        * ret == 0 from the readv() call and return EPIPE
+        */
+       while (state->count > 0) {
+               if (state->vector[0].iov_len > 0) {
+                       break;
+               }
+               state->vector += 1;
+               state->count -= 1;
+       }
+
+       if (state->count > 0) {
+               /* we have more to read */
+               return;
+       }
+
+       tevent_req_done(req);
+}
+
+static int tstream_bsd_readv_recv(struct tevent_req *req,
+                                 int *perrno)
+{
+       struct tstream_bsd_readv_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_readv_state);
+       int ret;
+
+       ret = tsocket_simple_int_recv(req, perrno);
+       if (ret == 0) {
+               ret = state->ret;
+       }
+
+       tevent_req_received(req);
+       return ret;
+}
+
+struct tstream_bsd_writev_state {
+       struct tstream_context *stream;
+
+       struct iovec *vector;
+       size_t count;
+
+       int ret;
+};
+
+static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
+{
+       struct tstream_bsd *bsds = tstream_context_data(state->stream,
+                                 struct tstream_bsd);
+
+       tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
+
+       return 0;
+}
+
+static void tstream_bsd_writev_handler(void *private_data);
+
+static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
+                                                struct tevent_context *ev,
+                                                struct tstream_context *stream,
+                                                const struct iovec *vector,
+                                                size_t count)
+{
+       struct tevent_req *req;
+       struct tstream_bsd_writev_state *state;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       int ret;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_writev_state);
+       if (!req) {
+               return NULL;
+       }
+
+       state->stream   = stream;
+       /* we make a copy of the vector so that we can modify it */
+       state->vector   = talloc_array(state, struct iovec, count);
+       if (tevent_req_nomem(state->vector, req)) {
+               goto post;
+       }
+       memcpy(state->vector, vector, sizeof(struct iovec)*count);
+       state->count    = count;
+       state->ret      = 0;
+
+       talloc_set_destructor(state, tstream_bsd_writev_destructor);
+
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
+       }
+
+       /*
+        * this is a fast path, not waiting for the
+        * socket to become explicitly writeable gains
+        * about 10%-20% performance in benchmark tests.
+        */
+       tstream_bsd_writev_handler(req);
+       if (!tevent_req_is_in_progress(req)) {
+               goto post;
+       }
+
+       ret = tstream_bsd_set_writeable_handler(bsds, ev,
+                                              tstream_bsd_writev_handler,
+                                              req);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+/*
+ * Push as much of the pending iovec array as the socket accepts.
+ *
+ * private_data must be the tevent_req whose state is a
+ * struct tstream_bsd_writev_state. The function returns without
+ * finishing the request when the write has to be retried or when
+ * unwritten data remains; it finishes the request with an error
+ * on failure and with success once every vector was consumed.
+ */
+static void tstream_bsd_writev_handler(void *private_data)
+{
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tstream_bsd_writev_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_writev_state);
+       struct tstream_bsd *bsds = tstream_context_data(state->stream,
+                                       struct tstream_bsd);
+       ssize_t written;
+       int sys_errno;
+       bool again;
+
+       written = writev(bsds->fd, state->vector, state->count);
+       if (written == 0) {
+               /* nothing was accepted: report it as end of file */
+               tevent_req_error(req, EPIPE);
+               return;
+       }
+       sys_errno = tsocket_bsd_error_from_errno(written, errno, &again);
+       if (again) {
+               /* leave the request pending, we get called again */
+               return;
+       }
+       if (tevent_req_error(req, sys_errno)) {
+               return;
+       }
+
+       state->ret += written;
+
+       /*
+        * Walk over the vectors covered by this write. A vector
+        * that was only partially written is trimmed in place
+        * (this is why the request works on a private copy),
+        * fully written vectors are dropped from the front.
+        */
+       for (; written > 0; state->vector += 1, state->count -= 1) {
+               struct iovec *cur = &state->vector[0];
+
+               if (written < cur->iov_len) {
+                       cur->iov_base = (uint8_t *)cur->iov_base + written;
+                       cur->iov_len -= written;
+                       break;
+               }
+               written -= cur->iov_len;
+       }
+
+       /*
+        * Drop empty vectors at the front of what is left. If only
+        * empty vectors remained, the next writev() would return 0
+        * and we would wrongly report EPIPE.
+        */
+       while (state->count > 0 && state->vector[0].iov_len == 0) {
+               state->vector += 1;
+               state->count -= 1;
+       }
+
+       if (state->count > 0) {
+               /* there is still data left to write */
+               return;
+       }
+
+       tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a writev request.
+ *
+ * Returns the value of tsocket_simple_int_recv() when it is
+ * non-zero (with *perrno handled by that helper), otherwise the
+ * byte count accumulated in the request state. The request is
+ * marked as received in both cases.
+ */
+static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
+{
+       struct tstream_bsd_writev_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_writev_state);
+       int rc = tsocket_simple_int_recv(req, perrno);
+       /* read the counter before the request is marked received */
+       int result = (rc == 0) ? state->ret : rc;
+
+       tevent_req_received(req);
+       return result;
+}
+
+/*
+ * Request state for the disconnect operation. Nothing in the visible
+ * disconnect code reads or writes the member; it only gives the
+ * struct a body so it can be passed to tevent_req_create().
+ */
+struct tstream_bsd_disconnect_state {
+       void *__dummy;
+};
+
+/*
+ * Close the socket of a bsd stream.
+ *
+ * The close() happens synchronously; the request is always posted
+ * to ev before it is returned. It fails with ENOTCONN when the
+ * stream has no open descriptor, and with the error mapped from
+ * close() otherwise. Returns NULL only if the request itself could
+ * not be created.
+ */
+static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
+                                                    struct tevent_context *ev,
+                                                    struct tstream_context *stream)
+{
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       struct tstream_bsd_disconnect_state *state;
+       struct tevent_req *req;
+       bool unused_retry;
+       int sys_errno;
+       int rc;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_disconnect_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       if (bsds->fd != -1) {
+               rc = close(bsds->fd);
+               /* the descriptor is gone no matter what close() said */
+               bsds->fd = -1;
+               sys_errno = tsocket_bsd_error_from_errno(rc, errno,
+                                                        &unused_retry);
+               if (!tevent_req_error(req, sys_errno)) {
+                       tevent_req_done(req);
+               }
+       } else {
+               tevent_req_error(req, ENOTCONN);
+       }
+
+       tevent_req_post(req, ev);
+       return req;
+}
+
+/*
+ * Collect the result of a disconnect request: the value of
+ * tsocket_simple_int_recv() is passed through and the request
+ * is marked as received.
+ */
+static int tstream_bsd_disconnect_recv(struct tevent_req *req,
+                                     int *perrno)
+{
+       int result = tsocket_simple_int_recv(req, perrno);
+
+       tevent_req_received(req);
+       return result;
+}
+
+/*
+ * Operation table that plugs the bsd socket implementation
+ * into the generic tstream layer.
+ */
+static const struct tstream_context_ops tstream_bsd_ops = {
+       .name                   = "bsd",
+
+       /* teardown */
+       .disconnect_send        = tstream_bsd_disconnect_send,
+       .disconnect_recv        = tstream_bsd_disconnect_recv,
+
+       /* output */
+       .writev_send            = tstream_bsd_writev_send,
+       .writev_recv            = tstream_bsd_writev_recv,
+
+       /* input */
+       .readv_send             = tstream_bsd_readv_send,
+       .readv_recv             = tstream_bsd_readv_recv,
+       .pending_bytes          = tstream_bsd_pending_bytes,
+};
+
+/*
+ * talloc destructor of the bsd stream data: drops the fd event
+ * and closes the descriptor if one is still open. Always returns 0.
+ */
+static int tstream_bsd_destructor(struct tstream_bsd *bsds)
+{
+       TALLOC_FREE(bsds->fde);
+
+       if (bsds->fd == -1) {
+               /* already closed, nothing left to release */
+               return 0;
+       }
+
+       close(bsds->fd);
+       bsds->fd = -1;
+       return 0;
+}
+
+/*
+ * Wrap an already existing descriptor into a bsd tstream.
+ *
+ * On success 0 is returned, *_stream holds the new stream and the
+ * stream data's destructor will close fd. On failure -1 is
+ * returned and *_stream is left untouched.
+ */
+int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
+                                int fd,
+                                struct tstream_context **_stream,
+                                const char *location)
+{
+       struct tstream_bsd *bsds;
+       struct tstream_context *stream = tstream_context_create(mem_ctx,
+                                               &tstream_bsd_ops,
+                                               &bsds,
+                                               struct tstream_bsd,
+                                               location);
+
+       if (stream == NULL) {
+               return -1;
+       }
+
+       /* start from a clean state and take over the descriptor */
+       ZERO_STRUCTP(bsds);
+       bsds->fd = fd;
+       talloc_set_destructor(bsds, tstream_bsd_destructor);
+
+       *_stream = stream;
+       return 0;
+}
+
+/*
+ * Request state of an outgoing stream connect.
+ *
+ * fd  - the socket being connected, -1 when the state does not
+ *       (or no longer does) own a descriptor
+ * fde - fd event used while the connect is still in progress
+ * stream - not referenced by the visible connect code
+ */
+struct tstream_bsd_connect_state {
+       int fd;
+       struct tevent_fd *fde;
+       struct tstream_context *stream;
+};
+
+/*
+ * talloc destructor of the connect state: removes the fd event
+ * and closes the socket if the state still owns one.
+ * Always returns 0.
+ */
+static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
+{
+       TALLOC_FREE(state->fde);
+
+       if (state->fd == -1) {
+               /* no descriptor owned by this state */
+               return 0;
+       }
+
+       close(state->fd);
+       state->fd = -1;
+       return 0;
+}
+
+/*
+ * Forward declaration: the handler is registered with tevent_add_fd()
+ * in tstream_bsd_connect_send() before its definition appears.
+ */
+static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
+                                           struct tevent_fd *fde,
+                                           uint16_t flags,
+                                           void *private_data);
+
+static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       int sys_errno,
+                                       const struct tsocket_address *local,
+                                       const struct tsocket_address *remote)
+{
+       struct tevent_req *req;
+       struct tstream_bsd_connect_state *state;
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       struct tsocket_address_bsd *rbsda =
+               talloc_get_type_abort(remote->private_data,
+               struct tsocket_address_bsd);
+       int ret;
+       int err;
+       bool retry;
+       bool do_bind = false;
+       bool do_reuseaddr = false;
+       bool do_ipv6only = false;
+       bool is_inet = false;
+       int sa_fam = lbsda->u.sa.sa_family;
+       socklen_t sa_socklen = sizeof(rbsda->u.ss);
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_connect_state);
+       if (!req) {
+               return NULL;
+       }
+       state->fd = -1;
+       state->fde = NULL;
+
+       talloc_set_destructor(state, tstream_bsd_connect_destructor);
+
+       /* give the wrappers a chance to report an error */
+       if (sys_errno != 0) {
+               tevent_req_error(req, sys_errno);
+               goto post;
+       }
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_UNIX:
+               if (lbsda->u.un.sun_path[0] != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               /*
+                * for unix sockets we can't use the size of sockaddr_storage
+                * we would get EINVAL
+                */
+               sa_socklen = sizeof(rbsda->u.un);
+               break;
+       case AF_INET:
+               if (lbsda->u.in.sin_port != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
+                       do_bind = true;
+               }
+               is_inet = true;
+               sa_socklen = sizeof(rbsda->u.in);
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               if (lbsda->u.in6.sin6_port != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               if (memcmp(&in6addr_any,
+                          &lbsda->u.in6.sin6_addr,
+                          sizeof(in6addr_any)) != 0) {
+                       do_bind = true;
+               }
+               is_inet = true;
+               sa_socklen = sizeof(rbsda->u.in6);
+               do_ipv6only = true;
+               break;
+#endif
+       default:
+               tevent_req_error(req, EINVAL);
+               goto post;
+       }
+
+       if (!do_bind && is_inet) {
+               sa_fam = rbsda->u.sa.sa_family;
+               switch (sa_fam) {
+               case AF_INET:
+                       sa_socklen = sizeof(rbsda->u.in);
+                       do_ipv6only = false;
+                       break;
+#ifdef HAVE_IPV6
+               case AF_INET6:
+                       sa_socklen = sizeof(rbsda->u.in6);
+                       do_ipv6only = true;
+                       break;
+#endif
+               }
+       }
+
+       state->fd = socket(sa_fam, SOCK_STREAM, 0);
+       if (state->fd == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
+       if (state->fd == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+#ifdef HAVE_IPV6
+       if (do_ipv6only) {
+               int val = 1;
+
+               ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+#endif
+
+       if (do_reuseaddr) {
+               int val = 1;
+
+               ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+
+       if (do_bind) {
+               ret = bind(state->fd, &lbsda->u.sa, sa_socklen);
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+
+       if (rbsda->u.sa.sa_family != sa_fam) {
+               tevent_req_error(req, EINVAL);
+               goto post;
+       }
+
+       ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               goto async;
+       }
+       if (tevent_req_error(req, err)) {
+               goto post;
+       }
+
+       tevent_req_done(req);
+       goto post;
+
+ async:
+       state->fde = tevent_add_fd(ev, state,
+                                  state->fd,
+                                  TEVENT_FD_READ | TEVENT_FD_WRITE,
+                                  tstream_bsd_connect_fde_handler,
+                                  req);
+       if (tevent_req_nomem(state->fde, req)) {
+               goto post;
+       }
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+/*
+ * fd event handler for a connect that is still in progress.
+ *
+ * Reads the pending socket error with SO_ERROR and turns it into
+ * the request result. The request is left pending when the error
+ * mapping asks for a retry.
+ */
+static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
+                                           struct tevent_fd *fde,
+                                           uint16_t flags,
+                                           void *private_data)
+{
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tstream_bsd_connect_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_connect_state);
+       int sock_error = 0;
+       socklen_t optlen = sizeof(sock_error);
+       int sys_errno;
+       bool again;
+       int rc;
+
+       rc = getsockopt(state->fd, SOL_SOCKET, SO_ERROR,
+                       &sock_error, &optlen);
+       if (rc == 0 && sock_error != 0) {
+               /* treat a pending socket error like a failed syscall */
+               errno = sock_error;
+               rc = -1;
+       }
+
+       sys_errno = tsocket_bsd_error_from_errno(rc, errno, &again);
+       if (again) {
+               /* not finished yet, wait for the next event */
+               return;
+       }
+
+       if (!tevent_req_error(req, sys_errno)) {
+               tevent_req_done(req);
+       }
+}
+
+/*
+ * Collect the result of a connect request.
+ *
+ * If the request succeeded, the connected descriptor is wrapped
+ * into a new stream below mem_ctx and stored in *stream; the
+ * request state then gives up the descriptor so its destructor
+ * does not close it. If wrapping fails, -1 is returned and
+ * *perrno is set from errno. The request is marked as received
+ * on every path.
+ */
+static int tstream_bsd_connect_recv(struct tevent_req *req,
+                                   int *perrno,
+                                   TALLOC_CTX *mem_ctx,
+                                   struct tstream_context **stream,
+                                   const char *location)
+{
+       struct tstream_bsd_connect_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_connect_state);
+       int result = tsocket_simple_int_recv(req, perrno);
+
+       if (result != 0) {
+               tevent_req_received(req);
+               return result;
+       }
+
+       result = _tstream_bsd_existing_socket(mem_ctx,
+                                             state->fd,
+                                             stream,
+                                             location);
+       if (result == -1) {
+               *perrno = errno;
+       } else {
+               /* the new stream owns the descriptor from now on */
+               TALLOC_FREE(state->fde);
+               state->fd = -1;
+       }
+
+       tevent_req_received(req);
+       return result;
+}
+
+struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       const struct tsocket_address *local,
+                                       const struct tsocket_address *remote)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       struct tevent_req *req;
+       int sys_errno = 0;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_INET:
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               break;
+#endif
+       default:
+               sys_errno = EINVAL;
+               break;
+       }
+
+       req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
+
+       return req;
+}
+
+/*
+ * Collect the result of tstream_inet_tcp_connect_send();
+ * forwards all arguments to tstream_bsd_connect_recv().
+ */
+int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
+                                  int *perrno,
+                                  TALLOC_CTX *mem_ctx,
+                                  struct tstream_context **stream,
+                                  const char *location)
+{
+       int result;
+
+       result = tstream_bsd_connect_recv(req, perrno, mem_ctx,
+                                         stream, location);
+       return result;
+}
+
+/*
+ * Start a unix domain stream connect from 'local' to 'remote'.
+ *
+ * The local address has to be AF_UNIX; any other family makes the
+ * returned request fail with EINVAL. Everything else is done by
+ * tstream_bsd_connect_send().
+ */
+struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       const struct tsocket_address *local,
+                                       const struct tsocket_address *remote)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       int sys_errno;
+
+       if (lbsda->u.sa.sa_family == AF_UNIX) {
+               sys_errno = 0;
+       } else {
+               sys_errno = EINVAL;
+       }
+
+       return tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
+}
+
+/*
+ * Collect the result of tstream_unix_connect_send();
+ * forwards all arguments to tstream_bsd_connect_recv().
+ */
+int _tstream_unix_connect_recv(struct tevent_req *req,
+                                     int *perrno,
+                                     TALLOC_CTX *mem_ctx,
+                                     struct tstream_context **stream,
+                                     const char *location)
+{
+       int result;
+
+       result = tstream_bsd_connect_recv(req, perrno, mem_ctx,
+                                         stream, location);
+       return result;
+}
+
+/*
+ * Create a connected pair of unix domain stream sockets and wrap
+ * each end into a tstream.
+ *
+ * mem_ctx1/_stream1 - talloc parent and result slot of the first end
+ * mem_ctx2/_stream2 - talloc parent and result slot of the second end
+ *
+ * Returns 0 on success. On failure -1 is returned with errno set,
+ * both descriptors are released again and the result slots are
+ * left untouched.
+ */
+int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
+                            struct tstream_context **_stream1,
+                            TALLOC_CTX *mem_ctx2,
+                            struct tstream_context **_stream2,
+                            const char *location)
+{
+       struct tstream_context *first = NULL;
+       struct tstream_context *second = NULL;
+       int saved_errno;
+       int pair[2];
+
+       if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1) {
+               return -1;
+       }
+
+       /*
+        * When preparing one end fails, only the other end is
+        * closed here.
+        */
+       pair[0] = tsocket_bsd_common_prepare_fd(pair[0], true);
+       if (pair[0] == -1) {
+               saved_errno = errno;
+               close(pair[1]);
+               goto fail;
+       }
+
+       pair[1] = tsocket_bsd_common_prepare_fd(pair[1], true);
+       if (pair[1] == -1) {
+               saved_errno = errno;
+               close(pair[0]);
+               goto fail;
+       }
+
+       if (_tstream_bsd_existing_socket(mem_ctx1, pair[0],
+                                        &first, location) == -1) {
+               saved_errno = errno;
+               close(pair[0]);
+               close(pair[1]);
+               goto fail;
+       }
+
+       if (_tstream_bsd_existing_socket(mem_ctx2, pair[1],
+                                        &second, location) == -1) {
+               saved_errno = errno;
+               /* freeing the first stream closes its descriptor */
+               talloc_free(first);
+               close(pair[1]);
+               goto fail;
+       }
+
+       *_stream1 = first;
+       *_stream2 = second;
+       return 0;
+
+fail:
+       /* cleanup calls may have clobbered errno, restore it */
+       errno = saved_errno;
+       return -1;
+}
+