talloc: use the system pytalloc-util for python3 as well
[sfrench/samba-autobuild/.git] / lib / tsocket / tsocket_bsd.c
index 2811882fed320f400535d14eb3bcccbb591d924d..708d17edc321094b9423e082cca8a101078a2757 100644 (file)
@@ -3,7 +3,7 @@
 
    Copyright (C) Stefan Metzmacher 2009
 
-     ** NOTE! The following LGPL license applies to the tevent
+     ** NOTE! The following LGPL license applies to the tsocket
      ** library. This does NOT imply that all of Samba is released
      ** under the LGPL
 
 */
 
 #include "replace.h"
+#include "system/filesys.h"
 #include "system/network.h"
 #include "tsocket.h"
 #include "tsocket_internal.h"
+#include "lib/util/iov_buf.h"
+#include "lib/util/blocking.h"
 
-static const struct tsocket_context_ops tsocket_context_bsd_ops;
-static const struct tsocket_address_ops tsocket_address_bsd_ops;
+static int tsocket_bsd_error_from_errno(int ret,
+                                       int sys_errno,
+                                       bool *retry)
+{
+       *retry = false;
 
-static int tsocket_context_bsd_set_option(const struct tsocket_context *sock,
-                                         const char *option,
-                                         bool force,
-                                         const char *value);
+       if (ret >= 0) {
+               return 0;
+       }
 
-struct tsocket_context_bsd {
-       bool close_on_disconnect;
-       int fd;
-       struct tevent_fd *fde;
-};
+       if (ret != -1) {
+               return EIO;
+       }
+
+       if (sys_errno == 0) {
+               return EIO;
+       }
+
+       if (sys_errno == EINTR) {
+               *retry = true;
+               return sys_errno;
+       }
+
+       if (sys_errno == EINPROGRESS) {
+               *retry = true;
+               return sys_errno;
+       }
+
+       if (sys_errno == EAGAIN) {
+               *retry = true;
+               return sys_errno;
+       }
+
+       /* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */
+       if (sys_errno == ENOMEM) {
+               *retry = true;
+               return sys_errno;
+       }
+
+#ifdef EWOULDBLOCK
+       if (sys_errno == EWOULDBLOCK) {
+               *retry = true;
+               return sys_errno;
+       }
+#endif
+
+       return sys_errno;
+}
+
+static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
+{
+       int i;
+       int sys_errno = 0;
+       int fds[3];
+       int num_fds = 0;
+
+       int result;
+       bool ok;
+
+       if (fd == -1) {
+               return -1;
+       }
+
+       /* first make a fd >= 3 */
+       if (high_fd) {
+               while (fd < 3) {
+                       fds[num_fds++] = fd;
+                       fd = dup(fd);
+                       if (fd == -1) {
+                               sys_errno = errno;
+                               break;
+                       }
+               }
+               for (i=0; i<num_fds; i++) {
+                       close(fds[i]);
+               }
+               if (fd == -1) {
+                       errno = sys_errno;
+                       return fd;
+               }
+       }
+
+       result = set_blocking(fd, false);
+       if (result == -1) {
+               goto fail;
+       }
+
+       ok = smb_set_close_on_exec(fd);
+       if (!ok) {
+               goto fail;
+       }
+
+       return fd;
+
+ fail:
+       if (fd != -1) {
+               sys_errno = errno;
+               close(fd);
+               errno = sys_errno;
+       }
+       return -1;
+}
+
+#ifdef HAVE_LINUX_RTNETLINK_H
+/**
+ * Get the amount of pending bytes from a netlink socket
+ *
+ * For some reason netlink sockets don't support querying the amount of pending
+ * data via ioctl with FIONREAD, which is what we use in tsocket_bsd_pending()
+ * below.
+ *
+ * We know we are on Linux as we're using netlink, which means we have a working
+ * MSG_TRUNC flag to recvmsg() as well, so we use that together with MSG_PEEK.
+ **/
+static ssize_t tsocket_bsd_netlink_pending(int fd)
+{
+       struct iovec iov;
+       struct msghdr msg;
+       char buf[1];
+
+       iov = (struct iovec) {
+               .iov_base = buf,
+               .iov_len = sizeof(buf)
+       };
+
+       msg = (struct msghdr) {
+               .msg_iov = &iov,
+               .msg_iovlen = 1
+       };
+
+       return recvmsg(fd, &msg, MSG_PEEK | MSG_TRUNC);
+}
+#else
+static ssize_t tsocket_bsd_netlink_pending(int fd)
+{
+       errno = ENOSYS;
+       return -1;
+}
+#endif
+
+static ssize_t tsocket_bsd_pending(int fd)
+{
+       int ret, error;
+       int value = 0;
+       socklen_t len;
+
+       ret = ioctl(fd, FIONREAD, &value);
+       if (ret == -1) {
+               return ret;
+       }
+
+       if (ret != 0) {
+               /* this should not be reached */
+               errno = EIO;
+               return -1;
+       }
+
+       if (value != 0) {
+               return value;
+       }
+
+       error = 0;
+       len = sizeof(error);
+
+       /*
+        * if no data is available check if the socket is in error state. For
+        * dgram sockets it's the way to return ICMP error messages of
+        * connected sockets to the caller.
+        */
+       ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len);
+       if (ret == -1) {
+               return ret;
+       }
+       if (error != 0) {
+               errno = error;
+               return -1;
+       }
+       return 0;
+}
+
+static const struct tsocket_address_ops tsocket_address_bsd_ops;
 
 struct tsocket_address_bsd {
-       bool broadcast;
+       socklen_t sa_socklen;
        union {
                struct sockaddr sa;
-               struct sockaddr_in sin;
+               struct sockaddr_in in;
 #ifdef HAVE_IPV6
-               struct sockaddr_in6 sin6;
+               struct sockaddr_in6 in6;
 #endif
-               struct sockaddr_un sun;
+               struct sockaddr_un un;
                struct sockaddr_storage ss;
        } u;
 };
 
-static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
-                                             struct sockaddr *sa,
-                                             socklen_t sa_len,
-                                             struct tsocket_address **_addr,
-                                             const char *location)
+int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
+                                      const struct sockaddr *sa,
+                                      size_t sa_socklen,
+                                      struct tsocket_address **_addr,
+                                      const char *location)
 {
        struct tsocket_address *addr;
        struct tsocket_address_bsd *bsda;
 
+       if (sa_socklen < sizeof(sa->sa_family)) {
+               errno = EINVAL;
+               return -1;
+       }
+
        switch (sa->sa_family) {
        case AF_UNIX:
-               if (sa_len < sizeof(struct sockaddr_un)) {
-                       errno = EINVAL;
-                       return -1;
+               if (sa_socklen > sizeof(struct sockaddr_un)) {
+                       sa_socklen = sizeof(struct sockaddr_un);
                }
                break;
        case AF_INET:
-               if (sa_len < sizeof(struct sockaddr_in)) {
+               if (sa_socklen < sizeof(struct sockaddr_in)) {
                        errno = EINVAL;
                        return -1;
                }
+               sa_socklen = sizeof(struct sockaddr_in);
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
-               if (sa_len < sizeof(struct sockaddr_in6)) {
+               if (sa_socklen < sizeof(struct sockaddr_in6)) {
                        errno = EINVAL;
                        return -1;
                }
+               sa_socklen = sizeof(struct sockaddr_in6);
                break;
 #endif
        default:
@@ -88,7 +265,7 @@ static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
                return -1;
        }
 
-       if (sa_len > sizeof(struct sockaddr_storage)) {
+       if (sa_socklen > sizeof(struct sockaddr_storage)) {
                errno = EINVAL;
                return -1;
        }
@@ -105,12 +282,83 @@ static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
 
        ZERO_STRUCTP(bsda);
 
-       memcpy(&bsda->u.ss, sa, sa_len);
+       memcpy(&bsda->u.ss, sa, sa_socklen);
+
+       bsda->sa_socklen = sa_socklen;
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+       bsda->u.sa.sa_len = bsda->sa_socklen;
+#endif
 
        *_addr = addr;
        return 0;
 }
 
+ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
+                                    struct sockaddr *sa,
+                                    size_t sa_socklen)
+{
+       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
+                                          struct tsocket_address_bsd);
+
+       if (!bsda) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (sa_socklen < bsda->sa_socklen) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (sa_socklen > bsda->sa_socklen) {
+               memset(sa, 0, sa_socklen);
+               sa_socklen = bsda->sa_socklen;
+       }
+
+       memcpy(sa, &bsda->u.ss, sa_socklen);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+       sa->sa_len = sa_socklen;
+#endif
+       return sa_socklen;
+}
+
+bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam)
+{
+       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
+                                          struct tsocket_address_bsd);
+
+       if (!bsda) {
+               return false;
+       }
+
+       switch (bsda->u.sa.sa_family) {
+       case AF_INET:
+               if (strcasecmp(fam, "ip") == 0) {
+                       return true;
+               }
+
+               if (strcasecmp(fam, "ipv4") == 0) {
+                       return true;
+               }
+
+               return false;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               if (strcasecmp(fam, "ip") == 0) {
+                       return true;
+               }
+
+               if (strcasecmp(fam, "ipv6") == 0) {
+                       return true;
+               }
+
+               return false;
+#endif
+       }
+
+       return false;
+}
+
 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
                                       const char *fam,
                                       const char *addr,
@@ -157,7 +405,7 @@ int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
                return -1;
        }
 
-       snprintf(port_str, sizeof(port_str) - 1, "%u", port);
+       snprintf(port_str, sizeof(port_str), "%u", port);
 
        ret = getaddrinfo(addr, port_str, &hints, &result);
        if (ret != 0) {
@@ -204,14 +452,14 @@ char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
 
        switch (bsda->u.sa.sa_family) {
        case AF_INET:
-               str = inet_ntop(bsda->u.sin.sin_family,
-                               &bsda->u.sin.sin_addr,
+               str = inet_ntop(bsda->u.in.sin_family,
+                               &bsda->u.in.sin_addr,
                                addr_str, sizeof(addr_str));
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
-               str = inet_ntop(bsda->u.sin6.sin6_family,
-                               &bsda->u.sin6.sin6_addr,
+               str = inet_ntop(bsda->u.in6.sin6_family,
+                               &bsda->u.in6.sin6_addr,
                                addr_str, sizeof(addr_str));
                break;
 #endif
@@ -240,11 +488,11 @@ uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
 
        switch (bsda->u.sa.sa_family) {
        case AF_INET:
-               port = ntohs(bsda->u.sin.sin_port);
+               port = ntohs(bsda->u.in.sin_port);
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
-               port = ntohs(bsda->u.sin6.sin6_port);
+               port = ntohs(bsda->u.in6.sin6_port);
                break;
 #endif
        default:
@@ -268,11 +516,11 @@ int tsocket_address_inet_set_port(struct tsocket_address *addr,
 
        switch (bsda->u.sa.sa_family) {
        case AF_INET:
-               bsda->u.sin.sin_port = htons(port);
+               bsda->u.in.sin_port = htons(port);
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
-               bsda->u.sin6.sin6_port = htons(port);
+               bsda->u.in6.sin6_port = htons(port);
                break;
 #endif
        default:
@@ -283,17 +531,21 @@ int tsocket_address_inet_set_port(struct tsocket_address *addr,
        return 0;
 }
 
-void tsocket_address_inet_set_broadcast(struct tsocket_address *addr,
-                                       bool broadcast)
+bool tsocket_address_is_unix(const struct tsocket_address *addr)
 {
        struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
                                           struct tsocket_address_bsd);
 
        if (!bsda) {
-               return;
+               return false;
+       }
+
+       switch (bsda->u.sa.sa_family) {
+       case AF_UNIX:
+               return true;
        }
 
-       bsda->broadcast = broadcast;
+       return false;
 }
 
 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
@@ -301,21 +553,26 @@ int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
                                    struct tsocket_address **_addr,
                                    const char *location)
 {
-       struct sockaddr_un sun;
-       void *p = &sun;
+       struct sockaddr_un un;
+       void *p = &un;
        int ret;
 
        if (!path) {
                path = "";
        }
 
-       ZERO_STRUCT(sun);
-       sun.sun_family = AF_UNIX;
-       strncpy(sun.sun_path, path, sizeof(sun.sun_path));
+       if (strlen(path) > sizeof(un.sun_path)-1) {
+               errno = ENAMETOOLONG;
+               return -1;
+       }
+
+       ZERO_STRUCT(un);
+       un.sun_family = AF_UNIX;
+       strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
 
        ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
                                                 (struct sockaddr *)p,
-                                                sizeof(sun),
+                                                sizeof(un),
                                                 _addr,
                                                 location);
 
@@ -336,7 +593,7 @@ char *tsocket_address_unix_path(const struct tsocket_address *addr,
 
        switch (bsda->u.sa.sa_family) {
        case AF_UNIX:
-               str = bsda->u.sun.sun_path;
+               str = bsda->u.un.sun_path;
                break;
        default:
                errno = EINVAL;
@@ -359,13 +616,15 @@ static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
        switch (bsda->u.sa.sa_family) {
        case AF_UNIX:
                return talloc_asprintf(mem_ctx, "unix:%s",
-                                      bsda->u.sun.sun_path);
+                                      bsda->u.un.sun_path);
        case AF_INET:
                prefix = "ipv4";
                break;
+#ifdef HAVE_IPV6
        case AF_INET6:
                prefix = "ipv6";
                break;
+#endif
        default:
                errno = EINVAL;
                return NULL;
@@ -396,731 +655,1928 @@ static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_add
 
        ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
                                                 &bsda->u.sa,
-                                                sizeof(bsda->u.ss),
+                                                bsda->sa_socklen,
                                                 &copy,
                                                 location);
        if (ret != 0) {
                return NULL;
        }
 
-       tsocket_address_inet_set_broadcast(copy, bsda->broadcast);
        return copy;
 }
 
-int _tsocket_context_bsd_wrap_existing(TALLOC_CTX *mem_ctx,
-                                      int fd, bool close_on_disconnect,
-                                      struct tsocket_context **_sock,
-                                      const char *location)
-{
-       struct tsocket_context *sock;
-       struct tsocket_context_bsd *bsds;
+static const struct tsocket_address_ops tsocket_address_bsd_ops = {
+       .name           = "bsd",
+       .string         = tsocket_address_bsd_string,
+       .copy           = tsocket_address_bsd_copy,
+};
 
-       sock = tsocket_context_create(mem_ctx,
-                                     &tsocket_context_bsd_ops,
-                                     &bsds,
-                                     struct tsocket_context_bsd,
-                                     location);
-       if (!sock) {
-               return -1;
+struct tdgram_bsd {
+       int fd;
+
+       void *event_ptr;
+       struct tevent_fd *fde;
+       bool optimize_recvfrom;
+       bool netlink;
+
+       void *readable_private;
+       void (*readable_handler)(void *private_data);
+       void *writeable_private;
+       void (*writeable_handler)(void *private_data);
+};
+
+bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram,
+                                 bool on)
+{
+       struct tdgram_bsd *bsds =
+               talloc_get_type(_tdgram_context_data(dgram),
+               struct tdgram_bsd);
+       bool old;
+
+       if (bsds == NULL) {
+               /* not a bsd socket */
+               return false;
        }
 
-       bsds->close_on_disconnect       = close_on_disconnect;
-       bsds->fd                        = fd;
-       bsds->fde                       = NULL;
+       old = bsds->optimize_recvfrom;
+       bsds->optimize_recvfrom = on;
 
-       *_sock = sock;
-       return 0;
+       return old;
 }
 
-static int tsocket_address_bsd_create_socket(const struct tsocket_address *addr,
-                                            enum tsocket_type type,
-                                            TALLOC_CTX *mem_ctx,
-                                            struct tsocket_context **_sock,
-                                            const char *location)
+static void tdgram_bsd_fde_handler(struct tevent_context *ev,
+                                  struct tevent_fd *fde,
+                                  uint16_t flags,
+                                  void *private_data)
 {
-       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
-                                          struct tsocket_address_bsd);
-       struct tsocket_context *sock;
-       int bsd_type;
-       int fd;
-       int ret;
-       bool do_bind = false;
-       bool do_reuseaddr = false;
+       struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
+                                 struct tdgram_bsd);
 
-       switch (type) {
-       case TSOCKET_TYPE_STREAM:
-               if (bsda->broadcast) {
-                       errno = EINVAL;
-                       return -1;
+       if (flags & TEVENT_FD_WRITE) {
+               bsds->writeable_handler(bsds->writeable_private);
+               return;
+       }
+       if (flags & TEVENT_FD_READ) {
+               if (!bsds->readable_handler) {
+                       TEVENT_FD_NOT_READABLE(bsds->fde);
+                       return;
                }
-               bsd_type = SOCK_STREAM;
-               break;
-       case TSOCKET_TYPE_DGRAM:
-               bsd_type = SOCK_DGRAM;
-               break;
-       default:
-               errno = EPROTONOSUPPORT;
-               return -1;
+               bsds->readable_handler(bsds->readable_private);
+               return;
        }
+}
 
-       switch (bsda->u.sa.sa_family) {
-       case AF_UNIX:
-               if (bsda->broadcast) {
+static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
+                                          struct tevent_context *ev,
+                                          void (*handler)(void *private_data),
+                                          void *private_data)
+{
+       if (ev == NULL) {
+               if (handler) {
                        errno = EINVAL;
                        return -1;
                }
-               if (bsda->u.sun.sun_path[0] != 0) {
-                       do_bind = true;
-               }
-               break;
-       case AF_INET:
-               if (bsda->u.sin.sin_port != 0) {
-                       do_reuseaddr = true;
-                       do_bind = true;
-               }
-               if (bsda->u.sin.sin_addr.s_addr == INADDR_ANY) {
-                       do_bind = true;
-               }
-               break;
-#ifdef HAVE_IPV6
-       case AF_INET6:
-               if (bsda->u.sin6.sin6_port != 0) {
-                       do_reuseaddr = true;
-                       do_bind = true;
-               }
-               if (memcmp(&in6addr_any,
-                          &bsda->u.sin6.sin6_addr,
-                          sizeof(in6addr_any)) != 0) {
-                       do_bind = true;
+               if (!bsds->readable_handler) {
+                       return 0;
                }
-               break;
-#endif
-       default:
-               errno = EINVAL;
-               return -1;
-       }
+               bsds->readable_handler = NULL;
+               bsds->readable_private = NULL;
 
-       fd = socket(bsda->u.sa.sa_family, bsd_type, 0);
-       if (fd < 0) {
-               return fd;
+               return 0;
        }
 
-       fd = tsocket_common_prepare_fd(fd, true);
-       if (fd < 0) {
-               return fd;
+       /* read and write must use the same tevent_context */
+       if (bsds->event_ptr != ev) {
+               if (bsds->readable_handler || bsds->writeable_handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               bsds->event_ptr = NULL;
+               TALLOC_FREE(bsds->fde);
        }
 
-       ret = _tsocket_context_bsd_wrap_existing(mem_ctx, fd, true,
-                                                &sock, location);
-       if (ret != 0) {
-               int saved_errno = errno;
-               close(fd);
-               errno = saved_errno;
-               return ret;
-       }
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
 
-       if (bsda->broadcast) {
-               ret = tsocket_context_bsd_set_option(sock, "SO_BROADCAST", true, "1");
-               if (ret != 0) {
-                       int saved_errno = errno;
-                       talloc_free(sock);
-                       errno = saved_errno;
-                       return ret;
+               bsds->fde = tevent_add_fd(ev, bsds,
+                                         bsds->fd, TEVENT_FD_READ,
+                                         tdgram_bsd_fde_handler,
+                                         bsds);
+               if (!bsds->fde) {
+                       errno = ENOMEM;
+                       return -1;
                }
-       }
 
-       if (do_reuseaddr) {
-               ret = tsocket_context_bsd_set_option(sock, "SO_REUSEADDR", true, "1");
-               if (ret != 0) {
-                       int saved_errno = errno;
-                       talloc_free(sock);
-                       errno = saved_errno;
-                       return ret;
-               }
+               /* cache the event context we're running on */
+               bsds->event_ptr = ev;
+       } else if (!bsds->readable_handler) {
+               TEVENT_FD_READABLE(bsds->fde);
        }
 
-       if (do_bind) {
-               ret = bind(fd, &bsda->u.sa, sizeof(bsda->u.ss));
-               if (ret != 0) {
-                       int saved_errno = errno;
-                       talloc_free(sock);
-                       errno = saved_errno;
-                       return ret;
-               }
-       }
+       bsds->readable_handler = handler;
+       bsds->readable_private = private_data;
 
-       *_sock = sock;
        return 0;
 }
 
-static const struct tsocket_address_ops tsocket_address_bsd_ops = {
-       .name           = "bsd",
-       .string         = tsocket_address_bsd_string,
-       .copy           = tsocket_address_bsd_copy,
-       .create_socket  = tsocket_address_bsd_create_socket
-};
-
-static void tsocket_context_bsd_fde_handler(struct tevent_context *ev,
-                                           struct tevent_fd *fde,
-                                           uint16_t flags,
+static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
+                                           struct tevent_context *ev,
+                                           void (*handler)(void *private_data),
                                            void *private_data)
 {
-       struct tsocket_context *sock = talloc_get_type(private_data,
-                                      struct tsocket_context);
+       if (ev == NULL) {
+               if (handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               if (!bsds->writeable_handler) {
+                       return 0;
+               }
+               bsds->writeable_handler = NULL;
+               bsds->writeable_private = NULL;
+               TEVENT_FD_NOT_WRITEABLE(bsds->fde);
 
-       if (flags & TEVENT_FD_WRITE) {
-               sock->event.write_handler(sock, sock->event.write_private);
-               return;
+               return 0;
        }
-       if (flags & TEVENT_FD_READ) {
-               sock->event.read_handler(sock, sock->event.read_private);
-               return;
-       }
-}
 
-static int tsocket_context_bsd_set_event_context(struct tsocket_context *sock,
-                                                struct tevent_context *ev)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-
-       talloc_free(bsds->fde);
-       bsds->fde = NULL;
-       ZERO_STRUCT(sock->event);
-
-       if (!ev) {
-               return 0;
+       /* read and write must use the same tevent_context */
+       if (bsds->event_ptr != ev) {
+               if (bsds->readable_handler || bsds->writeable_handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               bsds->event_ptr = NULL;
+               TALLOC_FREE(bsds->fde);
        }
 
-       bsds->fde = tevent_add_fd(ev, bsds,
-                                 bsds->fd,
-                                 0,
-                                 tsocket_context_bsd_fde_handler,
-                                 sock);
-       if (!bsds->fde) {
-               if (errno == 0) {
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
+               bsds->fde = tevent_add_fd(ev, bsds,
+                                         bsds->fd, TEVENT_FD_WRITE,
+                                         tdgram_bsd_fde_handler,
+                                         bsds);
+               if (!bsds->fde) {
                        errno = ENOMEM;
+                       return -1;
                }
-               return -1;
+
+               /* cache the event context we're running on */
+               bsds->event_ptr = ev;
+       } else if (!bsds->writeable_handler) {
+               TEVENT_FD_WRITEABLE(bsds->fde);
        }
 
-       sock->event.ctx = ev;
+       bsds->writeable_handler = handler;
+       bsds->writeable_private = private_data;
 
        return 0;
 }
 
-static int tsocket_context_bsd_set_read_handler(struct tsocket_context *sock,
-                                               tsocket_event_handler_t handler,
-                                               void *private_data)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
+struct tdgram_bsd_recvfrom_state {
+       struct tdgram_context *dgram;
+       bool first_try;
+       uint8_t *buf;
+       size_t len;
+       struct tsocket_address *src;
+};
 
-       if (sock->event.read_handler && !handler) {
-               TEVENT_FD_NOT_READABLE(bsds->fde);
-       } else if (!sock->event.read_handler && handler) {
-               TEVENT_FD_READABLE(bsds->fde);
-       }
+static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
+{
+       struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
+                                 struct tdgram_bsd);
 
-       sock->event.read_handler = handler;
-       sock->event.read_private = private_data;
+       tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
 
        return 0;
 }
 
-static int tsocket_context_bsd_set_write_handler(struct tsocket_context *sock,
-                                                tsocket_event_handler_t handler,
-                                                void *private_data)
+static void tdgram_bsd_recvfrom_handler(void *private_data);
+
+static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       struct tdgram_context *dgram)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
+       struct tevent_req *req;
+       struct tdgram_bsd_recvfrom_state *state;
+       struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
+       int ret;
 
-       if (sock->event.write_handler && !handler) {
-               TEVENT_FD_NOT_WRITEABLE(bsds->fde);
-       } else if (!sock->event.write_handler && handler) {
-               TEVENT_FD_WRITEABLE(bsds->fde);
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tdgram_bsd_recvfrom_state);
+       if (!req) {
+               return NULL;
        }
 
-       sock->event.write_handler = handler;
-       sock->event.write_private = private_data;
+       state->dgram    = dgram;
+       state->first_try= true;
+       state->buf      = NULL;
+       state->len      = 0;
+       state->src      = NULL;
 
-       return 0;
-}
+       talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
 
-static int tsocket_context_bsd_connect_to(struct tsocket_context *sock,
-                                         const struct tsocket_address *remote)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       struct tsocket_address_bsd *bsda = talloc_get_type(remote->private_data,
-                                          struct tsocket_address_bsd);
-       int ret;
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
+       }
 
-       ret = connect(bsds->fd, &bsda->u.sa,
-                     sizeof(bsda->u.ss));
 
-       return ret;
-}
+       /*
+        * this is a fast path, not waiting for the
+        * socket to become explicit readable gains
+        * about 10%-20% performance in benchmark tests.
+        */
+       if (bsds->optimize_recvfrom) {
+               /*
+                * We only do the optimization on
+                * recvfrom if the caller asked for it.
+                *
+                * This is needed because in most cases
+                * we prefer to flush send buffers before
+                * receiving incoming requests.
+                */
+               tdgram_bsd_recvfrom_handler(req);
+               if (!tevent_req_is_in_progress(req)) {
+                       goto post;
+               }
+       }
 
-static int tsocket_context_bsd_listen_on(struct tsocket_context *sock,
-                                         int queue_size)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       int ret;
+       ret = tdgram_bsd_set_readable_handler(bsds, ev,
+                                             tdgram_bsd_recvfrom_handler,
+                                             req);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
 
-       ret = listen(bsds->fd, queue_size);
+       return req;
 
-       return ret;
+ post:
+       tevent_req_post(req, ev);
+       return req;
 }
 
-static int tsocket_context_bsd_accept_new(struct tsocket_context *sock,
-                                          TALLOC_CTX *mem_ctx,
-                                          struct tsocket_context **_new_sock,
-                                          const char *location)
+static void tdgram_bsd_recvfrom_handler(void *private_data)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       int new_fd;
-       struct tsocket_context *new_sock;
-       struct tsocket_context_bsd *new_bsds;
-       struct sockaddr_storage ss;
-       void *p = &ss;
-       socklen_t ss_len = sizeof(ss);
-
-       new_fd = accept(bsds->fd, (struct sockaddr *)p, &ss_len);
-       if (new_fd < 0) {
-               return new_fd;
-       }
-
-       new_fd = tsocket_common_prepare_fd(new_fd, true);
-       if (new_fd < 0) {
-               return new_fd;
-       }
-
-       new_sock = tsocket_context_create(mem_ctx,
-                                         &tsocket_context_bsd_ops,
-                                         &new_bsds,
-                                         struct tsocket_context_bsd,
-                                         location);
-       if (!new_sock) {
-               int saved_errno = errno;
-               close(new_fd);
-               errno = saved_errno;
-               return -1;
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
+                                       struct tdgram_bsd_recvfrom_state);
+       struct tdgram_context *dgram = state->dgram;
+       struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
+       struct tsocket_address_bsd *bsda;
+       ssize_t ret;
+       int err;
+       bool retry;
+
+       if (bsds->netlink) {
+               ret = tsocket_bsd_netlink_pending(bsds->fd);
+       } else {
+               ret = tsocket_bsd_pending(bsds->fd);
        }
 
-       new_bsds->close_on_disconnect   = true;
-       new_bsds->fd                    = new_fd;
-       new_bsds->fde                   = NULL;
+       if (state->first_try && ret == 0) {
+               state->first_try = false;
+               /* retry later */
+               return;
+       }
+       state->first_try = false;
 
-       *_new_sock = new_sock;
-       return 0;
-}
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
+       }
+       if (tevent_req_error(req, err)) {
+               return;
+       }
 
-static ssize_t tsocket_context_bsd_pending_data(struct tsocket_context *sock)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       int ret;
-       int value = 0;
+       /* note that 'ret' can be 0 here */
+       state->buf = talloc_array(state, uint8_t, ret);
+       if (tevent_req_nomem(state->buf, req)) {
+               return;
+       }
+       state->len = ret;
+
+       state->src = tsocket_address_create(state,
+                                           &tsocket_address_bsd_ops,
+                                           &bsda,
+                                           struct tsocket_address_bsd,
+                                           __location__ "bsd_recvfrom");
+       if (tevent_req_nomem(state->src, req)) {
+               return;
+       }
 
-       ret = ioctl(bsds->fd, FIONREAD, &value);
-       if (ret == -1) {
-               return ret;
+       ZERO_STRUCTP(bsda);
+       bsda->sa_socklen = sizeof(bsda->u.ss);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+       bsda->u.sa.sa_len = bsda->sa_socklen;
+#endif
+
+       ret = recvfrom(bsds->fd, state->buf, state->len, 0,
+                      &bsda->u.sa, &bsda->sa_socklen);
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
+       }
+       if (tevent_req_error(req, err)) {
+               return;
        }
 
-       if (ret == 0) {
-               if (value == 0) {
-                       int error=0;
-                       socklen_t len = sizeof(error);
-                       /*
-                        * if no data is available check if the socket
-                        * is in error state. For dgram sockets
-                        * it's the way to return ICMP error messages
-                        * of connected sockets to the caller.
-                        */
-                       ret = getsockopt(bsds->fd, SOL_SOCKET, SO_ERROR,
-                                        &error, &len);
-                       if (ret == -1) {
-                               return ret;
-                       }
-                       if (error != 0) {
-                               errno = error;
-                               return -1;
-                       }
-               }
-               return value;
+       /*
+        * Some systems (FreeBSD, see bug #7115) return too much
+        * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
+        * the return value includes some IP/UDP header bytes,
+        * while recvfrom() just returns the payload.
+        */
+       state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
+       if (tevent_req_nomem(state->buf, req)) {
+               return;
        }
+       state->len = ret;
 
-       /* this should not be reached */
-       errno = EIO;
-       return -1;
+       tevent_req_done(req);
 }
 
-static int tsocket_context_bsd_readv_data(struct tsocket_context *sock,
-                                         const struct iovec *vector,
-                                         size_t count)
+static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
+                                       int *perrno,
+                                       TALLOC_CTX *mem_ctx,
+                                       uint8_t **buf,
+                                       struct tsocket_address **src)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       int ret;
+       struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
+                                       struct tdgram_bsd_recvfrom_state);
+       ssize_t ret;
 
-       ret = readv(bsds->fd, vector, count);
+       ret = tsocket_simple_int_recv(req, perrno);
+       if (ret == 0) {
+               *buf = talloc_move(mem_ctx, &state->buf);
+               ret = state->len;
+               if (src) {
+                       *src = talloc_move(mem_ctx, &state->src);
+               }
+       }
 
+       tevent_req_received(req);
        return ret;
 }
 
-static int tsocket_context_bsd_writev_data(struct tsocket_context *sock,
-                                          const struct iovec *vector,
-                                          size_t count)
+struct tdgram_bsd_sendto_state {
+       struct tdgram_context *dgram;
+
+       const uint8_t *buf;
+       size_t len;
+       const struct tsocket_address *dst;
+
+       ssize_t ret;
+};
+
+static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       int ret;
+       struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
+                                 struct tdgram_bsd);
 
-       ret = writev(bsds->fd, vector, count);
+       tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
 
-       return ret;
+       return 0;
 }
 
-static ssize_t tsocket_context_bsd_recvfrom_data(struct tsocket_context *sock,
-                                                 uint8_t *data, size_t len,
-                                                 TALLOC_CTX *addr_ctx,
-                                                 struct tsocket_address **remote)
+static void tdgram_bsd_sendto_handler(void *private_data);
+
+static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
+                                                struct tevent_context *ev,
+                                                struct tdgram_context *dgram,
+                                                const uint8_t *buf,
+                                                size_t len,
+                                                const struct tsocket_address *dst)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       struct tsocket_address *addr = NULL;
-       struct tsocket_address_bsd *bsda;
-       ssize_t ret;
-       struct sockaddr *sa = NULL;
-       socklen_t sa_len = 0;
+       struct tevent_req *req;
+       struct tdgram_bsd_sendto_state *state;
+       struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
+       int ret;
 
-       if (remote) {
-               addr = tsocket_address_create(addr_ctx,
-                                             &tsocket_address_bsd_ops,
-                                             &bsda,
-                                             struct tsocket_address_bsd,
-                                             __location__ "recvfrom");
-               if (!addr) {
-                       return -1;
-               }
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tdgram_bsd_sendto_state);
+       if (!req) {
+               return NULL;
+       }
 
-               ZERO_STRUCTP(bsda);
+       state->dgram    = dgram;
+       state->buf      = buf;
+       state->len      = len;
+       state->dst      = dst;
+       state->ret      = -1;
 
-               sa = &bsda->u.sa;
-               sa_len = sizeof(bsda->u.ss);
+       talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
+
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
        }
 
-       ret = recvfrom(bsds->fd, data, len, 0, sa, &sa_len);
-       if (ret < 0) {
-               int saved_errno = errno;
-               talloc_free(addr);
-               errno = saved_errno;
-               return ret;
+       /*
+        * this is a fast path, not waiting for the
+        * socket to become explicit writeable gains
+        * about 10%-20% performance in benchmark tests.
+        */
+       tdgram_bsd_sendto_handler(req);
+       if (!tevent_req_is_in_progress(req)) {
+               goto post;
        }
 
-       if (remote) {
-               *remote = addr;
+       ret = tdgram_bsd_set_writeable_handler(bsds, ev,
+                                              tdgram_bsd_sendto_handler,
+                                              req);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               goto post;
        }
-       return ret;
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
 }
 
-static ssize_t tsocket_context_bsd_sendto_data(struct tsocket_context *sock,
-                                               const uint8_t *data, size_t len,
-                                               const struct tsocket_address *remote)
+static void tdgram_bsd_sendto_handler(void *private_data)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
+                                       struct tdgram_bsd_sendto_state);
+       struct tdgram_context *dgram = state->dgram;
+       struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
        struct sockaddr *sa = NULL;
-       socklen_t sa_len = 0;
+       socklen_t sa_socklen = 0;
        ssize_t ret;
+       int err;
+       bool retry;
 
-       if (remote) {
+       if (state->dst) {
                struct tsocket_address_bsd *bsda =
-                       talloc_get_type(remote->private_data,
+                       talloc_get_type(state->dst->private_data,
                        struct tsocket_address_bsd);
 
                sa = &bsda->u.sa;
-               sa_len = sizeof(bsda->u.ss);
-       }
-
-       ret = sendto(bsds->fd, data, len, 0, sa, sa_len);
-
-       return ret;
-}
-
-static int tsocket_context_bsd_get_status(const struct tsocket_context *sock)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       int ret;
-       int error=0;
-       socklen_t len = sizeof(error);
-
-       if (bsds->fd == -1) {
-               errno = EPIPE;
-               return -1;
+               sa_socklen = bsda->sa_socklen;
        }
 
-       ret = getsockopt(bsds->fd, SOL_SOCKET, SO_ERROR, &error, &len);
-       if (ret == -1) {
-               return ret;
-       }
-       if (error != 0) {
-               errno = error;
-               return -1;
+       ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
        }
 
-       return 0;
-}
+       if (err == EMSGSIZE) {
+               /* round up in 1K increments */
+               int bufsize = ((state->len + 1023) & (~1023));
 
-static int tsocket_context_bsd_get_local_address(const struct tsocket_context *sock,
-                                                 TALLOC_CTX *mem_ctx,
-                                                 struct tsocket_address **_addr,
-                                                 const char *location)
-{
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       struct tsocket_address *addr;
-       struct tsocket_address_bsd *bsda;
-       ssize_t ret;
-       socklen_t sa_len;
+               ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize,
+                                sizeof(bufsize));
+               if (ret == 0) {
+                       /*
+                        * We do the retry here, rather then via the
+                        * handler, as we only want to retry once for
+                        * this condition, so if there is a mismatch
+                        * between what setsockopt() accepts and what can
+                        * actually be sent, we do not end up in a
+                        * loop.
+                        */
 
-       addr = tsocket_address_create(mem_ctx,
-                                     &tsocket_address_bsd_ops,
-                                     &bsda,
-                                     struct tsocket_address_bsd,
-                                     location);
-       if (!addr) {
-               return -1;
+                       ret = sendto(bsds->fd, state->buf, state->len,
+                                    0, sa, sa_socklen);
+                       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+                       if (retry) { /* retry later */
+                               return;
+                       }
+               }
        }
 
-       ZERO_STRUCTP(bsda);
-
-       sa_len = sizeof(bsda->u.ss);
-       ret = getsockname(bsds->fd, &bsda->u.sa, &sa_len);
-       if (ret < 0) {
-               int saved_errno = errno;
-               talloc_free(addr);
-               errno = saved_errno;
-               return ret;
+       if (tevent_req_error(req, err)) {
+               return;
        }
 
-       *_addr = addr;
-       return 0;
+       state->ret = ret;
+
+       tevent_req_done(req);
 }
 
-static int tsocket_context_bsd_get_remote_address(const struct tsocket_context *sock,
-                                                  TALLOC_CTX *mem_ctx,
-                                                  struct tsocket_address **_addr,
-                                                  const char *location)
+static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       struct tsocket_address *addr;
-       struct tsocket_address_bsd *bsda;
+       struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
+                                       struct tdgram_bsd_sendto_state);
        ssize_t ret;
-       socklen_t sa_len;
-
-       addr = tsocket_address_create(mem_ctx,
-                                     &tsocket_address_bsd_ops,
-                                     &bsda,
-                                     struct tsocket_address_bsd,
-                                     location);
-       if (!addr) {
-               return -1;
-       }
 
-       ZERO_STRUCTP(bsda);
-
-       sa_len = sizeof(bsda->u.ss);
-       ret = getpeername(bsds->fd, &bsda->u.sa, &sa_len);
-       if (ret < 0) {
-               int saved_errno = errno;
-               talloc_free(addr);
-               errno = saved_errno;
-               return ret;
+       ret = tsocket_simple_int_recv(req, perrno);
+       if (ret == 0) {
+               ret = state->ret;
        }
 
-       *_addr = addr;
-       return 0;
+       tevent_req_received(req);
+       return ret;
 }
 
-static const struct tsocket_context_bsd_option {
-       const char *name;
-       int level;
-       int optnum;
-       int optval;
-} tsocket_context_bsd_options[] = {
-#define TSOCKET_OPTION(_level, _optnum, _optval) { \
-       .name = #_optnum, \
-       .level = _level, \
-       .optnum = _optnum, \
-       .optval = _optval \
-}
-       TSOCKET_OPTION(SOL_SOCKET, SO_REUSEADDR, 0),
-       TSOCKET_OPTION(SOL_SOCKET, SO_BROADCAST, 0)
+struct tdgram_bsd_disconnect_state {
+       uint8_t __dummy;
 };
 
-static int tsocket_context_bsd_get_option(const struct tsocket_context *sock,
-                                         const char *option,
-                                         TALLOC_CTX *mem_ctx,
-                                         char **_value)
+static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
+                                                    struct tevent_context *ev,
+                                                    struct tdgram_context *dgram)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       const struct tsocket_context_bsd_option *opt = NULL;
-       uint32_t i;
-       int optval;
-       socklen_t optval_len = sizeof(optval);
-       char *value;
+       struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
+       struct tevent_req *req;
+       struct tdgram_bsd_disconnect_state *state;
        int ret;
+       int err;
+       bool dummy;
 
-       for (i=0; i < ARRAY_SIZE(tsocket_context_bsd_options); i++) {
-               if (strcmp(option, tsocket_context_bsd_options[i].name) != 0) {
-                       continue;
-               }
-
-               opt = &tsocket_context_bsd_options[i];
-               break;
-       }
-
-       if (!opt) {
-               goto nosys;
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tdgram_bsd_disconnect_state);
+       if (req == NULL) {
+               return NULL;
        }
 
-       ret = getsockopt(bsds->fd, opt->level, opt->optnum,
-                        (void *)&optval, &optval_len);
-       if (ret != 0) {
-               return ret;
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
        }
 
-       if (optval_len != sizeof(optval)) {
-               value = NULL;
-       } if (opt->optval != 0) {
-               if (optval == opt->optval) {
-                       value = talloc_strdup(mem_ctx, "1");
-               } else {
-                       value = talloc_strdup(mem_ctx, "0");
-               }
-               if (!value) {
-                       goto nomem;
-               }
-       } else {
-               value = talloc_asprintf(mem_ctx, "%d", optval);
-               if (!value) {
-                       goto nomem;
-               }
+       TALLOC_FREE(bsds->fde);
+       ret = close(bsds->fd);
+       bsds->fd = -1;
+       err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
+       if (tevent_req_error(req, err)) {
+               goto post;
        }
 
-       *_value = value;
-       return 0;
-
- nomem:
-       errno = ENOMEM;
-       return -1;
- nosys:
-       errno = ENOSYS;
-       return -1;
+       tevent_req_done(req);
+post:
+       tevent_req_post(req, ev);
+       return req;
 }
 
-static int tsocket_context_bsd_set_option(const struct tsocket_context *sock,
-                                         const char *option,
-                                         bool force,
-                                         const char *value)
+static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
+                                     int *perrno)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-       const struct tsocket_context_bsd_option *opt = NULL;
-       uint32_t i;
-       int optval;
        int ret;
 
-       for (i=0; i < ARRAY_SIZE(tsocket_context_bsd_options); i++) {
-               if (strcmp(option, tsocket_context_bsd_options[i].name) != 0) {
-                       continue;
-               }
-
-               opt = &tsocket_context_bsd_options[i];
-               break;
-       }
-
-       if (!opt) {
-               goto nosys;
-       }
-
-       if (value) {
-               if (opt->optval != 0) {
-                       errno = EINVAL;
-                       return -1;
-               }
+       ret = tsocket_simple_int_recv(req, perrno);
 
-               optval = atoi(value);
-       } else {
-               optval = opt->optval;
-       }
+       tevent_req_received(req);
+       return ret;
+}
 
-       ret = setsockopt(bsds->fd, opt->level, opt->optnum,
-                        (const void *)&optval, sizeof(optval));
-       if (ret != 0) {
-               if (!force) {
-                       errno = 0;
-                       return 0;
-               }
-               return ret;
-       }
+static const struct tdgram_context_ops tdgram_bsd_ops = {
+       .name                   = "bsd",
 
-       return 0;
+       .recvfrom_send          = tdgram_bsd_recvfrom_send,
+       .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
 
- nosys:
-       if (!force) {
-               return 0;
-       }
+       .sendto_send            = tdgram_bsd_sendto_send,
+       .sendto_recv            = tdgram_bsd_sendto_recv,
 
-       errno = ENOSYS;
-       return -1;
-}
+       .disconnect_send        = tdgram_bsd_disconnect_send,
+       .disconnect_recv        = tdgram_bsd_disconnect_recv,
+};
 
-static void tsocket_context_bsd_disconnect(struct tsocket_context *sock)
+static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
 {
-       struct tsocket_context_bsd *bsds = talloc_get_type(sock->private_data,
-                                          struct tsocket_context_bsd);
-
-       tsocket_context_bsd_set_event_context(sock, NULL);
-
+       TALLOC_FREE(bsds->fde);
        if (bsds->fd != -1) {
-               if (bsds->close_on_disconnect) {
-                       close(bsds->fd);
-               }
+               close(bsds->fd);
                bsds->fd = -1;
        }
+       return 0;
 }
 
-static const struct tsocket_context_ops tsocket_context_bsd_ops = {
-       .name                   = "bsd",
-
-       .set_event_context      = tsocket_context_bsd_set_event_context,
-       .set_read_handler       = tsocket_context_bsd_set_read_handler,
-       .set_write_handler      = tsocket_context_bsd_set_write_handler,
-
-       .connect_to             = tsocket_context_bsd_connect_to,
-       .listen_on              = tsocket_context_bsd_listen_on,
-       .accept_new             = tsocket_context_bsd_accept_new,
-
-       .pending_data           = tsocket_context_bsd_pending_data,
-       .readv_data             = tsocket_context_bsd_readv_data,
-       .writev_data            = tsocket_context_bsd_writev_data,
-       .recvfrom_data          = tsocket_context_bsd_recvfrom_data,
-       .sendto_data            = tsocket_context_bsd_sendto_data,
-
-       .get_status             = tsocket_context_bsd_get_status,
-       .get_local_address      = tsocket_context_bsd_get_local_address,
-       .get_remote_address     = tsocket_context_bsd_get_remote_address,
+static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
+                                  const struct tsocket_address *remote,
+                                  bool broadcast,
+                                  TALLOC_CTX *mem_ctx,
+                                  struct tdgram_context **_dgram,
+                                  const char *location)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       struct tsocket_address_bsd *rbsda = NULL;
+       struct tdgram_context *dgram;
+       struct tdgram_bsd *bsds;
+       int fd;
+       int ret;
+       bool do_bind = false;
+       bool do_reuseaddr = false;
+       bool do_ipv6only = false;
+       bool is_inet = false;
+       int sa_fam = lbsda->u.sa.sa_family;
 
-       .get_option             = tsocket_context_bsd_get_option,
-       .set_option             = tsocket_context_bsd_set_option,
+       if (remote) {
+               rbsda = talloc_get_type_abort(remote->private_data,
+                       struct tsocket_address_bsd);
+       }
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_UNIX:
+               if (broadcast) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               if (lbsda->u.un.sun_path[0] != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               break;
+       case AF_INET:
+               if (lbsda->u.in.sin_port != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
+                       do_bind = true;
+               }
+               is_inet = true;
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               if (lbsda->u.in6.sin6_port != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               if (memcmp(&in6addr_any,
+                          &lbsda->u.in6.sin6_addr,
+                          sizeof(in6addr_any)) != 0) {
+                       do_bind = true;
+               }
+               is_inet = true;
+               do_ipv6only = true;
+               break;
+#endif
+       default:
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (!do_bind && is_inet && rbsda) {
+               sa_fam = rbsda->u.sa.sa_family;
+               switch (sa_fam) {
+               case AF_INET:
+                       do_ipv6only = false;
+                       break;
+#ifdef HAVE_IPV6
+               case AF_INET6:
+                       do_ipv6only = true;
+                       break;
+#endif
+               }
+       }
+
+       fd = socket(sa_fam, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               return -1;
+       }
+
+       fd = tsocket_bsd_common_prepare_fd(fd, true);
+       if (fd < 0) {
+               return -1;
+       }
+
+       dgram = tdgram_context_create(mem_ctx,
+                                     &tdgram_bsd_ops,
+                                     &bsds,
+                                     struct tdgram_bsd,
+                                     location);
+       if (!dgram) {
+               int saved_errno = errno;
+               close(fd);
+               errno = saved_errno;
+               return -1;
+       }
+       ZERO_STRUCTP(bsds);
+       bsds->fd = fd;
+       talloc_set_destructor(bsds, tdgram_bsd_destructor);
+
+#ifdef HAVE_IPV6
+       if (do_ipv6only) {
+               int val = 1;
+
+               ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return -1;
+               }
+       }
+#endif
+
+       if (broadcast) {
+               int val = 1;
+
+               ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return -1;
+               }
+       }
+
+       if (do_reuseaddr) {
+               int val = 1;
+
+               ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return -1;
+               }
+       }
+
+       if (do_bind) {
+               ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return -1;
+               }
+       }
+
+       if (rbsda) {
+               if (rbsda->u.sa.sa_family != sa_fam) {
+                       talloc_free(dgram);
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return -1;
+               }
+       }
+
+       *_dgram = dgram;
+       return 0;
+}
+
+int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx,
+                               int fd,
+                               struct tdgram_context **_dgram,
+                               const char *location)
+{
+       struct tdgram_context *dgram;
+       struct tdgram_bsd *bsds;
+#ifdef HAVE_LINUX_RTNETLINK_H
+       int result;
+       struct sockaddr sa;
+       socklen_t sa_len = sizeof(struct sockaddr);
+#endif
+
+       dgram = tdgram_context_create(mem_ctx,
+                                     &tdgram_bsd_ops,
+                                     &bsds,
+                                     struct tdgram_bsd,
+                                     location);
+       if (!dgram) {
+               return -1;
+       }
+       ZERO_STRUCTP(bsds);
+       bsds->fd = fd;
+       talloc_set_destructor(bsds, tdgram_bsd_destructor);
+
+       *_dgram = dgram;
+
+#ifdef HAVE_LINUX_RTNETLINK_H
+       /*
+        * Try to determine the protocol family and remember if it's
+        * AF_NETLINK. We don't care if this fails.
+        */
+       result = getsockname(fd, &sa, &sa_len);
+       if (result == 0 && sa.sa_family == AF_NETLINK) {
+               bsds->netlink = true;
+       }
+#endif
+
+       return 0;
+}
+
+int _tdgram_inet_udp_socket(const struct tsocket_address *local,
+                           const struct tsocket_address *remote,
+                           TALLOC_CTX *mem_ctx,
+                           struct tdgram_context **dgram,
+                           const char *location)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       int ret;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_INET:
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               break;
+#endif
+       default:
+               errno = EINVAL;
+               return -1;
+       }
+
+       ret = tdgram_bsd_dgram_socket(local, remote, false,
+                                     mem_ctx, dgram, location);
+
+       return ret;
+}
+
+int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local,
+                                     TALLOC_CTX *mem_ctx,
+                                     struct tdgram_context **dgram,
+                                     const char *location)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       int ret;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_INET:
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               /* only ipv4 */
+               errno = EINVAL;
+               return -1;
+#endif
+       default:
+               errno = EINVAL;
+               return -1;
+       }
+
+       ret = tdgram_bsd_dgram_socket(local, NULL, true,
+                                     mem_ctx, dgram, location);
+
+       return ret;
+}
+
+int _tdgram_unix_socket(const struct tsocket_address *local,
+                       const struct tsocket_address *remote,
+                       TALLOC_CTX *mem_ctx,
+                       struct tdgram_context **dgram,
+                       const char *location)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       int ret;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_UNIX:
+               break;
+       default:
+               errno = EINVAL;
+               return -1;
+       }
+
+       ret = tdgram_bsd_dgram_socket(local, remote, false,
+                                     mem_ctx, dgram, location);
+
+       return ret;
+}
+
+struct tstream_bsd {
+       int fd;
+
+       void *event_ptr;
+       struct tevent_fd *fde;
+       bool optimize_readv;
+
+       void *readable_private;
+       void (*readable_handler)(void *private_data);
+       void *writeable_private;
+       void (*writeable_handler)(void *private_data);
+};
+
+bool tstream_bsd_optimize_readv(struct tstream_context *stream,
+                               bool on)
+{
+       struct tstream_bsd *bsds =
+               talloc_get_type(_tstream_context_data(stream),
+               struct tstream_bsd);
+       bool old;
+
+       if (bsds == NULL) {
+               /* not a bsd socket */
+               return false;
+       }
+
+       old = bsds->optimize_readv;
+       bsds->optimize_readv = on;
+
+       return old;
+}
+
+static void tstream_bsd_fde_handler(struct tevent_context *ev,
+                                   struct tevent_fd *fde,
+                                   uint16_t flags,
+                                   void *private_data)
+{
+       struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
+                                  struct tstream_bsd);
+
+       if (flags & TEVENT_FD_WRITE) {
+               bsds->writeable_handler(bsds->writeable_private);
+               return;
+       }
+       if (flags & TEVENT_FD_READ) {
+               if (!bsds->readable_handler) {
+                       if (bsds->writeable_handler) {
+                               bsds->writeable_handler(bsds->writeable_private);
+                               return;
+                       }
+                       TEVENT_FD_NOT_READABLE(bsds->fde);
+                       return;
+               }
+               bsds->readable_handler(bsds->readable_private);
+               return;
+       }
+}
+
+static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
+                                           struct tevent_context *ev,
+                                           void (*handler)(void *private_data),
+                                           void *private_data)
+{
+       if (ev == NULL) {
+               if (handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               if (!bsds->readable_handler) {
+                       return 0;
+               }
+               bsds->readable_handler = NULL;
+               bsds->readable_private = NULL;
+
+               return 0;
+       }
+
+       /* read and write must use the same tevent_context */
+       if (bsds->event_ptr != ev) {
+               if (bsds->readable_handler || bsds->writeable_handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               bsds->event_ptr = NULL;
+               TALLOC_FREE(bsds->fde);
+       }
+
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
+               bsds->fde = tevent_add_fd(ev, bsds,
+                                         bsds->fd, TEVENT_FD_READ,
+                                         tstream_bsd_fde_handler,
+                                         bsds);
+               if (!bsds->fde) {
+                       errno = ENOMEM;
+                       return -1;
+               }
+
+               /* cache the event context we're running on */
+               bsds->event_ptr = ev;
+       } else if (!bsds->readable_handler) {
+               TEVENT_FD_READABLE(bsds->fde);
+       }
+
+       bsds->readable_handler = handler;
+       bsds->readable_private = private_data;
+
+       return 0;
+}
+
+static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
+                                            struct tevent_context *ev,
+                                            void (*handler)(void *private_data),
+                                            void *private_data)
+{
+       if (ev == NULL) {
+               if (handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               if (!bsds->writeable_handler) {
+                       return 0;
+               }
+               bsds->writeable_handler = NULL;
+               bsds->writeable_private = NULL;
+               TEVENT_FD_NOT_WRITEABLE(bsds->fde);
+
+               return 0;
+       }
+
+       /* read and write must use the same tevent_context */
+       if (bsds->event_ptr != ev) {
+               if (bsds->readable_handler || bsds->writeable_handler) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               bsds->event_ptr = NULL;
+               TALLOC_FREE(bsds->fde);
+       }
+
+       if (tevent_fd_get_flags(bsds->fde) == 0) {
+               TALLOC_FREE(bsds->fde);
+
+               bsds->fde = tevent_add_fd(ev, bsds,
+                                         bsds->fd,
+                                         TEVENT_FD_READ | TEVENT_FD_WRITE,
+                                         tstream_bsd_fde_handler,
+                                         bsds);
+               if (!bsds->fde) {
+                       errno = ENOMEM;
+                       return -1;
+               }
+
+               /* cache the event context we're running on */
+               bsds->event_ptr = ev;
+       } else if (!bsds->writeable_handler) {
+               uint16_t flags = tevent_fd_get_flags(bsds->fde);
+               flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
+               tevent_fd_set_flags(bsds->fde, flags);
+       }
+
+       bsds->writeable_handler = handler;
+       bsds->writeable_private = private_data;
+
+       return 0;
+}
+
+static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
+{
+       struct tstream_bsd *bsds = tstream_context_data(stream,
+                                  struct tstream_bsd);
+       ssize_t ret;
+
+       if (bsds->fd == -1) {
+               errno = ENOTCONN;
+               return -1;
+       }
+
+       ret = tsocket_bsd_pending(bsds->fd);
+
+       return ret;
+}
+
+struct tstream_bsd_readv_state {
+       struct tstream_context *stream;
+
+       struct iovec *vector;
+       size_t count;
+
+       int ret;
+};
+
+static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
+{
+       struct tstream_bsd *bsds = tstream_context_data(state->stream,
+                                  struct tstream_bsd);
+
+       tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
+
+       return 0;
+}
+
+static void tstream_bsd_readv_handler(void *private_data);
+
+static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       struct tstream_context *stream,
+                                       struct iovec *vector,
+                                       size_t count)
+{
+       struct tevent_req *req;
+       struct tstream_bsd_readv_state *state;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       int ret;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_readv_state);
+       if (!req) {
+               return NULL;
+       }
+
+       state->stream   = stream;
+       /* we make a copy of the vector so that we can modify it */
+       state->vector   = talloc_array(state, struct iovec, count);
+       if (tevent_req_nomem(state->vector, req)) {
+               goto post;
+       }
+       memcpy(state->vector, vector, sizeof(struct iovec)*count);
+       state->count    = count;
+       state->ret      = 0;
+
+       talloc_set_destructor(state, tstream_bsd_readv_destructor);
+
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
+       }
+
+       /*
+        * this is a fast path, not waiting for the
+        * socket to become explicit readable gains
+        * about 10%-20% performance in benchmark tests.
+        */
+       if (bsds->optimize_readv) {
+               /*
+                * We only do the optimization on
+                * readv if the caller asked for it.
+                *
+                * This is needed because in most cases
+                * we prefer to flush send buffers before
+                * receiving incoming requests.
+                */
+               tstream_bsd_readv_handler(req);
+               if (!tevent_req_is_in_progress(req)) {
+                       goto post;
+               }
+       }
+
+       ret = tstream_bsd_set_readable_handler(bsds, ev,
+                                             tstream_bsd_readv_handler,
+                                             req);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+static void tstream_bsd_readv_handler(void *private_data)
+{
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tstream_bsd_readv_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_readv_state);
+       struct tstream_context *stream = state->stream;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       int ret;
+       int err;
+       int _count;
+       bool ok, retry;
+
+       ret = readv(bsds->fd, state->vector, state->count);
+       if (ret == 0) {
+               /* propagate end of file */
+               tevent_req_error(req, EPIPE);
+               return;
+       }
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
+       }
+       if (tevent_req_error(req, err)) {
+               return;
+       }
+
+       state->ret += ret;
+
+       _count = state->count; /* tstream has size_t count, readv has int */
+       ok = iov_advance(&state->vector, &_count, ret);
+       state->count = _count;
+
+       if (!ok) {
+               tevent_req_error(req, EINVAL);
+               return;
+       }
+
+       if (state->count > 0) {
+               /* we have more to read */
+               return;
+       }
+
+       tevent_req_done(req);
+}
+
+static int tstream_bsd_readv_recv(struct tevent_req *req,
+                                 int *perrno)
+{
+       struct tstream_bsd_readv_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_readv_state);
+       int ret;
+
+       ret = tsocket_simple_int_recv(req, perrno);
+       if (ret == 0) {
+               ret = state->ret;
+       }
+
+       tevent_req_received(req);
+       return ret;
+}
+
+struct tstream_bsd_writev_state {
+       struct tstream_context *stream;
+
+       struct iovec *vector;
+       size_t count;
+
+       int ret;
+};
+
+static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
+{
+       struct tstream_bsd *bsds = tstream_context_data(state->stream,
+                                 struct tstream_bsd);
+
+       tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
+
+       return 0;
+}
+
+static void tstream_bsd_writev_handler(void *private_data);
+
+static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
+                                                struct tevent_context *ev,
+                                                struct tstream_context *stream,
+                                                const struct iovec *vector,
+                                                size_t count)
+{
+       struct tevent_req *req;
+       struct tstream_bsd_writev_state *state;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       int ret;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_writev_state);
+       if (!req) {
+               return NULL;
+       }
+
+       state->stream   = stream;
+       /* we make a copy of the vector so that we can modify it */
+       state->vector   = talloc_array(state, struct iovec, count);
+       if (tevent_req_nomem(state->vector, req)) {
+               goto post;
+       }
+       memcpy(state->vector, vector, sizeof(struct iovec)*count);
+       state->count    = count;
+       state->ret      = 0;
+
+       talloc_set_destructor(state, tstream_bsd_writev_destructor);
+
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
+       }
+
+       /*
+        * this is a fast path, not waiting for the
+        * socket to become explicit writeable gains
+        * about 10%-20% performance in benchmark tests.
+        */
+       tstream_bsd_writev_handler(req);
+       if (!tevent_req_is_in_progress(req)) {
+               goto post;
+       }
+
+       ret = tstream_bsd_set_writeable_handler(bsds, ev,
+                                              tstream_bsd_writev_handler,
+                                              req);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+static void tstream_bsd_writev_handler(void *private_data)
+{
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tstream_bsd_writev_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_writev_state);
+       struct tstream_context *stream = state->stream;
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       ssize_t ret;
+       int err;
+       int _count;
+       bool ok, retry;
+
+       ret = writev(bsds->fd, state->vector, state->count);
+       if (ret == 0) {
+               /* propagate end of file */
+               tevent_req_error(req, EPIPE);
+               return;
+       }
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
+       }
+       if (tevent_req_error(req, err)) {
+               return;
+       }
+
+       state->ret += ret;
+
+       _count = state->count; /* tstream has size_t count, writev has int */
+       ok = iov_advance(&state->vector, &_count, ret);
+       state->count = _count;
+
+       if (!ok) {
+               tevent_req_error(req, EINVAL);
+               return;
+       }
+
+       if (state->count > 0) {
+               /* we have more to read */
+               return;
+       }
+
+       tevent_req_done(req);
+}
+
+static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
+{
+       struct tstream_bsd_writev_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_writev_state);
+       int ret;
+
+       ret = tsocket_simple_int_recv(req, perrno);
+       if (ret == 0) {
+               ret = state->ret;
+       }
+
+       tevent_req_received(req);
+       return ret;
+}
+
+struct tstream_bsd_disconnect_state {
+       void *__dummy;
+};
+
+static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
+                                                    struct tevent_context *ev,
+                                                    struct tstream_context *stream)
+{
+       struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
+       struct tevent_req *req;
+       struct tstream_bsd_disconnect_state *state;
+       int ret;
+       int err;
+       bool dummy;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_disconnect_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       if (bsds->fd == -1) {
+               tevent_req_error(req, ENOTCONN);
+               goto post;
+       }
+
+       TALLOC_FREE(bsds->fde);
+       ret = close(bsds->fd);
+       bsds->fd = -1;
+       err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
+       if (tevent_req_error(req, err)) {
+               goto post;
+       }
+
+       tevent_req_done(req);
+post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+static int tstream_bsd_disconnect_recv(struct tevent_req *req,
+                                     int *perrno)
+{
+       int ret;
+
+       ret = tsocket_simple_int_recv(req, perrno);
+
+       tevent_req_received(req);
+       return ret;
+}
+
+static const struct tstream_context_ops tstream_bsd_ops = {
+       .name                   = "bsd",
+
+       .pending_bytes          = tstream_bsd_pending_bytes,
+
+       .readv_send             = tstream_bsd_readv_send,
+       .readv_recv             = tstream_bsd_readv_recv,
+
+       .writev_send            = tstream_bsd_writev_send,
+       .writev_recv            = tstream_bsd_writev_recv,
+
+       .disconnect_send        = tstream_bsd_disconnect_send,
+       .disconnect_recv        = tstream_bsd_disconnect_recv,
+};
+
+static int tstream_bsd_destructor(struct tstream_bsd *bsds)
+{
+       TALLOC_FREE(bsds->fde);
+       if (bsds->fd != -1) {
+               close(bsds->fd);
+               bsds->fd = -1;
+       }
+       return 0;
+}
+
+int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
+                                int fd,
+                                struct tstream_context **_stream,
+                                const char *location)
+{
+       struct tstream_context *stream;
+       struct tstream_bsd *bsds;
+
+       stream = tstream_context_create(mem_ctx,
+                                       &tstream_bsd_ops,
+                                       &bsds,
+                                       struct tstream_bsd,
+                                       location);
+       if (!stream) {
+               return -1;
+       }
+       ZERO_STRUCTP(bsds);
+       bsds->fd = fd;
+       talloc_set_destructor(bsds, tstream_bsd_destructor);
+
+       *_stream = stream;
+       return 0;
+}
+
+struct tstream_bsd_connect_state {
+       int fd;
+       struct tevent_fd *fde;
+       struct tstream_conext *stream;
+       struct tsocket_address *local;
+};
+
+static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
+{
+       TALLOC_FREE(state->fde);
+       if (state->fd != -1) {
+               close(state->fd);
+               state->fd = -1;
+       }
+
+       return 0;
+}
+
+static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
+                                           struct tevent_fd *fde,
+                                           uint16_t flags,
+                                           void *private_data);
+
+static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       int sys_errno,
+                                       const struct tsocket_address *local,
+                                       const struct tsocket_address *remote)
+{
+       struct tevent_req *req;
+       struct tstream_bsd_connect_state *state;
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       struct tsocket_address_bsd *lrbsda = NULL;
+       struct tsocket_address_bsd *rbsda =
+               talloc_get_type_abort(remote->private_data,
+               struct tsocket_address_bsd);
+       int ret;
+       bool do_bind = false;
+       bool do_reuseaddr = false;
+       bool do_ipv6only = false;
+       bool is_inet = false;
+       int sa_fam = lbsda->u.sa.sa_family;
+
+       req = tevent_req_create(mem_ctx, &state,
+                               struct tstream_bsd_connect_state);
+       if (!req) {
+               return NULL;
+       }
+       state->fd = -1;
+       state->fde = NULL;
+
+       talloc_set_destructor(state, tstream_bsd_connect_destructor);
+
+       /* give the wrappers a chance to report an error */
+       if (sys_errno != 0) {
+               tevent_req_error(req, sys_errno);
+               goto post;
+       }
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_UNIX:
+               if (lbsda->u.un.sun_path[0] != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               break;
+       case AF_INET:
+               if (lbsda->u.in.sin_port != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
+                       do_bind = true;
+               }
+               is_inet = true;
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               if (lbsda->u.in6.sin6_port != 0) {
+                       do_reuseaddr = true;
+                       do_bind = true;
+               }
+               if (memcmp(&in6addr_any,
+                          &lbsda->u.in6.sin6_addr,
+                          sizeof(in6addr_any)) != 0) {
+                       do_bind = true;
+               }
+               is_inet = true;
+               do_ipv6only = true;
+               break;
+#endif
+       default:
+               tevent_req_error(req, EINVAL);
+               goto post;
+       }
+
+       if (!do_bind && is_inet) {
+               sa_fam = rbsda->u.sa.sa_family;
+               switch (sa_fam) {
+               case AF_INET:
+                       do_ipv6only = false;
+                       break;
+#ifdef HAVE_IPV6
+               case AF_INET6:
+                       do_ipv6only = true;
+                       break;
+#endif
+               }
+       }
+
+       if (is_inet) {
+               state->local = tsocket_address_create(state,
+                                                     &tsocket_address_bsd_ops,
+                                                     &lrbsda,
+                                                     struct tsocket_address_bsd,
+                                                     __location__ "bsd_connect");
+               if (tevent_req_nomem(state->local, req)) {
+                       goto post;
+               }
+
+               ZERO_STRUCTP(lrbsda);
+               lrbsda->sa_socklen = sizeof(lrbsda->u.ss);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+               lrbsda->u.sa.sa_len = lrbsda->sa_socklen;
+#endif
+       }
+
+       state->fd = socket(sa_fam, SOCK_STREAM, 0);
+       if (state->fd == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
+       if (state->fd == -1) {
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+#ifdef HAVE_IPV6
+       if (do_ipv6only) {
+               int val = 1;
+
+               ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+#endif
+
+       if (do_reuseaddr) {
+               int val = 1;
+
+               ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+
+       if (do_bind) {
+               ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+
+       if (rbsda->u.sa.sa_family != sa_fam) {
+               tevent_req_error(req, EINVAL);
+               goto post;
+       }
+
+       ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
+       if (ret == -1) {
+               if (errno == EINPROGRESS) {
+                       goto async;
+               }
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       if (!state->local) {
+               tevent_req_done(req);
+               goto post;
+       }
+
+       if (lrbsda != NULL) {
+               ret = getsockname(state->fd,
+                                 &lrbsda->u.sa,
+                                 &lrbsda->sa_socklen);
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+
+       tevent_req_done(req);
+       goto post;
+
+ async:
+       state->fde = tevent_add_fd(ev, state,
+                                  state->fd,
+                                  TEVENT_FD_READ | TEVENT_FD_WRITE,
+                                  tstream_bsd_connect_fde_handler,
+                                  req);
+       if (tevent_req_nomem(state->fde, req)) {
+               goto post;
+       }
+
+       return req;
+
+ post:
+       tevent_req_post(req, ev);
+       return req;
+}
+
+static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
+                                           struct tevent_fd *fde,
+                                           uint16_t flags,
+                                           void *private_data)
+{
+       struct tevent_req *req = talloc_get_type_abort(private_data,
+                                struct tevent_req);
+       struct tstream_bsd_connect_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_connect_state);
+       struct tsocket_address_bsd *lrbsda = NULL;
+       int ret;
+       int error=0;
+       socklen_t len = sizeof(error);
+       int err;
+       bool retry;
+
+       ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
+       if (ret == 0) {
+               if (error != 0) {
+                       errno = error;
+                       ret = -1;
+               }
+       }
+       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+       if (retry) {
+               /* retry later */
+               return;
+       }
+       if (tevent_req_error(req, err)) {
+               return;
+       }
+
+       if (!state->local) {
+               tevent_req_done(req);
+               return;
+       }
+
+       lrbsda = talloc_get_type_abort(state->local->private_data,
+                                      struct tsocket_address_bsd);
+
+       ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               return;
+       }
+
+       tevent_req_done(req);
+}
+
+static int tstream_bsd_connect_recv(struct tevent_req *req,
+                                   int *perrno,
+                                   TALLOC_CTX *mem_ctx,
+                                   struct tstream_context **stream,
+                                   struct tsocket_address **local,
+                                   const char *location)
+{
+       struct tstream_bsd_connect_state *state = tevent_req_data(req,
+                                       struct tstream_bsd_connect_state);
+       int ret;
+
+       ret = tsocket_simple_int_recv(req, perrno);
+       if (ret == 0) {
+               ret = _tstream_bsd_existing_socket(mem_ctx,
+                                                  state->fd,
+                                                  stream,
+                                                  location);
+               if (ret == -1) {
+                       *perrno = errno;
+                       goto done;
+               }
+               TALLOC_FREE(state->fde);
+               state->fd = -1;
+
+               if (local) {
+                       *local = talloc_move(mem_ctx, &state->local);
+               }
+       }
+
+done:
+       tevent_req_received(req);
+       return ret;
+}
+
+struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       const struct tsocket_address *local,
+                                       const struct tsocket_address *remote)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       struct tevent_req *req;
+       int sys_errno = 0;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_INET:
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               break;
+#endif
+       default:
+               sys_errno = EINVAL;
+               break;
+       }
+
+       req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
+
+       return req;
+}
+
+int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
+                                  int *perrno,
+                                  TALLOC_CTX *mem_ctx,
+                                  struct tstream_context **stream,
+                                  struct tsocket_address **local,
+                                  const char *location)
+{
+       return tstream_bsd_connect_recv(req, perrno,
+                                       mem_ctx, stream, local,
+                                       location);
+}
+
+struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
+                                       struct tevent_context *ev,
+                                       const struct tsocket_address *local,
+                                       const struct tsocket_address *remote)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       struct tevent_req *req;
+       int sys_errno = 0;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_UNIX:
+               break;
+       default:
+               sys_errno = EINVAL;
+               break;
+       }
+
+       req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
+
+       return req;
+}
+
+int _tstream_unix_connect_recv(struct tevent_req *req,
+                                     int *perrno,
+                                     TALLOC_CTX *mem_ctx,
+                                     struct tstream_context **stream,
+                                     const char *location)
+{
+       return tstream_bsd_connect_recv(req, perrno,
+                                       mem_ctx, stream, NULL,
+                                       location);
+}
+
+int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
+                            struct tstream_context **_stream1,
+                            TALLOC_CTX *mem_ctx2,
+                            struct tstream_context **_stream2,
+                            const char *location)
+{
+       int ret;
+       int fds[2];
+       int fd1;
+       int fd2;
+       struct tstream_context *stream1 = NULL;
+       struct tstream_context *stream2 = NULL;
+
+       ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
+       if (ret == -1) {
+               return -1;
+       }
+       fd1 = fds[0];
+       fd2 = fds[1];
+
+       fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
+       if (fd1 == -1) {
+               int sys_errno = errno;
+               close(fd2);
+               errno = sys_errno;
+               return -1;
+       }
+
+       fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
+       if (fd2 == -1) {
+               int sys_errno = errno;
+               close(fd1);
+               errno = sys_errno;
+               return -1;
+       }
+
+       ret = _tstream_bsd_existing_socket(mem_ctx1,
+                                          fd1,
+                                          &stream1,
+                                          location);
+       if (ret == -1) {
+               int sys_errno = errno;
+               close(fd1);
+               close(fd2);
+               errno = sys_errno;
+               return -1;
+       }
+
+       ret = _tstream_bsd_existing_socket(mem_ctx2,
+                                          fd2,
+                                          &stream2,
+                                          location);
+       if (ret == -1) {
+               int sys_errno = errno;
+               talloc_free(stream1);
+               close(fd2);
+               errno = sys_errno;
+               return -1;
+       }
+
+       *_stream1 = stream1;
+       *_stream2 = stream2;
+       return 0;
+}
 
-       .disconnect             = tsocket_context_bsd_disconnect
-};