tsocket: Do not dereference a NULL pointer
[sfrench/samba-autobuild/.git] / lib / tsocket / tsocket_bsd.c
index 78bca4b0b568d547785a6af55fa53adf34b6ba44..708d17edc321094b9423e082cca8a101078a2757 100644 (file)
@@ -3,7 +3,7 @@
 
    Copyright (C) Stefan Metzmacher 2009
 
-     ** NOTE! The following LGPL license applies to the tevent
+     ** NOTE! The following LGPL license applies to the tsocket
      ** library. This does NOT imply that all of Samba is released
      ** under the LGPL
 
@@ -26,6 +26,8 @@
 #include "system/network.h"
 #include "tsocket.h"
 #include "tsocket_internal.h"
+#include "lib/util/iov_buf.h"
+#include "lib/util/blocking.h"
 
 static int tsocket_bsd_error_from_errno(int ret,
                                        int sys_errno,
@@ -60,6 +62,12 @@ static int tsocket_bsd_error_from_errno(int ret,
                return sys_errno;
        }
 
+       /* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */
+       if (sys_errno == ENOMEM) {
+               *retry = true;
+               return sys_errno;
+       }
+
 #ifdef EWOULDBLOCK
        if (sys_errno == EWOULDBLOCK) {
                *retry = true;
@@ -77,7 +85,8 @@ static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
        int fds[3];
        int num_fds = 0;
 
-       int result, flags;
+       int result;
+       bool ok;
 
        if (fd == -1) {
                return -1;
@@ -102,40 +111,16 @@ static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
                }
        }
 
-       /* fd should be nonblocking. */
-
-#ifdef O_NONBLOCK
-#define FLAG_TO_SET O_NONBLOCK
-#else
-#ifdef SYSV
-#define FLAG_TO_SET O_NDELAY
-#else /* BSD */
-#define FLAG_TO_SET FNDELAY
-#endif
-#endif
-
-       if ((flags = fcntl(fd, F_GETFL)) == -1) {
+       result = set_blocking(fd, false);
+       if (result == -1) {
                goto fail;
        }
 
-       flags |= FLAG_TO_SET;
-       if (fcntl(fd, F_SETFL, flags) == -1) {
+       ok = smb_set_close_on_exec(fd);
+       if (!ok) {
                goto fail;
        }
 
-#undef FLAG_TO_SET
-
-       /* fd should be closed on exec() */
-#ifdef FD_CLOEXEC
-       result = flags = fcntl(fd, F_GETFD, 0);
-       if (flags >= 0) {
-               flags |= FD_CLOEXEC;
-               result = fcntl(fd, F_SETFD, flags);
-       }
-       if (result < 0) {
-               goto fail;
-       }
-#endif
        return fd;
 
  fail:
@@ -147,6 +132,43 @@ static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
        return -1;
 }
 
+#ifdef HAVE_LINUX_RTNETLINK_H
+/**
+ * Get the amount of pending bytes from a netlink socket
+ *
+ * For some reason netlink sockets don't support querying the amount of pending
+ * data via ioctl with FIONREAD, which is what we use in tsocket_bsd_pending()
+ * below.
+ *
+ * We know we are on Linux as we're using netlink, which means we have a working
+ * MSG_TRUNC flag to recvmsg() as well, so we use that together with MSG_PEEK.
+ **/
+static ssize_t tsocket_bsd_netlink_pending(int fd)
+{
+       struct iovec iov;
+       struct msghdr msg;
+       char buf[1];
+
+       iov = (struct iovec) {
+               .iov_base = buf,
+               .iov_len = sizeof(buf)
+       };
+
+       msg = (struct msghdr) {
+               .msg_iov = &iov,
+               .msg_iovlen = 1
+       };
+
+       return recvmsg(fd, &msg, MSG_PEEK | MSG_TRUNC);
+}
+#else
+static ssize_t tsocket_bsd_netlink_pending(int fd)
+{
+       errno = ENOSYS;
+       return -1;
+}
+#endif
+
 static ssize_t tsocket_bsd_pending(int fd)
 {
        int ret, error;
@@ -190,6 +212,7 @@ static ssize_t tsocket_bsd_pending(int fd)
 static const struct tsocket_address_ops tsocket_address_bsd_ops;
 
 struct tsocket_address_bsd {
+       socklen_t sa_socklen;
        union {
                struct sockaddr sa;
                struct sockaddr_in in;
@@ -201,34 +224,40 @@ struct tsocket_address_bsd {
        } u;
 };
 
-static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
-                                             struct sockaddr *sa,
-                                             socklen_t sa_len,
-                                             struct tsocket_address **_addr,
-                                             const char *location)
+int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
+                                      const struct sockaddr *sa,
+                                      size_t sa_socklen,
+                                      struct tsocket_address **_addr,
+                                      const char *location)
 {
        struct tsocket_address *addr;
        struct tsocket_address_bsd *bsda;
 
+       if (sa_socklen < sizeof(sa->sa_family)) {
+               errno = EINVAL;
+               return -1;
+       }
+
        switch (sa->sa_family) {
        case AF_UNIX:
-               if (sa_len < sizeof(struct sockaddr_un)) {
-                       errno = EINVAL;
-                       return -1;
+               if (sa_socklen > sizeof(struct sockaddr_un)) {
+                       sa_socklen = sizeof(struct sockaddr_un);
                }
                break;
        case AF_INET:
-               if (sa_len < sizeof(struct sockaddr_in)) {
+               if (sa_socklen < sizeof(struct sockaddr_in)) {
                        errno = EINVAL;
                        return -1;
                }
+               sa_socklen = sizeof(struct sockaddr_in);
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
-               if (sa_len < sizeof(struct sockaddr_in6)) {
+               if (sa_socklen < sizeof(struct sockaddr_in6)) {
                        errno = EINVAL;
                        return -1;
                }
+               sa_socklen = sizeof(struct sockaddr_in6);
                break;
 #endif
        default:
@@ -236,7 +265,7 @@ static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
                return -1;
        }
 
-       if (sa_len > sizeof(struct sockaddr_storage)) {
+       if (sa_socklen > sizeof(struct sockaddr_storage)) {
                errno = EINVAL;
                return -1;
        }
@@ -253,12 +282,83 @@ static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
 
        ZERO_STRUCTP(bsda);
 
-       memcpy(&bsda->u.ss, sa, sa_len);
+       memcpy(&bsda->u.ss, sa, sa_socklen);
+
+       bsda->sa_socklen = sa_socklen;
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+       bsda->u.sa.sa_len = bsda->sa_socklen;
+#endif
 
        *_addr = addr;
        return 0;
 }
 
+ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
+                                    struct sockaddr *sa,
+                                    size_t sa_socklen)
+{
+       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
+                                          struct tsocket_address_bsd);
+
+       if (!bsda) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (sa_socklen < bsda->sa_socklen) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (sa_socklen > bsda->sa_socklen) {
+               memset(sa, 0, sa_socklen);
+               sa_socklen = bsda->sa_socklen;
+       }
+
+       memcpy(sa, &bsda->u.ss, sa_socklen);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+       sa->sa_len = sa_socklen;
+#endif
+       return sa_socklen;
+}
+
+bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam)
+{
+       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
+                                          struct tsocket_address_bsd);
+
+       if (!bsda) {
+               return false;
+       }
+
+       switch (bsda->u.sa.sa_family) {
+       case AF_INET:
+               if (strcasecmp(fam, "ip") == 0) {
+                       return true;
+               }
+
+               if (strcasecmp(fam, "ipv4") == 0) {
+                       return true;
+               }
+
+               return false;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               if (strcasecmp(fam, "ip") == 0) {
+                       return true;
+               }
+
+               if (strcasecmp(fam, "ipv6") == 0) {
+                       return true;
+               }
+
+               return false;
+#endif
+       }
+
+       return false;
+}
+
 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
                                       const char *fam,
                                       const char *addr,
@@ -305,7 +405,7 @@ int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
                return -1;
        }
 
-       snprintf(port_str, sizeof(port_str) - 1, "%u", port);
+       snprintf(port_str, sizeof(port_str), "%u", port);
 
        ret = getaddrinfo(addr, port_str, &hints, &result);
        if (ret != 0) {
@@ -431,6 +531,23 @@ int tsocket_address_inet_set_port(struct tsocket_address *addr,
        return 0;
 }
 
+bool tsocket_address_is_unix(const struct tsocket_address *addr)
+{
+       struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
+                                          struct tsocket_address_bsd);
+
+       if (!bsda) {
+               return false;
+       }
+
+       switch (bsda->u.sa.sa_family) {
+       case AF_UNIX:
+               return true;
+       }
+
+       return false;
+}
+
 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
                                    const char *path,
                                    struct tsocket_address **_addr,
@@ -444,9 +561,14 @@ int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
                path = "";
        }
 
+       if (strlen(path) > sizeof(un.sun_path)-1) {
+               errno = ENAMETOOLONG;
+               return -1;
+       }
+
        ZERO_STRUCT(un);
        un.sun_family = AF_UNIX;
-       strncpy(un.sun_path, path, sizeof(un.sun_path));
+       strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
 
        ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
                                                 (struct sockaddr *)p,
@@ -533,7 +655,7 @@ static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_add
 
        ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
                                                 &bsda->u.sa,
-                                                sizeof(bsda->u.ss),
+                                                bsda->sa_socklen,
                                                 &copy,
                                                 location);
        if (ret != 0) {
@@ -554,6 +676,8 @@ struct tdgram_bsd {
 
        void *event_ptr;
        struct tevent_fd *fde;
+       bool optimize_recvfrom;
+       bool netlink;
 
        void *readable_private;
        void (*readable_handler)(void *private_data);
@@ -561,6 +685,25 @@ struct tdgram_bsd {
        void (*writeable_handler)(void *private_data);
 };
 
+bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram,
+                                 bool on)
+{
+       struct tdgram_bsd *bsds =
+               talloc_get_type(_tdgram_context_data(dgram),
+               struct tdgram_bsd);
+       bool old;
+
+       if (bsds == NULL) {
+               /* not a bsd socket */
+               return false;
+       }
+
+       old = bsds->optimize_recvfrom;
+       bsds->optimize_recvfrom = on;
+
+       return old;
+}
+
 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
                                   struct tevent_fd *fde,
                                   uint16_t flags,
@@ -692,7 +835,7 @@ static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
 
 struct tdgram_bsd_recvfrom_state {
        struct tdgram_context *dgram;
-
+       bool first_try;
        uint8_t *buf;
        size_t len;
        struct tsocket_address *src;
@@ -726,6 +869,7 @@ static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
        }
 
        state->dgram    = dgram;
+       state->first_try= true;
        state->buf      = NULL;
        state->len      = 0;
        state->src      = NULL;
@@ -737,14 +881,25 @@ static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
                goto post;
        }
 
+
        /*
         * this is a fast path, not waiting for the
         * socket to become explicit readable gains
         * about 10%-20% performance in benchmark tests.
         */
-       tdgram_bsd_recvfrom_handler(req);
-       if (!tevent_req_is_in_progress(req)) {
-               goto post;
+       if (bsds->optimize_recvfrom) {
+               /*
+                * We only do the optimization on
+                * recvfrom if the caller asked for it.
+                *
+                * This is needed because in most cases
+                * we prefer to flush send buffers before
+                * receiving incoming requests.
+                */
+               tdgram_bsd_recvfrom_handler(req);
+               if (!tevent_req_is_in_progress(req)) {
+                       goto post;
+               }
        }
 
        ret = tdgram_bsd_set_readable_handler(bsds, ev,
@@ -772,16 +927,22 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
        struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
        struct tsocket_address_bsd *bsda;
        ssize_t ret;
-       struct sockaddr *sa = NULL;
-       socklen_t sa_len = 0;
        int err;
        bool retry;
 
-       ret = tsocket_bsd_pending(bsds->fd);
-       if (ret == 0) {
+       if (bsds->netlink) {
+               ret = tsocket_bsd_netlink_pending(bsds->fd);
+       } else {
+               ret = tsocket_bsd_pending(bsds->fd);
+       }
+
+       if (state->first_try && ret == 0) {
+               state->first_try = false;
                /* retry later */
                return;
        }
+       state->first_try = false;
+
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
@@ -791,6 +952,7 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
                return;
        }
 
+       /* note that 'ret' can be 0 here */
        state->buf = talloc_array(state, uint8_t, ret);
        if (tevent_req_nomem(state->buf, req)) {
                return;
@@ -807,18 +969,13 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
        }
 
        ZERO_STRUCTP(bsda);
+       bsda->sa_socklen = sizeof(bsda->u.ss);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+       bsda->u.sa.sa_len = bsda->sa_socklen;
+#endif
 
-       sa = &bsda->u.sa;
-       sa_len = sizeof(bsda->u.ss);
-       /*
-        * for unix sockets we can't use the size of sockaddr_storage
-        * we would get EINVAL
-        */
-       if (bsda->u.sa.sa_family == AF_UNIX) {
-               sa_len = sizeof(bsda->u.un);
-       }
-
-       ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_len);
+       ret = recvfrom(bsds->fd, state->buf, state->len, 0,
+                      &bsda->u.sa, &bsda->sa_socklen);
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
@@ -828,10 +985,17 @@ static void tdgram_bsd_recvfrom_handler(void *private_data)
                return;
        }
 
-       if (ret != state->len) {
-               tevent_req_error(req, EIO);
+       /*
+        * Some systems (FreeBSD, see bug #7115) return too many
+        * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
+        * the return value includes some IP/UDP header bytes,
+        * while recvfrom() just returns the payload.
+        */
+       state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
+       if (tevent_req_nomem(state->buf, req)) {
                return;
        }
+       state->len = ret;
 
        tevent_req_done(req);
 }
@@ -946,7 +1110,7 @@ static void tdgram_bsd_sendto_handler(void *private_data)
        struct tdgram_context *dgram = state->dgram;
        struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
        struct sockaddr *sa = NULL;
-       socklen_t sa_len = 0;
+       socklen_t sa_socklen = 0;
        ssize_t ret;
        int err;
        bool retry;
@@ -957,22 +1121,41 @@ static void tdgram_bsd_sendto_handler(void *private_data)
                        struct tsocket_address_bsd);
 
                sa = &bsda->u.sa;
-               sa_len = sizeof(bsda->u.ss);
-               /*
-                * for unix sockets we can't use the size of sockaddr_storage
-                * we would get EINVAL
-                */
-               if (bsda->u.sa.sa_family == AF_UNIX) {
-                       sa_len = sizeof(bsda->u.un);
-               }
+               sa_socklen = bsda->sa_socklen;
        }
 
-       ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_len);
+       ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
                return;
        }
+
+       if (err == EMSGSIZE) {
+               /* round up in 1K increments */
+               int bufsize = ((state->len + 1023) & (~1023));
+
+               ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize,
+                                sizeof(bufsize));
+               if (ret == 0) {
+                       /*
+                        * We do the retry here, rather than via the
+                        * handler, as we only want to retry once for
+                        * this condition, so if there is a mismatch
+                        * between what setsockopt() accepts and what can
+                        * actually be sent, we do not end up in a
+                        * loop.
+                        */
+
+                       ret = sendto(bsds->fd, state->buf, state->len,
+                                    0, sa, sa_socklen);
+                       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+                       if (retry) { /* retry later */
+                               return;
+                       }
+               }
+       }
+
        if (tevent_req_error(req, err)) {
                return;
        }
@@ -1023,6 +1206,7 @@ static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
                goto post;
        }
 
+       TALLOC_FREE(bsds->fde);
        ret = close(bsds->fd);
        bsds->fd = -1;
        err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
@@ -1087,7 +1271,9 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        int ret;
        bool do_bind = false;
        bool do_reuseaddr = false;
-       socklen_t sa_len = sizeof(lbsda->u.ss);
+       bool do_ipv6only = false;
+       bool is_inet = false;
+       int sa_fam = lbsda->u.sa.sa_family;
 
        if (remote) {
                rbsda = talloc_get_type_abort(remote->private_data,
@@ -1104,20 +1290,16 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                        do_reuseaddr = true;
                        do_bind = true;
                }
-               /*
-                * for unix sockets we can't use the size of sockaddr_storage
-                * we would get EINVAL
-                */
-               sa_len = sizeof(lbsda->u.un);
                break;
        case AF_INET:
                if (lbsda->u.in.sin_port != 0) {
                        do_reuseaddr = true;
                        do_bind = true;
                }
-               if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
+               if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
                        do_bind = true;
                }
+               is_inet = true;
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
@@ -1130,6 +1312,8 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                           sizeof(in6addr_any)) != 0) {
                        do_bind = true;
                }
+               is_inet = true;
+               do_ipv6only = true;
                break;
 #endif
        default:
@@ -1137,14 +1321,28 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                return -1;
        }
 
-       fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
+       if (!do_bind && is_inet && rbsda) {
+               sa_fam = rbsda->u.sa.sa_family;
+               switch (sa_fam) {
+               case AF_INET:
+                       do_ipv6only = false;
+                       break;
+#ifdef HAVE_IPV6
+               case AF_INET6:
+                       do_ipv6only = true;
+                       break;
+#endif
+               }
+       }
+
+       fd = socket(sa_fam, SOCK_DGRAM, 0);
        if (fd < 0) {
-               return fd;
+               return -1;
        }
 
        fd = tsocket_bsd_common_prepare_fd(fd, true);
        if (fd < 0) {
-               return fd;
+               return -1;
        }
 
        dgram = tdgram_context_create(mem_ctx,
@@ -1162,6 +1360,21 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        bsds->fd = fd;
        talloc_set_destructor(bsds, tdgram_bsd_destructor);
 
+#ifdef HAVE_IPV6
+       if (do_ipv6only) {
+               int val = 1;
+
+               ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       talloc_free(dgram);
+                       errno = saved_errno;
+                       return -1;
+               }
+       }
+#endif
+
        if (broadcast) {
                int val = 1;
 
@@ -1171,7 +1384,7 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                        int saved_errno = errno;
                        talloc_free(dgram);
                        errno = saved_errno;
-                       return ret;
+                       return -1;
                }
        }
 
@@ -1184,27 +1397,33 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
                        int saved_errno = errno;
                        talloc_free(dgram);
                        errno = saved_errno;
-                       return ret;
+                       return -1;
                }
        }
 
        if (do_bind) {
-               ret = bind(fd, &lbsda->u.sa, sa_len);
+               ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
                if (ret == -1) {
                        int saved_errno = errno;
                        talloc_free(dgram);
                        errno = saved_errno;
-                       return ret;
+                       return -1;
                }
        }
 
        if (rbsda) {
-               ret = connect(fd, &rbsda->u.sa, sa_len);
+               if (rbsda->u.sa.sa_family != sa_fam) {
+                       talloc_free(dgram);
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
                if (ret == -1) {
                        int saved_errno = errno;
                        talloc_free(dgram);
                        errno = saved_errno;
-                       return ret;
+                       return -1;
                }
        }
 
@@ -1212,6 +1431,47 @@ static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
        return 0;
 }
 
+int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx,
+                               int fd,
+                               struct tdgram_context **_dgram,
+                               const char *location)
+{
+       struct tdgram_context *dgram;
+       struct tdgram_bsd *bsds;
+#ifdef HAVE_LINUX_RTNETLINK_H
+       int result;
+       struct sockaddr sa;
+       socklen_t sa_len = sizeof(struct sockaddr);
+#endif
+
+       dgram = tdgram_context_create(mem_ctx,
+                                     &tdgram_bsd_ops,
+                                     &bsds,
+                                     struct tdgram_bsd,
+                                     location);
+       if (!dgram) {
+               return -1;
+       }
+       ZERO_STRUCTP(bsds);
+       bsds->fd = fd;
+       talloc_set_destructor(bsds, tdgram_bsd_destructor);
+
+       *_dgram = dgram;
+
+#ifdef HAVE_LINUX_RTNETLINK_H
+       /*
+        * Try to determine the protocol family and remember if it's
+        * AF_NETLINK. We don't care if this fails.
+        */
+       result = getsockname(fd, &sa, &sa_len);
+       if (result == 0 && sa.sa_family == AF_NETLINK) {
+               bsds->netlink = true;
+       }
+#endif
+
+       return 0;
+}
+
 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
                            const struct tsocket_address *remote,
                            TALLOC_CTX *mem_ctx,
@@ -1241,6 +1501,36 @@ int _tdgram_inet_udp_socket(const struct tsocket_address *local,
        return ret;
 }
 
+int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local,
+                                     TALLOC_CTX *mem_ctx,
+                                     struct tdgram_context **dgram,
+                                     const char *location)
+{
+       struct tsocket_address_bsd *lbsda =
+               talloc_get_type_abort(local->private_data,
+               struct tsocket_address_bsd);
+       int ret;
+
+       switch (lbsda->u.sa.sa_family) {
+       case AF_INET:
+               break;
+#ifdef HAVE_IPV6
+       case AF_INET6:
+               /* only ipv4 */
+               errno = EINVAL;
+               return -1;
+#endif
+       default:
+               errno = EINVAL;
+               return -1;
+       }
+
+       ret = tdgram_bsd_dgram_socket(local, NULL, true,
+                                     mem_ctx, dgram, location);
+
+       return ret;
+}
+
 int _tdgram_unix_socket(const struct tsocket_address *local,
                        const struct tsocket_address *remote,
                        TALLOC_CTX *mem_ctx,
@@ -1271,6 +1561,7 @@ struct tstream_bsd {
 
        void *event_ptr;
        struct tevent_fd *fde;
+       bool optimize_readv;
 
        void *readable_private;
        void (*readable_handler)(void *private_data);
@@ -1278,6 +1569,25 @@ struct tstream_bsd {
        void (*writeable_handler)(void *private_data);
 };
 
+bool tstream_bsd_optimize_readv(struct tstream_context *stream,
+                               bool on)
+{
+       struct tstream_bsd *bsds =
+               talloc_get_type(_tstream_context_data(stream),
+               struct tstream_bsd);
+       bool old;
+
+       if (bsds == NULL) {
+               /* not a bsd socket */
+               return false;
+       }
+
+       old = bsds->optimize_readv;
+       bsds->optimize_readv = on;
+
+       return old;
+}
+
 static void tstream_bsd_fde_handler(struct tevent_context *ev,
                                    struct tevent_fd *fde,
                                    uint16_t flags,
@@ -1490,9 +1800,19 @@ static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
         * socket to become explicit readable gains
         * about 10%-20% performance in benchmark tests.
         */
-       tstream_bsd_readv_handler(req);
-       if (!tevent_req_is_in_progress(req)) {
-               goto post;
+       if (bsds->optimize_readv) {
+               /*
+                * We only do the optimization on
+                * readv if the caller asked for it.
+                *
+                * This is needed because in most cases
+                * we prefer to flush send buffers before
+                * receiving incoming requests.
+                */
+               tstream_bsd_readv_handler(req);
+               if (!tevent_req_is_in_progress(req)) {
+                       goto post;
+               }
        }
 
        ret = tstream_bsd_set_readable_handler(bsds, ev,
@@ -1520,7 +1840,8 @@ static void tstream_bsd_readv_handler(void *private_data)
        struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
        int ret;
        int err;
-       bool retry;
+       int _count;
+       bool ok, retry;
 
        ret = readv(bsds->fd, state->vector, state->count);
        if (ret == 0) {
@@ -1539,31 +1860,13 @@ static void tstream_bsd_readv_handler(void *private_data)
 
        state->ret += ret;
 
-       while (ret > 0) {
-               if (ret < state->vector[0].iov_len) {
-                       uint8_t *base;
-                       base = (uint8_t *)state->vector[0].iov_base;
-                       base += ret;
-                       state->vector[0].iov_base = base;
-                       state->vector[0].iov_len -= ret;
-                       break;
-               }
-               ret -= state->vector[0].iov_len;
-               state->vector += 1;
-               state->count -= 1;
-       }
+       _count = state->count; /* tstream has size_t count, readv has int */
+       ok = iov_advance(&state->vector, &_count, ret);
+       state->count = _count;
 
-       /*
-        * there're maybe some empty vectors at the end
-        * which we need to skip, otherwise we would get
-        * ret == 0 from the readv() call and return EPIPE
-        */
-       while (state->count > 0) {
-               if (state->vector[0].iov_len > 0) {
-                       break;
-               }
-               state->vector += 1;
-               state->count -= 1;
+       if (!ok) {
+               tevent_req_error(req, EINVAL);
+               return;
        }
 
        if (state->count > 0) {
@@ -1680,7 +1983,8 @@ static void tstream_bsd_writev_handler(void *private_data)
        struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
        ssize_t ret;
        int err;
-       bool retry;
+       int _count;
+       bool ok, retry;
 
        ret = writev(bsds->fd, state->vector, state->count);
        if (ret == 0) {
@@ -1699,31 +2003,13 @@ static void tstream_bsd_writev_handler(void *private_data)
 
        state->ret += ret;
 
-       while (ret > 0) {
-               if (ret < state->vector[0].iov_len) {
-                       uint8_t *base;
-                       base = (uint8_t *)state->vector[0].iov_base;
-                       base += ret;
-                       state->vector[0].iov_base = base;
-                       state->vector[0].iov_len -= ret;
-                       break;
-               }
-               ret -= state->vector[0].iov_len;
-               state->vector += 1;
-               state->count -= 1;
-       }
+       _count = state->count; /* tstream has size_t count, writev has int */
+       ok = iov_advance(&state->vector, &_count, ret);
+       state->count = _count;
 
-       /*
-        * there're maybe some empty vectors at the end
-        * which we need to skip, otherwise we would get
-        * ret == 0 from the writev() call and return EPIPE
-        */
-       while (state->count > 0) {
-               if (state->vector[0].iov_len > 0) {
-                       break;
-               }
-               state->vector += 1;
-               state->count -= 1;
+       if (!ok) {
+               tevent_req_error(req, EINVAL);
+               return;
        }
 
        if (state->count > 0) {
@@ -1775,6 +2061,7 @@ static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
                goto post;
        }
 
+       TALLOC_FREE(bsds->fde);
        ret = close(bsds->fd);
        bsds->fd = -1;
        err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
@@ -1852,6 +2139,7 @@ struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        struct tstream_conext *stream;
+       struct tsocket_address *local;
 };
 
 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
@@ -1870,7 +2158,7 @@ static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
                                            uint16_t flags,
                                            void *private_data);
 
-static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
+static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
                                        struct tevent_context *ev,
                                        int sys_errno,
                                        const struct tsocket_address *local,
@@ -1881,15 +2169,16 @@ static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
        struct tsocket_address_bsd *lbsda =
                talloc_get_type_abort(local->private_data,
                struct tsocket_address_bsd);
+       struct tsocket_address_bsd *lrbsda = NULL;
        struct tsocket_address_bsd *rbsda =
                talloc_get_type_abort(remote->private_data,
                struct tsocket_address_bsd);
        int ret;
-       int err;
-       bool retry;
        bool do_bind = false;
        bool do_reuseaddr = false;
-       socklen_t sa_len = sizeof(rbsda->u.ss);
+       bool do_ipv6only = false;
+       bool is_inet = false;
+       int sa_fam = lbsda->u.sa.sa_family;
 
        req = tevent_req_create(mem_ctx, &state,
                                struct tstream_bsd_connect_state);
@@ -1913,20 +2202,16 @@ static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
                        do_reuseaddr = true;
                        do_bind = true;
                }
-               /*
-                * for unix sockets we can't use the size of sockaddr_storage
-                * we would get EINVAL
-                */
-               sa_len = sizeof(rbsda->u.un);
                break;
        case AF_INET:
                if (lbsda->u.in.sin_port != 0) {
                        do_reuseaddr = true;
                        do_bind = true;
                }
-               if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
+               if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
                        do_bind = true;
                }
+               is_inet = true;
                break;
 #ifdef HAVE_IPV6
        case AF_INET6:
@@ -1939,6 +2224,8 @@ static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
                           sizeof(in6addr_any)) != 0) {
                        do_bind = true;
                }
+               is_inet = true;
+               do_ipv6only = true;
                break;
 #endif
        default:
@@ -1946,7 +2233,38 @@ static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
                goto post;
        }
 
-       state->fd = socket(lbsda->u.sa.sa_family, SOCK_STREAM, 0);
+       if (!do_bind && is_inet) {
+               sa_fam = rbsda->u.sa.sa_family;
+               switch (sa_fam) {
+               case AF_INET:
+                       do_ipv6only = false;
+                       break;
+#ifdef HAVE_IPV6
+               case AF_INET6:
+                       do_ipv6only = true;
+                       break;
+#endif
+               }
+       }
+
+       if (is_inet) {
+               state->local = tsocket_address_create(state,
+                                                     &tsocket_address_bsd_ops,
+                                                     &lrbsda,
+                                                     struct tsocket_address_bsd,
+                                                     __location__ "bsd_connect");
+               if (tevent_req_nomem(state->local, req)) {
+                       goto post;
+               }
+
+               ZERO_STRUCTP(lrbsda);
+               lrbsda->sa_socklen = sizeof(lrbsda->u.ss);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+               lrbsda->u.sa.sa_len = lrbsda->sa_socklen;
+#endif
+       }
+
+       state->fd = socket(sa_fam, SOCK_STREAM, 0);
        if (state->fd == -1) {
                tevent_req_error(req, errno);
                goto post;
@@ -1958,6 +2276,19 @@ static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
                goto post;
        }
 
+#ifdef HAVE_IPV6
+       if (do_ipv6only) {
+               int val = 1;
+
+               ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
+                                (const void *)&val, sizeof(val));
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+#endif
+
        if (do_reuseaddr) {
                int val = 1;
 
@@ -1970,23 +2301,42 @@ static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
        }
 
        if (do_bind) {
-               ret = bind(state->fd, &lbsda->u.sa, sizeof(lbsda->u.ss));
+               ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
                if (ret == -1) {
                        tevent_req_error(req, errno);
                        goto post;
                }
        }
 
-       ret = connect(state->fd, &rbsda->u.sa, sa_len);
-       err = tsocket_bsd_error_from_errno(ret, errno, &retry);
-       if (retry) {
-               /* retry later */
-               goto async;
+       if (rbsda->u.sa.sa_family != sa_fam) {
+               tevent_req_error(req, EINVAL);
+               goto post;
        }
-       if (tevent_req_error(req, err)) {
+
+       ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
+       if (ret == -1) {
+               if (errno == EINPROGRESS) {
+                       goto async;
+               }
+               tevent_req_error(req, errno);
+               goto post;
+       }
+
+       if (!state->local) {
+               tevent_req_done(req);
                goto post;
        }
 
+       if (lrbsda != NULL) {
+               ret = getsockname(state->fd,
+                                 &lrbsda->u.sa,
+                                 &lrbsda->sa_socklen);
+               if (ret == -1) {
+                       tevent_req_error(req, errno);
+                       goto post;
+               }
+       }
+
        tevent_req_done(req);
        goto post;
 
@@ -2016,6 +2366,7 @@ static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
                                 struct tevent_req);
        struct tstream_bsd_connect_state *state = tevent_req_data(req,
                                        struct tstream_bsd_connect_state);
+       struct tsocket_address_bsd *lrbsda = NULL;
        int ret;
        int error=0;
        socklen_t len = sizeof(error);
@@ -2038,6 +2389,20 @@ static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
                return;
        }
 
+       if (!state->local) {
+               tevent_req_done(req);
+               return;
+       }
+
+       lrbsda = talloc_get_type_abort(state->local->private_data,
+                                      struct tsocket_address_bsd);
+
+       ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
+       if (ret == -1) {
+               tevent_req_error(req, errno);
+               return;
+       }
+
        tevent_req_done(req);
 }
 
@@ -2045,6 +2410,7 @@ static int tstream_bsd_connect_recv(struct tevent_req *req,
                                    int *perrno,
                                    TALLOC_CTX *mem_ctx,
                                    struct tstream_context **stream,
+                                   struct tsocket_address **local,
                                    const char *location)
 {
        struct tstream_bsd_connect_state *state = tevent_req_data(req,
@@ -2063,6 +2429,10 @@ static int tstream_bsd_connect_recv(struct tevent_req *req,
                }
                TALLOC_FREE(state->fde);
                state->fd = -1;
+
+               if (local) {
+                       *local = talloc_move(mem_ctx, &state->local);
+               }
        }
 
 done:
@@ -2102,9 +2472,12 @@ int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
                                   int *perrno,
                                   TALLOC_CTX *mem_ctx,
                                   struct tstream_context **stream,
+                                  struct tsocket_address **local,
                                   const char *location)
 {
-       return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
+       return tstream_bsd_connect_recv(req, perrno,
+                                       mem_ctx, stream, local,
+                                       location);
 }
 
 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
@@ -2137,7 +2510,9 @@ int _tstream_unix_connect_recv(struct tevent_req *req,
                                      struct tstream_context **stream,
                                      const char *location)
 {
-       return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
+       return tstream_bsd_connect_recv(req, perrno,
+                                       mem_ctx, stream, NULL,
+                                       location);
 }
 
 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,