#include "system/network.h"
#include "tsocket.h"
#include "tsocket_internal.h"
+#include "lib/util/iov_buf.h"
+#include "lib/util/blocking.h"
static int tsocket_bsd_error_from_errno(int ret,
int sys_errno,
return sys_errno;
}
+ /* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */
+ if (sys_errno == ENOMEM) {
+ *retry = true;
+ return sys_errno;
+ }
+
#ifdef EWOULDBLOCK
if (sys_errno == EWOULDBLOCK) {
*retry = true;
int fds[3];
int num_fds = 0;
- int result, flags;
+ int result;
+ bool ok;
if (fd == -1) {
return -1;
}
}
- /* fd should be nonblocking. */
-
-#ifdef O_NONBLOCK
-#define FLAG_TO_SET O_NONBLOCK
-#else
-#ifdef SYSV
-#define FLAG_TO_SET O_NDELAY
-#else /* BSD */
-#define FLAG_TO_SET FNDELAY
-#endif
-#endif
-
- if ((flags = fcntl(fd, F_GETFL)) == -1) {
+ result = set_blocking(fd, false);
+ if (result == -1) {
goto fail;
}
- flags |= FLAG_TO_SET;
- if (fcntl(fd, F_SETFL, flags) == -1) {
+ ok = smb_set_close_on_exec(fd);
+ if (!ok) {
goto fail;
}
-#undef FLAG_TO_SET
-
- /* fd should be closed on exec() */
-#ifdef FD_CLOEXEC
- result = flags = fcntl(fd, F_GETFD, 0);
- if (flags >= 0) {
- flags |= FD_CLOEXEC;
- result = fcntl(fd, F_SETFD, flags);
- }
- if (result < 0) {
- goto fail;
- }
-#endif
return fd;
fail:
};
int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
- struct sockaddr *sa,
+ const struct sockaddr *sa,
size_t sa_socklen,
struct tsocket_address **_addr,
const char *location)
memcpy(&bsda->u.ss, sa, sa_socklen);
bsda->sa_socklen = sa_socklen;
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+ bsda->u.sa.sa_len = bsda->sa_socklen;
+#endif
*_addr = addr;
return 0;
}
memcpy(sa, &bsda->u.ss, sa_socklen);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+ sa->sa_len = sa_socklen;
+#endif
return sa_socklen;
}
return -1;
}
- snprintf(port_str, sizeof(port_str) - 1, "%u", port);
+ snprintf(port_str, sizeof(port_str), "%u", port);
ret = getaddrinfo(addr, port_str, &hints, &result);
if (ret != 0) {
void *event_ptr;
struct tevent_fd *fde;
+ bool optimize_recvfrom;
void *readable_private;
void (*readable_handler)(void *private_data);
void (*writeable_handler)(void *private_data);
};
+/*
+ * Switch the recvfrom fast path of a BSD datagram socket on or off.
+ *
+ * When the flag is set, a receive request calls the recvfrom handler
+ * immediately instead of first waiting for the fd to become readable.
+ *
+ * Returns the previous value of the flag. false is also returned when
+ * the private data of 'dgram' is not a struct tdgram_bsd, in which
+ * case nothing is changed.
+ */
+bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram,
+				  bool on)
+{
+	struct tdgram_bsd *sock = NULL;
+	bool previous = false;
+
+	sock = talloc_get_type(_tdgram_context_data(dgram),
+			       struct tdgram_bsd);
+	if (sock != NULL) {
+		/* remember the old setting before overwriting it */
+		previous = sock->optimize_recvfrom;
+		sock->optimize_recvfrom = on;
+	}
+
+	/* still false here if this was not a bsd socket */
+	return previous;
+}
+
static void tdgram_bsd_fde_handler(struct tevent_context *ev,
struct tevent_fd *fde,
uint16_t flags,
struct tdgram_bsd_recvfrom_state {
struct tdgram_context *dgram;
-
+ bool first_try;
uint8_t *buf;
size_t len;
struct tsocket_address *src;
}
state->dgram = dgram;
+ state->first_try= true;
state->buf = NULL;
state->len = 0;
state->src = NULL;
goto post;
}
+
/*
* this is a fast path, not waiting for the
* socket to become explicit readable gains
* about 10%-20% performance in benchmark tests.
*/
- tdgram_bsd_recvfrom_handler(req);
- if (!tevent_req_is_in_progress(req)) {
- goto post;
+ if (bsds->optimize_recvfrom) {
+ /*
+ * We only do the optimization on
+ * recvfrom if the caller asked for it.
+ *
+ * This is needed because in most cases
+ * we prefer to flush send buffers before
+ * receiving incoming requests.
+ */
+ tdgram_bsd_recvfrom_handler(req);
+ if (!tevent_req_is_in_progress(req)) {
+ goto post;
+ }
}
ret = tdgram_bsd_set_readable_handler(bsds, ev,
bool retry;
ret = tsocket_bsd_pending(bsds->fd);
- if (ret == 0) {
+ if (state->first_try && ret == 0) {
+ state->first_try = false;
/* retry later */
return;
}
+ state->first_try = false;
+
err = tsocket_bsd_error_from_errno(ret, errno, &retry);
if (retry) {
/* retry later */
return;
}
+ /* note that 'ret' can be 0 here */
state->buf = talloc_array(state, uint8_t, ret);
if (tevent_req_nomem(state->buf, req)) {
return;
ZERO_STRUCTP(bsda);
bsda->sa_socklen = sizeof(bsda->u.ss);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+ bsda->u.sa.sa_len = bsda->sa_socklen;
+#endif
ret = recvfrom(bsds->fd, state->buf, state->len, 0,
&bsda->u.sa, &bsda->sa_socklen);
/* retry later */
return;
}
+
+ if (err == EMSGSIZE) {
+ /* round up in 1K increments */
+ int bufsize = ((state->len + 1023) & (~1023));
+
+ ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ if (ret == 0) {
+ /*
+ * We do the retry here, rather than via the
+ * handler, as we only want to retry once for
+ * this condition, so if there is a mismatch
+ * between what setsockopt() accepts and what can
+ * actually be sent, we do not end up in a
+ * loop.
+ */
+
+ ret = sendto(bsds->fd, state->buf, state->len,
+ 0, sa, sa_socklen);
+ err = tsocket_bsd_error_from_errno(ret, errno, &retry);
+ if (retry) { /* retry later */
+ return;
+ }
+ }
+ }
+
if (tevent_req_error(req, err)) {
return;
}
return 0;
}
+/*
+ * Wrap an already existing file descriptor in a new tdgram context
+ * that uses the BSD datagram operations.
+ *
+ * mem_ctx  - talloc parent passed to tdgram_context_create()
+ * fd       - descriptor stored in the new context's private state
+ * _dgram   - receives the new context on success
+ * location - caller location string passed to tdgram_context_create()
+ *
+ * Returns 0 on success, or -1 if the context could not be created
+ * (in which case *_dgram is left untouched).
+ */
+int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx,
+				int fd,
+				struct tdgram_context **_dgram,
+				const char *location)
+{
+	struct tdgram_bsd *priv = NULL;
+	struct tdgram_context *ctx = NULL;
+
+	ctx = tdgram_context_create(mem_ctx,
+				    &tdgram_bsd_ops,
+				    &priv,
+				    struct tdgram_bsd,
+				    location);
+	if (ctx == NULL) {
+		return -1;
+	}
+
+	/* start from an all-zero private state, then record the fd */
+	ZERO_STRUCTP(priv);
+	priv->fd = fd;
+
+	/* tdgram_bsd_destructor runs when the private state is freed */
+	talloc_set_destructor(priv, tdgram_bsd_destructor);
+
+	*_dgram = ctx;
+	return 0;
+}
+
int _tdgram_inet_udp_socket(const struct tsocket_address *local,
const struct tsocket_address *remote,
TALLOC_CTX *mem_ctx,
return ret;
}
+/*
+ * Create a datagram socket for the given local address via
+ * tdgram_bsd_dgram_socket(), with no remote address and its third
+ * argument set to true (presumably the broadcast switch - confirm
+ * against tdgram_bsd_dgram_socket()).
+ *
+ * Only AF_INET local addresses are accepted; any other address family
+ * fails with errno = EINVAL and a return value of -1.
+ *
+ * Otherwise the result of tdgram_bsd_dgram_socket() is returned
+ * unchanged.
+ */
+int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local,
+				      TALLOC_CTX *mem_ctx,
+				      struct tdgram_context **dgram,
+				      const char *location)
+{
+	struct tsocket_address_bsd *local_bsd =
+		talloc_get_type_abort(local->private_data,
+				      struct tsocket_address_bsd);
+
+	if (local_bsd->u.sa.sa_family != AF_INET) {
+		/* only ipv4 */
+		errno = EINVAL;
+		return -1;
+	}
+
+	return tdgram_bsd_dgram_socket(local, NULL, true,
+				       mem_ctx, dgram, location);
+}
+
int _tdgram_unix_socket(const struct tsocket_address *local,
const struct tsocket_address *remote,
TALLOC_CTX *mem_ctx,
void *event_ptr;
struct tevent_fd *fde;
+ bool optimize_readv;
void *readable_private;
void (*readable_handler)(void *private_data);
void (*writeable_handler)(void *private_data);
};
+/*
+ * Switch the readv fast path of a BSD stream socket on or off.
+ *
+ * When the flag is set, a readv request calls the readv handler
+ * immediately instead of first waiting for the fd to become readable.
+ *
+ * Returns the previous value of the flag. false is also returned when
+ * the private data of 'stream' is not a struct tstream_bsd, in which
+ * case nothing is changed.
+ */
+bool tstream_bsd_optimize_readv(struct tstream_context *stream,
+				bool on)
+{
+	struct tstream_bsd *sock = NULL;
+	bool previous = false;
+
+	sock = talloc_get_type(_tstream_context_data(stream),
+			       struct tstream_bsd);
+	if (sock != NULL) {
+		/* remember the old setting before overwriting it */
+		previous = sock->optimize_readv;
+		sock->optimize_readv = on;
+	}
+
+	/* still false here if this was not a bsd socket */
+	return previous;
+}
+
static void tstream_bsd_fde_handler(struct tevent_context *ev,
struct tevent_fd *fde,
uint16_t flags,
* socket to become explicit readable gains
* about 10%-20% performance in benchmark tests.
*/
- tstream_bsd_readv_handler(req);
- if (!tevent_req_is_in_progress(req)) {
- goto post;
+ if (bsds->optimize_readv) {
+ /*
+ * We only do the optimization on
+ * readv if the caller asked for it.
+ *
+ * This is needed because in most cases
+ * we prefer to flush send buffers before
+ * receiving incoming requests.
+ */
+ tstream_bsd_readv_handler(req);
+ if (!tevent_req_is_in_progress(req)) {
+ goto post;
+ }
}
ret = tstream_bsd_set_readable_handler(bsds, ev,
struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
int ret;
int err;
- bool retry;
+ int _count;
+ bool ok, retry;
ret = readv(bsds->fd, state->vector, state->count);
if (ret == 0) {
state->ret += ret;
- while (ret > 0) {
- if (ret < state->vector[0].iov_len) {
- uint8_t *base;
- base = (uint8_t *)state->vector[0].iov_base;
- base += ret;
- state->vector[0].iov_base = (void *)base;
- state->vector[0].iov_len -= ret;
- break;
- }
- ret -= state->vector[0].iov_len;
- state->vector += 1;
- state->count -= 1;
- }
+ _count = state->count; /* tstream has size_t count, readv has int */
+ ok = iov_advance(&state->vector, &_count, ret);
+ state->count = _count;
- /*
- * there're maybe some empty vectors at the end
- * which we need to skip, otherwise we would get
- * ret == 0 from the readv() call and return EPIPE
- */
- while (state->count > 0) {
- if (state->vector[0].iov_len > 0) {
- break;
- }
- state->vector += 1;
- state->count -= 1;
+ if (!ok) {
+ tevent_req_error(req, EINVAL);
+ return;
}
if (state->count > 0) {
struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
ssize_t ret;
int err;
- bool retry;
+ int _count;
+ bool ok, retry;
ret = writev(bsds->fd, state->vector, state->count);
if (ret == 0) {
state->ret += ret;
- while (ret > 0) {
- if (ret < state->vector[0].iov_len) {
- uint8_t *base;
- base = (uint8_t *)state->vector[0].iov_base;
- base += ret;
- state->vector[0].iov_base = (void *)base;
- state->vector[0].iov_len -= ret;
- break;
- }
- ret -= state->vector[0].iov_len;
- state->vector += 1;
- state->count -= 1;
- }
+ _count = state->count; /* tstream has size_t count, writev has int */
+ ok = iov_advance(&state->vector, &_count, ret);
+ state->count = _count;
- /*
- * there're maybe some empty vectors at the end
- * which we need to skip, otherwise we would get
- * ret == 0 from the writev() call and return EPIPE
- */
- while (state->count > 0) {
- if (state->vector[0].iov_len > 0) {
- break;
- }
- state->vector += 1;
- state->count -= 1;
+ if (!ok) {
+ tevent_req_error(req, EINVAL);
+ return;
}
if (state->count > 0) {
int fd;
struct tevent_fd *fde;
struct tstream_conext *stream;
+ struct tsocket_address *local;
};
static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
uint16_t flags,
void *private_data);
-static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
+static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
int sys_errno,
const struct tsocket_address *local,
struct tsocket_address_bsd *lbsda =
talloc_get_type_abort(local->private_data,
struct tsocket_address_bsd);
+ struct tsocket_address_bsd *lrbsda = NULL;
struct tsocket_address_bsd *rbsda =
talloc_get_type_abort(remote->private_data,
struct tsocket_address_bsd);
int ret;
- int err;
- bool retry;
bool do_bind = false;
bool do_reuseaddr = false;
bool do_ipv6only = false;
}
}
+ if (is_inet) {
+ state->local = tsocket_address_create(state,
+ &tsocket_address_bsd_ops,
+ &lrbsda,
+ struct tsocket_address_bsd,
+ __location__ "bsd_connect");
+ if (tevent_req_nomem(state->local, req)) {
+ goto post;
+ }
+
+ ZERO_STRUCTP(lrbsda);
+ lrbsda->sa_socklen = sizeof(lrbsda->u.ss);
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+ lrbsda->u.sa.sa_len = lrbsda->sa_socklen;
+#endif
+ }
+
state->fd = socket(sa_fam, SOCK_STREAM, 0);
if (state->fd == -1) {
tevent_req_error(req, errno);
}
ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
- err = tsocket_bsd_error_from_errno(ret, errno, &retry);
- if (retry) {
- /* retry later */
- goto async;
+ if (ret == -1) {
+ if (errno == EINPROGRESS) {
+ goto async;
+ }
+ tevent_req_error(req, errno);
+ goto post;
}
- if (tevent_req_error(req, err)) {
+
+ if (!state->local) {
+ tevent_req_done(req);
+ goto post;
+ }
+
+ ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
+ if (ret == -1) {
+ tevent_req_error(req, errno);
goto post;
}
struct tevent_req);
struct tstream_bsd_connect_state *state = tevent_req_data(req,
struct tstream_bsd_connect_state);
+ struct tsocket_address_bsd *lrbsda = NULL;
int ret;
int error=0;
socklen_t len = sizeof(error);
return;
}
+ if (!state->local) {
+ tevent_req_done(req);
+ return;
+ }
+
+ lrbsda = talloc_get_type_abort(state->local->private_data,
+ struct tsocket_address_bsd);
+
+ ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
+ if (ret == -1) {
+ tevent_req_error(req, errno);
+ return;
+ }
+
tevent_req_done(req);
}
int *perrno,
TALLOC_CTX *mem_ctx,
struct tstream_context **stream,
+ struct tsocket_address **local,
const char *location)
{
struct tstream_bsd_connect_state *state = tevent_req_data(req,
}
TALLOC_FREE(state->fde);
state->fd = -1;
+
+ if (local) {
+ *local = talloc_move(mem_ctx, &state->local);
+ }
}
done:
int *perrno,
TALLOC_CTX *mem_ctx,
struct tstream_context **stream,
+ struct tsocket_address **local,
const char *location)
{
- return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
+ return tstream_bsd_connect_recv(req, perrno,
+ mem_ctx, stream, local,
+ location);
}
struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
struct tstream_context **stream,
const char *location)
{
- return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
+ return tstream_bsd_connect_recv(req, perrno,
+ mem_ctx, stream, NULL,
+ location);
}
int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,