2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
26 #include "lib/iov_buf.h"
27 #include "lib/msghdr.h"
31 * This file implements two abstractions: The "unix_dgram" functions implement
32 * queueing for unix domain datagram sockets. You can send to a destination
33 * socket, and if that has no free space available, it will fall back to an
34 * anonymous socket that will poll for writability. "unix_dgram" expects the
35 * data size not to exceed the system limit.
37 * The "unix_msg" functions implement the fragmentation of large messages on
38 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * A single datagram waiting in a send queue. prev/next are the list links
 * used by the DLIST_* macros on unix_dgram_send_queue.msgs.
 * NOTE(review): other code in this file also uses ->sock, ->sent and
 * ->sys_errno on this struct; those members are not visible in this listing.
 */
41 struct unix_dgram_msg {
42 struct unix_dgram_msg *prev, *next;
/*
 * Per-destination queue of pending datagrams. Queues are chained off
 * unix_dgram_ctx.send_queues through prev/next.
 * NOTE(review): ->sock and ->path are used elsewhere in this file, but their
 * declarations are not visible in this listing.
 */
49 struct unix_dgram_send_queue {
50 struct unix_dgram_send_queue *prev, *next;
/* Owning context; read back in unix_dgram_send_queue_free(). */
51 struct unix_dgram_ctx *ctx;
/* Pending messages; queue_msg() appends new ones at the tail. */
53 struct unix_dgram_msg *msgs;
/*
 * State for one bound unix datagram socket plus its outgoing send queues.
 * NOTE(review): ->sock, ->created_pid, ->recv_buf, ->max_msg, ->private_data
 * and ->path are used elsewhere in this file, but their declarations are not
 * visible in this listing.
 */
57 struct unix_dgram_ctx {
/* Event-loop hooks; this file calls watch_new, watch_update and watch_free through it. */
60 const struct poll_funcs *ev_funcs;
/* Invoked by unix_dgram_recv_handler() with the received payload and fd array. */
63 void (*recv_callback)(struct unix_dgram_ctx *ctx,
64 uint8_t *msg, size_t msg_len,
65 int *fds, size_t num_fds,
/* POLLIN watch on the bound socket, created in unix_dgram_init(). */
69 struct poll_watch *sock_read_watch;
/* Head of the list of per-destination send queues. */
70 struct unix_dgram_send_queue *send_queues;
/*
 * Thread pool that runs unix_dgram_send_job(), and the watch on its signal
 * fd; both are set up in unix_dgram_init_pthreadpool().
 */
72 struct pthreadpool *send_pool;
73 struct poll_watch *pool_read_watch;
/*
 * Forward declaration: unix_dgram_init() registers this handler with
 * watch_new() before the definition appears further down in the file.
 */
79 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
82 /* Set socket non blocking. */
/*
 * Reads the descriptor's file status flags with F_GETFL, ORs in FLAG_TO_SET
 * and writes them back with F_SETFL.
 * NOTE(review): the return statements are not visible in this listing;
 * prepare_socket() stores the result in an int "ret" - confirm the exact
 * success/error convention against the full file.
 */
83 static int prepare_socket_nonblock(int sock)
/*
 * FLAG_TO_SET is one of O_NONBLOCK, O_NDELAY or FNDELAY; the preprocessor
 * conditions selecting between them are not visible in this listing.
 */
87 #define FLAG_TO_SET O_NONBLOCK
90 #define FLAG_TO_SET O_NDELAY
92 #define FLAG_TO_SET FNDELAY
96 flags = fcntl(sock, F_GETFL);
100 flags |= FLAG_TO_SET;
101 if (fcntl(sock, F_SETFL, flags) == -1) {
109 /* Set socket close on exec. */
/*
 * Reads the descriptor flags with F_GETFD and writes them back with F_SETFD.
 * NOTE(review): the statement that modifies "flags" between the two fcntl()
 * calls (presumably setting FD_CLOEXEC) and the return statements are not
 * visible in this listing.
 */
110 static int prepare_socket_cloexec(int sock)
115 flags = fcntl(sock, F_GETFD, 0);
120 if (fcntl(sock, F_SETFD, flags) == -1) {
127 /* Set socket non blocking and close on exec. */
/*
 * Runs prepare_socket_nonblock() first and finishes by returning the result
 * of prepare_socket_cloexec().
 * NOTE(review): the check on "ret" between the two calls is not visible in
 * this listing.
 */
128 static int prepare_socket(int sock)
130 int ret = prepare_socket_nonblock(sock);
135 return prepare_socket_cloexec(sock);
/*
 * Size of struct unix_dgram_msg rounded up to the next multiple of 16.
 * unix_dgram_msghdr() uses this as the offset of the msghdr_buf that follows
 * the struct in the same allocation, and queue_msg() uses it to size that
 * allocation.
 * NOTE(review): the return statement is not visible in this listing.
 */
138 static size_t unix_dgram_msg_size(void)
140 size_t msgsize = sizeof(struct unix_dgram_msg);
141 msgsize = (msgsize + 15) & ~15; /* align to 16 */
/*
 * Returns the msghdr_buf stored directly behind "msg": the address is "msg"
 * advanced by unix_dgram_msg_size() bytes. queue_msg() allocates both parts
 * with a single malloc(), so the result points into the same heap block.
 */
145 static struct msghdr_buf *unix_dgram_msghdr(struct unix_dgram_msg *msg)
148 * Not portable in C99, but "msg" is aligned and so is
149 * unix_dgram_msg_size()
151 return (struct msghdr_buf *)(((char *)msg) + unix_dgram_msg_size());
/*
 * Walks the first num_fds entries of "fds".
 * NOTE(review): the loop body is not visible in this listing; going by the
 * name and the call sites on error paths it presumably closes each
 * descriptor - confirm against the full file.
 */
154 static void close_fd_array(int *fds, size_t num_fds)
158 for (i = 0; i < num_fds; i++) {
/*
 * Passes the file descriptors carried in a queued message's msghdr to
 * close_fd_array().
 */
168 static void close_fd_array_dgram_msg(struct unix_dgram_msg *dmsg)
170 struct msghdr_buf *hdr = unix_dgram_msghdr(dmsg);
171 struct msghdr *msg = msghdr_buf_msghdr(hdr);
/* First call with a NULL array only counts the descriptors. */
172 size_t num_fds = msghdr_extract_fds(msg, NULL, 0);
/*
 * Second call fills "fds".
 * NOTE(review): the declaration of "fds" is not visible in this listing.
 */
175 msghdr_extract_fds(msg, fds, num_fds);
177 close_fd_array(fds, num_fds);
/*
 * Creates a unix_dgram_ctx: allocates the context with a copy of the socket
 * path, allocates a receive buffer of max_msg bytes, creates an
 * AF_UNIX/SOCK_DGRAM socket, binds it to "addr" and registers a POLLIN watch
 * that dispatches to unix_dgram_recv_handler().
 * NOTE(review): error handling, cleanup and the final store to *result are
 * on lines not visible in this listing.
 */
180 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
181 const struct poll_funcs *ev_funcs,
182 void (*recv_callback)(struct unix_dgram_ctx *ctx,
183 uint8_t *msg, size_t msg_len,
184 int *fds, size_t num_fds,
187 struct unix_dgram_ctx **result)
189 struct unix_dgram_ctx *ctx;
/* Length of the path including its terminating NUL. */
194 pathlen = strlen(addr->sun_path)+1;
/* The path is stored inline after the fixed part of the struct. */
199 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
204 memcpy(ctx->path, addr->sun_path, pathlen);
209 *ctx = (struct unix_dgram_ctx) {
211 .ev_funcs = ev_funcs,
212 .recv_callback = recv_callback,
213 .private_data = private_data,
/* Placeholder until the getpid() assignment after bind() below. */
214 .created_pid = (pid_t)-1
217 ctx->recv_buf = malloc(max_msg);
218 if (ctx->recv_buf == NULL) {
223 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
224 if (ctx->sock == -1) {
229 /* Set non-blocking and close-on-exec. */
230 ret = prepare_socket(ctx->sock);
236 ret = bind(ctx->sock,
237 (const struct sockaddr *)(const void *)addr,
/* unix_dgram_free() compares this against getpid() before its unlink step. */
244 ctx->created_pid = getpid();
246 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
247 ctx->ev_funcs, ctx->sock, POLLIN,
248 unix_dgram_recv_handler, ctx);
250 if (ctx->sock_read_watch == NULL) {
/*
 * Read-event callback for the bound datagram socket. Receives one datagram
 * into ctx->recv_buf together with any passed file descriptors, and hands
 * both to ctx->recv_callback.
 */
267 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
270 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
/*
 * Size query (NULL msghdr and buffer) for room for up to INT8_MAX
 * descriptors, followed by a stack buffer of that size.
 */
275 size_t bufsize = msghdr_prep_recv_fds(NULL, NULL, 0, INT8_MAX);
276 uint8_t buf[bufsize];
/* The payload lands in the context's preallocated receive buffer. */
278 iov = (struct iovec) {
279 .iov_base = (void *)ctx->recv_buf,
280 .iov_len = ctx->max_msg,
283 msg = (struct msghdr) {
288 msghdr_prep_recv_fds(&msg, buf, bufsize, INT8_MAX);
/* Where available, ask the kernel to mark received descriptors close-on-exec. */
290 #ifdef MSG_CMSG_CLOEXEC
291 flags |= MSG_CMSG_CLOEXEC;
294 received = recvmsg(fd, &msg, flags);
295 if (received == -1) {
296 if ((errno == EAGAIN) ||
297 (errno == EWOULDBLOCK) ||
298 (errno == EINTR) || (errno == ENOMEM)) {
299 /* Not really an error - just try again. */
302 /* Problem with the socket. Set it unreadable. */
303 ctx->ev_funcs->watch_update(w, 0);
306 if (received > ctx->max_msg) {
307 /* More than we expected, not for us */
/* Count the passed descriptors first, then extract them into "fds". */
312 size_t num_fds = msghdr_extract_fds(&msg, NULL, 0);
316 msghdr_extract_fds(&msg, fds, num_fds);
/*
 * Each received descriptor is run through prepare_socket_cloexec().
 * NOTE(review): the condition guarding the close_fd_array() call is not
 * visible in this listing; presumably it is the failure path of that call.
 */
318 for (i = 0; i < num_fds; i++) {
321 err = prepare_socket_cloexec(fds[i]);
323 close_fd_array(fds, num_fds);
328 ctx->recv_callback(ctx, ctx->recv_buf, received,
329 fds, num_fds, ctx->private_data);
/*
 * Forward declaration: unix_dgram_init_pthreadpool() registers this handler
 * on the thread pool's signal fd before the definition appears further down.
 */
333 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
/*
 * Sets up ctx->send_pool and a POLLIN watch on the pool's signal fd that
 * dispatches to unix_dgram_job_finished().
 * NOTE(review): return statements are not visible in this listing.
 */
336 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
/* A pool already exists: the body of this branch is not visible (presumably an early return). */
340 if (ctx->send_pool != NULL) {
344 ret = pthreadpool_init(0, &ctx->send_pool);
349 signalfd = pthreadpool_signal_fd(ctx->send_pool);
351 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
352 ctx->ev_funcs, signalfd, POLLIN,
353 unix_dgram_job_finished, ctx);
/* Without a watch the pool is useless: tear it down and reset the pointer. */
354 if (ctx->pool_read_watch == NULL) {
355 pthreadpool_destroy(ctx->send_pool);
356 ctx->send_pool = NULL;
/*
 * Creates a send queue for destination "dst": allocates the queue with an
 * inline copy of the destination path, opens a dedicated AF_UNIX/SOCK_DGRAM
 * socket, connects it to "dst", makes sure the thread pool exists and links
 * the queue into ctx->send_queues.
 * NOTE(review): error handling and the store to *result are on lines not
 * visible in this listing.
 */
363 static int unix_dgram_send_queue_init(
364 struct unix_dgram_ctx *ctx, const struct sockaddr_un *dst,
365 struct unix_dgram_send_queue **result)
367 struct unix_dgram_send_queue *q;
/* Length of the path including its terminating NUL. */
371 pathlen = strlen(dst->sun_path)+1;
373 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
379 memcpy(q->path, dst->sun_path, pathlen);
381 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
/*
 * Only close-on-exec is applied here (not prepare_socket()), so this socket
 * is not switched to non-blocking mode by this function.
 */
387 err = prepare_socket_cloexec(q->sock);
/* connect() is retried for as long as it fails with EINTR. */
393 ret = connect(q->sock,
394 (const struct sockaddr *)(const void *)dst,
396 } while ((ret == -1) && (errno == EINTR));
403 err = unix_dgram_init_pthreadpool(ctx);
408 DLIST_ADD(ctx->send_queues, q);
/*
 * Drains a send queue and unlinks it from its context: every pending message
 * is removed from q->msgs and its carried descriptors are handed to
 * close_fd_array_dgram_msg(), then the queue is removed from
 * ctx->send_queues.
 * NOTE(review): the statements that fetch "msg", release memory and close
 * q->sock are not visible in this listing.
 */
420 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
422 struct unix_dgram_ctx *ctx = q->ctx;
424 while (q->msgs != NULL) {
425 struct unix_dgram_msg *msg;
427 DLIST_REMOVE(q->msgs, msg);
428 close_fd_array_dgram_msg(msg);
432 DLIST_REMOVE(ctx->send_queues, q);
/*
 * Linear search of ctx->send_queues for a queue whose path compares equal
 * (strcmp) to dst_sock.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the match is returned, or NULL when none is found.
 */
436 static struct unix_dgram_send_queue *find_send_queue(
437 struct unix_dgram_ctx *ctx, const char *dst_sock)
439 struct unix_dgram_send_queue *s;
441 for (s = ctx->send_queues; s != NULL; s = s->next) {
442 if (strcmp(s->path, dst_sock) == 0) {
/*
 * Appends a copy of the message described by iov/iovcnt, together with
 * dup()ed copies of the passed descriptors, to the tail of q->msgs.
 * The unix_dgram_msg and its msghdr_buf live in one malloc() block.
 * NOTE(review): return values and parts of the error paths are on lines not
 * visible in this listing.
 */
449 static int queue_msg(struct unix_dgram_send_queue *q,
450 const struct iovec *iov, int iovcnt,
451 const int *fds, size_t num_fds)
453 struct unix_dgram_msg *msg;
454 struct msghdr_buf *hdr;
455 size_t msglen, needed;
/*
 * NOTE(review): the array is capped at INT8_MAX entries while the loops
 * below run to num_fds; this relies on the caller rejecting
 * num_fds > INT8_MAX (unix_dgram_send() does so).
 */
457 int fds_copy[MIN(num_fds, INT8_MAX)];
/* First pass over fds_copy; its body is not visible in this listing. */
460 for (i=0; i<num_fds; i++) {
/* Duplicate each descriptor so the queued message holds its own copies. */
464 for (i = 0; i < num_fds; i++) {
465 fds_copy[i] = dup(fds[i]);
466 if (fds_copy[i] == -1) {
472 msglen = unix_dgram_msg_size();
/* Size query: NULL destination with length 0. */
474 msghdrlen = msghdr_copy(NULL, 0, NULL, 0, iov, iovcnt,
476 if (msghdrlen == -1) {
/* Guard against size_t wrap-around in the sum. */
481 needed = msglen + msghdrlen;
482 if (needed < msglen) {
487 msg = malloc(needed);
492 hdr = unix_dgram_msghdr(msg);
/* Second call performs the actual copy into the allocated msghdr_buf. */
495 msghdr_copy(hdr, msghdrlen, NULL, 0, iov, iovcnt,
498 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
/* NOTE(review): presumably the failure path - releases the duplicated descriptors. */
501 close_fd_array(fds_copy, num_fds);
/*
 * Thread-pool job (submitted through pthreadpool_add_job()): sends one
 * queued message with sendmsg() on dmsg->sock, retrying while the call fails
 * with EINTR. The result is stored in dmsg->sent and, on failure, errno is
 * saved in dmsg->sys_errno.
 */
505 static void unix_dgram_send_job(void *private_data)
507 struct unix_dgram_msg *dmsg = private_data;
510 struct msghdr_buf *hdr = unix_dgram_msghdr(dmsg);
511 struct msghdr *msg = msghdr_buf_msghdr(hdr);
512 dmsg->sent = sendmsg(dmsg->sock, msg, 0);
513 } while ((dmsg->sent == -1) && (errno == EINTR));
515 if (dmsg->sent == -1) {
516 dmsg->sys_errno = errno;
/*
 * Watch callback for the thread pool's signal fd. Collects one finished job
 * id, finds the send queue whose socket equals that id, retires a message
 * from that queue, and either submits the queue's next message or frees the
 * queue.
 * NOTE(review): several statements (fetching "msg", checking "ret", the
 * return paths) are not visible in this listing.
 */
520 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
523 struct unix_dgram_ctx *ctx = private_data;
524 struct unix_dgram_send_queue *q;
525 struct unix_dgram_msg *msg;
528 ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
/* Jobs are submitted with the queue's socket fd as job id, so match on that. */
533 for (q = ctx->send_queues; q != NULL; q = q->next) {
534 if (job == q->sock) {
540 /* Huh? Should not happen */
545 DLIST_REMOVE(q->msgs, msg);
546 close_fd_array_dgram_msg(msg);
/* More messages pending: hand the new head of the queue to the pool. */
549 if (q->msgs != NULL) {
550 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
551 unix_dgram_send_job, q->msgs);
557 unix_dgram_send_queue_free(q);
/*
 * Sends one datagram (iov/iovlen plus optional file descriptors) to "dst".
 *
 * If a send queue for "dst" already exists the message is appended to it so
 * ordering is preserved. Otherwise a direct sendmsg() on ctx->sock is tried
 * first; for a specific set of errno values (EWOULDBLOCK, ENOBUFS and others
 * on lines not visible here) a new send queue is created, the message is
 * queued and a thread-pool job is submitted for it.
 *
 * Parameters:
 *   ctx      datagram context owning the socket and the send queues
 *   dst      destination socket address
 *   iov      payload fragments
 *   iovlen   number of entries in iov
 *   fds      descriptors to pass along; at most INT8_MAX
 *   num_fds  number of entries in fds
 *
 * NOTE(review): return statements and several conditions are on lines not
 * visible in this listing.
 *
 * Fix: the second msghdr_prep_fds() call passed a buffer size of 0 although
 * the first call had just computed the required size into "fdlen"; it now
 * passes "fdlen", matching the two-pass pattern used for
 * msghdr_prep_recv_fds() and msghdr_copy() elsewhere in this file.
 */
560 static int unix_dgram_send(struct unix_dgram_ctx *ctx,
561 const struct sockaddr_un *dst,
562 const struct iovec *iov, int iovlen,
563 const int *fds, size_t num_fds)
565 struct unix_dgram_send_queue *q;
571 if (num_fds > INT8_MAX) {
575 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
579 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
581 for (i = 0; i < num_fds; i++) {
583 * Make sure we only allow fd passing
584 * for communication channels,
585 * e.g. sockets, pipes, fifos, ...
/* lseek() failing with ESPIPE identifies a non-seekable descriptor. */
587 ret = lseek(fds[i], 0, SEEK_CUR);
588 if (ret == -1 && errno == ESPIPE) {
594 * Reject the message as we may need to call dup(),
595 * if we queue the message.
597 * That might result in unexpected behavior for the caller
598 * for files and broken posix locking.
604 * To preserve message ordering, we have to queue a message when
605 * others are waiting in line already.
607 q = find_send_queue(ctx, dst->sun_path);
609 return queue_msg(q, iov, iovlen, fds, num_fds);
613 * Try a cheap nonblocking send
616 msg = (struct msghdr) {
617 .msg_name = discard_const_p(struct sockaddr_un, dst),
618 .msg_namelen = sizeof(*dst),
619 .msg_iov = discard_const_p(struct iovec, iov),
/* Size query: NULL buffer, length 0. */
623 fdlen = msghdr_prep_fds(&msg, NULL, 0, fds, num_fds);
/*
 * Fill the control buffer, passing the size obtained above.
 * NOTE(review): the declaration of "buf" is not visible in this listing;
 * presumably it is sized by fdlen - confirm against the full file.
 */
630 msghdr_prep_fds(&msg, buf, fdlen, fds, num_fds);
632 ret = sendmsg(ctx->sock, &msg, 0);
638 if ((errno != EWOULDBLOCK) &&
641 /* FreeBSD can give this for large messages */
642 (errno != ENOBUFS) &&
/* Fall back to a queued send on a dedicated, connected socket. */
648 ret = unix_dgram_send_queue_init(ctx, dst, &q);
652 ret = queue_msg(q, iov, iovlen, fds, num_fds);
654 unix_dgram_send_queue_free(q);
657 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
658 unix_dgram_send_job, q->msgs);
660 unix_dgram_send_queue_free(q);
/*
 * Accessor on the datagram context; unix_msg_send() stores its result in the
 * "sock" field of the fragment header.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns ctx->sock.
 */
666 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
/*
 * Tears down a datagram context: destroys the send thread pool (if any),
 * frees both poll watches and, when called from the process recorded in
 * created_pid, performs the unlink step described in the comment below.
 * NOTE(review): the branch bodies, the unlink call itself, the memory
 * releases and the return values are not visible in this listing.
 */
671 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
/* Pending send queues are checked first; what happens in that case is not visible here. */
673 if (ctx->send_queues != NULL) {
677 if (ctx->send_pool != NULL) {
678 int ret = pthreadpool_destroy(ctx->send_pool);
682 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
685 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
687 if (getpid() == ctx->created_pid) {
688 /* If we created it, unlink. Otherwise someone else might
689 * still have it open */
700 * Every message starts with a uint64_t cookie.
702 * A value of 0 indicates a single-fragment message which is complete in
703 * itself. The data immediately follows the cookie.
705 * Every multi-fragment message has a cookie != 0 and starts with a cookie
706 * followed by a struct unix_msg_hdr and then the data. The pid and sock
707 * fields are used to assure uniqueness on the receiver side.
/*
 * Header that follows the cookie in every fragment of a multi-fragment
 * message.
 * NOTE(review): the members are not visible in this listing; code in this
 * file reads and writes .msglen, .pid and .sock.
 */
710 struct unix_msg_hdr {
/*
 * List links of struct unix_msg, a message under reassembly that is kept on
 * unix_msg_ctx.msgs.
 * NOTE(review): the struct's opening line and remaining members are not
 * visible in this listing; code in this file uses ->msglen, ->received,
 * ->sender_pid, ->sender_sock, ->cookie and ->buf.
 */
717 struct unix_msg *prev, *next;
/*
 * Context of the fragmenting message layer built on top of unix_dgram.
 * NOTE(review): ->fragment_len, ->cookie and ->private_data are used
 * elsewhere in this file, but their declarations are not visible in this
 * listing.
 */
726 struct unix_msg_ctx {
/* Underlying datagram context, created in unix_msg_init(). */
727 struct unix_dgram_ctx *dgram;
/* User callback, invoked by unix_msg_recv() with a complete message. */
731 void (*recv_callback)(struct unix_msg_ctx *ctx,
732 uint8_t *msg, size_t msg_len,
733 int *fds, size_t num_fds,
/* Multi-fragment messages that are still being reassembled. */
737 struct unix_msg *msgs;
/*
 * Forward declaration: unix_msg_init() passes this function to
 * unix_dgram_init() as the datagram receive callback before the definition
 * appears further down.
 */
740 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
741 uint8_t *buf, size_t buflen,
742 int *fds, size_t num_fds,
/*
 * Creates a unix_msg context: allocates and initialises the context, then
 * creates the underlying datagram context bound to "addr", using
 * fragment_len as the maximum datagram size and unix_msg_recv() as the
 * datagram receive callback.
 * NOTE(review): the fragment_len and private_data parameters, the error
 * handling and the store to *result are on lines not visible in this
 * listing.
 */
745 int unix_msg_init(const struct sockaddr_un *addr,
746 const struct poll_funcs *ev_funcs,
748 void (*recv_callback)(struct unix_msg_ctx *ctx,
749 uint8_t *msg, size_t msg_len,
750 int *fds, size_t num_fds,
753 struct unix_msg_ctx **result)
755 struct unix_msg_ctx *ctx;
758 ctx = malloc(sizeof(*ctx));
763 *ctx = (struct unix_msg_ctx) {
764 .fragment_len = fragment_len,
766 .recv_callback = recv_callback,
767 .private_data = private_data
/* The message context itself is the private data of the datagram layer. */
770 ret = unix_dgram_init(addr, fragment_len, ev_funcs,
771 unix_msg_recv, ctx, &ctx->dgram);
/*
 * Sends a message of arbitrary size to "dst".
 *
 * A message that fits into one fragment together with a uint64_t cookie is
 * sent as a single datagram: the local "cookie" followed by the caller's iov
 * entries. Larger messages are split into fragments that each start with
 * ctx->cookie and a struct unix_msg_hdr.
 *
 * NOTE(review): return statements, the initialisation of "cookie", "sent"
 * and "src_iov", and the arguments of the two unix_dgram_send() calls in the
 * loop are on lines not visible in this listing.
 */
781 int unix_msg_send(struct unix_msg_ctx *ctx, const struct sockaddr_un *dst,
782 const struct iovec *iov, int iovlen,
783 const int *fds, size_t num_fds)
/* Two extra slots: one for the cookie, one for the fragment header. */
788 struct iovec iov_copy[iovlen+2];
789 struct unix_msg_hdr hdr;
790 struct iovec src_iov;
796 msglen = iov_buflen(iov, iovlen);
801 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
805 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
807 if (num_fds > INT8_MAX) {
/* Single-fragment case: payload plus cookie fits into one datagram. */
811 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
814 iov_copy[0].iov_base = &cookie;
815 iov_copy[0].iov_len = sizeof(cookie);
817 memcpy(&iov_copy[1], iov,
818 sizeof(struct iovec) * iovlen);
821 return unix_dgram_send(ctx->dgram, dst, iov_copy, iovlen+1,
825 hdr = (struct unix_msg_hdr) {
828 .sock = unix_dgram_sock(ctx->dgram)
/* Every fragment starts with the context's cookie and the header. */
831 iov_copy[0].iov_base = &ctx->cookie;
832 iov_copy[0].iov_len = sizeof(ctx->cookie);
833 iov_copy[1].iov_base = &hdr;
834 iov_copy[1].iov_len = sizeof(hdr);
840 * The following write loop sends the user message in pieces. We have
841 * filled the first two iovecs above with "cookie" and "hdr". In the
842 * following loops we pull message chunks from the user iov array and
843 * fill iov_copy piece by piece, possibly truncating chunks from the
844 * caller's iov array. Ugly, but hopefully efficient.
847 while (sent < msglen) {
/* Slots 0 and 1 are taken by cookie and header; payload starts at slot 2. */
849 size_t iov_index = 2;
851 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
853 while (fragment_len < ctx->fragment_len) {
/* Take as much of the current source chunk as still fits into this fragment. */
856 space = ctx->fragment_len - fragment_len;
857 chunk = MIN(space, src_iov.iov_len);
859 iov_copy[iov_index].iov_base = src_iov.iov_base;
860 iov_copy[iov_index].iov_len = chunk;
/* Advance within the source chunk by the amount consumed. */
863 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
864 src_iov.iov_len -= chunk;
865 fragment_len += chunk;
867 if (src_iov.iov_len == 0) {
/* Only payload bytes count as sent; cookie and header are overhead. */
876 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
879 * only the last fragment should pass the fd array.
880 * That simplifies the receiver a lot.
883 ret = unix_dgram_send(ctx->dgram, dst,
887 ret = unix_dgram_send(ctx->dgram, dst,
/*
 * NOTE(review): the statements around this check are not visible; since 0
 * marks a single-fragment message, presumably the cookie is advanced here
 * and 0 is skipped.
 */
897 if (ctx->cookie == 0) {
/*
 * Datagram receive callback of the message layer. Strips the leading cookie
 * from each datagram, delivers the rest to ctx->recv_callback in one code
 * path, and otherwise parses a struct unix_msg_hdr and reassembles the
 * fragments per sender (pid + sock) until msglen bytes have arrived.
 *
 * NOTE(review): several conditions and the goto/return paths are on lines
 * not visible in this listing; the trailing close_fd_array() call is
 * presumably the shared cleanup path for descriptors that were not handed to
 * the callback.
 */
904 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
905 uint8_t *buf, size_t buflen,
906 int *fds, size_t num_fds,
909 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
910 struct unix_msg_hdr hdr;
911 struct unix_msg *msg;
/* Too short to even hold the cookie. */
915 if (buflen < sizeof(cookie)) {
/* memcpy() rather than a pointer cast to read the cookie out of the byte buffer. */
919 memcpy(&cookie, buf, sizeof(cookie));
921 buf += sizeof(cookie);
922 buflen -= sizeof(cookie);
/*
 * NOTE(review): the condition guarding this call is not visible; per the
 * format description it is presumably the cookie == 0 single-fragment case.
 */
925 ctx->recv_callback(ctx, buf, buflen, fds, num_fds,
930 if (buflen < sizeof(hdr)) {
933 memcpy(&hdr, buf, sizeof(hdr));
936 buflen -= sizeof(hdr);
/* Look for a partially received message from the same sender. */
938 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
939 if ((msg->sender_pid == hdr.pid) &&
940 (msg->sender_sock == hdr.sock)) {
/* Same sender but a different cookie: the old partial message is taken off the list. */
945 if ((msg != NULL) && (msg->cookie != cookie)) {
946 DLIST_REMOVE(ctx->msgs, msg);
/* Start a new reassembly buffer sized for the whole message. */
952 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
956 *msg = (struct unix_msg) {
957 .msglen = hdr.msglen,
958 .sender_pid = hdr.pid,
959 .sender_sock = hdr.sock,
962 DLIST_ADD(ctx->msgs, msg);
/* Check the fragment against the room that is left in the buffer. */
965 space = msg->msglen - msg->received;
966 if (buflen > space) {
970 memcpy(msg->buf + msg->received, buf, buflen);
971 msg->received += buflen;
/* Not complete yet: more fragments are expected. */
973 if (msg->received < msg->msglen) {
/* Complete: take it off the list and deliver it with this datagram's fds. */
977 DLIST_REMOVE(ctx->msgs, msg);
978 ctx->recv_callback(ctx, msg->buf, msg->msglen, fds, num_fds,
984 close_fd_array(fds, num_fds);
/*
 * Frees a unix_msg context: releases the underlying datagram context via
 * unix_dgram_free() and then removes every partially reassembled message
 * from ctx->msgs.
 * NOTE(review): the check on "ret", the memory releases and the return
 * statements are not visible in this listing.
 */
987 int unix_msg_free(struct unix_msg_ctx *ctx)
991 ret = unix_dgram_free(ctx->dgram);
996 while (ctx->msgs != NULL) {
997 struct unix_msg *msg = ctx->msgs;
998 DLIST_REMOVE(ctx->msgs, msg);