tsocket: allow empty vectors at the end for tstream_writev()/readv()
[ira/wip.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tevent
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Map the result of a socket system call to an errno style error code.
 *
 * ret        the return value of the system call
 * sys_errno  the errno value observed right after the call
 * retry      set to true when the failure is transient (EINTR,
 *            EINPROGRESS, EAGAIN, EWOULDBLOCK), false otherwise
 *
 * Returns 0 if ret is not negative, EIO if ret is a negative value other
 * than -1 or if no errno was recorded, and sys_errno in all other cases.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		/* the call succeeded */
		return 0;
	}

	if (ret != -1 || sys_errno == 0) {
		/* a failure we can't describe any better */
		return EIO;
	}

	transient = (sys_errno == EINTR) ||
		    (sys_errno == EINPROGRESS) ||
		    (sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
	transient = transient || (sys_errno == EWOULDBLOCK);
#endif

	*retry = transient;
	return sys_errno;
}
72
/*
 * Prepare a file descriptor for use by the tsocket BSD backend.
 *
 * If high_fd is true the descriptor is dup()'ed until its number is
 * at least 3 and the lower numbered descriptors used on the way
 * (including the one passed in) are closed. The descriptor is then
 * switched to non-blocking mode and, where FD_CLOEXEC is available,
 * marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. On a
 * failure after the dup() stage the descriptor is closed.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
#if defined(O_NONBLOCK)
	const int nonblock_flag = O_NONBLOCK;
#elif defined(SYSV)
	const int nonblock_flag = O_NDELAY;
#else /* BSD */
	const int nonblock_flag = FNDELAY;
#endif
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int fl;
	int idx;

	if (fd == -1) {
		return -1;
	}

	if (high_fd) {
		/* move away from the stdin/stdout/stderr range */
		while (fd < 3) {
			low_fds[num_low++] = fd;
			fd = dup(fd);
			if (fd == -1) {
				saved_errno = errno;
				break;
			}
		}

		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			/* close() may have clobbered errno */
			errno = saved_errno;
			return -1;
		}
	}

	/* the descriptor has to be non-blocking */
	fl = fcntl(fd, F_GETFL);
	if (fl == -1) {
		goto fail;
	}
	if (fcntl(fd, F_SETFL, fl | nonblock_flag) == -1) {
		goto fail;
	}

#ifdef FD_CLOEXEC
	/* the descriptor must not leak into exec()'ed programs */
	fl = fcntl(fd, F_GETFD, 0);
	if (fl < 0) {
		goto fail;
	}
	if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
		goto fail;
	}
#endif

	return fd;

 fail:
	/* keep the errno of the failed fcntl() across close() */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
149
/*
 * Ask the kernel how many bytes can be read from fd right now.
 *
 * Returns the FIONREAD value if it is not zero. If nothing is
 * readable the pending socket error (SO_ERROR) is checked: a pending
 * error is returned as -1 with errno set to it, otherwise 0 is
 * returned. A failing ioctl() or getsockopt() also yields -1.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int avail = 0;
	int so_error = 0;
	socklen_t so_error_len = sizeof(so_error);
	int rc;

	rc = ioctl(fd, FIONREAD, &avail);
	if (rc == -1) {
		return rc;
	}
	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (avail != 0) {
		return avail;
	}

	/*
	 * Nothing to read: see if the socket is in an error state.
	 * For connected dgram sockets this is how ICMP errors are
	 * passed back to the caller.
	 */
	rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len);
	if (rc == -1) {
		return rc;
	}
	if (so_error != 0) {
		errno = so_error;
		return -1;
	}

	return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private data of a "bsd" tsocket_address.
 *
 * All supported socket address flavours share the same storage, the
 * address family found in u.sa.sa_family tells which member is valid.
 */
struct tsocket_address_bsd {
	union {
		/* generic view, used to read the address family */
		struct sockaddr sa;
		/* AF_INET */
		struct sockaddr_in in;
#ifdef HAVE_IPV6
		/* AF_INET6 */
		struct sockaddr_in6 in6;
#endif
		/* AF_UNIX */
		struct sockaddr_un un;
		/* largest member, used as the raw copy target */
		struct sockaddr_storage ss;
	} u;
};
203
204 static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                               struct sockaddr *sa,
206                                               socklen_t sa_len,
207                                               struct tsocket_address **_addr,
208                                               const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         switch (sa->sa_family) {
214         case AF_UNIX:
215                 if (sa_len < sizeof(struct sockaddr_un)) {
216                         errno = EINVAL;
217                         return -1;
218                 }
219                 break;
220         case AF_INET:
221                 if (sa_len < sizeof(struct sockaddr_in)) {
222                         errno = EINVAL;
223                         return -1;
224                 }
225                 break;
226 #ifdef HAVE_IPV6
227         case AF_INET6:
228                 if (sa_len < sizeof(struct sockaddr_in6)) {
229                         errno = EINVAL;
230                         return -1;
231                 }
232                 break;
233 #endif
234         default:
235                 errno = EAFNOSUPPORT;
236                 return -1;
237         }
238
239         if (sa_len > sizeof(struct sockaddr_storage)) {
240                 errno = EINVAL;
241                 return -1;
242         }
243
244         addr = tsocket_address_create(mem_ctx,
245                                       &tsocket_address_bsd_ops,
246                                       &bsda,
247                                       struct tsocket_address_bsd,
248                                       location);
249         if (!addr) {
250                 errno = ENOMEM;
251                 return -1;
252         }
253
254         ZERO_STRUCTP(bsda);
255
256         memcpy(&bsda->u.ss, sa, sa_len);
257
258         *_addr = addr;
259         return 0;
260 }
261
262 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
263                                        const char *fam,
264                                        const char *addr,
265                                        uint16_t port,
266                                        struct tsocket_address **_addr,
267                                        const char *location)
268 {
269         struct addrinfo hints;
270         struct addrinfo *result = NULL;
271         char port_str[6];
272         int ret;
273
274         ZERO_STRUCT(hints);
275         /*
276          * we use SOCKET_STREAM here to get just one result
277          * back from getaddrinfo().
278          */
279         hints.ai_socktype = SOCK_STREAM;
280         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
281
282         if (strcasecmp(fam, "ip") == 0) {
283                 hints.ai_family = AF_UNSPEC;
284                 if (!addr) {
285 #ifdef HAVE_IPV6
286                         addr = "::";
287 #else
288                         addr = "0.0.0.0";
289 #endif
290                 }
291         } else if (strcasecmp(fam, "ipv4") == 0) {
292                 hints.ai_family = AF_INET;
293                 if (!addr) {
294                         addr = "0.0.0.0";
295                 }
296 #ifdef HAVE_IPV6
297         } else if (strcasecmp(fam, "ipv6") == 0) {
298                 hints.ai_family = AF_INET6;
299                 if (!addr) {
300                         addr = "::";
301                 }
302 #endif
303         } else {
304                 errno = EAFNOSUPPORT;
305                 return -1;
306         }
307
308         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
309
310         ret = getaddrinfo(addr, port_str, &hints, &result);
311         if (ret != 0) {
312                 switch (ret) {
313                 case EAI_FAIL:
314                         errno = EINVAL;
315                         break;
316                 }
317                 ret = -1;
318                 goto done;
319         }
320
321         if (result->ai_socktype != SOCK_STREAM) {
322                 errno = EINVAL;
323                 ret = -1;
324                 goto done;
325         }
326
327         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
328                                                   result->ai_addr,
329                                                   result->ai_addrlen,
330                                                   _addr,
331                                                   location);
332
333 done:
334         if (result) {
335                 freeaddrinfo(result);
336         }
337         return ret;
338 }
339
340 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
341                                        TALLOC_CTX *mem_ctx)
342 {
343         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
344                                            struct tsocket_address_bsd);
345         char addr_str[INET6_ADDRSTRLEN+1];
346         const char *str;
347
348         if (!bsda) {
349                 errno = EINVAL;
350                 return NULL;
351         }
352
353         switch (bsda->u.sa.sa_family) {
354         case AF_INET:
355                 str = inet_ntop(bsda->u.in.sin_family,
356                                 &bsda->u.in.sin_addr,
357                                 addr_str, sizeof(addr_str));
358                 break;
359 #ifdef HAVE_IPV6
360         case AF_INET6:
361                 str = inet_ntop(bsda->u.in6.sin6_family,
362                                 &bsda->u.in6.sin6_addr,
363                                 addr_str, sizeof(addr_str));
364                 break;
365 #endif
366         default:
367                 errno = EINVAL;
368                 return NULL;
369         }
370
371         if (!str) {
372                 return NULL;
373         }
374
375         return talloc_strdup(mem_ctx, str);
376 }
377
378 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
379 {
380         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
381                                            struct tsocket_address_bsd);
382         uint16_t port = 0;
383
384         if (!bsda) {
385                 errno = EINVAL;
386                 return 0;
387         }
388
389         switch (bsda->u.sa.sa_family) {
390         case AF_INET:
391                 port = ntohs(bsda->u.in.sin_port);
392                 break;
393 #ifdef HAVE_IPV6
394         case AF_INET6:
395                 port = ntohs(bsda->u.in6.sin6_port);
396                 break;
397 #endif
398         default:
399                 errno = EINVAL;
400                 return 0;
401         }
402
403         return port;
404 }
405
406 int tsocket_address_inet_set_port(struct tsocket_address *addr,
407                                   uint16_t port)
408 {
409         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
410                                            struct tsocket_address_bsd);
411
412         if (!bsda) {
413                 errno = EINVAL;
414                 return -1;
415         }
416
417         switch (bsda->u.sa.sa_family) {
418         case AF_INET:
419                 bsda->u.in.sin_port = htons(port);
420                 break;
421 #ifdef HAVE_IPV6
422         case AF_INET6:
423                 bsda->u.in6.sin6_port = htons(port);
424                 break;
425 #endif
426         default:
427                 errno = EINVAL;
428                 return -1;
429         }
430
431         return 0;
432 }
433
434 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
435                                     const char *path,
436                                     struct tsocket_address **_addr,
437                                     const char *location)
438 {
439         struct sockaddr_un un;
440         void *p = &un;
441         int ret;
442
443         if (!path) {
444                 path = "";
445         }
446
447         ZERO_STRUCT(un);
448         un.sun_family = AF_UNIX;
449         strncpy(un.sun_path, path, sizeof(un.sun_path));
450
451         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
452                                                  (struct sockaddr *)p,
453                                                  sizeof(un),
454                                                  _addr,
455                                                  location);
456
457         return ret;
458 }
459
460 char *tsocket_address_unix_path(const struct tsocket_address *addr,
461                                 TALLOC_CTX *mem_ctx)
462 {
463         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
464                                            struct tsocket_address_bsd);
465         const char *str;
466
467         if (!bsda) {
468                 errno = EINVAL;
469                 return NULL;
470         }
471
472         switch (bsda->u.sa.sa_family) {
473         case AF_UNIX:
474                 str = bsda->u.un.sun_path;
475                 break;
476         default:
477                 errno = EINVAL;
478                 return NULL;
479         }
480
481         return talloc_strdup(mem_ctx, str);
482 }
483
484 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
485                                         TALLOC_CTX *mem_ctx)
486 {
487         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
488                                            struct tsocket_address_bsd);
489         char *str;
490         char *addr_str;
491         const char *prefix = NULL;
492         uint16_t port;
493
494         switch (bsda->u.sa.sa_family) {
495         case AF_UNIX:
496                 return talloc_asprintf(mem_ctx, "unix:%s",
497                                        bsda->u.un.sun_path);
498         case AF_INET:
499                 prefix = "ipv4";
500                 break;
501 #ifdef HAVE_IPV6
502         case AF_INET6:
503                 prefix = "ipv6";
504                 break;
505 #endif
506         default:
507                 errno = EINVAL;
508                 return NULL;
509         }
510
511         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
512         if (!addr_str) {
513                 return NULL;
514         }
515
516         port = tsocket_address_inet_port(addr);
517
518         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
519                               prefix, addr_str, port);
520         talloc_free(addr_str);
521
522         return str;
523 }
524
525 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
526                                                          TALLOC_CTX *mem_ctx,
527                                                          const char *location)
528 {
529         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
530                                            struct tsocket_address_bsd);
531         struct tsocket_address *copy;
532         int ret;
533
534         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
535                                                  &bsda->u.sa,
536                                                  sizeof(bsda->u.ss),
537                                                  &copy,
538                                                  location);
539         if (ret != 0) {
540                 return NULL;
541         }
542
543         return copy;
544 }
545
546 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
547         .name           = "bsd",
548         .string         = tsocket_address_bsd_string,
549         .copy           = tsocket_address_bsd_copy,
550 };
551
/*
 * Private state of a datagram socket of the BSD backend.
 */
struct tdgram_bsd {
	/* the socket, -1 is treated as "not connected" */
	int fd;

	/* the tevent context the fd event was created on */
	void *event_ptr;
	/* fd event shared by the read and the write side */
	struct tevent_fd *fde;

	/* called with readable_private on a read event */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* called with writeable_private on a write event */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
563
564 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
565                                    struct tevent_fd *fde,
566                                    uint16_t flags,
567                                    void *private_data)
568 {
569         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
570                                   struct tdgram_bsd);
571
572         if (flags & TEVENT_FD_WRITE) {
573                 bsds->writeable_handler(bsds->writeable_private);
574                 return;
575         }
576         if (flags & TEVENT_FD_READ) {
577                 if (!bsds->readable_handler) {
578                         TEVENT_FD_NOT_READABLE(bsds->fde);
579                         return;
580                 }
581                 bsds->readable_handler(bsds->readable_private);
582                 return;
583         }
584 }
585
586 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
587                                            struct tevent_context *ev,
588                                            void (*handler)(void *private_data),
589                                            void *private_data)
590 {
591         if (ev == NULL) {
592                 if (handler) {
593                         errno = EINVAL;
594                         return -1;
595                 }
596                 if (!bsds->readable_handler) {
597                         return 0;
598                 }
599                 bsds->readable_handler = NULL;
600                 bsds->readable_private = NULL;
601
602                 return 0;
603         }
604
605         /* read and write must use the same tevent_context */
606         if (bsds->event_ptr != ev) {
607                 if (bsds->readable_handler || bsds->writeable_handler) {
608                         errno = EINVAL;
609                         return -1;
610                 }
611                 bsds->event_ptr = NULL;
612                 TALLOC_FREE(bsds->fde);
613         }
614
615         if (bsds->fde == NULL) {
616                 bsds->fde = tevent_add_fd(ev, bsds,
617                                           bsds->fd, TEVENT_FD_READ,
618                                           tdgram_bsd_fde_handler,
619                                           bsds);
620                 if (!bsds->fde) {
621                         errno = ENOMEM;
622                         return -1;
623                 }
624
625                 /* cache the event context we're running on */
626                 bsds->event_ptr = ev;
627         } else if (!bsds->readable_handler) {
628                 TEVENT_FD_READABLE(bsds->fde);
629         }
630
631         bsds->readable_handler = handler;
632         bsds->readable_private = private_data;
633
634         return 0;
635 }
636
637 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
638                                             struct tevent_context *ev,
639                                             void (*handler)(void *private_data),
640                                             void *private_data)
641 {
642         if (ev == NULL) {
643                 if (handler) {
644                         errno = EINVAL;
645                         return -1;
646                 }
647                 if (!bsds->writeable_handler) {
648                         return 0;
649                 }
650                 bsds->writeable_handler = NULL;
651                 bsds->writeable_private = NULL;
652                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
653
654                 return 0;
655         }
656
657         /* read and write must use the same tevent_context */
658         if (bsds->event_ptr != ev) {
659                 if (bsds->readable_handler || bsds->writeable_handler) {
660                         errno = EINVAL;
661                         return -1;
662                 }
663                 bsds->event_ptr = NULL;
664                 TALLOC_FREE(bsds->fde);
665         }
666
667         if (bsds->fde == NULL) {
668                 bsds->fde = tevent_add_fd(ev, bsds,
669                                           bsds->fd, TEVENT_FD_WRITE,
670                                           tdgram_bsd_fde_handler,
671                                           bsds);
672                 if (!bsds->fde) {
673                         errno = ENOMEM;
674                         return -1;
675                 }
676
677                 /* cache the event context we're running on */
678                 bsds->event_ptr = ev;
679         } else if (!bsds->writeable_handler) {
680                 TEVENT_FD_WRITEABLE(bsds->fde);
681         }
682
683         bsds->writeable_handler = handler;
684         bsds->writeable_private = private_data;
685
686         return 0;
687 }
688
/*
 * State of a pending recvfrom request.
 */
struct tdgram_bsd_recvfrom_state {
	/* the datagram socket the request works on */
	struct tdgram_context *dgram;

	/* the received datagram and its length in bytes */
	uint8_t *buf;
	size_t len;
	/* where the datagram came from */
	struct tsocket_address *src;
};
696
697 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
698 {
699         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
700                                   struct tdgram_bsd);
701
702         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
703
704         return 0;
705 }
706
707 static void tdgram_bsd_recvfrom_handler(void *private_data);
708
709 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
710                                         struct tevent_context *ev,
711                                         struct tdgram_context *dgram)
712 {
713         struct tevent_req *req;
714         struct tdgram_bsd_recvfrom_state *state;
715         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
716         int ret;
717
718         req = tevent_req_create(mem_ctx, &state,
719                                 struct tdgram_bsd_recvfrom_state);
720         if (!req) {
721                 return NULL;
722         }
723
724         state->dgram    = dgram;
725         state->buf      = NULL;
726         state->len      = 0;
727         state->src      = NULL;
728
729         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
730
731         if (bsds->fd == -1) {
732                 tevent_req_error(req, ENOTCONN);
733                 goto post;
734         }
735
736         /*
737          * this is a fast path, not waiting for the
738          * socket to become explicit readable gains
739          * about 10%-20% performance in benchmark tests.
740          */
741         tdgram_bsd_recvfrom_handler(req);
742         if (!tevent_req_is_in_progress(req)) {
743                 goto post;
744         }
745
746         ret = tdgram_bsd_set_readable_handler(bsds, ev,
747                                               tdgram_bsd_recvfrom_handler,
748                                               req);
749         if (ret == -1) {
750                 tevent_req_error(req, errno);
751                 goto post;
752         }
753
754         return req;
755
756  post:
757         tevent_req_post(req, ev);
758         return req;
759 }
760
761 static void tdgram_bsd_recvfrom_handler(void *private_data)
762 {
763         struct tevent_req *req = talloc_get_type_abort(private_data,
764                                  struct tevent_req);
765         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
766                                         struct tdgram_bsd_recvfrom_state);
767         struct tdgram_context *dgram = state->dgram;
768         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
769         struct tsocket_address_bsd *bsda;
770         ssize_t ret;
771         struct sockaddr *sa = NULL;
772         socklen_t sa_len = 0;
773         int err;
774         bool retry;
775
776         ret = tsocket_bsd_pending(bsds->fd);
777         if (ret == 0) {
778                 /* retry later */
779                 return;
780         }
781         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
782         if (retry) {
783                 /* retry later */
784                 return;
785         }
786         if (tevent_req_error(req, err)) {
787                 return;
788         }
789
790         state->buf = talloc_array(state, uint8_t, ret);
791         if (tevent_req_nomem(state->buf, req)) {
792                 return;
793         }
794         state->len = ret;
795
796         state->src = tsocket_address_create(state,
797                                             &tsocket_address_bsd_ops,
798                                             &bsda,
799                                             struct tsocket_address_bsd,
800                                             __location__ "bsd_recvfrom");
801         if (tevent_req_nomem(state->src, req)) {
802                 return;
803         }
804
805         ZERO_STRUCTP(bsda);
806
807         sa = &bsda->u.sa;
808         sa_len = sizeof(bsda->u.ss);
809         /*
810          * for unix sockets we can't use the size of sockaddr_storage
811          * we would get EINVAL
812          */
813         if (bsda->u.sa.sa_family == AF_UNIX) {
814                 sa_len = sizeof(bsda->u.un);
815         }
816
817         ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_len);
818         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
819         if (retry) {
820                 /* retry later */
821                 return;
822         }
823         if (tevent_req_error(req, err)) {
824                 return;
825         }
826
827         if (ret != state->len) {
828                 tevent_req_error(req, EIO);
829                 return;
830         }
831
832         tevent_req_done(req);
833 }
834
835 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
836                                         int *perrno,
837                                         TALLOC_CTX *mem_ctx,
838                                         uint8_t **buf,
839                                         struct tsocket_address **src)
840 {
841         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
842                                         struct tdgram_bsd_recvfrom_state);
843         ssize_t ret;
844
845         ret = tsocket_simple_int_recv(req, perrno);
846         if (ret == 0) {
847                 *buf = talloc_move(mem_ctx, &state->buf);
848                 ret = state->len;
849                 if (src) {
850                         *src = talloc_move(mem_ctx, &state->src);
851                 }
852         }
853
854         tevent_req_received(req);
855         return ret;
856 }
857
/*
 * State of a pending sendto request.
 */
struct tdgram_bsd_sendto_state {
	/* the datagram socket the request works on */
	struct tdgram_context *dgram;

	/* the caller's data to send and its length in bytes */
	const uint8_t *buf;
	size_t len;
	/* destination address, may be NULL */
	const struct tsocket_address *dst;

	/* result of sendto(), -1 until the send succeeded */
	ssize_t ret;
};
867
868 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
869 {
870         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
871                                   struct tdgram_bsd);
872
873         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
874
875         return 0;
876 }
877
878 static void tdgram_bsd_sendto_handler(void *private_data);
879
880 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
881                                                  struct tevent_context *ev,
882                                                  struct tdgram_context *dgram,
883                                                  const uint8_t *buf,
884                                                  size_t len,
885                                                  const struct tsocket_address *dst)
886 {
887         struct tevent_req *req;
888         struct tdgram_bsd_sendto_state *state;
889         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
890         int ret;
891
892         req = tevent_req_create(mem_ctx, &state,
893                                 struct tdgram_bsd_sendto_state);
894         if (!req) {
895                 return NULL;
896         }
897
898         state->dgram    = dgram;
899         state->buf      = buf;
900         state->len      = len;
901         state->dst      = dst;
902         state->ret      = -1;
903
904         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
905
906         if (bsds->fd == -1) {
907                 tevent_req_error(req, ENOTCONN);
908                 goto post;
909         }
910
911         /*
912          * this is a fast path, not waiting for the
913          * socket to become explicit writeable gains
914          * about 10%-20% performance in benchmark tests.
915          */
916         tdgram_bsd_sendto_handler(req);
917         if (!tevent_req_is_in_progress(req)) {
918                 goto post;
919         }
920
921         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
922                                                tdgram_bsd_sendto_handler,
923                                                req);
924         if (ret == -1) {
925                 tevent_req_error(req, errno);
926                 goto post;
927         }
928
929         return req;
930
931  post:
932         tevent_req_post(req, ev);
933         return req;
934 }
935
936 static void tdgram_bsd_sendto_handler(void *private_data)
937 {
938         struct tevent_req *req = talloc_get_type_abort(private_data,
939                                  struct tevent_req);
940         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
941                                         struct tdgram_bsd_sendto_state);
942         struct tdgram_context *dgram = state->dgram;
943         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
944         struct sockaddr *sa = NULL;
945         socklen_t sa_len = 0;
946         ssize_t ret;
947         int err;
948         bool retry;
949
950         if (state->dst) {
951                 struct tsocket_address_bsd *bsda =
952                         talloc_get_type(state->dst->private_data,
953                         struct tsocket_address_bsd);
954
955                 sa = &bsda->u.sa;
956                 sa_len = sizeof(bsda->u.ss);
957                 /*
958                  * for unix sockets we can't use the size of sockaddr_storage
959                  * we would get EINVAL
960                  */
961                 if (bsda->u.sa.sa_family == AF_UNIX) {
962                         sa_len = sizeof(bsda->u.un);
963                 }
964         }
965
966         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_len);
967         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
968         if (retry) {
969                 /* retry later */
970                 return;
971         }
972         if (tevent_req_error(req, err)) {
973                 return;
974         }
975
976         state->ret = ret;
977
978         tevent_req_done(req);
979 }
980
981 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
982 {
983         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
984                                         struct tdgram_bsd_sendto_state);
985         ssize_t ret;
986
987         ret = tsocket_simple_int_recv(req, perrno);
988         if (ret == 0) {
989                 ret = state->ret;
990         }
991
992         tevent_req_received(req);
993         return ret;
994 }
995
/*
 * State of a datagram disconnect request.
 * The request carries no data, the member is only a placeholder.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy;
};
999
1000 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1001                                                      struct tevent_context *ev,
1002                                                      struct tdgram_context *dgram)
1003 {
1004         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1005         struct tevent_req *req;
1006         struct tdgram_bsd_disconnect_state *state;
1007         int ret;
1008         int err;
1009         bool dummy;
1010
1011         req = tevent_req_create(mem_ctx, &state,
1012                                 struct tdgram_bsd_disconnect_state);
1013         if (req == NULL) {
1014                 return NULL;
1015         }
1016
1017         if (bsds->fd == -1) {
1018                 tevent_req_error(req, ENOTCONN);
1019                 goto post;
1020         }
1021
1022         ret = close(bsds->fd);
1023         bsds->fd = -1;
1024         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1025         if (tevent_req_error(req, err)) {
1026                 goto post;
1027         }
1028
1029         tevent_req_done(req);
1030 post:
1031         tevent_req_post(req, ev);
1032         return req;
1033 }
1034
/*
 * Collect the result of a datagram disconnect request.
 * Returns whatever tsocket_simple_int_recv() reports for the request.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1045
1046 static const struct tdgram_context_ops tdgram_bsd_ops = {
1047         .name                   = "bsd",
1048
1049         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1050         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1051
1052         .sendto_send            = tdgram_bsd_sendto_send,
1053         .sendto_recv            = tdgram_bsd_sendto_recv,
1054
1055         .disconnect_send        = tdgram_bsd_disconnect_send,
1056         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1057 };
1058
1059 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1060 {
1061         TALLOC_FREE(bsds->fde);
1062         if (bsds->fd != -1) {
1063                 close(bsds->fd);
1064                 bsds->fd = -1;
1065         }
1066         return 0;
1067 }
1068
1069 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1070                                    const struct tsocket_address *remote,
1071                                    bool broadcast,
1072                                    TALLOC_CTX *mem_ctx,
1073                                    struct tdgram_context **_dgram,
1074                                    const char *location)
1075 {
1076         struct tsocket_address_bsd *lbsda =
1077                 talloc_get_type_abort(local->private_data,
1078                 struct tsocket_address_bsd);
1079         struct tsocket_address_bsd *rbsda = NULL;
1080         struct tdgram_context *dgram;
1081         struct tdgram_bsd *bsds;
1082         int fd;
1083         int ret;
1084         bool do_bind = false;
1085         bool do_reuseaddr = false;
1086         socklen_t sa_len = sizeof(lbsda->u.ss);
1087
1088         if (remote) {
1089                 rbsda = talloc_get_type_abort(remote->private_data,
1090                         struct tsocket_address_bsd);
1091         }
1092
1093         switch (lbsda->u.sa.sa_family) {
1094         case AF_UNIX:
1095                 if (broadcast) {
1096                         errno = EINVAL;
1097                         return -1;
1098                 }
1099                 if (lbsda->u.un.sun_path[0] != 0) {
1100                         do_reuseaddr = true;
1101                         do_bind = true;
1102                 }
1103                 /*
1104                  * for unix sockets we can't use the size of sockaddr_storage
1105                  * we would get EINVAL
1106                  */
1107                 sa_len = sizeof(lbsda->u.un);
1108                 break;
1109         case AF_INET:
1110                 if (lbsda->u.in.sin_port != 0) {
1111                         do_reuseaddr = true;
1112                         do_bind = true;
1113                 }
1114                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1115                         do_bind = true;
1116                 }
1117                 break;
1118 #ifdef HAVE_IPV6
1119         case AF_INET6:
1120                 if (lbsda->u.in6.sin6_port != 0) {
1121                         do_reuseaddr = true;
1122                         do_bind = true;
1123                 }
1124                 if (memcmp(&in6addr_any,
1125                            &lbsda->u.in6.sin6_addr,
1126                            sizeof(in6addr_any)) != 0) {
1127                         do_bind = true;
1128                 }
1129                 break;
1130 #endif
1131         default:
1132                 errno = EINVAL;
1133                 return -1;
1134         }
1135
1136         fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
1137         if (fd < 0) {
1138                 return fd;
1139         }
1140
1141         fd = tsocket_bsd_common_prepare_fd(fd, true);
1142         if (fd < 0) {
1143                 return fd;
1144         }
1145
1146         dgram = tdgram_context_create(mem_ctx,
1147                                       &tdgram_bsd_ops,
1148                                       &bsds,
1149                                       struct tdgram_bsd,
1150                                       location);
1151         if (!dgram) {
1152                 int saved_errno = errno;
1153                 close(fd);
1154                 errno = saved_errno;
1155                 return -1;
1156         }
1157         ZERO_STRUCTP(bsds);
1158         bsds->fd = fd;
1159         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1160
1161         if (broadcast) {
1162                 int val = 1;
1163
1164                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1165                                  (const void *)&val, sizeof(val));
1166                 if (ret == -1) {
1167                         int saved_errno = errno;
1168                         talloc_free(dgram);
1169                         errno = saved_errno;
1170                         return ret;
1171                 }
1172         }
1173
1174         if (do_reuseaddr) {
1175                 int val = 1;
1176
1177                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1178                                  (const void *)&val, sizeof(val));
1179                 if (ret == -1) {
1180                         int saved_errno = errno;
1181                         talloc_free(dgram);
1182                         errno = saved_errno;
1183                         return ret;
1184                 }
1185         }
1186
1187         if (do_bind) {
1188                 ret = bind(fd, &lbsda->u.sa, sa_len);
1189                 if (ret == -1) {
1190                         int saved_errno = errno;
1191                         talloc_free(dgram);
1192                         errno = saved_errno;
1193                         return ret;
1194                 }
1195         }
1196
1197         if (rbsda) {
1198                 ret = connect(fd, &rbsda->u.sa, sa_len);
1199                 if (ret == -1) {
1200                         int saved_errno = errno;
1201                         talloc_free(dgram);
1202                         errno = saved_errno;
1203                         return ret;
1204                 }
1205         }
1206
1207         *_dgram = dgram;
1208         return 0;
1209 }
1210
1211 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1212                             const struct tsocket_address *remote,
1213                             TALLOC_CTX *mem_ctx,
1214                             struct tdgram_context **dgram,
1215                             const char *location)
1216 {
1217         struct tsocket_address_bsd *lbsda =
1218                 talloc_get_type_abort(local->private_data,
1219                 struct tsocket_address_bsd);
1220         int ret;
1221
1222         switch (lbsda->u.sa.sa_family) {
1223         case AF_INET:
1224                 break;
1225 #ifdef HAVE_IPV6
1226         case AF_INET6:
1227                 break;
1228 #endif
1229         default:
1230                 errno = EINVAL;
1231                 return -1;
1232         }
1233
1234         ret = tdgram_bsd_dgram_socket(local, remote, false,
1235                                       mem_ctx, dgram, location);
1236
1237         return ret;
1238 }
1239
1240 int _tdgram_unix_socket(const struct tsocket_address *local,
1241                         const struct tsocket_address *remote,
1242                         TALLOC_CTX *mem_ctx,
1243                         struct tdgram_context **dgram,
1244                         const char *location)
1245 {
1246         struct tsocket_address_bsd *lbsda =
1247                 talloc_get_type_abort(local->private_data,
1248                 struct tsocket_address_bsd);
1249         int ret;
1250
1251         switch (lbsda->u.sa.sa_family) {
1252         case AF_UNIX:
1253                 break;
1254         default:
1255                 errno = EINVAL;
1256                 return -1;
1257         }
1258
1259         ret = tdgram_bsd_dgram_socket(local, remote, false,
1260                                       mem_ctx, dgram, location);
1261
1262         return ret;
1263 }
1264
/* private data of a tstream_context that is backed by a BSD socket */
struct tstream_bsd {
        /* the socket, -1 once it has been closed */
        int fd;

        /* the event context the fd event was created on (cached) */
        void *event_ptr;
        /* the fd event shared by the read and the write side */
        struct tevent_fd *fde;

        /* called with readable_private when the fd event reports READ */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* called with writeable_private when the fd event reports WRITE */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1276
1277 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1278                                     struct tevent_fd *fde,
1279                                     uint16_t flags,
1280                                     void *private_data)
1281 {
1282         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1283                                    struct tstream_bsd);
1284
1285         if (flags & TEVENT_FD_WRITE) {
1286                 bsds->writeable_handler(bsds->writeable_private);
1287                 return;
1288         }
1289         if (flags & TEVENT_FD_READ) {
1290                 if (!bsds->readable_handler) {
1291                         TEVENT_FD_NOT_READABLE(bsds->fde);
1292                         return;
1293                 }
1294                 bsds->readable_handler(bsds->readable_private);
1295                 return;
1296         }
1297 }
1298
1299 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1300                                             struct tevent_context *ev,
1301                                             void (*handler)(void *private_data),
1302                                             void *private_data)
1303 {
1304         if (ev == NULL) {
1305                 if (handler) {
1306                         errno = EINVAL;
1307                         return -1;
1308                 }
1309                 if (!bsds->readable_handler) {
1310                         return 0;
1311                 }
1312                 bsds->readable_handler = NULL;
1313                 bsds->readable_private = NULL;
1314
1315                 return 0;
1316         }
1317
1318         /* read and write must use the same tevent_context */
1319         if (bsds->event_ptr != ev) {
1320                 if (bsds->readable_handler || bsds->writeable_handler) {
1321                         errno = EINVAL;
1322                         return -1;
1323                 }
1324                 bsds->event_ptr = NULL;
1325                 TALLOC_FREE(bsds->fde);
1326         }
1327
1328         if (bsds->fde == NULL) {
1329                 bsds->fde = tevent_add_fd(ev, bsds,
1330                                           bsds->fd, TEVENT_FD_READ,
1331                                           tstream_bsd_fde_handler,
1332                                           bsds);
1333                 if (!bsds->fde) {
1334                         errno = ENOMEM;
1335                         return -1;
1336                 }
1337
1338                 /* cache the event context we're running on */
1339                 bsds->event_ptr = ev;
1340         } else if (!bsds->readable_handler) {
1341                 TEVENT_FD_READABLE(bsds->fde);
1342         }
1343
1344         bsds->readable_handler = handler;
1345         bsds->readable_private = private_data;
1346
1347         return 0;
1348 }
1349
1350 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1351                                              struct tevent_context *ev,
1352                                              void (*handler)(void *private_data),
1353                                              void *private_data)
1354 {
1355         if (ev == NULL) {
1356                 if (handler) {
1357                         errno = EINVAL;
1358                         return -1;
1359                 }
1360                 if (!bsds->writeable_handler) {
1361                         return 0;
1362                 }
1363                 bsds->writeable_handler = NULL;
1364                 bsds->writeable_private = NULL;
1365                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1366
1367                 return 0;
1368         }
1369
1370         /* read and write must use the same tevent_context */
1371         if (bsds->event_ptr != ev) {
1372                 if (bsds->readable_handler || bsds->writeable_handler) {
1373                         errno = EINVAL;
1374                         return -1;
1375                 }
1376                 bsds->event_ptr = NULL;
1377                 TALLOC_FREE(bsds->fde);
1378         }
1379
1380         if (bsds->fde == NULL) {
1381                 bsds->fde = tevent_add_fd(ev, bsds,
1382                                           bsds->fd, TEVENT_FD_WRITE,
1383                                           tstream_bsd_fde_handler,
1384                                           bsds);
1385                 if (!bsds->fde) {
1386                         errno = ENOMEM;
1387                         return -1;
1388                 }
1389
1390                 /* cache the event context we're running on */
1391                 bsds->event_ptr = ev;
1392         } else if (!bsds->writeable_handler) {
1393                 TEVENT_FD_WRITEABLE(bsds->fde);
1394         }
1395
1396         bsds->writeable_handler = handler;
1397         bsds->writeable_private = private_data;
1398
1399         return 0;
1400 }
1401
1402 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1403 {
1404         struct tstream_bsd *bsds = tstream_context_data(stream,
1405                                    struct tstream_bsd);
1406         ssize_t ret;
1407
1408         if (bsds->fd == -1) {
1409                 errno = ENOTCONN;
1410                 return -1;
1411         }
1412
1413         ret = tsocket_bsd_pending(bsds->fd);
1414
1415         return ret;
1416 }
1417
/* state of a pending readv request on a stream socket */
struct tstream_bsd_readv_state {
        /* the stream the request operates on */
        struct tstream_context *stream;

        /* private copy of the caller's vector, consumed while reading */
        struct iovec *vector;
        /* number of elements left in vector */
        size_t count;

        /* sum of the readv() results so far */
        int ret;
};
1426
1427 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1428 {
1429         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1430                                    struct tstream_bsd);
1431
1432         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1433
1434         return 0;
1435 }
1436
1437 static void tstream_bsd_readv_handler(void *private_data);
1438
1439 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1440                                         struct tevent_context *ev,
1441                                         struct tstream_context *stream,
1442                                         struct iovec *vector,
1443                                         size_t count)
1444 {
1445         struct tevent_req *req;
1446         struct tstream_bsd_readv_state *state;
1447         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1448         int ret;
1449
1450         req = tevent_req_create(mem_ctx, &state,
1451                                 struct tstream_bsd_readv_state);
1452         if (!req) {
1453                 return NULL;
1454         }
1455
1456         state->stream   = stream;
1457         /* we make a copy of the vector so that we can modify it */
1458         state->vector   = talloc_array(state, struct iovec, count);
1459         if (tevent_req_nomem(state->vector, req)) {
1460                 goto post;
1461         }
1462         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1463         state->count    = count;
1464         state->ret      = 0;
1465
1466         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1467
1468         if (bsds->fd == -1) {
1469                 tevent_req_error(req, ENOTCONN);
1470                 goto post;
1471         }
1472
1473         /*
1474          * this is a fast path, not waiting for the
1475          * socket to become explicit readable gains
1476          * about 10%-20% performance in benchmark tests.
1477          */
1478         tstream_bsd_readv_handler(req);
1479         if (!tevent_req_is_in_progress(req)) {
1480                 goto post;
1481         }
1482
1483         ret = tstream_bsd_set_readable_handler(bsds, ev,
1484                                               tstream_bsd_readv_handler,
1485                                               req);
1486         if (ret == -1) {
1487                 tevent_req_error(req, errno);
1488                 goto post;
1489         }
1490
1491         return req;
1492
1493  post:
1494         tevent_req_post(req, ev);
1495         return req;
1496 }
1497
1498 static void tstream_bsd_readv_handler(void *private_data)
1499 {
1500         struct tevent_req *req = talloc_get_type_abort(private_data,
1501                                  struct tevent_req);
1502         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1503                                         struct tstream_bsd_readv_state);
1504         struct tstream_context *stream = state->stream;
1505         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1506         int ret;
1507         int err;
1508         bool retry;
1509
1510         ret = readv(bsds->fd, state->vector, state->count);
1511         if (ret == 0) {
1512                 /* propagate end of file */
1513                 tevent_req_error(req, EPIPE);
1514                 return;
1515         }
1516         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1517         if (retry) {
1518                 /* retry later */
1519                 return;
1520         }
1521         if (tevent_req_error(req, err)) {
1522                 return;
1523         }
1524
1525         state->ret += ret;
1526
1527         while (ret > 0) {
1528                 if (ret < state->vector[0].iov_len) {
1529                         uint8_t *base;
1530                         base = (uint8_t *)state->vector[0].iov_base;
1531                         base += ret;
1532                         state->vector[0].iov_base = base;
1533                         state->vector[0].iov_len -= ret;
1534                         break;
1535                 }
1536                 ret -= state->vector[0].iov_len;
1537                 state->vector += 1;
1538                 state->count -= 1;
1539         }
1540
1541         /*
1542          * there're maybe some empty vectors at the end
1543          * which we need to skip, otherwise we would get
1544          * ret == 0 from the readv() call and return EPIPE
1545          */
1546         while (state->count > 0) {
1547                 if (state->vector[0].iov_len > 0) {
1548                         break;
1549                 }
1550                 state->vector += 1;
1551                 state->count -= 1;
1552         }
1553
1554         if (state->count > 0) {
1555                 /* we have more to read */
1556                 return;
1557         }
1558
1559         tevent_req_done(req);
1560 }
1561
1562 static int tstream_bsd_readv_recv(struct tevent_req *req,
1563                                   int *perrno)
1564 {
1565         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1566                                         struct tstream_bsd_readv_state);
1567         int ret;
1568
1569         ret = tsocket_simple_int_recv(req, perrno);
1570         if (ret == 0) {
1571                 ret = state->ret;
1572         }
1573
1574         tevent_req_received(req);
1575         return ret;
1576 }
1577
/* state of a pending writev request on a stream socket */
struct tstream_bsd_writev_state {
        /* the stream the request operates on */
        struct tstream_context *stream;

        /* private copy of the caller's vector, consumed while writing */
        struct iovec *vector;
        /* number of elements left in vector */
        size_t count;

        /* sum of the writev() results so far */
        int ret;
};
1586
1587 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1588 {
1589         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1590                                   struct tstream_bsd);
1591
1592         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1593
1594         return 0;
1595 }
1596
1597 static void tstream_bsd_writev_handler(void *private_data);
1598
1599 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1600                                                  struct tevent_context *ev,
1601                                                  struct tstream_context *stream,
1602                                                  const struct iovec *vector,
1603                                                  size_t count)
1604 {
1605         struct tevent_req *req;
1606         struct tstream_bsd_writev_state *state;
1607         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1608         int ret;
1609
1610         req = tevent_req_create(mem_ctx, &state,
1611                                 struct tstream_bsd_writev_state);
1612         if (!req) {
1613                 return NULL;
1614         }
1615
1616         state->stream   = stream;
1617         /* we make a copy of the vector so that we can modify it */
1618         state->vector   = talloc_array(state, struct iovec, count);
1619         if (tevent_req_nomem(state->vector, req)) {
1620                 goto post;
1621         }
1622         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1623         state->count    = count;
1624         state->ret      = 0;
1625
1626         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1627
1628         if (bsds->fd == -1) {
1629                 tevent_req_error(req, ENOTCONN);
1630                 goto post;
1631         }
1632
1633         /*
1634          * this is a fast path, not waiting for the
1635          * socket to become explicit writeable gains
1636          * about 10%-20% performance in benchmark tests.
1637          */
1638         tstream_bsd_writev_handler(req);
1639         if (!tevent_req_is_in_progress(req)) {
1640                 goto post;
1641         }
1642
1643         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1644                                                tstream_bsd_writev_handler,
1645                                                req);
1646         if (ret == -1) {
1647                 tevent_req_error(req, errno);
1648                 goto post;
1649         }
1650
1651         return req;
1652
1653  post:
1654         tevent_req_post(req, ev);
1655         return req;
1656 }
1657
1658 static void tstream_bsd_writev_handler(void *private_data)
1659 {
1660         struct tevent_req *req = talloc_get_type_abort(private_data,
1661                                  struct tevent_req);
1662         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1663                                         struct tstream_bsd_writev_state);
1664         struct tstream_context *stream = state->stream;
1665         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1666         ssize_t ret;
1667         int err;
1668         bool retry;
1669
1670         ret = writev(bsds->fd, state->vector, state->count);
1671         if (ret == 0) {
1672                 /* propagate end of file */
1673                 tevent_req_error(req, EPIPE);
1674                 return;
1675         }
1676         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1677         if (retry) {
1678                 /* retry later */
1679                 return;
1680         }
1681         if (tevent_req_error(req, err)) {
1682                 return;
1683         }
1684
1685         state->ret += ret;
1686
1687         while (ret > 0) {
1688                 if (ret < state->vector[0].iov_len) {
1689                         uint8_t *base;
1690                         base = (uint8_t *)state->vector[0].iov_base;
1691                         base += ret;
1692                         state->vector[0].iov_base = base;
1693                         state->vector[0].iov_len -= ret;
1694                         break;
1695                 }
1696                 ret -= state->vector[0].iov_len;
1697                 state->vector += 1;
1698                 state->count -= 1;
1699         }
1700
1701         /*
1702          * there're maybe some empty vectors at the end
1703          * which we need to skip, otherwise we would get
1704          * ret == 0 from the writev() call and return EPIPE
1705          */
1706         while (state->count > 0) {
1707                 if (state->vector[0].iov_len > 0) {
1708                         break;
1709                 }
1710                 state->vector += 1;
1711                 state->count -= 1;
1712         }
1713
1714         if (state->count > 0) {
1715                 /* we have more to read */
1716                 return;
1717         }
1718
1719         tevent_req_done(req);
1720 }
1721
1722 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1723 {
1724         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1725                                         struct tstream_bsd_writev_state);
1726         int ret;
1727
1728         ret = tsocket_simple_int_recv(req, perrno);
1729         if (ret == 0) {
1730                 ret = state->ret;
1731         }
1732
1733         tevent_req_received(req);
1734         return ret;
1735 }
1736
/*
 * State of a stream disconnect request.
 * The request carries no data, the member is only a placeholder.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
1740
1741 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1742                                                      struct tevent_context *ev,
1743                                                      struct tstream_context *stream)
1744 {
1745         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1746         struct tevent_req *req;
1747         struct tstream_bsd_disconnect_state *state;
1748         int ret;
1749         int err;
1750         bool dummy;
1751
1752         req = tevent_req_create(mem_ctx, &state,
1753                                 struct tstream_bsd_disconnect_state);
1754         if (req == NULL) {
1755                 return NULL;
1756         }
1757
1758         if (bsds->fd == -1) {
1759                 tevent_req_error(req, ENOTCONN);
1760                 goto post;
1761         }
1762
1763         ret = close(bsds->fd);
1764         bsds->fd = -1;
1765         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1766         if (tevent_req_error(req, err)) {
1767                 goto post;
1768         }
1769
1770         tevent_req_done(req);
1771 post:
1772         tevent_req_post(req, ev);
1773         return req;
1774 }
1775
/*
 * Collect the result of a stream disconnect request.
 * Returns whatever tsocket_simple_int_recv() reports for the request.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1786
1787 static const struct tstream_context_ops tstream_bsd_ops = {
1788         .name                   = "bsd",
1789
1790         .pending_bytes          = tstream_bsd_pending_bytes,
1791
1792         .readv_send             = tstream_bsd_readv_send,
1793         .readv_recv             = tstream_bsd_readv_recv,
1794
1795         .writev_send            = tstream_bsd_writev_send,
1796         .writev_recv            = tstream_bsd_writev_recv,
1797
1798         .disconnect_send        = tstream_bsd_disconnect_send,
1799         .disconnect_recv        = tstream_bsd_disconnect_recv,
1800 };
1801
1802 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1803 {
1804         TALLOC_FREE(bsds->fde);
1805         if (bsds->fd != -1) {
1806                 close(bsds->fd);
1807                 bsds->fd = -1;
1808         }
1809         return 0;
1810 }
1811
1812 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1813                                  int fd,
1814                                  struct tstream_context **_stream,
1815                                  const char *location)
1816 {
1817         struct tstream_context *stream;
1818         struct tstream_bsd *bsds;
1819
1820         stream = tstream_context_create(mem_ctx,
1821                                         &tstream_bsd_ops,
1822                                         &bsds,
1823                                         struct tstream_bsd,
1824                                         location);
1825         if (!stream) {
1826                 return -1;
1827         }
1828         ZERO_STRUCTP(bsds);
1829         bsds->fd = fd;
1830         talloc_set_destructor(bsds, tstream_bsd_destructor);
1831
1832         *_stream = stream;
1833         return 0;
1834 }
1835
/* state of a pending stream connect request */
struct tstream_bsd_connect_state {
        /* the socket that is being connected, -1 if there is none */
        int fd;
        /* fd event of the connecting socket, freed by the destructor */
        struct tevent_fd *fde;
        /* the tag was misspelled as tstream_conext before */
        struct tstream_context *stream;
};
1841
1842 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1843 {
1844         TALLOC_FREE(state->fde);
1845         if (state->fd != -1) {
1846                 close(state->fd);
1847                 state->fd = -1;
1848         }
1849
1850         return 0;
1851 }
1852
1853 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1854                                             struct tevent_fd *fde,
1855                                             uint16_t flags,
1856                                             void *private_data);
1857
1858 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1859                                         struct tevent_context *ev,
1860                                         int sys_errno,
1861                                         const struct tsocket_address *local,
1862                                         const struct tsocket_address *remote)
1863 {
1864         struct tevent_req *req;
1865         struct tstream_bsd_connect_state *state;
1866         struct tsocket_address_bsd *lbsda =
1867                 talloc_get_type_abort(local->private_data,
1868                 struct tsocket_address_bsd);
1869         struct tsocket_address_bsd *rbsda =
1870                 talloc_get_type_abort(remote->private_data,
1871                 struct tsocket_address_bsd);
1872         int ret;
1873         int err;
1874         bool retry;
1875         bool do_bind = false;
1876         bool do_reuseaddr = false;
1877         socklen_t sa_len = sizeof(rbsda->u.ss);
1878
1879         req = tevent_req_create(mem_ctx, &state,
1880                                 struct tstream_bsd_connect_state);
1881         if (!req) {
1882                 return NULL;
1883         }
1884         state->fd = -1;
1885         state->fde = NULL;
1886
1887         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1888
1889         /* give the wrappers a chance to report an error */
1890         if (sys_errno != 0) {
1891                 tevent_req_error(req, sys_errno);
1892                 goto post;
1893         }
1894
1895         switch (lbsda->u.sa.sa_family) {
1896         case AF_UNIX:
1897                 if (lbsda->u.un.sun_path[0] != 0) {
1898                         do_reuseaddr = true;
1899                         do_bind = true;
1900                 }
1901                 /*
1902                  * for unix sockets we can't use the size of sockaddr_storage
1903                  * we would get EINVAL
1904                  */
1905                 sa_len = sizeof(rbsda->u.un);
1906                 break;
1907         case AF_INET:
1908                 if (lbsda->u.in.sin_port != 0) {
1909                         do_reuseaddr = true;
1910                         do_bind = true;
1911                 }
1912                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1913                         do_bind = true;
1914                 }
1915                 break;
1916 #ifdef HAVE_IPV6
1917         case AF_INET6:
1918                 if (lbsda->u.in6.sin6_port != 0) {
1919                         do_reuseaddr = true;
1920                         do_bind = true;
1921                 }
1922                 if (memcmp(&in6addr_any,
1923                            &lbsda->u.in6.sin6_addr,
1924                            sizeof(in6addr_any)) != 0) {
1925                         do_bind = true;
1926                 }
1927                 break;
1928 #endif
1929         default:
1930                 tevent_req_error(req, EINVAL);
1931                 goto post;
1932         }
1933
1934         state->fd = socket(lbsda->u.sa.sa_family, SOCK_STREAM, 0);
1935         if (state->fd == -1) {
1936                 tevent_req_error(req, errno);
1937                 goto post;
1938         }
1939
1940         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
1941         if (state->fd == -1) {
1942                 tevent_req_error(req, errno);
1943                 goto post;
1944         }
1945
1946         if (do_reuseaddr) {
1947                 int val = 1;
1948
1949                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
1950                                  (const void *)&val, sizeof(val));
1951                 if (ret == -1) {
1952                         tevent_req_error(req, errno);
1953                         goto post;
1954                 }
1955         }
1956
1957         if (do_bind) {
1958                 ret = bind(state->fd, &lbsda->u.sa, sizeof(lbsda->u.ss));
1959                 if (ret == -1) {
1960                         tevent_req_error(req, errno);
1961                         goto post;
1962                 }
1963         }
1964
1965         ret = connect(state->fd, &rbsda->u.sa, sa_len);
1966         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1967         if (retry) {
1968                 /* retry later */
1969                 goto async;
1970         }
1971         if (tevent_req_error(req, err)) {
1972                 goto post;
1973         }
1974
1975         tevent_req_done(req);
1976         goto post;
1977
1978  async:
1979         state->fde = tevent_add_fd(ev, state,
1980                                    state->fd,
1981                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
1982                                    tstream_bsd_connect_fde_handler,
1983                                    req);
1984         if (tevent_req_nomem(state->fde, req)) {
1985                 goto post;
1986         }
1987
1988         return req;
1989
1990  post:
1991         tevent_req_post(req, ev);
1992         return req;
1993 }
1994
1995 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1996                                             struct tevent_fd *fde,
1997                                             uint16_t flags,
1998                                             void *private_data)
1999 {
2000         struct tevent_req *req = talloc_get_type_abort(private_data,
2001                                  struct tevent_req);
2002         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2003                                         struct tstream_bsd_connect_state);
2004         int ret;
2005         int error=0;
2006         socklen_t len = sizeof(error);
2007         int err;
2008         bool retry;
2009
2010         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2011         if (ret == 0) {
2012                 if (error != 0) {
2013                         errno = error;
2014                         ret = -1;
2015                 }
2016         }
2017         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2018         if (retry) {
2019                 /* retry later */
2020                 return;
2021         }
2022         if (tevent_req_error(req, err)) {
2023                 return;
2024         }
2025
2026         tevent_req_done(req);
2027 }
2028
2029 static int tstream_bsd_connect_recv(struct tevent_req *req,
2030                                     int *perrno,
2031                                     TALLOC_CTX *mem_ctx,
2032                                     struct tstream_context **stream,
2033                                     const char *location)
2034 {
2035         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2036                                         struct tstream_bsd_connect_state);
2037         int ret;
2038
2039         ret = tsocket_simple_int_recv(req, perrno);
2040         if (ret == 0) {
2041                 ret = _tstream_bsd_existing_socket(mem_ctx,
2042                                                    state->fd,
2043                                                    stream,
2044                                                    location);
2045                 if (ret == -1) {
2046                         *perrno = errno;
2047                         goto done;
2048                 }
2049                 TALLOC_FREE(state->fde);
2050                 state->fd = -1;
2051         }
2052
2053 done:
2054         tevent_req_received(req);
2055         return ret;
2056 }
2057
2058 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2059                                         struct tevent_context *ev,
2060                                         const struct tsocket_address *local,
2061                                         const struct tsocket_address *remote)
2062 {
2063         struct tsocket_address_bsd *lbsda =
2064                 talloc_get_type_abort(local->private_data,
2065                 struct tsocket_address_bsd);
2066         struct tevent_req *req;
2067         int sys_errno = 0;
2068
2069         switch (lbsda->u.sa.sa_family) {
2070         case AF_INET:
2071                 break;
2072 #ifdef HAVE_IPV6
2073         case AF_INET6:
2074                 break;
2075 #endif
2076         default:
2077                 sys_errno = EINVAL;
2078                 break;
2079         }
2080
2081         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2082
2083         return req;
2084 }
2085
2086 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2087                                    int *perrno,
2088                                    TALLOC_CTX *mem_ctx,
2089                                    struct tstream_context **stream,
2090                                    const char *location)
2091 {
2092         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2093 }
2094
2095 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2096                                         struct tevent_context *ev,
2097                                         const struct tsocket_address *local,
2098                                         const struct tsocket_address *remote)
2099 {
2100         struct tsocket_address_bsd *lbsda =
2101                 talloc_get_type_abort(local->private_data,
2102                 struct tsocket_address_bsd);
2103         struct tevent_req *req;
2104         int sys_errno = 0;
2105
2106         switch (lbsda->u.sa.sa_family) {
2107         case AF_UNIX:
2108                 break;
2109         default:
2110                 sys_errno = EINVAL;
2111                 break;
2112         }
2113
2114         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2115
2116         return req;
2117 }
2118
2119 int _tstream_unix_connect_recv(struct tevent_req *req,
2120                                       int *perrno,
2121                                       TALLOC_CTX *mem_ctx,
2122                                       struct tstream_context **stream,
2123                                       const char *location)
2124 {
2125         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2126 }
2127
2128 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2129                              struct tstream_context **_stream1,
2130                              TALLOC_CTX *mem_ctx2,
2131                              struct tstream_context **_stream2,
2132                              const char *location)
2133 {
2134         int ret;
2135         int fds[2];
2136         int fd1;
2137         int fd2;
2138         struct tstream_context *stream1 = NULL;
2139         struct tstream_context *stream2 = NULL;
2140
2141         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2142         if (ret == -1) {
2143                 return -1;
2144         }
2145         fd1 = fds[0];
2146         fd2 = fds[1];
2147
2148         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2149         if (fd1 == -1) {
2150                 int sys_errno = errno;
2151                 close(fd2);
2152                 errno = sys_errno;
2153                 return -1;
2154         }
2155
2156         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2157         if (fd2 == -1) {
2158                 int sys_errno = errno;
2159                 close(fd1);
2160                 errno = sys_errno;
2161                 return -1;
2162         }
2163
2164         ret = _tstream_bsd_existing_socket(mem_ctx1,
2165                                            fd1,
2166                                            &stream1,
2167                                            location);
2168         if (ret == -1) {
2169                 int sys_errno = errno;
2170                 close(fd1);
2171                 close(fd2);
2172                 errno = sys_errno;
2173                 return -1;
2174         }
2175
2176         ret = _tstream_bsd_existing_socket(mem_ctx2,
2177                                            fd2,
2178                                            &stream2,
2179                                            location);
2180         if (ret == -1) {
2181                 int sys_errno = errno;
2182                 talloc_free(stream1);
2183                 close(fd2);
2184                 errno = sys_errno;
2185                 return -1;
2186         }
2187
2188         *_stream1 = stream1;
2189         *_stream2 = stream2;
2190         return 0;
2191 }
2192