ndr: Add support for pulling/printing an ipv6address type
[samba.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Translate the outcome of a socket related syscall into an errno
 * style result.
 *
 * ret       - the return value of the syscall
 * sys_errno - the errno value captured right after the syscall
 * retry     - always written: true if the failure is a transient one
 *             (EINTR, EINPROGRESS, EAGAIN or EWOULDBLOCK) and the
 *             operation should just be tried again, false otherwise
 *
 * Returns 0 if ret signals success (ret >= 0), EIO if the failure is
 * not well formed (ret is below -1 or sys_errno is 0), and sys_errno
 * in all remaining cases.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	/* a failure has to look like "-1 with errno set" */
	if (ret != -1 || sys_errno == 0) {
		return EIO;
	}

	transient = (sys_errno == EINTR ||
		     sys_errno == EINPROGRESS ||
		     sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
	transient = transient || (sys_errno == EWOULDBLOCK);
#endif

	*retry = transient;
	return sys_errno;
}
72
/*
 * Bring a freshly created file descriptor into the state the rest of
 * this file works with: non-blocking and (where FD_CLOEXEC exists)
 * close-on-exec.
 *
 * fd      - the descriptor to prepare, -1 is passed through as -1
 * high_fd - if true the descriptor is dup()'ed until it is >= 3 and
 *           the lower numbered originals are closed
 *
 * Returns the (possibly different) descriptor on success. On failure
 * -1 is returned with errno preserved from the failing call, and the
 * descriptor has been closed.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
#if defined(O_NONBLOCK)
	const int nonblock_flag = O_NONBLOCK;
#elif defined(SYSV)
	const int nonblock_flag = O_NDELAY;
#else /* BSD */
	const int nonblock_flag = FNDELAY;
#endif
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int fl;
	int idx;

	if (fd == -1) {
		return -1;
	}

	if (high_fd) {
		/* keep duplicating until we are above stdin/stdout/stderr */
		while (fd < 3) {
			low_fds[num_low] = fd;
			num_low += 1;

			fd = dup(fd);
			if (fd == -1) {
				saved_errno = errno;
				break;
			}
		}

		/* the low numbered descriptors are not needed anymore */
		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			/* close() may have clobbered errno */
			errno = saved_errno;
			return fd;
		}
	}

	/* switch the descriptor to non-blocking mode */
	fl = fcntl(fd, F_GETFL);
	if (fl == -1) {
		goto fail;
	}
	if (fcntl(fd, F_SETFL, fl | nonblock_flag) == -1) {
		goto fail;
	}

#ifdef FD_CLOEXEC
	/* do not leak the descriptor across exec() */
	fl = fcntl(fd, F_GETFD, 0);
	if (fl < 0) {
		goto fail;
	}
	if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
		goto fail;
	}
#endif

	return fd;

 fail:
	if (fd != -1) {
		/* report the fcntl() error, not a possible close() error */
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
	}
	return -1;
}
149
/*
 * Ask the kernel how many bytes can be read from fd right now.
 *
 * Returns the FIONREAD value if it is non zero. If nothing is
 * readable the SO_ERROR state of the socket is consulted: 0 is
 * returned for a healthy socket, otherwise -1 with errno set to the
 * pending socket error. -1 is also returned (errno set) if ioctl()
 * or getsockopt() fail.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int avail = 0;
	int so_error = 0;
	socklen_t so_len = sizeof(so_error);
	int rc;

	rc = ioctl(fd, FIONREAD, &avail);
	if (rc == -1) {
		return rc;
	}
	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (avail != 0) {
		return avail;
	}

	/*
	 * Nothing to read: find out whether the socket is in an error
	 * state. For connected dgram sockets this is how ICMP error
	 * messages are handed back to the caller.
	 */
	rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_len);
	if (rc == -1) {
		return rc;
	}
	if (so_error != 0) {
		errno = so_error;
		return -1;
	}

	return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
192 struct tsocket_address_bsd {
193         socklen_t sa_socklen;
194         union {
195                 struct sockaddr sa;
196                 struct sockaddr_in in;
197 #ifdef HAVE_IPV6
198                 struct sockaddr_in6 in6;
199 #endif
200                 struct sockaddr_un un;
201                 struct sockaddr_storage ss;
202         } u;
203 };
204
205 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
206                                        struct sockaddr *sa,
207                                        size_t sa_socklen,
208                                        struct tsocket_address **_addr,
209                                        const char *location)
210 {
211         struct tsocket_address *addr;
212         struct tsocket_address_bsd *bsda;
213
214         if (sa_socklen < sizeof(sa->sa_family)) {
215                 errno = EINVAL;
216                 return -1;
217         }
218
219         switch (sa->sa_family) {
220         case AF_UNIX:
221                 if (sa_socklen > sizeof(struct sockaddr_un)) {
222                         sa_socklen = sizeof(struct sockaddr_un);
223                 }
224                 break;
225         case AF_INET:
226                 if (sa_socklen < sizeof(struct sockaddr_in)) {
227                         errno = EINVAL;
228                         return -1;
229                 }
230                 sa_socklen = sizeof(struct sockaddr_in);
231                 break;
232 #ifdef HAVE_IPV6
233         case AF_INET6:
234                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
235                         errno = EINVAL;
236                         return -1;
237                 }
238                 sa_socklen = sizeof(struct sockaddr_in6);
239                 break;
240 #endif
241         default:
242                 errno = EAFNOSUPPORT;
243                 return -1;
244         }
245
246         if (sa_socklen > sizeof(struct sockaddr_storage)) {
247                 errno = EINVAL;
248                 return -1;
249         }
250
251         addr = tsocket_address_create(mem_ctx,
252                                       &tsocket_address_bsd_ops,
253                                       &bsda,
254                                       struct tsocket_address_bsd,
255                                       location);
256         if (!addr) {
257                 errno = ENOMEM;
258                 return -1;
259         }
260
261         ZERO_STRUCTP(bsda);
262
263         memcpy(&bsda->u.ss, sa, sa_socklen);
264
265         bsda->sa_socklen = sa_socklen;
266
267         *_addr = addr;
268         return 0;
269 }
270
271 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
272                                      struct sockaddr *sa,
273                                      size_t sa_socklen)
274 {
275         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
276                                            struct tsocket_address_bsd);
277
278         if (!bsda) {
279                 errno = EINVAL;
280                 return -1;
281         }
282
283         if (sa_socklen < bsda->sa_socklen) {
284                 errno = EINVAL;
285                 return -1;
286         }
287
288         if (sa_socklen > bsda->sa_socklen) {
289                 memset(sa, 0, sa_socklen);
290                 sa_socklen = bsda->sa_socklen;
291         }
292
293         memcpy(sa, &bsda->u.ss, sa_socklen);
294         return sa_socklen;
295 }
296
297 bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam)
298 {
299         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
300                                            struct tsocket_address_bsd);
301
302         if (!bsda) {
303                 return false;
304         }
305
306         switch (bsda->u.sa.sa_family) {
307         case AF_INET:
308                 if (strcasecmp(fam, "ip") == 0) {
309                         return true;
310                 }
311
312                 if (strcasecmp(fam, "ipv4") == 0) {
313                         return true;
314                 }
315
316                 return false;
317 #ifdef HAVE_IPV6
318         case AF_INET6:
319                 if (strcasecmp(fam, "ip") == 0) {
320                         return true;
321                 }
322
323                 if (strcasecmp(fam, "ipv6") == 0) {
324                         return true;
325                 }
326
327                 return false;
328 #endif
329         }
330
331         return false;
332 }
333
334 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
335                                        const char *fam,
336                                        const char *addr,
337                                        uint16_t port,
338                                        struct tsocket_address **_addr,
339                                        const char *location)
340 {
341         struct addrinfo hints;
342         struct addrinfo *result = NULL;
343         char port_str[6];
344         int ret;
345
346         ZERO_STRUCT(hints);
347         /*
348          * we use SOCKET_STREAM here to get just one result
349          * back from getaddrinfo().
350          */
351         hints.ai_socktype = SOCK_STREAM;
352         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
353
354         if (strcasecmp(fam, "ip") == 0) {
355                 hints.ai_family = AF_UNSPEC;
356                 if (!addr) {
357 #ifdef HAVE_IPV6
358                         addr = "::";
359 #else
360                         addr = "0.0.0.0";
361 #endif
362                 }
363         } else if (strcasecmp(fam, "ipv4") == 0) {
364                 hints.ai_family = AF_INET;
365                 if (!addr) {
366                         addr = "0.0.0.0";
367                 }
368 #ifdef HAVE_IPV6
369         } else if (strcasecmp(fam, "ipv6") == 0) {
370                 hints.ai_family = AF_INET6;
371                 if (!addr) {
372                         addr = "::";
373                 }
374 #endif
375         } else {
376                 errno = EAFNOSUPPORT;
377                 return -1;
378         }
379
380         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
381
382         ret = getaddrinfo(addr, port_str, &hints, &result);
383         if (ret != 0) {
384                 switch (ret) {
385                 case EAI_FAIL:
386                         errno = EINVAL;
387                         break;
388                 }
389                 ret = -1;
390                 goto done;
391         }
392
393         if (result->ai_socktype != SOCK_STREAM) {
394                 errno = EINVAL;
395                 ret = -1;
396                 goto done;
397         }
398
399         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
400                                                   result->ai_addr,
401                                                   result->ai_addrlen,
402                                                   _addr,
403                                                   location);
404
405 done:
406         if (result) {
407                 freeaddrinfo(result);
408         }
409         return ret;
410 }
411
412 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
413                                        TALLOC_CTX *mem_ctx)
414 {
415         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
416                                            struct tsocket_address_bsd);
417         char addr_str[INET6_ADDRSTRLEN+1];
418         const char *str;
419
420         if (!bsda) {
421                 errno = EINVAL;
422                 return NULL;
423         }
424
425         switch (bsda->u.sa.sa_family) {
426         case AF_INET:
427                 str = inet_ntop(bsda->u.in.sin_family,
428                                 &bsda->u.in.sin_addr,
429                                 addr_str, sizeof(addr_str));
430                 break;
431 #ifdef HAVE_IPV6
432         case AF_INET6:
433                 str = inet_ntop(bsda->u.in6.sin6_family,
434                                 &bsda->u.in6.sin6_addr,
435                                 addr_str, sizeof(addr_str));
436                 break;
437 #endif
438         default:
439                 errno = EINVAL;
440                 return NULL;
441         }
442
443         if (!str) {
444                 return NULL;
445         }
446
447         return talloc_strdup(mem_ctx, str);
448 }
449
450 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
451 {
452         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
453                                            struct tsocket_address_bsd);
454         uint16_t port = 0;
455
456         if (!bsda) {
457                 errno = EINVAL;
458                 return 0;
459         }
460
461         switch (bsda->u.sa.sa_family) {
462         case AF_INET:
463                 port = ntohs(bsda->u.in.sin_port);
464                 break;
465 #ifdef HAVE_IPV6
466         case AF_INET6:
467                 port = ntohs(bsda->u.in6.sin6_port);
468                 break;
469 #endif
470         default:
471                 errno = EINVAL;
472                 return 0;
473         }
474
475         return port;
476 }
477
478 int tsocket_address_inet_set_port(struct tsocket_address *addr,
479                                   uint16_t port)
480 {
481         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
482                                            struct tsocket_address_bsd);
483
484         if (!bsda) {
485                 errno = EINVAL;
486                 return -1;
487         }
488
489         switch (bsda->u.sa.sa_family) {
490         case AF_INET:
491                 bsda->u.in.sin_port = htons(port);
492                 break;
493 #ifdef HAVE_IPV6
494         case AF_INET6:
495                 bsda->u.in6.sin6_port = htons(port);
496                 break;
497 #endif
498         default:
499                 errno = EINVAL;
500                 return -1;
501         }
502
503         return 0;
504 }
505
506 bool tsocket_address_is_unix(const struct tsocket_address *addr)
507 {
508         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
509                                            struct tsocket_address_bsd);
510
511         if (!bsda) {
512                 return false;
513         }
514
515         switch (bsda->u.sa.sa_family) {
516         case AF_UNIX:
517                 return true;
518         }
519
520         return false;
521 }
522
523 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
524                                     const char *path,
525                                     struct tsocket_address **_addr,
526                                     const char *location)
527 {
528         struct sockaddr_un un;
529         void *p = &un;
530         int ret;
531
532         if (!path) {
533                 path = "";
534         }
535
536         if (strlen(path) > sizeof(un.sun_path)-1) {
537                 errno = ENAMETOOLONG;
538                 return -1;
539         }
540
541         ZERO_STRUCT(un);
542         un.sun_family = AF_UNIX;
543         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
544
545         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
546                                                  (struct sockaddr *)p,
547                                                  sizeof(un),
548                                                  _addr,
549                                                  location);
550
551         return ret;
552 }
553
554 char *tsocket_address_unix_path(const struct tsocket_address *addr,
555                                 TALLOC_CTX *mem_ctx)
556 {
557         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
558                                            struct tsocket_address_bsd);
559         const char *str;
560
561         if (!bsda) {
562                 errno = EINVAL;
563                 return NULL;
564         }
565
566         switch (bsda->u.sa.sa_family) {
567         case AF_UNIX:
568                 str = bsda->u.un.sun_path;
569                 break;
570         default:
571                 errno = EINVAL;
572                 return NULL;
573         }
574
575         return talloc_strdup(mem_ctx, str);
576 }
577
578 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
579                                         TALLOC_CTX *mem_ctx)
580 {
581         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
582                                            struct tsocket_address_bsd);
583         char *str;
584         char *addr_str;
585         const char *prefix = NULL;
586         uint16_t port;
587
588         switch (bsda->u.sa.sa_family) {
589         case AF_UNIX:
590                 return talloc_asprintf(mem_ctx, "unix:%s",
591                                        bsda->u.un.sun_path);
592         case AF_INET:
593                 prefix = "ipv4";
594                 break;
595 #ifdef HAVE_IPV6
596         case AF_INET6:
597                 prefix = "ipv6";
598                 break;
599 #endif
600         default:
601                 errno = EINVAL;
602                 return NULL;
603         }
604
605         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
606         if (!addr_str) {
607                 return NULL;
608         }
609
610         port = tsocket_address_inet_port(addr);
611
612         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
613                               prefix, addr_str, port);
614         talloc_free(addr_str);
615
616         return str;
617 }
618
619 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
620                                                          TALLOC_CTX *mem_ctx,
621                                                          const char *location)
622 {
623         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
624                                            struct tsocket_address_bsd);
625         struct tsocket_address *copy;
626         int ret;
627
628         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
629                                                  &bsda->u.sa,
630                                                  bsda->sa_socklen,
631                                                  &copy,
632                                                  location);
633         if (ret != 0) {
634                 return NULL;
635         }
636
637         return copy;
638 }
639
640 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
641         .name           = "bsd",
642         .string         = tsocket_address_bsd_string,
643         .copy           = tsocket_address_bsd_copy,
644 };
645
/*
 * State of a datagram socket driven through tevent.
 */
struct tdgram_bsd {
	/* the socket, -1 is treated as "not connected" by the requests */
	int fd;

	/* the tevent context fde was registered on, only compared as a pointer */
	void *event_ptr;
	/* fd event shared by the read and the write direction */
	struct tevent_fd *fde;

	/* callback (and its argument) invoked when the fd is readable */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback (and its argument) invoked when the fd is writeable */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
657
658 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
659                                    struct tevent_fd *fde,
660                                    uint16_t flags,
661                                    void *private_data)
662 {
663         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
664                                   struct tdgram_bsd);
665
666         if (flags & TEVENT_FD_WRITE) {
667                 bsds->writeable_handler(bsds->writeable_private);
668                 return;
669         }
670         if (flags & TEVENT_FD_READ) {
671                 if (!bsds->readable_handler) {
672                         TEVENT_FD_NOT_READABLE(bsds->fde);
673                         return;
674                 }
675                 bsds->readable_handler(bsds->readable_private);
676                 return;
677         }
678 }
679
680 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
681                                            struct tevent_context *ev,
682                                            void (*handler)(void *private_data),
683                                            void *private_data)
684 {
685         if (ev == NULL) {
686                 if (handler) {
687                         errno = EINVAL;
688                         return -1;
689                 }
690                 if (!bsds->readable_handler) {
691                         return 0;
692                 }
693                 bsds->readable_handler = NULL;
694                 bsds->readable_private = NULL;
695
696                 return 0;
697         }
698
699         /* read and write must use the same tevent_context */
700         if (bsds->event_ptr != ev) {
701                 if (bsds->readable_handler || bsds->writeable_handler) {
702                         errno = EINVAL;
703                         return -1;
704                 }
705                 bsds->event_ptr = NULL;
706                 TALLOC_FREE(bsds->fde);
707         }
708
709         if (tevent_fd_get_flags(bsds->fde) == 0) {
710                 TALLOC_FREE(bsds->fde);
711
712                 bsds->fde = tevent_add_fd(ev, bsds,
713                                           bsds->fd, TEVENT_FD_READ,
714                                           tdgram_bsd_fde_handler,
715                                           bsds);
716                 if (!bsds->fde) {
717                         errno = ENOMEM;
718                         return -1;
719                 }
720
721                 /* cache the event context we're running on */
722                 bsds->event_ptr = ev;
723         } else if (!bsds->readable_handler) {
724                 TEVENT_FD_READABLE(bsds->fde);
725         }
726
727         bsds->readable_handler = handler;
728         bsds->readable_private = private_data;
729
730         return 0;
731 }
732
733 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
734                                             struct tevent_context *ev,
735                                             void (*handler)(void *private_data),
736                                             void *private_data)
737 {
738         if (ev == NULL) {
739                 if (handler) {
740                         errno = EINVAL;
741                         return -1;
742                 }
743                 if (!bsds->writeable_handler) {
744                         return 0;
745                 }
746                 bsds->writeable_handler = NULL;
747                 bsds->writeable_private = NULL;
748                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
749
750                 return 0;
751         }
752
753         /* read and write must use the same tevent_context */
754         if (bsds->event_ptr != ev) {
755                 if (bsds->readable_handler || bsds->writeable_handler) {
756                         errno = EINVAL;
757                         return -1;
758                 }
759                 bsds->event_ptr = NULL;
760                 TALLOC_FREE(bsds->fde);
761         }
762
763         if (tevent_fd_get_flags(bsds->fde) == 0) {
764                 TALLOC_FREE(bsds->fde);
765
766                 bsds->fde = tevent_add_fd(ev, bsds,
767                                           bsds->fd, TEVENT_FD_WRITE,
768                                           tdgram_bsd_fde_handler,
769                                           bsds);
770                 if (!bsds->fde) {
771                         errno = ENOMEM;
772                         return -1;
773                 }
774
775                 /* cache the event context we're running on */
776                 bsds->event_ptr = ev;
777         } else if (!bsds->writeable_handler) {
778                 TEVENT_FD_WRITEABLE(bsds->fde);
779         }
780
781         bsds->writeable_handler = handler;
782         bsds->writeable_private = private_data;
783
784         return 0;
785 }
786
/*
 * Per request state of a recvfrom operation.
 */
struct tdgram_bsd_recvfrom_state {
	/* the datagram context the request runs on */
	struct tdgram_context *dgram;

	/* the received datagram and its length in bytes */
	uint8_t *buf;
	size_t len;
	/* the address recvfrom() reported as the sender */
	struct tsocket_address *src;
};
794
795 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
796 {
797         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
798                                   struct tdgram_bsd);
799
800         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
801
802         return 0;
803 }
804
805 static void tdgram_bsd_recvfrom_handler(void *private_data);
806
807 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
808                                         struct tevent_context *ev,
809                                         struct tdgram_context *dgram)
810 {
811         struct tevent_req *req;
812         struct tdgram_bsd_recvfrom_state *state;
813         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
814         int ret;
815
816         req = tevent_req_create(mem_ctx, &state,
817                                 struct tdgram_bsd_recvfrom_state);
818         if (!req) {
819                 return NULL;
820         }
821
822         state->dgram    = dgram;
823         state->buf      = NULL;
824         state->len      = 0;
825         state->src      = NULL;
826
827         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
828
829         if (bsds->fd == -1) {
830                 tevent_req_error(req, ENOTCONN);
831                 goto post;
832         }
833
834         /*
835          * this is a fast path, not waiting for the
836          * socket to become explicit readable gains
837          * about 10%-20% performance in benchmark tests.
838          */
839         tdgram_bsd_recvfrom_handler(req);
840         if (!tevent_req_is_in_progress(req)) {
841                 goto post;
842         }
843
844         ret = tdgram_bsd_set_readable_handler(bsds, ev,
845                                               tdgram_bsd_recvfrom_handler,
846                                               req);
847         if (ret == -1) {
848                 tevent_req_error(req, errno);
849                 goto post;
850         }
851
852         return req;
853
854  post:
855         tevent_req_post(req, ev);
856         return req;
857 }
858
859 static void tdgram_bsd_recvfrom_handler(void *private_data)
860 {
861         struct tevent_req *req = talloc_get_type_abort(private_data,
862                                  struct tevent_req);
863         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
864                                         struct tdgram_bsd_recvfrom_state);
865         struct tdgram_context *dgram = state->dgram;
866         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
867         struct tsocket_address_bsd *bsda;
868         ssize_t ret;
869         int err;
870         bool retry;
871
872         ret = tsocket_bsd_pending(bsds->fd);
873         if (ret == 0) {
874                 /* retry later */
875                 return;
876         }
877         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
878         if (retry) {
879                 /* retry later */
880                 return;
881         }
882         if (tevent_req_error(req, err)) {
883                 return;
884         }
885
886         state->buf = talloc_array(state, uint8_t, ret);
887         if (tevent_req_nomem(state->buf, req)) {
888                 return;
889         }
890         state->len = ret;
891
892         state->src = tsocket_address_create(state,
893                                             &tsocket_address_bsd_ops,
894                                             &bsda,
895                                             struct tsocket_address_bsd,
896                                             __location__ "bsd_recvfrom");
897         if (tevent_req_nomem(state->src, req)) {
898                 return;
899         }
900
901         ZERO_STRUCTP(bsda);
902         bsda->sa_socklen = sizeof(bsda->u.ss);
903
904         ret = recvfrom(bsds->fd, state->buf, state->len, 0,
905                        &bsda->u.sa, &bsda->sa_socklen);
906         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
907         if (retry) {
908                 /* retry later */
909                 return;
910         }
911         if (tevent_req_error(req, err)) {
912                 return;
913         }
914
915         /*
916          * Some systems (FreeBSD, see bug #7115) return too much
917          * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
918          * the return value includes some IP/UDP header bytes,
919          * while recvfrom() just returns the payload.
920          */
921         state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
922         if (tevent_req_nomem(state->buf, req)) {
923                 return;
924         }
925         state->len = ret;
926
927         tevent_req_done(req);
928 }
929
930 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
931                                         int *perrno,
932                                         TALLOC_CTX *mem_ctx,
933                                         uint8_t **buf,
934                                         struct tsocket_address **src)
935 {
936         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
937                                         struct tdgram_bsd_recvfrom_state);
938         ssize_t ret;
939
940         ret = tsocket_simple_int_recv(req, perrno);
941         if (ret == 0) {
942                 *buf = talloc_move(mem_ctx, &state->buf);
943                 ret = state->len;
944                 if (src) {
945                         *src = talloc_move(mem_ctx, &state->src);
946                 }
947         }
948
949         tevent_req_received(req);
950         return ret;
951 }
952
/*
 * Per request state of a sendto operation.
 */
struct tdgram_bsd_sendto_state {
	/* the datagram context the request runs on */
	struct tdgram_context *dgram;

	/* the caller's buffer and its length, only referenced here */
	const uint8_t *buf;
	size_t len;
	/* the destination address as passed in by the caller */
	const struct tsocket_address *dst;

	/* result of the operation, starts out as -1 */
	ssize_t ret;
};
962
963 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
964 {
965         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
966                                   struct tdgram_bsd);
967
968         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
969
970         return 0;
971 }
972
973 static void tdgram_bsd_sendto_handler(void *private_data);
974
975 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
976                                                  struct tevent_context *ev,
977                                                  struct tdgram_context *dgram,
978                                                  const uint8_t *buf,
979                                                  size_t len,
980                                                  const struct tsocket_address *dst)
981 {
982         struct tevent_req *req;
983         struct tdgram_bsd_sendto_state *state;
984         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
985         int ret;
986
987         req = tevent_req_create(mem_ctx, &state,
988                                 struct tdgram_bsd_sendto_state);
989         if (!req) {
990                 return NULL;
991         }
992
993         state->dgram    = dgram;
994         state->buf      = buf;
995         state->len      = len;
996         state->dst      = dst;
997         state->ret      = -1;
998
999         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
1000
1001         if (bsds->fd == -1) {
1002                 tevent_req_error(req, ENOTCONN);
1003                 goto post;
1004         }
1005
1006         /*
1007          * this is a fast path, not waiting for the
1008          * socket to become explicit writeable gains
1009          * about 10%-20% performance in benchmark tests.
1010          */
1011         tdgram_bsd_sendto_handler(req);
1012         if (!tevent_req_is_in_progress(req)) {
1013                 goto post;
1014         }
1015
1016         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
1017                                                tdgram_bsd_sendto_handler,
1018                                                req);
1019         if (ret == -1) {
1020                 tevent_req_error(req, errno);
1021                 goto post;
1022         }
1023
1024         return req;
1025
1026  post:
1027         tevent_req_post(req, ev);
1028         return req;
1029 }
1030
1031 static void tdgram_bsd_sendto_handler(void *private_data)
1032 {
1033         struct tevent_req *req = talloc_get_type_abort(private_data,
1034                                  struct tevent_req);
1035         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1036                                         struct tdgram_bsd_sendto_state);
1037         struct tdgram_context *dgram = state->dgram;
1038         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1039         struct sockaddr *sa = NULL;
1040         socklen_t sa_socklen = 0;
1041         ssize_t ret;
1042         int err;
1043         bool retry;
1044
1045         if (state->dst) {
1046                 struct tsocket_address_bsd *bsda =
1047                         talloc_get_type(state->dst->private_data,
1048                         struct tsocket_address_bsd);
1049
1050                 sa = &bsda->u.sa;
1051                 sa_socklen = bsda->sa_socklen;
1052         }
1053
1054         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1055         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1056         if (retry) {
1057                 /* retry later */
1058                 return;
1059         }
1060         if (tevent_req_error(req, err)) {
1061                 return;
1062         }
1063
1064         state->ret = ret;
1065
1066         tevent_req_done(req);
1067 }
1068
1069 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1070 {
1071         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1072                                         struct tdgram_bsd_sendto_state);
1073         ssize_t ret;
1074
1075         ret = tsocket_simple_int_recv(req, perrno);
1076         if (ret == 0) {
1077                 ret = state->ret;
1078         }
1079
1080         tevent_req_received(req);
1081         return ret;
1082 }
1083
/*
 * State of a datagram disconnect request.  Nothing is read from or
 * stored in it by tdgram_bsd_disconnect_send(); the single member is
 * only a placeholder.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy; /* placeholder member */
};
1087
1088 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1089                                                      struct tevent_context *ev,
1090                                                      struct tdgram_context *dgram)
1091 {
1092         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1093         struct tevent_req *req;
1094         struct tdgram_bsd_disconnect_state *state;
1095         int ret;
1096         int err;
1097         bool dummy;
1098
1099         req = tevent_req_create(mem_ctx, &state,
1100                                 struct tdgram_bsd_disconnect_state);
1101         if (req == NULL) {
1102                 return NULL;
1103         }
1104
1105         if (bsds->fd == -1) {
1106                 tevent_req_error(req, ENOTCONN);
1107                 goto post;
1108         }
1109
1110         TALLOC_FREE(bsds->fde);
1111         ret = close(bsds->fd);
1112         bsds->fd = -1;
1113         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1114         if (tevent_req_error(req, err)) {
1115                 goto post;
1116         }
1117
1118         tevent_req_done(req);
1119 post:
1120         tevent_req_post(req, ev);
1121         return req;
1122 }
1123
/*
 * Collect the result of a disconnect request: returns whatever
 * tsocket_simple_int_recv() reports (which also gets perrno) and marks
 * the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int status = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return status;
}
1134
1135 static const struct tdgram_context_ops tdgram_bsd_ops = {
1136         .name                   = "bsd",
1137
1138         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1139         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1140
1141         .sendto_send            = tdgram_bsd_sendto_send,
1142         .sendto_recv            = tdgram_bsd_sendto_recv,
1143
1144         .disconnect_send        = tdgram_bsd_disconnect_send,
1145         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1146 };
1147
1148 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1149 {
1150         TALLOC_FREE(bsds->fde);
1151         if (bsds->fd != -1) {
1152                 close(bsds->fd);
1153                 bsds->fd = -1;
1154         }
1155         return 0;
1156 }
1157
1158 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1159                                    const struct tsocket_address *remote,
1160                                    bool broadcast,
1161                                    TALLOC_CTX *mem_ctx,
1162                                    struct tdgram_context **_dgram,
1163                                    const char *location)
1164 {
1165         struct tsocket_address_bsd *lbsda =
1166                 talloc_get_type_abort(local->private_data,
1167                 struct tsocket_address_bsd);
1168         struct tsocket_address_bsd *rbsda = NULL;
1169         struct tdgram_context *dgram;
1170         struct tdgram_bsd *bsds;
1171         int fd;
1172         int ret;
1173         bool do_bind = false;
1174         bool do_reuseaddr = false;
1175         bool do_ipv6only = false;
1176         bool is_inet = false;
1177         int sa_fam = lbsda->u.sa.sa_family;
1178
1179         if (remote) {
1180                 rbsda = talloc_get_type_abort(remote->private_data,
1181                         struct tsocket_address_bsd);
1182         }
1183
1184         switch (lbsda->u.sa.sa_family) {
1185         case AF_UNIX:
1186                 if (broadcast) {
1187                         errno = EINVAL;
1188                         return -1;
1189                 }
1190                 if (lbsda->u.un.sun_path[0] != 0) {
1191                         do_reuseaddr = true;
1192                         do_bind = true;
1193                 }
1194                 break;
1195         case AF_INET:
1196                 if (lbsda->u.in.sin_port != 0) {
1197                         do_reuseaddr = true;
1198                         do_bind = true;
1199                 }
1200                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1201                         do_bind = true;
1202                 }
1203                 is_inet = true;
1204                 break;
1205 #ifdef HAVE_IPV6
1206         case AF_INET6:
1207                 if (lbsda->u.in6.sin6_port != 0) {
1208                         do_reuseaddr = true;
1209                         do_bind = true;
1210                 }
1211                 if (memcmp(&in6addr_any,
1212                            &lbsda->u.in6.sin6_addr,
1213                            sizeof(in6addr_any)) != 0) {
1214                         do_bind = true;
1215                 }
1216                 is_inet = true;
1217                 do_ipv6only = true;
1218                 break;
1219 #endif
1220         default:
1221                 errno = EINVAL;
1222                 return -1;
1223         }
1224
1225         if (!do_bind && is_inet && rbsda) {
1226                 sa_fam = rbsda->u.sa.sa_family;
1227                 switch (sa_fam) {
1228                 case AF_INET:
1229                         do_ipv6only = false;
1230                         break;
1231 #ifdef HAVE_IPV6
1232                 case AF_INET6:
1233                         do_ipv6only = true;
1234                         break;
1235 #endif
1236                 }
1237         }
1238
1239         fd = socket(sa_fam, SOCK_DGRAM, 0);
1240         if (fd < 0) {
1241                 return -1;
1242         }
1243
1244         fd = tsocket_bsd_common_prepare_fd(fd, true);
1245         if (fd < 0) {
1246                 return -1;
1247         }
1248
1249         dgram = tdgram_context_create(mem_ctx,
1250                                       &tdgram_bsd_ops,
1251                                       &bsds,
1252                                       struct tdgram_bsd,
1253                                       location);
1254         if (!dgram) {
1255                 int saved_errno = errno;
1256                 close(fd);
1257                 errno = saved_errno;
1258                 return -1;
1259         }
1260         ZERO_STRUCTP(bsds);
1261         bsds->fd = fd;
1262         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1263
1264 #ifdef HAVE_IPV6
1265         if (do_ipv6only) {
1266                 int val = 1;
1267
1268                 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
1269                                  (const void *)&val, sizeof(val));
1270                 if (ret == -1) {
1271                         int saved_errno = errno;
1272                         talloc_free(dgram);
1273                         errno = saved_errno;
1274                         return -1;
1275                 }
1276         }
1277 #endif
1278
1279         if (broadcast) {
1280                 int val = 1;
1281
1282                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1283                                  (const void *)&val, sizeof(val));
1284                 if (ret == -1) {
1285                         int saved_errno = errno;
1286                         talloc_free(dgram);
1287                         errno = saved_errno;
1288                         return -1;
1289                 }
1290         }
1291
1292         if (do_reuseaddr) {
1293                 int val = 1;
1294
1295                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1296                                  (const void *)&val, sizeof(val));
1297                 if (ret == -1) {
1298                         int saved_errno = errno;
1299                         talloc_free(dgram);
1300                         errno = saved_errno;
1301                         return -1;
1302                 }
1303         }
1304
1305         if (do_bind) {
1306                 ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
1307                 if (ret == -1) {
1308                         int saved_errno = errno;
1309                         talloc_free(dgram);
1310                         errno = saved_errno;
1311                         return -1;
1312                 }
1313         }
1314
1315         if (rbsda) {
1316                 if (rbsda->u.sa.sa_family != sa_fam) {
1317                         talloc_free(dgram);
1318                         errno = EINVAL;
1319                         return -1;
1320                 }
1321
1322                 ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
1323                 if (ret == -1) {
1324                         int saved_errno = errno;
1325                         talloc_free(dgram);
1326                         errno = saved_errno;
1327                         return -1;
1328                 }
1329         }
1330
1331         *_dgram = dgram;
1332         return 0;
1333 }
1334
1335 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1336                             const struct tsocket_address *remote,
1337                             TALLOC_CTX *mem_ctx,
1338                             struct tdgram_context **dgram,
1339                             const char *location)
1340 {
1341         struct tsocket_address_bsd *lbsda =
1342                 talloc_get_type_abort(local->private_data,
1343                 struct tsocket_address_bsd);
1344         int ret;
1345
1346         switch (lbsda->u.sa.sa_family) {
1347         case AF_INET:
1348                 break;
1349 #ifdef HAVE_IPV6
1350         case AF_INET6:
1351                 break;
1352 #endif
1353         default:
1354                 errno = EINVAL;
1355                 return -1;
1356         }
1357
1358         ret = tdgram_bsd_dgram_socket(local, remote, false,
1359                                       mem_ctx, dgram, location);
1360
1361         return ret;
1362 }
1363
1364 int _tdgram_unix_socket(const struct tsocket_address *local,
1365                         const struct tsocket_address *remote,
1366                         TALLOC_CTX *mem_ctx,
1367                         struct tdgram_context **dgram,
1368                         const char *location)
1369 {
1370         struct tsocket_address_bsd *lbsda =
1371                 talloc_get_type_abort(local->private_data,
1372                 struct tsocket_address_bsd);
1373         int ret;
1374
1375         switch (lbsda->u.sa.sa_family) {
1376         case AF_UNIX:
1377                 break;
1378         default:
1379                 errno = EINVAL;
1380                 return -1;
1381         }
1382
1383         ret = tdgram_bsd_dgram_socket(local, remote, false,
1384                                       mem_ctx, dgram, location);
1385
1386         return ret;
1387 }
1388
/* Private data of a stream context backed by a file descriptor. */
struct tstream_bsd {
        /* socket descriptor, -1 once the stream was disconnected */
        int fd;

        /* event context the fd event was created on (compared by pointer) */
        void *event_ptr;
        /* fd event dispatching to the handlers below */
        struct tevent_fd *fde;

        /* callback + argument invoked when the fd is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback + argument invoked when the fd is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1400
1401 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1402                                     struct tevent_fd *fde,
1403                                     uint16_t flags,
1404                                     void *private_data)
1405 {
1406         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1407                                    struct tstream_bsd);
1408
1409         if (flags & TEVENT_FD_WRITE) {
1410                 bsds->writeable_handler(bsds->writeable_private);
1411                 return;
1412         }
1413         if (flags & TEVENT_FD_READ) {
1414                 if (!bsds->readable_handler) {
1415                         if (bsds->writeable_handler) {
1416                                 bsds->writeable_handler(bsds->writeable_private);
1417                                 return;
1418                         }
1419                         TEVENT_FD_NOT_READABLE(bsds->fde);
1420                         return;
1421                 }
1422                 bsds->readable_handler(bsds->readable_private);
1423                 return;
1424         }
1425 }
1426
1427 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1428                                             struct tevent_context *ev,
1429                                             void (*handler)(void *private_data),
1430                                             void *private_data)
1431 {
1432         if (ev == NULL) {
1433                 if (handler) {
1434                         errno = EINVAL;
1435                         return -1;
1436                 }
1437                 if (!bsds->readable_handler) {
1438                         return 0;
1439                 }
1440                 bsds->readable_handler = NULL;
1441                 bsds->readable_private = NULL;
1442
1443                 return 0;
1444         }
1445
1446         /* read and write must use the same tevent_context */
1447         if (bsds->event_ptr != ev) {
1448                 if (bsds->readable_handler || bsds->writeable_handler) {
1449                         errno = EINVAL;
1450                         return -1;
1451                 }
1452                 bsds->event_ptr = NULL;
1453                 TALLOC_FREE(bsds->fde);
1454         }
1455
1456         if (tevent_fd_get_flags(bsds->fde) == 0) {
1457                 TALLOC_FREE(bsds->fde);
1458
1459                 bsds->fde = tevent_add_fd(ev, bsds,
1460                                           bsds->fd, TEVENT_FD_READ,
1461                                           tstream_bsd_fde_handler,
1462                                           bsds);
1463                 if (!bsds->fde) {
1464                         errno = ENOMEM;
1465                         return -1;
1466                 }
1467
1468                 /* cache the event context we're running on */
1469                 bsds->event_ptr = ev;
1470         } else if (!bsds->readable_handler) {
1471                 TEVENT_FD_READABLE(bsds->fde);
1472         }
1473
1474         bsds->readable_handler = handler;
1475         bsds->readable_private = private_data;
1476
1477         return 0;
1478 }
1479
1480 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1481                                              struct tevent_context *ev,
1482                                              void (*handler)(void *private_data),
1483                                              void *private_data)
1484 {
1485         if (ev == NULL) {
1486                 if (handler) {
1487                         errno = EINVAL;
1488                         return -1;
1489                 }
1490                 if (!bsds->writeable_handler) {
1491                         return 0;
1492                 }
1493                 bsds->writeable_handler = NULL;
1494                 bsds->writeable_private = NULL;
1495                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1496
1497                 return 0;
1498         }
1499
1500         /* read and write must use the same tevent_context */
1501         if (bsds->event_ptr != ev) {
1502                 if (bsds->readable_handler || bsds->writeable_handler) {
1503                         errno = EINVAL;
1504                         return -1;
1505                 }
1506                 bsds->event_ptr = NULL;
1507                 TALLOC_FREE(bsds->fde);
1508         }
1509
1510         if (tevent_fd_get_flags(bsds->fde) == 0) {
1511                 TALLOC_FREE(bsds->fde);
1512
1513                 bsds->fde = tevent_add_fd(ev, bsds,
1514                                           bsds->fd,
1515                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1516                                           tstream_bsd_fde_handler,
1517                                           bsds);
1518                 if (!bsds->fde) {
1519                         errno = ENOMEM;
1520                         return -1;
1521                 }
1522
1523                 /* cache the event context we're running on */
1524                 bsds->event_ptr = ev;
1525         } else if (!bsds->writeable_handler) {
1526                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1527                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1528                 tevent_fd_set_flags(bsds->fde, flags);
1529         }
1530
1531         bsds->writeable_handler = handler;
1532         bsds->writeable_private = private_data;
1533
1534         return 0;
1535 }
1536
1537 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1538 {
1539         struct tstream_bsd *bsds = tstream_context_data(stream,
1540                                    struct tstream_bsd);
1541         ssize_t ret;
1542
1543         if (bsds->fd == -1) {
1544                 errno = ENOTCONN;
1545                 return -1;
1546         }
1547
1548         ret = tsocket_bsd_pending(bsds->fd);
1549
1550         return ret;
1551 }
1552
/* Per-request state of a stream readv operation. */
struct tstream_bsd_readv_state {
        /* stream the request reads from */
        struct tstream_context *stream;

        /* private copy of the caller's vector; advanced as data arrives */
        struct iovec *vector;
        /* number of entries still left in vector */
        size_t count;

        /* bytes read so far, summed over all readv() calls */
        int ret;
};
1561
1562 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1563 {
1564         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1565                                    struct tstream_bsd);
1566
1567         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1568
1569         return 0;
1570 }
1571
1572 static void tstream_bsd_readv_handler(void *private_data);
1573
1574 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1575                                         struct tevent_context *ev,
1576                                         struct tstream_context *stream,
1577                                         struct iovec *vector,
1578                                         size_t count)
1579 {
1580         struct tevent_req *req;
1581         struct tstream_bsd_readv_state *state;
1582         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1583         int ret;
1584
1585         req = tevent_req_create(mem_ctx, &state,
1586                                 struct tstream_bsd_readv_state);
1587         if (!req) {
1588                 return NULL;
1589         }
1590
1591         state->stream   = stream;
1592         /* we make a copy of the vector so that we can modify it */
1593         state->vector   = talloc_array(state, struct iovec, count);
1594         if (tevent_req_nomem(state->vector, req)) {
1595                 goto post;
1596         }
1597         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1598         state->count    = count;
1599         state->ret      = 0;
1600
1601         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1602
1603         if (bsds->fd == -1) {
1604                 tevent_req_error(req, ENOTCONN);
1605                 goto post;
1606         }
1607
1608         /*
1609          * this is a fast path, not waiting for the
1610          * socket to become explicit readable gains
1611          * about 10%-20% performance in benchmark tests.
1612          */
1613         tstream_bsd_readv_handler(req);
1614         if (!tevent_req_is_in_progress(req)) {
1615                 goto post;
1616         }
1617
1618         ret = tstream_bsd_set_readable_handler(bsds, ev,
1619                                               tstream_bsd_readv_handler,
1620                                               req);
1621         if (ret == -1) {
1622                 tevent_req_error(req, errno);
1623                 goto post;
1624         }
1625
1626         return req;
1627
1628  post:
1629         tevent_req_post(req, ev);
1630         return req;
1631 }
1632
1633 static void tstream_bsd_readv_handler(void *private_data)
1634 {
1635         struct tevent_req *req = talloc_get_type_abort(private_data,
1636                                  struct tevent_req);
1637         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1638                                         struct tstream_bsd_readv_state);
1639         struct tstream_context *stream = state->stream;
1640         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1641         int ret;
1642         int err;
1643         bool retry;
1644
1645         ret = readv(bsds->fd, state->vector, state->count);
1646         if (ret == 0) {
1647                 /* propagate end of file */
1648                 tevent_req_error(req, EPIPE);
1649                 return;
1650         }
1651         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1652         if (retry) {
1653                 /* retry later */
1654                 return;
1655         }
1656         if (tevent_req_error(req, err)) {
1657                 return;
1658         }
1659
1660         state->ret += ret;
1661
1662         while (ret > 0) {
1663                 if (ret < state->vector[0].iov_len) {
1664                         uint8_t *base;
1665                         base = (uint8_t *)state->vector[0].iov_base;
1666                         base += ret;
1667                         state->vector[0].iov_base = (void *)base;
1668                         state->vector[0].iov_len -= ret;
1669                         break;
1670                 }
1671                 ret -= state->vector[0].iov_len;
1672                 state->vector += 1;
1673                 state->count -= 1;
1674         }
1675
1676         /*
1677          * there're maybe some empty vectors at the end
1678          * which we need to skip, otherwise we would get
1679          * ret == 0 from the readv() call and return EPIPE
1680          */
1681         while (state->count > 0) {
1682                 if (state->vector[0].iov_len > 0) {
1683                         break;
1684                 }
1685                 state->vector += 1;
1686                 state->count -= 1;
1687         }
1688
1689         if (state->count > 0) {
1690                 /* we have more to read */
1691                 return;
1692         }
1693
1694         tevent_req_done(req);
1695 }
1696
1697 static int tstream_bsd_readv_recv(struct tevent_req *req,
1698                                   int *perrno)
1699 {
1700         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1701                                         struct tstream_bsd_readv_state);
1702         int ret;
1703
1704         ret = tsocket_simple_int_recv(req, perrno);
1705         if (ret == 0) {
1706                 ret = state->ret;
1707         }
1708
1709         tevent_req_received(req);
1710         return ret;
1711 }
1712
/* Per-request state of a stream writev operation. */
struct tstream_bsd_writev_state {
        /* stream the request writes to */
        struct tstream_context *stream;

        /* private copy of the caller's vector; advanced as data is sent */
        struct iovec *vector;
        /* number of entries still left in vector */
        size_t count;

        /* bytes written so far, summed over all writev() calls */
        int ret;
};
1721
1722 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1723 {
1724         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1725                                   struct tstream_bsd);
1726
1727         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1728
1729         return 0;
1730 }
1731
1732 static void tstream_bsd_writev_handler(void *private_data);
1733
1734 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1735                                                  struct tevent_context *ev,
1736                                                  struct tstream_context *stream,
1737                                                  const struct iovec *vector,
1738                                                  size_t count)
1739 {
1740         struct tevent_req *req;
1741         struct tstream_bsd_writev_state *state;
1742         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1743         int ret;
1744
1745         req = tevent_req_create(mem_ctx, &state,
1746                                 struct tstream_bsd_writev_state);
1747         if (!req) {
1748                 return NULL;
1749         }
1750
1751         state->stream   = stream;
1752         /* we make a copy of the vector so that we can modify it */
1753         state->vector   = talloc_array(state, struct iovec, count);
1754         if (tevent_req_nomem(state->vector, req)) {
1755                 goto post;
1756         }
1757         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1758         state->count    = count;
1759         state->ret      = 0;
1760
1761         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1762
1763         if (bsds->fd == -1) {
1764                 tevent_req_error(req, ENOTCONN);
1765                 goto post;
1766         }
1767
1768         /*
1769          * this is a fast path, not waiting for the
1770          * socket to become explicit writeable gains
1771          * about 10%-20% performance in benchmark tests.
1772          */
1773         tstream_bsd_writev_handler(req);
1774         if (!tevent_req_is_in_progress(req)) {
1775                 goto post;
1776         }
1777
1778         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1779                                                tstream_bsd_writev_handler,
1780                                                req);
1781         if (ret == -1) {
1782                 tevent_req_error(req, errno);
1783                 goto post;
1784         }
1785
1786         return req;
1787
1788  post:
1789         tevent_req_post(req, ev);
1790         return req;
1791 }
1792
1793 static void tstream_bsd_writev_handler(void *private_data)
1794 {
1795         struct tevent_req *req = talloc_get_type_abort(private_data,
1796                                  struct tevent_req);
1797         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1798                                         struct tstream_bsd_writev_state);
1799         struct tstream_context *stream = state->stream;
1800         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1801         ssize_t ret;
1802         int err;
1803         bool retry;
1804
1805         ret = writev(bsds->fd, state->vector, state->count);
1806         if (ret == 0) {
1807                 /* propagate end of file */
1808                 tevent_req_error(req, EPIPE);
1809                 return;
1810         }
1811         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1812         if (retry) {
1813                 /* retry later */
1814                 return;
1815         }
1816         if (tevent_req_error(req, err)) {
1817                 return;
1818         }
1819
1820         state->ret += ret;
1821
1822         while (ret > 0) {
1823                 if (ret < state->vector[0].iov_len) {
1824                         uint8_t *base;
1825                         base = (uint8_t *)state->vector[0].iov_base;
1826                         base += ret;
1827                         state->vector[0].iov_base = (void *)base;
1828                         state->vector[0].iov_len -= ret;
1829                         break;
1830                 }
1831                 ret -= state->vector[0].iov_len;
1832                 state->vector += 1;
1833                 state->count -= 1;
1834         }
1835
1836         /*
1837          * there're maybe some empty vectors at the end
1838          * which we need to skip, otherwise we would get
1839          * ret == 0 from the writev() call and return EPIPE
1840          */
1841         while (state->count > 0) {
1842                 if (state->vector[0].iov_len > 0) {
1843                         break;
1844                 }
1845                 state->vector += 1;
1846                 state->count -= 1;
1847         }
1848
1849         if (state->count > 0) {
1850                 /* we have more to read */
1851                 return;
1852         }
1853
1854         tevent_req_done(req);
1855 }
1856
1857 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1858 {
1859         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1860                                         struct tstream_bsd_writev_state);
1861         int ret;
1862
1863         ret = tsocket_simple_int_recv(req, perrno);
1864         if (ret == 0) {
1865                 ret = state->ret;
1866         }
1867
1868         tevent_req_received(req);
1869         return ret;
1870 }
1871
/*
 * Per-request state of a disconnect request.
 *
 * Nothing needs to be remembered between _send and _recv; the single
 * placeholder member only keeps the struct non-empty.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
1875
1876 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1877                                                      struct tevent_context *ev,
1878                                                      struct tstream_context *stream)
1879 {
1880         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1881         struct tevent_req *req;
1882         struct tstream_bsd_disconnect_state *state;
1883         int ret;
1884         int err;
1885         bool dummy;
1886
1887         req = tevent_req_create(mem_ctx, &state,
1888                                 struct tstream_bsd_disconnect_state);
1889         if (req == NULL) {
1890                 return NULL;
1891         }
1892
1893         if (bsds->fd == -1) {
1894                 tevent_req_error(req, ENOTCONN);
1895                 goto post;
1896         }
1897
1898         TALLOC_FREE(bsds->fde);
1899         ret = close(bsds->fd);
1900         bsds->fd = -1;
1901         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1902         if (tevent_req_error(req, err)) {
1903                 goto post;
1904         }
1905
1906         tevent_req_done(req);
1907 post:
1908         tevent_req_post(req, ev);
1909         return req;
1910 }
1911
/*
 * Collect the result of a disconnect request.
 *
 * Passes through the return value of tsocket_simple_int_recv() (which
 * also receives perrno) and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1922
1923 static const struct tstream_context_ops tstream_bsd_ops = {
1924         .name                   = "bsd",
1925
1926         .pending_bytes          = tstream_bsd_pending_bytes,
1927
1928         .readv_send             = tstream_bsd_readv_send,
1929         .readv_recv             = tstream_bsd_readv_recv,
1930
1931         .writev_send            = tstream_bsd_writev_send,
1932         .writev_recv            = tstream_bsd_writev_recv,
1933
1934         .disconnect_send        = tstream_bsd_disconnect_send,
1935         .disconnect_recv        = tstream_bsd_disconnect_recv,
1936 };
1937
1938 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1939 {
1940         TALLOC_FREE(bsds->fde);
1941         if (bsds->fd != -1) {
1942                 close(bsds->fd);
1943                 bsds->fd = -1;
1944         }
1945         return 0;
1946 }
1947
1948 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1949                                  int fd,
1950                                  struct tstream_context **_stream,
1951                                  const char *location)
1952 {
1953         struct tstream_context *stream;
1954         struct tstream_bsd *bsds;
1955
1956         stream = tstream_context_create(mem_ctx,
1957                                         &tstream_bsd_ops,
1958                                         &bsds,
1959                                         struct tstream_bsd,
1960                                         location);
1961         if (!stream) {
1962                 return -1;
1963         }
1964         ZERO_STRUCTP(bsds);
1965         bsds->fd = fd;
1966         talloc_set_destructor(bsds, tstream_bsd_destructor);
1967
1968         *_stream = stream;
1969         return 0;
1970 }
1971
/*
 * Per-request state of an asynchronous connect.
 *
 * fd     - socket being connected, or -1 when the state does not own
 *          a descriptor
 * fde    - fd event used while waiting for the connect to finish
 * stream - not referenced by the connect code visible in this file;
 *          the tag was misspelled "tstream_conext", which declared an
 *          unrelated incomplete type instead of struct tstream_context
 */
struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        struct tstream_context *stream;
};
1977
1978 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1979 {
1980         TALLOC_FREE(state->fde);
1981         if (state->fd != -1) {
1982                 close(state->fd);
1983                 state->fd = -1;
1984         }
1985
1986         return 0;
1987 }
1988
/*
 * Forward declaration: fd event callback registered by
 * tstream_bsd_connect_send() for a connect still in progress.
 */
static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
                                            struct tevent_fd *fde,
                                            uint16_t flags,
                                            void *private_data);
1993
1994 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1995                                         struct tevent_context *ev,
1996                                         int sys_errno,
1997                                         const struct tsocket_address *local,
1998                                         const struct tsocket_address *remote)
1999 {
2000         struct tevent_req *req;
2001         struct tstream_bsd_connect_state *state;
2002         struct tsocket_address_bsd *lbsda =
2003                 talloc_get_type_abort(local->private_data,
2004                 struct tsocket_address_bsd);
2005         struct tsocket_address_bsd *rbsda =
2006                 talloc_get_type_abort(remote->private_data,
2007                 struct tsocket_address_bsd);
2008         int ret;
2009         int err;
2010         bool retry;
2011         bool do_bind = false;
2012         bool do_reuseaddr = false;
2013         bool do_ipv6only = false;
2014         bool is_inet = false;
2015         int sa_fam = lbsda->u.sa.sa_family;
2016
2017         req = tevent_req_create(mem_ctx, &state,
2018                                 struct tstream_bsd_connect_state);
2019         if (!req) {
2020                 return NULL;
2021         }
2022         state->fd = -1;
2023         state->fde = NULL;
2024
2025         talloc_set_destructor(state, tstream_bsd_connect_destructor);
2026
2027         /* give the wrappers a chance to report an error */
2028         if (sys_errno != 0) {
2029                 tevent_req_error(req, sys_errno);
2030                 goto post;
2031         }
2032
2033         switch (lbsda->u.sa.sa_family) {
2034         case AF_UNIX:
2035                 if (lbsda->u.un.sun_path[0] != 0) {
2036                         do_reuseaddr = true;
2037                         do_bind = true;
2038                 }
2039                 break;
2040         case AF_INET:
2041                 if (lbsda->u.in.sin_port != 0) {
2042                         do_reuseaddr = true;
2043                         do_bind = true;
2044                 }
2045                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
2046                         do_bind = true;
2047                 }
2048                 is_inet = true;
2049                 break;
2050 #ifdef HAVE_IPV6
2051         case AF_INET6:
2052                 if (lbsda->u.in6.sin6_port != 0) {
2053                         do_reuseaddr = true;
2054                         do_bind = true;
2055                 }
2056                 if (memcmp(&in6addr_any,
2057                            &lbsda->u.in6.sin6_addr,
2058                            sizeof(in6addr_any)) != 0) {
2059                         do_bind = true;
2060                 }
2061                 is_inet = true;
2062                 do_ipv6only = true;
2063                 break;
2064 #endif
2065         default:
2066                 tevent_req_error(req, EINVAL);
2067                 goto post;
2068         }
2069
2070         if (!do_bind && is_inet) {
2071                 sa_fam = rbsda->u.sa.sa_family;
2072                 switch (sa_fam) {
2073                 case AF_INET:
2074                         do_ipv6only = false;
2075                         break;
2076 #ifdef HAVE_IPV6
2077                 case AF_INET6:
2078                         do_ipv6only = true;
2079                         break;
2080 #endif
2081                 }
2082         }
2083
2084         state->fd = socket(sa_fam, SOCK_STREAM, 0);
2085         if (state->fd == -1) {
2086                 tevent_req_error(req, errno);
2087                 goto post;
2088         }
2089
2090         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2091         if (state->fd == -1) {
2092                 tevent_req_error(req, errno);
2093                 goto post;
2094         }
2095
2096 #ifdef HAVE_IPV6
2097         if (do_ipv6only) {
2098                 int val = 1;
2099
2100                 ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
2101                                  (const void *)&val, sizeof(val));
2102                 if (ret == -1) {
2103                         tevent_req_error(req, errno);
2104                         goto post;
2105                 }
2106         }
2107 #endif
2108
2109         if (do_reuseaddr) {
2110                 int val = 1;
2111
2112                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2113                                  (const void *)&val, sizeof(val));
2114                 if (ret == -1) {
2115                         tevent_req_error(req, errno);
2116                         goto post;
2117                 }
2118         }
2119
2120         if (do_bind) {
2121                 ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
2122                 if (ret == -1) {
2123                         tevent_req_error(req, errno);
2124                         goto post;
2125                 }
2126         }
2127
2128         if (rbsda->u.sa.sa_family != sa_fam) {
2129                 tevent_req_error(req, EINVAL);
2130                 goto post;
2131         }
2132
2133         ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
2134         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2135         if (retry) {
2136                 /* retry later */
2137                 goto async;
2138         }
2139         if (tevent_req_error(req, err)) {
2140                 goto post;
2141         }
2142
2143         tevent_req_done(req);
2144         goto post;
2145
2146  async:
2147         state->fde = tevent_add_fd(ev, state,
2148                                    state->fd,
2149                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2150                                    tstream_bsd_connect_fde_handler,
2151                                    req);
2152         if (tevent_req_nomem(state->fde, req)) {
2153                 goto post;
2154         }
2155
2156         return req;
2157
2158  post:
2159         tevent_req_post(req, ev);
2160         return req;
2161 }
2162
2163 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2164                                             struct tevent_fd *fde,
2165                                             uint16_t flags,
2166                                             void *private_data)
2167 {
2168         struct tevent_req *req = talloc_get_type_abort(private_data,
2169                                  struct tevent_req);
2170         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2171                                         struct tstream_bsd_connect_state);
2172         int ret;
2173         int error=0;
2174         socklen_t len = sizeof(error);
2175         int err;
2176         bool retry;
2177
2178         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2179         if (ret == 0) {
2180                 if (error != 0) {
2181                         errno = error;
2182                         ret = -1;
2183                 }
2184         }
2185         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2186         if (retry) {
2187                 /* retry later */
2188                 return;
2189         }
2190         if (tevent_req_error(req, err)) {
2191                 return;
2192         }
2193
2194         tevent_req_done(req);
2195 }
2196
2197 static int tstream_bsd_connect_recv(struct tevent_req *req,
2198                                     int *perrno,
2199                                     TALLOC_CTX *mem_ctx,
2200                                     struct tstream_context **stream,
2201                                     const char *location)
2202 {
2203         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2204                                         struct tstream_bsd_connect_state);
2205         int ret;
2206
2207         ret = tsocket_simple_int_recv(req, perrno);
2208         if (ret == 0) {
2209                 ret = _tstream_bsd_existing_socket(mem_ctx,
2210                                                    state->fd,
2211                                                    stream,
2212                                                    location);
2213                 if (ret == -1) {
2214                         *perrno = errno;
2215                         goto done;
2216                 }
2217                 TALLOC_FREE(state->fde);
2218                 state->fd = -1;
2219         }
2220
2221 done:
2222         tevent_req_received(req);
2223         return ret;
2224 }
2225
2226 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2227                                         struct tevent_context *ev,
2228                                         const struct tsocket_address *local,
2229                                         const struct tsocket_address *remote)
2230 {
2231         struct tsocket_address_bsd *lbsda =
2232                 talloc_get_type_abort(local->private_data,
2233                 struct tsocket_address_bsd);
2234         struct tevent_req *req;
2235         int sys_errno = 0;
2236
2237         switch (lbsda->u.sa.sa_family) {
2238         case AF_INET:
2239                 break;
2240 #ifdef HAVE_IPV6
2241         case AF_INET6:
2242                 break;
2243 #endif
2244         default:
2245                 sys_errno = EINVAL;
2246                 break;
2247         }
2248
2249         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2250
2251         return req;
2252 }
2253
2254 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2255                                    int *perrno,
2256                                    TALLOC_CTX *mem_ctx,
2257                                    struct tstream_context **stream,
2258                                    const char *location)
2259 {
2260         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2261 }
2262
2263 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2264                                         struct tevent_context *ev,
2265                                         const struct tsocket_address *local,
2266                                         const struct tsocket_address *remote)
2267 {
2268         struct tsocket_address_bsd *lbsda =
2269                 talloc_get_type_abort(local->private_data,
2270                 struct tsocket_address_bsd);
2271         struct tevent_req *req;
2272         int sys_errno = 0;
2273
2274         switch (lbsda->u.sa.sa_family) {
2275         case AF_UNIX:
2276                 break;
2277         default:
2278                 sys_errno = EINVAL;
2279                 break;
2280         }
2281
2282         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2283
2284         return req;
2285 }
2286
2287 int _tstream_unix_connect_recv(struct tevent_req *req,
2288                                       int *perrno,
2289                                       TALLOC_CTX *mem_ctx,
2290                                       struct tstream_context **stream,
2291                                       const char *location)
2292 {
2293         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2294 }
2295
2296 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2297                              struct tstream_context **_stream1,
2298                              TALLOC_CTX *mem_ctx2,
2299                              struct tstream_context **_stream2,
2300                              const char *location)
2301 {
2302         int ret;
2303         int fds[2];
2304         int fd1;
2305         int fd2;
2306         struct tstream_context *stream1 = NULL;
2307         struct tstream_context *stream2 = NULL;
2308
2309         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2310         if (ret == -1) {
2311                 return -1;
2312         }
2313         fd1 = fds[0];
2314         fd2 = fds[1];
2315
2316         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2317         if (fd1 == -1) {
2318                 int sys_errno = errno;
2319                 close(fd2);
2320                 errno = sys_errno;
2321                 return -1;
2322         }
2323
2324         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2325         if (fd2 == -1) {
2326                 int sys_errno = errno;
2327                 close(fd1);
2328                 errno = sys_errno;
2329                 return -1;
2330         }
2331
2332         ret = _tstream_bsd_existing_socket(mem_ctx1,
2333                                            fd1,
2334                                            &stream1,
2335                                            location);
2336         if (ret == -1) {
2337                 int sys_errno = errno;
2338                 close(fd1);
2339                 close(fd2);
2340                 errno = sys_errno;
2341                 return -1;
2342         }
2343
2344         ret = _tstream_bsd_existing_socket(mem_ctx2,
2345                                            fd2,
2346                                            &stream2,
2347                                            location);
2348         if (ret == -1) {
2349                 int sys_errno = errno;
2350                 talloc_free(stream1);
2351                 close(fd2);
2352                 errno = sys_errno;
2353                 return -1;
2354         }
2355
2356         *_stream1 = stream1;
2357         *_stream2 = stream2;
2358         return 0;
2359 }
2360