2b0a24a58ce23fe16282e7c53602b7a404ca6248
[ira/wip.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Translate the outcome of a socket system call into an error code.
 *
 * ret        return value of the system call
 * sys_errno  errno value sampled right after the call
 * retry      out: true when the failure is transient (EINTR,
 *            EINPROGRESS, EAGAIN, EWOULDBLOCK), false otherwise
 *
 * Returns 0 when ret >= 0, EIO when ret is a negative value other than
 * -1 or when no errno was recorded, and sys_errno in all other cases.
 */
static int tsocket_bsd_error_from_errno(int ret,
                                        int sys_errno,
                                        bool *retry)
{
        bool transient;

        *retry = false;

        if (ret >= 0) {
                return 0;
        }

        /* only -1 together with a non-zero errno is a well-formed failure */
        if (ret != -1 || sys_errno == 0) {
                return EIO;
        }

        transient = (sys_errno == EINTR ||
                     sys_errno == EINPROGRESS ||
                     sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
        transient = transient || (sys_errno == EWOULDBLOCK);
#endif

        *retry = transient;
        return sys_errno;
}
72
/*
 * Prepare a freshly created descriptor for use by the socket layer.
 *
 * If high_fd is true the descriptor is dup()ed until its number is at
 * least 3 and the low-numbered originals are closed. Afterwards the
 * descriptor is switched to non-blocking mode and, where FD_CLOEXEC is
 * available, marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. On
 * failure the descriptor has been closed; an input of -1 is passed
 * through as -1 without touching errno.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
        int low_fds[3];
        int num_low = 0;
        int saved_errno = 0;
        int fl;
        int idx;

        if (fd == -1) {
                return -1;
        }

        if (high_fd) {
                /* keep dup()ing until the descriptor number is >= 3 */
                while (fd < 3) {
                        low_fds[num_low] = fd;
                        num_low += 1;

                        fd = dup(fd);
                        if (fd == -1) {
                                saved_errno = errno;
                                break;
                        }
                }

                /* the low numbered descriptors are not needed any more */
                for (idx = 0; idx < num_low; idx++) {
                        close(low_fds[idx]);
                }

                if (fd == -1) {
                        /* close() above may have clobbered errno */
                        errno = saved_errno;
                        return -1;
                }
        }

        /* switch the descriptor to non-blocking mode */

#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

        fl = fcntl(fd, F_GETFL);
        if (fl == -1) {
                goto fail;
        }

        if (fcntl(fd, F_SETFL, fl | FLAG_TO_SET) == -1) {
                goto fail;
        }

#undef FLAG_TO_SET

        /* the descriptor must not leak across exec() */
#ifdef FD_CLOEXEC
        fl = fcntl(fd, F_GETFD, 0);
        if (fl < 0) {
                goto fail;
        }

        if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
                goto fail;
        }
#endif
        return fd;

 fail:
        /* report the fcntl() error, not whatever close() leaves behind */
        saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
}
149
/*
 * Report how many bytes can be read from fd right now.
 *
 * Returns the byte count reported by FIONREAD when it is non-zero.
 * When nothing is readable the socket's pending error (SO_ERROR) is
 * checked: 0 is returned if there is none, otherwise -1 with errno set
 * to that error. -1 with errno set is also returned if ioctl() or
 * getsockopt() fail.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
        int avail = 0;
        int so_error = 0;
        socklen_t so_error_len = sizeof(so_error);
        int rc;

        rc = ioctl(fd, FIONREAD, &avail);
        if (rc == -1) {
                return -1;
        }
        if (rc != 0) {
                /* this should not be reached */
                errno = EIO;
                return -1;
        }

        if (avail != 0) {
                return avail;
        }

        /*
         * Nothing to read: look for a pending socket error. For
         * connected dgram sockets this is how ICMP error messages
         * are reported to the caller.
         */
        rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len);
        if (rc == -1) {
                return -1;
        }
        if (so_error != 0) {
                errno = so_error;
                return -1;
        }

        return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private data of a "bsd" tsocket_address: a single socket address kept
 * in a union, so the same bytes can be accessed through whichever
 * sockaddr variant matches u.sa.sa_family.
 */
struct tsocket_address_bsd {
        union {
                /* generic view, used to read sa_family */
                struct sockaddr sa;
                /* AF_INET view */
                struct sockaddr_in in;
#ifdef HAVE_IPV6
                /* AF_INET6 view */
                struct sockaddr_in6 in6;
#endif
                /* AF_UNIX view */
                struct sockaddr_un un;
                /* view used when the address is copied in or out as raw bytes */
                struct sockaddr_storage ss;
        } u;
};
203
204 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                        struct sockaddr *sa,
206                                        size_t sa_socklen,
207                                        struct tsocket_address **_addr,
208                                        const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         if (sa_socklen < sizeof(sa->sa_family)) {
214                 errno = EINVAL;
215                 return -1;
216         }
217
218         switch (sa->sa_family) {
219         case AF_UNIX:
220                 if (sa_socklen > sizeof(struct sockaddr_un)) {
221                         sa_socklen = sizeof(struct sockaddr_un);
222                 }
223                 break;
224         case AF_INET:
225                 if (sa_socklen < sizeof(struct sockaddr_in)) {
226                         errno = EINVAL;
227                         return -1;
228                 }
229                 sa_socklen = sizeof(struct sockaddr_in);
230                 break;
231 #ifdef HAVE_IPV6
232         case AF_INET6:
233                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
234                         errno = EINVAL;
235                         return -1;
236                 }
237                 sa_socklen = sizeof(struct sockaddr_in6);
238                 break;
239 #endif
240         default:
241                 errno = EAFNOSUPPORT;
242                 return -1;
243         }
244
245         if (sa_socklen > sizeof(struct sockaddr_storage)) {
246                 errno = EINVAL;
247                 return -1;
248         }
249
250         addr = tsocket_address_create(mem_ctx,
251                                       &tsocket_address_bsd_ops,
252                                       &bsda,
253                                       struct tsocket_address_bsd,
254                                       location);
255         if (!addr) {
256                 errno = ENOMEM;
257                 return -1;
258         }
259
260         ZERO_STRUCTP(bsda);
261
262         memcpy(&bsda->u.ss, sa, sa_socklen);
263
264         *_addr = addr;
265         return 0;
266 }
267
268 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
269                                      struct sockaddr *sa,
270                                      size_t sa_socklen)
271 {
272         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
273                                            struct tsocket_address_bsd);
274         ssize_t rlen = 0;
275
276         if (!bsda) {
277                 errno = EINVAL;
278                 return -1;
279         }
280
281         switch (bsda->u.sa.sa_family) {
282         case AF_UNIX:
283                 rlen = sizeof(struct sockaddr_un);
284                 break;
285         case AF_INET:
286                 rlen = sizeof(struct sockaddr_in);
287                 break;
288 #ifdef HAVE_IPV6
289         case AF_INET6:
290                 rlen = sizeof(struct sockaddr_in6);
291                 break;
292 #endif
293         default:
294                 errno = EAFNOSUPPORT;
295                 return -1;
296         }
297
298         if (sa_socklen < rlen) {
299                 errno = EINVAL;
300                 return -1;
301         }
302
303         if (sa_socklen > sizeof(struct sockaddr_storage)) {
304                 memset(sa, 0, sa_socklen);
305                 sa_socklen = sizeof(struct sockaddr_storage);
306         }
307
308         memcpy(sa, &bsda->u.ss, sa_socklen);
309         return rlen;
310 }
311
312 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
313                                        const char *fam,
314                                        const char *addr,
315                                        uint16_t port,
316                                        struct tsocket_address **_addr,
317                                        const char *location)
318 {
319         struct addrinfo hints;
320         struct addrinfo *result = NULL;
321         char port_str[6];
322         int ret;
323
324         ZERO_STRUCT(hints);
325         /*
326          * we use SOCKET_STREAM here to get just one result
327          * back from getaddrinfo().
328          */
329         hints.ai_socktype = SOCK_STREAM;
330         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
331
332         if (strcasecmp(fam, "ip") == 0) {
333                 hints.ai_family = AF_UNSPEC;
334                 if (!addr) {
335 #ifdef HAVE_IPV6
336                         addr = "::";
337 #else
338                         addr = "0.0.0.0";
339 #endif
340                 }
341         } else if (strcasecmp(fam, "ipv4") == 0) {
342                 hints.ai_family = AF_INET;
343                 if (!addr) {
344                         addr = "0.0.0.0";
345                 }
346 #ifdef HAVE_IPV6
347         } else if (strcasecmp(fam, "ipv6") == 0) {
348                 hints.ai_family = AF_INET6;
349                 if (!addr) {
350                         addr = "::";
351                 }
352 #endif
353         } else {
354                 errno = EAFNOSUPPORT;
355                 return -1;
356         }
357
358         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
359
360         ret = getaddrinfo(addr, port_str, &hints, &result);
361         if (ret != 0) {
362                 switch (ret) {
363                 case EAI_FAIL:
364                         errno = EINVAL;
365                         break;
366                 }
367                 ret = -1;
368                 goto done;
369         }
370
371         if (result->ai_socktype != SOCK_STREAM) {
372                 errno = EINVAL;
373                 ret = -1;
374                 goto done;
375         }
376
377         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
378                                                   result->ai_addr,
379                                                   result->ai_addrlen,
380                                                   _addr,
381                                                   location);
382
383 done:
384         if (result) {
385                 freeaddrinfo(result);
386         }
387         return ret;
388 }
389
390 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
391                                        TALLOC_CTX *mem_ctx)
392 {
393         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
394                                            struct tsocket_address_bsd);
395         char addr_str[INET6_ADDRSTRLEN+1];
396         const char *str;
397
398         if (!bsda) {
399                 errno = EINVAL;
400                 return NULL;
401         }
402
403         switch (bsda->u.sa.sa_family) {
404         case AF_INET:
405                 str = inet_ntop(bsda->u.in.sin_family,
406                                 &bsda->u.in.sin_addr,
407                                 addr_str, sizeof(addr_str));
408                 break;
409 #ifdef HAVE_IPV6
410         case AF_INET6:
411                 str = inet_ntop(bsda->u.in6.sin6_family,
412                                 &bsda->u.in6.sin6_addr,
413                                 addr_str, sizeof(addr_str));
414                 break;
415 #endif
416         default:
417                 errno = EINVAL;
418                 return NULL;
419         }
420
421         if (!str) {
422                 return NULL;
423         }
424
425         return talloc_strdup(mem_ctx, str);
426 }
427
428 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
429 {
430         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
431                                            struct tsocket_address_bsd);
432         uint16_t port = 0;
433
434         if (!bsda) {
435                 errno = EINVAL;
436                 return 0;
437         }
438
439         switch (bsda->u.sa.sa_family) {
440         case AF_INET:
441                 port = ntohs(bsda->u.in.sin_port);
442                 break;
443 #ifdef HAVE_IPV6
444         case AF_INET6:
445                 port = ntohs(bsda->u.in6.sin6_port);
446                 break;
447 #endif
448         default:
449                 errno = EINVAL;
450                 return 0;
451         }
452
453         return port;
454 }
455
456 int tsocket_address_inet_set_port(struct tsocket_address *addr,
457                                   uint16_t port)
458 {
459         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
460                                            struct tsocket_address_bsd);
461
462         if (!bsda) {
463                 errno = EINVAL;
464                 return -1;
465         }
466
467         switch (bsda->u.sa.sa_family) {
468         case AF_INET:
469                 bsda->u.in.sin_port = htons(port);
470                 break;
471 #ifdef HAVE_IPV6
472         case AF_INET6:
473                 bsda->u.in6.sin6_port = htons(port);
474                 break;
475 #endif
476         default:
477                 errno = EINVAL;
478                 return -1;
479         }
480
481         return 0;
482 }
483
484 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
485                                     const char *path,
486                                     struct tsocket_address **_addr,
487                                     const char *location)
488 {
489         struct sockaddr_un un;
490         void *p = &un;
491         int ret;
492
493         if (!path) {
494                 path = "";
495         }
496
497         if (strlen(path) > sizeof(un.sun_path)-1) {
498                 errno = ENAMETOOLONG;
499                 return -1;
500         }
501
502         ZERO_STRUCT(un);
503         un.sun_family = AF_UNIX;
504         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
505
506         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
507                                                  (struct sockaddr *)p,
508                                                  sizeof(un),
509                                                  _addr,
510                                                  location);
511
512         return ret;
513 }
514
515 char *tsocket_address_unix_path(const struct tsocket_address *addr,
516                                 TALLOC_CTX *mem_ctx)
517 {
518         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
519                                            struct tsocket_address_bsd);
520         const char *str;
521
522         if (!bsda) {
523                 errno = EINVAL;
524                 return NULL;
525         }
526
527         switch (bsda->u.sa.sa_family) {
528         case AF_UNIX:
529                 str = bsda->u.un.sun_path;
530                 break;
531         default:
532                 errno = EINVAL;
533                 return NULL;
534         }
535
536         return talloc_strdup(mem_ctx, str);
537 }
538
539 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
540                                         TALLOC_CTX *mem_ctx)
541 {
542         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
543                                            struct tsocket_address_bsd);
544         char *str;
545         char *addr_str;
546         const char *prefix = NULL;
547         uint16_t port;
548
549         switch (bsda->u.sa.sa_family) {
550         case AF_UNIX:
551                 return talloc_asprintf(mem_ctx, "unix:%s",
552                                        bsda->u.un.sun_path);
553         case AF_INET:
554                 prefix = "ipv4";
555                 break;
556 #ifdef HAVE_IPV6
557         case AF_INET6:
558                 prefix = "ipv6";
559                 break;
560 #endif
561         default:
562                 errno = EINVAL;
563                 return NULL;
564         }
565
566         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
567         if (!addr_str) {
568                 return NULL;
569         }
570
571         port = tsocket_address_inet_port(addr);
572
573         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
574                               prefix, addr_str, port);
575         talloc_free(addr_str);
576
577         return str;
578 }
579
580 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
581                                                          TALLOC_CTX *mem_ctx,
582                                                          const char *location)
583 {
584         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
585                                            struct tsocket_address_bsd);
586         struct tsocket_address *copy;
587         int ret;
588
589         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
590                                                  &bsda->u.sa,
591                                                  sizeof(bsda->u.ss),
592                                                  &copy,
593                                                  location);
594         if (ret != 0) {
595                 return NULL;
596         }
597
598         return copy;
599 }
600
/*
 * Operations table handed to tsocket_address_create() for every address
 * created in this file.
 */
static const struct tsocket_address_ops tsocket_address_bsd_ops = {
        .name           = "bsd",
        /* printable form, e.g. "ipv4:<addr>:<port>" or "unix:<path>" */
        .string         = tsocket_address_bsd_string,
        /* duplicate the address onto another talloc context */
        .copy           = tsocket_address_bsd_copy,
};
606
/* Per-socket state of a BSD datagram socket. */
struct tdgram_bsd {
        /* socket descriptor; the send functions report ENOTCONN when it is -1 */
        int fd;

        /* event context the fde was registered on, kept for comparison only */
        void *event_ptr;
        /* fd event shared by the read and the write side */
        struct tevent_fd *fde;

        /* callback (and its argument) run when the fd event reports readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) run when the fd event reports writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
618
619 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
620                                    struct tevent_fd *fde,
621                                    uint16_t flags,
622                                    void *private_data)
623 {
624         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
625                                   struct tdgram_bsd);
626
627         if (flags & TEVENT_FD_WRITE) {
628                 bsds->writeable_handler(bsds->writeable_private);
629                 return;
630         }
631         if (flags & TEVENT_FD_READ) {
632                 if (!bsds->readable_handler) {
633                         TEVENT_FD_NOT_READABLE(bsds->fde);
634                         return;
635                 }
636                 bsds->readable_handler(bsds->readable_private);
637                 return;
638         }
639 }
640
/*
 * Install or remove the callback that tdgram_bsd_fde_handler() runs
 * when the socket becomes readable.
 *
 * bsds          socket state
 * ev            event context to register on; NULL removes the handler
 * handler       callback to install (must be NULL when ev is NULL)
 * private_data  argument passed to the callback
 *
 * Returns 0 on success, -1 with errno set to EINVAL (handler given
 * without ev, or ev differs from the context still in use by a
 * handler) or ENOMEM (fd event could not be created).
 */
static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
                                           struct tevent_context *ev,
                                           void (*handler)(void *private_data),
                                           void *private_data)
{
        if (ev == NULL) {
                if (handler) {
                        errno = EINVAL;
                        return -1;
                }
                if (!bsds->readable_handler) {
                        return 0;
                }
                /*
                 * Only forget the callback here; the read flag of the fd
                 * event is left alone and gets cleared by
                 * tdgram_bsd_fde_handler() once it fires without a handler.
                 */
                bsds->readable_handler = NULL;
                bsds->readable_private = NULL;

                return 0;
        }

        /* read and write must use the same tevent_context */
        if (bsds->event_ptr != ev) {
                if (bsds->readable_handler || bsds->writeable_handler) {
                        errno = EINVAL;
                        return -1;
                }
                /* no handler is active, so the old registration can go */
                bsds->event_ptr = NULL;
                TALLOC_FREE(bsds->fde);
        }

        /*
         * NOTE(review): bsds->fde may be NULL at this point; this relies
         * on tevent_fd_get_flags() returning 0 for NULL - confirm.
         */
        if (tevent_fd_get_flags(bsds->fde) == 0) {
                /* no usable fd event: create one that watches for read */
                TALLOC_FREE(bsds->fde);

                bsds->fde = tevent_add_fd(ev, bsds,
                                          bsds->fd, TEVENT_FD_READ,
                                          tdgram_bsd_fde_handler,
                                          bsds);
                if (!bsds->fde) {
                        errno = ENOMEM;
                        return -1;
                }

                /* cache the event context we're running on */
                bsds->event_ptr = ev;
        } else if (!bsds->readable_handler) {
                /* fd event exists already, just (re-)enable read notification */
                TEVENT_FD_READABLE(bsds->fde);
        }

        bsds->readable_handler = handler;
        bsds->readable_private = private_data;

        return 0;
}
693
/*
 * Install or remove the callback that tdgram_bsd_fde_handler() runs
 * when the socket becomes writeable.
 *
 * bsds          socket state
 * ev            event context to register on; NULL removes the handler
 * handler       callback to install (must be NULL when ev is NULL)
 * private_data  argument passed to the callback
 *
 * Returns 0 on success, -1 with errno set to EINVAL (handler given
 * without ev, or ev differs from the context still in use by a
 * handler) or ENOMEM (fd event could not be created).
 */
static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
                                            struct tevent_context *ev,
                                            void (*handler)(void *private_data),
                                            void *private_data)
{
        if (ev == NULL) {
                if (handler) {
                        errno = EINVAL;
                        return -1;
                }
                if (!bsds->writeable_handler) {
                        return 0;
                }
                /*
                 * Unlike the readable side, write notification is switched
                 * off right away: tdgram_bsd_fde_handler() calls the
                 * writeable handler without checking it for NULL.
                 */
                bsds->writeable_handler = NULL;
                bsds->writeable_private = NULL;
                TEVENT_FD_NOT_WRITEABLE(bsds->fde);

                return 0;
        }

        /* read and write must use the same tevent_context */
        if (bsds->event_ptr != ev) {
                if (bsds->readable_handler || bsds->writeable_handler) {
                        errno = EINVAL;
                        return -1;
                }
                /* no handler is active, so the old registration can go */
                bsds->event_ptr = NULL;
                TALLOC_FREE(bsds->fde);
        }

        /*
         * NOTE(review): bsds->fde may be NULL at this point; this relies
         * on tevent_fd_get_flags() returning 0 for NULL - confirm.
         */
        if (tevent_fd_get_flags(bsds->fde) == 0) {
                /* no usable fd event: create one that watches for write */
                TALLOC_FREE(bsds->fde);

                bsds->fde = tevent_add_fd(ev, bsds,
                                          bsds->fd, TEVENT_FD_WRITE,
                                          tdgram_bsd_fde_handler,
                                          bsds);
                if (!bsds->fde) {
                        errno = ENOMEM;
                        return -1;
                }

                /* cache the event context we're running on */
                bsds->event_ptr = ev;
        } else if (!bsds->writeable_handler) {
                /* fd event exists already, just enable write notification */
                TEVENT_FD_WRITEABLE(bsds->fde);
        }

        bsds->writeable_handler = handler;
        bsds->writeable_private = private_data;

        return 0;
}
747
/* State of one pending recvfrom request. */
struct tdgram_bsd_recvfrom_state {
        /* socket the request reads from */
        struct tdgram_context *dgram;

        /* receive buffer, allocated on this state once data is pending */
        uint8_t *buf;
        /* size of buf in bytes */
        size_t len;
        /* address storage handed to recvfrom(), allocated on this state */
        struct tsocket_address *src;
};
755
756 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
757 {
758         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
759                                   struct tdgram_bsd);
760
761         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
762
763         return 0;
764 }
765
766 static void tdgram_bsd_recvfrom_handler(void *private_data);
767
768 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
769                                         struct tevent_context *ev,
770                                         struct tdgram_context *dgram)
771 {
772         struct tevent_req *req;
773         struct tdgram_bsd_recvfrom_state *state;
774         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
775         int ret;
776
777         req = tevent_req_create(mem_ctx, &state,
778                                 struct tdgram_bsd_recvfrom_state);
779         if (!req) {
780                 return NULL;
781         }
782
783         state->dgram    = dgram;
784         state->buf      = NULL;
785         state->len      = 0;
786         state->src      = NULL;
787
788         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
789
790         if (bsds->fd == -1) {
791                 tevent_req_error(req, ENOTCONN);
792                 goto post;
793         }
794
795         /*
796          * this is a fast path, not waiting for the
797          * socket to become explicit readable gains
798          * about 10%-20% performance in benchmark tests.
799          */
800         tdgram_bsd_recvfrom_handler(req);
801         if (!tevent_req_is_in_progress(req)) {
802                 goto post;
803         }
804
805         ret = tdgram_bsd_set_readable_handler(bsds, ev,
806                                               tdgram_bsd_recvfrom_handler,
807                                               req);
808         if (ret == -1) {
809                 tevent_req_error(req, errno);
810                 goto post;
811         }
812
813         return req;
814
815  post:
816         tevent_req_post(req, ev);
817         return req;
818 }
819
/*
 * Try to receive one datagram for a pending recvfrom request.
 *
 * private_data is the tevent_req. The function returns without
 * touching the request when nothing can be read yet; otherwise it
 * finishes the request with the received data or with an error.
 */
static void tdgram_bsd_recvfrom_handler(void *private_data)
{
        struct tevent_req *req = talloc_get_type_abort(private_data,
                                 struct tevent_req);
        struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
                                        struct tdgram_bsd_recvfrom_state);
        struct tdgram_context *dgram = state->dgram;
        struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
        struct tsocket_address_bsd *bsda;
        ssize_t ret;
        struct sockaddr *sa = NULL;
        socklen_t sa_socklen = 0;
        int err;
        bool retry;

        /* ask how many bytes are pending so the buffer can be sized to fit */
        ret = tsocket_bsd_pending(bsds->fd);
        if (ret == 0) {
                /* retry later */
                return;
        }
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
                return;
        }
        if (tevent_req_error(req, err)) {
                return;
        }

        state->buf = talloc_array(state, uint8_t, ret);
        if (tevent_req_nomem(state->buf, req)) {
                return;
        }
        state->len = ret;

        /* recvfrom() writes the sender address straight into this object */
        state->src = tsocket_address_create(state,
                                            &tsocket_address_bsd_ops,
                                            &bsda,
                                            struct tsocket_address_bsd,
                                            __location__ "bsd_recvfrom");
        if (tevent_req_nomem(state->src, req)) {
                return;
        }

        ZERO_STRUCTP(bsda);

        sa = &bsda->u.sa;
        sa_socklen = sizeof(bsda->u.ss);
        /*
         * for unix sockets we can't use the size of sockaddr_storage
         * we would get EINVAL
         */
        /*
         * NOTE(review): bsda was zeroed just above, so sa_family is 0
         * here and this condition looks like it can never be true -
         * confirm how the socket family is meant to be determined.
         */
        if (bsda->u.sa.sa_family == AF_UNIX) {
                sa_socklen = sizeof(bsda->u.un);
        }

        ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
        err = tsocket_bsd_error_from_errno(ret, errno, &retry);
        if (retry) {
                /* retry later */
                return;
        }
        if (tevent_req_error(req, err)) {
                return;
        }

        /* the buffer was sized from the pending count, anything else is an error */
        if (ret != state->len) {
                tevent_req_error(req, EIO);
                return;
        }

        tevent_req_done(req);
}
893
894 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
895                                         int *perrno,
896                                         TALLOC_CTX *mem_ctx,
897                                         uint8_t **buf,
898                                         struct tsocket_address **src)
899 {
900         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
901                                         struct tdgram_bsd_recvfrom_state);
902         ssize_t ret;
903
904         ret = tsocket_simple_int_recv(req, perrno);
905         if (ret == 0) {
906                 *buf = talloc_move(mem_ctx, &state->buf);
907                 ret = state->len;
908                 if (src) {
909                         *src = talloc_move(mem_ctx, &state->src);
910                 }
911         }
912
913         tevent_req_received(req);
914         return ret;
915 }
916
/* State of one pending sendto request. */
struct tdgram_bsd_sendto_state {
        /* socket the request writes to */
        struct tdgram_context *dgram;

        /* caller's payload; only the pointer is stored, the data is not copied */
        const uint8_t *buf;
        /* payload length in bytes */
        size_t len;
        /* caller's destination address (pointer stored, not copied) */
        const struct tsocket_address *dst;

        /* result of the request; initialised to -1 when the request starts */
        ssize_t ret;
};
926
927 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
928 {
929         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
930                                   struct tdgram_bsd);
931
932         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
933
934         return 0;
935 }
936
937 static void tdgram_bsd_sendto_handler(void *private_data);
938
939 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
940                                                  struct tevent_context *ev,
941                                                  struct tdgram_context *dgram,
942                                                  const uint8_t *buf,
943                                                  size_t len,
944                                                  const struct tsocket_address *dst)
945 {
946         struct tevent_req *req;
947         struct tdgram_bsd_sendto_state *state;
948         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
949         int ret;
950
951         req = tevent_req_create(mem_ctx, &state,
952                                 struct tdgram_bsd_sendto_state);
953         if (!req) {
954                 return NULL;
955         }
956
957         state->dgram    = dgram;
958         state->buf      = buf;
959         state->len      = len;
960         state->dst      = dst;
961         state->ret      = -1;
962
963         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
964
965         if (bsds->fd == -1) {
966                 tevent_req_error(req, ENOTCONN);
967                 goto post;
968         }
969
970         /*
971          * this is a fast path, not waiting for the
972          * socket to become explicit writeable gains
973          * about 10%-20% performance in benchmark tests.
974          */
975         tdgram_bsd_sendto_handler(req);
976         if (!tevent_req_is_in_progress(req)) {
977                 goto post;
978         }
979
980         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
981                                                tdgram_bsd_sendto_handler,
982                                                req);
983         if (ret == -1) {
984                 tevent_req_error(req, errno);
985                 goto post;
986         }
987
988         return req;
989
990  post:
991         tevent_req_post(req, ev);
992         return req;
993 }
994
995 static void tdgram_bsd_sendto_handler(void *private_data)
996 {
997         struct tevent_req *req = talloc_get_type_abort(private_data,
998                                  struct tevent_req);
999         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1000                                         struct tdgram_bsd_sendto_state);
1001         struct tdgram_context *dgram = state->dgram;
1002         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1003         struct sockaddr *sa = NULL;
1004         socklen_t sa_socklen = 0;
1005         ssize_t ret;
1006         int err;
1007         bool retry;
1008
1009         if (state->dst) {
1010                 struct tsocket_address_bsd *bsda =
1011                         talloc_get_type(state->dst->private_data,
1012                         struct tsocket_address_bsd);
1013
1014                 sa = &bsda->u.sa;
1015                 sa_socklen = sizeof(bsda->u.ss);
1016                 /*
1017                  * for unix sockets we can't use the size of sockaddr_storage
1018                  * we would get EINVAL
1019                  */
1020                 if (bsda->u.sa.sa_family == AF_UNIX) {
1021                         sa_socklen = sizeof(bsda->u.un);
1022                 }
1023         }
1024
1025         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1026         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1027         if (retry) {
1028                 /* retry later */
1029                 return;
1030         }
1031         if (tevent_req_error(req, err)) {
1032                 return;
1033         }
1034
1035         state->ret = ret;
1036
1037         tevent_req_done(req);
1038 }
1039
1040 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1041 {
1042         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1043                                         struct tdgram_bsd_sendto_state);
1044         ssize_t ret;
1045
1046         ret = tsocket_simple_int_recv(req, perrno);
1047         if (ret == 0) {
1048                 ret = state->ret;
1049         }
1050
1051         tevent_req_received(req);
1052         return ret;
1053 }
1054
/*
 * State of a datagram disconnect request. Nothing needs to be
 * remembered, the single member is only a placeholder.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy; /* unused */
};
1058
1059 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1060                                                      struct tevent_context *ev,
1061                                                      struct tdgram_context *dgram)
1062 {
1063         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1064         struct tevent_req *req;
1065         struct tdgram_bsd_disconnect_state *state;
1066         int ret;
1067         int err;
1068         bool dummy;
1069
1070         req = tevent_req_create(mem_ctx, &state,
1071                                 struct tdgram_bsd_disconnect_state);
1072         if (req == NULL) {
1073                 return NULL;
1074         }
1075
1076         if (bsds->fd == -1) {
1077                 tevent_req_error(req, ENOTCONN);
1078                 goto post;
1079         }
1080
1081         ret = close(bsds->fd);
1082         bsds->fd = -1;
1083         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1084         if (tevent_req_error(req, err)) {
1085                 goto post;
1086         }
1087
1088         tevent_req_done(req);
1089 post:
1090         tevent_req_post(req, ev);
1091         return req;
1092 }
1093
/*
 * Collect the result of a datagram disconnect request and mark the
 * request as received. The value (and perrno) come straight from
 * tsocket_simple_int_recv().
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1104
1105 static const struct tdgram_context_ops tdgram_bsd_ops = {
1106         .name                   = "bsd",
1107
1108         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1109         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1110
1111         .sendto_send            = tdgram_bsd_sendto_send,
1112         .sendto_recv            = tdgram_bsd_sendto_recv,
1113
1114         .disconnect_send        = tdgram_bsd_disconnect_send,
1115         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1116 };
1117
1118 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1119 {
1120         TALLOC_FREE(bsds->fde);
1121         if (bsds->fd != -1) {
1122                 close(bsds->fd);
1123                 bsds->fd = -1;
1124         }
1125         return 0;
1126 }
1127
1128 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1129                                    const struct tsocket_address *remote,
1130                                    bool broadcast,
1131                                    TALLOC_CTX *mem_ctx,
1132                                    struct tdgram_context **_dgram,
1133                                    const char *location)
1134 {
1135         struct tsocket_address_bsd *lbsda =
1136                 talloc_get_type_abort(local->private_data,
1137                 struct tsocket_address_bsd);
1138         struct tsocket_address_bsd *rbsda = NULL;
1139         struct tdgram_context *dgram;
1140         struct tdgram_bsd *bsds;
1141         int fd;
1142         int ret;
1143         bool do_bind = false;
1144         bool do_reuseaddr = false;
1145         bool is_inet = false;
1146         int sa_fam = lbsda->u.sa.sa_family;
1147         socklen_t sa_socklen = sizeof(lbsda->u.ss);
1148
1149         if (remote) {
1150                 rbsda = talloc_get_type_abort(remote->private_data,
1151                         struct tsocket_address_bsd);
1152         }
1153
1154         switch (lbsda->u.sa.sa_family) {
1155         case AF_UNIX:
1156                 if (broadcast) {
1157                         errno = EINVAL;
1158                         return -1;
1159                 }
1160                 if (lbsda->u.un.sun_path[0] != 0) {
1161                         do_reuseaddr = true;
1162                         do_bind = true;
1163                 }
1164                 /*
1165                  * for unix sockets we can't use the size of sockaddr_storage
1166                  * we would get EINVAL
1167                  */
1168                 sa_socklen = sizeof(lbsda->u.un);
1169                 break;
1170         case AF_INET:
1171                 if (lbsda->u.in.sin_port != 0) {
1172                         do_reuseaddr = true;
1173                         do_bind = true;
1174                 }
1175                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1176                         do_bind = true;
1177                 }
1178                 is_inet = true;
1179                 sa_socklen = sizeof(rbsda->u.in);
1180                 break;
1181 #ifdef HAVE_IPV6
1182         case AF_INET6:
1183                 if (lbsda->u.in6.sin6_port != 0) {
1184                         do_reuseaddr = true;
1185                         do_bind = true;
1186                 }
1187                 if (memcmp(&in6addr_any,
1188                            &lbsda->u.in6.sin6_addr,
1189                            sizeof(in6addr_any)) != 0) {
1190                         do_bind = true;
1191                 }
1192                 is_inet = true;
1193                 sa_socklen = sizeof(rbsda->u.in6);
1194                 break;
1195 #endif
1196         default:
1197                 errno = EINVAL;
1198                 return -1;
1199         }
1200
1201         if (!do_bind && is_inet && rbsda) {
1202                 sa_fam = rbsda->u.sa.sa_family;
1203                 switch (sa_fam) {
1204                 case AF_INET:
1205                         sa_socklen = sizeof(rbsda->u.in);
1206                         break;
1207 #ifdef HAVE_IPV6
1208                 case AF_INET6:
1209                         sa_socklen = sizeof(rbsda->u.in6);
1210                         break;
1211 #endif
1212                 }
1213         }
1214
1215         fd = socket(sa_fam, SOCK_DGRAM, 0);
1216         if (fd < 0) {
1217                 return fd;
1218         }
1219
1220         fd = tsocket_bsd_common_prepare_fd(fd, true);
1221         if (fd < 0) {
1222                 return fd;
1223         }
1224
1225         dgram = tdgram_context_create(mem_ctx,
1226                                       &tdgram_bsd_ops,
1227                                       &bsds,
1228                                       struct tdgram_bsd,
1229                                       location);
1230         if (!dgram) {
1231                 int saved_errno = errno;
1232                 close(fd);
1233                 errno = saved_errno;
1234                 return -1;
1235         }
1236         ZERO_STRUCTP(bsds);
1237         bsds->fd = fd;
1238         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1239
1240         if (broadcast) {
1241                 int val = 1;
1242
1243                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1244                                  (const void *)&val, sizeof(val));
1245                 if (ret == -1) {
1246                         int saved_errno = errno;
1247                         talloc_free(dgram);
1248                         errno = saved_errno;
1249                         return ret;
1250                 }
1251         }
1252
1253         if (do_reuseaddr) {
1254                 int val = 1;
1255
1256                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1257                                  (const void *)&val, sizeof(val));
1258                 if (ret == -1) {
1259                         int saved_errno = errno;
1260                         talloc_free(dgram);
1261                         errno = saved_errno;
1262                         return ret;
1263                 }
1264         }
1265
1266         if (do_bind) {
1267                 ret = bind(fd, &lbsda->u.sa, sa_socklen);
1268                 if (ret == -1) {
1269                         int saved_errno = errno;
1270                         talloc_free(dgram);
1271                         errno = saved_errno;
1272                         return ret;
1273                 }
1274         }
1275
1276         if (rbsda) {
1277                 if (rbsda->u.sa.sa_family != sa_fam) {
1278                         talloc_free(dgram);
1279                         errno = EINVAL;
1280                         return -1;
1281                 }
1282
1283                 ret = connect(fd, &rbsda->u.sa, sa_socklen);
1284                 if (ret == -1) {
1285                         int saved_errno = errno;
1286                         talloc_free(dgram);
1287                         errno = saved_errno;
1288                         return ret;
1289                 }
1290         }
1291
1292         *_dgram = dgram;
1293         return 0;
1294 }
1295
1296 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1297                             const struct tsocket_address *remote,
1298                             TALLOC_CTX *mem_ctx,
1299                             struct tdgram_context **dgram,
1300                             const char *location)
1301 {
1302         struct tsocket_address_bsd *lbsda =
1303                 talloc_get_type_abort(local->private_data,
1304                 struct tsocket_address_bsd);
1305         int ret;
1306
1307         switch (lbsda->u.sa.sa_family) {
1308         case AF_INET:
1309                 break;
1310 #ifdef HAVE_IPV6
1311         case AF_INET6:
1312                 break;
1313 #endif
1314         default:
1315                 errno = EINVAL;
1316                 return -1;
1317         }
1318
1319         ret = tdgram_bsd_dgram_socket(local, remote, false,
1320                                       mem_ctx, dgram, location);
1321
1322         return ret;
1323 }
1324
1325 int _tdgram_unix_socket(const struct tsocket_address *local,
1326                         const struct tsocket_address *remote,
1327                         TALLOC_CTX *mem_ctx,
1328                         struct tdgram_context **dgram,
1329                         const char *location)
1330 {
1331         struct tsocket_address_bsd *lbsda =
1332                 talloc_get_type_abort(local->private_data,
1333                 struct tsocket_address_bsd);
1334         int ret;
1335
1336         switch (lbsda->u.sa.sa_family) {
1337         case AF_UNIX:
1338                 break;
1339         default:
1340                 errno = EINVAL;
1341                 return -1;
1342         }
1343
1344         ret = tdgram_bsd_dgram_socket(local, remote, false,
1345                                       mem_ctx, dgram, location);
1346
1347         return ret;
1348 }
1349
/*
 * Private data of a stream context that is backed by a bsd socket.
 */
struct tstream_bsd {
        /* socket file descriptor, -1 once it was closed */
        int fd;

        /* event context the current fde was created on */
        void *event_ptr;
        /* fd event that watches the socket */
        struct tevent_fd *fde;

        /* callback and argument used when the socket is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback and argument used when the socket is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1361
1362 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1363                                     struct tevent_fd *fde,
1364                                     uint16_t flags,
1365                                     void *private_data)
1366 {
1367         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1368                                    struct tstream_bsd);
1369
1370         if (flags & TEVENT_FD_WRITE) {
1371                 bsds->writeable_handler(bsds->writeable_private);
1372                 return;
1373         }
1374         if (flags & TEVENT_FD_READ) {
1375                 if (!bsds->readable_handler) {
1376                         if (bsds->writeable_handler) {
1377                                 bsds->writeable_handler(bsds->writeable_private);
1378                                 return;
1379                         }
1380                         TEVENT_FD_NOT_READABLE(bsds->fde);
1381                         return;
1382                 }
1383                 bsds->readable_handler(bsds->readable_private);
1384                 return;
1385         }
1386 }
1387
1388 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1389                                             struct tevent_context *ev,
1390                                             void (*handler)(void *private_data),
1391                                             void *private_data)
1392 {
1393         if (ev == NULL) {
1394                 if (handler) {
1395                         errno = EINVAL;
1396                         return -1;
1397                 }
1398                 if (!bsds->readable_handler) {
1399                         return 0;
1400                 }
1401                 bsds->readable_handler = NULL;
1402                 bsds->readable_private = NULL;
1403
1404                 return 0;
1405         }
1406
1407         /* read and write must use the same tevent_context */
1408         if (bsds->event_ptr != ev) {
1409                 if (bsds->readable_handler || bsds->writeable_handler) {
1410                         errno = EINVAL;
1411                         return -1;
1412                 }
1413                 bsds->event_ptr = NULL;
1414                 TALLOC_FREE(bsds->fde);
1415         }
1416
1417         if (tevent_fd_get_flags(bsds->fde) == 0) {
1418                 TALLOC_FREE(bsds->fde);
1419
1420                 bsds->fde = tevent_add_fd(ev, bsds,
1421                                           bsds->fd, TEVENT_FD_READ,
1422                                           tstream_bsd_fde_handler,
1423                                           bsds);
1424                 if (!bsds->fde) {
1425                         errno = ENOMEM;
1426                         return -1;
1427                 }
1428
1429                 /* cache the event context we're running on */
1430                 bsds->event_ptr = ev;
1431         } else if (!bsds->readable_handler) {
1432                 TEVENT_FD_READABLE(bsds->fde);
1433         }
1434
1435         bsds->readable_handler = handler;
1436         bsds->readable_private = private_data;
1437
1438         return 0;
1439 }
1440
1441 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1442                                              struct tevent_context *ev,
1443                                              void (*handler)(void *private_data),
1444                                              void *private_data)
1445 {
1446         if (ev == NULL) {
1447                 if (handler) {
1448                         errno = EINVAL;
1449                         return -1;
1450                 }
1451                 if (!bsds->writeable_handler) {
1452                         return 0;
1453                 }
1454                 bsds->writeable_handler = NULL;
1455                 bsds->writeable_private = NULL;
1456                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1457
1458                 return 0;
1459         }
1460
1461         /* read and write must use the same tevent_context */
1462         if (bsds->event_ptr != ev) {
1463                 if (bsds->readable_handler || bsds->writeable_handler) {
1464                         errno = EINVAL;
1465                         return -1;
1466                 }
1467                 bsds->event_ptr = NULL;
1468                 TALLOC_FREE(bsds->fde);
1469         }
1470
1471         if (tevent_fd_get_flags(bsds->fde) == 0) {
1472                 TALLOC_FREE(bsds->fde);
1473
1474                 bsds->fde = tevent_add_fd(ev, bsds,
1475                                           bsds->fd,
1476                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1477                                           tstream_bsd_fde_handler,
1478                                           bsds);
1479                 if (!bsds->fde) {
1480                         errno = ENOMEM;
1481                         return -1;
1482                 }
1483
1484                 /* cache the event context we're running on */
1485                 bsds->event_ptr = ev;
1486         } else if (!bsds->writeable_handler) {
1487                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1488                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1489                 tevent_fd_set_flags(bsds->fde, flags);
1490         }
1491
1492         bsds->writeable_handler = handler;
1493         bsds->writeable_private = private_data;
1494
1495         return 0;
1496 }
1497
1498 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1499 {
1500         struct tstream_bsd *bsds = tstream_context_data(stream,
1501                                    struct tstream_bsd);
1502         ssize_t ret;
1503
1504         if (bsds->fd == -1) {
1505                 errno = ENOTCONN;
1506                 return -1;
1507         }
1508
1509         ret = tsocket_bsd_pending(bsds->fd);
1510
1511         return ret;
1512 }
1513
/*
 * Per-request state of a stream readv operation.
 */
struct tstream_bsd_readv_state {
        /* stream context the request operates on */
        struct tstream_context *stream;

        /* private copy of the io vector and the number of entries left */
        struct iovec *vector;
        size_t count;

        /* number of bytes read so far */
        int ret;
};
1522
1523 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1524 {
1525         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1526                                    struct tstream_bsd);
1527
1528         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1529
1530         return 0;
1531 }
1532
1533 static void tstream_bsd_readv_handler(void *private_data);
1534
1535 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1536                                         struct tevent_context *ev,
1537                                         struct tstream_context *stream,
1538                                         struct iovec *vector,
1539                                         size_t count)
1540 {
1541         struct tevent_req *req;
1542         struct tstream_bsd_readv_state *state;
1543         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1544         int ret;
1545
1546         req = tevent_req_create(mem_ctx, &state,
1547                                 struct tstream_bsd_readv_state);
1548         if (!req) {
1549                 return NULL;
1550         }
1551
1552         state->stream   = stream;
1553         /* we make a copy of the vector so that we can modify it */
1554         state->vector   = talloc_array(state, struct iovec, count);
1555         if (tevent_req_nomem(state->vector, req)) {
1556                 goto post;
1557         }
1558         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1559         state->count    = count;
1560         state->ret      = 0;
1561
1562         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1563
1564         if (bsds->fd == -1) {
1565                 tevent_req_error(req, ENOTCONN);
1566                 goto post;
1567         }
1568
1569         /*
1570          * this is a fast path, not waiting for the
1571          * socket to become explicit readable gains
1572          * about 10%-20% performance in benchmark tests.
1573          */
1574         tstream_bsd_readv_handler(req);
1575         if (!tevent_req_is_in_progress(req)) {
1576                 goto post;
1577         }
1578
1579         ret = tstream_bsd_set_readable_handler(bsds, ev,
1580                                               tstream_bsd_readv_handler,
1581                                               req);
1582         if (ret == -1) {
1583                 tevent_req_error(req, errno);
1584                 goto post;
1585         }
1586
1587         return req;
1588
1589  post:
1590         tevent_req_post(req, ev);
1591         return req;
1592 }
1593
1594 static void tstream_bsd_readv_handler(void *private_data)
1595 {
1596         struct tevent_req *req = talloc_get_type_abort(private_data,
1597                                  struct tevent_req);
1598         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1599                                         struct tstream_bsd_readv_state);
1600         struct tstream_context *stream = state->stream;
1601         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1602         int ret;
1603         int err;
1604         bool retry;
1605
1606         ret = readv(bsds->fd, state->vector, state->count);
1607         if (ret == 0) {
1608                 /* propagate end of file */
1609                 tevent_req_error(req, EPIPE);
1610                 return;
1611         }
1612         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1613         if (retry) {
1614                 /* retry later */
1615                 return;
1616         }
1617         if (tevent_req_error(req, err)) {
1618                 return;
1619         }
1620
1621         state->ret += ret;
1622
1623         while (ret > 0) {
1624                 if (ret < state->vector[0].iov_len) {
1625                         uint8_t *base;
1626                         base = (uint8_t *)state->vector[0].iov_base;
1627                         base += ret;
1628                         state->vector[0].iov_base = base;
1629                         state->vector[0].iov_len -= ret;
1630                         break;
1631                 }
1632                 ret -= state->vector[0].iov_len;
1633                 state->vector += 1;
1634                 state->count -= 1;
1635         }
1636
1637         /*
1638          * there're maybe some empty vectors at the end
1639          * which we need to skip, otherwise we would get
1640          * ret == 0 from the readv() call and return EPIPE
1641          */
1642         while (state->count > 0) {
1643                 if (state->vector[0].iov_len > 0) {
1644                         break;
1645                 }
1646                 state->vector += 1;
1647                 state->count -= 1;
1648         }
1649
1650         if (state->count > 0) {
1651                 /* we have more to read */
1652                 return;
1653         }
1654
1655         tevent_req_done(req);
1656 }
1657
1658 static int tstream_bsd_readv_recv(struct tevent_req *req,
1659                                   int *perrno)
1660 {
1661         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1662                                         struct tstream_bsd_readv_state);
1663         int ret;
1664
1665         ret = tsocket_simple_int_recv(req, perrno);
1666         if (ret == 0) {
1667                 ret = state->ret;
1668         }
1669
1670         tevent_req_received(req);
1671         return ret;
1672 }
1673
/*
 * Per-request state of a stream writev operation.
 */
struct tstream_bsd_writev_state {
        /* stream context the request operates on */
        struct tstream_context *stream;

        /* private copy of the io vector and the number of entries left */
        struct iovec *vector;
        size_t count;

        /* number of bytes written so far */
        int ret;
};
1682
1683 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1684 {
1685         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1686                                   struct tstream_bsd);
1687
1688         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1689
1690         return 0;
1691 }
1692
1693 static void tstream_bsd_writev_handler(void *private_data);
1694
1695 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1696                                                  struct tevent_context *ev,
1697                                                  struct tstream_context *stream,
1698                                                  const struct iovec *vector,
1699                                                  size_t count)
1700 {
1701         struct tevent_req *req;
1702         struct tstream_bsd_writev_state *state;
1703         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1704         int ret;
1705
1706         req = tevent_req_create(mem_ctx, &state,
1707                                 struct tstream_bsd_writev_state);
1708         if (!req) {
1709                 return NULL;
1710         }
1711
1712         state->stream   = stream;
1713         /* we make a copy of the vector so that we can modify it */
1714         state->vector   = talloc_array(state, struct iovec, count);
1715         if (tevent_req_nomem(state->vector, req)) {
1716                 goto post;
1717         }
1718         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1719         state->count    = count;
1720         state->ret      = 0;
1721
1722         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1723
1724         if (bsds->fd == -1) {
1725                 tevent_req_error(req, ENOTCONN);
1726                 goto post;
1727         }
1728
1729         /*
1730          * this is a fast path, not waiting for the
1731          * socket to become explicit writeable gains
1732          * about 10%-20% performance in benchmark tests.
1733          */
1734         tstream_bsd_writev_handler(req);
1735         if (!tevent_req_is_in_progress(req)) {
1736                 goto post;
1737         }
1738
1739         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1740                                                tstream_bsd_writev_handler,
1741                                                req);
1742         if (ret == -1) {
1743                 tevent_req_error(req, errno);
1744                 goto post;
1745         }
1746
1747         return req;
1748
1749  post:
1750         tevent_req_post(req, ev);
1751         return req;
1752 }
1753
1754 static void tstream_bsd_writev_handler(void *private_data)
1755 {
1756         struct tevent_req *req = talloc_get_type_abort(private_data,
1757                                  struct tevent_req);
1758         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1759                                         struct tstream_bsd_writev_state);
1760         struct tstream_context *stream = state->stream;
1761         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1762         ssize_t ret;
1763         int err;
1764         bool retry;
1765
1766         ret = writev(bsds->fd, state->vector, state->count);
1767         if (ret == 0) {
1768                 /* propagate end of file */
1769                 tevent_req_error(req, EPIPE);
1770                 return;
1771         }
1772         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1773         if (retry) {
1774                 /* retry later */
1775                 return;
1776         }
1777         if (tevent_req_error(req, err)) {
1778                 return;
1779         }
1780
1781         state->ret += ret;
1782
1783         while (ret > 0) {
1784                 if (ret < state->vector[0].iov_len) {
1785                         uint8_t *base;
1786                         base = (uint8_t *)state->vector[0].iov_base;
1787                         base += ret;
1788                         state->vector[0].iov_base = base;
1789                         state->vector[0].iov_len -= ret;
1790                         break;
1791                 }
1792                 ret -= state->vector[0].iov_len;
1793                 state->vector += 1;
1794                 state->count -= 1;
1795         }
1796
1797         /*
1798          * there're maybe some empty vectors at the end
1799          * which we need to skip, otherwise we would get
1800          * ret == 0 from the writev() call and return EPIPE
1801          */
1802         while (state->count > 0) {
1803                 if (state->vector[0].iov_len > 0) {
1804                         break;
1805                 }
1806                 state->vector += 1;
1807                 state->count -= 1;
1808         }
1809
1810         if (state->count > 0) {
1811                 /* we have more to read */
1812                 return;
1813         }
1814
1815         tevent_req_done(req);
1816 }
1817
1818 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1819 {
1820         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1821                                         struct tstream_bsd_writev_state);
1822         int ret;
1823
1824         ret = tsocket_simple_int_recv(req, perrno);
1825         if (ret == 0) {
1826                 ret = state->ret;
1827         }
1828
1829         tevent_req_received(req);
1830         return ret;
1831 }
1832
/*
 * State of a stream disconnect request. Nothing needs to be
 * remembered, the single member is only a placeholder.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy; /* unused */
};
1836
1837 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1838                                                      struct tevent_context *ev,
1839                                                      struct tstream_context *stream)
1840 {
1841         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1842         struct tevent_req *req;
1843         struct tstream_bsd_disconnect_state *state;
1844         int ret;
1845         int err;
1846         bool dummy;
1847
1848         req = tevent_req_create(mem_ctx, &state,
1849                                 struct tstream_bsd_disconnect_state);
1850         if (req == NULL) {
1851                 return NULL;
1852         }
1853
1854         if (bsds->fd == -1) {
1855                 tevent_req_error(req, ENOTCONN);
1856                 goto post;
1857         }
1858
1859         ret = close(bsds->fd);
1860         bsds->fd = -1;
1861         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1862         if (tevent_req_error(req, err)) {
1863                 goto post;
1864         }
1865
1866         tevent_req_done(req);
1867 post:
1868         tevent_req_post(req, ev);
1869         return req;
1870 }
1871
/*
 * Collect the result of a disconnect request.
 *
 * Returns the status reported by tsocket_simple_int_recv(), which is
 * also handed perrno, and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
				       int *perrno)
{
	const int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
1882
1883 static const struct tstream_context_ops tstream_bsd_ops = {
1884         .name                   = "bsd",
1885
1886         .pending_bytes          = tstream_bsd_pending_bytes,
1887
1888         .readv_send             = tstream_bsd_readv_send,
1889         .readv_recv             = tstream_bsd_readv_recv,
1890
1891         .writev_send            = tstream_bsd_writev_send,
1892         .writev_recv            = tstream_bsd_writev_recv,
1893
1894         .disconnect_send        = tstream_bsd_disconnect_send,
1895         .disconnect_recv        = tstream_bsd_disconnect_recv,
1896 };
1897
1898 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1899 {
1900         TALLOC_FREE(bsds->fde);
1901         if (bsds->fd != -1) {
1902                 close(bsds->fd);
1903                 bsds->fd = -1;
1904         }
1905         return 0;
1906 }
1907
1908 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1909                                  int fd,
1910                                  struct tstream_context **_stream,
1911                                  const char *location)
1912 {
1913         struct tstream_context *stream;
1914         struct tstream_bsd *bsds;
1915
1916         stream = tstream_context_create(mem_ctx,
1917                                         &tstream_bsd_ops,
1918                                         &bsds,
1919                                         struct tstream_bsd,
1920                                         location);
1921         if (!stream) {
1922                 return -1;
1923         }
1924         ZERO_STRUCTP(bsds);
1925         bsds->fd = fd;
1926         talloc_set_destructor(bsds, tstream_bsd_destructor);
1927
1928         *_stream = stream;
1929         return 0;
1930 }
1931
/*
 * Per-request state of a connect request.
 *
 * fd:     the socket being connected, -1 while there is none; closed
 *         by the state destructor unless ownership was handed over
 * fde:    fd event used while waiting for a non-blocking connect
 * stream: not used by the connect code in this file
 */
struct tstream_bsd_connect_state {
	int fd;
	struct tevent_fd *fde;
	struct tstream_context *stream;
};
1937
1938 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1939 {
1940         TALLOC_FREE(state->fde);
1941         if (state->fd != -1) {
1942                 close(state->fd);
1943                 state->fd = -1;
1944         }
1945
1946         return 0;
1947 }
1948
/*
 * fd event handler for a pending connect, defined further down.
 * Declared here because tstream_bsd_connect_send() registers it.
 */
static void tstream_bsd_connect_fde_handler(
	struct tevent_context *ev,
	struct tevent_fd *fde,
	uint16_t flags,
	void *private_data);
1953
1954 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1955                                         struct tevent_context *ev,
1956                                         int sys_errno,
1957                                         const struct tsocket_address *local,
1958                                         const struct tsocket_address *remote)
1959 {
1960         struct tevent_req *req;
1961         struct tstream_bsd_connect_state *state;
1962         struct tsocket_address_bsd *lbsda =
1963                 talloc_get_type_abort(local->private_data,
1964                 struct tsocket_address_bsd);
1965         struct tsocket_address_bsd *rbsda =
1966                 talloc_get_type_abort(remote->private_data,
1967                 struct tsocket_address_bsd);
1968         int ret;
1969         int err;
1970         bool retry;
1971         bool do_bind = false;
1972         bool do_reuseaddr = false;
1973         bool is_inet = false;
1974         int sa_fam = lbsda->u.sa.sa_family;
1975         socklen_t sa_socklen = sizeof(rbsda->u.ss);
1976
1977         req = tevent_req_create(mem_ctx, &state,
1978                                 struct tstream_bsd_connect_state);
1979         if (!req) {
1980                 return NULL;
1981         }
1982         state->fd = -1;
1983         state->fde = NULL;
1984
1985         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1986
1987         /* give the wrappers a chance to report an error */
1988         if (sys_errno != 0) {
1989                 tevent_req_error(req, sys_errno);
1990                 goto post;
1991         }
1992
1993         switch (lbsda->u.sa.sa_family) {
1994         case AF_UNIX:
1995                 if (lbsda->u.un.sun_path[0] != 0) {
1996                         do_reuseaddr = true;
1997                         do_bind = true;
1998                 }
1999                 /*
2000                  * for unix sockets we can't use the size of sockaddr_storage
2001                  * we would get EINVAL
2002                  */
2003                 sa_socklen = sizeof(rbsda->u.un);
2004                 break;
2005         case AF_INET:
2006                 if (lbsda->u.in.sin_port != 0) {
2007                         do_reuseaddr = true;
2008                         do_bind = true;
2009                 }
2010                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
2011                         do_bind = true;
2012                 }
2013                 is_inet = true;
2014                 sa_socklen = sizeof(rbsda->u.in);
2015                 break;
2016 #ifdef HAVE_IPV6
2017         case AF_INET6:
2018                 if (lbsda->u.in6.sin6_port != 0) {
2019                         do_reuseaddr = true;
2020                         do_bind = true;
2021                 }
2022                 if (memcmp(&in6addr_any,
2023                            &lbsda->u.in6.sin6_addr,
2024                            sizeof(in6addr_any)) != 0) {
2025                         do_bind = true;
2026                 }
2027                 is_inet = true;
2028                 sa_socklen = sizeof(rbsda->u.in6);
2029                 break;
2030 #endif
2031         default:
2032                 tevent_req_error(req, EINVAL);
2033                 goto post;
2034         }
2035
2036         if (!do_bind && is_inet) {
2037                 sa_fam = rbsda->u.sa.sa_family;
2038                 switch (sa_fam) {
2039                 case AF_INET:
2040                         sa_socklen = sizeof(rbsda->u.in);
2041                         break;
2042 #ifdef HAVE_IPV6
2043                 case AF_INET6:
2044                         sa_socklen = sizeof(rbsda->u.in6);
2045                         break;
2046 #endif
2047                 }
2048         }
2049
2050         state->fd = socket(sa_fam, SOCK_STREAM, 0);
2051         if (state->fd == -1) {
2052                 tevent_req_error(req, errno);
2053                 goto post;
2054         }
2055
2056         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2057         if (state->fd == -1) {
2058                 tevent_req_error(req, errno);
2059                 goto post;
2060         }
2061
2062         if (do_reuseaddr) {
2063                 int val = 1;
2064
2065                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2066                                  (const void *)&val, sizeof(val));
2067                 if (ret == -1) {
2068                         tevent_req_error(req, errno);
2069                         goto post;
2070                 }
2071         }
2072
2073         if (do_bind) {
2074                 ret = bind(state->fd, &lbsda->u.sa, sa_socklen);
2075                 if (ret == -1) {
2076                         tevent_req_error(req, errno);
2077                         goto post;
2078                 }
2079         }
2080
2081         if (rbsda->u.sa.sa_family != sa_fam) {
2082                 tevent_req_error(req, EINVAL);
2083                 goto post;
2084         }
2085
2086         ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
2087         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2088         if (retry) {
2089                 /* retry later */
2090                 goto async;
2091         }
2092         if (tevent_req_error(req, err)) {
2093                 goto post;
2094         }
2095
2096         tevent_req_done(req);
2097         goto post;
2098
2099  async:
2100         state->fde = tevent_add_fd(ev, state,
2101                                    state->fd,
2102                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2103                                    tstream_bsd_connect_fde_handler,
2104                                    req);
2105         if (tevent_req_nomem(state->fde, req)) {
2106                 goto post;
2107         }
2108
2109         return req;
2110
2111  post:
2112         tevent_req_post(req, ev);
2113         return req;
2114 }
2115
2116 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2117                                             struct tevent_fd *fde,
2118                                             uint16_t flags,
2119                                             void *private_data)
2120 {
2121         struct tevent_req *req = talloc_get_type_abort(private_data,
2122                                  struct tevent_req);
2123         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2124                                         struct tstream_bsd_connect_state);
2125         int ret;
2126         int error=0;
2127         socklen_t len = sizeof(error);
2128         int err;
2129         bool retry;
2130
2131         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2132         if (ret == 0) {
2133                 if (error != 0) {
2134                         errno = error;
2135                         ret = -1;
2136                 }
2137         }
2138         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2139         if (retry) {
2140                 /* retry later */
2141                 return;
2142         }
2143         if (tevent_req_error(req, err)) {
2144                 return;
2145         }
2146
2147         tevent_req_done(req);
2148 }
2149
2150 static int tstream_bsd_connect_recv(struct tevent_req *req,
2151                                     int *perrno,
2152                                     TALLOC_CTX *mem_ctx,
2153                                     struct tstream_context **stream,
2154                                     const char *location)
2155 {
2156         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2157                                         struct tstream_bsd_connect_state);
2158         int ret;
2159
2160         ret = tsocket_simple_int_recv(req, perrno);
2161         if (ret == 0) {
2162                 ret = _tstream_bsd_existing_socket(mem_ctx,
2163                                                    state->fd,
2164                                                    stream,
2165                                                    location);
2166                 if (ret == -1) {
2167                         *perrno = errno;
2168                         goto done;
2169                 }
2170                 TALLOC_FREE(state->fde);
2171                 state->fd = -1;
2172         }
2173
2174 done:
2175         tevent_req_received(req);
2176         return ret;
2177 }
2178
2179 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2180                                         struct tevent_context *ev,
2181                                         const struct tsocket_address *local,
2182                                         const struct tsocket_address *remote)
2183 {
2184         struct tsocket_address_bsd *lbsda =
2185                 talloc_get_type_abort(local->private_data,
2186                 struct tsocket_address_bsd);
2187         struct tevent_req *req;
2188         int sys_errno = 0;
2189
2190         switch (lbsda->u.sa.sa_family) {
2191         case AF_INET:
2192                 break;
2193 #ifdef HAVE_IPV6
2194         case AF_INET6:
2195                 break;
2196 #endif
2197         default:
2198                 sys_errno = EINVAL;
2199                 break;
2200         }
2201
2202         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2203
2204         return req;
2205 }
2206
2207 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2208                                    int *perrno,
2209                                    TALLOC_CTX *mem_ctx,
2210                                    struct tstream_context **stream,
2211                                    const char *location)
2212 {
2213         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2214 }
2215
2216 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2217                                         struct tevent_context *ev,
2218                                         const struct tsocket_address *local,
2219                                         const struct tsocket_address *remote)
2220 {
2221         struct tsocket_address_bsd *lbsda =
2222                 talloc_get_type_abort(local->private_data,
2223                 struct tsocket_address_bsd);
2224         struct tevent_req *req;
2225         int sys_errno = 0;
2226
2227         switch (lbsda->u.sa.sa_family) {
2228         case AF_UNIX:
2229                 break;
2230         default:
2231                 sys_errno = EINVAL;
2232                 break;
2233         }
2234
2235         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2236
2237         return req;
2238 }
2239
2240 int _tstream_unix_connect_recv(struct tevent_req *req,
2241                                       int *perrno,
2242                                       TALLOC_CTX *mem_ctx,
2243                                       struct tstream_context **stream,
2244                                       const char *location)
2245 {
2246         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2247 }
2248
2249 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2250                              struct tstream_context **_stream1,
2251                              TALLOC_CTX *mem_ctx2,
2252                              struct tstream_context **_stream2,
2253                              const char *location)
2254 {
2255         int ret;
2256         int fds[2];
2257         int fd1;
2258         int fd2;
2259         struct tstream_context *stream1 = NULL;
2260         struct tstream_context *stream2 = NULL;
2261
2262         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2263         if (ret == -1) {
2264                 return -1;
2265         }
2266         fd1 = fds[0];
2267         fd2 = fds[1];
2268
2269         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2270         if (fd1 == -1) {
2271                 int sys_errno = errno;
2272                 close(fd2);
2273                 errno = sys_errno;
2274                 return -1;
2275         }
2276
2277         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2278         if (fd2 == -1) {
2279                 int sys_errno = errno;
2280                 close(fd1);
2281                 errno = sys_errno;
2282                 return -1;
2283         }
2284
2285         ret = _tstream_bsd_existing_socket(mem_ctx1,
2286                                            fd1,
2287                                            &stream1,
2288                                            location);
2289         if (ret == -1) {
2290                 int sys_errno = errno;
2291                 close(fd1);
2292                 close(fd2);
2293                 errno = sys_errno;
2294                 return -1;
2295         }
2296
2297         ret = _tstream_bsd_existing_socket(mem_ctx2,
2298                                            fd2,
2299                                            &stream2,
2300                                            location);
2301         if (ret == -1) {
2302                 int sys_errno = errno;
2303                 talloc_free(stream1);
2304                 close(fd2);
2305                 errno = sys_errno;
2306                 return -1;
2307         }
2308
2309         *_stream1 = stream1;
2310         *_stream2 = stream2;
2311         return 0;
2312 }
2313