tdb: remove lock ops
[ira/wip.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Map the result of a system call to an error code.
 *
 * ret:       the return value of the system call
 * sys_errno: the errno value captured right after the call
 * retry:     set to true when the failure is a transient one
 *            (EINTR, EINPROGRESS, EAGAIN or EWOULDBLOCK),
 *            false in every other case
 *
 * Returns 0 if ret is not negative, EIO if ret is a negative value
 * other than -1 or if no errno is available, otherwise sys_errno.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		/* the call succeeded */
		return 0;
	}

	if (ret != -1 || sys_errno == 0) {
		/* an unexpected return value, or a failure without a reason */
		return EIO;
	}

	transient = (sys_errno == EINTR);
	transient = transient || (sys_errno == EINPROGRESS);
	transient = transient || (sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
	transient = transient || (sys_errno == EWOULDBLOCK);
#endif

	if (transient) {
		*retry = true;
	}

	return sys_errno;
}
72
/*
 * Prepare a file descriptor for use by this file: optionally move it
 * to a descriptor number >= 3, switch it to non-blocking mode and mark
 * it close-on-exec.
 *
 * fd:      the descriptor to prepare; -1 is passed straight through
 * high_fd: if true, descriptors below 3 are replaced by a dup()
 *          with a number >= 3 and the low descriptors are closed
 *
 * Returns the (possibly new) descriptor, or -1 with errno set.
 * When one of the fcntl() steps fails the descriptor is closed.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
	int low_fds[3];
	int low_count = 0;
	int saved_errno = 0;
	int fl;
	int idx;

	if (fd == -1) {
		return -1;
	}

	if (high_fd) {
		/* keep dup()ing until we are out of the 0..2 range */
		while (fd < 3) {
			low_fds[low_count] = fd;
			low_count += 1;

			fd = dup(fd);
			if (fd == -1) {
				saved_errno = errno;
				break;
			}
		}

		/* the low descriptors were only needed as placeholders */
		for (idx = 0; idx < low_count; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			/* close() may have overwritten errno */
			errno = saved_errno;
			return -1;
		}
	}

	/* switch the descriptor to non-blocking mode */

#if defined(O_NONBLOCK)
#define FLAG_TO_SET O_NONBLOCK
#elif defined(SYSV)
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif

	fl = fcntl(fd, F_GETFL);
	if (fl == -1) {
		goto fail;
	}

	if (fcntl(fd, F_SETFL, fl | FLAG_TO_SET) == -1) {
		goto fail;
	}

#undef FLAG_TO_SET

	/* make sure the descriptor does not survive an exec() */
#ifdef FD_CLOEXEC
	fl = fcntl(fd, F_GETFD, 0);
	if (fl < 0) {
		goto fail;
	}

	if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
		goto fail;
	}
#endif

	return fd;

 fail:
	/* report the fcntl() failure, not a possible close() failure */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
149
/*
 * Ask the kernel how many bytes can be read from fd.
 *
 * Returns the value reported by ioctl(FIONREAD) when it is not zero.
 * When nothing is readable the SO_ERROR state of the socket is
 * checked: 0 is returned if there is no pending error, otherwise
 * -1 is returned with errno set to that error.
 * A failing ioctl() or getsockopt() also results in -1.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int queued = 0;
	int so_error = 0;
	socklen_t so_error_len = sizeof(so_error);
	int rc;

	rc = ioctl(fd, FIONREAD, &queued);
	if (rc == -1) {
		return -1;
	}
	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (queued != 0) {
		return queued;
	}

	/*
	 * Nothing to read: look at the error state of the socket.
	 * For connected dgram sockets this is how ICMP error
	 * messages are handed to the caller.
	 */
	rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len);
	if (rc == -1) {
		return -1;
	}

	if (so_error != 0) {
		errno = so_error;
		return -1;
	}

	return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private data of a "bsd" tsocket_address.
 *
 * All members of the union overlay the same storage, so the address
 * can be looked at through the generic sockaddr (to find the family)
 * or through the family specific structure.
 */
struct tsocket_address_bsd {
	union {
		/* generic view, used to read sa_family */
		struct sockaddr sa;
		/* AF_INET view */
		struct sockaddr_in in;
#ifdef HAVE_IPV6
		/* AF_INET6 view */
		struct sockaddr_in6 in6;
#endif
		/* AF_UNIX view */
		struct sockaddr_un un;
		/* used as source/destination when copying the raw address */
		struct sockaddr_storage ss;
	} u;
};
203
204 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                        struct sockaddr *sa,
206                                        size_t sa_socklen,
207                                        struct tsocket_address **_addr,
208                                        const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         if (sa_socklen < sizeof(sa->sa_family)) {
214                 errno = EINVAL;
215                 return -1;
216         }
217
218         switch (sa->sa_family) {
219         case AF_UNIX:
220                 if (sa_socklen > sizeof(struct sockaddr_un)) {
221                         sa_socklen = sizeof(struct sockaddr_un);
222                 }
223                 break;
224         case AF_INET:
225                 if (sa_socklen < sizeof(struct sockaddr_in)) {
226                         errno = EINVAL;
227                         return -1;
228                 }
229                 sa_socklen = sizeof(struct sockaddr_in);
230                 break;
231 #ifdef HAVE_IPV6
232         case AF_INET6:
233                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
234                         errno = EINVAL;
235                         return -1;
236                 }
237                 sa_socklen = sizeof(struct sockaddr_in6);
238                 break;
239 #endif
240         default:
241                 errno = EAFNOSUPPORT;
242                 return -1;
243         }
244
245         if (sa_socklen > sizeof(struct sockaddr_storage)) {
246                 errno = EINVAL;
247                 return -1;
248         }
249
250         addr = tsocket_address_create(mem_ctx,
251                                       &tsocket_address_bsd_ops,
252                                       &bsda,
253                                       struct tsocket_address_bsd,
254                                       location);
255         if (!addr) {
256                 errno = ENOMEM;
257                 return -1;
258         }
259
260         ZERO_STRUCTP(bsda);
261
262         memcpy(&bsda->u.ss, sa, sa_socklen);
263
264         *_addr = addr;
265         return 0;
266 }
267
268 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
269                                      struct sockaddr *sa,
270                                      size_t sa_socklen)
271 {
272         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
273                                            struct tsocket_address_bsd);
274         ssize_t rlen = 0;
275
276         if (!bsda) {
277                 errno = EINVAL;
278                 return -1;
279         }
280
281         switch (bsda->u.sa.sa_family) {
282         case AF_UNIX:
283                 rlen = sizeof(struct sockaddr_un);
284                 break;
285         case AF_INET:
286                 rlen = sizeof(struct sockaddr_in);
287                 break;
288 #ifdef HAVE_IPV6
289         case AF_INET6:
290                 rlen = sizeof(struct sockaddr_in6);
291                 break;
292 #endif
293         default:
294                 errno = EAFNOSUPPORT;
295                 return -1;
296         }
297
298         if (sa_socklen < rlen) {
299                 errno = EINVAL;
300                 return -1;
301         }
302
303         if (sa_socklen > sizeof(struct sockaddr_storage)) {
304                 memset(sa, 0, sa_socklen);
305                 sa_socklen = sizeof(struct sockaddr_storage);
306         }
307
308         memcpy(sa, &bsda->u.ss, sa_socklen);
309         return rlen;
310 }
311
312 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
313                                        const char *fam,
314                                        const char *addr,
315                                        uint16_t port,
316                                        struct tsocket_address **_addr,
317                                        const char *location)
318 {
319         struct addrinfo hints;
320         struct addrinfo *result = NULL;
321         char port_str[6];
322         int ret;
323
324         ZERO_STRUCT(hints);
325         /*
326          * we use SOCKET_STREAM here to get just one result
327          * back from getaddrinfo().
328          */
329         hints.ai_socktype = SOCK_STREAM;
330         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
331
332         if (strcasecmp(fam, "ip") == 0) {
333                 hints.ai_family = AF_UNSPEC;
334                 if (!addr) {
335 #ifdef HAVE_IPV6
336                         addr = "::";
337 #else
338                         addr = "0.0.0.0";
339 #endif
340                 }
341         } else if (strcasecmp(fam, "ipv4") == 0) {
342                 hints.ai_family = AF_INET;
343                 if (!addr) {
344                         addr = "0.0.0.0";
345                 }
346 #ifdef HAVE_IPV6
347         } else if (strcasecmp(fam, "ipv6") == 0) {
348                 hints.ai_family = AF_INET6;
349                 if (!addr) {
350                         addr = "::";
351                 }
352 #endif
353         } else {
354                 errno = EAFNOSUPPORT;
355                 return -1;
356         }
357
358         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
359
360         ret = getaddrinfo(addr, port_str, &hints, &result);
361         if (ret != 0) {
362                 switch (ret) {
363                 case EAI_FAIL:
364                         errno = EINVAL;
365                         break;
366                 }
367                 ret = -1;
368                 goto done;
369         }
370
371         if (result->ai_socktype != SOCK_STREAM) {
372                 errno = EINVAL;
373                 ret = -1;
374                 goto done;
375         }
376
377         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
378                                                   result->ai_addr,
379                                                   result->ai_addrlen,
380                                                   _addr,
381                                                   location);
382
383 done:
384         if (result) {
385                 freeaddrinfo(result);
386         }
387         return ret;
388 }
389
390 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
391                                        TALLOC_CTX *mem_ctx)
392 {
393         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
394                                            struct tsocket_address_bsd);
395         char addr_str[INET6_ADDRSTRLEN+1];
396         const char *str;
397
398         if (!bsda) {
399                 errno = EINVAL;
400                 return NULL;
401         }
402
403         switch (bsda->u.sa.sa_family) {
404         case AF_INET:
405                 str = inet_ntop(bsda->u.in.sin_family,
406                                 &bsda->u.in.sin_addr,
407                                 addr_str, sizeof(addr_str));
408                 break;
409 #ifdef HAVE_IPV6
410         case AF_INET6:
411                 str = inet_ntop(bsda->u.in6.sin6_family,
412                                 &bsda->u.in6.sin6_addr,
413                                 addr_str, sizeof(addr_str));
414                 break;
415 #endif
416         default:
417                 errno = EINVAL;
418                 return NULL;
419         }
420
421         if (!str) {
422                 return NULL;
423         }
424
425         return talloc_strdup(mem_ctx, str);
426 }
427
428 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
429 {
430         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
431                                            struct tsocket_address_bsd);
432         uint16_t port = 0;
433
434         if (!bsda) {
435                 errno = EINVAL;
436                 return 0;
437         }
438
439         switch (bsda->u.sa.sa_family) {
440         case AF_INET:
441                 port = ntohs(bsda->u.in.sin_port);
442                 break;
443 #ifdef HAVE_IPV6
444         case AF_INET6:
445                 port = ntohs(bsda->u.in6.sin6_port);
446                 break;
447 #endif
448         default:
449                 errno = EINVAL;
450                 return 0;
451         }
452
453         return port;
454 }
455
456 int tsocket_address_inet_set_port(struct tsocket_address *addr,
457                                   uint16_t port)
458 {
459         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
460                                            struct tsocket_address_bsd);
461
462         if (!bsda) {
463                 errno = EINVAL;
464                 return -1;
465         }
466
467         switch (bsda->u.sa.sa_family) {
468         case AF_INET:
469                 bsda->u.in.sin_port = htons(port);
470                 break;
471 #ifdef HAVE_IPV6
472         case AF_INET6:
473                 bsda->u.in6.sin6_port = htons(port);
474                 break;
475 #endif
476         default:
477                 errno = EINVAL;
478                 return -1;
479         }
480
481         return 0;
482 }
483
484 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
485                                     const char *path,
486                                     struct tsocket_address **_addr,
487                                     const char *location)
488 {
489         struct sockaddr_un un;
490         void *p = &un;
491         int ret;
492
493         if (!path) {
494                 path = "";
495         }
496
497         if (strlen(path) > sizeof(un.sun_path)-1) {
498                 errno = ENAMETOOLONG;
499                 return -1;
500         }
501
502         ZERO_STRUCT(un);
503         un.sun_family = AF_UNIX;
504         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
505
506         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
507                                                  (struct sockaddr *)p,
508                                                  sizeof(un),
509                                                  _addr,
510                                                  location);
511
512         return ret;
513 }
514
515 char *tsocket_address_unix_path(const struct tsocket_address *addr,
516                                 TALLOC_CTX *mem_ctx)
517 {
518         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
519                                            struct tsocket_address_bsd);
520         const char *str;
521
522         if (!bsda) {
523                 errno = EINVAL;
524                 return NULL;
525         }
526
527         switch (bsda->u.sa.sa_family) {
528         case AF_UNIX:
529                 str = bsda->u.un.sun_path;
530                 break;
531         default:
532                 errno = EINVAL;
533                 return NULL;
534         }
535
536         return talloc_strdup(mem_ctx, str);
537 }
538
539 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
540                                         TALLOC_CTX *mem_ctx)
541 {
542         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
543                                            struct tsocket_address_bsd);
544         char *str;
545         char *addr_str;
546         const char *prefix = NULL;
547         uint16_t port;
548
549         switch (bsda->u.sa.sa_family) {
550         case AF_UNIX:
551                 return talloc_asprintf(mem_ctx, "unix:%s",
552                                        bsda->u.un.sun_path);
553         case AF_INET:
554                 prefix = "ipv4";
555                 break;
556 #ifdef HAVE_IPV6
557         case AF_INET6:
558                 prefix = "ipv6";
559                 break;
560 #endif
561         default:
562                 errno = EINVAL;
563                 return NULL;
564         }
565
566         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
567         if (!addr_str) {
568                 return NULL;
569         }
570
571         port = tsocket_address_inet_port(addr);
572
573         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
574                               prefix, addr_str, port);
575         talloc_free(addr_str);
576
577         return str;
578 }
579
580 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
581                                                          TALLOC_CTX *mem_ctx,
582                                                          const char *location)
583 {
584         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
585                                            struct tsocket_address_bsd);
586         struct tsocket_address *copy;
587         int ret;
588
589         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
590                                                  &bsda->u.sa,
591                                                  sizeof(bsda->u.ss),
592                                                  &copy,
593                                                  location);
594         if (ret != 0) {
595                 return NULL;
596         }
597
598         return copy;
599 }
600
601 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
602         .name           = "bsd",
603         .string         = tsocket_address_bsd_string,
604         .copy           = tsocket_address_bsd_copy,
605 };
606
/*
 * Private state of a datagram socket.
 */
struct tdgram_bsd {
	/* the socket; -1 makes send/recvfrom requests fail with ENOTCONN */
	int fd;

	/* the event context the fd event below was created on */
	void *event_ptr;
	/* fd event dispatched through tdgram_bsd_fde_handler() */
	struct tevent_fd *fde;

	/* callback and its argument for TEVENT_FD_READ */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback and its argument for TEVENT_FD_WRITE */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
618
619 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
620                                    struct tevent_fd *fde,
621                                    uint16_t flags,
622                                    void *private_data)
623 {
624         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
625                                   struct tdgram_bsd);
626
627         if (flags & TEVENT_FD_WRITE) {
628                 bsds->writeable_handler(bsds->writeable_private);
629                 return;
630         }
631         if (flags & TEVENT_FD_READ) {
632                 if (!bsds->readable_handler) {
633                         TEVENT_FD_NOT_READABLE(bsds->fde);
634                         return;
635                 }
636                 bsds->readable_handler(bsds->readable_private);
637                 return;
638         }
639 }
640
641 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
642                                            struct tevent_context *ev,
643                                            void (*handler)(void *private_data),
644                                            void *private_data)
645 {
646         if (ev == NULL) {
647                 if (handler) {
648                         errno = EINVAL;
649                         return -1;
650                 }
651                 if (!bsds->readable_handler) {
652                         return 0;
653                 }
654                 bsds->readable_handler = NULL;
655                 bsds->readable_private = NULL;
656
657                 return 0;
658         }
659
660         /* read and write must use the same tevent_context */
661         if (bsds->event_ptr != ev) {
662                 if (bsds->readable_handler || bsds->writeable_handler) {
663                         errno = EINVAL;
664                         return -1;
665                 }
666                 bsds->event_ptr = NULL;
667                 TALLOC_FREE(bsds->fde);
668         }
669
670         if (tevent_fd_get_flags(bsds->fde) == 0) {
671                 TALLOC_FREE(bsds->fde);
672
673                 bsds->fde = tevent_add_fd(ev, bsds,
674                                           bsds->fd, TEVENT_FD_READ,
675                                           tdgram_bsd_fde_handler,
676                                           bsds);
677                 if (!bsds->fde) {
678                         errno = ENOMEM;
679                         return -1;
680                 }
681
682                 /* cache the event context we're running on */
683                 bsds->event_ptr = ev;
684         } else if (!bsds->readable_handler) {
685                 TEVENT_FD_READABLE(bsds->fde);
686         }
687
688         bsds->readable_handler = handler;
689         bsds->readable_private = private_data;
690
691         return 0;
692 }
693
694 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
695                                             struct tevent_context *ev,
696                                             void (*handler)(void *private_data),
697                                             void *private_data)
698 {
699         if (ev == NULL) {
700                 if (handler) {
701                         errno = EINVAL;
702                         return -1;
703                 }
704                 if (!bsds->writeable_handler) {
705                         return 0;
706                 }
707                 bsds->writeable_handler = NULL;
708                 bsds->writeable_private = NULL;
709                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
710
711                 return 0;
712         }
713
714         /* read and write must use the same tevent_context */
715         if (bsds->event_ptr != ev) {
716                 if (bsds->readable_handler || bsds->writeable_handler) {
717                         errno = EINVAL;
718                         return -1;
719                 }
720                 bsds->event_ptr = NULL;
721                 TALLOC_FREE(bsds->fde);
722         }
723
724         if (tevent_fd_get_flags(bsds->fde) == 0) {
725                 TALLOC_FREE(bsds->fde);
726
727                 bsds->fde = tevent_add_fd(ev, bsds,
728                                           bsds->fd, TEVENT_FD_WRITE,
729                                           tdgram_bsd_fde_handler,
730                                           bsds);
731                 if (!bsds->fde) {
732                         errno = ENOMEM;
733                         return -1;
734                 }
735
736                 /* cache the event context we're running on */
737                 bsds->event_ptr = ev;
738         } else if (!bsds->writeable_handler) {
739                 TEVENT_FD_WRITEABLE(bsds->fde);
740         }
741
742         bsds->writeable_handler = handler;
743         bsds->writeable_private = private_data;
744
745         return 0;
746 }
747
/*
 * State of one recvfrom request.
 */
struct tdgram_bsd_recvfrom_state {
	/* the socket the request runs on */
	struct tdgram_context *dgram;

	/* the received datagram and its length in bytes */
	uint8_t *buf;
	size_t len;
	/* the address filled in by recvfrom() */
	struct tsocket_address *src;
};
755
756 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
757 {
758         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
759                                   struct tdgram_bsd);
760
761         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
762
763         return 0;
764 }
765
766 static void tdgram_bsd_recvfrom_handler(void *private_data);
767
768 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
769                                         struct tevent_context *ev,
770                                         struct tdgram_context *dgram)
771 {
772         struct tevent_req *req;
773         struct tdgram_bsd_recvfrom_state *state;
774         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
775         int ret;
776
777         req = tevent_req_create(mem_ctx, &state,
778                                 struct tdgram_bsd_recvfrom_state);
779         if (!req) {
780                 return NULL;
781         }
782
783         state->dgram    = dgram;
784         state->buf      = NULL;
785         state->len      = 0;
786         state->src      = NULL;
787
788         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
789
790         if (bsds->fd == -1) {
791                 tevent_req_error(req, ENOTCONN);
792                 goto post;
793         }
794
795         /*
796          * this is a fast path, not waiting for the
797          * socket to become explicit readable gains
798          * about 10%-20% performance in benchmark tests.
799          */
800         tdgram_bsd_recvfrom_handler(req);
801         if (!tevent_req_is_in_progress(req)) {
802                 goto post;
803         }
804
805         ret = tdgram_bsd_set_readable_handler(bsds, ev,
806                                               tdgram_bsd_recvfrom_handler,
807                                               req);
808         if (ret == -1) {
809                 tevent_req_error(req, errno);
810                 goto post;
811         }
812
813         return req;
814
815  post:
816         tevent_req_post(req, ev);
817         return req;
818 }
819
820 static void tdgram_bsd_recvfrom_handler(void *private_data)
821 {
822         struct tevent_req *req = talloc_get_type_abort(private_data,
823                                  struct tevent_req);
824         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
825                                         struct tdgram_bsd_recvfrom_state);
826         struct tdgram_context *dgram = state->dgram;
827         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
828         struct tsocket_address_bsd *bsda;
829         ssize_t ret;
830         struct sockaddr *sa = NULL;
831         socklen_t sa_socklen = 0;
832         int err;
833         bool retry;
834
835         ret = tsocket_bsd_pending(bsds->fd);
836         if (ret == 0) {
837                 /* retry later */
838                 return;
839         }
840         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
841         if (retry) {
842                 /* retry later */
843                 return;
844         }
845         if (tevent_req_error(req, err)) {
846                 return;
847         }
848
849         state->buf = talloc_array(state, uint8_t, ret);
850         if (tevent_req_nomem(state->buf, req)) {
851                 return;
852         }
853         state->len = ret;
854
855         state->src = tsocket_address_create(state,
856                                             &tsocket_address_bsd_ops,
857                                             &bsda,
858                                             struct tsocket_address_bsd,
859                                             __location__ "bsd_recvfrom");
860         if (tevent_req_nomem(state->src, req)) {
861                 return;
862         }
863
864         ZERO_STRUCTP(bsda);
865
866         sa = &bsda->u.sa;
867         sa_socklen = sizeof(bsda->u.ss);
868         /*
869          * for unix sockets we can't use the size of sockaddr_storage
870          * we would get EINVAL
871          */
872         if (bsda->u.sa.sa_family == AF_UNIX) {
873                 sa_socklen = sizeof(bsda->u.un);
874         }
875
876         ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
877         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
878         if (retry) {
879                 /* retry later */
880                 return;
881         }
882         if (tevent_req_error(req, err)) {
883                 return;
884         }
885
886         /*
887          * Some systems (FreeBSD, see bug #7115) return too much
888          * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
889          * the return value includes some IP/UDP header bytes,
890          * while recvfrom() just returns the payload.
891          */
892         state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
893         if (tevent_req_nomem(state->buf, req)) {
894                 return;
895         }
896         state->len = ret;
897
898         tevent_req_done(req);
899 }
900
901 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
902                                         int *perrno,
903                                         TALLOC_CTX *mem_ctx,
904                                         uint8_t **buf,
905                                         struct tsocket_address **src)
906 {
907         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
908                                         struct tdgram_bsd_recvfrom_state);
909         ssize_t ret;
910
911         ret = tsocket_simple_int_recv(req, perrno);
912         if (ret == 0) {
913                 *buf = talloc_move(mem_ctx, &state->buf);
914                 ret = state->len;
915                 if (src) {
916                         *src = talloc_move(mem_ctx, &state->src);
917                 }
918         }
919
920         tevent_req_received(req);
921         return ret;
922 }
923
/*
 * State of one sendto request.
 */
struct tdgram_bsd_sendto_state {
	/* the socket the request runs on */
	struct tdgram_context *dgram;

	/* the caller's data and its length in bytes */
	const uint8_t *buf;
	size_t len;
	/* the destination address given by the caller */
	const struct tsocket_address *dst;

	/* initialized to -1 by tdgram_bsd_sendto_send() */
	ssize_t ret;
};
933
934 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
935 {
936         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
937                                   struct tdgram_bsd);
938
939         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
940
941         return 0;
942 }
943
944 static void tdgram_bsd_sendto_handler(void *private_data);
945
946 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
947                                                  struct tevent_context *ev,
948                                                  struct tdgram_context *dgram,
949                                                  const uint8_t *buf,
950                                                  size_t len,
951                                                  const struct tsocket_address *dst)
952 {
953         struct tevent_req *req;
954         struct tdgram_bsd_sendto_state *state;
955         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
956         int ret;
957
958         req = tevent_req_create(mem_ctx, &state,
959                                 struct tdgram_bsd_sendto_state);
960         if (!req) {
961                 return NULL;
962         }
963
964         state->dgram    = dgram;
965         state->buf      = buf;
966         state->len      = len;
967         state->dst      = dst;
968         state->ret      = -1;
969
970         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
971
972         if (bsds->fd == -1) {
973                 tevent_req_error(req, ENOTCONN);
974                 goto post;
975         }
976
977         /*
978          * this is a fast path, not waiting for the
979          * socket to become explicit writeable gains
980          * about 10%-20% performance in benchmark tests.
981          */
982         tdgram_bsd_sendto_handler(req);
983         if (!tevent_req_is_in_progress(req)) {
984                 goto post;
985         }
986
987         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
988                                                tdgram_bsd_sendto_handler,
989                                                req);
990         if (ret == -1) {
991                 tevent_req_error(req, errno);
992                 goto post;
993         }
994
995         return req;
996
997  post:
998         tevent_req_post(req, ev);
999         return req;
1000 }
1001
1002 static void tdgram_bsd_sendto_handler(void *private_data)
1003 {
1004         struct tevent_req *req = talloc_get_type_abort(private_data,
1005                                  struct tevent_req);
1006         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1007                                         struct tdgram_bsd_sendto_state);
1008         struct tdgram_context *dgram = state->dgram;
1009         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1010         struct sockaddr *sa = NULL;
1011         socklen_t sa_socklen = 0;
1012         ssize_t ret;
1013         int err;
1014         bool retry;
1015
1016         if (state->dst) {
1017                 struct tsocket_address_bsd *bsda =
1018                         talloc_get_type(state->dst->private_data,
1019                         struct tsocket_address_bsd);
1020
1021                 sa = &bsda->u.sa;
1022                 sa_socklen = sizeof(bsda->u.ss);
1023                 /*
1024                  * for unix sockets we can't use the size of sockaddr_storage
1025                  * we would get EINVAL
1026                  */
1027                 if (bsda->u.sa.sa_family == AF_UNIX) {
1028                         sa_socklen = sizeof(bsda->u.un);
1029                 }
1030         }
1031
1032         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1033         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1034         if (retry) {
1035                 /* retry later */
1036                 return;
1037         }
1038         if (tevent_req_error(req, err)) {
1039                 return;
1040         }
1041
1042         state->ret = ret;
1043
1044         tevent_req_done(req);
1045 }
1046
1047 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1048 {
1049         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1050                                         struct tdgram_bsd_sendto_state);
1051         ssize_t ret;
1052
1053         ret = tsocket_simple_int_recv(req, perrno);
1054         if (ret == 0) {
1055                 ret = state->ret;
1056         }
1057
1058         tevent_req_received(req);
1059         return ret;
1060 }
1061
/*
 * State of a datagram disconnect request.
 *
 * The request completes inside tdgram_bsd_disconnect_send(), so there
 * is nothing to remember; the member is only a placeholder.
 */
struct tdgram_bsd_disconnect_state {
	uint8_t __dummy;	/* unused placeholder */
};
1065
1066 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1067                                                      struct tevent_context *ev,
1068                                                      struct tdgram_context *dgram)
1069 {
1070         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1071         struct tevent_req *req;
1072         struct tdgram_bsd_disconnect_state *state;
1073         int ret;
1074         int err;
1075         bool dummy;
1076
1077         req = tevent_req_create(mem_ctx, &state,
1078                                 struct tdgram_bsd_disconnect_state);
1079         if (req == NULL) {
1080                 return NULL;
1081         }
1082
1083         if (bsds->fd == -1) {
1084                 tevent_req_error(req, ENOTCONN);
1085                 goto post;
1086         }
1087
1088         ret = close(bsds->fd);
1089         bsds->fd = -1;
1090         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1091         if (tevent_req_error(req, err)) {
1092                 goto post;
1093         }
1094
1095         tevent_req_done(req);
1096 post:
1097         tevent_req_post(req, ev);
1098         return req;
1099 }
1100
/*
 * Collect the result of a datagram disconnect request.
 *
 * Returns the value of tsocket_simple_int_recv() for the request and
 * marks the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);

	return result;
}
1111
1112 static const struct tdgram_context_ops tdgram_bsd_ops = {
1113         .name                   = "bsd",
1114
1115         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1116         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1117
1118         .sendto_send            = tdgram_bsd_sendto_send,
1119         .sendto_recv            = tdgram_bsd_sendto_recv,
1120
1121         .disconnect_send        = tdgram_bsd_disconnect_send,
1122         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1123 };
1124
1125 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1126 {
1127         TALLOC_FREE(bsds->fde);
1128         if (bsds->fd != -1) {
1129                 close(bsds->fd);
1130                 bsds->fd = -1;
1131         }
1132         return 0;
1133 }
1134
1135 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1136                                    const struct tsocket_address *remote,
1137                                    bool broadcast,
1138                                    TALLOC_CTX *mem_ctx,
1139                                    struct tdgram_context **_dgram,
1140                                    const char *location)
1141 {
1142         struct tsocket_address_bsd *lbsda =
1143                 talloc_get_type_abort(local->private_data,
1144                 struct tsocket_address_bsd);
1145         struct tsocket_address_bsd *rbsda = NULL;
1146         struct tdgram_context *dgram;
1147         struct tdgram_bsd *bsds;
1148         int fd;
1149         int ret;
1150         bool do_bind = false;
1151         bool do_reuseaddr = false;
1152         bool do_ipv6only = false;
1153         bool is_inet = false;
1154         int sa_fam = lbsda->u.sa.sa_family;
1155         socklen_t sa_socklen = sizeof(lbsda->u.ss);
1156
1157         if (remote) {
1158                 rbsda = talloc_get_type_abort(remote->private_data,
1159                         struct tsocket_address_bsd);
1160         }
1161
1162         switch (lbsda->u.sa.sa_family) {
1163         case AF_UNIX:
1164                 if (broadcast) {
1165                         errno = EINVAL;
1166                         return -1;
1167                 }
1168                 if (lbsda->u.un.sun_path[0] != 0) {
1169                         do_reuseaddr = true;
1170                         do_bind = true;
1171                 }
1172                 /*
1173                  * for unix sockets we can't use the size of sockaddr_storage
1174                  * we would get EINVAL
1175                  */
1176                 sa_socklen = sizeof(lbsda->u.un);
1177                 break;
1178         case AF_INET:
1179                 if (lbsda->u.in.sin_port != 0) {
1180                         do_reuseaddr = true;
1181                         do_bind = true;
1182                 }
1183                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1184                         do_bind = true;
1185                 }
1186                 is_inet = true;
1187                 sa_socklen = sizeof(rbsda->u.in);
1188                 break;
1189 #ifdef HAVE_IPV6
1190         case AF_INET6:
1191                 if (lbsda->u.in6.sin6_port != 0) {
1192                         do_reuseaddr = true;
1193                         do_bind = true;
1194                 }
1195                 if (memcmp(&in6addr_any,
1196                            &lbsda->u.in6.sin6_addr,
1197                            sizeof(in6addr_any)) != 0) {
1198                         do_bind = true;
1199                 }
1200                 is_inet = true;
1201                 sa_socklen = sizeof(rbsda->u.in6);
1202                 do_ipv6only = true;
1203                 break;
1204 #endif
1205         default:
1206                 errno = EINVAL;
1207                 return -1;
1208         }
1209
1210         if (!do_bind && is_inet && rbsda) {
1211                 sa_fam = rbsda->u.sa.sa_family;
1212                 switch (sa_fam) {
1213                 case AF_INET:
1214                         sa_socklen = sizeof(rbsda->u.in);
1215                         do_ipv6only = false;
1216                         break;
1217 #ifdef HAVE_IPV6
1218                 case AF_INET6:
1219                         sa_socklen = sizeof(rbsda->u.in6);
1220                         do_ipv6only = true;
1221                         break;
1222 #endif
1223                 }
1224         }
1225
1226         fd = socket(sa_fam, SOCK_DGRAM, 0);
1227         if (fd < 0) {
1228                 return fd;
1229         }
1230
1231         fd = tsocket_bsd_common_prepare_fd(fd, true);
1232         if (fd < 0) {
1233                 return fd;
1234         }
1235
1236         dgram = tdgram_context_create(mem_ctx,
1237                                       &tdgram_bsd_ops,
1238                                       &bsds,
1239                                       struct tdgram_bsd,
1240                                       location);
1241         if (!dgram) {
1242                 int saved_errno = errno;
1243                 close(fd);
1244                 errno = saved_errno;
1245                 return -1;
1246         }
1247         ZERO_STRUCTP(bsds);
1248         bsds->fd = fd;
1249         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1250
1251 #ifdef HAVE_IPV6
1252         if (do_ipv6only) {
1253                 int val = 1;
1254
1255                 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
1256                                  (const void *)&val, sizeof(val));
1257                 if (ret == -1) {
1258                         int saved_errno = errno;
1259                         talloc_free(dgram);
1260                         errno = saved_errno;
1261                         return ret;
1262                 }
1263         }
1264 #endif
1265
1266         if (broadcast) {
1267                 int val = 1;
1268
1269                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1270                                  (const void *)&val, sizeof(val));
1271                 if (ret == -1) {
1272                         int saved_errno = errno;
1273                         talloc_free(dgram);
1274                         errno = saved_errno;
1275                         return ret;
1276                 }
1277         }
1278
1279         if (do_reuseaddr) {
1280                 int val = 1;
1281
1282                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1283                                  (const void *)&val, sizeof(val));
1284                 if (ret == -1) {
1285                         int saved_errno = errno;
1286                         talloc_free(dgram);
1287                         errno = saved_errno;
1288                         return ret;
1289                 }
1290         }
1291
1292         if (do_bind) {
1293                 ret = bind(fd, &lbsda->u.sa, sa_socklen);
1294                 if (ret == -1) {
1295                         int saved_errno = errno;
1296                         talloc_free(dgram);
1297                         errno = saved_errno;
1298                         return ret;
1299                 }
1300         }
1301
1302         if (rbsda) {
1303                 if (rbsda->u.sa.sa_family != sa_fam) {
1304                         talloc_free(dgram);
1305                         errno = EINVAL;
1306                         return -1;
1307                 }
1308
1309                 ret = connect(fd, &rbsda->u.sa, sa_socklen);
1310                 if (ret == -1) {
1311                         int saved_errno = errno;
1312                         talloc_free(dgram);
1313                         errno = saved_errno;
1314                         return ret;
1315                 }
1316         }
1317
1318         *_dgram = dgram;
1319         return 0;
1320 }
1321
1322 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1323                             const struct tsocket_address *remote,
1324                             TALLOC_CTX *mem_ctx,
1325                             struct tdgram_context **dgram,
1326                             const char *location)
1327 {
1328         struct tsocket_address_bsd *lbsda =
1329                 talloc_get_type_abort(local->private_data,
1330                 struct tsocket_address_bsd);
1331         int ret;
1332
1333         switch (lbsda->u.sa.sa_family) {
1334         case AF_INET:
1335                 break;
1336 #ifdef HAVE_IPV6
1337         case AF_INET6:
1338                 break;
1339 #endif
1340         default:
1341                 errno = EINVAL;
1342                 return -1;
1343         }
1344
1345         ret = tdgram_bsd_dgram_socket(local, remote, false,
1346                                       mem_ctx, dgram, location);
1347
1348         return ret;
1349 }
1350
1351 int _tdgram_unix_socket(const struct tsocket_address *local,
1352                         const struct tsocket_address *remote,
1353                         TALLOC_CTX *mem_ctx,
1354                         struct tdgram_context **dgram,
1355                         const char *location)
1356 {
1357         struct tsocket_address_bsd *lbsda =
1358                 talloc_get_type_abort(local->private_data,
1359                 struct tsocket_address_bsd);
1360         int ret;
1361
1362         switch (lbsda->u.sa.sa_family) {
1363         case AF_UNIX:
1364                 break;
1365         default:
1366                 errno = EINVAL;
1367                 return -1;
1368         }
1369
1370         ret = tdgram_bsd_dgram_socket(local, remote, false,
1371                                       mem_ctx, dgram, location);
1372
1373         return ret;
1374 }
1375
/*
 * Private state of a BSD socket based stream.
 */
struct tstream_bsd {
	/* socket file descriptor, -1 once the stream is disconnected */
	int fd;

	/* event context the fd event below was created on */
	void *event_ptr;
	/* fd event shared by the readable and the writeable handler */
	struct tevent_fd *fde;

	/* callback (and its argument) run when the socket is readable */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback (and its argument) run when the socket is writeable */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
1387
1388 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1389                                     struct tevent_fd *fde,
1390                                     uint16_t flags,
1391                                     void *private_data)
1392 {
1393         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1394                                    struct tstream_bsd);
1395
1396         if (flags & TEVENT_FD_WRITE) {
1397                 bsds->writeable_handler(bsds->writeable_private);
1398                 return;
1399         }
1400         if (flags & TEVENT_FD_READ) {
1401                 if (!bsds->readable_handler) {
1402                         if (bsds->writeable_handler) {
1403                                 bsds->writeable_handler(bsds->writeable_private);
1404                                 return;
1405                         }
1406                         TEVENT_FD_NOT_READABLE(bsds->fde);
1407                         return;
1408                 }
1409                 bsds->readable_handler(bsds->readable_private);
1410                 return;
1411         }
1412 }
1413
1414 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1415                                             struct tevent_context *ev,
1416                                             void (*handler)(void *private_data),
1417                                             void *private_data)
1418 {
1419         if (ev == NULL) {
1420                 if (handler) {
1421                         errno = EINVAL;
1422                         return -1;
1423                 }
1424                 if (!bsds->readable_handler) {
1425                         return 0;
1426                 }
1427                 bsds->readable_handler = NULL;
1428                 bsds->readable_private = NULL;
1429
1430                 return 0;
1431         }
1432
1433         /* read and write must use the same tevent_context */
1434         if (bsds->event_ptr != ev) {
1435                 if (bsds->readable_handler || bsds->writeable_handler) {
1436                         errno = EINVAL;
1437                         return -1;
1438                 }
1439                 bsds->event_ptr = NULL;
1440                 TALLOC_FREE(bsds->fde);
1441         }
1442
1443         if (tevent_fd_get_flags(bsds->fde) == 0) {
1444                 TALLOC_FREE(bsds->fde);
1445
1446                 bsds->fde = tevent_add_fd(ev, bsds,
1447                                           bsds->fd, TEVENT_FD_READ,
1448                                           tstream_bsd_fde_handler,
1449                                           bsds);
1450                 if (!bsds->fde) {
1451                         errno = ENOMEM;
1452                         return -1;
1453                 }
1454
1455                 /* cache the event context we're running on */
1456                 bsds->event_ptr = ev;
1457         } else if (!bsds->readable_handler) {
1458                 TEVENT_FD_READABLE(bsds->fde);
1459         }
1460
1461         bsds->readable_handler = handler;
1462         bsds->readable_private = private_data;
1463
1464         return 0;
1465 }
1466
1467 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1468                                              struct tevent_context *ev,
1469                                              void (*handler)(void *private_data),
1470                                              void *private_data)
1471 {
1472         if (ev == NULL) {
1473                 if (handler) {
1474                         errno = EINVAL;
1475                         return -1;
1476                 }
1477                 if (!bsds->writeable_handler) {
1478                         return 0;
1479                 }
1480                 bsds->writeable_handler = NULL;
1481                 bsds->writeable_private = NULL;
1482                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1483
1484                 return 0;
1485         }
1486
1487         /* read and write must use the same tevent_context */
1488         if (bsds->event_ptr != ev) {
1489                 if (bsds->readable_handler || bsds->writeable_handler) {
1490                         errno = EINVAL;
1491                         return -1;
1492                 }
1493                 bsds->event_ptr = NULL;
1494                 TALLOC_FREE(bsds->fde);
1495         }
1496
1497         if (tevent_fd_get_flags(bsds->fde) == 0) {
1498                 TALLOC_FREE(bsds->fde);
1499
1500                 bsds->fde = tevent_add_fd(ev, bsds,
1501                                           bsds->fd,
1502                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1503                                           tstream_bsd_fde_handler,
1504                                           bsds);
1505                 if (!bsds->fde) {
1506                         errno = ENOMEM;
1507                         return -1;
1508                 }
1509
1510                 /* cache the event context we're running on */
1511                 bsds->event_ptr = ev;
1512         } else if (!bsds->writeable_handler) {
1513                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1514                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1515                 tevent_fd_set_flags(bsds->fde, flags);
1516         }
1517
1518         bsds->writeable_handler = handler;
1519         bsds->writeable_private = private_data;
1520
1521         return 0;
1522 }
1523
1524 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1525 {
1526         struct tstream_bsd *bsds = tstream_context_data(stream,
1527                                    struct tstream_bsd);
1528         ssize_t ret;
1529
1530         if (bsds->fd == -1) {
1531                 errno = ENOTCONN;
1532                 return -1;
1533         }
1534
1535         ret = tsocket_bsd_pending(bsds->fd);
1536
1537         return ret;
1538 }
1539
/*
 * Per-request state of an asynchronous readv on a BSD stream socket.
 */
struct tstream_bsd_readv_state {
	/* stream the request operates on */
	struct tstream_context *stream;

	/*
	 * private copy of the caller's vector; pointer and count are
	 * advanced while data arrives
	 */
	struct iovec *vector;
	size_t count;

	/* number of bytes read so far */
	int ret;
};
1548
1549 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1550 {
1551         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1552                                    struct tstream_bsd);
1553
1554         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1555
1556         return 0;
1557 }
1558
1559 static void tstream_bsd_readv_handler(void *private_data);
1560
1561 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1562                                         struct tevent_context *ev,
1563                                         struct tstream_context *stream,
1564                                         struct iovec *vector,
1565                                         size_t count)
1566 {
1567         struct tevent_req *req;
1568         struct tstream_bsd_readv_state *state;
1569         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1570         int ret;
1571
1572         req = tevent_req_create(mem_ctx, &state,
1573                                 struct tstream_bsd_readv_state);
1574         if (!req) {
1575                 return NULL;
1576         }
1577
1578         state->stream   = stream;
1579         /* we make a copy of the vector so that we can modify it */
1580         state->vector   = talloc_array(state, struct iovec, count);
1581         if (tevent_req_nomem(state->vector, req)) {
1582                 goto post;
1583         }
1584         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1585         state->count    = count;
1586         state->ret      = 0;
1587
1588         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1589
1590         if (bsds->fd == -1) {
1591                 tevent_req_error(req, ENOTCONN);
1592                 goto post;
1593         }
1594
1595         /*
1596          * this is a fast path, not waiting for the
1597          * socket to become explicit readable gains
1598          * about 10%-20% performance in benchmark tests.
1599          */
1600         tstream_bsd_readv_handler(req);
1601         if (!tevent_req_is_in_progress(req)) {
1602                 goto post;
1603         }
1604
1605         ret = tstream_bsd_set_readable_handler(bsds, ev,
1606                                               tstream_bsd_readv_handler,
1607                                               req);
1608         if (ret == -1) {
1609                 tevent_req_error(req, errno);
1610                 goto post;
1611         }
1612
1613         return req;
1614
1615  post:
1616         tevent_req_post(req, ev);
1617         return req;
1618 }
1619
1620 static void tstream_bsd_readv_handler(void *private_data)
1621 {
1622         struct tevent_req *req = talloc_get_type_abort(private_data,
1623                                  struct tevent_req);
1624         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1625                                         struct tstream_bsd_readv_state);
1626         struct tstream_context *stream = state->stream;
1627         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1628         int ret;
1629         int err;
1630         bool retry;
1631
1632         ret = readv(bsds->fd, state->vector, state->count);
1633         if (ret == 0) {
1634                 /* propagate end of file */
1635                 tevent_req_error(req, EPIPE);
1636                 return;
1637         }
1638         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1639         if (retry) {
1640                 /* retry later */
1641                 return;
1642         }
1643         if (tevent_req_error(req, err)) {
1644                 return;
1645         }
1646
1647         state->ret += ret;
1648
1649         while (ret > 0) {
1650                 if (ret < state->vector[0].iov_len) {
1651                         uint8_t *base;
1652                         base = (uint8_t *)state->vector[0].iov_base;
1653                         base += ret;
1654                         state->vector[0].iov_base = base;
1655                         state->vector[0].iov_len -= ret;
1656                         break;
1657                 }
1658                 ret -= state->vector[0].iov_len;
1659                 state->vector += 1;
1660                 state->count -= 1;
1661         }
1662
1663         /*
1664          * there're maybe some empty vectors at the end
1665          * which we need to skip, otherwise we would get
1666          * ret == 0 from the readv() call and return EPIPE
1667          */
1668         while (state->count > 0) {
1669                 if (state->vector[0].iov_len > 0) {
1670                         break;
1671                 }
1672                 state->vector += 1;
1673                 state->count -= 1;
1674         }
1675
1676         if (state->count > 0) {
1677                 /* we have more to read */
1678                 return;
1679         }
1680
1681         tevent_req_done(req);
1682 }
1683
1684 static int tstream_bsd_readv_recv(struct tevent_req *req,
1685                                   int *perrno)
1686 {
1687         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1688                                         struct tstream_bsd_readv_state);
1689         int ret;
1690
1691         ret = tsocket_simple_int_recv(req, perrno);
1692         if (ret == 0) {
1693                 ret = state->ret;
1694         }
1695
1696         tevent_req_received(req);
1697         return ret;
1698 }
1699
/*
 * Per-request state of an asynchronous writev on a BSD stream socket.
 */
struct tstream_bsd_writev_state {
	/* stream the request operates on */
	struct tstream_context *stream;

	/*
	 * private copy of the caller's vector; pointer and count are
	 * advanced while data is written
	 */
	struct iovec *vector;
	size_t count;

	/* number of bytes written so far */
	int ret;
};
1708
1709 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1710 {
1711         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1712                                   struct tstream_bsd);
1713
1714         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1715
1716         return 0;
1717 }
1718
1719 static void tstream_bsd_writev_handler(void *private_data);
1720
1721 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1722                                                  struct tevent_context *ev,
1723                                                  struct tstream_context *stream,
1724                                                  const struct iovec *vector,
1725                                                  size_t count)
1726 {
1727         struct tevent_req *req;
1728         struct tstream_bsd_writev_state *state;
1729         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1730         int ret;
1731
1732         req = tevent_req_create(mem_ctx, &state,
1733                                 struct tstream_bsd_writev_state);
1734         if (!req) {
1735                 return NULL;
1736         }
1737
1738         state->stream   = stream;
1739         /* we make a copy of the vector so that we can modify it */
1740         state->vector   = talloc_array(state, struct iovec, count);
1741         if (tevent_req_nomem(state->vector, req)) {
1742                 goto post;
1743         }
1744         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1745         state->count    = count;
1746         state->ret      = 0;
1747
1748         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1749
1750         if (bsds->fd == -1) {
1751                 tevent_req_error(req, ENOTCONN);
1752                 goto post;
1753         }
1754
1755         /*
1756          * this is a fast path, not waiting for the
1757          * socket to become explicit writeable gains
1758          * about 10%-20% performance in benchmark tests.
1759          */
1760         tstream_bsd_writev_handler(req);
1761         if (!tevent_req_is_in_progress(req)) {
1762                 goto post;
1763         }
1764
1765         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1766                                                tstream_bsd_writev_handler,
1767                                                req);
1768         if (ret == -1) {
1769                 tevent_req_error(req, errno);
1770                 goto post;
1771         }
1772
1773         return req;
1774
1775  post:
1776         tevent_req_post(req, ev);
1777         return req;
1778 }
1779
1780 static void tstream_bsd_writev_handler(void *private_data)
1781 {
1782         struct tevent_req *req = talloc_get_type_abort(private_data,
1783                                  struct tevent_req);
1784         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1785                                         struct tstream_bsd_writev_state);
1786         struct tstream_context *stream = state->stream;
1787         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1788         ssize_t ret;
1789         int err;
1790         bool retry;
1791
1792         ret = writev(bsds->fd, state->vector, state->count);
1793         if (ret == 0) {
1794                 /* propagate end of file */
1795                 tevent_req_error(req, EPIPE);
1796                 return;
1797         }
1798         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1799         if (retry) {
1800                 /* retry later */
1801                 return;
1802         }
1803         if (tevent_req_error(req, err)) {
1804                 return;
1805         }
1806
1807         state->ret += ret;
1808
1809         while (ret > 0) {
1810                 if (ret < state->vector[0].iov_len) {
1811                         uint8_t *base;
1812                         base = (uint8_t *)state->vector[0].iov_base;
1813                         base += ret;
1814                         state->vector[0].iov_base = base;
1815                         state->vector[0].iov_len -= ret;
1816                         break;
1817                 }
1818                 ret -= state->vector[0].iov_len;
1819                 state->vector += 1;
1820                 state->count -= 1;
1821         }
1822
1823         /*
1824          * there're maybe some empty vectors at the end
1825          * which we need to skip, otherwise we would get
1826          * ret == 0 from the writev() call and return EPIPE
1827          */
1828         while (state->count > 0) {
1829                 if (state->vector[0].iov_len > 0) {
1830                         break;
1831                 }
1832                 state->vector += 1;
1833                 state->count -= 1;
1834         }
1835
1836         if (state->count > 0) {
1837                 /* we have more to write */
1838                 return;
1839         }
1840
1841         tevent_req_done(req);
1842 }
1843
1844 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1845 {
1846         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1847                                         struct tstream_bsd_writev_state);
1848         int ret;
1849
1850         ret = tsocket_simple_int_recv(req, perrno);
1851         if (ret == 0) {
1852                 ret = state->ret;
1853         }
1854
1855         tevent_req_received(req);
1856         return ret;
1857 }
1858
/*
 * Per-request state of a tstream_bsd disconnect request.
 *
 * The disconnect functions store nothing here; the single placeholder
 * member only keeps the struct non-empty.  It is named without leading
 * underscores because identifiers starting with "__" are reserved for
 * the C implementation.
 */
struct tstream_bsd_disconnect_state {
        void *dummy;
};
1862
1863 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1864                                                      struct tevent_context *ev,
1865                                                      struct tstream_context *stream)
1866 {
1867         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1868         struct tevent_req *req;
1869         struct tstream_bsd_disconnect_state *state;
1870         int ret;
1871         int err;
1872         bool dummy;
1873
1874         req = tevent_req_create(mem_ctx, &state,
1875                                 struct tstream_bsd_disconnect_state);
1876         if (req == NULL) {
1877                 return NULL;
1878         }
1879
1880         if (bsds->fd == -1) {
1881                 tevent_req_error(req, ENOTCONN);
1882                 goto post;
1883         }
1884
1885         ret = close(bsds->fd);
1886         bsds->fd = -1;
1887         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1888         if (tevent_req_error(req, err)) {
1889                 goto post;
1890         }
1891
1892         tevent_req_done(req);
1893 post:
1894         tevent_req_post(req, ev);
1895         return req;
1896 }
1897
/*
 * Collect the result of a tstream_bsd disconnect request.
 *
 * Returns the status reported by tsocket_simple_int_recv(), which is
 * also given perrno, and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        const int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1908
1909 static const struct tstream_context_ops tstream_bsd_ops = {
1910         .name                   = "bsd",
1911
1912         .pending_bytes          = tstream_bsd_pending_bytes,
1913
1914         .readv_send             = tstream_bsd_readv_send,
1915         .readv_recv             = tstream_bsd_readv_recv,
1916
1917         .writev_send            = tstream_bsd_writev_send,
1918         .writev_recv            = tstream_bsd_writev_recv,
1919
1920         .disconnect_send        = tstream_bsd_disconnect_send,
1921         .disconnect_recv        = tstream_bsd_disconnect_recv,
1922 };
1923
1924 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1925 {
1926         TALLOC_FREE(bsds->fde);
1927         if (bsds->fd != -1) {
1928                 close(bsds->fd);
1929                 bsds->fd = -1;
1930         }
1931         return 0;
1932 }
1933
1934 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1935                                  int fd,
1936                                  struct tstream_context **_stream,
1937                                  const char *location)
1938 {
1939         struct tstream_context *stream;
1940         struct tstream_bsd *bsds;
1941
1942         stream = tstream_context_create(mem_ctx,
1943                                         &tstream_bsd_ops,
1944                                         &bsds,
1945                                         struct tstream_bsd,
1946                                         location);
1947         if (!stream) {
1948                 return -1;
1949         }
1950         ZERO_STRUCTP(bsds);
1951         bsds->fd = fd;
1952         talloc_set_destructor(bsds, tstream_bsd_destructor);
1953
1954         *_stream = stream;
1955         return 0;
1956 }
1957
/*
 * Per-request state of a tstream_bsd connect request.
 *
 * fd      socket being connected, -1 while none is owned by the request
 * fde     fd event used while the connect is still in progress
 * stream  not referenced by the connect functions in this file
 */
struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        struct tstream_context *stream;
};
1963
1964 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1965 {
1966         TALLOC_FREE(state->fde);
1967         if (state->fd != -1) {
1968                 close(state->fd);
1969                 state->fd = -1;
1970         }
1971
1972         return 0;
1973 }
1974
1975 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1976                                             struct tevent_fd *fde,
1977                                             uint16_t flags,
1978                                             void *private_data);
1979
1980 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1981                                         struct tevent_context *ev,
1982                                         int sys_errno,
1983                                         const struct tsocket_address *local,
1984                                         const struct tsocket_address *remote)
1985 {
1986         struct tevent_req *req;
1987         struct tstream_bsd_connect_state *state;
1988         struct tsocket_address_bsd *lbsda =
1989                 talloc_get_type_abort(local->private_data,
1990                 struct tsocket_address_bsd);
1991         struct tsocket_address_bsd *rbsda =
1992                 talloc_get_type_abort(remote->private_data,
1993                 struct tsocket_address_bsd);
1994         int ret;
1995         int err;
1996         bool retry;
1997         bool do_bind = false;
1998         bool do_reuseaddr = false;
1999         bool do_ipv6only = false;
2000         bool is_inet = false;
2001         int sa_fam = lbsda->u.sa.sa_family;
2002         socklen_t sa_socklen = sizeof(rbsda->u.ss);
2003
2004         req = tevent_req_create(mem_ctx, &state,
2005                                 struct tstream_bsd_connect_state);
2006         if (!req) {
2007                 return NULL;
2008         }
2009         state->fd = -1;
2010         state->fde = NULL;
2011
2012         talloc_set_destructor(state, tstream_bsd_connect_destructor);
2013
2014         /* give the wrappers a chance to report an error */
2015         if (sys_errno != 0) {
2016                 tevent_req_error(req, sys_errno);
2017                 goto post;
2018         }
2019
2020         switch (lbsda->u.sa.sa_family) {
2021         case AF_UNIX:
2022                 if (lbsda->u.un.sun_path[0] != 0) {
2023                         do_reuseaddr = true;
2024                         do_bind = true;
2025                 }
2026                 /*
2027                  * for unix sockets we can't use the size of sockaddr_storage
2028                  * we would get EINVAL
2029                  */
2030                 sa_socklen = sizeof(rbsda->u.un);
2031                 break;
2032         case AF_INET:
2033                 if (lbsda->u.in.sin_port != 0) {
2034                         do_reuseaddr = true;
2035                         do_bind = true;
2036                 }
2037                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
2038                         do_bind = true;
2039                 }
2040                 is_inet = true;
2041                 sa_socklen = sizeof(rbsda->u.in);
2042                 break;
2043 #ifdef HAVE_IPV6
2044         case AF_INET6:
2045                 if (lbsda->u.in6.sin6_port != 0) {
2046                         do_reuseaddr = true;
2047                         do_bind = true;
2048                 }
2049                 if (memcmp(&in6addr_any,
2050                            &lbsda->u.in6.sin6_addr,
2051                            sizeof(in6addr_any)) != 0) {
2052                         do_bind = true;
2053                 }
2054                 is_inet = true;
2055                 sa_socklen = sizeof(rbsda->u.in6);
2056                 do_ipv6only = true;
2057                 break;
2058 #endif
2059         default:
2060                 tevent_req_error(req, EINVAL);
2061                 goto post;
2062         }
2063
2064         if (!do_bind && is_inet) {
2065                 sa_fam = rbsda->u.sa.sa_family;
2066                 switch (sa_fam) {
2067                 case AF_INET:
2068                         sa_socklen = sizeof(rbsda->u.in);
2069                         do_ipv6only = false;
2070                         break;
2071 #ifdef HAVE_IPV6
2072                 case AF_INET6:
2073                         sa_socklen = sizeof(rbsda->u.in6);
2074                         do_ipv6only = true;
2075                         break;
2076 #endif
2077                 }
2078         }
2079
2080         state->fd = socket(sa_fam, SOCK_STREAM, 0);
2081         if (state->fd == -1) {
2082                 tevent_req_error(req, errno);
2083                 goto post;
2084         }
2085
2086         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2087         if (state->fd == -1) {
2088                 tevent_req_error(req, errno);
2089                 goto post;
2090         }
2091
2092 #ifdef HAVE_IPV6
2093         if (do_ipv6only) {
2094                 int val = 1;
2095
2096                 ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
2097                                  (const void *)&val, sizeof(val));
2098                 if (ret == -1) {
2099                         tevent_req_error(req, errno);
2100                         goto post;
2101                 }
2102         }
2103 #endif
2104
2105         if (do_reuseaddr) {
2106                 int val = 1;
2107
2108                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2109                                  (const void *)&val, sizeof(val));
2110                 if (ret == -1) {
2111                         tevent_req_error(req, errno);
2112                         goto post;
2113                 }
2114         }
2115
2116         if (do_bind) {
2117                 ret = bind(state->fd, &lbsda->u.sa, sa_socklen);
2118                 if (ret == -1) {
2119                         tevent_req_error(req, errno);
2120                         goto post;
2121                 }
2122         }
2123
2124         if (rbsda->u.sa.sa_family != sa_fam) {
2125                 tevent_req_error(req, EINVAL);
2126                 goto post;
2127         }
2128
2129         ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
2130         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2131         if (retry) {
2132                 /* retry later */
2133                 goto async;
2134         }
2135         if (tevent_req_error(req, err)) {
2136                 goto post;
2137         }
2138
2139         tevent_req_done(req);
2140         goto post;
2141
2142  async:
2143         state->fde = tevent_add_fd(ev, state,
2144                                    state->fd,
2145                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2146                                    tstream_bsd_connect_fde_handler,
2147                                    req);
2148         if (tevent_req_nomem(state->fde, req)) {
2149                 goto post;
2150         }
2151
2152         return req;
2153
2154  post:
2155         tevent_req_post(req, ev);
2156         return req;
2157 }
2158
2159 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2160                                             struct tevent_fd *fde,
2161                                             uint16_t flags,
2162                                             void *private_data)
2163 {
2164         struct tevent_req *req = talloc_get_type_abort(private_data,
2165                                  struct tevent_req);
2166         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2167                                         struct tstream_bsd_connect_state);
2168         int ret;
2169         int error=0;
2170         socklen_t len = sizeof(error);
2171         int err;
2172         bool retry;
2173
2174         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2175         if (ret == 0) {
2176                 if (error != 0) {
2177                         errno = error;
2178                         ret = -1;
2179                 }
2180         }
2181         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2182         if (retry) {
2183                 /* retry later */
2184                 return;
2185         }
2186         if (tevent_req_error(req, err)) {
2187                 return;
2188         }
2189
2190         tevent_req_done(req);
2191 }
2192
2193 static int tstream_bsd_connect_recv(struct tevent_req *req,
2194                                     int *perrno,
2195                                     TALLOC_CTX *mem_ctx,
2196                                     struct tstream_context **stream,
2197                                     const char *location)
2198 {
2199         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2200                                         struct tstream_bsd_connect_state);
2201         int ret;
2202
2203         ret = tsocket_simple_int_recv(req, perrno);
2204         if (ret == 0) {
2205                 ret = _tstream_bsd_existing_socket(mem_ctx,
2206                                                    state->fd,
2207                                                    stream,
2208                                                    location);
2209                 if (ret == -1) {
2210                         *perrno = errno;
2211                         goto done;
2212                 }
2213                 TALLOC_FREE(state->fde);
2214                 state->fd = -1;
2215         }
2216
2217 done:
2218         tevent_req_received(req);
2219         return ret;
2220 }
2221
2222 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2223                                         struct tevent_context *ev,
2224                                         const struct tsocket_address *local,
2225                                         const struct tsocket_address *remote)
2226 {
2227         struct tsocket_address_bsd *lbsda =
2228                 talloc_get_type_abort(local->private_data,
2229                 struct tsocket_address_bsd);
2230         struct tevent_req *req;
2231         int sys_errno = 0;
2232
2233         switch (lbsda->u.sa.sa_family) {
2234         case AF_INET:
2235                 break;
2236 #ifdef HAVE_IPV6
2237         case AF_INET6:
2238                 break;
2239 #endif
2240         default:
2241                 sys_errno = EINVAL;
2242                 break;
2243         }
2244
2245         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2246
2247         return req;
2248 }
2249
2250 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2251                                    int *perrno,
2252                                    TALLOC_CTX *mem_ctx,
2253                                    struct tstream_context **stream,
2254                                    const char *location)
2255 {
2256         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2257 }
2258
2259 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2260                                         struct tevent_context *ev,
2261                                         const struct tsocket_address *local,
2262                                         const struct tsocket_address *remote)
2263 {
2264         struct tsocket_address_bsd *lbsda =
2265                 talloc_get_type_abort(local->private_data,
2266                 struct tsocket_address_bsd);
2267         struct tevent_req *req;
2268         int sys_errno = 0;
2269
2270         switch (lbsda->u.sa.sa_family) {
2271         case AF_UNIX:
2272                 break;
2273         default:
2274                 sys_errno = EINVAL;
2275                 break;
2276         }
2277
2278         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2279
2280         return req;
2281 }
2282
2283 int _tstream_unix_connect_recv(struct tevent_req *req,
2284                                       int *perrno,
2285                                       TALLOC_CTX *mem_ctx,
2286                                       struct tstream_context **stream,
2287                                       const char *location)
2288 {
2289         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2290 }
2291
2292 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2293                              struct tstream_context **_stream1,
2294                              TALLOC_CTX *mem_ctx2,
2295                              struct tstream_context **_stream2,
2296                              const char *location)
2297 {
2298         int ret;
2299         int fds[2];
2300         int fd1;
2301         int fd2;
2302         struct tstream_context *stream1 = NULL;
2303         struct tstream_context *stream2 = NULL;
2304
2305         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2306         if (ret == -1) {
2307                 return -1;
2308         }
2309         fd1 = fds[0];
2310         fd2 = fds[1];
2311
2312         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2313         if (fd1 == -1) {
2314                 int sys_errno = errno;
2315                 close(fd2);
2316                 errno = sys_errno;
2317                 return -1;
2318         }
2319
2320         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2321         if (fd2 == -1) {
2322                 int sys_errno = errno;
2323                 close(fd1);
2324                 errno = sys_errno;
2325                 return -1;
2326         }
2327
2328         ret = _tstream_bsd_existing_socket(mem_ctx1,
2329                                            fd1,
2330                                            &stream1,
2331                                            location);
2332         if (ret == -1) {
2333                 int sys_errno = errno;
2334                 close(fd1);
2335                 close(fd2);
2336                 errno = sys_errno;
2337                 return -1;
2338         }
2339
2340         ret = _tstream_bsd_existing_socket(mem_ctx2,
2341                                            fd2,
2342                                            &stream2,
2343                                            location);
2344         if (ret == -1) {
2345                 int sys_errno = errno;
2346                 talloc_free(stream1);
2347                 close(fd2);
2348                 errno = sys_errno;
2349                 return -1;
2350         }
2351
2352         *_stream1 = stream1;
2353         *_stream2 = stream2;
2354         return 0;
2355 }
2356