tsocket: rewrite tsocket_guide.txt to reflect the current APIs
[ira/wip.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tevent
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Translate the outcome of a BSD socket call into an errno style value.
 *
 * ret       - return value of the system call
 * sys_errno - errno value captured right after the call
 * retry     - always written; true when the error is one of the
 *             "try again later" codes (EINTR, EINPROGRESS, EAGAIN,
 *             EWOULDBLOCK)
 *
 * Returns 0 when ret >= 0, EIO for an unexpected failure (ret other
 * than -1, or -1 without errno) and sys_errno otherwise.
 */
static int tsocket_bsd_error_from_errno(int ret,
                                        int sys_errno,
                                        bool *retry)
{
        bool transient;

        *retry = false;

        if (ret >= 0) {
                return 0;
        }

        /* a failing call must return -1 and provide an errno */
        if (ret != -1 || sys_errno == 0) {
                return EIO;
        }

        transient = (sys_errno == EINTR) ||
                    (sys_errno == EINPROGRESS) ||
                    (sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
        transient = transient || (sys_errno == EWOULDBLOCK);
#endif

        if (transient) {
                *retry = true;
        }

        return sys_errno;
}
72
/*
 * Prepare a file descriptor for use by the tsocket code.
 *
 * If high_fd is true the descriptor is dup()ed until it is >= 3 and the
 * low descriptors that were passed through are closed. Afterwards the
 * descriptor is switched to non-blocking mode and, where FD_CLOEXEC is
 * available, marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. When
 * setting the flags fails the descriptor is closed. Passing fd == -1
 * returns -1 without touching errno.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
        int low_fds[3];
        int num_low = 0;
        int saved_errno = 0;
        int fl;
        int idx;

        if (fd == -1) {
                return -1;
        }

        if (high_fd) {
                /* move away from the stdin/stdout/stderr range */
                while (fd < 3) {
                        low_fds[num_low] = fd;
                        num_low += 1;
                        fd = dup(fd);
                        if (fd == -1) {
                                saved_errno = errno;
                                break;
                        }
                }
                for (idx = 0; idx < num_low; idx++) {
                        close(low_fds[idx]);
                }
                if (fd == -1) {
                        /* close() may have clobbered errno */
                        errno = saved_errno;
                        return fd;
                }
        }

        /* pick the platform's spelling of the non-blocking flag */
#ifdef O_NONBLOCK
#define TSOCKET_BSD_NONBLOCK_FLAG O_NONBLOCK
#else
#ifdef SYSV
#define TSOCKET_BSD_NONBLOCK_FLAG O_NDELAY
#else /* BSD */
#define TSOCKET_BSD_NONBLOCK_FLAG FNDELAY
#endif
#endif

        fl = fcntl(fd, F_GETFL);
        if (fl == -1) {
                goto fail;
        }
        if (fcntl(fd, F_SETFL, fl | TSOCKET_BSD_NONBLOCK_FLAG) == -1) {
                goto fail;
        }

#undef TSOCKET_BSD_NONBLOCK_FLAG

        /* the descriptor must not leak into exec()ed programs */
#ifdef FD_CLOEXEC
        fl = fcntl(fd, F_GETFD, 0);
        if (fl < 0) {
                goto fail;
        }
        if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
                goto fail;
        }
#endif
        return fd;

 fail:
        if (fd != -1) {
                /* keep the errno of the failing fcntl() */
                saved_errno = errno;
                close(fd);
                errno = saved_errno;
        }
        return -1;
}
149
/*
 * Ask how many bytes can be read from fd without blocking.
 *
 * Uses ioctl(FIONREAD). If that reports no data, SO_ERROR is queried
 * so that a pending socket error (e.g. an ICMP error on a connected
 * datagram socket) is reported to the caller.
 *
 * Returns the number of pending bytes, 0 if nothing is pending, or -1
 * with errno set.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
        int nbytes = 0;
        int so_error = 0;
        socklen_t so_error_len = sizeof(so_error);
        int rc;

        rc = ioctl(fd, FIONREAD, &nbytes);
        if (rc == -1) {
                return rc;
        }
        if (rc != 0) {
                /* this should not be reached */
                errno = EIO;
                return -1;
        }

        if (nbytes != 0) {
                return nbytes;
        }

        /*
         * Nothing to read: check whether the socket is in an error
         * state. For dgram sockets this is how ICMP error messages of
         * connected sockets are returned to the caller.
         */
        rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len);
        if (rc == -1) {
                return rc;
        }
        if (so_error != 0) {
                errno = so_error;
                return -1;
        }

        return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private data of a "bsd" tsocket_address: one socket address, viewed
 * through whichever member matches u.sa.sa_family.
 */
struct tsocket_address_bsd {
        union {
                /* generic view, used to read the address family */
                struct sockaddr sa;
                /* AF_INET */
                struct sockaddr_in in;
#ifdef HAVE_IPV6
                /* AF_INET6 */
                struct sockaddr_in6 in6;
#endif
                /* AF_UNIX */
                struct sockaddr_un un;
                /* storage view, used as the copy target for any family */
                struct sockaddr_storage ss;
        } u;
};
203
204 static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                               struct sockaddr *sa,
206                                               socklen_t sa_socklen,
207                                               struct tsocket_address **_addr,
208                                               const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         switch (sa->sa_family) {
214         case AF_UNIX:
215                 if (sa_socklen < sizeof(struct sockaddr_un)) {
216                         errno = EINVAL;
217                         return -1;
218                 }
219                 break;
220         case AF_INET:
221                 if (sa_socklen < sizeof(struct sockaddr_in)) {
222                         errno = EINVAL;
223                         return -1;
224                 }
225                 break;
226 #ifdef HAVE_IPV6
227         case AF_INET6:
228                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
229                         errno = EINVAL;
230                         return -1;
231                 }
232                 break;
233 #endif
234         default:
235                 errno = EAFNOSUPPORT;
236                 return -1;
237         }
238
239         if (sa_socklen > sizeof(struct sockaddr_storage)) {
240                 errno = EINVAL;
241                 return -1;
242         }
243
244         addr = tsocket_address_create(mem_ctx,
245                                       &tsocket_address_bsd_ops,
246                                       &bsda,
247                                       struct tsocket_address_bsd,
248                                       location);
249         if (!addr) {
250                 errno = ENOMEM;
251                 return -1;
252         }
253
254         ZERO_STRUCTP(bsda);
255
256         memcpy(&bsda->u.ss, sa, sa_socklen);
257
258         *_addr = addr;
259         return 0;
260 }
261
262 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
263                                        const char *fam,
264                                        const char *addr,
265                                        uint16_t port,
266                                        struct tsocket_address **_addr,
267                                        const char *location)
268 {
269         struct addrinfo hints;
270         struct addrinfo *result = NULL;
271         char port_str[6];
272         int ret;
273
274         ZERO_STRUCT(hints);
275         /*
276          * we use SOCKET_STREAM here to get just one result
277          * back from getaddrinfo().
278          */
279         hints.ai_socktype = SOCK_STREAM;
280         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
281
282         if (strcasecmp(fam, "ip") == 0) {
283                 hints.ai_family = AF_UNSPEC;
284                 if (!addr) {
285 #ifdef HAVE_IPV6
286                         addr = "::";
287 #else
288                         addr = "0.0.0.0";
289 #endif
290                 }
291         } else if (strcasecmp(fam, "ipv4") == 0) {
292                 hints.ai_family = AF_INET;
293                 if (!addr) {
294                         addr = "0.0.0.0";
295                 }
296 #ifdef HAVE_IPV6
297         } else if (strcasecmp(fam, "ipv6") == 0) {
298                 hints.ai_family = AF_INET6;
299                 if (!addr) {
300                         addr = "::";
301                 }
302 #endif
303         } else {
304                 errno = EAFNOSUPPORT;
305                 return -1;
306         }
307
308         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
309
310         ret = getaddrinfo(addr, port_str, &hints, &result);
311         if (ret != 0) {
312                 switch (ret) {
313                 case EAI_FAIL:
314                         errno = EINVAL;
315                         break;
316                 }
317                 ret = -1;
318                 goto done;
319         }
320
321         if (result->ai_socktype != SOCK_STREAM) {
322                 errno = EINVAL;
323                 ret = -1;
324                 goto done;
325         }
326
327         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
328                                                   result->ai_addr,
329                                                   result->ai_addrlen,
330                                                   _addr,
331                                                   location);
332
333 done:
334         if (result) {
335                 freeaddrinfo(result);
336         }
337         return ret;
338 }
339
340 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
341                                        TALLOC_CTX *mem_ctx)
342 {
343         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
344                                            struct tsocket_address_bsd);
345         char addr_str[INET6_ADDRSTRLEN+1];
346         const char *str;
347
348         if (!bsda) {
349                 errno = EINVAL;
350                 return NULL;
351         }
352
353         switch (bsda->u.sa.sa_family) {
354         case AF_INET:
355                 str = inet_ntop(bsda->u.in.sin_family,
356                                 &bsda->u.in.sin_addr,
357                                 addr_str, sizeof(addr_str));
358                 break;
359 #ifdef HAVE_IPV6
360         case AF_INET6:
361                 str = inet_ntop(bsda->u.in6.sin6_family,
362                                 &bsda->u.in6.sin6_addr,
363                                 addr_str, sizeof(addr_str));
364                 break;
365 #endif
366         default:
367                 errno = EINVAL;
368                 return NULL;
369         }
370
371         if (!str) {
372                 return NULL;
373         }
374
375         return talloc_strdup(mem_ctx, str);
376 }
377
378 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
379 {
380         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
381                                            struct tsocket_address_bsd);
382         uint16_t port = 0;
383
384         if (!bsda) {
385                 errno = EINVAL;
386                 return 0;
387         }
388
389         switch (bsda->u.sa.sa_family) {
390         case AF_INET:
391                 port = ntohs(bsda->u.in.sin_port);
392                 break;
393 #ifdef HAVE_IPV6
394         case AF_INET6:
395                 port = ntohs(bsda->u.in6.sin6_port);
396                 break;
397 #endif
398         default:
399                 errno = EINVAL;
400                 return 0;
401         }
402
403         return port;
404 }
405
406 int tsocket_address_inet_set_port(struct tsocket_address *addr,
407                                   uint16_t port)
408 {
409         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
410                                            struct tsocket_address_bsd);
411
412         if (!bsda) {
413                 errno = EINVAL;
414                 return -1;
415         }
416
417         switch (bsda->u.sa.sa_family) {
418         case AF_INET:
419                 bsda->u.in.sin_port = htons(port);
420                 break;
421 #ifdef HAVE_IPV6
422         case AF_INET6:
423                 bsda->u.in6.sin6_port = htons(port);
424                 break;
425 #endif
426         default:
427                 errno = EINVAL;
428                 return -1;
429         }
430
431         return 0;
432 }
433
434 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
435                                     const char *path,
436                                     struct tsocket_address **_addr,
437                                     const char *location)
438 {
439         struct sockaddr_un un;
440         void *p = &un;
441         int ret;
442
443         if (!path) {
444                 path = "";
445         }
446
447         if (strlen(path) > sizeof(un.sun_path)-1) {
448                 errno = ENAMETOOLONG;
449                 return -1;
450         }
451
452         ZERO_STRUCT(un);
453         un.sun_family = AF_UNIX;
454         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
455
456         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
457                                                  (struct sockaddr *)p,
458                                                  sizeof(un),
459                                                  _addr,
460                                                  location);
461
462         return ret;
463 }
464
465 char *tsocket_address_unix_path(const struct tsocket_address *addr,
466                                 TALLOC_CTX *mem_ctx)
467 {
468         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
469                                            struct tsocket_address_bsd);
470         const char *str;
471
472         if (!bsda) {
473                 errno = EINVAL;
474                 return NULL;
475         }
476
477         switch (bsda->u.sa.sa_family) {
478         case AF_UNIX:
479                 str = bsda->u.un.sun_path;
480                 break;
481         default:
482                 errno = EINVAL;
483                 return NULL;
484         }
485
486         return talloc_strdup(mem_ctx, str);
487 }
488
489 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
490                                         TALLOC_CTX *mem_ctx)
491 {
492         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
493                                            struct tsocket_address_bsd);
494         char *str;
495         char *addr_str;
496         const char *prefix = NULL;
497         uint16_t port;
498
499         switch (bsda->u.sa.sa_family) {
500         case AF_UNIX:
501                 return talloc_asprintf(mem_ctx, "unix:%s",
502                                        bsda->u.un.sun_path);
503         case AF_INET:
504                 prefix = "ipv4";
505                 break;
506 #ifdef HAVE_IPV6
507         case AF_INET6:
508                 prefix = "ipv6";
509                 break;
510 #endif
511         default:
512                 errno = EINVAL;
513                 return NULL;
514         }
515
516         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
517         if (!addr_str) {
518                 return NULL;
519         }
520
521         port = tsocket_address_inet_port(addr);
522
523         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
524                               prefix, addr_str, port);
525         talloc_free(addr_str);
526
527         return str;
528 }
529
530 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
531                                                          TALLOC_CTX *mem_ctx,
532                                                          const char *location)
533 {
534         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
535                                            struct tsocket_address_bsd);
536         struct tsocket_address *copy;
537         int ret;
538
539         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
540                                                  &bsda->u.sa,
541                                                  sizeof(bsda->u.ss),
542                                                  &copy,
543                                                  location);
544         if (ret != 0) {
545                 return NULL;
546         }
547
548         return copy;
549 }
550
551 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
552         .name           = "bsd",
553         .string         = tsocket_address_bsd_string,
554         .copy           = tsocket_address_bsd_copy,
555 };
556
/* per-socket state of a BSD datagram socket */
struct tdgram_bsd {
        /* the socket; the send paths treat -1 as "not connected" */
        int fd;

        /* event context the fd event was registered on (compared only) */
        void *event_ptr;
        /* fd event shared by the readable and the writeable handler */
        struct tevent_fd *fde;

        /* callback (and its argument) invoked when the fd is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) invoked when the fd is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
568
569 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
570                                    struct tevent_fd *fde,
571                                    uint16_t flags,
572                                    void *private_data)
573 {
574         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
575                                   struct tdgram_bsd);
576
577         if (flags & TEVENT_FD_WRITE) {
578                 bsds->writeable_handler(bsds->writeable_private);
579                 return;
580         }
581         if (flags & TEVENT_FD_READ) {
582                 if (!bsds->readable_handler) {
583                         TEVENT_FD_NOT_READABLE(bsds->fde);
584                         return;
585                 }
586                 bsds->readable_handler(bsds->readable_private);
587                 return;
588         }
589 }
590
591 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
592                                            struct tevent_context *ev,
593                                            void (*handler)(void *private_data),
594                                            void *private_data)
595 {
596         if (ev == NULL) {
597                 if (handler) {
598                         errno = EINVAL;
599                         return -1;
600                 }
601                 if (!bsds->readable_handler) {
602                         return 0;
603                 }
604                 bsds->readable_handler = NULL;
605                 bsds->readable_private = NULL;
606
607                 return 0;
608         }
609
610         /* read and write must use the same tevent_context */
611         if (bsds->event_ptr != ev) {
612                 if (bsds->readable_handler || bsds->writeable_handler) {
613                         errno = EINVAL;
614                         return -1;
615                 }
616                 bsds->event_ptr = NULL;
617                 TALLOC_FREE(bsds->fde);
618         }
619
620         if (tevent_fd_get_flags(bsds->fde) == 0) {
621                 TALLOC_FREE(bsds->fde);
622
623                 bsds->fde = tevent_add_fd(ev, bsds,
624                                           bsds->fd, TEVENT_FD_READ,
625                                           tdgram_bsd_fde_handler,
626                                           bsds);
627                 if (!bsds->fde) {
628                         errno = ENOMEM;
629                         return -1;
630                 }
631
632                 /* cache the event context we're running on */
633                 bsds->event_ptr = ev;
634         } else if (!bsds->readable_handler) {
635                 TEVENT_FD_READABLE(bsds->fde);
636         }
637
638         bsds->readable_handler = handler;
639         bsds->readable_private = private_data;
640
641         return 0;
642 }
643
644 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
645                                             struct tevent_context *ev,
646                                             void (*handler)(void *private_data),
647                                             void *private_data)
648 {
649         if (ev == NULL) {
650                 if (handler) {
651                         errno = EINVAL;
652                         return -1;
653                 }
654                 if (!bsds->writeable_handler) {
655                         return 0;
656                 }
657                 bsds->writeable_handler = NULL;
658                 bsds->writeable_private = NULL;
659                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
660
661                 return 0;
662         }
663
664         /* read and write must use the same tevent_context */
665         if (bsds->event_ptr != ev) {
666                 if (bsds->readable_handler || bsds->writeable_handler) {
667                         errno = EINVAL;
668                         return -1;
669                 }
670                 bsds->event_ptr = NULL;
671                 TALLOC_FREE(bsds->fde);
672         }
673
674         if (tevent_fd_get_flags(bsds->fde) == 0) {
675                 TALLOC_FREE(bsds->fde);
676
677                 bsds->fde = tevent_add_fd(ev, bsds,
678                                           bsds->fd, TEVENT_FD_WRITE,
679                                           tdgram_bsd_fde_handler,
680                                           bsds);
681                 if (!bsds->fde) {
682                         errno = ENOMEM;
683                         return -1;
684                 }
685
686                 /* cache the event context we're running on */
687                 bsds->event_ptr = ev;
688         } else if (!bsds->writeable_handler) {
689                 TEVENT_FD_WRITEABLE(bsds->fde);
690         }
691
692         bsds->writeable_handler = handler;
693         bsds->writeable_private = private_data;
694
695         return 0;
696 }
697
/* state of one pending recvfrom request */
struct tdgram_bsd_recvfrom_state {
        /* the datagram socket the request operates on */
        struct tdgram_context *dgram;

        /* received datagram and its length, filled in by the handler */
        uint8_t *buf;
        size_t len;
        /* sender address, filled in by the handler */
        struct tsocket_address *src;
};
705
706 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
707 {
708         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
709                                   struct tdgram_bsd);
710
711         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
712
713         return 0;
714 }
715
716 static void tdgram_bsd_recvfrom_handler(void *private_data);
717
718 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
719                                         struct tevent_context *ev,
720                                         struct tdgram_context *dgram)
721 {
722         struct tevent_req *req;
723         struct tdgram_bsd_recvfrom_state *state;
724         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
725         int ret;
726
727         req = tevent_req_create(mem_ctx, &state,
728                                 struct tdgram_bsd_recvfrom_state);
729         if (!req) {
730                 return NULL;
731         }
732
733         state->dgram    = dgram;
734         state->buf      = NULL;
735         state->len      = 0;
736         state->src      = NULL;
737
738         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
739
740         if (bsds->fd == -1) {
741                 tevent_req_error(req, ENOTCONN);
742                 goto post;
743         }
744
745         /*
746          * this is a fast path, not waiting for the
747          * socket to become explicit readable gains
748          * about 10%-20% performance in benchmark tests.
749          */
750         tdgram_bsd_recvfrom_handler(req);
751         if (!tevent_req_is_in_progress(req)) {
752                 goto post;
753         }
754
755         ret = tdgram_bsd_set_readable_handler(bsds, ev,
756                                               tdgram_bsd_recvfrom_handler,
757                                               req);
758         if (ret == -1) {
759                 tevent_req_error(req, errno);
760                 goto post;
761         }
762
763         return req;
764
765  post:
766         tevent_req_post(req, ev);
767         return req;
768 }
769
770 static void tdgram_bsd_recvfrom_handler(void *private_data)
771 {
772         struct tevent_req *req = talloc_get_type_abort(private_data,
773                                  struct tevent_req);
774         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
775                                         struct tdgram_bsd_recvfrom_state);
776         struct tdgram_context *dgram = state->dgram;
777         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
778         struct tsocket_address_bsd *bsda;
779         ssize_t ret;
780         struct sockaddr *sa = NULL;
781         socklen_t sa_socklen = 0;
782         int err;
783         bool retry;
784
785         ret = tsocket_bsd_pending(bsds->fd);
786         if (ret == 0) {
787                 /* retry later */
788                 return;
789         }
790         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
791         if (retry) {
792                 /* retry later */
793                 return;
794         }
795         if (tevent_req_error(req, err)) {
796                 return;
797         }
798
799         state->buf = talloc_array(state, uint8_t, ret);
800         if (tevent_req_nomem(state->buf, req)) {
801                 return;
802         }
803         state->len = ret;
804
805         state->src = tsocket_address_create(state,
806                                             &tsocket_address_bsd_ops,
807                                             &bsda,
808                                             struct tsocket_address_bsd,
809                                             __location__ "bsd_recvfrom");
810         if (tevent_req_nomem(state->src, req)) {
811                 return;
812         }
813
814         ZERO_STRUCTP(bsda);
815
816         sa = &bsda->u.sa;
817         sa_socklen = sizeof(bsda->u.ss);
818         /*
819          * for unix sockets we can't use the size of sockaddr_storage
820          * we would get EINVAL
821          */
822         if (bsda->u.sa.sa_family == AF_UNIX) {
823                 sa_socklen = sizeof(bsda->u.un);
824         }
825
826         ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
827         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
828         if (retry) {
829                 /* retry later */
830                 return;
831         }
832         if (tevent_req_error(req, err)) {
833                 return;
834         }
835
836         if (ret != state->len) {
837                 tevent_req_error(req, EIO);
838                 return;
839         }
840
841         tevent_req_done(req);
842 }
843
844 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
845                                         int *perrno,
846                                         TALLOC_CTX *mem_ctx,
847                                         uint8_t **buf,
848                                         struct tsocket_address **src)
849 {
850         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
851                                         struct tdgram_bsd_recvfrom_state);
852         ssize_t ret;
853
854         ret = tsocket_simple_int_recv(req, perrno);
855         if (ret == 0) {
856                 *buf = talloc_move(mem_ctx, &state->buf);
857                 ret = state->len;
858                 if (src) {
859                         *src = talloc_move(mem_ctx, &state->src);
860                 }
861         }
862
863         tevent_req_received(req);
864         return ret;
865 }
866
/* state of one pending sendto request */
struct tdgram_bsd_sendto_state {
        /* the datagram socket the request operates on */
        struct tdgram_context *dgram;

        /* caller provided payload and its length */
        const uint8_t *buf;
        size_t len;
        /* destination address; NULL sends without an explicit address */
        const struct tsocket_address *dst;

        /* return value of sendto(); -1 until the send succeeded */
        ssize_t ret;
};
876
877 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
878 {
879         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
880                                   struct tdgram_bsd);
881
882         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
883
884         return 0;
885 }
886
887 static void tdgram_bsd_sendto_handler(void *private_data);
888
889 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
890                                                  struct tevent_context *ev,
891                                                  struct tdgram_context *dgram,
892                                                  const uint8_t *buf,
893                                                  size_t len,
894                                                  const struct tsocket_address *dst)
895 {
896         struct tevent_req *req;
897         struct tdgram_bsd_sendto_state *state;
898         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
899         int ret;
900
901         req = tevent_req_create(mem_ctx, &state,
902                                 struct tdgram_bsd_sendto_state);
903         if (!req) {
904                 return NULL;
905         }
906
907         state->dgram    = dgram;
908         state->buf      = buf;
909         state->len      = len;
910         state->dst      = dst;
911         state->ret      = -1;
912
913         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
914
915         if (bsds->fd == -1) {
916                 tevent_req_error(req, ENOTCONN);
917                 goto post;
918         }
919
920         /*
921          * this is a fast path, not waiting for the
922          * socket to become explicit writeable gains
923          * about 10%-20% performance in benchmark tests.
924          */
925         tdgram_bsd_sendto_handler(req);
926         if (!tevent_req_is_in_progress(req)) {
927                 goto post;
928         }
929
930         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
931                                                tdgram_bsd_sendto_handler,
932                                                req);
933         if (ret == -1) {
934                 tevent_req_error(req, errno);
935                 goto post;
936         }
937
938         return req;
939
940  post:
941         tevent_req_post(req, ev);
942         return req;
943 }
944
945 static void tdgram_bsd_sendto_handler(void *private_data)
946 {
947         struct tevent_req *req = talloc_get_type_abort(private_data,
948                                  struct tevent_req);
949         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
950                                         struct tdgram_bsd_sendto_state);
951         struct tdgram_context *dgram = state->dgram;
952         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
953         struct sockaddr *sa = NULL;
954         socklen_t sa_socklen = 0;
955         ssize_t ret;
956         int err;
957         bool retry;
958
959         if (state->dst) {
960                 struct tsocket_address_bsd *bsda =
961                         talloc_get_type(state->dst->private_data,
962                         struct tsocket_address_bsd);
963
964                 sa = &bsda->u.sa;
965                 sa_socklen = sizeof(bsda->u.ss);
966                 /*
967                  * for unix sockets we can't use the size of sockaddr_storage
968                  * we would get EINVAL
969                  */
970                 if (bsda->u.sa.sa_family == AF_UNIX) {
971                         sa_socklen = sizeof(bsda->u.un);
972                 }
973         }
974
975         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
976         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
977         if (retry) {
978                 /* retry later */
979                 return;
980         }
981         if (tevent_req_error(req, err)) {
982                 return;
983         }
984
985         state->ret = ret;
986
987         tevent_req_done(req);
988 }
989
990 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
991 {
992         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
993                                         struct tdgram_bsd_sendto_state);
994         ssize_t ret;
995
996         ret = tsocket_simple_int_recv(req, perrno);
997         if (ret == 0) {
998                 ret = state->ret;
999         }
1000
1001         tevent_req_received(req);
1002         return ret;
1003 }
1004
/*
 * Request state of a datagram disconnect. The disconnect completes
 * inside tdgram_bsd_disconnect_send(), so nothing needs to be stored;
 * the single member only serves as a placeholder.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy;
};
1008
1009 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1010                                                      struct tevent_context *ev,
1011                                                      struct tdgram_context *dgram)
1012 {
1013         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1014         struct tevent_req *req;
1015         struct tdgram_bsd_disconnect_state *state;
1016         int ret;
1017         int err;
1018         bool dummy;
1019
1020         req = tevent_req_create(mem_ctx, &state,
1021                                 struct tdgram_bsd_disconnect_state);
1022         if (req == NULL) {
1023                 return NULL;
1024         }
1025
1026         if (bsds->fd == -1) {
1027                 tevent_req_error(req, ENOTCONN);
1028                 goto post;
1029         }
1030
1031         ret = close(bsds->fd);
1032         bsds->fd = -1;
1033         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1034         if (tevent_req_error(req, err)) {
1035                 goto post;
1036         }
1037
1038         tevent_req_done(req);
1039 post:
1040         tevent_req_post(req, ev);
1041         return req;
1042 }
1043
/*
 * Collect the result of a datagram disconnect request.
 *
 * Passes through the result of tsocket_simple_int_recv() and marks
 * the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int status = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return status;
}
1054
/*
 * Operation table of the "bsd" datagram backend; it is handed to
 * tdgram_context_create() when a new context is set up.
 */
static const struct tdgram_context_ops tdgram_bsd_ops = {
        .name                   = "bsd",

        /* receive one datagram */
        .recvfrom_send          = tdgram_bsd_recvfrom_send,
        .recvfrom_recv          = tdgram_bsd_recvfrom_recv,

        /* send one datagram */
        .sendto_send            = tdgram_bsd_sendto_send,
        .sendto_recv            = tdgram_bsd_sendto_recv,

        /* close the socket */
        .disconnect_send        = tdgram_bsd_disconnect_send,
        .disconnect_recv        = tdgram_bsd_disconnect_recv,
};
1067
1068 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1069 {
1070         TALLOC_FREE(bsds->fde);
1071         if (bsds->fd != -1) {
1072                 close(bsds->fd);
1073                 bsds->fd = -1;
1074         }
1075         return 0;
1076 }
1077
1078 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1079                                    const struct tsocket_address *remote,
1080                                    bool broadcast,
1081                                    TALLOC_CTX *mem_ctx,
1082                                    struct tdgram_context **_dgram,
1083                                    const char *location)
1084 {
1085         struct tsocket_address_bsd *lbsda =
1086                 talloc_get_type_abort(local->private_data,
1087                 struct tsocket_address_bsd);
1088         struct tsocket_address_bsd *rbsda = NULL;
1089         struct tdgram_context *dgram;
1090         struct tdgram_bsd *bsds;
1091         int fd;
1092         int ret;
1093         bool do_bind = false;
1094         bool do_reuseaddr = false;
1095         socklen_t sa_socklen = sizeof(lbsda->u.ss);
1096
1097         if (remote) {
1098                 rbsda = talloc_get_type_abort(remote->private_data,
1099                         struct tsocket_address_bsd);
1100         }
1101
1102         switch (lbsda->u.sa.sa_family) {
1103         case AF_UNIX:
1104                 if (broadcast) {
1105                         errno = EINVAL;
1106                         return -1;
1107                 }
1108                 if (lbsda->u.un.sun_path[0] != 0) {
1109                         do_reuseaddr = true;
1110                         do_bind = true;
1111                 }
1112                 /*
1113                  * for unix sockets we can't use the size of sockaddr_storage
1114                  * we would get EINVAL
1115                  */
1116                 sa_socklen = sizeof(lbsda->u.un);
1117                 break;
1118         case AF_INET:
1119                 if (lbsda->u.in.sin_port != 0) {
1120                         do_reuseaddr = true;
1121                         do_bind = true;
1122                 }
1123                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1124                         do_bind = true;
1125                 }
1126                 break;
1127 #ifdef HAVE_IPV6
1128         case AF_INET6:
1129                 if (lbsda->u.in6.sin6_port != 0) {
1130                         do_reuseaddr = true;
1131                         do_bind = true;
1132                 }
1133                 if (memcmp(&in6addr_any,
1134                            &lbsda->u.in6.sin6_addr,
1135                            sizeof(in6addr_any)) != 0) {
1136                         do_bind = true;
1137                 }
1138                 break;
1139 #endif
1140         default:
1141                 errno = EINVAL;
1142                 return -1;
1143         }
1144
1145         fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
1146         if (fd < 0) {
1147                 return fd;
1148         }
1149
1150         fd = tsocket_bsd_common_prepare_fd(fd, true);
1151         if (fd < 0) {
1152                 return fd;
1153         }
1154
1155         dgram = tdgram_context_create(mem_ctx,
1156                                       &tdgram_bsd_ops,
1157                                       &bsds,
1158                                       struct tdgram_bsd,
1159                                       location);
1160         if (!dgram) {
1161                 int saved_errno = errno;
1162                 close(fd);
1163                 errno = saved_errno;
1164                 return -1;
1165         }
1166         ZERO_STRUCTP(bsds);
1167         bsds->fd = fd;
1168         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1169
1170         if (broadcast) {
1171                 int val = 1;
1172
1173                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1174                                  (const void *)&val, sizeof(val));
1175                 if (ret == -1) {
1176                         int saved_errno = errno;
1177                         talloc_free(dgram);
1178                         errno = saved_errno;
1179                         return ret;
1180                 }
1181         }
1182
1183         if (do_reuseaddr) {
1184                 int val = 1;
1185
1186                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1187                                  (const void *)&val, sizeof(val));
1188                 if (ret == -1) {
1189                         int saved_errno = errno;
1190                         talloc_free(dgram);
1191                         errno = saved_errno;
1192                         return ret;
1193                 }
1194         }
1195
1196         if (do_bind) {
1197                 ret = bind(fd, &lbsda->u.sa, sa_socklen);
1198                 if (ret == -1) {
1199                         int saved_errno = errno;
1200                         talloc_free(dgram);
1201                         errno = saved_errno;
1202                         return ret;
1203                 }
1204         }
1205
1206         if (rbsda) {
1207                 ret = connect(fd, &rbsda->u.sa, sa_socklen);
1208                 if (ret == -1) {
1209                         int saved_errno = errno;
1210                         talloc_free(dgram);
1211                         errno = saved_errno;
1212                         return ret;
1213                 }
1214         }
1215
1216         *_dgram = dgram;
1217         return 0;
1218 }
1219
1220 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1221                             const struct tsocket_address *remote,
1222                             TALLOC_CTX *mem_ctx,
1223                             struct tdgram_context **dgram,
1224                             const char *location)
1225 {
1226         struct tsocket_address_bsd *lbsda =
1227                 talloc_get_type_abort(local->private_data,
1228                 struct tsocket_address_bsd);
1229         int ret;
1230
1231         switch (lbsda->u.sa.sa_family) {
1232         case AF_INET:
1233                 break;
1234 #ifdef HAVE_IPV6
1235         case AF_INET6:
1236                 break;
1237 #endif
1238         default:
1239                 errno = EINVAL;
1240                 return -1;
1241         }
1242
1243         ret = tdgram_bsd_dgram_socket(local, remote, false,
1244                                       mem_ctx, dgram, location);
1245
1246         return ret;
1247 }
1248
1249 int _tdgram_unix_socket(const struct tsocket_address *local,
1250                         const struct tsocket_address *remote,
1251                         TALLOC_CTX *mem_ctx,
1252                         struct tdgram_context **dgram,
1253                         const char *location)
1254 {
1255         struct tsocket_address_bsd *lbsda =
1256                 talloc_get_type_abort(local->private_data,
1257                 struct tsocket_address_bsd);
1258         int ret;
1259
1260         switch (lbsda->u.sa.sa_family) {
1261         case AF_UNIX:
1262                 break;
1263         default:
1264                 errno = EINVAL;
1265                 return -1;
1266         }
1267
1268         ret = tdgram_bsd_dgram_socket(local, remote, false,
1269                                       mem_ctx, dgram, location);
1270
1271         return ret;
1272 }
1273
/*
 * Private data of a stream context backed by a BSD socket.
 */
struct tstream_bsd {
        /* socket descriptor, set to -1 once it has been closed */
        int fd;

        /* the tevent context the fde below was created on */
        void *event_ptr;
        /* fd event created with tevent_add_fd(), NULL if there is none */
        struct tevent_fd *fde;

        /* callback and its argument for a readable socket */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback and its argument for a writeable socket */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1285
1286 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1287                                     struct tevent_fd *fde,
1288                                     uint16_t flags,
1289                                     void *private_data)
1290 {
1291         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1292                                    struct tstream_bsd);
1293
1294         if (flags & TEVENT_FD_WRITE) {
1295                 bsds->writeable_handler(bsds->writeable_private);
1296                 return;
1297         }
1298         if (flags & TEVENT_FD_READ) {
1299                 if (!bsds->readable_handler) {
1300                         if (bsds->writeable_handler) {
1301                                 bsds->writeable_handler(bsds->writeable_private);
1302                                 return;
1303                         }
1304                         TEVENT_FD_NOT_READABLE(bsds->fde);
1305                         return;
1306                 }
1307                 bsds->readable_handler(bsds->readable_private);
1308                 return;
1309         }
1310 }
1311
1312 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1313                                             struct tevent_context *ev,
1314                                             void (*handler)(void *private_data),
1315                                             void *private_data)
1316 {
1317         if (ev == NULL) {
1318                 if (handler) {
1319                         errno = EINVAL;
1320                         return -1;
1321                 }
1322                 if (!bsds->readable_handler) {
1323                         return 0;
1324                 }
1325                 bsds->readable_handler = NULL;
1326                 bsds->readable_private = NULL;
1327
1328                 return 0;
1329         }
1330
1331         /* read and write must use the same tevent_context */
1332         if (bsds->event_ptr != ev) {
1333                 if (bsds->readable_handler || bsds->writeable_handler) {
1334                         errno = EINVAL;
1335                         return -1;
1336                 }
1337                 bsds->event_ptr = NULL;
1338                 TALLOC_FREE(bsds->fde);
1339         }
1340
1341         if (tevent_fd_get_flags(bsds->fde) == 0) {
1342                 TALLOC_FREE(bsds->fde);
1343
1344                 bsds->fde = tevent_add_fd(ev, bsds,
1345                                           bsds->fd, TEVENT_FD_READ,
1346                                           tstream_bsd_fde_handler,
1347                                           bsds);
1348                 if (!bsds->fde) {
1349                         errno = ENOMEM;
1350                         return -1;
1351                 }
1352
1353                 /* cache the event context we're running on */
1354                 bsds->event_ptr = ev;
1355         } else if (!bsds->readable_handler) {
1356                 TEVENT_FD_READABLE(bsds->fde);
1357         }
1358
1359         bsds->readable_handler = handler;
1360         bsds->readable_private = private_data;
1361
1362         return 0;
1363 }
1364
1365 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1366                                              struct tevent_context *ev,
1367                                              void (*handler)(void *private_data),
1368                                              void *private_data)
1369 {
1370         if (ev == NULL) {
1371                 if (handler) {
1372                         errno = EINVAL;
1373                         return -1;
1374                 }
1375                 if (!bsds->writeable_handler) {
1376                         return 0;
1377                 }
1378                 bsds->writeable_handler = NULL;
1379                 bsds->writeable_private = NULL;
1380                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1381
1382                 return 0;
1383         }
1384
1385         /* read and write must use the same tevent_context */
1386         if (bsds->event_ptr != ev) {
1387                 if (bsds->readable_handler || bsds->writeable_handler) {
1388                         errno = EINVAL;
1389                         return -1;
1390                 }
1391                 bsds->event_ptr = NULL;
1392                 TALLOC_FREE(bsds->fde);
1393         }
1394
1395         if (tevent_fd_get_flags(bsds->fde) == 0) {
1396                 TALLOC_FREE(bsds->fde);
1397
1398                 bsds->fde = tevent_add_fd(ev, bsds,
1399                                           bsds->fd,
1400                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1401                                           tstream_bsd_fde_handler,
1402                                           bsds);
1403                 if (!bsds->fde) {
1404                         errno = ENOMEM;
1405                         return -1;
1406                 }
1407
1408                 /* cache the event context we're running on */
1409                 bsds->event_ptr = ev;
1410         } else if (!bsds->writeable_handler) {
1411                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1412                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1413                 tevent_fd_set_flags(bsds->fde, flags);
1414         }
1415
1416         bsds->writeable_handler = handler;
1417         bsds->writeable_private = private_data;
1418
1419         return 0;
1420 }
1421
1422 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1423 {
1424         struct tstream_bsd *bsds = tstream_context_data(stream,
1425                                    struct tstream_bsd);
1426         ssize_t ret;
1427
1428         if (bsds->fd == -1) {
1429                 errno = ENOTCONN;
1430                 return -1;
1431         }
1432
1433         ret = tsocket_bsd_pending(bsds->fd);
1434
1435         return ret;
1436 }
1437
/*
 * State of a pending readv request on a stream socket.
 */
struct tstream_bsd_readv_state {
        /* the stream the request operates on */
        struct tstream_context *stream;

        /*
         * private copy of the caller's iovec array; the handler
         * advances vector and decrements count while data arrives
         */
        struct iovec *vector;
        size_t count;

        /* number of bytes read so far */
        int ret;
};
1446
1447 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1448 {
1449         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1450                                    struct tstream_bsd);
1451
1452         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1453
1454         return 0;
1455 }
1456
1457 static void tstream_bsd_readv_handler(void *private_data);
1458
1459 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1460                                         struct tevent_context *ev,
1461                                         struct tstream_context *stream,
1462                                         struct iovec *vector,
1463                                         size_t count)
1464 {
1465         struct tevent_req *req;
1466         struct tstream_bsd_readv_state *state;
1467         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1468         int ret;
1469
1470         req = tevent_req_create(mem_ctx, &state,
1471                                 struct tstream_bsd_readv_state);
1472         if (!req) {
1473                 return NULL;
1474         }
1475
1476         state->stream   = stream;
1477         /* we make a copy of the vector so that we can modify it */
1478         state->vector   = talloc_array(state, struct iovec, count);
1479         if (tevent_req_nomem(state->vector, req)) {
1480                 goto post;
1481         }
1482         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1483         state->count    = count;
1484         state->ret      = 0;
1485
1486         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1487
1488         if (bsds->fd == -1) {
1489                 tevent_req_error(req, ENOTCONN);
1490                 goto post;
1491         }
1492
1493         /*
1494          * this is a fast path, not waiting for the
1495          * socket to become explicit readable gains
1496          * about 10%-20% performance in benchmark tests.
1497          */
1498         tstream_bsd_readv_handler(req);
1499         if (!tevent_req_is_in_progress(req)) {
1500                 goto post;
1501         }
1502
1503         ret = tstream_bsd_set_readable_handler(bsds, ev,
1504                                               tstream_bsd_readv_handler,
1505                                               req);
1506         if (ret == -1) {
1507                 tevent_req_error(req, errno);
1508                 goto post;
1509         }
1510
1511         return req;
1512
1513  post:
1514         tevent_req_post(req, ev);
1515         return req;
1516 }
1517
1518 static void tstream_bsd_readv_handler(void *private_data)
1519 {
1520         struct tevent_req *req = talloc_get_type_abort(private_data,
1521                                  struct tevent_req);
1522         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1523                                         struct tstream_bsd_readv_state);
1524         struct tstream_context *stream = state->stream;
1525         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1526         int ret;
1527         int err;
1528         bool retry;
1529
1530         ret = readv(bsds->fd, state->vector, state->count);
1531         if (ret == 0) {
1532                 /* propagate end of file */
1533                 tevent_req_error(req, EPIPE);
1534                 return;
1535         }
1536         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1537         if (retry) {
1538                 /* retry later */
1539                 return;
1540         }
1541         if (tevent_req_error(req, err)) {
1542                 return;
1543         }
1544
1545         state->ret += ret;
1546
1547         while (ret > 0) {
1548                 if (ret < state->vector[0].iov_len) {
1549                         uint8_t *base;
1550                         base = (uint8_t *)state->vector[0].iov_base;
1551                         base += ret;
1552                         state->vector[0].iov_base = base;
1553                         state->vector[0].iov_len -= ret;
1554                         break;
1555                 }
1556                 ret -= state->vector[0].iov_len;
1557                 state->vector += 1;
1558                 state->count -= 1;
1559         }
1560
1561         /*
1562          * there're maybe some empty vectors at the end
1563          * which we need to skip, otherwise we would get
1564          * ret == 0 from the readv() call and return EPIPE
1565          */
1566         while (state->count > 0) {
1567                 if (state->vector[0].iov_len > 0) {
1568                         break;
1569                 }
1570                 state->vector += 1;
1571                 state->count -= 1;
1572         }
1573
1574         if (state->count > 0) {
1575                 /* we have more to read */
1576                 return;
1577         }
1578
1579         tevent_req_done(req);
1580 }
1581
1582 static int tstream_bsd_readv_recv(struct tevent_req *req,
1583                                   int *perrno)
1584 {
1585         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1586                                         struct tstream_bsd_readv_state);
1587         int ret;
1588
1589         ret = tsocket_simple_int_recv(req, perrno);
1590         if (ret == 0) {
1591                 ret = state->ret;
1592         }
1593
1594         tevent_req_received(req);
1595         return ret;
1596 }
1597
/*
 * State of a pending writev request on a stream socket.
 */
struct tstream_bsd_writev_state {
        /* the stream the request operates on */
        struct tstream_context *stream;

        /*
         * private copy of the caller's iovec array; the handler
         * advances vector and decrements count while data is sent
         */
        struct iovec *vector;
        size_t count;

        /* number of bytes written so far */
        int ret;
};
1606
1607 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1608 {
1609         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1610                                   struct tstream_bsd);
1611
1612         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1613
1614         return 0;
1615 }
1616
1617 static void tstream_bsd_writev_handler(void *private_data);
1618
1619 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1620                                                  struct tevent_context *ev,
1621                                                  struct tstream_context *stream,
1622                                                  const struct iovec *vector,
1623                                                  size_t count)
1624 {
1625         struct tevent_req *req;
1626         struct tstream_bsd_writev_state *state;
1627         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1628         int ret;
1629
1630         req = tevent_req_create(mem_ctx, &state,
1631                                 struct tstream_bsd_writev_state);
1632         if (!req) {
1633                 return NULL;
1634         }
1635
1636         state->stream   = stream;
1637         /* we make a copy of the vector so that we can modify it */
1638         state->vector   = talloc_array(state, struct iovec, count);
1639         if (tevent_req_nomem(state->vector, req)) {
1640                 goto post;
1641         }
1642         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1643         state->count    = count;
1644         state->ret      = 0;
1645
1646         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1647
1648         if (bsds->fd == -1) {
1649                 tevent_req_error(req, ENOTCONN);
1650                 goto post;
1651         }
1652
1653         /*
1654          * this is a fast path, not waiting for the
1655          * socket to become explicit writeable gains
1656          * about 10%-20% performance in benchmark tests.
1657          */
1658         tstream_bsd_writev_handler(req);
1659         if (!tevent_req_is_in_progress(req)) {
1660                 goto post;
1661         }
1662
1663         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1664                                                tstream_bsd_writev_handler,
1665                                                req);
1666         if (ret == -1) {
1667                 tevent_req_error(req, errno);
1668                 goto post;
1669         }
1670
1671         return req;
1672
1673  post:
1674         tevent_req_post(req, ev);
1675         return req;
1676 }
1677
1678 static void tstream_bsd_writev_handler(void *private_data)
1679 {
1680         struct tevent_req *req = talloc_get_type_abort(private_data,
1681                                  struct tevent_req);
1682         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1683                                         struct tstream_bsd_writev_state);
1684         struct tstream_context *stream = state->stream;
1685         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1686         ssize_t ret;
1687         int err;
1688         bool retry;
1689
1690         ret = writev(bsds->fd, state->vector, state->count);
1691         if (ret == 0) {
1692                 /* propagate end of file */
1693                 tevent_req_error(req, EPIPE);
1694                 return;
1695         }
1696         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1697         if (retry) {
1698                 /* retry later */
1699                 return;
1700         }
1701         if (tevent_req_error(req, err)) {
1702                 return;
1703         }
1704
1705         state->ret += ret;
1706
1707         while (ret > 0) {
1708                 if (ret < state->vector[0].iov_len) {
1709                         uint8_t *base;
1710                         base = (uint8_t *)state->vector[0].iov_base;
1711                         base += ret;
1712                         state->vector[0].iov_base = base;
1713                         state->vector[0].iov_len -= ret;
1714                         break;
1715                 }
1716                 ret -= state->vector[0].iov_len;
1717                 state->vector += 1;
1718                 state->count -= 1;
1719         }
1720
1721         /*
1722          * there're maybe some empty vectors at the end
1723          * which we need to skip, otherwise we would get
1724          * ret == 0 from the writev() call and return EPIPE
1725          */
1726         while (state->count > 0) {
1727                 if (state->vector[0].iov_len > 0) {
1728                         break;
1729                 }
1730                 state->vector += 1;
1731                 state->count -= 1;
1732         }
1733
1734         if (state->count > 0) {
1735                 /* we have more to read */
1736                 return;
1737         }
1738
1739         tevent_req_done(req);
1740 }
1741
1742 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1743 {
1744         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1745                                         struct tstream_bsd_writev_state);
1746         int ret;
1747
1748         ret = tsocket_simple_int_recv(req, perrno);
1749         if (ret == 0) {
1750                 ret = state->ret;
1751         }
1752
1753         tevent_req_received(req);
1754         return ret;
1755 }
1756
/*
 * Request state of a stream disconnect. The disconnect completes
 * inside tstream_bsd_disconnect_send(), so nothing needs to be stored;
 * the single member only serves as a placeholder.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
1760
1761 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1762                                                      struct tevent_context *ev,
1763                                                      struct tstream_context *stream)
1764 {
1765         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1766         struct tevent_req *req;
1767         struct tstream_bsd_disconnect_state *state;
1768         int ret;
1769         int err;
1770         bool dummy;
1771
1772         req = tevent_req_create(mem_ctx, &state,
1773                                 struct tstream_bsd_disconnect_state);
1774         if (req == NULL) {
1775                 return NULL;
1776         }
1777
1778         if (bsds->fd == -1) {
1779                 tevent_req_error(req, ENOTCONN);
1780                 goto post;
1781         }
1782
1783         ret = close(bsds->fd);
1784         bsds->fd = -1;
1785         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1786         if (tevent_req_error(req, err)) {
1787                 goto post;
1788         }
1789
1790         tevent_req_done(req);
1791 post:
1792         tevent_req_post(req, ev);
1793         return req;
1794 }
1795
/*
 * Collect the result of a stream disconnect request.
 *
 * Passes through the result of tsocket_simple_int_recv() and marks
 * the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int status = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return status;
}
1806
/*
 * Operation table of the "bsd" stream backend; it is handed to
 * tstream_context_create() when a new context is set up.
 */
static const struct tstream_context_ops tstream_bsd_ops = {
        .name                   = "bsd",

        /* query pending data on the socket */
        .pending_bytes          = tstream_bsd_pending_bytes,

        /* read into an iovec array */
        .readv_send             = tstream_bsd_readv_send,
        .readv_recv             = tstream_bsd_readv_recv,

        /* write an iovec array */
        .writev_send            = tstream_bsd_writev_send,
        .writev_recv            = tstream_bsd_writev_recv,

        /* close the socket */
        .disconnect_send        = tstream_bsd_disconnect_send,
        .disconnect_recv        = tstream_bsd_disconnect_recv,
};
1821
1822 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1823 {
1824         TALLOC_FREE(bsds->fde);
1825         if (bsds->fd != -1) {
1826                 close(bsds->fd);
1827                 bsds->fd = -1;
1828         }
1829         return 0;
1830 }
1831
1832 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1833                                  int fd,
1834                                  struct tstream_context **_stream,
1835                                  const char *location)
1836 {
1837         struct tstream_context *stream;
1838         struct tstream_bsd *bsds;
1839
1840         stream = tstream_context_create(mem_ctx,
1841                                         &tstream_bsd_ops,
1842                                         &bsds,
1843                                         struct tstream_bsd,
1844                                         location);
1845         if (!stream) {
1846                 return -1;
1847         }
1848         ZERO_STRUCTP(bsds);
1849         bsds->fd = fd;
1850         talloc_set_destructor(bsds, tstream_bsd_destructor);
1851
1852         *_stream = stream;
1853         return 0;
1854 }
1855
/*
 * Per-request state of tstream_bsd_connect_send().
 */
struct tstream_bsd_connect_state {
        /* socket being connected, -1 while none is owned by the request */
        int fd;
        /* fd event watching a pending connect(), NULL if none is set up */
        struct tevent_fd *fde;
        /* not referenced by the connect code visible in this file */
        struct tstream_context *stream;
};
1861
1862 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1863 {
1864         TALLOC_FREE(state->fde);
1865         if (state->fd != -1) {
1866                 close(state->fd);
1867                 state->fd = -1;
1868         }
1869
1870         return 0;
1871 }
1872
1873 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1874                                             struct tevent_fd *fde,
1875                                             uint16_t flags,
1876                                             void *private_data);
1877
1878 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1879                                         struct tevent_context *ev,
1880                                         int sys_errno,
1881                                         const struct tsocket_address *local,
1882                                         const struct tsocket_address *remote)
1883 {
1884         struct tevent_req *req;
1885         struct tstream_bsd_connect_state *state;
1886         struct tsocket_address_bsd *lbsda =
1887                 talloc_get_type_abort(local->private_data,
1888                 struct tsocket_address_bsd);
1889         struct tsocket_address_bsd *rbsda =
1890                 talloc_get_type_abort(remote->private_data,
1891                 struct tsocket_address_bsd);
1892         int ret;
1893         int err;
1894         bool retry;
1895         bool do_bind = false;
1896         bool do_reuseaddr = false;
1897         socklen_t sa_socklen = sizeof(rbsda->u.ss);
1898
1899         req = tevent_req_create(mem_ctx, &state,
1900                                 struct tstream_bsd_connect_state);
1901         if (!req) {
1902                 return NULL;
1903         }
1904         state->fd = -1;
1905         state->fde = NULL;
1906
1907         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1908
1909         /* give the wrappers a chance to report an error */
1910         if (sys_errno != 0) {
1911                 tevent_req_error(req, sys_errno);
1912                 goto post;
1913         }
1914
1915         switch (lbsda->u.sa.sa_family) {
1916         case AF_UNIX:
1917                 if (lbsda->u.un.sun_path[0] != 0) {
1918                         do_reuseaddr = true;
1919                         do_bind = true;
1920                 }
1921                 /*
1922                  * for unix sockets we can't use the size of sockaddr_storage
1923                  * we would get EINVAL
1924                  */
1925                 sa_socklen = sizeof(rbsda->u.un);
1926                 break;
1927         case AF_INET:
1928                 if (lbsda->u.in.sin_port != 0) {
1929                         do_reuseaddr = true;
1930                         do_bind = true;
1931                 }
1932                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1933                         do_bind = true;
1934                 }
1935                 break;
1936 #ifdef HAVE_IPV6
1937         case AF_INET6:
1938                 if (lbsda->u.in6.sin6_port != 0) {
1939                         do_reuseaddr = true;
1940                         do_bind = true;
1941                 }
1942                 if (memcmp(&in6addr_any,
1943                            &lbsda->u.in6.sin6_addr,
1944                            sizeof(in6addr_any)) != 0) {
1945                         do_bind = true;
1946                 }
1947                 break;
1948 #endif
1949         default:
1950                 tevent_req_error(req, EINVAL);
1951                 goto post;
1952         }
1953
1954         state->fd = socket(lbsda->u.sa.sa_family, SOCK_STREAM, 0);
1955         if (state->fd == -1) {
1956                 tevent_req_error(req, errno);
1957                 goto post;
1958         }
1959
1960         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
1961         if (state->fd == -1) {
1962                 tevent_req_error(req, errno);
1963                 goto post;
1964         }
1965
1966         if (do_reuseaddr) {
1967                 int val = 1;
1968
1969                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
1970                                  (const void *)&val, sizeof(val));
1971                 if (ret == -1) {
1972                         tevent_req_error(req, errno);
1973                         goto post;
1974                 }
1975         }
1976
1977         if (do_bind) {
1978                 ret = bind(state->fd, &lbsda->u.sa, sizeof(lbsda->u.ss));
1979                 if (ret == -1) {
1980                         tevent_req_error(req, errno);
1981                         goto post;
1982                 }
1983         }
1984
1985         ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
1986         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1987         if (retry) {
1988                 /* retry later */
1989                 goto async;
1990         }
1991         if (tevent_req_error(req, err)) {
1992                 goto post;
1993         }
1994
1995         tevent_req_done(req);
1996         goto post;
1997
1998  async:
1999         state->fde = tevent_add_fd(ev, state,
2000                                    state->fd,
2001                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2002                                    tstream_bsd_connect_fde_handler,
2003                                    req);
2004         if (tevent_req_nomem(state->fde, req)) {
2005                 goto post;
2006         }
2007
2008         return req;
2009
2010  post:
2011         tevent_req_post(req, ev);
2012         return req;
2013 }
2014
2015 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2016                                             struct tevent_fd *fde,
2017                                             uint16_t flags,
2018                                             void *private_data)
2019 {
2020         struct tevent_req *req = talloc_get_type_abort(private_data,
2021                                  struct tevent_req);
2022         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2023                                         struct tstream_bsd_connect_state);
2024         int ret;
2025         int error=0;
2026         socklen_t len = sizeof(error);
2027         int err;
2028         bool retry;
2029
2030         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2031         if (ret == 0) {
2032                 if (error != 0) {
2033                         errno = error;
2034                         ret = -1;
2035                 }
2036         }
2037         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2038         if (retry) {
2039                 /* retry later */
2040                 return;
2041         }
2042         if (tevent_req_error(req, err)) {
2043                 return;
2044         }
2045
2046         tevent_req_done(req);
2047 }
2048
2049 static int tstream_bsd_connect_recv(struct tevent_req *req,
2050                                     int *perrno,
2051                                     TALLOC_CTX *mem_ctx,
2052                                     struct tstream_context **stream,
2053                                     const char *location)
2054 {
2055         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2056                                         struct tstream_bsd_connect_state);
2057         int ret;
2058
2059         ret = tsocket_simple_int_recv(req, perrno);
2060         if (ret == 0) {
2061                 ret = _tstream_bsd_existing_socket(mem_ctx,
2062                                                    state->fd,
2063                                                    stream,
2064                                                    location);
2065                 if (ret == -1) {
2066                         *perrno = errno;
2067                         goto done;
2068                 }
2069                 TALLOC_FREE(state->fde);
2070                 state->fd = -1;
2071         }
2072
2073 done:
2074         tevent_req_received(req);
2075         return ret;
2076 }
2077
2078 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2079                                         struct tevent_context *ev,
2080                                         const struct tsocket_address *local,
2081                                         const struct tsocket_address *remote)
2082 {
2083         struct tsocket_address_bsd *lbsda =
2084                 talloc_get_type_abort(local->private_data,
2085                 struct tsocket_address_bsd);
2086         struct tevent_req *req;
2087         int sys_errno = 0;
2088
2089         switch (lbsda->u.sa.sa_family) {
2090         case AF_INET:
2091                 break;
2092 #ifdef HAVE_IPV6
2093         case AF_INET6:
2094                 break;
2095 #endif
2096         default:
2097                 sys_errno = EINVAL;
2098                 break;
2099         }
2100
2101         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2102
2103         return req;
2104 }
2105
2106 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2107                                    int *perrno,
2108                                    TALLOC_CTX *mem_ctx,
2109                                    struct tstream_context **stream,
2110                                    const char *location)
2111 {
2112         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2113 }
2114
2115 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2116                                         struct tevent_context *ev,
2117                                         const struct tsocket_address *local,
2118                                         const struct tsocket_address *remote)
2119 {
2120         struct tsocket_address_bsd *lbsda =
2121                 talloc_get_type_abort(local->private_data,
2122                 struct tsocket_address_bsd);
2123         struct tevent_req *req;
2124         int sys_errno = 0;
2125
2126         switch (lbsda->u.sa.sa_family) {
2127         case AF_UNIX:
2128                 break;
2129         default:
2130                 sys_errno = EINVAL;
2131                 break;
2132         }
2133
2134         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2135
2136         return req;
2137 }
2138
2139 int _tstream_unix_connect_recv(struct tevent_req *req,
2140                                       int *perrno,
2141                                       TALLOC_CTX *mem_ctx,
2142                                       struct tstream_context **stream,
2143                                       const char *location)
2144 {
2145         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2146 }
2147
2148 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2149                              struct tstream_context **_stream1,
2150                              TALLOC_CTX *mem_ctx2,
2151                              struct tstream_context **_stream2,
2152                              const char *location)
2153 {
2154         int ret;
2155         int fds[2];
2156         int fd1;
2157         int fd2;
2158         struct tstream_context *stream1 = NULL;
2159         struct tstream_context *stream2 = NULL;
2160
2161         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2162         if (ret == -1) {
2163                 return -1;
2164         }
2165         fd1 = fds[0];
2166         fd2 = fds[1];
2167
2168         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2169         if (fd1 == -1) {
2170                 int sys_errno = errno;
2171                 close(fd2);
2172                 errno = sys_errno;
2173                 return -1;
2174         }
2175
2176         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2177         if (fd2 == -1) {
2178                 int sys_errno = errno;
2179                 close(fd1);
2180                 errno = sys_errno;
2181                 return -1;
2182         }
2183
2184         ret = _tstream_bsd_existing_socket(mem_ctx1,
2185                                            fd1,
2186                                            &stream1,
2187                                            location);
2188         if (ret == -1) {
2189                 int sys_errno = errno;
2190                 close(fd1);
2191                 close(fd2);
2192                 errno = sys_errno;
2193                 return -1;
2194         }
2195
2196         ret = _tstream_bsd_existing_socket(mem_ctx2,
2197                                            fd2,
2198                                            &stream2,
2199                                            location);
2200         if (ret == -1) {
2201                 int sys_errno = errno;
2202                 talloc_free(stream1);
2203                 close(fd2);
2204                 errno = sys_errno;
2205                 return -1;
2206         }
2207
2208         *_stream1 = stream1;
2209         *_stream2 = stream2;
2210         return 0;
2211 }
2212