8de70f8bca628f19a0c0860e5a687c61037b60e4
[vlendec/samba-autobuild/.git] / ctdb / common / system_socket.c
1 /*
2    ctdb system specific code to manage raw sockets on linux
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Marc Dequènes (Duck) 2009
7    Copyright (C) Volker Lendecke 2012
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "replace.h"
24
25 /*
26  * Use BSD struct tcphdr field names for portability.  Modern glibc
27  * makes them available by default via <netinet/tcp.h> but older glibc
28  * requires __FAVOR_BSD to be defined.
29  *
30  * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
31  * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
32  * set.  Including "replace.h" above causes <features.h> to be
33  * indirectly included and this will not set __FAVOR_BSD because
34  * _GNU_SOURCE is set in Samba's "config.h" (which is included by
35  * "replace.h").
36  *
37  * Therefore, set __FAVOR_BSD by hand below.
38  */
39 #define __FAVOR_BSD 1
40 #include "system/network.h"
41
42 #ifdef HAVE_NETINET_IF_ETHER_H
43 #include <netinet/if_ether.h>
44 #endif
45 #ifdef HAVE_NETINET_IP6_H
46 #include <netinet/ip6.h>
47 #endif
48 #ifdef HAVE_NETINET_ICMP6_H
49 #include <netinet/icmp6.h>
50 #endif
51 #ifdef HAVE_LINUX_IF_PACKET_H
52 #include <linux/if_packet.h>
53 #endif
54
55 #ifndef ETHERTYPE_IP6
56 #define ETHERTYPE_IP6 0x86dd
57 #endif
58
59 #include "lib/util/debug.h"
60 #include "lib/util/blocking.h"
61
62 #include "protocol/protocol.h"
63
64 #include "common/logging.h"
65 #include "common/system_socket.h"
66
/*
 * Sum the 16-bit network-order words of an n byte buffer
 *
 * Each pair of bytes is read as one big-endian word and added, in host
 * order, to a 32-bit accumulator.  If n is odd then the final byte is
 * treated as the high-order byte of a word whose low-order byte is
 * zero, as required for the Internet checksum.
 *
 * The result is neither folded to 16 bits nor complemented - callers
 * do that.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	const uint8_t *p = (const uint8_t *)data;
	uint32_t sum = 0;

	while (n >= 2) {
		uint16_t word;

		/*
		 * Copy instead of dereferencing a uint16_t pointer:
		 * packet headers are not guaranteed to be 2-byte
		 * aligned within their buffer.
		 */
		memcpy(&word, p, sizeof(word));
		sum += (uint32_t)ntohs(word);
		p += sizeof(word);
		n -= sizeof(word);
	}
	if (n == 1) {
		/*
		 * ntohs() of a bare byte only yields (byte << 8) on
		 * little-endian hosts, so pad explicitly.
		 */
		sum += (uint32_t)*p << 8;
	}
	return sum;
}
83
84 /*
85  * See if the given IP is currently on an interface
86  */
87 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
88 {
89         int s;
90         int ret;
91         ctdb_sock_addr __addr = *_addr;
92         ctdb_sock_addr *addr = &__addr;
93         socklen_t addrlen = 0;
94
95         switch (addr->sa.sa_family) {
96         case AF_INET:
97                 addr->ip.sin_port = 0;
98                 addrlen = sizeof(struct sockaddr_in);
99                 break;
100         case AF_INET6:
101                 addr->ip6.sin6_port = 0;
102                 addrlen = sizeof(struct sockaddr_in6);
103                 break;
104         }
105
106         s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
107         if (s == -1) {
108                 return false;
109         }
110
111         ret = bind(s, (struct sockaddr *)addr, addrlen);
112
113         close(s);
114         return ret == 0;
115 }
116
117 /*
118  * simple TCP checksum - assumes data is multiple of 2 bytes long
119  */
120 static uint16_t ip_checksum(uint16_t *data, size_t n, struct ip *ip)
121 {
122         uint32_t sum = uint16_checksum(data, n);
123         uint16_t sum2;
124
125         sum += uint16_checksum((uint16_t *)&ip->ip_src, sizeof(ip->ip_src));
126         sum += uint16_checksum((uint16_t *)&ip->ip_dst, sizeof(ip->ip_dst));
127         sum += ip->ip_p + n;
128         sum = (sum & 0xFFFF) + (sum >> 16);
129         sum = (sum & 0xFFFF) + (sum >> 16);
130         sum2 = htons(sum);
131         sum2 = ~sum2;
132         if (sum2 == 0) {
133                 return 0xFFFF;
134         }
135         return sum2;
136 }
137
/*
 * Checksum of n bytes of data plus fields from an IPv6 header
 *
 * Sums the source and destination addresses, the length n (as a 32-bit
 * network-order value split into two 16-bit halves), the next-header
 * value and the data itself, then folds the result to 16 bits and
 * complements it.  The value is returned in network byte order; a
 * computed value of zero is returned as 0xFFFF.
 */
static uint16_t ip6_checksum(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint16_t pseudo[3];
	uint32_t nlen = htonl(n);
	uint32_t acc;
	uint16_t result;

	pseudo[0] = nlen & UINT16_MAX;
	pseudo[1] = (nlen >> 16) & UINT16_MAX;
	/* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
	pseudo[2] = htons(ip6->ip6_nxt);

	acc = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
	acc += uint16_checksum(pseudo, sizeof(pseudo));
	acc += uint16_checksum(data, n);

	/* Fold carries back in; the second pass absorbs a carry from the first */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	result = htons(acc);
	result = ~result;

	return (result == 0) ? 0xFFFF : result;
}
166
167 /*
168  * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
169  */
170
171 #ifdef HAVE_PACKETSOCKET
172
173 /*
174  * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
175  * packets
176  */
177
/*
 * Sizes of the structures that make up each link-layer frame.  The
 * expansions are fully parenthesised so that the macros evaluate
 * correctly when used inside a larger expression.
 */
#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
			 sizeof(struct ether_arp))

#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
			    sizeof(struct ip6_hdr) + \
			    sizeof(struct nd_neighbor_advert) + \
			    sizeof(struct nd_opt_hdr) + \
			    sizeof(struct ether_addr))

/* Buffer sizes: the structure size, but never less than 64 bytes */
#define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64)

#define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64)
190
191 static int arp_build(uint8_t *buffer,
192                      size_t buflen,
193                      const struct sockaddr_in *addr,
194                      const struct ether_addr *hwaddr,
195                      bool reply,
196                      struct ether_addr **ether_dhost,
197                      size_t *len)
198 {
199         size_t l = ARP_BUFFER_SIZE;
200         struct ether_header *eh;
201         struct ether_arp *ea;
202         struct arphdr *ah;
203
204         if (addr->sin_family != AF_INET) {
205                 return EINVAL;
206         }
207
208         if (buflen < l) {
209                 return EMSGSIZE;
210         }
211
212         memset(buffer, 0 , l);
213
214         eh = (struct ether_header *)buffer;
215         memset(eh->ether_dhost, 0xff, ETH_ALEN);
216         memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
217         eh->ether_type = htons(ETHERTYPE_ARP);
218
219         ea = (struct ether_arp *)(buffer + sizeof(struct ether_header));
220         ah = &ea->ea_hdr;
221         ah->ar_hrd = htons(ARPHRD_ETHER);
222         ah->ar_pro = htons(ETH_P_IP);
223         ah->ar_hln = ETH_ALEN;
224         ah->ar_pln = sizeof(ea->arp_spa);
225
226         if (! reply) {
227                 ah->ar_op  = htons(ARPOP_REQUEST);
228                 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
229                 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
230                 memset(ea->arp_tha, 0, ETH_ALEN);
231                 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
232         } else {
233                 ah->ar_op  = htons(ARPOP_REPLY);
234                 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
235                 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
236                 memcpy(ea->arp_tha, hwaddr, ETH_ALEN);
237                 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
238         }
239
240         *ether_dhost = (struct ether_addr *)eh->ether_dhost;
241         *len = l;
242         return 0;
243 }
244
245 static int ip6_na_build(uint8_t *buffer,
246                         size_t buflen,
247                         const struct sockaddr_in6 *addr,
248                         const struct ether_addr *hwaddr,
249                         struct ether_addr **ether_dhost,
250                         size_t *len)
251 {
252         size_t l = IP6_NA_BUFFER_SIZE;
253         struct ether_header *eh;
254         struct ip6_hdr *ip6;
255         struct nd_neighbor_advert *nd_na;
256         struct nd_opt_hdr *nd_oh;
257         struct ether_addr *ea;
258         int ret;
259
260         if (addr->sin6_family != AF_INET6) {
261                 return EINVAL;
262         }
263
264         if (buflen < l) {
265                 return EMSGSIZE;
266         }
267
268         memset(buffer, 0 , l);
269
270         eh = (struct ether_header *)buffer;
271         /*
272          * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
273          * section 7) - note memset 0 above!
274          */
275         eh->ether_dhost[0] = 0x33;
276         eh->ether_dhost[1] = 0x33;
277         eh->ether_dhost[5] = 0x01;
278         memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
279         eh->ether_type = htons(ETHERTYPE_IP6);
280
281         ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header));
282         ip6->ip6_vfc  = 6 << 4;
283         ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) +
284                               sizeof(struct nd_opt_hdr) +
285                               ETH_ALEN);
286         ip6->ip6_nxt  = IPPROTO_ICMPV6;
287         ip6->ip6_hlim = 255;
288         ip6->ip6_src  = addr->sin6_addr;
289         /* all-nodes multicast */
290
291         ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
292         if (ret != 1) {
293                 return EIO;
294         }
295
296         nd_na = (struct nd_neighbor_advert *)(buffer +
297                                               sizeof(struct ether_header) +
298                                               sizeof(struct ip6_hdr));
299         nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
300         nd_na->nd_na_code = 0;
301         nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
302         nd_na->nd_na_target = addr->sin6_addr;
303
304         /* Option: Target link-layer address */
305         nd_oh = (struct nd_opt_hdr *)(buffer +
306                                       sizeof(struct ether_header) +
307                                       sizeof(struct ip6_hdr) +
308                                       sizeof(struct nd_neighbor_advert));
309         nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
310         nd_oh->nd_opt_len = 1;  /* multiple of 8 octets */
311
312         ea = (struct ether_addr *)(buffer +
313                                    sizeof(struct ether_header) +
314                                    sizeof(struct ip6_hdr) +
315                                    sizeof(struct nd_neighbor_advert) +
316                                    sizeof(struct nd_opt_hdr));
317         memcpy(ea, hwaddr, ETH_ALEN);
318
319         nd_na->nd_na_cksum = ip6_checksum((uint16_t *)nd_na,
320                                           ntohs(ip6->ip6_plen),
321                                           ip6);
322
323         *ether_dhost = (struct ether_addr *)eh->ether_dhost;
324         *len = l;
325         return 0;
326 }
327
328 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
329 {
330         int s;
331         struct sockaddr_ll sall = {0};
332         struct ifreq if_hwaddr = {{{0}}};
333         uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
334         struct ifreq ifr = {{{0}}};
335         struct ether_addr *hwaddr = NULL;
336         struct ether_addr *ether_dhost = NULL;
337         size_t len = 0;
338         int ret = 0;
339
340         s = socket(AF_PACKET, SOCK_RAW, 0);
341         if (s == -1) {
342                 ret = errno;
343                 DBG_ERR("Failed to open raw socket\n");
344                 return ret;
345         }
346         DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
347
348         /* Find interface */
349         strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
350         if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
351                 ret = errno;
352                 DBG_ERR("Interface '%s' not found\n", iface);
353                 goto fail;
354         }
355
356         /* Get MAC address */
357         strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
358         ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
359         if ( ret < 0 ) {
360                 ret = errno;
361                 DBG_ERR("ioctl failed\n");
362                 goto fail;
363         }
364         if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
365                 ret = 0;
366                 D_DEBUG("Ignoring loopback arp request\n");
367                 goto fail;
368         }
369         if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
370                 ret = EINVAL;
371                 DBG_ERR("Not an ethernet address family (0x%x)\n",
372                         if_hwaddr.ifr_hwaddr.sa_family);
373                 goto fail;;
374         }
375
376         /* Set up most of destination address structure */
377         sall.sll_family = AF_PACKET;
378         sall.sll_halen = sizeof(struct ether_addr);
379         sall.sll_protocol = htons(ETH_P_ALL);
380         sall.sll_ifindex = ifr.ifr_ifindex;
381
382         /* For clarity */
383         hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
384
385         switch (addr->ip.sin_family) {
386         case AF_INET:
387                 /* Send gratuitous ARP */
388                 ret = arp_build(buffer,
389                                 sizeof(buffer),
390                                 &addr->ip,
391                                 hwaddr,
392                                 false,
393                                 &ether_dhost,
394                                 &len);
395                 if (ret != 0) {
396                         DBG_ERR("Failed to build ARP request\n");
397                         goto fail;
398                 }
399
400                 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
401
402                 ret = sendto(s,
403                              buffer,
404                              len,
405                              0,
406                              (struct sockaddr *)&sall,
407                              sizeof(sall));
408                 if (ret < 0 ) {
409                         ret = errno;
410                         DBG_ERR("Failed sendto\n");
411                         goto fail;
412                 }
413
414                 /* Send unsolicited ARP reply */
415                 ret = arp_build(buffer,
416                                 sizeof(buffer),
417                                 &addr->ip,
418                                 hwaddr,
419                                 true,
420                                 &ether_dhost,
421                                 &len);
422                 if (ret != 0) {
423                         DBG_ERR("Failed to build ARP reply\n");
424                         goto fail;
425                 }
426
427                 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
428
429                 ret = sendto(s,
430                              buffer,
431                              len,
432                              0,
433                              (struct sockaddr *)&sall,
434                              sizeof(sall));
435                 if (ret < 0 ) {
436                         ret = errno;
437                         DBG_ERR("Failed sendto\n");
438                         goto fail;
439                 }
440
441                 close(s);
442                 break;
443
444         case AF_INET6:
445                 ret = ip6_na_build(buffer,
446                                    sizeof(buffer),
447                                    &addr->ip6,
448                                    hwaddr,
449                                    &ether_dhost,
450                                    &len);
451                 if (ret != 0) {
452                         DBG_ERR("Failed to build IPv6 neighbor advertisment\n");
453                         goto fail;
454                 }
455
456                 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
457
458                 ret = sendto(s,
459                              buffer,
460                              len,
461                              0,
462                              (struct sockaddr *)&sall,
463                              sizeof(sall));
464                 if (ret < 0 ) {
465                         ret = errno;
466                         DBG_ERR("Failed sendto\n");
467                         goto fail;
468                 }
469
470                 close(s);
471                 break;
472
473         default:
474                 ret = EINVAL;
475                 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
476                         addr->ip.sin_family);
477                 goto fail;
478         }
479
480         return 0;
481
482 fail:
483         close(s);
484         return ret;
485 }
486
487 #else /* HAVE_PACKETSOCKET */
488
489 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
490 {
491         /* Not implemented */
492         return ENOSYS;
493 }
494
495 #endif /* HAVE_PACKETSOCKET */
496
497
/*
 * Sizes of an IP header followed by a TCP header with no options.
 * The expansions are fully parenthesised so that the macros evaluate
 * correctly when used inside a larger expression.
 */
#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
			     sizeof(struct tcphdr))

#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
			     sizeof(struct tcphdr))
503
504 static int tcp4_build(uint8_t *buf,
505                       size_t buflen,
506                       const struct sockaddr_in *src,
507                       const struct sockaddr_in *dst,
508                       uint32_t seq,
509                       uint32_t ack,
510                       int rst,
511                       size_t *len)
512 {
513         size_t l = IP4_TCP_BUFFER_SIZE;
514         struct {
515                 struct ip ip;
516                 struct tcphdr tcp;
517         } *ip4pkt;
518
519         if (l != sizeof(*ip4pkt)) {
520                 return EMSGSIZE;
521         }
522
523         if (buflen < l) {
524                 return EMSGSIZE;
525         }
526
527         ip4pkt = (void *)buf;
528         memset(ip4pkt, 0, l);
529
530         ip4pkt->ip.ip_v     = 4;
531         ip4pkt->ip.ip_hl    = sizeof(ip4pkt->ip)/sizeof(uint32_t);
532         ip4pkt->ip.ip_len   = htons(sizeof(ip4pkt));
533         ip4pkt->ip.ip_ttl   = 255;
534         ip4pkt->ip.ip_p     = IPPROTO_TCP;
535         ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
536         ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
537         ip4pkt->ip.ip_sum   = 0;
538
539         ip4pkt->tcp.th_sport = src->sin_port;
540         ip4pkt->tcp.th_dport = dst->sin_port;
541         ip4pkt->tcp.th_seq   = seq;
542         ip4pkt->tcp.th_ack   = ack;
543         ip4pkt->tcp.th_flags = 0;
544         ip4pkt->tcp.th_flags |= TH_ACK;
545         if (rst) {
546                 ip4pkt->tcp.th_flags |= TH_RST;
547         }
548         ip4pkt->tcp.th_off   = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
549         /* this makes it easier to spot in a sniffer */
550         ip4pkt->tcp.th_win   = htons(1234);
551         ip4pkt->tcp.th_sum   = ip_checksum((uint16_t *)&ip4pkt->tcp,
552                                            sizeof(ip4pkt->tcp),
553                                            &ip4pkt->ip);
554
555         *len = l;
556         return 0;
557 }
558
559 static int tcp6_build(uint8_t *buf,
560                       size_t buflen,
561                       const struct sockaddr_in6 *src,
562                       const struct sockaddr_in6 *dst,
563                       uint32_t seq,
564                       uint32_t ack,
565                       int rst,
566                       size_t *len)
567 {
568         size_t l = IP6_TCP_BUFFER_SIZE;
569         struct {
570                 struct ip6_hdr ip6;
571                 struct tcphdr tcp;
572         } *ip6pkt;
573
574         if (l != sizeof(*ip6pkt)) {
575                 return EMSGSIZE;
576         }
577
578         if (buflen < l) {
579                 return EMSGSIZE;
580         }
581
582         ip6pkt = (void *)buf;
583         memset(ip6pkt, 0, l);
584
585         ip6pkt->ip6.ip6_vfc  = 6 << 4;
586         ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr));
587         ip6pkt->ip6.ip6_nxt  = IPPROTO_TCP;
588         ip6pkt->ip6.ip6_hlim = 64;
589         ip6pkt->ip6.ip6_src  = src->sin6_addr;
590         ip6pkt->ip6.ip6_dst  = dst->sin6_addr;
591
592         ip6pkt->tcp.th_sport = src->sin6_port;
593         ip6pkt->tcp.th_dport = dst->sin6_port;
594         ip6pkt->tcp.th_seq   = seq;
595         ip6pkt->tcp.th_ack   = ack;
596         ip6pkt->tcp.th_flags = 0;
597         ip6pkt->tcp.th_flags |= TH_ACK;
598         if (rst) {
599                 ip6pkt->tcp.th_flags |= TH_RST;
600         }
601         ip6pkt->tcp.th_off    = sizeof(ip6pkt->tcp)/sizeof(uint32_t);
602         /* this makes it easier to spot in a sniffer */
603         ip6pkt->tcp.th_win   = htons(1234);
604         ip6pkt->tcp.th_sum   = ip6_checksum((uint16_t *)&ip6pkt->tcp,
605                                             sizeof(ip6pkt->tcp),
606                                             &ip6pkt->ip6);
607
608         *len = l;
609         return 0;
610 }
611
612 /*
613  * Send tcp segment from the specified IP/port to the specified
614  * destination IP/port.
615  *
616  * This is used to trigger the receiving host into sending its own ACK,
617  * which should trigger early detection of TCP reset by the client
618  * after IP takeover
619  *
620  * This can also be used to send RST segments (if rst is true) and also
621  * if correct seq and ack numbers are provided.
622  */
623 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
624                       const ctdb_sock_addr *src,
625                       uint32_t seq,
626                       uint32_t ack,
627                       int rst)
628 {
629         uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
630         size_t len = 0;
631         int ret;
632         int s;
633         uint32_t one = 1;
634         struct sockaddr_in6 tmpdest = { 0 };
635         int saved_errno;
636
637         switch (src->ip.sin_family) {
638         case AF_INET:
639                 ret = tcp4_build(buf,
640                                  sizeof(buf),
641                                  &src->ip,
642                                  &dest->ip,
643                                  seq,
644                                  ack,
645                                  rst,
646                                  &len);
647                 if (ret != 0) {
648                         DBG_ERR("Failed to build TCP packet (%d)\n", ret);
649                         return ret;
650                 }
651
652                 /* open a raw socket to send this segment from */
653                 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
654                 if (s == -1) {
655                         DBG_ERR("Failed to open raw socket (%s)\n",
656                                 strerror(errno));
657                         return -1;
658                 }
659
660                 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
661                 if (ret != 0) {
662                         DBG_ERR("Failed to setup IP headers (%s)\n",
663                                 strerror(errno));
664                         close(s);
665                         return -1;
666                 }
667
668                 ret = sendto(s,
669                              buf,
670                              len,
671                              0,
672                              (const struct sockaddr *)&dest->ip,
673                              sizeof(dest->ip));
674                 saved_errno = errno;
675                 close(s);
676                 if (ret != len) {
677                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
678                         return -1;
679                 }
680                 break;
681
682         case AF_INET6:
683                 ret = tcp6_build(buf,
684                                  sizeof(buf),
685                                  &src->ip6,
686                                  &dest->ip6,
687                                  seq,
688                                  ack,
689                                  rst,
690                                  &len);
691                 if (ret != 0) {
692                         DBG_ERR("Failed to build TCP packet (%d)\n", ret);
693                         return ret;
694                 }
695
696                 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
697                 if (s == -1) {
698                         DBG_ERR("Failed to open sending socket\n");
699                         return -1;
700
701                 }
702                 /*
703                  * sendto() on an IPv6 raw socket requires the port to
704                  * be either 0 or a protocol value
705                  */
706                 tmpdest = dest->ip6;
707                 tmpdest.sin6_port = 0;
708
709                 ret = sendto(s,
710                              buf,
711                              len,
712                              0,
713                              (const struct sockaddr *)&tmpdest,
714                              sizeof(tmpdest));
715                 saved_errno = errno;
716                 close(s);
717
718                 if (ret != len) {
719                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
720                         return -1;
721                 }
722                 break;
723
724         default:
725                 DBG_ERR("Not an ipv4/v6 address\n");
726                 return -1;
727         }
728
729         return 0;
730 }
731
732 /*
733  * Packet capture
734  *
735  * If AF_PACKET is available then use a raw socket otherwise use pcap.
736  * wscript has checked to make sure that pcap is available if needed.
737  */
738
/*
 * Extract addresses, ports and TCP fields from an IPv4 packet
 *
 * ip_pkt points at the IPv4 header and pktlen is the number of valid
 * bytes there.  On success src and dst are filled in from the IP and
 * TCP headers, and *ack_seq, *seq and (if non-NULL) *window are copied
 * from the TCP header as-is, without byte-order conversion.  If rst is
 * non-NULL then *rst is non-zero if the RST flag is set.
 *
 * Returns 0 on success, ENOMSG if the packet is not an unfragmented
 * IPv4 TCP packet with a sane header length, EMSGSIZE if the packet
 * is too short to contain the fields that are read.
 */
static int tcp4_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in *src,
			struct sockaddr_in *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip *ip;
	const struct tcphdr *tcp = NULL;
	/* th_win is the last TCP header field read below */
	const size_t tcp_need = offsetof(struct tcphdr, th_win) +
				sizeof(tcp->th_win);
	size_t ip_hdrlen;

	if (pktlen < sizeof(struct ip)) {
		return EMSGSIZE;
	}

	/* IP */
	ip = (const struct ip *)ip_pkt;

	/* We only want IPv4 packets */
	if (ip->ip_v != 4) {
		return ENOMSG;
	}
	/* Don't look at fragments */
	if ((ntohs(ip->ip_off)&0x1fff) != 0) {
		return ENOMSG;
	}
	/* we only want TCP */
	if (ip->ip_p != IPPROTO_TCP) {
		return ENOMSG;
	}

	/*
	 * A header length shorter than the fixed IPv4 header is
	 * malformed and would place the TCP header inside the IP
	 * header
	 */
	ip_hdrlen = (size_t)ip->ip_hl * 4;
	if (ip_hdrlen < sizeof(struct ip)) {
		return ENOMSG;
	}

	/* make sure it's not a short packet */
	if (ip_hdrlen + tcp_need > pktlen) {
		return EMSGSIZE;
	}

	/* TCP */
	tcp = (const struct tcphdr *)(ip_hdrlen + (const char *)ip);

	/* tell the caller which one we've found */
	src->sin_family      = AF_INET;
	src->sin_addr.s_addr = ip->ip_src.s_addr;
	src->sin_port        = tcp->th_sport;

	dst->sin_family      = AF_INET;
	dst->sin_addr.s_addr = ip->ip_dst.s_addr;
	dst->sin_port        = tcp->th_dport;

	*ack_seq             = tcp->th_ack;
	*seq                 = tcp->th_seq;
	if (window != NULL) {
		*window = tcp->th_win;
	}
	if (rst != NULL) {
		*rst = tcp->th_flags & TH_RST;
	}

	return 0;
}
799
/*
 * Extract addresses, ports and TCP fields from an IPv6 packet
 *
 * ip_pkt points at the IPv6 header and pktlen is the number of valid
 * bytes there.  Only packets where the TCP header immediately follows
 * the IPv6 header (next header is TCP) are handled.  On success src
 * and dst are filled in, and *ack_seq, *seq and (if non-NULL) *window
 * are copied from the TCP header as-is, without byte-order conversion.
 * If rst is non-NULL then *rst is non-zero if the RST flag is set.
 *
 * Returns 0 on success, ENOMSG if the next header is not TCP, EMSGSIZE
 * if the packet is too short to contain the fields that are read.
 */
static int tcp6_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in6 *src,
			struct sockaddr_in6 *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip6_hdr *ip6;
	const struct tcphdr *tcp = NULL;
	/* th_win is the last TCP header field read below */
	const size_t tcp_need = offsetof(struct tcphdr, th_win) +
				sizeof(tcp->th_win);

	if (pktlen < sizeof(struct ip6_hdr)) {
		return EMSGSIZE;
	}

	/* IP6 */
	ip6 = (const struct ip6_hdr *)ip_pkt;

	/* we only want TCP */
	if (ip6->ip6_nxt != IPPROTO_TCP) {
		return ENOMSG;
	}

	/* make sure the TCP fields read below were actually received */
	if (pktlen < sizeof(struct ip6_hdr) + tcp_need) {
		return EMSGSIZE;
	}

	/* TCP */
	tcp = (const struct tcphdr *)(ip6+1);

	/* tell the caller which one we've found */
	src->sin6_family = AF_INET6;
	src->sin6_port   = tcp->th_sport;
	src->sin6_addr   = ip6->ip6_src;

	dst->sin6_family = AF_INET6;
	dst->sin6_port   = tcp->th_dport;
	dst->sin6_addr   = ip6->ip6_dst;

	*ack_seq             = tcp->th_ack;
	*seq                 = tcp->th_seq;
	if (window != NULL) {
		*window = tcp->th_win;
	}
	if (rst != NULL) {
		*rst = tcp->th_flags & TH_RST;
	}

	return 0;
}
847
848
849 #ifdef HAVE_AF_PACKET
850
/*
 * This function is used to open a raw socket to capture from
 *
 * The socket receives all protocols (ETH_P_ALL), is set non-blocking
 * and is marked close-on-exec.  Neither iface nor private_data is
 * used by this implementation.
 *
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
	int fd;

	/* Open a socket to capture all traffic */
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd == -1) {
		DBG_ERR("Failed to open raw socket\n");
		return -1;
	}

	DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", fd);

	if (set_blocking(fd, false) != 0) {
		DBG_ERR("Failed to set socket non-blocking (%s)\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	set_close_on_exec(fd);

	return fd;
}
879
/*
 * This function is used to do any additional cleanup required when closing
 * a capture socket.
 * Note that the socket itself is closed automatically in the caller.
 *
 * This implementation has nothing to clean up, so private_data is
 * ignored and 0 is always returned.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
889
890
891 /*
892  * called when the raw socket becomes readable
893  */
894 int ctdb_sys_read_tcp_packet(int s, void *private_data,
895                              ctdb_sock_addr *src,
896                              ctdb_sock_addr *dst,
897                              uint32_t *ack_seq,
898                              uint32_t *seq,
899                              int *rst,
900                              uint16_t *window)
901 {
902         ssize_t nread;
903         uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
904         struct ether_header *eth;
905         int ret;
906
907         nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
908         if (nread < sizeof(*eth)) {
909                 return EMSGSIZE;
910         }
911
912         ZERO_STRUCTP(src);
913         ZERO_STRUCTP(dst);
914
915         /* Ethernet */
916         eth = (struct ether_header *)pkt;
917
918         /* we want either IPv4 or IPv6 */
919         if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
920                 ret = tcp4_extract(pkt + sizeof(struct ether_header),
921                                    (size_t)nread - sizeof(struct ether_header),
922                                    &src->ip,
923                                    &dst->ip,
924                                    ack_seq,
925                                    seq,
926                                    rst,
927                                    window);
928                 return ret;
929
930         } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
931                 ret = tcp6_extract(pkt + sizeof(struct ether_header),
932                                    (size_t)nread - sizeof(struct ether_header),
933                                    &src->ip6,
934                                    &dst->ip6,
935                                    ack_seq,
936                                    seq,
937                                    rst,
938                                    window);
939                 return ret;
940         }
941
942         return ENOMSG;
943 }
944
945 #else /* HAVE_AF_PACKET */
946
947 #include <pcap.h>
948
949 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
950 {
951         pcap_t *pt;
952
953         pt=pcap_open_live(iface, 100, 0, 0, NULL);
954         if (pt == NULL) {
955                 DBG_ERR("Failed to open capture device %s\n", iface);
956                 return -1;
957         }
958         *((pcap_t **)private_data) = pt;
959
960         return pcap_fileno(pt);
961 }
962
963 int ctdb_sys_close_capture_socket(void *private_data)
964 {
965         pcap_t *pt = (pcap_t *)private_data;
966         pcap_close(pt);
967         return 0;
968 }
969
970 int ctdb_sys_read_tcp_packet(int s,
971                              void *private_data,
972                              ctdb_sock_addr *src,
973                              ctdb_sock_addr *dst,
974                              uint32_t *ack_seq,
975                              uint32_t *seq,
976                              int *rst,
977                              uint16_t *window)
978 {
979         int ret;
980         struct ether_header *eth;
981         struct pcap_pkthdr pkthdr;
982         const u_char *buffer;
983         pcap_t *pt = (pcap_t *)private_data;
984
985         buffer=pcap_next(pt, &pkthdr);
986         if (buffer==NULL) {
987                 return ENOMSG;
988         }
989
990         ZERO_STRUCTP(src);
991         ZERO_STRUCTP(dst);
992
993         /* Ethernet */
994         eth = (struct ether_header *)buffer;
995
996         /* we want either IPv4 or IPv6 */
997         if (eth->ether_type == htons(ETHERTYPE_IP)) {
998                 ret = tcp4_extract(buffer + sizeof(struct ether_header),
999                                    (size_t)(pkthdr.caplen -
1000                                             sizeof(struct ether_header)),
1001                                    &src->ip,
1002                                    &dst->ip,
1003                                    ack_seq,
1004                                    seq,
1005                                    rst,
1006                                    window);
1007                 return ret;
1008
1009         } else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
1010                 ret = tcp6_extract(buffer + sizeof(struct ether_header),
1011                                    (size_t)(pkthdr.caplen -
1012                                             sizeof(struct ether_header)),
1013                                    &src->ip6,
1014                                    &dst->ip6,
1015                                    ack_seq,
1016                                    seq,
1017                                    rst,
1018                                    window);
1019                 return ret;
1020         }
1021
1022         return ENOMSG;
1023 }
1024
1025 #endif /* HAVE_AF_PACKET */