ctdb-scripts: Update statd-callout to try several configuration files
[vlendec/samba-autobuild/.git] / ctdb / common / system_socket.c
1 /*
2    ctdb system specific code to manage raw sockets on linux
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Marc Dequènes (Duck) 2009
7    Copyright (C) Volker Lendecke 2012
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "replace.h"
24
25 /*
26  * Use BSD struct tcphdr field names for portability.  Modern glibc
27  * makes them available by default via <netinet/tcp.h> but older glibc
28  * requires __FAVOR_BSD to be defined.
29  *
30  * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
31  * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
32  * set.  Including "replace.h" above causes <features.h> to be
33  * indirectly included and this will not set __FAVOR_BSD because
34  * _GNU_SOURCE is set in Samba's "config.h" (which is included by
35  * "replace.h").
36  *
37  * Therefore, set __FAVOR_BSD by hand below.
38  */
39 #define __FAVOR_BSD 1
40 #include "system/network.h"
41
42 #ifdef HAVE_NETINET_IF_ETHER_H
43 #include <netinet/if_ether.h>
44 #endif
45 #ifdef HAVE_NETINET_IP6_H
46 #include <netinet/ip6.h>
47 #endif
48 #ifdef HAVE_NETINET_ICMP6_H
49 #include <netinet/icmp6.h>
50 #endif
51 #ifdef HAVE_LINUX_IF_PACKET_H
52 #include <linux/if_packet.h>
53 #endif
54
55 #ifndef ETHERTYPE_IP6
56 #define ETHERTYPE_IP6 0x86dd
57 #endif
58
59 #include "lib/util/debug.h"
60 #include "lib/util/blocking.h"
61
62 #include "protocol/protocol.h"
63
64 #include "common/logging.h"
65 #include "common/system_socket.h"
66
67 /*
68   uint16 checksum for n bytes
69  */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
        /*
         * Sum the buffer as big-endian (network order) 16-bit words and
         * return the unfolded 32-bit total; callers fold the carries.
         *
         * The bytes are read one at a time so that the result does not
         * depend on host byte order and data need not be 16-bit aligned.
         */
        const uint8_t *p = (const uint8_t *)data;
        uint32_t sum = 0;

        for (; n >= 2; n -= 2, p += 2) {
                sum += ((uint32_t)p[0] << 8) | (uint32_t)p[1];
        }

        /*
         * An odd trailing byte is the high-order byte of a final word
         * that is padded with a zero byte.
         */
        if (n == 1) {
                sum += (uint32_t)p[0] << 8;
        }

        return sum;
}
83
84 /*
85  * See if the given IP is currently on an interface
86  */
87 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
88 {
89         int s;
90         int ret;
91         ctdb_sock_addr __addr = *_addr;
92         ctdb_sock_addr *addr = &__addr;
93         socklen_t addrlen = 0;
94
95         switch (addr->sa.sa_family) {
96         case AF_INET:
97                 addr->ip.sin_port = 0;
98                 addrlen = sizeof(struct sockaddr_in);
99                 break;
100         case AF_INET6:
101                 addr->ip6.sin6_port = 0;
102                 addrlen = sizeof(struct sockaddr_in6);
103                 break;
104         }
105
106         s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
107         if (s == -1) {
108                 return false;
109         }
110
111         ret = bind(s, (struct sockaddr *)addr, addrlen);
112
113         close(s);
114         return ret == 0;
115 }
116
117 /*
118  * simple TCP checksum - assumes data is multiple of 2 bytes long
119  */
120 static uint16_t ip_checksum(uint16_t *data, size_t n, struct ip *ip)
121 {
122         uint32_t sum = uint16_checksum(data, n);
123         uint16_t sum2;
124
125         sum += uint16_checksum((uint16_t *)&ip->ip_src, sizeof(ip->ip_src));
126         sum += uint16_checksum((uint16_t *)&ip->ip_dst, sizeof(ip->ip_dst));
127         sum += ip->ip_p + n;
128         sum = (sum & 0xFFFF) + (sum >> 16);
129         sum = (sum & 0xFFFF) + (sum >> 16);
130         sum2 = htons(sum);
131         sum2 = ~sum2;
132         if (sum2 == 0) {
133                 return 0xFFFF;
134         }
135         return sum2;
136 }
137
/*
 * Checksum n bytes of upper-layer data together with the IPv6
 * pseudo-header fields taken from ip6 (source, destination, length
 * and next header).  The result is in network byte order and is
 * never 0 (0xFFFF is returned instead).
 */
static uint16_t ip6_checksum(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
        uint16_t pseudo[3];
        uint32_t nlen = htonl(n);
        uint32_t acc;
        uint16_t result;
        int fold;

        /* Upper-layer length as two 16-bit halves, then the next header */
        pseudo[0] = nlen & UINT16_MAX;
        pseudo[1] = (nlen >> 16) & UINT16_MAX;
        /* ip6_nxt is only 8 bits, so it fits into a uint16_t */
        pseudo[2] = htons(ip6->ip6_nxt);

        acc = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
        acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
        acc += uint16_checksum(pseudo, sizeof(pseudo));
        acc += uint16_checksum(data, n);

        /* Fold the carries back into the low 16 bits, twice */
        for (fold = 0; fold < 2; fold++) {
                acc = (acc & 0xFFFF) + (acc >> 16);
        }

        result = htons(acc);
        result = ~result;

        return (result == 0) ? 0xFFFF : result;
}
166
167 /*
168  * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
169  */
170
171 #ifdef HAVE_PACKETSOCKET
172
173 /*
174  * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
175  * packets
176  */
177
/*
 * The size macros are fully parenthesised so that each one expands to
 * a single operand and can safely be combined with other operators
 * (for example "2 * ARP_STRUCT_SIZE").
 */
#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
                         sizeof(struct ether_arp))

#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
                            sizeof(struct ip6_hdr) + \
                            sizeof(struct nd_neighbor_advert) + \
                            sizeof(struct nd_opt_hdr) + \
                            sizeof(struct ether_addr))

/* Packet buffers are the larger of the packet structure size and 64 bytes */
#define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64)

#define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64)
190
191 static int arp_build(uint8_t *buffer,
192                      size_t buflen,
193                      const struct sockaddr_in *addr,
194                      const struct ether_addr *hwaddr,
195                      bool reply,
196                      struct ether_addr **ether_dhost,
197                      size_t *len)
198 {
199         size_t l = ARP_BUFFER_SIZE;
200         struct ether_header *eh;
201         struct ether_arp *ea;
202         struct arphdr *ah;
203
204         if (addr->sin_family != AF_INET) {
205                 return EINVAL;
206         }
207
208         if (buflen < l) {
209                 return EMSGSIZE;
210         }
211
212         memset(buffer, 0 , l);
213
214         eh = (struct ether_header *)buffer;
215         memset(eh->ether_dhost, 0xff, ETH_ALEN);
216         memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
217         eh->ether_type = htons(ETHERTYPE_ARP);
218
219         ea = (struct ether_arp *)(buffer + sizeof(struct ether_header));
220         ah = &ea->ea_hdr;
221         ah->ar_hrd = htons(ARPHRD_ETHER);
222         ah->ar_pro = htons(ETH_P_IP);
223         ah->ar_hln = ETH_ALEN;
224         ah->ar_pln = sizeof(ea->arp_spa);
225
226         if (! reply) {
227                 ah->ar_op  = htons(ARPOP_REQUEST);
228                 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
229                 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
230                 memset(ea->arp_tha, 0, ETH_ALEN);
231                 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
232         } else {
233                 ah->ar_op  = htons(ARPOP_REPLY);
234                 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
235                 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
236                 memcpy(ea->arp_tha, hwaddr, ETH_ALEN);
237                 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
238         }
239
240         *ether_dhost = (struct ether_addr *)eh->ether_dhost;
241         *len = l;
242         return 0;
243 }
244
245 static int ip6_na_build(uint8_t *buffer,
246                         size_t buflen,
247                         const struct sockaddr_in6 *addr,
248                         const struct ether_addr *hwaddr,
249                         struct ether_addr **ether_dhost,
250                         size_t *len)
251 {
252         size_t l = IP6_NA_BUFFER_SIZE;
253         struct ether_header *eh;
254         struct ip6_hdr *ip6;
255         struct nd_neighbor_advert *nd_na;
256         struct nd_opt_hdr *nd_oh;
257         struct ether_addr *ea;
258         int ret;
259
260         if (addr->sin6_family != AF_INET6) {
261                 return EINVAL;
262         }
263
264         if (buflen < l) {
265                 return EMSGSIZE;
266         }
267
268         memset(buffer, 0 , l);
269
270         eh = (struct ether_header *)buffer;
271         /*
272          * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
273          * section 7) - note memset 0 above!
274          */
275         eh->ether_dhost[0] = 0x33;
276         eh->ether_dhost[1] = 0x33;
277         eh->ether_dhost[5] = 0x01;
278         memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
279         eh->ether_type = htons(ETHERTYPE_IP6);
280
281         ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header));
282         ip6->ip6_vfc  = 6 << 4;
283         ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) +
284                               sizeof(struct nd_opt_hdr) +
285                               ETH_ALEN);
286         ip6->ip6_nxt  = IPPROTO_ICMPV6;
287         ip6->ip6_hlim = 255;
288         ip6->ip6_src  = addr->sin6_addr;
289         /* all-nodes multicast */
290
291         ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
292         if (ret != 1) {
293                 return EIO;
294         }
295
296         nd_na = (struct nd_neighbor_advert *)(buffer +
297                                               sizeof(struct ether_header) +
298                                               sizeof(struct ip6_hdr));
299         nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
300         nd_na->nd_na_code = 0;
301         nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
302         nd_na->nd_na_target = addr->sin6_addr;
303
304         /* Option: Target link-layer address */
305         nd_oh = (struct nd_opt_hdr *)(buffer +
306                                       sizeof(struct ether_header) +
307                                       sizeof(struct ip6_hdr) +
308                                       sizeof(struct nd_neighbor_advert));
309         nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
310         nd_oh->nd_opt_len = 1;  /* multiple of 8 octets */
311
312         ea = (struct ether_addr *)(buffer +
313                                    sizeof(struct ether_header) +
314                                    sizeof(struct ip6_hdr) +
315                                    sizeof(struct nd_neighbor_advert) +
316                                    sizeof(struct nd_opt_hdr));
317         memcpy(ea, hwaddr, ETH_ALEN);
318
319         nd_na->nd_na_cksum = ip6_checksum((uint16_t *)nd_na,
320                                           ntohs(ip6->ip6_plen),
321                                           ip6);
322
323         *ether_dhost = (struct ether_addr *)eh->ether_dhost;
324         *len = l;
325         return 0;
326 }
327
328 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
329 {
330         int s;
331         struct sockaddr_ll sall = {0};
332         struct ifreq if_hwaddr = {
333                 .ifr_ifru = {
334                         .ifru_flags = 0
335                 },
336         };
337         uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
338         struct ifreq ifr = {
339                 .ifr_ifru = {
340                         .ifru_flags = 0
341                 },
342         };
343         struct ether_addr *hwaddr = NULL;
344         struct ether_addr *ether_dhost = NULL;
345         size_t len = 0;
346         int ret = 0;
347
348         s = socket(AF_PACKET, SOCK_RAW, 0);
349         if (s == -1) {
350                 ret = errno;
351                 DBG_ERR("Failed to open raw socket\n");
352                 return ret;
353         }
354         DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
355
356         /* Find interface */
357         strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
358         if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
359                 ret = errno;
360                 DBG_ERR("Interface '%s' not found\n", iface);
361                 goto fail;
362         }
363
364         /* Get MAC address */
365         strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
366         ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
367         if ( ret < 0 ) {
368                 ret = errno;
369                 DBG_ERR("ioctl failed\n");
370                 goto fail;
371         }
372         if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
373                 ret = 0;
374                 D_DEBUG("Ignoring loopback arp request\n");
375                 goto fail;
376         }
377         if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
378                 ret = EINVAL;
379                 DBG_ERR("Not an ethernet address family (0x%x)\n",
380                         if_hwaddr.ifr_hwaddr.sa_family);
381                 goto fail;;
382         }
383
384         /* Set up most of destination address structure */
385         sall.sll_family = AF_PACKET;
386         sall.sll_halen = sizeof(struct ether_addr);
387         sall.sll_protocol = htons(ETH_P_ALL);
388         sall.sll_ifindex = ifr.ifr_ifindex;
389
390         /* For clarity */
391         hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
392
393         switch (addr->ip.sin_family) {
394         case AF_INET:
395                 /* Send gratuitous ARP */
396                 ret = arp_build(buffer,
397                                 sizeof(buffer),
398                                 &addr->ip,
399                                 hwaddr,
400                                 false,
401                                 &ether_dhost,
402                                 &len);
403                 if (ret != 0) {
404                         DBG_ERR("Failed to build ARP request\n");
405                         goto fail;
406                 }
407
408                 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
409
410                 ret = sendto(s,
411                              buffer,
412                              len,
413                              0,
414                              (struct sockaddr *)&sall,
415                              sizeof(sall));
416                 if (ret < 0 ) {
417                         ret = errno;
418                         DBG_ERR("Failed sendto\n");
419                         goto fail;
420                 }
421
422                 /* Send unsolicited ARP reply */
423                 ret = arp_build(buffer,
424                                 sizeof(buffer),
425                                 &addr->ip,
426                                 hwaddr,
427                                 true,
428                                 &ether_dhost,
429                                 &len);
430                 if (ret != 0) {
431                         DBG_ERR("Failed to build ARP reply\n");
432                         goto fail;
433                 }
434
435                 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
436
437                 ret = sendto(s,
438                              buffer,
439                              len,
440                              0,
441                              (struct sockaddr *)&sall,
442                              sizeof(sall));
443                 if (ret < 0 ) {
444                         ret = errno;
445                         DBG_ERR("Failed sendto\n");
446                         goto fail;
447                 }
448
449                 close(s);
450                 break;
451
452         case AF_INET6:
453                 ret = ip6_na_build(buffer,
454                                    sizeof(buffer),
455                                    &addr->ip6,
456                                    hwaddr,
457                                    &ether_dhost,
458                                    &len);
459                 if (ret != 0) {
460                         DBG_ERR("Failed to build IPv6 neighbor advertisment\n");
461                         goto fail;
462                 }
463
464                 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
465
466                 ret = sendto(s,
467                              buffer,
468                              len,
469                              0,
470                              (struct sockaddr *)&sall,
471                              sizeof(sall));
472                 if (ret < 0 ) {
473                         ret = errno;
474                         DBG_ERR("Failed sendto\n");
475                         goto fail;
476                 }
477
478                 close(s);
479                 break;
480
481         default:
482                 ret = EINVAL;
483                 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
484                         addr->ip.sin_family);
485                 goto fail;
486         }
487
488         return 0;
489
490 fail:
491         close(s);
492         return ret;
493 }
494
495 #else /* HAVE_PACKETSOCKET */
496
497 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
498 {
499         /* Not implemented */
500         return ENOSYS;
501 }
502
503 #endif /* HAVE_PACKETSOCKET */
504
505
/*
 * Sizes of an IP header followed by a TCP header without options.
 * Parenthesised so that each macro expands to a single operand.
 */
#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
                             sizeof(struct tcphdr))

#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
                             sizeof(struct tcphdr))
511
512 static int tcp4_build(uint8_t *buf,
513                       size_t buflen,
514                       const struct sockaddr_in *src,
515                       const struct sockaddr_in *dst,
516                       uint32_t seq,
517                       uint32_t ack,
518                       int rst,
519                       size_t *len)
520 {
521         size_t l = IP4_TCP_BUFFER_SIZE;
522         struct {
523                 struct ip ip;
524                 struct tcphdr tcp;
525         } *ip4pkt;
526
527         if (l != sizeof(*ip4pkt)) {
528                 return EMSGSIZE;
529         }
530
531         if (buflen < l) {
532                 return EMSGSIZE;
533         }
534
535         ip4pkt = (void *)buf;
536         memset(ip4pkt, 0, l);
537
538         ip4pkt->ip.ip_v     = 4;
539         ip4pkt->ip.ip_hl    = sizeof(ip4pkt->ip)/sizeof(uint32_t);
540         ip4pkt->ip.ip_len   = htons(sizeof(ip4pkt));
541         ip4pkt->ip.ip_ttl   = 255;
542         ip4pkt->ip.ip_p     = IPPROTO_TCP;
543         ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
544         ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
545         ip4pkt->ip.ip_sum   = 0;
546
547         ip4pkt->tcp.th_sport = src->sin_port;
548         ip4pkt->tcp.th_dport = dst->sin_port;
549         ip4pkt->tcp.th_seq   = seq;
550         ip4pkt->tcp.th_ack   = ack;
551         ip4pkt->tcp.th_flags = 0;
552         ip4pkt->tcp.th_flags |= TH_ACK;
553         if (rst) {
554                 ip4pkt->tcp.th_flags |= TH_RST;
555         }
556         ip4pkt->tcp.th_off   = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
557         /* this makes it easier to spot in a sniffer */
558         ip4pkt->tcp.th_win   = htons(1234);
559         ip4pkt->tcp.th_sum   = ip_checksum((uint16_t *)&ip4pkt->tcp,
560                                            sizeof(ip4pkt->tcp),
561                                            &ip4pkt->ip);
562
563         *len = l;
564         return 0;
565 }
566
567 static int tcp6_build(uint8_t *buf,
568                       size_t buflen,
569                       const struct sockaddr_in6 *src,
570                       const struct sockaddr_in6 *dst,
571                       uint32_t seq,
572                       uint32_t ack,
573                       int rst,
574                       size_t *len)
575 {
576         size_t l = IP6_TCP_BUFFER_SIZE;
577         struct {
578                 struct ip6_hdr ip6;
579                 struct tcphdr tcp;
580         } *ip6pkt;
581
582         if (l != sizeof(*ip6pkt)) {
583                 return EMSGSIZE;
584         }
585
586         if (buflen < l) {
587                 return EMSGSIZE;
588         }
589
590         ip6pkt = (void *)buf;
591         memset(ip6pkt, 0, l);
592
593         ip6pkt->ip6.ip6_vfc  = 6 << 4;
594         ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr));
595         ip6pkt->ip6.ip6_nxt  = IPPROTO_TCP;
596         ip6pkt->ip6.ip6_hlim = 64;
597         ip6pkt->ip6.ip6_src  = src->sin6_addr;
598         ip6pkt->ip6.ip6_dst  = dst->sin6_addr;
599
600         ip6pkt->tcp.th_sport = src->sin6_port;
601         ip6pkt->tcp.th_dport = dst->sin6_port;
602         ip6pkt->tcp.th_seq   = seq;
603         ip6pkt->tcp.th_ack   = ack;
604         ip6pkt->tcp.th_flags = 0;
605         ip6pkt->tcp.th_flags |= TH_ACK;
606         if (rst) {
607                 ip6pkt->tcp.th_flags |= TH_RST;
608         }
609         ip6pkt->tcp.th_off    = sizeof(ip6pkt->tcp)/sizeof(uint32_t);
610         /* this makes it easier to spot in a sniffer */
611         ip6pkt->tcp.th_win   = htons(1234);
612         ip6pkt->tcp.th_sum   = ip6_checksum((uint16_t *)&ip6pkt->tcp,
613                                             sizeof(ip6pkt->tcp),
614                                             &ip6pkt->ip6);
615
616         *len = l;
617         return 0;
618 }
619
620 /*
621  * Send tcp segment from the specified IP/port to the specified
622  * destination IP/port.
623  *
624  * This is used to trigger the receiving host into sending its own ACK,
625  * which should trigger early detection of TCP reset by the client
626  * after IP takeover
627  *
628  * This can also be used to send RST segments (if rst is true) and also
629  * if correct seq and ack numbers are provided.
630  */
631 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
632                       const ctdb_sock_addr *src,
633                       uint32_t seq,
634                       uint32_t ack,
635                       int rst)
636 {
637         uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
638         size_t len = 0;
639         int ret;
640         int s;
641         uint32_t one = 1;
642         struct sockaddr_in6 tmpdest = { 0 };
643         int saved_errno;
644
645         switch (src->ip.sin_family) {
646         case AF_INET:
647                 ret = tcp4_build(buf,
648                                  sizeof(buf),
649                                  &src->ip,
650                                  &dest->ip,
651                                  seq,
652                                  ack,
653                                  rst,
654                                  &len);
655                 if (ret != 0) {
656                         DBG_ERR("Failed to build TCP packet (%d)\n", ret);
657                         return ret;
658                 }
659
660                 /* open a raw socket to send this segment from */
661                 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
662                 if (s == -1) {
663                         DBG_ERR("Failed to open raw socket (%s)\n",
664                                 strerror(errno));
665                         return -1;
666                 }
667
668                 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
669                 if (ret != 0) {
670                         DBG_ERR("Failed to setup IP headers (%s)\n",
671                                 strerror(errno));
672                         close(s);
673                         return -1;
674                 }
675
676                 ret = sendto(s,
677                              buf,
678                              len,
679                              0,
680                              (const struct sockaddr *)&dest->ip,
681                              sizeof(dest->ip));
682                 saved_errno = errno;
683                 close(s);
684                 if (ret != len) {
685                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
686                         return -1;
687                 }
688                 break;
689
690         case AF_INET6:
691                 ret = tcp6_build(buf,
692                                  sizeof(buf),
693                                  &src->ip6,
694                                  &dest->ip6,
695                                  seq,
696                                  ack,
697                                  rst,
698                                  &len);
699                 if (ret != 0) {
700                         DBG_ERR("Failed to build TCP packet (%d)\n", ret);
701                         return ret;
702                 }
703
704                 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
705                 if (s == -1) {
706                         DBG_ERR("Failed to open sending socket\n");
707                         return -1;
708
709                 }
710                 /*
711                  * sendto() on an IPv6 raw socket requires the port to
712                  * be either 0 or a protocol value
713                  */
714                 tmpdest = dest->ip6;
715                 tmpdest.sin6_port = 0;
716
717                 ret = sendto(s,
718                              buf,
719                              len,
720                              0,
721                              (const struct sockaddr *)&tmpdest,
722                              sizeof(tmpdest));
723                 saved_errno = errno;
724                 close(s);
725
726                 if (ret != len) {
727                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
728                         return -1;
729                 }
730                 break;
731
732         default:
733                 DBG_ERR("Not an ipv4/v6 address\n");
734                 return -1;
735         }
736
737         return 0;
738 }
739
740 /*
741  * Packet capture
742  *
743  * If AF_PACKET is available then use a raw socket otherwise use pcap.
744  * wscript has checked to make sure that pcap is available if needed.
745  */
746
747 static int tcp4_extract(const uint8_t *ip_pkt,
748                         size_t pktlen,
749                         struct sockaddr_in *src,
750                         struct sockaddr_in *dst,
751                         uint32_t *ack_seq,
752                         uint32_t *seq,
753                         int *rst,
754                         uint16_t *window)
755 {
756         const struct ip *ip;
757         const struct tcphdr *tcp;
758
759         if (pktlen < sizeof(struct ip)) {
760                 return EMSGSIZE;
761         }
762
763         ip = (const struct ip *)ip_pkt;
764
765         /* IPv4 only */
766         if (ip->ip_v != 4) {
767                 return ENOMSG;
768         }
769         /* Don't look at fragments */
770         if ((ntohs(ip->ip_off)&0x1fff) != 0) {
771                 return ENOMSG;
772         }
773         /* TCP only */
774         if (ip->ip_p != IPPROTO_TCP) {
775                 return ENOMSG;
776         }
777
778         /* Ensure there is enough of the packet to gather required fields */
779         if (pktlen <
780             (ip->ip_hl * sizeof(uint32_t)) + offsetof(struct tcphdr, th_sum)) {
781                 return EMSGSIZE;
782         }
783
784         tcp = (const struct tcphdr *)(ip_pkt + (ip->ip_hl * sizeof(uint32_t)));
785
786         src->sin_family      = AF_INET;
787         src->sin_addr.s_addr = ip->ip_src.s_addr;
788         src->sin_port        = tcp->th_sport;
789
790         dst->sin_family      = AF_INET;
791         dst->sin_addr.s_addr = ip->ip_dst.s_addr;
792         dst->sin_port        = tcp->th_dport;
793
794         *ack_seq             = tcp->th_ack;
795         *seq                 = tcp->th_seq;
796         if (window != NULL) {
797                 *window = tcp->th_win;
798         }
799         if (rst != NULL) {
800                 *rst = tcp->th_flags & TH_RST;
801         }
802
803         return 0;
804 }
805
806 static int tcp6_extract(const uint8_t *ip_pkt,
807                         size_t pktlen,
808                         struct sockaddr_in6 *src,
809                         struct sockaddr_in6 *dst,
810                         uint32_t *ack_seq,
811                         uint32_t *seq,
812                         int *rst,
813                         uint16_t *window)
814 {
815         const struct ip6_hdr *ip6;
816         const struct tcphdr *tcp;
817
818         /* Ensure there is enough of the packet to gather required fields */
819         if (pktlen < sizeof(struct ip6_hdr) + offsetof(struct tcphdr, th_sum)) {
820                 return EMSGSIZE;
821         }
822
823         ip6 = (const struct ip6_hdr *)ip_pkt;
824
825         /* IPv6 only */
826         if ((ip6->ip6_vfc >> 4) != 6){
827                 return ENOMSG;
828         }
829
830         /* TCP only */
831         if (ip6->ip6_nxt != IPPROTO_TCP) {
832                 return ENOMSG;
833         }
834
835         tcp = (const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));
836
837         src->sin6_family = AF_INET6;
838         src->sin6_port   = tcp->th_sport;
839         src->sin6_addr   = ip6->ip6_src;
840
841         dst->sin6_family = AF_INET6;
842         dst->sin6_port   = tcp->th_dport;
843         dst->sin6_addr   = ip6->ip6_dst;
844
845         *ack_seq             = tcp->th_ack;
846         *seq                 = tcp->th_seq;
847         if (window != NULL) {
848                 *window = tcp->th_win;
849         }
850         if (rst != NULL) {
851                 *rst = tcp->th_flags & TH_RST;
852         }
853
854         return 0;
855 }
856
857
858 #ifdef HAVE_AF_PACKET
859
860 /*
861  * This function is used to open a raw socket to capture from
862  */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
        int fd;

        /*
         * A packet socket for all ethernet protocols; iface and
         * private_data are not used by this implementation.
         */
        fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (fd == -1) {
                DBG_ERR("Failed to open raw socket\n");
                return -1;
        }

        DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", fd);

        if (set_blocking(fd, false) != 0) {
                DBG_ERR("Failed to set socket non-blocking (%s)\n",
                        strerror(errno));
                close(fd);
                return -1;
        }

        set_close_on_exec(fd);

        /* Returns the socket, or -1 on failure (above) */
        return fd;
}
888
889 /*
890  * This function is used to do any additional cleanup required when closing
891  * a capture socket.
892  * Note that the socket itself is closed automatically in the caller.
893  */
int ctdb_sys_close_capture_socket(void *private_data)
{
        /* Nothing extra to clean up in this implementation */
        (void)private_data;

        return 0;
}
898
899
900 /*
901  * called when the raw socket becomes readable
902  */
903 int ctdb_sys_read_tcp_packet(int s, void *private_data,
904                              ctdb_sock_addr *src,
905                              ctdb_sock_addr *dst,
906                              uint32_t *ack_seq,
907                              uint32_t *seq,
908                              int *rst,
909                              uint16_t *window)
910 {
911         ssize_t nread;
912         uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
913         struct ether_header *eth;
914         int ret;
915
916         nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
917         if (nread < sizeof(*eth)) {
918                 return EMSGSIZE;
919         }
920
921         ZERO_STRUCTP(src);
922         ZERO_STRUCTP(dst);
923
924         /* Ethernet */
925         eth = (struct ether_header *)pkt;
926
927         /* we want either IPv4 or IPv6 */
928         if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
929                 ret = tcp4_extract(pkt + sizeof(struct ether_header),
930                                    (size_t)nread - sizeof(struct ether_header),
931                                    &src->ip,
932                                    &dst->ip,
933                                    ack_seq,
934                                    seq,
935                                    rst,
936                                    window);
937                 return ret;
938
939         } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
940                 ret = tcp6_extract(pkt + sizeof(struct ether_header),
941                                    (size_t)nread - sizeof(struct ether_header),
942                                    &src->ip6,
943                                    &dst->ip6,
944                                    ack_seq,
945                                    seq,
946                                    rst,
947                                    window);
948                 return ret;
949         }
950
951         return ENOMSG;
952 }
953
954 #else /* HAVE_AF_PACKET */
955
956 #include <pcap.h>
957
958 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
959 {
960         pcap_t *pt;
961
962         pt=pcap_open_live(iface, 100, 0, 0, NULL);
963         if (pt == NULL) {
964                 DBG_ERR("Failed to open capture device %s\n", iface);
965                 return -1;
966         }
967         *((pcap_t **)private_data) = pt;
968
969         return pcap_fileno(pt);
970 }
971
972 int ctdb_sys_close_capture_socket(void *private_data)
973 {
974         pcap_t *pt = (pcap_t *)private_data;
975         pcap_close(pt);
976         return 0;
977 }
978
979 int ctdb_sys_read_tcp_packet(int s,
980                              void *private_data,
981                              ctdb_sock_addr *src,
982                              ctdb_sock_addr *dst,
983                              uint32_t *ack_seq,
984                              uint32_t *seq,
985                              int *rst,
986                              uint16_t *window)
987 {
988         int ret;
989         struct ether_header *eth;
990         struct pcap_pkthdr pkthdr;
991         const u_char *buffer;
992         pcap_t *pt = (pcap_t *)private_data;
993
994         buffer=pcap_next(pt, &pkthdr);
995         if (buffer==NULL) {
996                 return ENOMSG;
997         }
998
999         ZERO_STRUCTP(src);
1000         ZERO_STRUCTP(dst);
1001
1002         /* Ethernet */
1003         eth = (struct ether_header *)buffer;
1004
1005         /* we want either IPv4 or IPv6 */
1006         if (eth->ether_type == htons(ETHERTYPE_IP)) {
1007                 ret = tcp4_extract(buffer + sizeof(struct ether_header),
1008                                    (size_t)(pkthdr.caplen -
1009                                             sizeof(struct ether_header)),
1010                                    &src->ip,
1011                                    &dst->ip,
1012                                    ack_seq,
1013                                    seq,
1014                                    rst,
1015                                    window);
1016                 return ret;
1017
1018         } else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
1019                 ret = tcp6_extract(buffer + sizeof(struct ether_header),
1020                                    (size_t)(pkthdr.caplen -
1021                                             sizeof(struct ether_header)),
1022                                    &src->ip6,
1023                                    &dst->ip6,
1024                                    ack_seq,
1025                                    seq,
1026                                    rst,
1027                                    window);
1028                 return ret;
1029         }
1030
1031         return ENOMSG;
1032 }
1033
1034 #endif /* HAVE_AF_PACKET */