ctdb-common: Move capture_socket functions to ctdb_socket.[ch]
[vlendec/samba-autobuild/.git] / ctdb / common / system_socket.c
1 /*
2    ctdb system specific code to manage raw sockets on linux
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "replace.h"
22
23 /*
24  * Use BSD struct tcphdr field names for portability.  Modern glibc
25  * makes them available by default via <netinet/tcp.h> but older glibc
26  * requires __FAVOR_BSD to be defined.
27  *
28  * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
29  * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
30  * set.  Including "replace.h" above causes <features.h> to be
31  * indirectly included and this will not set __FAVOR_BSD because
32  * _GNU_SOURCE is set in Samba's "config.h" (which is included by
33  * "replace.h").
34  *
35  * Therefore, set __FAVOR_BSD by hand below.
36  */
37 #define __FAVOR_BSD 1
38 #include "system/network.h"
39
40 #ifdef HAVE_NETINET_IF_ETHER_H
41 #include <netinet/if_ether.h>
42 #endif
43 #ifdef HAVE_NETINET_IP6_H
44 #include <netinet/ip6.h>
45 #endif
46 #ifdef HAVE_NETINET_ICMP6_H
47 #include <netinet/icmp6.h>
48 #endif
49 #ifdef HAVE_LINUX_IF_PACKET_H
50 #include <linux/if_packet.h>
51 #endif
52
53 #ifndef ETHERTYPE_IP6
54 #define ETHERTYPE_IP6 0x86dd
55 #endif
56
57 #include "lib/util/debug.h"
58 #include "lib/util/blocking.h"
59
60 #include "protocol/protocol.h"
61
62 #include "common/logging.h"
63 #include "common/system_socket.h"
64
/*
 * Sum n bytes of data as a sequence of 16-bit big-endian (network
 * order) words, without folding the carries.
 *
 * If n is odd, the trailing byte is treated as the high-order byte of
 * a final word padded with a zero byte.
 *
 * The buffer is read byte by byte so the result does not depend on
 * host endianness or on the alignment of data.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
        const uint8_t *p = (const uint8_t *)data;
        uint32_t sum = 0;

        while (n >= 2) {
                sum += ((uint32_t)p[0] << 8) | (uint32_t)p[1];
                p += 2;
                n -= 2;
        }
        if (n == 1) {
                /* Odd trailing byte: pad with zero on the right */
                sum += (uint32_t)p[0] << 8;
        }
        return sum;
}
81
82 /*
83  * See if the given IP is currently on an interface
84  */
85 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
86 {
87         int s;
88         int ret;
89         ctdb_sock_addr __addr = *_addr;
90         ctdb_sock_addr *addr = &__addr;
91         socklen_t addrlen = 0;
92
93         switch (addr->sa.sa_family) {
94         case AF_INET:
95                 addr->ip.sin_port = 0;
96                 addrlen = sizeof(struct sockaddr_in);
97                 break;
98         case AF_INET6:
99                 addr->ip6.sin6_port = 0;
100                 addrlen = sizeof(struct sockaddr_in6);
101                 break;
102         }
103
104         s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
105         if (s == -1) {
106                 return false;
107         }
108
109         ret = bind(s, (struct sockaddr *)addr, addrlen);
110
111         close(s);
112         return ret == 0;
113 }
114
/*
 * Fill *sin from a dotted-quad string and a host-order port.
 *
 * Family and port are written before the address is parsed, so they
 * are set even when false is returned for an unparsable string.
 */
static bool parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin)
{
        int rc;

        sin->sin_family = AF_INET;
        sin->sin_port   = htons(port);

        rc = inet_pton(AF_INET, s, &sin->sin_addr);
        if (rc != 1) {
                DBG_ERR("Failed to translate %s into sin_addr\n", s);
                return false;
        }

#ifdef HAVE_SOCK_SIN_LEN
        sin->sin_len = sizeof(*sin);
#endif
        return true;
}
130
131 static bool parse_ipv6(const char *s,
132                        const char *ifaces,
133                        unsigned port,
134                        ctdb_sock_addr *saddr)
135 {
136         saddr->ip6.sin6_family   = AF_INET6;
137         saddr->ip6.sin6_port     = htons(port);
138         saddr->ip6.sin6_flowinfo = 0;
139         saddr->ip6.sin6_scope_id = 0;
140
141         if (inet_pton(AF_INET6, s, &saddr->ip6.sin6_addr) != 1) {
142                 DBG_ERR("Failed to translate %s into sin6_addr\n", s);
143                 return false;
144         }
145
146         if (ifaces && IN6_IS_ADDR_LINKLOCAL(&saddr->ip6.sin6_addr)) {
147                 if (strchr(ifaces, ',')) {
148                         DBG_ERR("Link local address %s "
149                                 "is specified for multiple ifaces %s\n",
150                                 s, ifaces);
151                         return false;
152                 }
153                 saddr->ip6.sin6_scope_id = if_nametoindex(ifaces);
154         }
155
156 #ifdef HAVE_SOCK_SIN6_LEN
157         saddr->ip6.sin6_len = sizeof(*saddr);
158 #endif
159         return true;
160 }
161
162 static bool parse_ip(const char *addr,
163                      const char *ifaces,
164                      unsigned port,
165                      ctdb_sock_addr *saddr)
166 {
167         char *p;
168         bool ret;
169
170         ZERO_STRUCTP(saddr); /* valgrind :-) */
171
172         /*
173          * IPv4 or IPv6 address?
174          *
175          * Use rindex() because we need the right-most ':' below for
176          * IPv4-mapped IPv6 addresses anyway...
177          */
178         p = rindex(addr, ':');
179         if (p == NULL) {
180                 ret = parse_ipv4(addr, port, &saddr->ip);
181         } else {
182                 uint8_t ipv4_mapped_prefix[12] = {
183                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff
184                 };
185
186                 ret = parse_ipv6(addr, ifaces, port, saddr);
187                 if (! ret) {
188                         return ret;
189                 }
190
191                 /*
192                  * Check for IPv4-mapped IPv6 address
193                  * (e.g. ::ffff:192.0.2.128) - reparse as IPv4 if
194                  * necessary
195                  */
196                 if (memcmp(&saddr->ip6.sin6_addr.s6_addr[0],
197                            ipv4_mapped_prefix,
198                            sizeof(ipv4_mapped_prefix)) == 0) {
199                         /* Reparse as IPv4 */
200                         ret = parse_ipv4(p+1, port, &saddr->ip);
201                 }
202         }
203
204         return ret;
205 }
206
207 /*
208  * Parse an ip/mask pair
209  */
210 bool parse_ip_mask(const char *str,
211                    const char *ifaces,
212                    ctdb_sock_addr *addr,
213                    unsigned *mask)
214 {
215         char *p;
216         char s[64]; /* Much longer than INET6_ADDRSTRLEN */
217         char *endp = NULL;
218         ssize_t len;
219         bool ret;
220
221         ZERO_STRUCT(*addr);
222
223         len = strlen(str);
224         if (len >= sizeof(s)) {
225                 DBG_ERR("Address %s is unreasonably long\n", str);
226                 return false;
227         }
228
229         strncpy(s, str, len+1);
230
231         p = rindex(s, '/');
232         if (p == NULL) {
233                 DBG_ERR("Address %s does not contain a mask\n", s);
234                 return false;
235         }
236
237         *mask = strtoul(p+1, &endp, 10);
238         if (endp == NULL || *endp != 0) {
239                 /* trailing garbage */
240                 DBG_ERR("Trailing garbage after the mask in %s\n", s);
241                 return false;
242         }
243         *p = 0;
244
245
246         /* now is this a ipv4 or ipv6 address ?*/
247         ret = parse_ip(s, ifaces, 0, addr);
248
249         return ret;
250 }
251
252 /*
253  * simple TCP checksum - assumes data is multiple of 2 bytes long
254  */
255 static uint16_t ip_checksum(uint16_t *data, size_t n, struct ip *ip)
256 {
257         uint32_t sum = uint16_checksum(data, n);
258         uint16_t sum2;
259
260         sum += uint16_checksum((uint16_t *)&ip->ip_src, sizeof(ip->ip_src));
261         sum += uint16_checksum((uint16_t *)&ip->ip_dst, sizeof(ip->ip_dst));
262         sum += ip->ip_p + n;
263         sum = (sum & 0xFFFF) + (sum >> 16);
264         sum = (sum & 0xFFFF) + (sum >> 16);
265         sum2 = htons(sum);
266         sum2 = ~sum2;
267         if (sum2 == 0) {
268                 return 0xFFFF;
269         }
270         return sum2;
271 }
272
/*
 * Checksum over n bytes at data, plus an IPv6 pseudo header built
 * from the source and destination addresses and next-header value in
 * *ip6 and the length n.  The result is in network byte order and a
 * computed value of 0 is returned as 0xFFFF.
 */
static uint16_t ip6_checksum(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
        uint32_t pseudo[2];
        uint32_t acc;
        uint16_t result;
        int i;

        acc  = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
        acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);

        /* Upper-layer length and next header, each as a 32-bit field */
        pseudo[0] = htonl(n);
        pseudo[1] = htonl(ip6->ip6_nxt);
        acc += uint16_checksum((uint16_t *)pseudo, 8);

        acc += uint16_checksum(data, n);

        /* Fold the carries back into the low 16 bits, twice */
        for (i = 0; i < 2; i++) {
                acc = (acc & 0xFFFF) + (acc >> 16);
        }

        result = htons(acc);
        result = ~result;

        return (result == 0) ? 0xFFFF : result;
}
297
298 /*
299  * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
300  */
301
302 #ifdef HAVE_PACKETSOCKET
303
304 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
305 {
306         int s, ret;
307         struct sockaddr_ll sall;
308         struct ether_header *eh;
309         struct arphdr *ah;
310         struct ip6_hdr *ip6;
311         struct nd_neighbor_advert *nd_na;
312         struct nd_opt_hdr *nd_oh;
313         struct ifreq if_hwaddr;
314         /* Size of IPv6 neighbor advertisement (with option) */
315         unsigned char buffer[sizeof(struct ether_header) +
316                              sizeof(struct ip6_hdr) +
317                              sizeof(struct nd_neighbor_advert) +
318                              sizeof(struct nd_opt_hdr) + ETH_ALEN];
319         char *ptr;
320         char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
321         struct ifreq ifr;
322
323         ZERO_STRUCT(sall);
324         ZERO_STRUCT(ifr);
325         ZERO_STRUCT(if_hwaddr);
326
327         switch (addr->ip.sin_family) {
328         case AF_INET:
329                 s = socket(AF_PACKET, SOCK_RAW, 0);
330                 if (s == -1){
331                         DBG_ERR("Failed to open raw socket\n");
332                         return -1;
333                 }
334
335                 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
336                 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
337                 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
338                         DBG_ERR("Interface '%s' not found\n", iface);
339                         close(s);
340                         return -1;
341                 }
342
343                 /* get the mac address */
344                 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
345                 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
346                 if ( ret < 0 ) {
347                         close(s);
348                         DBG_ERR("ioctl failed\n");
349                         return -1;
350                 }
351                 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
352                         D_DEBUG("Ignoring loopback arp request\n");
353                         close(s);
354                         return 0;
355                 }
356                 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
357                         close(s);
358                         errno = EINVAL;
359                         DBG_ERR("Not an ethernet address family (0x%x)\n",
360                                 if_hwaddr.ifr_hwaddr.sa_family);
361                         return -1;
362                 }
363
364
365                 memset(buffer, 0 , 64);
366                 eh = (struct ether_header *)buffer;
367                 memset(eh->ether_dhost, 0xff, ETH_ALEN);
368                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
369                 eh->ether_type = htons(ETHERTYPE_ARP);
370
371                 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
372                 ah->ar_hrd = htons(ARPHRD_ETHER);
373                 ah->ar_pro = htons(ETH_P_IP);
374                 ah->ar_hln = ETH_ALEN;
375                 ah->ar_pln = 4;
376
377                 /* send a gratious arp */
378                 ah->ar_op  = htons(ARPOP_REQUEST);
379                 ptr = (char *)&ah[1];
380                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
381                 ptr+=ETH_ALEN;
382                 memcpy(ptr, &addr->ip.sin_addr, 4);
383                 ptr+=4;
384                 memset(ptr, 0, ETH_ALEN);
385                 ptr+=ETH_ALEN;
386                 memcpy(ptr, &addr->ip.sin_addr, 4);
387                 ptr+=4;
388
389                 sall.sll_family = AF_PACKET;
390                 sall.sll_halen = 6;
391                 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
392                 sall.sll_protocol = htons(ETH_P_ALL);
393                 sall.sll_ifindex = ifr.ifr_ifindex;
394                 ret = sendto(s,buffer, 64, 0,
395                              (struct sockaddr *)&sall, sizeof(sall));
396                 if (ret < 0 ){
397                         close(s);
398                         DBG_ERR("Failed sendto\n");
399                         return -1;
400                 }
401
402                 /* send unsolicited arp reply broadcast */
403                 ah->ar_op  = htons(ARPOP_REPLY);
404                 ptr = (char *)&ah[1];
405                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
406                 ptr+=ETH_ALEN;
407                 memcpy(ptr, &addr->ip.sin_addr, 4);
408                 ptr+=4;
409                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
410                 ptr+=ETH_ALEN;
411                 memcpy(ptr, &addr->ip.sin_addr, 4);
412                 ptr+=4;
413
414                 ret = sendto(s, buffer, 64, 0,
415                              (struct sockaddr *)&sall, sizeof(sall));
416                 if (ret < 0 ){
417                         DBG_ERR("Failed sendto\n");
418                         close(s);
419                         return -1;
420                 }
421
422                 close(s);
423                 break;
424         case AF_INET6:
425                 s = socket(AF_PACKET, SOCK_RAW, 0);
426                 if (s == -1){
427                         DBG_ERR("Failed to open raw socket\n");
428                         return -1;
429                 }
430
431                 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
432                 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
433                 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
434                         DBG_ERR("Interface '%s' not found\n", iface);
435                         close(s);
436                         return -1;
437                 }
438
439                 /* get the mac address */
440                 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
441                 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
442                 if ( ret < 0 ) {
443                         close(s);
444                         DBG_ERR("ioctl failed\n");
445                         return -1;
446                 }
447                 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
448                         DBG_DEBUG("Ignoring loopback arp request\n");
449                         close(s);
450                         return 0;
451                 }
452                 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
453                         close(s);
454                         errno = EINVAL;
455                         DBG_ERR("Not an ethernet address family (0x%x)\n",
456                                 if_hwaddr.ifr_hwaddr.sa_family);
457                         return -1;
458                 }
459
460                 memset(buffer, 0 , sizeof(buffer));
461                 eh = (struct ether_header *)buffer;
462                 /*
463                  * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
464                  * section 7) - note zeroes above!
465                  */
466                 eh->ether_dhost[0] = eh->ether_dhost[1] = 0x33;
467                 eh->ether_dhost[5] = 0x01;
468                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
469                 eh->ether_type = htons(ETHERTYPE_IP6);
470
471                 ip6 = (struct ip6_hdr *)(eh+1);
472                 ip6->ip6_vfc  = 0x60;
473                 ip6->ip6_plen = htons(sizeof(*nd_na) +
474                                       sizeof(struct nd_opt_hdr) +
475                                       ETH_ALEN);
476                 ip6->ip6_nxt  = IPPROTO_ICMPV6;
477                 ip6->ip6_hlim = 255;
478                 ip6->ip6_src  = addr->ip6.sin6_addr;
479                 /* all-nodes multicast */
480
481                 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
482                 if (ret != 1) {
483                         close(s);
484                         DBG_ERR("Failed inet_pton\n");
485                         return -1;
486                 }
487
488                 nd_na = (struct nd_neighbor_advert *)(ip6+1);
489                 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
490                 nd_na->nd_na_code = 0;
491                 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
492                 nd_na->nd_na_target = addr->ip6.sin6_addr;
493                 /* Option: Target link-layer address */
494                 nd_oh = (struct nd_opt_hdr *)(nd_na+1);
495                 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
496                 nd_oh->nd_opt_len = 1;
497                 memcpy(&(nd_oh+1)[0], if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
498
499                 nd_na->nd_na_cksum = ip6_checksum((uint16_t *)nd_na,
500                                                   ntohs(ip6->ip6_plen), ip6);
501
502                 sall.sll_family = AF_PACKET;
503                 sall.sll_halen = 6;
504                 memcpy(&sall.sll_addr[0], &eh->ether_dhost[0], sall.sll_halen);
505                 sall.sll_protocol = htons(ETH_P_ALL);
506                 sall.sll_ifindex = ifr.ifr_ifindex;
507                 ret = sendto(s, buffer, sizeof(buffer),
508                              0, (struct sockaddr *)&sall, sizeof(sall));
509                 if (ret < 0 ){
510                         close(s);
511                         DBG_ERR("Failed sendto\n");
512                         return -1;
513                 }
514
515                 close(s);
516                 break;
517         default:
518                 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
519                         addr->ip.sin_family);
520                 return -1;
521         }
522
523         return 0;
524 }
525
526 #else /* HAVE_PACKETSOCKET */
527
528 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
529 {
530         /* Not implemented */
531         errno = ENOSYS;
532         return -1;
533 }
534
535 #endif /* HAVE_PACKETSOCKET */
536
537 /*
538  * Send tcp segment from the specified IP/port to the specified
539  * destination IP/port.
540  *
541  * This is used to trigger the receiving host into sending its own ACK,
542  * which should trigger early detection of TCP reset by the client
543  * after IP takeover
544  *
545  * This can also be used to send RST segments (if rst is true) and also
546  * if correct seq and ack numbers are provided.
547  */
548 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
549                       const ctdb_sock_addr *src,
550                       uint32_t seq,
551                       uint32_t ack,
552                       int rst)
553 {
554         int s;
555         int ret;
556         uint32_t one = 1;
557         uint16_t tmpport;
558         ctdb_sock_addr *tmpdest;
559         struct {
560                 struct ip ip;
561                 struct tcphdr tcp;
562         } ip4pkt;
563         struct {
564                 struct ip6_hdr ip6;
565                 struct tcphdr tcp;
566         } ip6pkt;
567         int saved_errno;
568
569         switch (src->ip.sin_family) {
570         case AF_INET:
571                 ZERO_STRUCT(ip4pkt);
572                 ip4pkt.ip.ip_v     = 4;
573                 ip4pkt.ip.ip_hl    = sizeof(ip4pkt.ip)/4;
574                 ip4pkt.ip.ip_len   = htons(sizeof(ip4pkt));
575                 ip4pkt.ip.ip_ttl   = 255;
576                 ip4pkt.ip.ip_p     = IPPROTO_TCP;
577                 ip4pkt.ip.ip_src.s_addr    = src->ip.sin_addr.s_addr;
578                 ip4pkt.ip.ip_dst.s_addr    = dest->ip.sin_addr.s_addr;
579                 ip4pkt.ip.ip_sum   = 0;
580
581                 ip4pkt.tcp.th_sport = src->ip.sin_port;
582                 ip4pkt.tcp.th_dport = dest->ip.sin_port;
583                 ip4pkt.tcp.th_seq   = seq;
584                 ip4pkt.tcp.th_ack   = ack;
585                 ip4pkt.tcp.th_flags = 0;
586                 ip4pkt.tcp.th_flags |= TH_ACK;
587                 if (rst) {
588                         ip4pkt.tcp.th_flags |= TH_RST;
589                 }
590                 ip4pkt.tcp.th_off   = sizeof(ip4pkt.tcp)/4;
591                 /* this makes it easier to spot in a sniffer */
592                 ip4pkt.tcp.th_win   = htons(1234);
593                 ip4pkt.tcp.th_sum   = ip_checksum((uint16_t *)&ip4pkt.tcp,
594                                                   sizeof(ip4pkt.tcp),
595                                                   &ip4pkt.ip);
596
597                 /* open a raw socket to send this segment from */
598                 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
599                 if (s == -1) {
600                         DBG_ERR("Failed to open raw socket (%s)\n",
601                                 strerror(errno));
602                         return -1;
603                 }
604
605                 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
606                 if (ret != 0) {
607                         DBG_ERR("Failed to setup IP headers (%s)\n",
608                                 strerror(errno));
609                         close(s);
610                         return -1;
611                 }
612
613                 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
614                              (const struct sockaddr *)&dest->ip,
615                              sizeof(dest->ip));
616                 saved_errno = errno;
617                 close(s);
618                 if (ret != sizeof(ip4pkt)) {
619                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
620                         return -1;
621                 }
622                 break;
623         case AF_INET6:
624                 ZERO_STRUCT(ip6pkt);
625                 ip6pkt.ip6.ip6_vfc  = 0x60;
626                 ip6pkt.ip6.ip6_plen = htons(20);
627                 ip6pkt.ip6.ip6_nxt  = IPPROTO_TCP;
628                 ip6pkt.ip6.ip6_hlim = 64;
629                 ip6pkt.ip6.ip6_src  = src->ip6.sin6_addr;
630                 ip6pkt.ip6.ip6_dst  = dest->ip6.sin6_addr;
631
632                 ip6pkt.tcp.th_sport = src->ip6.sin6_port;
633                 ip6pkt.tcp.th_dport = dest->ip6.sin6_port;
634                 ip6pkt.tcp.th_seq   = seq;
635                 ip6pkt.tcp.th_ack   = ack;
636                 ip6pkt.tcp.th_flags = 0;
637                 ip6pkt.tcp.th_flags |= TH_RST;
638                 if (rst) {
639                         ip6pkt.tcp.th_flags |= TH_RST;
640                 }
641                 ip6pkt.tcp.th_off    = sizeof(ip6pkt.tcp)/4;
642                 /* this makes it easier to spot in a sniffer */
643                 ip6pkt.tcp.th_win   = htons(1234);
644                 ip6pkt.tcp.th_sum   = ip6_checksum((uint16_t *)&ip6pkt.tcp,
645                                                    sizeof(ip6pkt.tcp),
646                                                    &ip6pkt.ip6);
647
648                 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
649                 if (s == -1) {
650                         DBG_ERR("Failed to open sending socket\n");
651                         return -1;
652
653                 }
654                 /* sendto() don't like if the port is set and the socket is
655                    in raw mode.
656                 */
657                 tmpdest = discard_const(dest);
658                 tmpport = tmpdest->ip6.sin6_port;
659
660                 tmpdest->ip6.sin6_port = 0;
661                 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
662                              (const struct sockaddr *)&dest->ip6,
663                              sizeof(dest->ip6));
664                 saved_errno = errno;
665                 tmpdest->ip6.sin6_port = tmpport;
666                 close(s);
667
668                 if (ret != sizeof(ip6pkt)) {
669                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
670                         return -1;
671                 }
672                 break;
673
674         default:
675                 DBG_ERR("Not an ipv4/v6 address\n");
676                 return -1;
677         }
678
679         return 0;
680 }
681
682 /*
683  * Packet capture
684  *
685  * If AF_PACKET is available then use a raw socket otherwise use pcap.
686  * wscript has checked to make sure that pcap is available if needed.
687  */
688
689 #ifdef HAVE_AF_PACKET
690
691 /*
692  * This function is used to open a raw socket to capture from
693  */
694 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
695 {
696         int s, ret;
697
698         /* Open a socket to capture all traffic */
699         s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
700         if (s == -1) {
701                 DBG_ERR("Failed to open raw socket\n");
702                 return -1;
703         }
704
705         DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", s);
706
707         ret = set_blocking(s, false);
708         if (ret != 0) {
709                 DBG_ERR("Failed to set socket non-blocking (%s)\n",
710                         strerror(errno));
711                 close(s);
712                 return -1;
713         }
714
715         set_close_on_exec(s);
716
717         return s;
718 }
719
/*
 * Hook for extra cleanup when a capture socket is closed.
 *
 * The raw socket implementation keeps no private state, so there is
 * nothing to release; the descriptor itself is closed by the caller.
 * Always returns 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
        (void)private_data;

        return 0;
}
729
730
731 /*
732  * called when the raw socket becomes readable
733  */
734 int ctdb_sys_read_tcp_packet(int s, void *private_data,
735                              ctdb_sock_addr *src,
736                              ctdb_sock_addr *dst,
737                              uint32_t *ack_seq,
738                              uint32_t *seq,
739                              int *rst,
740                              uint16_t *window)
741 {
742         int ret;
743 #define RCVPKTSIZE 100
744         char pkt[RCVPKTSIZE];
745         struct ether_header *eth;
746         struct iphdr *ip;
747         struct ip6_hdr *ip6;
748         struct tcphdr *tcp;
749
750         ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
751         if (ret < sizeof(*eth)+sizeof(*ip)) {
752                 return -1;
753         }
754
755         ZERO_STRUCTP(src);
756         ZERO_STRUCTP(dst);
757
758         /* Ethernet */
759         eth = (struct ether_header *)pkt;
760
761         /* we want either IPv4 or IPv6 */
762         if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
763                 /* IP */
764                 ip = (struct iphdr *)(eth+1);
765
766                 /* We only want IPv4 packets */
767                 if (ip->version != 4) {
768                         return -1;
769                 }
770                 /* Dont look at fragments */
771                 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
772                         return -1;
773                 }
774                 /* we only want TCP */
775                 if (ip->protocol != IPPROTO_TCP) {
776                         return -1;
777                 }
778
779                 /* make sure its not a short packet */
780                 if (offsetof(struct tcphdr, th_ack) + 4 +
781                     (ip->ihl*4) + sizeof(*eth) > ret) {
782                         return -1;
783                 }
784                 /* TCP */
785                 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
786
787                 /* tell the caller which one we've found */
788                 src->ip.sin_family      = AF_INET;
789                 src->ip.sin_addr.s_addr = ip->saddr;
790                 src->ip.sin_port        = tcp->th_sport;
791                 dst->ip.sin_family      = AF_INET;
792                 dst->ip.sin_addr.s_addr = ip->daddr;
793                 dst->ip.sin_port        = tcp->th_dport;
794                 *ack_seq                = tcp->th_ack;
795                 *seq                    = tcp->th_seq;
796                 if (window != NULL) {
797                         *window = tcp->th_win;
798                 }
799                 if (rst != NULL) {
800                         *rst = tcp->th_flags & TH_RST;
801                 }
802
803                 return 0;
804         } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
805                 /* IP6 */
806                 ip6 = (struct ip6_hdr *)(eth+1);
807
808                 /* we only want TCP */
809                 if (ip6->ip6_nxt != IPPROTO_TCP) {
810                         return -1;
811                 }
812
813                 /* TCP */
814                 tcp = (struct tcphdr *)(ip6+1);
815
816                 /* tell the caller which one we've found */
817                 src->ip6.sin6_family = AF_INET6;
818                 src->ip6.sin6_port   = tcp->th_sport;
819                 src->ip6.sin6_addr   = ip6->ip6_src;
820
821                 dst->ip6.sin6_family = AF_INET6;
822                 dst->ip6.sin6_port   = tcp->th_dport;
823                 dst->ip6.sin6_addr   = ip6->ip6_dst;
824
825                 *ack_seq             = tcp->th_ack;
826                 *seq                 = tcp->th_seq;
827                 if (window != NULL) {
828                         *window = tcp->th_win;
829                 }
830                 if (rst != NULL) {
831                         *rst = tcp->th_flags & TH_RST;
832                 }
833
834                 return 0;
835         }
836
837         return -1;
838 }
839
840 #else /* HAVE_AF_PACKET */
841
842 #include <pcap.h>
843
844 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
845 {
846         pcap_t *pt;
847
848         pt=pcap_open_live(iface, 100, 0, 0, NULL);
849         if (pt == NULL) {
850                 DBG_ERR("Failed to open capture device %s\n", iface);
851                 return -1;
852         }
853         *((pcap_t **)private_data) = pt;
854
855         return pcap_fileno(pt);
856 }
857
858 int ctdb_sys_close_capture_socket(void *private_data)
859 {
860         pcap_t *pt = (pcap_t *)private_data;
861         pcap_close(pt);
862         return 0;
863 }
864
865 int ctdb_sys_read_tcp_packet(int s,
866                              void *private_data,
867                              ctdb_sock_addr *src,
868                              ctdb_sock_addr *dst,
869                              uint32_t *ack_seq,
870                              uint32_t *seq,
871                              int *rst,
872                              uint16_t *window)
873 {
874         int ret;
875         struct ether_header *eth;
876         struct ip *ip;
877         struct ip6_hdr *ip6;
878         struct tcphdr *tcp;
879         struct ctdb_killtcp_connection *conn;
880         struct pcap_pkthdr pkthdr;
881         const u_char *buffer;
882         pcap_t *pt = (pcap_t *)private_data;
883
884         buffer=pcap_next(pt, &pkthdr);
885         if (buffer==NULL) {
886                 return -1;
887         }
888
889         ZERO_STRUCTP(src);
890         ZERO_STRUCTP(dst);
891
892         /* Ethernet */
893         eth = (struct ether_header *)buffer;
894
895         /* we want either IPv4 or IPv6 */
896         if (eth->ether_type == htons(ETHERTYPE_IP)) {
897                 /* IP */
898                 ip = (struct ip *)(eth+1);
899
900                 /* We only want IPv4 packets */
901                 if (ip->ip_v != 4) {
902                         return -1;
903                 }
904                 /* Dont look at fragments */
905                 if ((ntohs(ip->ip_off)&0x1fff) != 0) {
906                         return -1;
907                 }
908                 /* we only want TCP */
909                 if (ip->ip_p != IPPROTO_TCP) {
910                         return -1;
911                 }
912
913                 /* make sure its not a short packet */
914                 if (offsetof(struct tcphdr, th_ack) + 4 +
915                     (ip->ip_hl*4) > ret) {
916                         return -1;
917                 }
918                 /* TCP */
919                 tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip);
920
921                 /* tell the caller which one we've found */
922                 src->ip.sin_family      = AF_INET;
923                 src->ip.sin_addr.s_addr = ip->ip_src.s_addr;
924                 src->ip.sin_port        = tcp->th_sport;
925                 dst->ip.sin_family      = AF_INET;
926                 dst->ip.sin_addr.s_addr = ip->ip_dst.s_addr;
927                 dst->ip.sin_port        = tcp->th_dport;
928                 *ack_seq                = tcp->th_ack;
929                 *seq                    = tcp->th_seq;
930                 if (window != NULL) {
931                         *window = tcp->th_win;
932                 }
933                 if (rst != NULL) {
934                         *rst = tcp->th_flags & TH_RST;
935                 }
936
937                 return 0;
938         } else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
939                         /* IP6 */
940                 ip6 = (struct ip6_hdr *)(eth+1);
941
942                 /* we only want TCP */
943                 if (ip6->ip6_nxt != IPPROTO_TCP) {
944                         return -1;
945                 }
946
947                 /* TCP */
948                 tcp = (struct tcphdr *)(ip6+1);
949
950                 /* tell the caller which one we've found */
951                 src->ip6.sin6_family = AF_INET6;
952                 src->ip6.sin6_port   = tcp->th_sport;
953                 src->ip6.sin6_addr   = ip6->ip6_src;
954
955                 dst->ip6.sin6_family = AF_INET6;
956                 dst->ip6.sin6_port   = tcp->th_dport;
957                 dst->ip6.sin6_addr   = ip6->ip6_dst;
958
959                 *ack_seq             = tcp->th_ack;
960                 *seq                 = tcp->th_seq;
961                 if (window != NULL) {
962                         *window = tcp->th_win;
963                 }
964                 if (rst != NULL) {
965                         *rst = tcp->th_flags & TH_RST;
966                 }
967
968                 return 0;
969         }
970
971         return -1;
972 }
973
974 #endif /* HAVE_AF_PACKET */