ctdb-common: Fix error handling when sending ARPs
[vlendec/samba-autobuild/.git] / ctdb / common / system_socket.c
1 /*
2    ctdb system specific code to manage raw sockets on linux
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Marc Dequènes (Duck) 2009
7    Copyright (C) Volker Lendecke 2012
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "replace.h"
24
25 /*
26  * Use BSD struct tcphdr field names for portability.  Modern glibc
27  * makes them available by default via <netinet/tcp.h> but older glibc
28  * requires __FAVOR_BSD to be defined.
29  *
30  * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
31  * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
32  * set.  Including "replace.h" above causes <features.h> to be
33  * indirectly included and this will not set __FAVOR_BSD because
34  * _GNU_SOURCE is set in Samba's "config.h" (which is included by
35  * "replace.h").
36  *
37  * Therefore, set __FAVOR_BSD by hand below.
38  */
39 #define __FAVOR_BSD 1
40 #include "system/network.h"
41
42 #ifdef HAVE_NETINET_IF_ETHER_H
43 #include <netinet/if_ether.h>
44 #endif
45 #ifdef HAVE_NETINET_IP6_H
46 #include <netinet/ip6.h>
47 #endif
48 #ifdef HAVE_NETINET_ICMP6_H
49 #include <netinet/icmp6.h>
50 #endif
51 #ifdef HAVE_LINUX_IF_PACKET_H
52 #include <linux/if_packet.h>
53 #endif
54
55 #ifndef ETHERTYPE_IP6
56 #define ETHERTYPE_IP6 0x86dd
57 #endif
58
59 #include "lib/util/debug.h"
60 #include "lib/util/blocking.h"
61
62 #include "protocol/protocol.h"
63
64 #include "common/logging.h"
65 #include "common/system_socket.h"
66
/*
 * Sum the n bytes at data as a sequence of 16-bit words in network
 * (big-endian) byte order.
 *
 * The returned value is the raw 32-bit sum: carries are not folded and
 * the result is not complemented, that is left to the caller.
 *
 * If n is odd, the final byte is treated as the high-order byte of a
 * last word whose low-order byte is zero.
 *
 * The input is read one byte at a time, so the result does not depend on
 * host byte order and data need not be 2-byte aligned.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
        const uint8_t *p = (const uint8_t *)data;
        uint32_t sum = 0;

        for (; n >= 2; n -= 2, p += 2) {
                sum += ((uint32_t)p[0] << 8) | (uint32_t)p[1];
        }
        if (n == 1) {
                /* Odd trailing byte: pad with a zero byte on the right */
                sum += (uint32_t)p[0] << 8;
        }
        return sum;
}
83
84 /*
85  * See if the given IP is currently on an interface
86  */
87 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
88 {
89         int s;
90         int ret;
91         ctdb_sock_addr __addr = *_addr;
92         ctdb_sock_addr *addr = &__addr;
93         socklen_t addrlen = 0;
94
95         switch (addr->sa.sa_family) {
96         case AF_INET:
97                 addr->ip.sin_port = 0;
98                 addrlen = sizeof(struct sockaddr_in);
99                 break;
100         case AF_INET6:
101                 addr->ip6.sin6_port = 0;
102                 addrlen = sizeof(struct sockaddr_in6);
103                 break;
104         }
105
106         s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
107         if (s == -1) {
108                 return false;
109         }
110
111         ret = bind(s, (struct sockaddr *)addr, addrlen);
112
113         close(s);
114         return ret == 0;
115 }
116
117 /*
118  * simple TCP checksum - assumes data is multiple of 2 bytes long
119  */
120 static uint16_t ip_checksum(uint16_t *data, size_t n, struct ip *ip)
121 {
122         uint32_t sum = uint16_checksum(data, n);
123         uint16_t sum2;
124
125         sum += uint16_checksum((uint16_t *)&ip->ip_src, sizeof(ip->ip_src));
126         sum += uint16_checksum((uint16_t *)&ip->ip_dst, sizeof(ip->ip_dst));
127         sum += ip->ip_p + n;
128         sum = (sum & 0xFFFF) + (sum >> 16);
129         sum = (sum & 0xFFFF) + (sum >> 16);
130         sum2 = htons(sum);
131         sum2 = ~sum2;
132         if (sum2 == 0) {
133                 return 0xFFFF;
134         }
135         return sum2;
136 }
137
/*
 * Checksum over the n bytes at data together with fields taken from the
 * given IPv6 header: source address, destination address, the 32-bit
 * length n and the next-header value.
 *
 * The folded sum is converted with htons() and complemented; a result
 * of 0 is returned as 0xFFFF instead.
 */
static uint16_t ip6_checksum(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
        uint32_t be_len = htonl(n);
        uint16_t pseudo[3];
        uint32_t acc;
        uint16_t cksum;
        int pass;

        /* Length (split into two 16-bit halves) and next header */
        pseudo[0] = be_len & UINT16_MAX;
        pseudo[1] = (be_len >> 16) & UINT16_MAX;
        /* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
        pseudo[2] = htons(ip6->ip6_nxt);

        acc  = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
        acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
        acc += uint16_checksum(pseudo, sizeof(pseudo));
        acc += uint16_checksum(data, n);

        /* Fold the carries back into the low 16 bits, twice */
        for (pass = 0; pass < 2; pass++) {
                acc = (acc & 0xFFFF) + (acc >> 16);
        }

        cksum = htons(acc);
        cksum = ~cksum;

        return (cksum == 0) ? 0xFFFF : cksum;
}
166
167 /*
168  * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
169  */
170
171 #ifdef HAVE_PACKETSOCKET
172
173 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
174 {
175         int s;
176         struct sockaddr_ll sall = {0};
177         struct ether_header *eh;
178         struct arphdr *ah;
179         struct ip6_hdr *ip6;
180         struct nd_neighbor_advert *nd_na;
181         struct nd_opt_hdr *nd_oh;
182         struct ether_addr *ea;
183         struct ifreq if_hwaddr = {{{0}}};
184         /* Size of IPv6 neighbor advertisement (with option) */
185         unsigned char buffer[sizeof(struct ether_header) +
186                              sizeof(struct ip6_hdr) +
187                              sizeof(struct nd_neighbor_advert) +
188                              sizeof(struct nd_opt_hdr) + ETH_ALEN];
189         char *ptr;
190         char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
191         struct ifreq ifr = {{{0}}};
192         int ret = 0;
193
194         s = socket(AF_PACKET, SOCK_RAW, 0);
195         if (s == -1) {
196                 ret = errno;
197                 DBG_ERR("Failed to open raw socket\n");
198                 return ret;
199         }
200         DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
201
202         /* Find interface */
203         strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
204         if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
205                 ret = errno;
206                 DBG_ERR("Interface '%s' not found\n", iface);
207                 goto fail;
208         }
209
210         /* Get MAC address */
211         strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
212         ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
213         if ( ret < 0 ) {
214                 ret = errno;
215                 DBG_ERR("ioctl failed\n");
216                 goto fail;
217         }
218         if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
219                 ret = 0;
220                 D_DEBUG("Ignoring loopback arp request\n");
221                 goto fail;
222         }
223         if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
224                 ret = EINVAL;
225                 DBG_ERR("Not an ethernet address family (0x%x)\n",
226                         if_hwaddr.ifr_hwaddr.sa_family);
227                 goto fail;;
228         }
229
230         /* Set up most of destination address structure */
231         sall.sll_family = AF_PACKET;
232         sall.sll_halen = sizeof(struct ether_addr);
233         sall.sll_protocol = htons(ETH_P_ALL);
234         sall.sll_ifindex = ifr.ifr_ifindex;
235
236         switch (addr->ip.sin_family) {
237         case AF_INET:
238                 memset(buffer, 0 , 64);
239                 eh = (struct ether_header *)buffer;
240                 memset(eh->ether_dhost, 0xff, ETH_ALEN);
241                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
242                 eh->ether_type = htons(ETHERTYPE_ARP);
243
244                 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
245                 ah->ar_hrd = htons(ARPHRD_ETHER);
246                 ah->ar_pro = htons(ETH_P_IP);
247                 ah->ar_hln = ETH_ALEN;
248                 ah->ar_pln = 4;
249
250                 /* send a gratious arp */
251                 ah->ar_op  = htons(ARPOP_REQUEST);
252                 ptr = (char *)&ah[1];
253                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
254                 ptr+=ETH_ALEN;
255                 memcpy(ptr, &addr->ip.sin_addr, 4);
256                 ptr+=4;
257                 memset(ptr, 0, ETH_ALEN);
258                 ptr+=ETH_ALEN;
259                 memcpy(ptr, &addr->ip.sin_addr, 4);
260                 ptr+=4;
261
262                 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
263
264                 ret = sendto(s,buffer, 64, 0,
265                              (struct sockaddr *)&sall, sizeof(sall));
266                 if (ret < 0 ) {
267                         ret = errno;
268                         DBG_ERR("Failed sendto\n");
269                         goto fail;
270                 }
271
272                 /* send unsolicited arp reply broadcast */
273                 ah->ar_op  = htons(ARPOP_REPLY);
274                 ptr = (char *)&ah[1];
275                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
276                 ptr+=ETH_ALEN;
277                 memcpy(ptr, &addr->ip.sin_addr, 4);
278                 ptr+=4;
279                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
280                 ptr+=ETH_ALEN;
281                 memcpy(ptr, &addr->ip.sin_addr, 4);
282                 ptr+=4;
283
284                 ret = sendto(s, buffer, 64, 0,
285                              (struct sockaddr *)&sall, sizeof(sall));
286                 if (ret < 0 ) {
287                         ret = errno;
288                         DBG_ERR("Failed sendto\n");
289                         goto fail;
290                 }
291
292                 close(s);
293                 break;
294         case AF_INET6:
295                 memset(buffer, 0 , sizeof(buffer));
296                 eh = (struct ether_header *)buffer;
297                 /*
298                  * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
299                  * section 7) - note zeroes above!
300                  */
301                 eh->ether_dhost[0] = eh->ether_dhost[1] = 0x33;
302                 eh->ether_dhost[5] = 0x01;
303                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
304                 eh->ether_type = htons(ETHERTYPE_IP6);
305
306                 ip6 = (struct ip6_hdr *)(eh+1);
307                 ip6->ip6_vfc  = 0x60;
308                 ip6->ip6_plen = htons(sizeof(*nd_na) +
309                                       sizeof(struct nd_opt_hdr) +
310                                       ETH_ALEN);
311                 ip6->ip6_nxt  = IPPROTO_ICMPV6;
312                 ip6->ip6_hlim = 255;
313                 ip6->ip6_src  = addr->ip6.sin6_addr;
314                 /* all-nodes multicast */
315
316                 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
317                 if (ret != 1) {
318                         ret = errno;
319                         DBG_ERR("Failed inet_pton\n");
320                         goto fail;
321                 }
322
323                 nd_na = (struct nd_neighbor_advert *)(ip6+1);
324                 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
325                 nd_na->nd_na_code = 0;
326                 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
327                 nd_na->nd_na_target = addr->ip6.sin6_addr;
328                 /* Option: Target link-layer address */
329                 nd_oh = (struct nd_opt_hdr *)(nd_na+1);
330                 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
331                 nd_oh->nd_opt_len = 1;
332
333                 ea = (struct ether_addr *)(nd_oh+1);
334                 memcpy(ea, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
335
336                 nd_na->nd_na_cksum = ip6_checksum((uint16_t *)nd_na,
337                                                   ntohs(ip6->ip6_plen), ip6);
338
339                 memcpy(&sall.sll_addr[0], &eh->ether_dhost[0], sall.sll_halen);
340
341                 ret = sendto(s, buffer, sizeof(buffer),
342                              0, (struct sockaddr *)&sall, sizeof(sall));
343                 if (ret < 0 ) {
344                         ret = errno;
345                         DBG_ERR("Failed sendto\n");
346                         goto fail;
347                 }
348
349                 close(s);
350                 break;
351         default:
352                 ret = EINVAL;
353                 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
354                         addr->ip.sin_family);
355                 goto fail;
356         }
357
358         return 0;
359
360 fail:
361         close(s);
362         return ret;
363 }
364
365 #else /* HAVE_PACKETSOCKET */
366
367 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
368 {
369         /* Not implemented */
370         return ENOSYS;
371 }
372
373 #endif /* HAVE_PACKETSOCKET */
374
375 /*
376  * Send tcp segment from the specified IP/port to the specified
377  * destination IP/port.
378  *
379  * This is used to trigger the receiving host into sending its own ACK,
380  * which should trigger early detection of TCP reset by the client
381  * after IP takeover
382  *
383  * This can also be used to send RST segments (if rst is true) and also
384  * if correct seq and ack numbers are provided.
385  */
386 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
387                       const ctdb_sock_addr *src,
388                       uint32_t seq,
389                       uint32_t ack,
390                       int rst)
391 {
392         int s;
393         int ret;
394         uint32_t one = 1;
395         uint16_t tmpport;
396         ctdb_sock_addr *tmpdest;
397         struct {
398                 struct ip ip;
399                 struct tcphdr tcp;
400         } ip4pkt;
401         struct {
402                 struct ip6_hdr ip6;
403                 struct tcphdr tcp;
404         } ip6pkt;
405         int saved_errno;
406
407         switch (src->ip.sin_family) {
408         case AF_INET:
409                 ZERO_STRUCT(ip4pkt);
410                 ip4pkt.ip.ip_v     = 4;
411                 ip4pkt.ip.ip_hl    = sizeof(ip4pkt.ip)/4;
412                 ip4pkt.ip.ip_len   = htons(sizeof(ip4pkt));
413                 ip4pkt.ip.ip_ttl   = 255;
414                 ip4pkt.ip.ip_p     = IPPROTO_TCP;
415                 ip4pkt.ip.ip_src.s_addr    = src->ip.sin_addr.s_addr;
416                 ip4pkt.ip.ip_dst.s_addr    = dest->ip.sin_addr.s_addr;
417                 ip4pkt.ip.ip_sum   = 0;
418
419                 ip4pkt.tcp.th_sport = src->ip.sin_port;
420                 ip4pkt.tcp.th_dport = dest->ip.sin_port;
421                 ip4pkt.tcp.th_seq   = seq;
422                 ip4pkt.tcp.th_ack   = ack;
423                 ip4pkt.tcp.th_flags = 0;
424                 ip4pkt.tcp.th_flags |= TH_ACK;
425                 if (rst) {
426                         ip4pkt.tcp.th_flags |= TH_RST;
427                 }
428                 ip4pkt.tcp.th_off   = sizeof(ip4pkt.tcp)/4;
429                 /* this makes it easier to spot in a sniffer */
430                 ip4pkt.tcp.th_win   = htons(1234);
431                 ip4pkt.tcp.th_sum   = ip_checksum((uint16_t *)&ip4pkt.tcp,
432                                                   sizeof(ip4pkt.tcp),
433                                                   &ip4pkt.ip);
434
435                 /* open a raw socket to send this segment from */
436                 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
437                 if (s == -1) {
438                         DBG_ERR("Failed to open raw socket (%s)\n",
439                                 strerror(errno));
440                         return -1;
441                 }
442
443                 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
444                 if (ret != 0) {
445                         DBG_ERR("Failed to setup IP headers (%s)\n",
446                                 strerror(errno));
447                         close(s);
448                         return -1;
449                 }
450
451                 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
452                              (const struct sockaddr *)&dest->ip,
453                              sizeof(dest->ip));
454                 saved_errno = errno;
455                 close(s);
456                 if (ret != sizeof(ip4pkt)) {
457                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
458                         return -1;
459                 }
460                 break;
461         case AF_INET6:
462                 ZERO_STRUCT(ip6pkt);
463                 ip6pkt.ip6.ip6_vfc  = 0x60;
464                 ip6pkt.ip6.ip6_plen = htons(20);
465                 ip6pkt.ip6.ip6_nxt  = IPPROTO_TCP;
466                 ip6pkt.ip6.ip6_hlim = 64;
467                 ip6pkt.ip6.ip6_src  = src->ip6.sin6_addr;
468                 ip6pkt.ip6.ip6_dst  = dest->ip6.sin6_addr;
469
470                 ip6pkt.tcp.th_sport = src->ip6.sin6_port;
471                 ip6pkt.tcp.th_dport = dest->ip6.sin6_port;
472                 ip6pkt.tcp.th_seq   = seq;
473                 ip6pkt.tcp.th_ack   = ack;
474                 ip6pkt.tcp.th_flags = 0;
475                 ip6pkt.tcp.th_flags |= TH_RST;
476                 if (rst) {
477                         ip6pkt.tcp.th_flags |= TH_RST;
478                 }
479                 ip6pkt.tcp.th_off    = sizeof(ip6pkt.tcp)/4;
480                 /* this makes it easier to spot in a sniffer */
481                 ip6pkt.tcp.th_win   = htons(1234);
482                 ip6pkt.tcp.th_sum   = ip6_checksum((uint16_t *)&ip6pkt.tcp,
483                                                    sizeof(ip6pkt.tcp),
484                                                    &ip6pkt.ip6);
485
486                 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
487                 if (s == -1) {
488                         DBG_ERR("Failed to open sending socket\n");
489                         return -1;
490
491                 }
492                 /* sendto() don't like if the port is set and the socket is
493                    in raw mode.
494                 */
495                 tmpdest = discard_const(dest);
496                 tmpport = tmpdest->ip6.sin6_port;
497
498                 tmpdest->ip6.sin6_port = 0;
499                 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
500                              (const struct sockaddr *)&dest->ip6,
501                              sizeof(dest->ip6));
502                 saved_errno = errno;
503                 tmpdest->ip6.sin6_port = tmpport;
504                 close(s);
505
506                 if (ret != sizeof(ip6pkt)) {
507                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
508                         return -1;
509                 }
510                 break;
511
512         default:
513                 DBG_ERR("Not an ipv4/v6 address\n");
514                 return -1;
515         }
516
517         return 0;
518 }
519
520 /*
521  * Packet capture
522  *
523  * If AF_PACKET is available then use a raw socket otherwise use pcap.
524  * wscript has checked to make sure that pcap is available if needed.
525  */
526
527 #ifdef HAVE_AF_PACKET
528
/*
 * This function is used to open a raw socket to capture from
 *
 * The socket is an AF_PACKET/SOCK_RAW socket for ETH_P_ALL.  It is
 * switched to non-blocking mode and handed to set_close_on_exec()
 * before being returned.  Neither iface nor private_data is used by
 * this implementation.
 *
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
        int fd;

        /* Open a socket to capture all traffic */
        fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (fd == -1) {
                DBG_ERR("Failed to open raw socket\n");
                return -1;
        }

        DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", fd);

        if (set_blocking(fd, false) != 0) {
                DBG_ERR("Failed to set socket non-blocking (%s)\n",
                        strerror(errno));
                close(fd);
                return -1;
        }

        set_close_on_exec(fd);

        return fd;
}
557
/*
 * This function is used to do any additional cleanup required when closing
 * a capture socket.
 * Note that the socket itself is closed automatically in the caller.
 *
 * This implementation has nothing to clean up: private_data is ignored
 * and 0 is always returned.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
        (void)private_data;

        return 0;
}
567
568
569 /*
570  * called when the raw socket becomes readable
571  */
572 int ctdb_sys_read_tcp_packet(int s, void *private_data,
573                              ctdb_sock_addr *src,
574                              ctdb_sock_addr *dst,
575                              uint32_t *ack_seq,
576                              uint32_t *seq,
577                              int *rst,
578                              uint16_t *window)
579 {
580         int ret;
581 #define RCVPKTSIZE 100
582         char pkt[RCVPKTSIZE];
583         struct ether_header *eth;
584         struct iphdr *ip;
585         struct ip6_hdr *ip6;
586         struct tcphdr *tcp;
587
588         ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
589         if (ret < sizeof(*eth)+sizeof(*ip)) {
590                 return -1;
591         }
592
593         ZERO_STRUCTP(src);
594         ZERO_STRUCTP(dst);
595
596         /* Ethernet */
597         eth = (struct ether_header *)pkt;
598
599         /* we want either IPv4 or IPv6 */
600         if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
601                 /* IP */
602                 ip = (struct iphdr *)(eth+1);
603
604                 /* We only want IPv4 packets */
605                 if (ip->version != 4) {
606                         return -1;
607                 }
608                 /* Dont look at fragments */
609                 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
610                         return -1;
611                 }
612                 /* we only want TCP */
613                 if (ip->protocol != IPPROTO_TCP) {
614                         return -1;
615                 }
616
617                 /* make sure its not a short packet */
618                 if (offsetof(struct tcphdr, th_ack) + 4 +
619                     (ip->ihl*4) + sizeof(*eth) > ret) {
620                         return -1;
621                 }
622                 /* TCP */
623                 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
624
625                 /* tell the caller which one we've found */
626                 src->ip.sin_family      = AF_INET;
627                 src->ip.sin_addr.s_addr = ip->saddr;
628                 src->ip.sin_port        = tcp->th_sport;
629                 dst->ip.sin_family      = AF_INET;
630                 dst->ip.sin_addr.s_addr = ip->daddr;
631                 dst->ip.sin_port        = tcp->th_dport;
632                 *ack_seq                = tcp->th_ack;
633                 *seq                    = tcp->th_seq;
634                 if (window != NULL) {
635                         *window = tcp->th_win;
636                 }
637                 if (rst != NULL) {
638                         *rst = tcp->th_flags & TH_RST;
639                 }
640
641                 return 0;
642         } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
643                 /* IP6 */
644                 ip6 = (struct ip6_hdr *)(eth+1);
645
646                 /* we only want TCP */
647                 if (ip6->ip6_nxt != IPPROTO_TCP) {
648                         return -1;
649                 }
650
651                 /* TCP */
652                 tcp = (struct tcphdr *)(ip6+1);
653
654                 /* tell the caller which one we've found */
655                 src->ip6.sin6_family = AF_INET6;
656                 src->ip6.sin6_port   = tcp->th_sport;
657                 src->ip6.sin6_addr   = ip6->ip6_src;
658
659                 dst->ip6.sin6_family = AF_INET6;
660                 dst->ip6.sin6_port   = tcp->th_dport;
661                 dst->ip6.sin6_addr   = ip6->ip6_dst;
662
663                 *ack_seq             = tcp->th_ack;
664                 *seq                 = tcp->th_seq;
665                 if (window != NULL) {
666                         *window = tcp->th_win;
667                 }
668                 if (rst != NULL) {
669                         *rst = tcp->th_flags & TH_RST;
670                 }
671
672                 return 0;
673         }
674
675         return -1;
676 }
677
678 #else /* HAVE_AF_PACKET */
679
680 #include <pcap.h>
681
682 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
683 {
684         pcap_t *pt;
685
686         pt=pcap_open_live(iface, 100, 0, 0, NULL);
687         if (pt == NULL) {
688                 DBG_ERR("Failed to open capture device %s\n", iface);
689                 return -1;
690         }
691         *((pcap_t **)private_data) = pt;
692
693         return pcap_fileno(pt);
694 }
695
696 int ctdb_sys_close_capture_socket(void *private_data)
697 {
698         pcap_t *pt = (pcap_t *)private_data;
699         pcap_close(pt);
700         return 0;
701 }
702
703 int ctdb_sys_read_tcp_packet(int s,
704                              void *private_data,
705                              ctdb_sock_addr *src,
706                              ctdb_sock_addr *dst,
707                              uint32_t *ack_seq,
708                              uint32_t *seq,
709                              int *rst,
710                              uint16_t *window)
711 {
712         int ret;
713         struct ether_header *eth;
714         struct ip *ip;
715         struct ip6_hdr *ip6;
716         struct tcphdr *tcp;
717         struct ctdb_killtcp_connection *conn;
718         struct pcap_pkthdr pkthdr;
719         const u_char *buffer;
720         pcap_t *pt = (pcap_t *)private_data;
721
722         buffer=pcap_next(pt, &pkthdr);
723         if (buffer==NULL) {
724                 return -1;
725         }
726
727         ZERO_STRUCTP(src);
728         ZERO_STRUCTP(dst);
729
730         /* Ethernet */
731         eth = (struct ether_header *)buffer;
732
733         /* we want either IPv4 or IPv6 */
734         if (eth->ether_type == htons(ETHERTYPE_IP)) {
735                 /* IP */
736                 ip = (struct ip *)(eth+1);
737
738                 /* We only want IPv4 packets */
739                 if (ip->ip_v != 4) {
740                         return -1;
741                 }
742                 /* Dont look at fragments */
743                 if ((ntohs(ip->ip_off)&0x1fff) != 0) {
744                         return -1;
745                 }
746                 /* we only want TCP */
747                 if (ip->ip_p != IPPROTO_TCP) {
748                         return -1;
749                 }
750
751                 /* make sure its not a short packet */
752                 if (offsetof(struct tcphdr, th_ack) + 4 +
753                     (ip->ip_hl*4) > pkthdr.len) {
754                         return -1;
755                 }
756                 /* TCP */
757                 tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip);
758
759                 /* tell the caller which one we've found */
760                 src->ip.sin_family      = AF_INET;
761                 src->ip.sin_addr.s_addr = ip->ip_src.s_addr;
762                 src->ip.sin_port        = tcp->th_sport;
763                 dst->ip.sin_family      = AF_INET;
764                 dst->ip.sin_addr.s_addr = ip->ip_dst.s_addr;
765                 dst->ip.sin_port        = tcp->th_dport;
766                 *ack_seq                = tcp->th_ack;
767                 *seq                    = tcp->th_seq;
768                 if (window != NULL) {
769                         *window = tcp->th_win;
770                 }
771                 if (rst != NULL) {
772                         *rst = tcp->th_flags & TH_RST;
773                 }
774
775                 return 0;
776         } else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
777                         /* IP6 */
778                 ip6 = (struct ip6_hdr *)(eth+1);
779
780                 /* we only want TCP */
781                 if (ip6->ip6_nxt != IPPROTO_TCP) {
782                         return -1;
783                 }
784
785                 /* TCP */
786                 tcp = (struct tcphdr *)(ip6+1);
787
788                 /* tell the caller which one we've found */
789                 src->ip6.sin6_family = AF_INET6;
790                 src->ip6.sin6_port   = tcp->th_sport;
791                 src->ip6.sin6_addr   = ip6->ip6_src;
792
793                 dst->ip6.sin6_family = AF_INET6;
794                 dst->ip6.sin6_port   = tcp->th_dport;
795                 dst->ip6.sin6_addr   = ip6->ip6_dst;
796
797                 *ack_seq             = tcp->th_ack;
798                 *seq                 = tcp->th_seq;
799                 if (window != NULL) {
800                         *window = tcp->th_win;
801                 }
802                 if (rst != NULL) {
803                         *rst = tcp->th_flags & TH_RST;
804                 }
805
806                 return 0;
807         }
808
809         return -1;
810 }
811
812 #endif /* HAVE_AF_PACKET */