562b2bf1aafff73d8914d5f86b675114fb4d9e69
[bbaumbach/samba-autobuild/.git] / ctdb / common / system_socket.c
1 /*
2    ctdb system specific code to manage raw sockets on linux
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "replace.h"
22
23 /*
24  * Use BSD struct tcphdr field names for portability.  Modern glibc
25  * makes them available by default via <netinet/tcp.h> but older glibc
26  * requires __FAVOR_BSD to be defined.
27  *
28  * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
29  * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
30  * set.  Including "replace.h" above causes <features.h> to be
31  * indirectly included and this will not set __FAVOR_BSD because
32  * _GNU_SOURCE is set in Samba's "config.h" (which is included by
33  * "replace.h").
34  *
35  * Therefore, set __FAVOR_BSD by hand below.
36  */
37 #define __FAVOR_BSD 1
38 #include "system/network.h"
39
40 #ifdef HAVE_NETINET_IF_ETHER_H
41 #include <netinet/if_ether.h>
42 #endif
43 #ifdef HAVE_NETINET_IP6_H
44 #include <netinet/ip6.h>
45 #endif
46 #ifdef HAVE_NETINET_ICMP6_H
47 #include <netinet/icmp6.h>
48 #endif
49 #ifdef HAVE_LINUX_IF_PACKET_H
50 #include <linux/if_packet.h>
51 #endif
52
53 #ifndef ETHERTYPE_IP6
54 #define ETHERTYPE_IP6 0x86dd
55 #endif
56
57 #include "lib/util/debug.h"
58 #include "lib/util/blocking.h"
59
60 #include "protocol/protocol.h"
61
62 #include "common/logging.h"
63 #include "common/system_socket.h"
64
/*
 * Sum n bytes starting at data as a sequence of 16-bit words in
 * network (big-endian) byte order.  Carries are not folded; the
 * callers fold the 32-bit result down to 16 bits.
 *
 * If n is odd, the final byte is taken as the high-order byte of a
 * word whose low-order byte is zero.
 *
 * The bytes are read one at a time, so the result does not depend on
 * host byte order and data does not need to be 2-byte aligned.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
        const uint8_t *bytes = (const uint8_t *)data;
        uint32_t sum = 0;

        while (n >= 2) {
                sum += ((uint32_t)bytes[0] << 8) | (uint32_t)bytes[1];
                bytes += 2;
                n -= 2;
        }
        if (n == 1) {
                /* Odd trailing byte: pad with a zero byte on the right */
                sum += (uint32_t)bytes[0] << 8;
        }
        return sum;
}
81
82 /*
83  * See if the given IP is currently on an interface
84  */
85 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
86 {
87         int s;
88         int ret;
89         ctdb_sock_addr __addr = *_addr;
90         ctdb_sock_addr *addr = &__addr;
91         socklen_t addrlen = 0;
92
93         switch (addr->sa.sa_family) {
94         case AF_INET:
95                 addr->ip.sin_port = 0;
96                 addrlen = sizeof(struct sockaddr_in);
97                 break;
98         case AF_INET6:
99                 addr->ip6.sin6_port = 0;
100                 addrlen = sizeof(struct sockaddr_in6);
101                 break;
102         }
103
104         s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
105         if (s == -1) {
106                 return false;
107         }
108
109         ret = bind(s, (struct sockaddr *)addr, addrlen);
110
111         close(s);
112         return ret == 0;
113 }
114
115 /*
116  * simple TCP checksum - assumes data is multiple of 2 bytes long
117  */
118 static uint16_t ip_checksum(uint16_t *data, size_t n, struct ip *ip)
119 {
120         uint32_t sum = uint16_checksum(data, n);
121         uint16_t sum2;
122
123         sum += uint16_checksum((uint16_t *)&ip->ip_src, sizeof(ip->ip_src));
124         sum += uint16_checksum((uint16_t *)&ip->ip_dst, sizeof(ip->ip_dst));
125         sum += ip->ip_p + n;
126         sum = (sum & 0xFFFF) + (sum >> 16);
127         sum = (sum & 0xFFFF) + (sum >> 16);
128         sum2 = htons(sum);
129         sum2 = ~sum2;
130         if (sum2 == 0) {
131                 return 0xFFFF;
132         }
133         return sum2;
134 }
135
/*
 * Upper-layer checksum for a payload carried in an IPv6 packet.
 *
 * Sums the source and destination addresses from ip6, a pseudo-header
 * tail holding the payload length n and the next-header value, and
 * the n bytes at data.  The sum is folded into 16 bits and
 * complemented.  A computed value of 0 is returned as 0xFFFF.
 */
static uint16_t ip6_checksum(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
        /* Pseudo-header tail: 32-bit length, then 32-bit next header */
        uint32_t pseudo_tail[2];
        uint32_t acc;
        uint16_t result;
        int pass;

        pseudo_tail[0] = htonl(n);
        pseudo_tail[1] = htonl(ip6->ip6_nxt);

        acc  = uint16_checksum(data, n);
        acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src,
                               sizeof(ip6->ip6_src));
        acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst,
                               sizeof(ip6->ip6_dst));
        acc += uint16_checksum((uint16_t *)pseudo_tail, sizeof(pseudo_tail));

        /* Two folds are enough to absorb every carry into 16 bits */
        for (pass = 0; pass < 2; pass++) {
                acc = (acc & 0xFFFF) + (acc >> 16);
        }

        result = htons(acc);
        result = ~result;

        return (result == 0) ? 0xFFFF : result;
}
160
161 /*
162  * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
163  */
164
165 #ifdef HAVE_PACKETSOCKET
166
167 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
168 {
169         int s, ret;
170         struct sockaddr_ll sall;
171         struct ether_header *eh;
172         struct arphdr *ah;
173         struct ip6_hdr *ip6;
174         struct nd_neighbor_advert *nd_na;
175         struct nd_opt_hdr *nd_oh;
176         struct ifreq if_hwaddr;
177         /* Size of IPv6 neighbor advertisement (with option) */
178         unsigned char buffer[sizeof(struct ether_header) +
179                              sizeof(struct ip6_hdr) +
180                              sizeof(struct nd_neighbor_advert) +
181                              sizeof(struct nd_opt_hdr) + ETH_ALEN];
182         char *ptr;
183         char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
184         struct ifreq ifr;
185
186         ZERO_STRUCT(sall);
187         ZERO_STRUCT(ifr);
188         ZERO_STRUCT(if_hwaddr);
189
190         switch (addr->ip.sin_family) {
191         case AF_INET:
192                 s = socket(AF_PACKET, SOCK_RAW, 0);
193                 if (s == -1){
194                         DBG_ERR("Failed to open raw socket\n");
195                         return -1;
196                 }
197
198                 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
199                 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
200                 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
201                         DBG_ERR("Interface '%s' not found\n", iface);
202                         close(s);
203                         return -1;
204                 }
205
206                 /* get the mac address */
207                 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
208                 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
209                 if ( ret < 0 ) {
210                         close(s);
211                         DBG_ERR("ioctl failed\n");
212                         return -1;
213                 }
214                 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
215                         D_DEBUG("Ignoring loopback arp request\n");
216                         close(s);
217                         return 0;
218                 }
219                 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
220                         close(s);
221                         errno = EINVAL;
222                         DBG_ERR("Not an ethernet address family (0x%x)\n",
223                                 if_hwaddr.ifr_hwaddr.sa_family);
224                         return -1;
225                 }
226
227
228                 memset(buffer, 0 , 64);
229                 eh = (struct ether_header *)buffer;
230                 memset(eh->ether_dhost, 0xff, ETH_ALEN);
231                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
232                 eh->ether_type = htons(ETHERTYPE_ARP);
233
234                 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
235                 ah->ar_hrd = htons(ARPHRD_ETHER);
236                 ah->ar_pro = htons(ETH_P_IP);
237                 ah->ar_hln = ETH_ALEN;
238                 ah->ar_pln = 4;
239
240                 /* send a gratious arp */
241                 ah->ar_op  = htons(ARPOP_REQUEST);
242                 ptr = (char *)&ah[1];
243                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
244                 ptr+=ETH_ALEN;
245                 memcpy(ptr, &addr->ip.sin_addr, 4);
246                 ptr+=4;
247                 memset(ptr, 0, ETH_ALEN);
248                 ptr+=ETH_ALEN;
249                 memcpy(ptr, &addr->ip.sin_addr, 4);
250                 ptr+=4;
251
252                 sall.sll_family = AF_PACKET;
253                 sall.sll_halen = 6;
254                 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
255                 sall.sll_protocol = htons(ETH_P_ALL);
256                 sall.sll_ifindex = ifr.ifr_ifindex;
257                 ret = sendto(s,buffer, 64, 0,
258                              (struct sockaddr *)&sall, sizeof(sall));
259                 if (ret < 0 ){
260                         close(s);
261                         DBG_ERR("Failed sendto\n");
262                         return -1;
263                 }
264
265                 /* send unsolicited arp reply broadcast */
266                 ah->ar_op  = htons(ARPOP_REPLY);
267                 ptr = (char *)&ah[1];
268                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
269                 ptr+=ETH_ALEN;
270                 memcpy(ptr, &addr->ip.sin_addr, 4);
271                 ptr+=4;
272                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
273                 ptr+=ETH_ALEN;
274                 memcpy(ptr, &addr->ip.sin_addr, 4);
275                 ptr+=4;
276
277                 ret = sendto(s, buffer, 64, 0,
278                              (struct sockaddr *)&sall, sizeof(sall));
279                 if (ret < 0 ){
280                         DBG_ERR("Failed sendto\n");
281                         close(s);
282                         return -1;
283                 }
284
285                 close(s);
286                 break;
287         case AF_INET6:
288                 s = socket(AF_PACKET, SOCK_RAW, 0);
289                 if (s == -1){
290                         DBG_ERR("Failed to open raw socket\n");
291                         return -1;
292                 }
293
294                 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
295                 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
296                 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
297                         DBG_ERR("Interface '%s' not found\n", iface);
298                         close(s);
299                         return -1;
300                 }
301
302                 /* get the mac address */
303                 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
304                 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
305                 if ( ret < 0 ) {
306                         close(s);
307                         DBG_ERR("ioctl failed\n");
308                         return -1;
309                 }
310                 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
311                         DBG_DEBUG("Ignoring loopback arp request\n");
312                         close(s);
313                         return 0;
314                 }
315                 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
316                         close(s);
317                         errno = EINVAL;
318                         DBG_ERR("Not an ethernet address family (0x%x)\n",
319                                 if_hwaddr.ifr_hwaddr.sa_family);
320                         return -1;
321                 }
322
323                 memset(buffer, 0 , sizeof(buffer));
324                 eh = (struct ether_header *)buffer;
325                 /*
326                  * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
327                  * section 7) - note zeroes above!
328                  */
329                 eh->ether_dhost[0] = eh->ether_dhost[1] = 0x33;
330                 eh->ether_dhost[5] = 0x01;
331                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
332                 eh->ether_type = htons(ETHERTYPE_IP6);
333
334                 ip6 = (struct ip6_hdr *)(eh+1);
335                 ip6->ip6_vfc  = 0x60;
336                 ip6->ip6_plen = htons(sizeof(*nd_na) +
337                                       sizeof(struct nd_opt_hdr) +
338                                       ETH_ALEN);
339                 ip6->ip6_nxt  = IPPROTO_ICMPV6;
340                 ip6->ip6_hlim = 255;
341                 ip6->ip6_src  = addr->ip6.sin6_addr;
342                 /* all-nodes multicast */
343
344                 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
345                 if (ret != 1) {
346                         close(s);
347                         DBG_ERR("Failed inet_pton\n");
348                         return -1;
349                 }
350
351                 nd_na = (struct nd_neighbor_advert *)(ip6+1);
352                 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
353                 nd_na->nd_na_code = 0;
354                 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
355                 nd_na->nd_na_target = addr->ip6.sin6_addr;
356                 /* Option: Target link-layer address */
357                 nd_oh = (struct nd_opt_hdr *)(nd_na+1);
358                 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
359                 nd_oh->nd_opt_len = 1;
360                 memcpy(&(nd_oh+1)[0], if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
361
362                 nd_na->nd_na_cksum = ip6_checksum((uint16_t *)nd_na,
363                                                   ntohs(ip6->ip6_plen), ip6);
364
365                 sall.sll_family = AF_PACKET;
366                 sall.sll_halen = 6;
367                 memcpy(&sall.sll_addr[0], &eh->ether_dhost[0], sall.sll_halen);
368                 sall.sll_protocol = htons(ETH_P_ALL);
369                 sall.sll_ifindex = ifr.ifr_ifindex;
370                 ret = sendto(s, buffer, sizeof(buffer),
371                              0, (struct sockaddr *)&sall, sizeof(sall));
372                 if (ret < 0 ){
373                         close(s);
374                         DBG_ERR("Failed sendto\n");
375                         return -1;
376                 }
377
378                 close(s);
379                 break;
380         default:
381                 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
382                         addr->ip.sin_family);
383                 return -1;
384         }
385
386         return 0;
387 }
388
389 #else /* HAVE_PACKETSOCKET */
390
391 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
392 {
393         /* Not implemented */
394         errno = ENOSYS;
395         return -1;
396 }
397
398 #endif /* HAVE_PACKETSOCKET */
399
400 /*
401  * Send tcp segment from the specified IP/port to the specified
402  * destination IP/port.
403  *
404  * This is used to trigger the receiving host into sending its own ACK,
405  * which should trigger early detection of TCP reset by the client
406  * after IP takeover
407  *
408  * This can also be used to send RST segments (if rst is true) and also
409  * if correct seq and ack numbers are provided.
410  */
411 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
412                       const ctdb_sock_addr *src,
413                       uint32_t seq,
414                       uint32_t ack,
415                       int rst)
416 {
417         int s;
418         int ret;
419         uint32_t one = 1;
420         uint16_t tmpport;
421         ctdb_sock_addr *tmpdest;
422         struct {
423                 struct ip ip;
424                 struct tcphdr tcp;
425         } ip4pkt;
426         struct {
427                 struct ip6_hdr ip6;
428                 struct tcphdr tcp;
429         } ip6pkt;
430         int saved_errno;
431
432         switch (src->ip.sin_family) {
433         case AF_INET:
434                 ZERO_STRUCT(ip4pkt);
435                 ip4pkt.ip.ip_v     = 4;
436                 ip4pkt.ip.ip_hl    = sizeof(ip4pkt.ip)/4;
437                 ip4pkt.ip.ip_len   = htons(sizeof(ip4pkt));
438                 ip4pkt.ip.ip_ttl   = 255;
439                 ip4pkt.ip.ip_p     = IPPROTO_TCP;
440                 ip4pkt.ip.ip_src.s_addr    = src->ip.sin_addr.s_addr;
441                 ip4pkt.ip.ip_dst.s_addr    = dest->ip.sin_addr.s_addr;
442                 ip4pkt.ip.ip_sum   = 0;
443
444                 ip4pkt.tcp.th_sport = src->ip.sin_port;
445                 ip4pkt.tcp.th_dport = dest->ip.sin_port;
446                 ip4pkt.tcp.th_seq   = seq;
447                 ip4pkt.tcp.th_ack   = ack;
448                 ip4pkt.tcp.th_flags = 0;
449                 ip4pkt.tcp.th_flags |= TH_ACK;
450                 if (rst) {
451                         ip4pkt.tcp.th_flags |= TH_RST;
452                 }
453                 ip4pkt.tcp.th_off   = sizeof(ip4pkt.tcp)/4;
454                 /* this makes it easier to spot in a sniffer */
455                 ip4pkt.tcp.th_win   = htons(1234);
456                 ip4pkt.tcp.th_sum   = ip_checksum((uint16_t *)&ip4pkt.tcp,
457                                                   sizeof(ip4pkt.tcp),
458                                                   &ip4pkt.ip);
459
460                 /* open a raw socket to send this segment from */
461                 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
462                 if (s == -1) {
463                         DBG_ERR("Failed to open raw socket (%s)\n",
464                                 strerror(errno));
465                         return -1;
466                 }
467
468                 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
469                 if (ret != 0) {
470                         DBG_ERR("Failed to setup IP headers (%s)\n",
471                                 strerror(errno));
472                         close(s);
473                         return -1;
474                 }
475
476                 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
477                              (const struct sockaddr *)&dest->ip,
478                              sizeof(dest->ip));
479                 saved_errno = errno;
480                 close(s);
481                 if (ret != sizeof(ip4pkt)) {
482                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
483                         return -1;
484                 }
485                 break;
486         case AF_INET6:
487                 ZERO_STRUCT(ip6pkt);
488                 ip6pkt.ip6.ip6_vfc  = 0x60;
489                 ip6pkt.ip6.ip6_plen = htons(20);
490                 ip6pkt.ip6.ip6_nxt  = IPPROTO_TCP;
491                 ip6pkt.ip6.ip6_hlim = 64;
492                 ip6pkt.ip6.ip6_src  = src->ip6.sin6_addr;
493                 ip6pkt.ip6.ip6_dst  = dest->ip6.sin6_addr;
494
495                 ip6pkt.tcp.th_sport = src->ip6.sin6_port;
496                 ip6pkt.tcp.th_dport = dest->ip6.sin6_port;
497                 ip6pkt.tcp.th_seq   = seq;
498                 ip6pkt.tcp.th_ack   = ack;
499                 ip6pkt.tcp.th_flags = 0;
500                 ip6pkt.tcp.th_flags |= TH_RST;
501                 if (rst) {
502                         ip6pkt.tcp.th_flags |= TH_RST;
503                 }
504                 ip6pkt.tcp.th_off    = sizeof(ip6pkt.tcp)/4;
505                 /* this makes it easier to spot in a sniffer */
506                 ip6pkt.tcp.th_win   = htons(1234);
507                 ip6pkt.tcp.th_sum   = ip6_checksum((uint16_t *)&ip6pkt.tcp,
508                                                    sizeof(ip6pkt.tcp),
509                                                    &ip6pkt.ip6);
510
511                 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
512                 if (s == -1) {
513                         DBG_ERR("Failed to open sending socket\n");
514                         return -1;
515
516                 }
517                 /* sendto() don't like if the port is set and the socket is
518                    in raw mode.
519                 */
520                 tmpdest = discard_const(dest);
521                 tmpport = tmpdest->ip6.sin6_port;
522
523                 tmpdest->ip6.sin6_port = 0;
524                 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
525                              (const struct sockaddr *)&dest->ip6,
526                              sizeof(dest->ip6));
527                 saved_errno = errno;
528                 tmpdest->ip6.sin6_port = tmpport;
529                 close(s);
530
531                 if (ret != sizeof(ip6pkt)) {
532                         D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
533                         return -1;
534                 }
535                 break;
536
537         default:
538                 DBG_ERR("Not an ipv4/v6 address\n");
539                 return -1;
540         }
541
542         return 0;
543 }
544
545 /*
546  * Packet capture
547  *
548  * If AF_PACKET is available then use a raw socket otherwise use pcap.
549  * wscript has checked to make sure that pcap is available if needed.
550  */
551
552 #ifdef HAVE_AF_PACKET
553
/*
 * Open a raw packet socket to capture traffic from.
 *
 * The socket receives all protocols (ETH_P_ALL), is switched to
 * non-blocking mode and is marked close-on-exec.  It is not bound to
 * an interface here: iface and private_data are not used.
 *
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
        int fd;

        (void)iface;
        (void)private_data;

        /* Open a socket to capture all traffic */
        fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (fd == -1) {
                DBG_ERR("Failed to open raw socket\n");
                return -1;
        }

        DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", fd);

        if (set_blocking(fd, false) != 0) {
                DBG_ERR("Failed to set socket non-blocking (%s)\n",
                        strerror(errno));
                close(fd);
                return -1;
        }

        set_close_on_exec(fd);

        return fd;
}
582
/*
 * Hook for any extra cleanup needed when a capture socket is closed.
 *
 * The raw socket variant keeps no private state, so there is nothing
 * to do: private_data is ignored and 0 is always returned.
 * Note that the socket itself is closed automatically in the caller.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
        (void)private_data;

        return 0;
}
592
593
594 /*
595  * called when the raw socket becomes readable
596  */
597 int ctdb_sys_read_tcp_packet(int s, void *private_data,
598                              ctdb_sock_addr *src,
599                              ctdb_sock_addr *dst,
600                              uint32_t *ack_seq,
601                              uint32_t *seq,
602                              int *rst,
603                              uint16_t *window)
604 {
605         int ret;
606 #define RCVPKTSIZE 100
607         char pkt[RCVPKTSIZE];
608         struct ether_header *eth;
609         struct iphdr *ip;
610         struct ip6_hdr *ip6;
611         struct tcphdr *tcp;
612
613         ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
614         if (ret < sizeof(*eth)+sizeof(*ip)) {
615                 return -1;
616         }
617
618         ZERO_STRUCTP(src);
619         ZERO_STRUCTP(dst);
620
621         /* Ethernet */
622         eth = (struct ether_header *)pkt;
623
624         /* we want either IPv4 or IPv6 */
625         if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
626                 /* IP */
627                 ip = (struct iphdr *)(eth+1);
628
629                 /* We only want IPv4 packets */
630                 if (ip->version != 4) {
631                         return -1;
632                 }
633                 /* Dont look at fragments */
634                 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
635                         return -1;
636                 }
637                 /* we only want TCP */
638                 if (ip->protocol != IPPROTO_TCP) {
639                         return -1;
640                 }
641
642                 /* make sure its not a short packet */
643                 if (offsetof(struct tcphdr, th_ack) + 4 +
644                     (ip->ihl*4) + sizeof(*eth) > ret) {
645                         return -1;
646                 }
647                 /* TCP */
648                 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
649
650                 /* tell the caller which one we've found */
651                 src->ip.sin_family      = AF_INET;
652                 src->ip.sin_addr.s_addr = ip->saddr;
653                 src->ip.sin_port        = tcp->th_sport;
654                 dst->ip.sin_family      = AF_INET;
655                 dst->ip.sin_addr.s_addr = ip->daddr;
656                 dst->ip.sin_port        = tcp->th_dport;
657                 *ack_seq                = tcp->th_ack;
658                 *seq                    = tcp->th_seq;
659                 if (window != NULL) {
660                         *window = tcp->th_win;
661                 }
662                 if (rst != NULL) {
663                         *rst = tcp->th_flags & TH_RST;
664                 }
665
666                 return 0;
667         } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
668                 /* IP6 */
669                 ip6 = (struct ip6_hdr *)(eth+1);
670
671                 /* we only want TCP */
672                 if (ip6->ip6_nxt != IPPROTO_TCP) {
673                         return -1;
674                 }
675
676                 /* TCP */
677                 tcp = (struct tcphdr *)(ip6+1);
678
679                 /* tell the caller which one we've found */
680                 src->ip6.sin6_family = AF_INET6;
681                 src->ip6.sin6_port   = tcp->th_sport;
682                 src->ip6.sin6_addr   = ip6->ip6_src;
683
684                 dst->ip6.sin6_family = AF_INET6;
685                 dst->ip6.sin6_port   = tcp->th_dport;
686                 dst->ip6.sin6_addr   = ip6->ip6_dst;
687
688                 *ack_seq             = tcp->th_ack;
689                 *seq                 = tcp->th_seq;
690                 if (window != NULL) {
691                         *window = tcp->th_win;
692                 }
693                 if (rst != NULL) {
694                         *rst = tcp->th_flags & TH_RST;
695                 }
696
697                 return 0;
698         }
699
700         return -1;
701 }
702
703 #else /* HAVE_AF_PACKET */
704
705 #include <pcap.h>
706
707 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
708 {
709         pcap_t *pt;
710
711         pt=pcap_open_live(iface, 100, 0, 0, NULL);
712         if (pt == NULL) {
713                 DBG_ERR("Failed to open capture device %s\n", iface);
714                 return -1;
715         }
716         *((pcap_t **)private_data) = pt;
717
718         return pcap_fileno(pt);
719 }
720
721 int ctdb_sys_close_capture_socket(void *private_data)
722 {
723         pcap_t *pt = (pcap_t *)private_data;
724         pcap_close(pt);
725         return 0;
726 }
727
728 int ctdb_sys_read_tcp_packet(int s,
729                              void *private_data,
730                              ctdb_sock_addr *src,
731                              ctdb_sock_addr *dst,
732                              uint32_t *ack_seq,
733                              uint32_t *seq,
734                              int *rst,
735                              uint16_t *window)
736 {
737         int ret;
738         struct ether_header *eth;
739         struct ip *ip;
740         struct ip6_hdr *ip6;
741         struct tcphdr *tcp;
742         struct ctdb_killtcp_connection *conn;
743         struct pcap_pkthdr pkthdr;
744         const u_char *buffer;
745         pcap_t *pt = (pcap_t *)private_data;
746
747         buffer=pcap_next(pt, &pkthdr);
748         if (buffer==NULL) {
749                 return -1;
750         }
751
752         ZERO_STRUCTP(src);
753         ZERO_STRUCTP(dst);
754
755         /* Ethernet */
756         eth = (struct ether_header *)buffer;
757
758         /* we want either IPv4 or IPv6 */
759         if (eth->ether_type == htons(ETHERTYPE_IP)) {
760                 /* IP */
761                 ip = (struct ip *)(eth+1);
762
763                 /* We only want IPv4 packets */
764                 if (ip->ip_v != 4) {
765                         return -1;
766                 }
767                 /* Dont look at fragments */
768                 if ((ntohs(ip->ip_off)&0x1fff) != 0) {
769                         return -1;
770                 }
771                 /* we only want TCP */
772                 if (ip->ip_p != IPPROTO_TCP) {
773                         return -1;
774                 }
775
776                 /* make sure its not a short packet */
777                 if (offsetof(struct tcphdr, th_ack) + 4 +
778                     (ip->ip_hl*4) > ret) {
779                         return -1;
780                 }
781                 /* TCP */
782                 tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip);
783
784                 /* tell the caller which one we've found */
785                 src->ip.sin_family      = AF_INET;
786                 src->ip.sin_addr.s_addr = ip->ip_src.s_addr;
787                 src->ip.sin_port        = tcp->th_sport;
788                 dst->ip.sin_family      = AF_INET;
789                 dst->ip.sin_addr.s_addr = ip->ip_dst.s_addr;
790                 dst->ip.sin_port        = tcp->th_dport;
791                 *ack_seq                = tcp->th_ack;
792                 *seq                    = tcp->th_seq;
793                 if (window != NULL) {
794                         *window = tcp->th_win;
795                 }
796                 if (rst != NULL) {
797                         *rst = tcp->th_flags & TH_RST;
798                 }
799
800                 return 0;
801         } else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
802                         /* IP6 */
803                 ip6 = (struct ip6_hdr *)(eth+1);
804
805                 /* we only want TCP */
806                 if (ip6->ip6_nxt != IPPROTO_TCP) {
807                         return -1;
808                 }
809
810                 /* TCP */
811                 tcp = (struct tcphdr *)(ip6+1);
812
813                 /* tell the caller which one we've found */
814                 src->ip6.sin6_family = AF_INET6;
815                 src->ip6.sin6_port   = tcp->th_sport;
816                 src->ip6.sin6_addr   = ip6->ip6_src;
817
818                 dst->ip6.sin6_family = AF_INET6;
819                 dst->ip6.sin6_port   = tcp->th_dport;
820                 dst->ip6.sin6_addr   = ip6->ip6_dst;
821
822                 *ack_seq             = tcp->th_ack;
823                 *seq                 = tcp->th_seq;
824                 if (window != NULL) {
825                         *window = tcp->th_win;
826                 }
827                 if (rst != NULL) {
828                         *rst = tcp->th_flags & TH_RST;
829                 }
830
831                 return 0;
832         }
833
834         return -1;
835 }
836
837 #endif /* HAVE_AF_PACKET */