2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include <netinet/if_ether.h>
28 #include <netinet/ip6.h>
29 #include <netinet/icmp6.h>
30 #include <net/if_arp.h>
31 #include <netpacket/packet.h>
34 #define ETHERTYPE_IP6 0x86dd
38 calculate the tcp checksum for tcp over ipv6
/*
 * Visible steps: fold the IPv6 source and destination addresses, an 8-byte
 * two-word block (phdr) and the payload itself into one 32-bit running sum
 * via uint16_checksum(), then fold the carries back into the low 16 bits.
 * Besides the TCP segment built in ctdb_sys_send_tcp(), this helper is also
 * applied to the ICMPv6 message built in ctdb_sys_send_arp().
 * NOTE(review): the local declarations, the phdr[0] assignment and the final
 * conversion/return are on lines not visible in this excerpt.
 */
40 static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
/* 16 = size in bytes of one IPv6 address */
46 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
47 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
/* second word of phdr carries the next-header value in network byte order */
50 phdr[1] = htonl(ip6->ip6_nxt);
51 sum += uint16_checksum((uint16_t *)phdr, 8);
/* the payload itself; n is presumably a byte count (callers pass sizeof/plen) */
53 sum += uint16_checksum(data, n);
/* two folding passes so that a carry produced by the first one is absorbed too */
55 sum = (sum & 0xFFFF) + (sum >> 16);
56 sum = (sum & 0xFFFF) + (sum >> 16);
66 send gratuitous arp reply after we have taken over an ip address
68 addr is the address we are trying to claim
69 iface is the interface name we will be using to claim the address
/*
 * Announce on interface `iface` that this node now owns `addr`.
 * The address family in addr->ip.sin_family selects the branch (the case
 * labels themselves are not visible in this excerpt):
 *  - the branch that reads addr->ip.sin_addr sends two 64-byte broadcast ARP
 *    frames, an ARPOP_REQUEST followed by an ARPOP_REPLY;
 *  - the branch that reads addr->ip6.sin6_addr sends one 78-byte ICMPv6
 *    neighbour solicitation frame;
 *  - any other family is logged as "not an ipv4/ipv6 address".
 * NOTE(review): return statements and any socket close calls are on lines not
 * shown here, so the return convention cannot be confirmed from this view.
 */
71 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
74 struct sockaddr_ll sall;
75 struct ether_header *eh;
78 struct icmp6_hdr *icmp6;
79 struct ifreq if_hwaddr;
80 unsigned char buffer[78]; /* ipv6 neigh solicitation size */
/* link-layer broadcast address used as sendto() destination in both branches */
82 char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
87 switch (addr->ip.sin_family) {
/* first branch: works on addr->ip.sin_addr, i.e. the IPv4 / ARP path */
89 s = socket(PF_PACKET, SOCK_RAW, htons(ETHERTYPE_ARP));
91 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
95 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
/* NOTE(review): strncpy() leaves ifr_name without a terminating NUL when
   iface is at least sizeof(ifr.ifr_name) bytes long */
96 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
/* resolve the interface name to the index needed for sall.sll_ifindex */
97 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
98 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
103 /* get the mac address */
/* NOTE(review): unbounded strcpy(); an iface string longer than ifr_name
   would overflow if_hwaddr -- confirm that callers bound the name */
104 strcpy(if_hwaddr.ifr_name, iface);
105 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
108 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
/* nothing to announce on a loopback device */
111 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
112 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
/* NOTE(review): sa_family is treated as a hardware type (ARPHRD_*) just
   above, yet here it is compared against AF_LOCAL; the log text suggests an
   Ethernet check was intended -- confirm */
116 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
119 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
120 if_hwaddr.ifr_hwaddr.sa_family));
/* only the 64 bytes that are transmitted by the sendto() calls are cleared */
125 memset(buffer, 0 , 64);
/* Ethernet header: broadcast destination, our own MAC as source */
126 eh = (struct ether_header *)buffer;
127 memset(eh->ether_dhost, 0xff, ETH_ALEN);
128 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
129 eh->ether_type = htons(ETHERTYPE_ARP);
/* fixed ARP header sits directly behind the Ethernet header */
131 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
132 ah->ar_hrd = htons(ARPHRD_ETHER);
133 ah->ar_pro = htons(ETH_P_IP);
134 ah->ar_hln = ETH_ALEN;
137 /* send a gratuitous arp */
138 ah->ar_op = htons(ARPOP_REQUEST);
/* variable part after the fixed ARP header, in copy order: own MAC, claimed
   IP, all-zero MAC, claimed IP.
   NOTE(review): ptr is presumably advanced between these copies on lines not
   shown in this excerpt */
139 ptr = (char *)&ah[1];
140 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
142 memcpy(ptr, &addr->ip.sin_addr, 4);
144 memset(ptr, 0, ETH_ALEN);
146 memcpy(ptr, &addr->ip.sin_addr, 4);
/* link-layer destination: broadcast on the interface looked up above */
149 sall.sll_family = AF_PACKET;
151 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
152 sall.sll_protocol = htons(ETH_P_ALL);
153 sall.sll_ifindex = ifr.ifr_ifindex;
154 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
157 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
161 /* send unsolicited arp reply broadcast */
162 ah->ar_op = htons(ARPOP_REPLY);
/* same copy order as the request, except that the second hardware-address
   field carries our own MAC instead of zeros */
163 ptr = (char *)&ah[1];
164 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
166 memcpy(ptr, &addr->ip.sin_addr, 4);
168 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
170 memcpy(ptr, &addr->ip.sin_addr, 4);
173 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
175 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
/* second branch: works on addr->ip6.sin6_addr, i.e. the IPv6 path.
   NOTE(review): the packet socket is still opened with ETHERTYPE_ARP although
   the frame built below is ETHERTYPE_IP6, and the creation message is logged
   at DEBUG_NOTICE here versus DEBUG_DEBUG in the first branch -- confirm both
   are intentional */
183 s = socket(PF_PACKET, SOCK_RAW, htons(ETHERTYPE_ARP));
185 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
189 DEBUG(DEBUG_NOTICE, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
/* NOTE(review): same strncpy()/strcpy() concerns as in the first branch */
190 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
191 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
192 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
197 /* get the mac address */
198 strcpy(if_hwaddr.ifr_name, iface);
199 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
202 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
205 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
206 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
210 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
213 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
214 if_hwaddr.ifr_hwaddr.sa_family));
/* unlike the first branch, the whole buffer is cleared and later sent */
218 memset(buffer, 0 , sizeof(buffer));
219 eh = (struct ether_header *)buffer;
220 memset(eh->ether_dhost, 0xff, ETH_ALEN);
221 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
222 eh->ether_type = htons(ETHERTYPE_IP6);
/* IPv6 header directly behind the Ethernet header */
224 ip6 = (struct ip6_hdr *)(eh+1);
/* 24 = 8-byte ICMPv6 header + the 16-byte address copied in below */
226 ip6->ip6_plen = htons(24);
227 ip6->ip6_nxt = IPPROTO_ICMPV6;
/* the packet is addressed to the very address being claimed */
229 ip6->ip6_dst = addr->ip6.sin6_addr;
/* ICMPv6 neighbour solicitation directly behind the IPv6 header */
231 icmp6 = (struct icmp6_hdr *)(ip6+1);
232 icmp6->icmp6_type = ND_NEIGHBOR_SOLICIT;
233 icmp6->icmp6_code = 0;
/* claimed address is written starting at icmp6_data32[1] */
234 memcpy(&icmp6->icmp6_data32[1], &addr->ip6.sin6_addr, 16);
/* checksum covers ip6_plen bytes starting at the ICMPv6 header */
236 icmp6->icmp6_cksum = tcp_checksum6((uint16_t *)icmp6, ntohs(ip6->ip6_plen), ip6);
238 sall.sll_family = AF_PACKET;
240 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
241 sall.sll_protocol = htons(ETH_P_ALL);
242 sall.sll_ifindex = ifr.ifr_ifindex;
/* 78 matches the declared size of buffer */
243 ret = sendto(s, buffer, 78, 0, (struct sockaddr *)&sall, sizeof(sall));
246 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
/* reached for any address family handled by neither branch above */
253 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family));
262 simple TCP checksum - assumes data is multiple of 2 bytes long
/*
 * Checksum for a TCP segment carried over IPv4: combines the checksum of the
 * n-byte segment with the IPv4 source and destination addresses, the protocol
 * number and the length n, then folds the carries back into the low 16 bits.
 * NOTE(review): the tails of the two address calls and the final
 * conversion/return are on lines not visible in this excerpt.
 */
264 static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
/* start from the checksum of the segment itself */
266 uint32_t sum = uint16_checksum(data, n);
/* add the source and destination addresses taken from the IPv4 header */
268 sum += uint16_checksum((uint16_t *)(void *)&ip->saddr,
270 sum += uint16_checksum((uint16_t *)(void *)&ip->daddr,
/* protocol number and segment length are added as plain integers */
272 sum += ip->protocol + n;
/* two folding passes so that a carry produced by the first one is absorbed too */
273 sum = (sum & 0xFFFF) + (sum >> 16);
274 sum = (sum & 0xFFFF) + (sum >> 16);
284 Send tcp segment from the specified IP/port to the specified
287 This is used to trigger the receiving host into sending its own ACK,
288 which should trigger early detection of TCP reset by the client
291 This can also be used to send RST segments (if rst is true), provided
292 that the correct seq and ack numbers are supplied.
/*
 * Build an IP header plus TCP header addressed from `src` to `dest` and push
 * it out through a raw socket. src->ip.sin_family selects between the
 * ip4pkt (IPv4) and ip6pkt (IPv6) code paths; any other family is logged as
 * "not an ipv4/v6 address".
 * seq and ack are copied into the TCP header unchanged.
 * NOTE(review): the packet struct declarations, the case labels, the handling
 * of `rst`, and the return/close statements are on lines not visible in this
 * excerpt.
 */
294 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
295 const ctdb_sock_addr *src,
296 uint32_t seq, uint32_t ack, int rst)
302 ctdb_sock_addr *tmpdest;
312 switch (src->ip.sin_family) {
/* IPv4 path: fill in the IP header by hand */
315 ip4pkt.ip.version = 4;
/* header length is expressed in 32-bit words */
316 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
317 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
319 ip4pkt.ip.protocol = IPPROTO_TCP;
320 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
321 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
/* ports, seq and ack are stored as given with no byte-order conversion, so
   callers presumably pass them in network order -- TODO confirm */
324 ip4pkt.tcp.source = src->ip.sin_port;
325 ip4pkt.tcp.dest = dest->ip.sin_port;
326 ip4pkt.tcp.seq = seq;
327 ip4pkt.tcp.ack_seq = ack;
/* data offset is expressed in 32-bit words */
332 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
333 /* this makes it easier to spot in a sniffer */
334 ip4pkt.tcp.window = htons(1234);
/* checksum is computed last, over the finished TCP header */
335 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
337 /* open a raw socket to send this segment from */
/* NOTE(review): the protocol argument goes through htons() here, while the
   PF_INET6 socket() call further down passes IPPROTO_RAW unswapped --
   confirm which form is intended */
338 s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW));
340 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
/* IP_HDRINCL: the IP header filled in above is supplied by this code */
345 ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
347 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
354 set_close_on_exec(s);
356 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0, &dest->ip, sizeof(dest->ip));
/* anything other than a complete write is reported as a failure */
358 if (ret != sizeof(ip4pkt)) {
359 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
/* IPv6 path: 0x60 puts version 6 in the top nibble of ip6_vfc */
365 ip6pkt.ip6.ip6_vfc = 0x60;
/* payload length; presumably the size of a TCP header without options */
366 ip6pkt.ip6.ip6_plen = htons(20);
367 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
368 ip6pkt.ip6.ip6_hlim = 64;
369 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
370 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
372 ip6pkt.tcp.source = src->ip6.sin6_port;
373 ip6pkt.tcp.dest = dest->ip6.sin6_port;
374 ip6pkt.tcp.seq = seq;
375 ip6pkt.tcp.ack_seq = ack;
380 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
381 /* this makes it easier to spot in a sniffer */
382 ip6pkt.tcp.window = htons(1234);
383 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
385 s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
387 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
/* The statements that follow save dest's port, zero it for the duration of
   the sendto() call and then restore it; dest is const-qualified, so the
   write goes through a discard_const() alias (tmpdest). */
391 /* sendto() dont like if the port is set and the socket is
394 tmpdest = discard_const(dest);
395 tmpport = tmpdest->ip6.sin6_port;
397 tmpdest->ip6.sin6_port = 0;
398 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, &dest->ip6, sizeof(dest->ip6));
399 tmpdest->ip6.sin6_port = tmpport;
402 if (ret != sizeof(ip6pkt)) {
403 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
409 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
417 This function is used to open a raw socket to capture from
/*
 * Open the raw packet socket used for capturing traffic.
 * Visible steps: create an AF_PACKET/SOCK_RAW socket for ETH_P_ALL, log the
 * descriptor, and mark it close-on-exec.
 * NOTE(review): iface and private_data are not referenced on any visible
 * line; the error path and the return value are outside this excerpt.
 */
419 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
423 /* Open a socket to capture all traffic */
424 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
426 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
430 DEBUG(DEBUG_NOTICE, (__location__ " Created RAW SOCKET FD:%d for tcp tickle\n", s));
/* keep the descriptor from leaking into exec'ed child programs */
433 set_close_on_exec(s);
439 This function is used to do any additional cleanup required when closing
441 Note that the socket itself is closed automatically in the caller.
/*
 * NOTE(review): only the signature is visible in this excerpt. Per the
 * surrounding description the caller closes the socket itself, so this hook
 * is only for additional cleanup tied to private_data.
 */
443 int ctdb_sys_close_capture_socket(void *private_data)
450 called when the raw socket becomes readable
452 int ctdb_sys_read_tcp_packet(int s, void *private_data,
453 ctdb_sock_addr *src, ctdb_sock_addr *dst,
454 uint32_t *ack_seq, uint32_t *seq)
457 #define RCVPKTSIZE 100
458 char pkt[RCVPKTSIZE];
459 struct ether_header *eth;
464 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
465 if (ret < sizeof(*eth)+sizeof(*ip)) {
470 eth = (struct ether_header *)pkt;
472 /* we want either IPv4 or IPv6 */
473 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
475 ip = (struct iphdr *)(eth+1);
477 /* We only want IPv4 packets */
478 if (ip->version != 4) {
481 /* Don't look at fragments */
482 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
485 /* we only want TCP */
486 if (ip->protocol != IPPROTO_TCP) {
490 /* make sure it's not a short packet */
491 if (offsetof(struct tcphdr, ack_seq) + 4 +
492 (ip->ihl*4) + sizeof(*eth) > ret) {
496 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
498 /* tell the caller which one we've found */
499 src->ip.sin_family = AF_INET;
500 src->ip.sin_addr.s_addr = ip->saddr;
501 src->ip.sin_port = tcp->source;
502 dst->ip.sin_family = AF_INET;
503 dst->ip.sin_addr.s_addr = ip->daddr;
504 dst->ip.sin_port = tcp->dest;
505 *ack_seq = tcp->ack_seq;
509 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
511 ip6 = (struct ip6_hdr *)(eth+1);
513 /* we only want TCP */
514 if (ip6->ip6_nxt != IPPROTO_TCP) {
519 tcp = (struct tcphdr *)(ip6+1);
521 /* tell the caller which one we've found */
522 src->ip6.sin6_family = AF_INET6;
523 src->ip6.sin6_port = tcp->source;
524 src->ip6.sin6_addr = ip6->ip6_src;
526 dst->ip6.sin6_family = AF_INET6;
527 dst->ip6.sin6_port = tcp->dest;
528 dst->ip6.sin6_addr = ip6->ip6_dst;
530 *ack_seq = tcp->ack_seq;