2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include <netinet/if_ether.h>
28 #include <netinet/ip6.h>
29 #include <netinet/icmp6.h>
30 #include <net/if_arp.h>
/* EtherType value for IPv6.  Used below as the packet-socket protocol and
   ethernet header type when building the IPv6 frame, and to recognise
   IPv6 frames on the capture path. */
34 #define ETHERTYPE_IP6 0x86dd
38 uint16 checksum for n bytes
/*
  Sum n bytes starting at data as a sequence of big-endian (network
  order) 16-bit words and return the un-folded 32-bit total.

  data - first byte to sum; the bytes are read one at a time, so the
         pointer does not have to be 2-byte aligned even though its
         type is uint16_t *
  n    - number of bytes to sum; 0 yields 0

  An odd trailing byte is treated as the high byte of a final word that
  is padded with a zero low byte, as the Internet checksum requires.
  (Passing a lone byte through ntohs() would only give that result on
  little-endian hosts.)

  The callers in this file add several partial sums together and then
  fold the carries themselves.
*/
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	const uint8_t *bytes = (const uint8_t *)data;
	uint32_t sum = 0;

	while (n >= 2) {
		/* same value as ntohs() of the word, on any host byte order */
		sum += ((uint32_t)bytes[0] << 8) | bytes[1];
		bytes += 2;
		n -= 2;
	}
	if (n == 1) {
		sum += (uint32_t)bytes[0] << 8;
	}
	return sum;
}
55 calculate the tcp checksum for tcp over ipv6
/*
  Checksum for an upper-layer payload carried over IPv6.

  data - start of the upper-layer header/payload to be summed
  n    - number of bytes at data
  ip6  - IPv6 header; its source address, destination address and
         next-header value are added to the sum as a pseudo header

  Despite the name, this file also uses it for the ICMPv6 message built
  in ctdb_sys_send_arp().

  NOTE(review): the local declarations, the assignment of phdr[0] and
  the return statement are not present in this listing, so the exact
  value returned (presumably the complemented, folded sum) could not be
  confirmed here.
*/
57 static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
/* both addresses, 16 bytes each */
63 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
64 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
/* second 32-bit pseudo-header word carries the next-header value;
   phdr[0] is filled on a line not shown (presumably the length - confirm) */
67 phdr[1] = htonl(ip6->ip6_nxt);
68 sum += uint16_checksum((uint16_t *)phdr, 8);
/* the payload itself */
70 sum += uint16_checksum(data, n);
/* fold the carries above bit 15 back into the low 16 bits; repeated
   because the first fold can itself produce a carry */
72 sum = (sum & 0xFFFF) + (sum >> 16);
73 sum = (sum & 0xFFFF) + (sum >> 16);
83 send gratuitous arp reply after we have taken over an ip address
85 addr is the address we are trying to claim
86 iface is the interface name we will be using to claim the address
/*
  Announce addr on interface iface.  On the ARP path (addr->ip is used)
  two frames are built and sent: an ARP request followed by an ARP
  reply.  On the IPv6 path (addr->ip6 is used) one ICMPv6 neighbor
  solicitation is built and sent.  Any other family is logged as an
  error.

  addr  - address being claimed; addr->ip.sin_family selects the path
  iface - interface name; used to look up the hardware address with
          SIOCGIFHWADDR and as the sendto() destination name

  NOTE(review): many short lines (case labels, error checks, return
  statements, pointer advances, any close() of the socket) are not
  present in this listing, so return values and cleanup on the error
  paths could not be verified here.
*/
88 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
92 struct ether_header *eh;
95 struct icmp6_hdr *icmp6;
96 struct ifreq if_hwaddr;
97 unsigned char buffer[78]; /* ipv6 neigh solicitation size */
102 switch (addr->ip.sin_family) {
/* ARP path: packet socket for the ARP ethertype */
104 s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_ARP));
106 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
110 /* get the mac address */
/* NOTE(review): iface is copied with no length check; ifr_name is a
   fixed-size field, so an over-long name would overflow it */
111 strcpy(if_hwaddr.ifr_name, iface);
112 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
115 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
/* nothing is announced on a loopback interface */
118 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
119 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
/* NOTE(review): the hardware type is compared with AF_LOCAL although
   the message speaks of ethernet; presumably this relies on AF_LOCAL
   and ARPHRD_ETHER sharing a numeric value - confirm */
123 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
126 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
127 if_hwaddr.ifr_hwaddr.sa_family));
/* the ARP frames use only the first 64 bytes of the 78 byte buffer */
132 memset(buffer, 0 , 64);
/* ethernet header: broadcast destination, our hardware address as source */
133 eh = (struct ether_header *)buffer;
134 memset(eh->ether_dhost, 0xff, ETH_ALEN);
135 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
136 eh->ether_type = htons(ETHERTYPE_ARP);
/* ARP header sits directly behind the ethernet header */
138 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
139 ah->ar_hrd = htons(ARPHRD_ETHER);
140 ah->ar_pro = htons(ETH_P_IP);
141 ah->ar_hln = ETH_ALEN;
144 /* send a gratuitous arp */
145 ah->ar_op = htons(ARPOP_REQUEST);
/* variable part after the ARP header: our hardware address, the
   claimed IP, a zeroed hardware address, the claimed IP again.  The
   advances of ptr between these copies are on lines not shown here. */
146 ptr = (char *)&ah[1];
147 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
149 memcpy(ptr, &addr->ip.sin_addr, 4);
151 memset(ptr, 0, ETH_ALEN);
153 memcpy(ptr, &addr->ip.sin_addr, 4);
/* NOTE(review): strncpy() leaves sa.sa_data without a terminating NUL
   when iface is at least sizeof(sa.sa_data) characters long */
156 strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
157 ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
160 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
164 /* send unsolicited arp reply broadcast */
165 ah->ar_op = htons(ARPOP_REPLY);
/* same layout as the request, except that the second hardware address
   is our own instead of zeros */
166 ptr = (char *)&ah[1];
167 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
169 memcpy(ptr, &addr->ip.sin_addr, 4);
171 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
173 memcpy(ptr, &addr->ip.sin_addr, 4);
176 strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
177 ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
179 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
/* IPv6 path: packet socket for the IPv6 ethertype */
186 s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_IP6));
188 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
192 /* get the mac address */
/* NOTE(review): same unchecked strcpy() as on the ARP path */
193 strcpy(if_hwaddr.ifr_name, iface);
194 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
197 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
200 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
201 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
205 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
208 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
209 if_hwaddr.ifr_hwaddr.sa_family));
/* here the whole buffer is cleared and, further down, sent */
213 memset(buffer, 0 , sizeof(buffer));
214 eh = (struct ether_header *)buffer;
215 memset(eh->ether_dhost, 0xff, ETH_ALEN);
216 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
217 eh->ether_type = htons(ETHERTYPE_IP6);
/* IPv6 header follows the ethernet header; payload length is 24 bytes
   of ICMPv6 and the destination is the claimed address itself */
219 ip6 = (struct ip6_hdr *)(eh+1);
221 ip6->ip6_plen = htons(24);
222 ip6->ip6_nxt = IPPROTO_ICMPV6;
224 ip6->ip6_dst = addr->ip6.sin6_addr;
/* ICMPv6 message follows the IPv6 header */
226 icmp6 = (struct icmp6_hdr *)(ip6+1);
227 icmp6->icmp6_type = ND_NEIGHBOR_SOLICIT;
228 icmp6->icmp6_code = 0;
/* the claimed address is written starting at icmp6_data32[1];
   presumably this is the target-address field of the solicitation and
   lands in buffer behind struct icmp6_hdr - confirm against the
   struct layout */
229 memcpy(&icmp6->icmp6_data32[1], &addr->ip6.sin6_addr, 16);
/* checksum over the ip6_plen bytes of ICMPv6 data plus pseudo header */
231 icmp6->icmp6_cksum = tcp_checksum6((uint16_t *)icmp6, ntohs(ip6->ip6_plen), ip6);
233 strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
/* 78 is the declared size of buffer */
234 ret = sendto(s, buffer, 78, 0, &sa, sizeof(sa));
237 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
244 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family));
253 simple TCP checksum - assumes data is multiple of 2 bytes long
/*
  TCP checksum for a segment carried over IPv4.

  data - start of the TCP header/payload to be summed
  n    - number of bytes at data
  ip   - IPv4 header; saddr, daddr and protocol are added to the sum
         together with n as the pseudo header

  NOTE(review): the continuation lines of the two address calls and the
  return statement are not present in this listing; the returned value
  (presumably the complemented, folded sum) could not be confirmed.
*/
255 static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
/* start with the segment itself */
257 uint32_t sum = uint16_checksum(data, n);
/* pseudo header: source and destination address ... */
259 sum += uint16_checksum((uint16_t *)(void *)&ip->saddr,
261 sum += uint16_checksum((uint16_t *)(void *)&ip->daddr,
/* ... then protocol number and length, added as plain host-order values */
263 sum += ip->protocol + n;
/* fold the carries above bit 15 back into the low 16 bits; repeated
   because the first fold can itself produce a carry */
264 sum = (sum & 0xFFFF) + (sum >> 16);
265 sum = (sum & 0xFFFF) + (sum >> 16);
275 Send tcp segment from the specified IP/port to the specified
278 This is used to trigger the receiving host into sending its own ACK,
279 which should trigger early detection of TCP reset by the client
282 This can also be used to send RST segments (if rst is true) and also
283 if correct seq and ack numbers are provided.
/*
  Build a TCP segment with no payload inside a hand-made IPv4 or IPv6
  header and send it through a raw socket.

  dest - destination address and port
  src  - source address and port; src->ip.sin_family selects the
         IPv4 or IPv6 path
  seq  - stored unchanged in the TCP seq field
  ack  - stored unchanged in the TCP ack_seq field
         (presumably both are already in network byte order - confirm
         with the callers)
  rst  - the lines that apply this flag are not present in this listing

  On the IPv6 path the port inside *dest is temporarily set to 0 through
  a const-discarding pointer for the duration of sendto() and restored
  afterwards, so the caller's object is briefly modified.

  NOTE(review): case labels, error checks, close() calls and return
  statements are not present in this listing, so return values and
  cleanup could not be verified here.
*/
285 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
286 const ctdb_sock_addr *src,
287 uint32_t seq, uint32_t ack, int rst)
293 ctdb_sock_addr *tmpdest;
303 switch (src->ip.sin_family) {
/* IPv4 header; ihl is expressed in 32-bit words */
306 ip4pkt.ip.version = 4;
307 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
308 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
310 ip4pkt.ip.protocol = IPPROTO_TCP;
311 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
312 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
/* TCP header; ports are copied as stored in the sockaddr */
315 ip4pkt.tcp.source = src->ip.sin_port;
316 ip4pkt.tcp.dest = dest->ip.sin_port;
317 ip4pkt.tcp.seq = seq;
318 ip4pkt.tcp.ack_seq = ack;
/* data offset in 32-bit words */
323 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
324 /* this makes it easier to spot in a sniffer */
325 ip4pkt.tcp.window = htons(1234);
326 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
328 /* open a raw socket to send this segment from */
/* NOTE(review): the protocol argument is wrapped in htons() here but
   passed as plain IPPROTO_RAW on the IPv6 path below - confirm which
   form is intended for an AF_INET raw socket */
329 s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW));
331 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
/* we supply the IP header ourselves */
336 ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
338 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
345 set_close_on_exec(s);
347 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0, &dest->ip, sizeof(dest->ip));
/* anything other than a complete send is reported as a failure */
349 if (ret != sizeof(ip4pkt)) {
350 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
/* IPv6 header: version 6 in the top nibble of ip6_vfc, 20 bytes of
   payload (the TCP header), hop limit 64 */
356 ip6pkt.ip6.ip6_vfc = 0x60;
357 ip6pkt.ip6.ip6_plen = htons(20);
358 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
359 ip6pkt.ip6.ip6_hlim = 64;
360 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
361 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
363 ip6pkt.tcp.source = src->ip6.sin6_port;
364 ip6pkt.tcp.dest = dest->ip6.sin6_port;
365 ip6pkt.tcp.seq = seq;
366 ip6pkt.tcp.ack_seq = ack;
371 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
372 /* this makes it easier to spot in a sniffer */
373 ip6pkt.tcp.window = htons(1234);
374 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
376 s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
378 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
382 /* sendto() dont like if the port is set and the socket is
385 tmpdest = discard_const(dest);
386 tmpport = tmpdest->ip6.sin6_port;
388 tmpdest->ip6.sin6_port = 0;
389 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, &dest->ip6, sizeof(dest->ip6));
390 tmpdest->ip6.sin6_port = tmpport;
393 if (ret != sizeof(ip6pkt)) {
394 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
400 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
409 see if we currently have an interface with the given IP
411 we try to bind to it, and if that fails then we don't have that IP
414 note: this function has no ifname argument, so no interface name is returned
/*
  Probe for a local address by trying to bind a TCP socket to it.

  _addr - address to probe; it is copied first, so clearing the port
          below does not modify the caller's object

  NOTE(review): the lines that evaluate the bind() result, close the
  socket and return are not present in this listing.
  NOTE(review): __addr uses a double leading underscore, a form the C
  standard reserves for the implementation.
*/
416 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
420 ctdb_sock_addr __addr = *_addr;
421 ctdb_sock_addr *addr = &__addr;
/* clear the port in whichever sockaddr variant is in use, so that only
   the address takes part in the bind */
423 switch (addr->sa.sa_family) {
425 addr->ip.sin_port = 0;
428 addr->ip6.sin6_port = 0;
431 s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
/* the length passed is that of the whole ctdb_sock_addr, not of the
   family-specific sockaddr */
435 ret = bind(s, (struct sockaddr *)addr, sizeof(ctdb_sock_addr));
442 This function is used to open a raw socket to capture from
/*
  Open a packet socket that receives frames of every protocol
  (ETH_P_ALL) and mark it close-on-exec.

  iface        - not referenced on any line visible in this listing
  private_data - not referenced on any line visible in this listing

  NOTE(review): the return statements are not present in this listing;
  presumably the socket descriptor is returned on success - confirm.
*/
444 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
448 /* Open a socket to capture all traffic */
449 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
451 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
456 set_close_on_exec(s);
462 This function is used to do any additional cleanup required when closing
464 Note that the socket itself is closed automatically in the caller.
/*
  Cleanup hook paired with ctdb_sys_open_capture_socket().

  private_data - opaque pointer from the open call

  NOTE(review): the function body is not present in this listing, so
  what it does and returns could not be confirmed.
*/
466 int ctdb_sys_close_capture_socket(void *private_data)
473 called when the raw socket becomes readable
/*
  Read one frame from socket s and, when it carries TCP over IPv4 or
  IPv6, report the endpoints and the acknowledgement number.

  s            - socket to recv() from
  private_data - not referenced on any line visible in this listing
  src, dst     - receive family, address and TCP port of sender and
                 receiver; address and port are copied as found in the
                 packet, with no byte-order conversion
  ack_seq      - receives tcp->ack_seq, also copied unconverted
  seq          - no assignment to it is visible in this listing

  NOTE(review): the return statements fall on lines missing from this
  listing, so the success and failure values could not be confirmed.
*/
475 int ctdb_sys_read_tcp_packet(int s, void *private_data,
476 ctdb_sock_addr *src, ctdb_sock_addr *dst,
477 uint32_t *ack_seq, uint32_t *seq)
/* only the first RCVPKTSIZE bytes of a frame are kept */
480 #define RCVPKTSIZE 100
481 char pkt[RCVPKTSIZE];
482 struct ether_header *eth;
/* NOTE(review): per recv(2), MSG_TRUNC on a packet socket returns the
   real frame length even when it is longer than the buffer, so ret can
   exceed RCVPKTSIZE; the length checks below compare against ret, not
   against what was actually stored in pkt */
487 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
/* NOTE(review): the right-hand side is a size_t; if ret is a signed int
   (declaration not shown) a recv() failure of -1 converts to a huge
   unsigned value and is not caught here - confirm */
488 if (ret < sizeof(*eth)+sizeof(*ip)) {
493 eth = (struct ether_header *)pkt;
495 /* we want either IPv4 or IPv6 */
496 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
498 ip = (struct iphdr *)(eth+1);
500 /* We only want IPv4 packets */
501 if (ip->version != 4) {
504 /* Don't look at fragments */
505 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
508 /* we only want TCP */
509 if (ip->protocol != IPPROTO_TCP) {
513 /* make sure it's not a short packet */
/* needs the ethernet header, the IP header (ihl 32-bit words) and the
   TCP header up to and including ack_seq */
514 if (offsetof(struct tcphdr, ack_seq) + 4 +
515 (ip->ihl*4) + sizeof(*eth) > ret) {
/* TCP header starts ihl 32-bit words after the start of the IP header */
519 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
521 /* tell the caller which one we've found */
522 src->ip.sin_family = AF_INET;
523 src->ip.sin_addr.s_addr = ip->saddr;
524 src->ip.sin_port = tcp->source;
525 dst->ip.sin_family = AF_INET;
526 dst->ip.sin_addr.s_addr = ip->daddr;
527 dst->ip.sin_port = tcp->dest;
528 *ack_seq = tcp->ack_seq;
532 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
534 ip6 = (struct ip6_hdr *)(eth+1);
536 /* we only want TCP */
/* only a TCP header directly behind the fixed IPv6 header is accepted;
   packets with extension headers are not matched */
537 if (ip6->ip6_nxt != IPPROTO_TCP) {
/* NOTE(review): no length check for the IPv6 plus TCP headers is
   visible in this listing before tcp is dereferenced - confirm */
542 tcp = (struct tcphdr *)(ip6+1);
544 /* tell the caller which one we've found */
545 src->ip6.sin6_family = AF_INET6;
546 src->ip6.sin6_port = tcp->source;
547 src->ip6.sin6_addr = ip6->ip6_src;
549 dst->ip6.sin6_family = AF_INET6;
550 dst->ip6.sin6_port = tcp->dest;
551 dst->ip6.sin6_addr = ip6->ip6_dst;
553 *ack_seq = tcp->ack_seq;