2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include <netinet/if_ether.h>
28 #include <netinet/ip6.h>
29 #include <net/if_arp.h>
34 send gratuitous arp reply after we have taken over an ip address
36 saddr is the address we are trying to claim
37 iface is the interface name we will be using to claim the address
/*
  Builds a 64-byte ethernet frame that carries an ARP header for the
  IPv4 address in saddr and hands it to sendto() twice on a SOCK_PACKET
  socket: first with ar_op set to ARPOP_REQUEST, then with ar_op set to
  ARPOP_REPLY. The frame is addressed to the ethernet broadcast address
  and uses the hardware address that SIOCGIFHWADDR reports for iface as
  its source address.

  saddr - address being announced; only sin_family == AF_INET is handled
  iface - interface name; used for the SIOCGIFHWADDR lookup and copied
          into sa.sa_data before each sendto()

  NOTE(review): several statements of this function (local declarations,
  the bodies of the error paths, the ptr advances and the returns) are
  not visible in this view, so return values are not documented here.
 */
39 int ctdb_sys_send_arp(const struct sockaddr_in *saddr, const char *iface)
43 struct ether_header *eh;
/* carries the interface name into the ioctl and the hardware address out of it */
45 struct ifreq if_hwaddr;
46 unsigned char buffer[64]; /*minimum eth frame size */
51 /* for now, we only handle AF_INET addresses */
52 if (saddr->sin_family != AF_INET) {
53 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4 address (family is %u)\n", saddr->sin_family));
/* NOTE(review): packet(7) describes SOCK_PACKET as obsolete - TODO confirm whether AF_PACKET should be used instead */
57 s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_ARP));
59 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
63 /* get the mac address */
/* NOTE(review): strcpy is unbounded while ifr_name is a fixed-size array (IFNAMSIZ per netdevice(7)); an over-long iface would overflow it - TODO confirm callers bound the length */
64 strcpy(if_hwaddr.ifr_name, iface);
65 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
68 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
/* after SIOCGIFHWADDR the sa_family field holds the hardware type of the interface */
71 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
72 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
/* NOTE(review): an address-family constant is compared against a hardware type here; presumably this relies on AF_LOCAL and ARPHRD_ETHER sharing a numeric value - confirm */
76 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
79 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
80 if_hwaddr.ifr_hwaddr.sa_family));
/* ethernet header: broadcast destination, our hardware address as source, ethertype ARP */
85 memset(buffer, 0 , 64);
86 eh = (struct ether_header *)buffer;
87 memset(eh->ether_dhost, 0xff, ETH_ALEN);
88 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
89 eh->ether_type = htons(ETHERTYPE_ARP);
/* the ARP header is placed directly after the ethernet header inside buffer */
91 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
92 ah->ar_hrd = htons(ARPHRD_ETHER);
93 ah->ar_pro = htons(ETH_P_IP);
94 ah->ar_hln = ETH_ALEN;
97 /* send a gratuitous arp */
98 ah->ar_op = htons(ARPOP_REQUEST);
/*
  The four copies below fill the ARP payload. In ARP field order that is
  sender hardware address, sender IP, target hardware address (zeroed
  here) and target IP. Assumes ptr is set up and advanced between the
  copies by statements that are not visible in this view.
 */
100 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
102 memcpy(ptr, &saddr->sin_addr, 4);
104 memset(ptr, 0, ETH_ALEN);
106 memcpy(ptr, &saddr->sin_addr, 4);
/* the interface name travels in sa.sa_data; NOTE(review): strncpy leaves the field unterminated when iface fills it completely */
109 strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
110 ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
113 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
117 /* send unsolicited arp reply broadcast */
118 ah->ar_op = htons(ARPOP_REPLY);
/* rewind ptr to the first byte after the ARP header so the payload can be rewritten */
119 ptr = (char *)&ah[1];
/* same payload as the request except that the third copy writes our own hardware address instead of zeroes */
120 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
122 memcpy(ptr, &saddr->sin_addr, 4);
124 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
126 memcpy(ptr, &saddr->sin_addr, 4);
129 strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
130 ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
132 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
142 uint16 checksum for n bytes
/*
  Accumulates values read from data into a 32-bit sum. Each 16-bit word
  is converted with ntohs() before it is added. One visible statement
  reads a single byte through a uint8_t pointer, presumably to handle a
  trailing odd byte when n is not even.

  NOTE(review): the loop, the initialisation of sum and the return
  statement are not visible in this view.
 */
144 static uint32_t uint16_checksum(uint16_t *data, size_t n)
148 sum += (uint32_t)ntohs(*data);
/* single-byte read; the byte value is still passed through ntohs() before being added */
153 sum += (uint32_t)ntohs(*(uint8_t *)data);
159 simple TCP checksum - assumes data is multiple of 2 bytes long
/*
  Starts from the uint16_checksum() of the n bytes at data and then adds
  the IPv4 source address, destination address, protocol number and n
  taken from ip (the fields of the TCP pseudo-header).

  NOTE(review): the second argument of the two address checksum calls
  and the final return are not visible in this view.
 */
161 static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
163 uint32_t sum = uint16_checksum(data, n);
165 sum += uint16_checksum((uint16_t *)(void *)&ip->saddr,
167 sum += uint16_checksum((uint16_t *)(void *)&ip->daddr,
169 sum += ip->protocol + n;
/* fold everything above bit 15 back into the low 16 bits; repeated because the first fold can itself carry */
170 sum = (sum & 0xFFFF) + (sum >> 16);
171 sum = (sum & 0xFFFF) + (sum >> 16);
181 calculate the tcp checksum for tcp over ipv6
/*
  IPv6 counterpart of the TCP checksum: starts from the
  uint16_checksum() of the n bytes at data and adds the 16-byte source
  and destination addresses and the payload length field from ip6.

  NOTE(review): lines between the visible statements and the final
  return are not visible in this view.
 */
183 static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
185 uint32_t sum = uint16_checksum(data, n);
188 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
189 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
/* NOTE(review): ip6_plen is added as stored, without the ntohs() applied to the other words - confirm the byte order the caller stores it in */
190 sum += ip6->ip6_plen;
/* fold everything above bit 15 back into the low 16 bits; repeated because the first fold can itself carry */
193 sum = (sum & 0xFFFF) + (sum >> 16);
194 sum = (sum & 0xFFFF) + (sum >> 16);
204 Send tcp segment from the specified IP/port to the specified
207 This is used to trigger the receiving host into sending its own ACK,
208 which should trigger early detection of TCP reset by the client
211 This can also be used to send RST segments (if rst is true) and also
212 if correct seq and ack numbers are provided.
/*
  Builds one TCP segment with its IP header by hand and sends it on a
  raw socket. The address family of src selects between an IPv4 packet
  (ip4pkt) and an IPv6 packet (ip6pkt); any other family is logged as
  not an ipv4/v6 address.

  dest - destination address and port
  src  - source address and port placed in the forged headers
  seq  - stored unchanged in the TCP seq field
  ack  - stored unchanged in the TCP ack_seq field
  rst  - described by the comment above as selecting an RST segment; the
         statements that set the TCP flags are not visible in this view

  In the IPv6 branch the destination port is set to zero through a
  const-discarding alias of dest for the duration of sendto() and then
  restored, so the object behind dest is briefly modified.

  NOTE(review): seq and ack are stored without byte-order conversion;
  presumably callers pass them in network byte order - confirm.
  NOTE(review): local declarations, case labels, error-path bodies and
  returns are not visible in this view.
 */
214 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
215 const ctdb_sock_addr *src,
216 uint32_t seq, uint32_t ack, int rst)
222 ctdb_sock_addr *tmpdest;
232 switch (src->ip.sin_family) {
/* IPv4 header; ihl is expressed in 32-bit words and tot_len covers the whole packet */
235 ip4pkt.ip.version = 4;
236 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
237 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
239 ip4pkt.ip.protocol = IPPROTO_TCP;
240 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
241 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
/* TCP header; ports are copied as stored in the sockaddr structures */
244 ip4pkt.tcp.source = src->ip.sin_port;
245 ip4pkt.tcp.dest = dest->ip.sin_port;
246 ip4pkt.tcp.seq = seq;
247 ip4pkt.tcp.ack_seq = ack;
/* data offset in 32-bit words */
252 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
253 /* this makes it easier to spot in a sniffer */
254 ip4pkt.tcp.window = htons(1234);
255 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
257 /* open a raw socket to send this segment from */
/* NOTE(review): the protocol argument is passed through htons() here but not in the IPv6 branch below; raw(7) expects the IPPROTO constant itself - TODO confirm this is intended */
258 s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW));
260 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
/* IP_HDRINCL tells the stack that the buffer already starts with an IP header */
265 ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
267 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
/* NOTE(review): no matching set_close_on_exec() call is visible in the IPv6 branch - confirm */
274 set_close_on_exec(s);
276 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0, &dest->ip, sizeof(dest->ip));
278 if (ret != sizeof(ip4pkt)) {
279 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
/* IPv6 header; 0x60 puts version 6 in the high nibble of ip6_vfc */
285 ip6pkt.ip6.ip6_vfc = 0x60;
/* NOTE(review): ip6_plen is a 16-bit header field and is assigned in host byte order here; 20 presumably equals the size of the TCP header - confirm whether htons() is needed */
286 ip6pkt.ip6.ip6_plen = 20;
287 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
288 ip6pkt.ip6.ip6_hlim = 64;
289 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
290 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
292 ip6pkt.tcp.source = src->ip6.sin6_port;
293 ip6pkt.tcp.dest = dest->ip6.sin6_port;
294 ip6pkt.tcp.seq = seq;
295 ip6pkt.tcp.ack_seq = ack;
300 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
301 /* this makes it easier to spot in a sniffer */
302 ip6pkt.tcp.window = htons(1234);
303 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
305 s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
307 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
311 /* sendto() dont like if the port is set and the socket is
314 tmpdest = discard_const(dest);
315 tmpport = tmpdest->ip6.sin6_port;
317 tmpdest->ip6.sin6_port = 0;
318 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, &dest->ip6, sizeof(dest->ip6));
319 tmpdest->ip6.sin6_port = tmpport;
322 if (ret != sizeof(ip6pkt)) {
323 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
329 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
338 see if we currently have an interface with the given IP
340 we try to bind to it, and if that fails then we don't have that IP
343 ifname, if non-NULL, will return the name of the interface this ip is tied to
/*
  Creates a TCP socket and calls bind() on it with the address in ip.

  NOTE(review): the handling of the bind() result and the return are not
  visible in this view. The signature has no ifname parameter, although
  the descriptive text preceding this function mentions one.
 */
345 bool ctdb_sys_have_ip(struct sockaddr_in ip)
351 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
/* ip is passed by value, so the caller copy is untouched */
355 ret = bind(s, (struct sockaddr *)&ip, sizeof(ip));
362 This function is used to open a raw socket to capture from
/*
  Opens an AF_PACKET raw socket for protocol ETH_P_ALL and marks it
  close-on-exec.

  NOTE(review): neither iface nor private_data is used in the lines
  visible here, and the return statement is not visible in this view.
 */
364 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
368 /* Open a socket to capture all traffic */
369 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
371 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
376 set_close_on_exec(s);
382 This function is used to do any additional cleanup required when closing
384 Note that the socket itself is closed automatically in the caller.
/*
  Cleanup hook paired with ctdb_sys_open_capture_socket(); receives the
  opaque private_data pointer.

  NOTE(review): the body is not visible in this view, so the return
  value is not documented here.
 */
386 int ctdb_sys_close_capture_socket(void *private_data)
393 called when the raw socket becomes readable
395 int ctdb_sys_read_tcp_packet(int s, void *private_data,
396 struct sockaddr_in *src, struct sockaddr_in *dst,
397 uint32_t *ack_seq, uint32_t *seq)
400 #define RCVPKTSIZE 100
401 char pkt[RCVPKTSIZE];
402 struct ether_header *eth;
406 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
407 if (ret < sizeof(*eth)+sizeof(*ip)) {
412 eth = (struct ether_header *)pkt;
414 /* We only want IP packets */
415 if (ntohs(eth->ether_type) != ETHERTYPE_IP) {
420 ip = (struct iphdr *)(eth+1);
422 /* We only want IPv4 packets */
423 if (ip->version != 4) {
426 /* Don't look at fragments */
427 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
430 /* we only want TCP */
431 if (ip->protocol != IPPROTO_TCP) {
435 /* make sure it's not a short packet */
436 if (offsetof(struct tcphdr, ack_seq) + 4 +
437 (ip->ihl*4) + sizeof(*eth) > ret) {
442 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
444 /* tell the caller which one we've found */
445 src->sin_addr.s_addr = ip->saddr;
446 src->sin_port = tcp->source;
447 dst->sin_addr.s_addr = ip->daddr;
448 dst->sin_port = tcp->dest;
449 *ack_seq = tcp->ack_seq;