2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Marc Dequènes (Duck) 2009
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 This file is a copy of 'common/system_linux.c' adapted for Hurd needs,
23 and inspired by 'common/system_aix.c' for the pcap usage.
27 #include "system/network.h"
28 #include "system/filesys.h"
29 #include "system/wait.h"
30 #include "../include/ctdb_private.h"
31 #include "lib/tevent/tevent.h"
32 #include <net/ethernet.h>
33 #include <netinet/ip6.h>
34 #include <net/if_arp.h>
/* EtherType value for IPv6 frames; compared with eth->ether_type in
   ctdb_sys_read_tcp_packet() to select the IPv6 parsing branch */
39 #define ETHERTYPE_IP6 0x86dd
43 calculate the tcp checksum for tcp over ipv6
/*
  Compute the TCP checksum for a segment that travels over IPv6.

  data - start of the TCP segment; handed to uint16_checksum()
  n    - length handed to uint16_checksum() together with data (the
         address calls below pass 16 for the 16-byte IPv6 addresses, so
         this is presumably a byte count - confirm against
         uint16_checksum())
  ip6  - IPv6 header whose source address, destination address and
         next-header value are folded into the sum

  NOTE(review): the declarations of sum and phdr, the store to phdr[0]
  and the return statement are not visible in this extract.
*/
45 static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
/* fold in the source and destination addresses, 16 bytes each */
51 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
52 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
/* the next-header value goes into phdr[1] in network byte order, then
   8 bytes of phdr are added (presumably the length/next-header words of
   the IPv6 pseudo-header - phdr[0] is set on a line not shown here) */
55 phdr[1] = htonl(ip6->ip6_nxt);
56 sum += uint16_checksum((uint16_t *)phdr, 8);
/* add the TCP segment itself */
58 sum += uint16_checksum(data, n);
/* fold everything above bit 15 back into the low 16 bits; done twice so
   that a carry produced by the first fold is absorbed as well */
60 sum = (sum & 0xFFFF) + (sum >> 16);
61 sum = (sum & 0xFFFF) + (sum >> 16);
71 send gratuitous arp reply after we have taken over an ip address
73 addr is the address we are trying to claim
74 iface is the interface name we will be using to claim the address
/*
  Hurd placeholder for the gratuitous ARP sender: according to the FIXME
  below, no ARP is sent on this platform yet. Neither addr nor iface is
  used in the lines visible here; the return statement is not visible in
  this extract.
*/
76 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
78 /* FIXME We don't do gratuitous arp on Hurd yet */
84 simple TCP checksum - assumes data is a multiple of 2 bytes long
/*
  Compute the TCP checksum for a segment that travels over IPv4.

  data - start of the TCP segment; handed to uint16_checksum()
  n    - length handed to uint16_checksum() and also added to the sum as
         the segment length
  ip   - IPv4 header whose saddr, daddr and protocol are folded in

  NOTE(review): the continuation lines of the two address calls and the
  return statement are not visible in this extract.
*/
86 static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
/* start with the sum over the TCP segment itself */
88 uint32_t sum = uint16_checksum(data, n);
/* then add the source and destination addresses from the IP header */
90 sum += uint16_checksum((uint16_t *)(void *)&ip->saddr,
92 sum += uint16_checksum((uint16_t *)(void *)&ip->daddr,
/* add the protocol number and the segment length */
94 sum += ip->protocol + n;
/* fold everything above bit 15 back into the low 16 bits; done twice so
   that a carry produced by the first fold is absorbed as well */
95 sum = (sum & 0xFFFF) + (sum >> 16);
96 sum = (sum & 0xFFFF) + (sum >> 16);
106 Send tcp segment from the specified IP/port to the specified
109 This is used to trigger the receiving host into sending its own ACK,
110 which should trigger early detection of TCP reset by the client
113 This can also be used to send RST segments (if rst is true) and also
114 if correct seq and ack numbers are provided.
/*
  Hand-build a TCP segment and send it on a raw socket. The visible code
  fills in an IP (or IPv6) header followed by a TCP header and sends
  exactly that structure.

  dest - destination address and port; written into the packet and also
         used as the sendto() target
  src  - address and port written into the packet as the sender; its
         address family selects the IPv4 or the IPv6 path
  seq  - stored in the TCP sequence-number field exactly as passed in
  ack  - stored in the TCP acknowledgement field exactly as passed in
         (no byte-order conversion is applied to seq or ack here, so
         callers presumably pass them in network byte order - confirm)
  rst  - the header text says this selects an RST segment; the lines that
         set the TCP flags are not visible in this extract

  NOTE(review): the case labels, the return statements and any close()
  of the socket are not visible in this extract.
*/
116 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
117 const ctdb_sock_addr *src,
118 uint32_t seq, uint32_t ack, int rst)
124 ctdb_sock_addr *tmpdest;
/* the address family of src decides which packet layout is built */
134 switch (src->ip.sin_family) {
/* IPv4 header: ihl is expressed in 32-bit words, tot_len is the size of
   the whole ip4pkt structure in network byte order */
137 ip4pkt.ip.version = 4;
138 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
139 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
141 ip4pkt.ip.protocol = IPPROTO_TCP;
142 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
143 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
/* TCP header: ports are copied unchanged from the socket addresses */
146 ip4pkt.tcp.source = src->ip.sin_port;
147 ip4pkt.tcp.dest = dest->ip.sin_port;
148 ip4pkt.tcp.seq = seq;
149 ip4pkt.tcp.ack_seq = ack;
/* the data offset, like ihl, is counted in 32-bit words */
154 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
155 /* this makes it easier to spot in a sniffer */
156 ip4pkt.tcp.window = htons(1234);
/* the checksum covers sizeof(ip4pkt.tcp) bytes, i.e. the TCP header */
157 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
/* NOTE(review): the protocol argument is wrapped in htons() here, while
   the IPv6 socket() call further down passes IPPROTO_RAW directly -
   confirm which form is intended.
   IP_HDRINCL is then enabled because the buffer handed to sendto()
   already starts with the IP header built above. */
159 /* open a raw socket to send this segment from */
160 s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW));
162 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
167 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
169 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
176 set_close_on_exec(s);
/* anything other than a full-length send is reported as a failure */
178 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0, &dest->ip, sizeof(dest->ip));
180 if (ret != sizeof(ip4pkt)) {
181 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
/* IPv6 header: 0x60 places version 6 in the high nibble of ip6_vfc.
   NOTE(review): the payload length is the literal 20 rather than
   sizeof(ip6pkt.tcp) - presumably the size of the TCP header; confirm. */
187 ip6pkt.ip6.ip6_vfc = 0x60;
188 ip6pkt.ip6.ip6_plen = htons(20);
189 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
190 ip6pkt.ip6.ip6_hlim = 64;
191 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
192 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
/* TCP header for the IPv6 case, filled in the same way as for IPv4 */
194 ip6pkt.tcp.source = src->ip6.sin6_port;
195 ip6pkt.tcp.dest = dest->ip6.sin6_port;
196 ip6pkt.tcp.seq = seq;
197 ip6pkt.tcp.ack_seq = ack;
202 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
203 /* this makes it easier to spot in a sniffer */
204 ip6pkt.tcp.window = htons(1234);
205 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
/* raw IPv6 socket; the destination port is cleared around sendto() and
   restored afterwards, which is why the const on dest is discarded */
207 s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
209 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
213 /* sendto() dont like if the port is set and the socket is
216 tmpdest = discard_const(dest);
217 tmpport = tmpdest->ip6.sin6_port;
219 tmpdest->ip6.sin6_port = 0;
220 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, &dest->ip6, sizeof(dest->ip6));
221 tmpdest->ip6.sin6_port = tmpport;
224 if (ret != sizeof(ip6pkt)) {
225 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
231 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
239 This function is used to open a raw socket to capture from
// Open a live pcap capture on an interface and hand the handle back.
//
// iface        - device name passed to pcap_open_live()
// private_data - out parameter; the pcap_t handle is stored through it
//
// Returns the descriptor reported by pcap_fileno() for the new handle.
// The value returned on failure is not visible in this extract.
241 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
// Arguments after iface: snapshot length 100, promiscuous mode off,
// read timeout 0, error buffer.
// NOTE(review): the error buffer is NULL; libpcap documents errbuf as a
// buffer of at least PCAP_ERRBUF_SIZE chars that is written on failure -
// confirm that passing NULL cannot crash when the open fails.
245 pt=pcap_open_live(iface, 100, 0, 0, NULL);
247 DEBUG(DEBUG_CRIT,("Failed to open capture device %s\n", iface));
// Store the handle in the caller's slot, viewed as a pcap_t pointer slot.
250 *((pcap_t **)private_data) = pt;
// The caller gets the descriptor that belongs to the pcap handle.
252 return pcap_fileno(pt);
255 /* This function is used to close the capture socket
// Counterpart of ctdb_sys_open_capture_socket(): private_data is cast
// back to the pcap_t handle. What is then done with pt, and the return
// value, are not visible in this extract.
257 int ctdb_sys_close_capture_socket(void *private_data)
259 pcap_t *pt = (pcap_t *)private_data;
266 called when the raw socket becomes readable
// Read one captured frame and, when it is an unfragmented IPv4 TCP packet
// or an IPv6 packet whose next header is TCP, report its endpoints.
//
// s            - descriptor passed to recv()
// private_data - not referenced in the lines visible here
// src, dst     - out: address family, address and port of the sender and
//                of the receiver, copied from the packet headers
// ack_seq      - out: TCP acknowledgement number, copied from the packet
//                without byte-order conversion
// seq          - out: presumably the TCP sequence number; the store to
//                it is not visible in this extract
//
// Return values are not visible in this extract.
268 int ctdb_sys_read_tcp_packet(int s, void *private_data,
269 ctdb_sock_addr *src, ctdb_sock_addr *dst,
270 uint32_t *ack_seq, uint32_t *seq)
// Size of the receive buffer; the same value as the snapshot length
// literal passed to pcap_open_live() elsewhere in this file.
273 #define RCVPKTSIZE 100
274 char pkt[RCVPKTSIZE];
275 struct ether_header *eth;
// Read one frame into pkt and reject anything too short to hold an
// Ethernet header plus an IPv4 header.
// NOTE(review): ret is compared with a sizeof expression (size_t). If ret
// is a signed int - its declaration is not visible here - a recv() error
// return of -1 is converted to a huge unsigned value and passes this
// check. Confirm and compare against a signed length if so.
280 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
281 if (ret < sizeof(*eth)+sizeof(*ip)) {
// The buffer is interpreted as starting with an Ethernet header.
286 eth = (struct ether_header *)pkt;
288 /* we want either IPv4 or IPv6 */
289 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
// The IPv4 header follows the Ethernet header directly.
291 ip = (struct iphdr *)(eth+1);
293 /* We only want IPv4 packets */
294 if (ip->version != 4) {
297 /* Don't look at fragments */
298 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
301 /* we only want TCP */
302 if (ip->protocol != IPPROTO_TCP) {
306 /* make sure it's not a short packet */
// Required: Ethernet header + IP header (ihl is in 32-bit words) + the
// TCP header up to and including the 4-byte ack_seq field.
307 if (offsetof(struct tcphdr, ack_seq) + 4 +
308 (ip->ihl*4) + sizeof(*eth) > ret) {
// The TCP header starts ihl*4 bytes after the start of the IP header.
312 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
314 /* tell the caller which one we've found */
315 src->ip.sin_family = AF_INET;
316 src->ip.sin_addr.s_addr = ip->saddr;
317 src->ip.sin_port = tcp->source;
318 dst->ip.sin_family = AF_INET;
319 dst->ip.sin_addr.s_addr = ip->daddr;
320 dst->ip.sin_port = tcp->dest;
321 *ack_seq = tcp->ack_seq;
325 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
// The IPv6 header follows the Ethernet header directly.
// NOTE(review): no short-packet check comparable to the IPv4 branch is
// visible for this branch - confirm that one exists on the lines not
// shown, or that the initial length check is considered sufficient.
327 ip6 = (struct ip6_hdr *)(eth+1);
329 /* we only want TCP */
330 if (ip6->ip6_nxt != IPPROTO_TCP) {
// The TCP header is taken to follow the fixed IPv6 header immediately;
// packets whose next header is anything but TCP were rejected above.
335 tcp = (struct tcphdr *)(ip6+1);
337 /* tell the caller which one we've found */
338 src->ip6.sin6_family = AF_INET6;
339 src->ip6.sin6_port = tcp->source;
340 src->ip6.sin6_addr = ip6->ip6_src;
342 dst->ip6.sin6_family = AF_INET6;
343 dst->ip6.sin6_port = tcp->dest;
344 dst->ip6.sin6_addr = ip6->ip6_dst;
346 *ack_seq = tcp->ack_seq;
/* NOTE(review): only the signature is visible in this extract. Judging by
   the name, it reports whether the interface named iface exists - confirm
   against the body. */
355 bool ctdb_sys_check_iface_exists(const char *iface)
/* Stub: according to the FIXME below this is not implemented in this
   file. Neither fd nor peer_pid is used in the lines visible here; the
   return statement is not visible in this extract. */
360 int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
362 /* FIXME not implemented */