2 ctdb system specific code to manage raw sockets on kFreeBSD
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Marc Dequènes (Duck) 2009
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 This file is a copy of 'common/system_linux.c' adapted for Hurd^W kFreeBSD
23 needs, and inspired by 'common/system_aix.c' for the pcap usage.
27 #include "system/network.h"
28 #include "system/filesys.h"
29 #include "system/wait.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/blocking.h"
34 #include "protocol/protocol.h"
36 #include <net/ethernet.h>
37 #include <netinet/ip6.h>
38 #include <net/if_arp.h>
41 #include "common/logging.h"
42 #include "common/system.h"
45 #define ETHERTYPE_IP6 0x86dd
49 calculate the tcp checksum for tcp over ipv6
51 static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
57 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
58 sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
61 phdr[1] = htonl(ip6->ip6_nxt);
62 sum += uint16_checksum((uint16_t *)phdr, 8);
64 sum += uint16_checksum(data, n);
66 sum = (sum & 0xFFFF) + (sum >> 16);
67 sum = (sum & 0xFFFF) + (sum >> 16);
77 send gratuitous arp reply after we have taken over an ip address
79 addr is the address we are trying to claim
80 iface is the interface name we will be using to claim the address
82 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
84 /* FIXME kFreeBSD: We don't do gratuitous arp yet */
90 simple TCP checksum - assumes data is multiple of 2 bytes long
92 static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
94 uint32_t sum = uint16_checksum(data, n);
96 sum += uint16_checksum((uint16_t *)(void *)&ip->saddr,
98 sum += uint16_checksum((uint16_t *)(void *)&ip->daddr,
100 sum += ip->protocol + n;
101 sum = (sum & 0xFFFF) + (sum >> 16);
102 sum = (sum & 0xFFFF) + (sum >> 16);
112 Send tcp segment from the specified IP/port to the specified
115 This is used to trigger the receiving host into sending its own ACK,
116 which should trigger early detection of TCP reset by the client
119 This can also be used to send RST segments (if rst is true) and also
120 if correct seq and ack numbers are provided.
122 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
123 const ctdb_sock_addr *src,
124 uint32_t seq, uint32_t ack, int rst)
130 ctdb_sock_addr *tmpdest;
140 switch (src->ip.sin_family) {
143 ip4pkt.ip.version = 4;
144 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
145 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
147 ip4pkt.ip.protocol = IPPROTO_TCP;
148 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
149 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
152 ip4pkt.tcp.source = src->ip.sin_port;
153 ip4pkt.tcp.dest = dest->ip.sin_port;
154 ip4pkt.tcp.seq = seq;
155 ip4pkt.tcp.ack_seq = ack;
160 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
161 /* this makes it easier to spot in a sniffer */
162 ip4pkt.tcp.window = htons(1234);
163 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
165 /* open a raw socket to send this segment from */
166 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
168 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
173 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
175 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
181 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0, &dest->ip, sizeof(dest->ip));
183 if (ret != sizeof(ip4pkt)) {
184 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
190 ip6pkt.ip6.ip6_vfc = 0x60;
191 ip6pkt.ip6.ip6_plen = htons(20);
192 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
193 ip6pkt.ip6.ip6_hlim = 64;
194 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
195 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
197 ip6pkt.tcp.source = src->ip6.sin6_port;
198 ip6pkt.tcp.dest = dest->ip6.sin6_port;
199 ip6pkt.tcp.seq = seq;
200 ip6pkt.tcp.ack_seq = ack;
205 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
206 /* this makes it easier to spot in a sniffer */
207 ip6pkt.tcp.window = htons(1234);
208 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
210 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
212 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
216 /* sendto() doesn't like it if the port is set and the socket is
219 tmpdest = discard_const(dest);
220 tmpport = tmpdest->ip6.sin6_port;
222 tmpdest->ip6.sin6_port = 0;
223 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, &dest->ip6, sizeof(dest->ip6));
224 tmpdest->ip6.sin6_port = tmpport;
227 if (ret != sizeof(ip6pkt)) {
228 DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
234 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
242 This function is used to open a raw socket to capture from
244 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
248 pt=pcap_open_live(iface, 100, 0, 0, NULL);
250 DEBUG(DEBUG_CRIT,("Failed to open capture device %s\n", iface));
253 *((pcap_t **)private_data) = pt;
255 return pcap_fileno(pt);
258 /* This function is used to close the capture socket
260 int ctdb_sys_close_capture_socket(void *private_data)
262 pcap_t *pt = (pcap_t *)private_data;
269 called when the raw socket becomes readable
271 int ctdb_sys_read_tcp_packet(int s, void *private_data,
272 ctdb_sock_addr *src, ctdb_sock_addr *dst,
273 uint32_t *ack_seq, uint32_t *seq,
274 int *rst, uint16_t *window)
277 #define RCVPKTSIZE 100
278 char pkt[RCVPKTSIZE];
279 struct ether_header *eth;
284 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
285 if (ret < sizeof(*eth)+sizeof(*ip)) {
290 eth = (struct ether_header *)pkt;
292 /* we want either IPv4 or IPv6 */
293 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
295 ip = (struct iphdr *)(eth+1);
297 /* We only want IPv4 packets */
298 if (ip->version != 4) {
301 /* Don't look at fragments */
302 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
305 /* we only want TCP */
306 if (ip->protocol != IPPROTO_TCP) {
310 /* make sure it's not a short packet */
311 if (offsetof(struct tcphdr, ack_seq) + 4 +
312 (ip->ihl*4) + sizeof(*eth) > ret) {
316 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
318 /* tell the caller which one we've found */
319 src->ip.sin_family = AF_INET;
320 src->ip.sin_addr.s_addr = ip->saddr;
321 src->ip.sin_port = tcp->source;
322 dst->ip.sin_family = AF_INET;
323 dst->ip.sin_addr.s_addr = ip->daddr;
324 dst->ip.sin_port = tcp->dest;
325 *ack_seq = tcp->ack_seq;
327 if (window != NULL) {
328 *window = tcp->window;
335 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
337 ip6 = (struct ip6_hdr *)(eth+1);
339 /* we only want TCP */
340 if (ip6->ip6_nxt != IPPROTO_TCP) {
345 tcp = (struct tcphdr *)(ip6+1);
347 /* tell the caller which one we've found */
348 src->ip6.sin6_family = AF_INET6;
349 src->ip6.sin6_port = tcp->source;
350 src->ip6.sin6_addr = ip6->ip6_src;
352 dst->ip6.sin6_family = AF_INET6;
353 dst->ip6.sin6_port = tcp->dest;
354 dst->ip6.sin6_addr = ip6->ip6_dst;
356 *ack_seq = tcp->ack_seq;
358 if (window != NULL) {
359 *window = tcp->window;
371 bool ctdb_sys_check_iface_exists(const char *iface)
373 /* FIXME kFreeBSD: Interface always considered present */
377 int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
379 /* FIXME kFreeBSD: get_peer_pid not implemented */