1 /* Copyright (C) 2017 Cavium, Inc.
3 * This program is free software; you can redistribute it and/or modify it
4 * under the terms of version 2 of the GNU General Public License
5 * as published by the Free Software Foundation.
8 #include <linux/netlink.h>
9 #include <linux/rtnetlink.h>
16 #include <sys/socket.h>
19 #include <arpa/inet.h>
24 #include <sys/ioctl.h>
25 #include <sys/syscall.h>
27 #include "bpf/libbpf.h"
28 #include <sys/resource.h>
/* File-scope state shared by main(), the signal handlers and the netlink
 * helpers below.
 */
/* sock, sock_arp: netlink socket descriptors (assigned in monitor_route()
 * and, for sock, in the table-dump helpers).
 * flags: XDP attach flags handed to bpf_set_link_xdp_fd() and
 * bpf_get_link_xdp_id(); main() adjusts them while parsing options.
 */
31 int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
/* Number of entries in the two lists below; initialised in main(). */
32 static int total_ifindex;
/* Interface indexes resolved with if_nametoindex() in main(). */
33 static int *ifindex_list;
/* Per interface: id (info.id) of the XDP program that main() attached. */
34 static __u32 *prog_id_list;
/* Map descriptors, resolved by name in main() once the object is loaded. */
36 static int lpm_map_fd;
37 static int rxcnt_map_fd;
38 static int arp_table_map_fd;
39 static int exact_match_map_fd;
40 static int tx_port_map_fd;
/* Forward declaration: read_route() calls get_route_table(), which is
 * defined after it.
 */
42 static int get_route_table(int rtm_family);
/* Signal handler installed by main() for SIGINT and SIGTERM.
 *
 * Walks ifindex_list and asks the kernel, through bpf_get_link_xdp_id(),
 * which XDP program is currently attached to each interface. The program is
 * detached (bpf_set_link_xdp_fd() with fd -1) only when that id equals the
 * id recorded in prog_id_list; the two remaining messages report that no
 * program id was found or that a different program is attached.
 *
 * NOTE(review): the conditions selecting those two messages, and whatever
 * follows the loop, are on lines not shown here.
 */
43 static void int_exit(int sig)
48 for (i = 0; i < total_ifindex; i++) {
49 if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
50 printf("bpf_get_link_xdp_id on iface %d failed\n",
/* Only remove the program whose id was recorded for this interface. */
54 if (prog_id_list[i] == prog_id)
55 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
57 printf("couldn't find a prog id on iface %d\n",
60 printf("program on iface %d changed, not removing\n",
/* Signal handler registered for SIGINT and SIGTERM by monitor_route(). */
67 static void close_and_exit(int sig)
75 /* Get the MAC address of the interface with the given name.
 *
 * iface: interface name; at most IFNAMSIZ - 1 bytes of it are copied into
 * the ifreq handed to the kernel.
 *
 * Issues SIOCGIFHWADDR on an AF_INET datagram socket and copies the first
 * six bytes of ifr_hwaddr.sa_data into mac in memory order, so byte i of
 * the hardware address lands at byte offset i of mac.
 *
 * NOTE(review): strncpy() does not NUL-terminate when the source fills the
 * limit; confirm ifr is zeroed before the copy. Also confirm that fd is
 * closed on the ioctl() failure path and that a failed socket() is handled;
 * neither is visible here.
 */
76 static __be64 getmac(char *iface)
82 fd = socket(AF_INET, SOCK_DGRAM, 0);
83 ifr.ifr_addr.sa_family = AF_INET;
84 strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
85 if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
86 printf("ioctl failed leaving....\n");
/* Byte-wise copy: keeps the address bytes in the order the kernel returned. */
89 for (i = 0; i < 6 ; i++)
90 *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
/* Receive netlink data from sock.
 *
 * Each recv() writes at buf_ptr and is limited to sizeof(buf) - nll bytes
 * (nll presumably being the byte count accumulated so far). The data at
 * buf_ptr is then inspected as a struct nlmsghdr: NLMSG_DONE is tested for,
 * as is the presence of RTMGRP_NEIGH or RTMGRP_IPV4_ROUTE in
 * sock_addr.nl_groups.
 *
 * NOTE(review): the actions taken on those three tests, the surrounding
 * loop and the return value are on lines not shown here. Callers print
 * strerror() of the result, so failures are presumably reported through the
 * return value; confirm the sign convention.
 *
 * The sock parameter shadows the file-scope sock; sock_addr is passed by
 * value.
 */
95 static int recv_msg(struct sockaddr_nl sock_addr, int sock)
103 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
107 nh = (struct nlmsghdr *)buf_ptr;
109 if (nh->nlmsg_type == NLMSG_DONE)
113 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
116 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
122 /* Function to parse the route entries returned by netlink.
123 * Updates the map entries related to each route entry. */
/* Parse a buffer of netlink route messages and mirror them into the maps.
 *
 * nh:  first message header in the buffer.
 * nll: number of bytes remaining in the buffer (consumed by NLMSG_OK /
 *      NLMSG_NEXT).
 *
 * Prints the type of the first message and a table header, then, for every
 * message:
 *  - reads the rtmsg and walks its attributes, rendering them as text
 *    (the case labels are on lines not shown) and converting the text back
 *    to integers with atoi();
 *  - resolves the interface name with if_indextoname() and its MAC with
 *    getmac();
 *  - writes tx_port[ifindex] = ifindex;
 *  - for AF_INET, builds an LPM key from the destination and prefix length.
 *    A destination with dst_len == 32 is also handled through exact_match:
 *    RTM_DELROUTE deletes from it, otherwise arp_table is consulted (a hit
 *    stores the MAC in direct_entry.arp.mac and sets arp.dst) and
 *    direct_entry is written to exact_match;
 *  - looks the key up in lpm_map and fills in / updates / deletes the entry.
 *    After an RTM_DELROUTE deletion the whole route table is re-read with
 *    get_route_table(AF_INET).
 *  The scratch buffers and route are cleared before the next message.
 *
 * NOTE(review), all based on the lines visible here:
 *  - every map update/delete is wrapped in assert(); building with NDEBUG
 *    would remove the calls themselves.
 *  - alloca(sizeof(char *) * IFNAMSIZ) reserves pointer-size times IFNAMSIZ
 *    bytes where IFNAMSIZ bytes are enough for if_indextoname().
 *  - if_indextoname() returns NULL on failure and its result is handed to
 *    getmac() unchecked.
 *  - prefix_key->prefixlen = 32 is immediately overwritten by
 *    route.dst_len.
 *  - route is memset twice in the reset sequence at the end of the loop.
 *  - which branch of the prefix/metric comparison performs the final
 *    overwrite cannot be determined from the lines shown.
 */
125 static void read_route(struct nlmsghdr *nh, int nll)
127 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
128 struct bpf_lpm_trie_key *prefix_key;
129 struct rtattr *rt_attr;
130 struct rtmsg *rt_msg;
135 int dst_len, iface, metric;
146 struct arp_table arp;
151 if (nh->nlmsg_type == RTM_DELROUTE)
152 printf("DELETING Route entry\n");
153 else if (nh->nlmsg_type == RTM_GETROUTE)
154 printf("READING Route entry\n");
155 else if (nh->nlmsg_type == RTM_NEWROUTE)
156 printf("NEW Route entry\n");
158 printf("%d\n", nh->nlmsg_type);
160 memset(&route, 0, sizeof(route));
161 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
162 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
163 rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
164 rtm_family = rt_msg->rtm_family;
165 if (rtm_family == AF_INET)
166 if (rt_msg->rtm_table != RT_TABLE_MAIN)
168 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
169 rtl = RTM_PAYLOAD(nh);
171 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
172 switch (rt_attr->rta_type) {
175 (*((__be32 *)RTA_DATA(rt_attr))));
179 *((__be32 *)RTA_DATA(rt_attr)));
183 *((int *)RTA_DATA(rt_attr)));
186 sprintf(metrics, "%u",
187 *((int *)RTA_DATA(rt_attr)));
192 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
/* The attributes were rendered as text; convert them back to integers. */
193 route.dst = atoi(dsts);
194 route.dst_len = atoi(dsts_len);
195 route.gw = atoi(gws);
196 route.iface = atoi(ifs);
197 route.metric = atoi(metrics);
198 route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
199 route.iface_name = if_indextoname(route.iface, route.iface_name);
200 route.mac = getmac(route.iface_name);
/* tx_port maps the interface index to itself. */
203 assert(bpf_map_update_elem(tx_port_map_fd,
204 &route.iface, &route.iface, 0) == 0);
205 if (rtm_family == AF_INET) {
214 prefix_key = alloca(sizeof(*prefix_key) + 3);
215 prefix_value = alloca(sizeof(*prefix_value));
217 prefix_key->prefixlen = 32;
218 prefix_key->prefixlen = route.dst_len;
/* Keep only the low 48 bits of the value returned by getmac(). */
219 direct_entry.mac = route.mac & 0xffffffffffff;
220 direct_entry.ifindex = route.iface;
221 direct_entry.arp.mac = 0;
222 direct_entry.arp.dst = 0;
223 if (route.dst_len == 32) {
224 if (nh->nlmsg_type == RTM_DELROUTE) {
225 assert(bpf_map_delete_elem(exact_match_map_fd,
228 if (bpf_map_lookup_elem(arp_table_map_fd,
230 &direct_entry.arp.mac) == 0)
231 direct_entry.arp.dst = route.dst;
232 assert(bpf_map_update_elem(exact_match_map_fd,
234 &direct_entry, 0) == 0);
/* Split the destination into the key's four data bytes, lowest byte first. */
237 for (i = 0; i < 4; i++)
238 prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
240 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
241 (int)prefix_key->data[0],
242 (int)prefix_key->data[1],
243 (int)prefix_key->data[2],
244 (int)prefix_key->data[3],
245 route.gw, route.dst_len,
248 if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
250 for (i = 0; i < 4; i++)
251 prefix_value->prefix[i] = prefix_key->data[i];
252 prefix_value->value = route.mac & 0xffffffffffff;
253 prefix_value->ifindex = route.iface;
254 prefix_value->gw = route.gw;
255 prefix_value->metric = route.metric;
257 assert(bpf_map_update_elem(lpm_map_fd,
262 if (nh->nlmsg_type == RTM_DELROUTE) {
263 printf("deleting entry\n");
264 printf("prefix key=%d.%d.%d.%d/%d",
269 prefix_key->prefixlen);
270 assert(bpf_map_delete_elem(lpm_map_fd,
273 /* Rereading the route table to check if
274 * there is an entry with the same
275 * prefix but a different metric as the
278 get_route_table(AF_INET);
279 } else if (prefix_key->data[0] ==
280 prefix_value->prefix[0] &&
281 prefix_key->data[1] ==
282 prefix_value->prefix[1] &&
283 prefix_key->data[2] ==
284 prefix_value->prefix[2] &&
285 prefix_key->data[3] ==
286 prefix_value->prefix[3] &&
287 route.metric >= prefix_value->metric) {
290 for (i = 0; i < 4; i++)
291 prefix_value->prefix[i] =
293 prefix_value->value =
294 route.mac & 0xffffffffffff;
295 prefix_value->ifindex = route.iface;
296 prefix_value->gw = route.gw;
297 prefix_value->metric = route.metric;
298 assert(bpf_map_update_elem(lpm_map_fd,
305 memset(&route, 0, sizeof(route));
306 memset(dsts, 0, sizeof(dsts));
307 memset(dsts_len, 0, sizeof(dsts_len));
308 memset(gws, 0, sizeof(gws));
309 memset(ifs, 0, sizeof(ifs));
310 memset(&route, 0, sizeof(route));
314 /* Read the existing route table; called when the process is launched
 * and again from read_route() after a route has been deleted.
 *
 * rtm_family: address family placed in the request (callers pass AF_INET).
 *
 * Opens a NETLINK_ROUTE raw socket, binds it with a zeroed sockaddr_nl
 * (only nl_family set, so no multicast groups), and sends an RTM_GETROUTE
 * request with NLM_F_REQUEST | NLM_F_DUMP for RT_TABLE_MAIN. The reply is
 * collected with recv_msg() and buf is then viewed as a struct nlmsghdr.
 *
 * NOTE(review): what is done with nh afterwards, whether the socket is
 * closed, and the return values are on lines not shown here. The receive
 * error message passes nll to strerror() whereas the other messages use
 * errno; confirm that nll holds a positive errno value at that point.
 */
315 static int get_route_table(int rtm_family)
317 struct sockaddr_nl sa;
331 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
333 printf("open netlink socket: %s\n", strerror(errno));
336 memset(&sa, 0, sizeof(sa));
337 sa.nl_family = AF_NETLINK;
338 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
339 printf("bind to netlink: %s\n", strerror(errno));
/* Build the dump request: netlink header followed by a struct rtmsg. */
343 memset(&req, 0, sizeof(req));
344 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
345 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
346 req.nl.nlmsg_type = RTM_GETROUTE;
348 req.rt.rtm_family = rtm_family;
349 req.rt.rtm_table = RT_TABLE_MAIN;
350 req.nl.nlmsg_pid = 0;
351 req.nl.nlmsg_seq = ++seq;
352 memset(&msg, 0, sizeof(msg));
353 iov.iov_base = (void *)&req.nl;
354 iov.iov_len = req.nl.nlmsg_len;
357 ret = sendmsg(sock, &msg, 0);
359 printf("send to netlink: %s\n", strerror(errno));
363 memset(buf, 0, sizeof(buf));
364 nll = recv_msg(sa, sock);
366 printf("recv from netlink: %s\n", strerror(nll));
370 nh = (struct nlmsghdr *)buf;
377 /* Function to parse the ARP entries returned by netlink.
378 * Updates the map entries related to each ARP entry. */
/* Parse a buffer of netlink neighbour messages and mirror them into the
 * maps.
 *
 * nh:  first message header in the buffer.
 * nll: number of bytes remaining in the buffer.
 *
 * For every message the ndmsg attributes are walked (case labels are on
 * lines not shown), the address and hardware address are rendered as text
 * and converted back with atoi() / atol(), and the pair is printed. For
 * AF_INET:
 *  - if exact_match already holds an entry for the lookup key, its arp
 *    fields are cleared on RTM_DELNEIGH or set on RTM_NEWNEIGH and the
 *    entry is written back;
 *  - arp_table is updated: RTM_DELNEIGH deletes the entry keyed by
 *    arp_entry.dst, RTM_NEWNEIGH inserts/updates it.
 *  The scratch state is cleared before the next message.
 *
 * NOTE(review):
 *  - the hardware address is read through a __be64 pointer; confirm the
 *    attribute payload is at least 8 bytes long.
 *  - atol() returns long, which cannot hold a 48-bit MAC where long is
 *    32 bits.
 *  - the map calls sit inside assert() and vanish under NDEBUG.
 */
380 static void read_arp(struct nlmsghdr *nh, int nll)
382 struct rtattr *rt_attr;
383 char dsts[24], mac[24];
384 struct ndmsg *rt_msg;
392 struct arp_table arp;
397 if (nh->nlmsg_type == RTM_GETNEIGH)
398 printf("READING arp entry\n");
399 printf("Address\tHwAddress\n");
400 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
401 rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
402 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
403 ndm_family = rt_msg->ndm_family;
404 rtl = RTM_PAYLOAD(nh);
405 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
406 switch (rt_attr->rta_type) {
409 *((__be32 *)RTA_DATA(rt_attr)));
413 *((__be64 *)RTA_DATA(rt_attr)));
/* Convert the textual attribute values back to integers. */
419 arp_entry.dst = atoi(dsts);
420 arp_entry.mac = atol(mac);
421 printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
422 if (ndm_family == AF_INET) {
/* Keep an existing exact-match entry in step with the neighbour change. */
423 if (bpf_map_lookup_elem(exact_match_map_fd,
425 &direct_entry) == 0) {
426 if (nh->nlmsg_type == RTM_DELNEIGH) {
427 direct_entry.arp.dst = 0;
428 direct_entry.arp.mac = 0;
429 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
430 direct_entry.arp.dst = arp_entry.dst;
431 direct_entry.arp.mac = arp_entry.mac;
433 assert(bpf_map_update_elem(exact_match_map_fd,
437 memset(&direct_entry, 0, sizeof(direct_entry));
439 if (nh->nlmsg_type == RTM_DELNEIGH) {
440 assert(bpf_map_delete_elem(arp_table_map_fd,
441 &arp_entry.dst) == 0);
442 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
443 assert(bpf_map_update_elem(arp_table_map_fd,
449 memset(&arp_entry, 0, sizeof(arp_entry));
450 memset(dsts, 0, sizeof(dsts));
454 /* Read the existing ARP (neighbour) table when the process is launched.
 *
 * rtm_family: address family placed in the request (main() passes AF_INET).
 *
 * Opens a NETLINK_ROUTE raw socket, binds it with a zeroed sockaddr_nl
 * (only nl_family set), and sends an RTM_GETNEIGH request with
 * NLM_F_REQUEST | NLM_F_DUMP and ndm_state = NUD_REACHABLE. The reply is
 * collected with recv_msg() and buf is then viewed as a struct nlmsghdr.
 *
 * NOTE(review):
 *  - nlmsg_len is computed from sizeof(struct rtmsg) although the payload
 *    fields written are ndm_* (struct ndmsg) fields; confirm this is
 *    intended or that the two sizes coincide.
 *  - the receive error message passes nll to strerror() whereas the other
 *    messages use errno.
 *  - what is done with nh afterwards, socket cleanup and the return values
 *    are on lines not shown here.
 */
455 static int get_arp_table(int rtm_family)
457 struct sockaddr_nl sa;
470 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
472 printf("open netlink socket: %s\n", strerror(errno));
475 memset(&sa, 0, sizeof(sa));
476 sa.nl_family = AF_NETLINK;
477 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
478 printf("bind to netlink: %s\n", strerror(errno));
/* Build the dump request for neighbour entries. */
482 memset(&req, 0, sizeof(req));
483 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
484 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
485 req.nl.nlmsg_type = RTM_GETNEIGH;
486 req.rt.ndm_state = NUD_REACHABLE;
487 req.rt.ndm_family = rtm_family;
488 req.nl.nlmsg_pid = 0;
489 req.nl.nlmsg_seq = ++seq;
490 memset(&msg, 0, sizeof(msg));
491 iov.iov_base = (void *)&req.nl;
492 iov.iov_len = req.nl.nlmsg_len;
495 ret = sendmsg(sock, &msg, 0);
497 printf("send to netlink: %s\n", strerror(errno));
501 memset(buf, 0, sizeof(buf));
502 nll = recv_msg(sa, sock);
504 printf("recv from netlink: %s\n", strerror(nll));
508 nh = (struct nlmsghdr *)buf;
515 /* Function to keep track of and apply changes in the route and ARP tables.
516 * Gives regular statistics of packets forwarded. */
/* Watch for route and neighbour changes and print receive statistics.
 *
 * Setup:
 *  - sock: non-blocking NETLINK_ROUTE socket bound to the groups
 *    RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
 *  - sock_arp: non-blocking NETLINK_ROUTE socket bound to
 *    RTMGRP_NEIGH | RTMGRP_NOTIFY;
 *  - close_and_exit() is installed for SIGINT and SIGTERM.
 *
 * Work visible in the body: for each of the nr_keys (256) keys the per-CPU
 * values are read from the rxcnt map, the differences from the previous
 * sample are summed over nr_cpus, sum / interval is printed as pkt/s, and
 * the sample is saved in prev. Each socket is then polled with a timeout
 * of 3 (milliseconds for poll()); when data is ready it is received with
 * recv_msg() and buf is viewed as a struct nlmsghdr.
 *
 * NOTE(review):
 *  - POLL_IN is used for pollfd.events and compared with poll()'s return
 *    value. poll() documents POLLIN for events and returns the number of
 *    ready descriptors; confirm the constants coincide on the target
 *    platform or switch to POLLIN and a "> 0" test.
 *  - fcntl(F_SETFL, O_NONBLOCK) replaces the file status flags instead of
 *    OR-ing into the current ones.
 *  - prev is a variable-length array of nr_keys * nr_cpus __u64 on the
 *    stack.
 *  - the outer loop, the handling of the received messages and the return
 *    values are on lines not shown here.
 */
518 static int monitor_route(void)
520 unsigned int nr_cpus = bpf_num_possible_cpus();
521 const unsigned int nr_keys = 256;
522 struct pollfd fds_route, fds_arp;
523 __u64 prev[nr_keys][nr_cpus];
524 struct sockaddr_nl la, lr;
525 __u64 values[nr_cpus];
/* Socket for route-change notifications. */
532 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
534 printf("open netlink socket: %s\n", strerror(errno));
538 fcntl(sock, F_SETFL, O_NONBLOCK);
539 memset(&lr, 0, sizeof(lr));
540 lr.nl_family = AF_NETLINK;
541 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
542 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
543 printf("bind to netlink: %s\n", strerror(errno));
548 fds_route.events = POLL_IN;
/* Socket for neighbour (ARP) notifications. */
550 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
552 printf("open netlink socket: %s\n", strerror(errno));
556 fcntl(sock_arp, F_SETFL, O_NONBLOCK);
557 memset(&la, 0, sizeof(la));
558 la.nl_family = AF_NETLINK;
559 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
560 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
561 printf("bind to netlink: %s\n", strerror(errno));
565 fds_arp.fd = sock_arp;
566 fds_arp.events = POLL_IN;
568 memset(prev, 0, sizeof(prev));
570 signal(SIGINT, close_and_exit);
571 signal(SIGTERM, close_and_exit);
/* Per-key packet rate: sum the per-CPU deltas since the previous sample. */
574 for (key = 0; key < nr_keys; key++) {
577 assert(bpf_map_lookup_elem(rxcnt_map_fd,
579 for (i = 0; i < nr_cpus; i++)
580 sum += (values[i] - prev[key][i]);
582 printf("proto %u: %10llu pkt/s\n",
583 key, sum / interval);
584 memcpy(prev[key], values, sizeof(values));
/* Route notifications. */
587 memset(buf, 0, sizeof(buf));
588 if (poll(&fds_route, 1, 3) == POLL_IN) {
589 nll = recv_msg(lr, sock);
591 printf("recv from netlink: %s\n", strerror(nll));
596 nh = (struct nlmsghdr *)buf;
597 printf("Routing table updated.\n");
/* Neighbour notifications. */
600 memset(buf, 0, sizeof(buf));
601 if (poll(&fds_arp, 1, 3) == POLL_IN) {
602 nll = recv_msg(la, sock_arp);
604 printf("recv from netlink: %s\n", strerror(nll));
609 nh = (struct nlmsghdr *)buf;
/* Usage text for the program named prog: a synopsis line followed by the
 * option list (the visible fragment documents -F, "force loading prog").
 *
 * NOTE(review): the call that emits the text and the remaining option
 * lines are not shown here.
 */
619 static void usage(const char *prog)
622 "%s: %s [OPTS] interface name list\n\n"
625 " -F force loading prog\n",
/* Entry point: load the XDP program, attach it to the named interfaces,
 * seed the maps from the kernel's route and ARP tables, then monitor.
 *
 * Steps visible in the body:
 *  1. derive the object file name "<argv[0]>_kern.o";
 *  2. parse the options in optstr ("SF"): one sets XDP_FLAGS_SKB_MODE, the
 *     other clears XDP_FLAGS_UPDATE_IF_NOEXIST (presumably -S and -F
 *     respectively; the case labels are not shown);
 *  3. raise RLIMIT_MEMLOCK to RLIM_INFINITY;
 *  4. load the object with bpf_prog_load_xattr() and resolve the five map
 *     descriptors by name;
 *  5. translate every interface name with if_nametoindex();
 *  6. attach the program to each interface; if one attach fails, detach
 *     from the interfaces already attached; otherwise record the program
 *     id obtained through bpf_obj_get_info_by_fd();
 *  7. install int_exit() for SIGINT and SIGTERM;
 *  8. dump the route and ARP tables for AF_INET and enter monitor_route().
 *
 * NOTE(review):
 *  - total_ifindex and ifname_list are initialised from ac / argv before
 *    getopt() runs; confirm the option cases adjust them (not shown).
 *  - both calloc() calls size elements with a pointer type (int *,
 *    __u32 *) for arrays of int / __u32, and the results are used without
 *    a NULL check.
 *  - error-path return values are on lines not shown here.
 */
629 int main(int ac, char **argv)
631 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
632 struct bpf_prog_load_attr prog_load_attr = {
633 .prog_type = BPF_PROG_TYPE_XDP,
635 struct bpf_prog_info info = {};
636 __u32 info_len = sizeof(info);
637 const char *optstr = "SF";
638 struct bpf_object *obj;
644 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
645 prog_load_attr.file = filename;
647 total_ifindex = ac - 1;
648 ifname_list = (argv + 1);
650 while ((opt = getopt(ac, argv, optstr)) != -1) {
653 flags |= XDP_FLAGS_SKB_MODE;
658 flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
663 usage(basename(argv[0]));
669 usage(basename(argv[0]));
673 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
674 perror("setrlimit(RLIMIT_MEMLOCK)");
678 if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
681 printf("\n**************loading bpf file*********************\n\n\n");
683 printf("bpf_prog_load_xattr: %s\n", strerror(errno));
687 lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
688 rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
689 arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table");
690 exact_match_map_fd = bpf_object__find_map_fd_by_name(obj,
692 tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
693 if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 ||
694 exact_match_map_fd < 0 || tx_port_map_fd < 0) {
695 printf("bpf_object__find_map_fd_by_name failed\n");
699 ifindex_list = (int *)calloc(total_ifindex, sizeof(int *));
700 for (i = 0; i < total_ifindex; i++) {
701 ifindex_list[i] = if_nametoindex(ifname_list[i]);
702 if (!ifindex_list[i]) {
703 printf("Couldn't translate interface name: %s",
708 prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
709 for (i = 0; i < total_ifindex; i++) {
710 if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
711 printf("link set xdp fd failed\n");
/* Roll back: detach from the interfaces attached before the failure. */
712 int recovery_index = i;
714 for (i = 0; i < recovery_index; i++)
715 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
719 err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
721 printf("can't get prog info - %s\n", strerror(errno));
/* Remember the program id so int_exit() detaches only this program. */
724 prog_id_list[i] = info.id;
725 memset(&info, 0, sizeof(info));
726 printf("Attached to %d\n", ifindex_list[i]);
728 signal(SIGINT, int_exit);
729 signal(SIGTERM, int_exit);
731 printf("*******************ROUTE TABLE*************************\n\n\n");
732 get_route_table(AF_INET);
733 printf("*******************ARP TABLE***************************\n\n\n");
734 get_arp_table(AF_INET);
735 if (monitor_route() < 0) {
736 printf("Error in receiving route update");