/* samples/bpf/xdpsock_user.c — AF_XDP (XDP sockets) benchmark sample */
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017 - 2018 Intel Corporation. */
3
4 #include <assert.h>
5 #include <errno.h>
6 #include <getopt.h>
7 #include <libgen.h>
8 #include <linux/bpf.h>
9 #include <linux/if_link.h>
10 #include <linux/if_xdp.h>
11 #include <linux/if_ether.h>
12 #include <net/if.h>
13 #include <signal.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <net/ethernet.h>
19 #include <sys/resource.h>
20 #include <sys/socket.h>
21 #include <sys/mman.h>
22 #include <time.h>
23 #include <unistd.h>
24 #include <pthread.h>
25 #include <locale.h>
26 #include <sys/types.h>
27 #include <poll.h>
28
29 #include "bpf_load.h"
30 #include "bpf_util.h"
31 #include <bpf/bpf.h>
32
33 #include "xdpsock.h"
34
/* Fallback definitions for libc headers that predate AF_XDP support. */
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define NUM_FRAMES 131072	/* number of UMEM frames (packet buffers) */
#define FRAME_HEADROOM 0	/* headroom reserved in front of each frame */
#define FRAME_SHIFT 11		/* log2(FRAME_SIZE): frame index -> byte offset */
#define FRAME_SIZE 2048		/* bytes per UMEM frame */
#define NUM_DESCS 1024		/* Rx/Tx descriptor ring size */
#define BATCH_SIZE 16		/* descriptors processed per loop iteration */

#define FQ_NUM_DESCS 1024	/* UMEM fill-queue size */
#define CQ_NUM_DESCS 1024	/* UMEM completion-queue size */

/* Set to 1 to hex-dump every received packet (slow; debugging only). */
#define DEBUG_HEXDUMP 0

/* Kernel-style shorthands for the uapi fixed-width types. */
typedef __u64 u64;
typedef __u32 u32;

/* Timestamp of the previous stats dump, used for rate computation. */
static unsigned long prev_time;

enum benchmark_type {
	BENCH_RXDROP = 0,	/* receive and drop packets */
	BENCH_TXONLY = 1,	/* transmit a canned packet as fast as possible */
	BENCH_L2FWD = 2,	/* receive, swap MACs, retransmit */
};

/* Command-line options; filled in by parse_command_line(). */
static enum benchmark_type opt_bench = BENCH_RXDROP;
static u32 opt_xdp_flags;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_shared_packet_buffer;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags;
79
/*
 * Userspace view of a UMEM ring (fill or completion queue); entries are
 * u64 frame addresses.  cached_prod/cached_cons shadow the shared
 * producer/consumer indices so the shared cachelines are not touched on
 * every operation; 'map' is the raw mmap()ed ring region.
 */
struct xdp_umem_uqueue {
	u32 cached_prod;
	u32 cached_cons;
	u32 mask;	/* ring size - 1, for cheap index wrapping */
	u32 size;
	u32 *producer;	/* shared producer index, inside the mmap()ed area */
	u32 *consumer;	/* shared consumer index, inside the mmap()ed area */
	u64 *ring;	/* ring entries: frame addresses */
	void *map;	/* base of the mmap()ed ring region */
};

/* A registered UMEM: the frame memory plus its fill/completion rings. */
struct xdp_umem {
	char *frames;	/* packet buffers, NUM_FRAMES * FRAME_SIZE bytes */
	struct xdp_umem_uqueue fq;	/* fill queue */
	struct xdp_umem_uqueue cq;	/* completion queue */
	int fd;		/* socket fd that owns the UMEM registration */
};

/* Userspace view of an Rx/Tx ring; entries are struct xdp_desc. */
struct xdp_uqueue {
	u32 cached_prod;
	u32 cached_cons;
	u32 mask;
	u32 size;
	u32 *producer;
	u32 *consumer;
	struct xdp_desc *ring;
	void *map;
};

/* One AF_XDP socket with its rings, UMEM and per-socket counters. */
struct xdpsock {
	struct xdp_uqueue rx;
	struct xdp_uqueue tx;
	int sfd;		/* the AF_XDP socket fd */
	struct xdp_umem *umem;	/* private or shared UMEM */
	u32 outstanding_tx;	/* Tx descriptors posted but not completed */
	unsigned long rx_npkts;
	unsigned long tx_npkts;
	unsigned long prev_rx_npkts;	/* snapshots for rate computation */
	unsigned long prev_tx_npkts;
};

#define MAX_SOCKS 4
static int num_socks;
struct xdpsock *xsks[MAX_SOCKS];
124
125 static unsigned long get_nsecs(void)
126 {
127         struct timespec ts;
128
129         clock_gettime(CLOCK_MONOTONIC, &ts);
130         return ts.tv_sec * 1000000000UL + ts.tv_nsec;
131 }
132
/* Forward declaration: lassert() dumps final stats before exiting. */
static void dump_stats(void);

/* Assert that also prints errno and the final stats before exiting. */
#define lassert(expr)							\
	do {								\
		if (!(expr)) {						\
			fprintf(stderr, "%s:%s:%i: Assertion failed: "	\
				#expr ": errno: %d/\"%s\"\n",		\
				__FILE__, __func__, __LINE__,		\
				errno, strerror(errno));		\
			dump_stats();					\
			exit(EXIT_FAILURE);				\
		}							\
	} while (0)

/* Compiler-only barriers used as userspace SMP barriers for the rings.
 * NOTE(review): these assume a memory model (e.g. x86 TSO) where a
 * compiler barrier suffices — confirm before porting to weaker archs.
 */
#define barrier() __asm__ __volatile__("": : :"memory")
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

/* Canned 60-byte test packet (Ethernet/IPv4/UDP with dummy payload),
 * transmitted by the txonly benchmark.
 */
static const char pkt_data[] =
	"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
	"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
	"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
	"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
158
159 static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
160 {
161         u32 free_entries = q->cached_cons - q->cached_prod;
162
163         if (free_entries >= nb)
164                 return free_entries;
165
166         /* Refresh the local tail pointer */
167         q->cached_cons = *q->consumer + q->size;
168
169         return q->cached_cons - q->cached_prod;
170 }
171
172 static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
173 {
174         u32 free_entries = q->cached_cons - q->cached_prod;
175
176         if (free_entries >= ndescs)
177                 return free_entries;
178
179         /* Refresh the local tail pointer */
180         q->cached_cons = *q->consumer + q->size;
181         return q->cached_cons - q->cached_prod;
182 }
183
184 static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
185 {
186         u32 entries = q->cached_prod - q->cached_cons;
187
188         if (entries == 0) {
189                 q->cached_prod = *q->producer;
190                 entries = q->cached_prod - q->cached_cons;
191         }
192
193         return (entries > nb) ? nb : entries;
194 }
195
196 static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
197 {
198         u32 entries = q->cached_prod - q->cached_cons;
199
200         if (entries == 0) {
201                 q->cached_prod = *q->producer;
202                 entries = q->cached_prod - q->cached_cons;
203         }
204
205         return (entries > ndescs) ? ndescs : entries;
206 }
207
208 static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
209                                          struct xdp_desc *d,
210                                          size_t nb)
211 {
212         u32 i;
213
214         if (umem_nb_free(fq, nb) < nb)
215                 return -ENOSPC;
216
217         for (i = 0; i < nb; i++) {
218                 u32 idx = fq->cached_prod++ & fq->mask;
219
220                 fq->ring[idx] = d[i].addr;
221         }
222
223         u_smp_wmb();
224
225         *fq->producer = fq->cached_prod;
226
227         return 0;
228 }
229
230 static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d,
231                                       size_t nb)
232 {
233         u32 i;
234
235         if (umem_nb_free(fq, nb) < nb)
236                 return -ENOSPC;
237
238         for (i = 0; i < nb; i++) {
239                 u32 idx = fq->cached_prod++ & fq->mask;
240
241                 fq->ring[idx] = d[i];
242         }
243
244         u_smp_wmb();
245
246         *fq->producer = fq->cached_prod;
247
248         return 0;
249 }
250
251 static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
252                                                u64 *d, size_t nb)
253 {
254         u32 idx, i, entries = umem_nb_avail(cq, nb);
255
256         u_smp_rmb();
257
258         for (i = 0; i < entries; i++) {
259                 idx = cq->cached_cons++ & cq->mask;
260                 d[i] = cq->ring[idx];
261         }
262
263         if (entries > 0) {
264                 u_smp_wmb();
265
266                 *cq->consumer = cq->cached_cons;
267         }
268
269         return entries;
270 }
271
272 static inline void *xq_get_data(struct xdpsock *xsk, u64 addr)
273 {
274         return &xsk->umem->frames[addr];
275 }
276
277 static inline int xq_enq(struct xdp_uqueue *uq,
278                          const struct xdp_desc *descs,
279                          unsigned int ndescs)
280 {
281         struct xdp_desc *r = uq->ring;
282         unsigned int i;
283
284         if (xq_nb_free(uq, ndescs) < ndescs)
285                 return -ENOSPC;
286
287         for (i = 0; i < ndescs; i++) {
288                 u32 idx = uq->cached_prod++ & uq->mask;
289
290                 r[idx].addr = descs[i].addr;
291                 r[idx].len = descs[i].len;
292         }
293
294         u_smp_wmb();
295
296         *uq->producer = uq->cached_prod;
297         return 0;
298 }
299
300 static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
301                                  unsigned int id, unsigned int ndescs)
302 {
303         struct xdp_desc *r = uq->ring;
304         unsigned int i;
305
306         if (xq_nb_free(uq, ndescs) < ndescs)
307                 return -ENOSPC;
308
309         for (i = 0; i < ndescs; i++) {
310                 u32 idx = uq->cached_prod++ & uq->mask;
311
312                 r[idx].addr     = (id + i) << FRAME_SHIFT;
313                 r[idx].len      = sizeof(pkt_data) - 1;
314         }
315
316         u_smp_wmb();
317
318         *uq->producer = uq->cached_prod;
319         return 0;
320 }
321
322 static inline int xq_deq(struct xdp_uqueue *uq,
323                          struct xdp_desc *descs,
324                          int ndescs)
325 {
326         struct xdp_desc *r = uq->ring;
327         unsigned int idx;
328         int i, entries;
329
330         entries = xq_nb_avail(uq, ndescs);
331
332         u_smp_rmb();
333
334         for (i = 0; i < entries; i++) {
335                 idx = uq->cached_cons++ & uq->mask;
336                 descs[i] = r[idx];
337         }
338
339         if (entries > 0) {
340                 u_smp_wmb();
341
342                 *uq->consumer = uq->cached_cons;
343         }
344
345         return entries;
346 }
347
/* Exchange the source and destination MAC addresses of an Ethernet frame. */
static void swap_mac_addresses(void *data)
{
	struct ether_header *hdr = (struct ether_header *)data;
	struct ether_addr *dst = (struct ether_addr *)&hdr->ether_dhost;
	struct ether_addr *src = (struct ether_addr *)&hdr->ether_shost;
	struct ether_addr scratch;

	scratch = *src;
	*src = *dst;
	*dst = scratch;
}
359
/* Print 'length' bytes of 'pkt' as a hex + ASCII dump, each line
 * prefixed with the frame address.  Compiled out (early return) unless
 * DEBUG_HEXDUMP is set.
 */
static void hex_dump(void *pkt, size_t length, u64 addr)
{
	const unsigned char *address = (unsigned char *)pkt;
	const unsigned char *line = address;
	size_t line_size = 32;	/* bytes rendered per output line */
	unsigned char c;
	char buf[32];
	int i = 0;

	if (!DEBUG_HEXDUMP)
		return;

	sprintf(buf, "addr=%llu", addr);
	printf("length = %zu\n", length);
	printf("%s | ", buf);
	while (length-- > 0) {
		printf("%02X ", *address++);
		/* End of a full line, or the final (possibly partial) line. */
		if (!(++i % line_size) || (length == 0 && i % line_size)) {
			if (length == 0) {
				/* Pad a partial last line so the ASCII
				 * column lines up.
				 */
				while (i++ % line_size)
					printf("__ ");
			}
			printf(" | ");	/* right close */
			/* ASCII column: '.' for non-printable bytes. */
			while (line < address) {
				c = *line++;
				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
			}
			printf("\n");
			if (length > 0)
				printf("%s | ", buf);
		}
	}
	printf("\n");
}
394
395 static size_t gen_eth_frame(char *frame)
396 {
397         memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
398         return sizeof(pkt_data) - 1;
399 }
400
/*
 * Allocate frame memory, register it as a UMEM on socket 'sfd', and
 * mmap the fill and completion rings.  For the txonly benchmark every
 * frame is pre-populated with the canned test packet.  Exits via
 * lassert() on any failure; caller owns the returned struct.
 */
static struct xdp_umem *xdp_umem_configure(int sfd)
{
	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
	struct xdp_mmap_offsets off;
	struct xdp_umem_reg mr;
	struct xdp_umem *umem;
	socklen_t optlen;
	void *bufs;

	umem = calloc(1, sizeof(*umem));
	lassert(umem);

	lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
			       NUM_FRAMES * FRAME_SIZE) == 0);

	/* Register the frame memory with the kernel. */
	mr.addr = (__u64)bufs;
	mr.len = NUM_FRAMES * FRAME_SIZE;
	mr.chunk_size = FRAME_SIZE;
	mr.headroom = FRAME_HEADROOM;

	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
			   sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
			   sizeof(int)) == 0);

	/* Ask the kernel where each ring lives within its mmap region. */
	optlen = sizeof(off);
	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
			   &optlen) == 0);

	/* Map the fill ring. */
	umem->fq.map = mmap(0, off.fr.desc +
			    FQ_NUM_DESCS * sizeof(u64),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_UMEM_PGOFF_FILL_RING);
	lassert(umem->fq.map != MAP_FAILED);

	umem->fq.mask = FQ_NUM_DESCS - 1;
	umem->fq.size = FQ_NUM_DESCS;
	umem->fq.producer = umem->fq.map + off.fr.producer;
	umem->fq.consumer = umem->fq.map + off.fr.consumer;
	umem->fq.ring = umem->fq.map + off.fr.desc;
	/* The whole fill ring starts out free for the producer. */
	umem->fq.cached_cons = FQ_NUM_DESCS;

	/* Map the completion ring. */
	umem->cq.map = mmap(0, off.cr.desc +
			     CQ_NUM_DESCS * sizeof(u64),
			     PROT_READ | PROT_WRITE,
			     MAP_SHARED | MAP_POPULATE, sfd,
			     XDP_UMEM_PGOFF_COMPLETION_RING);
	lassert(umem->cq.map != MAP_FAILED);

	umem->cq.mask = CQ_NUM_DESCS - 1;
	umem->cq.size = CQ_NUM_DESCS;
	umem->cq.producer = umem->cq.map + off.cr.producer;
	umem->cq.consumer = umem->cq.map + off.cr.consumer;
	umem->cq.ring = umem->cq.map + off.cr.desc;

	umem->frames = bufs;
	umem->fd = sfd;

	if (opt_bench == BENCH_TXONLY) {
		int i;

		/* Pre-fill every frame with the canned Tx packet. */
		for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE)
			(void)gen_eth_frame(&umem->frames[i]);
	}

	return umem;
}
470
/*
 * Create and bind one AF_XDP socket.  With umem == NULL a new UMEM is
 * created and its fill ring pre-populated with frames; otherwise the
 * socket shares the given UMEM (bound with XDP_SHARED_UMEM).  Exits via
 * lassert() on any failure.
 */
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	int sfd, ndescs = NUM_DESCS;
	struct xdpsock *xsk;
	bool shared = true;
	socklen_t optlen;
	u64 i;

	sfd = socket(PF_XDP, SOCK_RAW, 0);
	lassert(sfd >= 0);

	xsk = calloc(1, sizeof(*xsk));
	lassert(xsk);

	xsk->sfd = sfd;
	xsk->outstanding_tx = 0;

	if (!umem) {
		shared = false;
		xsk->umem = xdp_umem_configure(sfd);
	} else {
		xsk->umem = umem;
	}

	/* Size the Rx/Tx rings, then query their mmap offsets. */
	lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
			   &ndescs, sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
			   &ndescs, sizeof(int)) == 0);
	optlen = sizeof(off);
	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
			   &optlen) == 0);

	/* Rx */
	xsk->rx.map = mmap(NULL,
			   off.rx.desc +
			   NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, sfd,
			   XDP_PGOFF_RX_RING);
	lassert(xsk->rx.map != MAP_FAILED);

	if (!shared) {
		/* Hand the kernel frames to receive packets into. */
		for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE)
			lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
				== 0);
	}

	/* Tx */
	xsk->tx.map = mmap(NULL,
			   off.tx.desc +
			   NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, sfd,
			   XDP_PGOFF_TX_RING);
	lassert(xsk->tx.map != MAP_FAILED);

	xsk->rx.mask = NUM_DESCS - 1;
	xsk->rx.size = NUM_DESCS;
	xsk->rx.producer = xsk->rx.map + off.rx.producer;
	xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
	xsk->rx.ring = xsk->rx.map + off.rx.desc;

	xsk->tx.mask = NUM_DESCS - 1;
	xsk->tx.size = NUM_DESCS;
	xsk->tx.producer = xsk->tx.map + off.tx.producer;
	xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
	xsk->tx.ring = xsk->tx.map + off.tx.desc;
	/* The whole Tx ring starts out free for the producer. */
	xsk->tx.cached_cons = NUM_DESCS;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = opt_ifindex;
	sxdp.sxdp_queue_id = opt_queue;

	if (shared) {
		sxdp.sxdp_flags = XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = opt_xdp_bind_flags;
	}

	lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);

	return xsk;
}
557
558 static void print_benchmark(bool running)
559 {
560         const char *bench_str = "INVALID";
561
562         if (opt_bench == BENCH_RXDROP)
563                 bench_str = "rxdrop";
564         else if (opt_bench == BENCH_TXONLY)
565                 bench_str = "txonly";
566         else if (opt_bench == BENCH_L2FWD)
567                 bench_str = "l2fwd";
568
569         printf("%s:%d %s ", opt_if, opt_queue, bench_str);
570         if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
571                 printf("xdp-skb ");
572         else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
573                 printf("xdp-drv ");
574         else
575                 printf("        ");
576
577         if (opt_poll)
578                 printf("poll() ");
579
580         if (running) {
581                 printf("running...");
582                 fflush(stdout);
583         }
584 }
585
586 static void dump_stats(void)
587 {
588         unsigned long now = get_nsecs();
589         long dt = now - prev_time;
590         int i;
591
592         prev_time = now;
593
594         for (i = 0; i < num_socks; i++) {
595                 char *fmt = "%-15s %'-11.0f %'-11lu\n";
596                 double rx_pps, tx_pps;
597
598                 rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
599                          1000000000. / dt;
600                 tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
601                          1000000000. / dt;
602
603                 printf("\n sock%d@", i);
604                 print_benchmark(false);
605                 printf("\n");
606
607                 printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
608                        dt / 1000000000.);
609                 printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
610                 printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
611
612                 xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
613                 xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
614         }
615 }
616
617 static void *poller(void *arg)
618 {
619         (void)arg;
620         for (;;) {
621                 sleep(opt_interval);
622                 dump_stats();
623         }
624
625         return NULL;
626 }
627
/* SIGINT/SIGTERM/SIGABRT handler: print final stats, detach the XDP
 * program from the interface (fd -1) and exit successfully.
 */
static void int_exit(int sig)
{
	(void)sig;
	dump_stats();
	bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
	exit(EXIT_SUCCESS);
}
635
/* getopt_long() option table; keep in sync with usage() and the
 * "rtli:q:psSNn:" short-option string in parse_command_line().
 */
static struct option long_options[] = {
	{ .name = "rxdrop",        .has_arg = no_argument,       .val = 'r' },
	{ .name = "txonly",        .has_arg = no_argument,       .val = 't' },
	{ .name = "l2fwd",         .has_arg = no_argument,       .val = 'l' },
	{ .name = "interface",     .has_arg = required_argument, .val = 'i' },
	{ .name = "queue",         .has_arg = required_argument, .val = 'q' },
	{ .name = "poll",          .has_arg = no_argument,       .val = 'p' },
	{ .name = "shared-buffer", .has_arg = no_argument,       .val = 's' },
	{ .name = "xdp-skb",       .has_arg = no_argument,       .val = 'S' },
	{ .name = "xdp-native",    .has_arg = no_argument,       .val = 'N' },
	{ .name = "interval",      .has_arg = required_argument, .val = 'n' },
	{ 0 }	/* sentinel */
};
649
/* Print usage help to stderr and terminate with failure status.
 * Fix: corrected the "Enfore" typo in the -N option description.
 */
static void usage(const char *prog)
{
	const char *str =
		"  Usage: %s [OPTIONS]\n"
		"  Options:\n"
		"  -r, --rxdrop         Discard all incoming packets (default)\n"
		"  -t, --txonly         Only send packets\n"
		"  -l, --l2fwd          MAC swap L2 forwarding\n"
		"  -i, --interface=n    Run on interface n\n"
		"  -q, --queue=n        Use queue n (default 0)\n"
		"  -p, --poll           Use poll syscall\n"
		"  -s, --shared-buffer  Use shared packet buffer\n"
		"  -S, --xdp-skb=n      Use XDP skb-mod\n"
		"  -N, --xdp-native=n   Enforce XDP native mode\n"
		"  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
		"\n";
	fprintf(stderr, str, prog);
	exit(EXIT_FAILURE);
}
669
670 static void parse_command_line(int argc, char **argv)
671 {
672         int option_index, c;
673
674         opterr = 0;
675
676         for (;;) {
677                 c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options,
678                                 &option_index);
679                 if (c == -1)
680                         break;
681
682                 switch (c) {
683                 case 'r':
684                         opt_bench = BENCH_RXDROP;
685                         break;
686                 case 't':
687                         opt_bench = BENCH_TXONLY;
688                         break;
689                 case 'l':
690                         opt_bench = BENCH_L2FWD;
691                         break;
692                 case 'i':
693                         opt_if = optarg;
694                         break;
695                 case 'q':
696                         opt_queue = atoi(optarg);
697                         break;
698                 case 's':
699                         opt_shared_packet_buffer = 1;
700                         break;
701                 case 'p':
702                         opt_poll = 1;
703                         break;
704                 case 'S':
705                         opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
706                         opt_xdp_bind_flags |= XDP_COPY;
707                         break;
708                 case 'N':
709                         opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
710                         break;
711                 case 'n':
712                         opt_interval = atoi(optarg);
713                         break;
714                 default:
715                         usage(basename(argv[0]));
716                 }
717         }
718
719         opt_ifindex = if_nametoindex(opt_if);
720         if (!opt_ifindex) {
721                 fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
722                         opt_if);
723                 usage(basename(argv[0]));
724         }
725 }
726
/*
 * Nudge the kernel to start transmitting via a zero-length sendto().
 * Transient conditions (ENOBUFS/EAGAIN) are tolerated; anything else
 * is fatal.
 */
static void kick_tx(int fd)
{
	int err;

	err = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
	if (err < 0 && errno != ENOBUFS && errno != EAGAIN)
		lassert(0);
}
736
737 static inline void complete_tx_l2fwd(struct xdpsock *xsk)
738 {
739         u64 descs[BATCH_SIZE];
740         unsigned int rcvd;
741         size_t ndescs;
742
743         if (!xsk->outstanding_tx)
744                 return;
745
746         kick_tx(xsk->sfd);
747         ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
748                  xsk->outstanding_tx;
749
750         /* re-add completed Tx buffers */
751         rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
752         if (rcvd > 0) {
753                 umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
754                 xsk->outstanding_tx -= rcvd;
755                 xsk->tx_npkts += rcvd;
756         }
757 }
758
759 static inline void complete_tx_only(struct xdpsock *xsk)
760 {
761         u64 descs[BATCH_SIZE];
762         unsigned int rcvd;
763
764         if (!xsk->outstanding_tx)
765                 return;
766
767         kick_tx(xsk->sfd);
768
769         rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
770         if (rcvd > 0) {
771                 xsk->outstanding_tx -= rcvd;
772                 xsk->tx_npkts += rcvd;
773         }
774 }
775
776 static void rx_drop(struct xdpsock *xsk)
777 {
778         struct xdp_desc descs[BATCH_SIZE];
779         unsigned int rcvd, i;
780
781         rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
782         if (!rcvd)
783                 return;
784
785         for (i = 0; i < rcvd; i++) {
786                 char *pkt = xq_get_data(xsk, descs[i].addr);
787
788                 hex_dump(pkt, descs[i].len, descs[i].addr);
789         }
790
791         xsk->rx_npkts += rcvd;
792
793         umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
794 }
795
796 static void rx_drop_all(void)
797 {
798         struct pollfd fds[MAX_SOCKS + 1];
799         int i, ret, timeout, nfds = 1;
800
801         memset(fds, 0, sizeof(fds));
802
803         for (i = 0; i < num_socks; i++) {
804                 fds[i].fd = xsks[i]->sfd;
805                 fds[i].events = POLLIN;
806                 timeout = 1000; /* 1sn */
807         }
808
809         for (;;) {
810                 if (opt_poll) {
811                         ret = poll(fds, nfds, timeout);
812                         if (ret <= 0)
813                                 continue;
814                 }
815
816                 for (i = 0; i < num_socks; i++)
817                         rx_drop(xsks[i]);
818         }
819 }
820
821 static void tx_only(struct xdpsock *xsk)
822 {
823         int timeout, ret, nfds = 1;
824         struct pollfd fds[nfds + 1];
825         unsigned int idx = 0;
826
827         memset(fds, 0, sizeof(fds));
828         fds[0].fd = xsk->sfd;
829         fds[0].events = POLLOUT;
830         timeout = 1000; /* 1sn */
831
832         for (;;) {
833                 if (opt_poll) {
834                         ret = poll(fds, nfds, timeout);
835                         if (ret <= 0)
836                                 continue;
837
838                         if (fds[0].fd != xsk->sfd ||
839                             !(fds[0].revents & POLLOUT))
840                                 continue;
841                 }
842
843                 if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
844                         lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);
845
846                         xsk->outstanding_tx += BATCH_SIZE;
847                         idx += BATCH_SIZE;
848                         idx %= NUM_FRAMES;
849                 }
850
851                 complete_tx_only(xsk);
852         }
853 }
854
855 static void l2fwd(struct xdpsock *xsk)
856 {
857         for (;;) {
858                 struct xdp_desc descs[BATCH_SIZE];
859                 unsigned int rcvd, i;
860                 int ret;
861
862                 for (;;) {
863                         complete_tx_l2fwd(xsk);
864
865                         rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
866                         if (rcvd > 0)
867                                 break;
868                 }
869
870                 for (i = 0; i < rcvd; i++) {
871                         char *pkt = xq_get_data(xsk, descs[i].addr);
872
873                         swap_mac_addresses(pkt);
874
875                         hex_dump(pkt, descs[i].len, descs[i].addr);
876                 }
877
878                 xsk->rx_npkts += rcvd;
879
880                 ret = xq_enq(&xsk->tx, descs, rcvd);
881                 lassert(ret == 0);
882                 xsk->outstanding_tx += rcvd;
883         }
884 }
885
int main(int argc, char **argv)
{
	/* AF_XDP UMEM pages are accounted against RLIMIT_MEMLOCK. */
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	char xdp_filename[256];
	int i, ret, key = 0;
	pthread_t pt;

	parse_command_line(argc, argv);

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* The XDP program object is expected next to the binary. */
	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);

	if (load_bpf_file(xdp_filename)) {
		fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
		exit(EXIT_FAILURE);
	}

	if (!prog_fd[0]) {
		fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* Attach the XDP program to the requested interface. */
	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
		fprintf(stderr, "ERROR: link set xdp fd failed\n");
		exit(EXIT_FAILURE);
	}

	/* Tell the XDP program which queue id to redirect from. */
	ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
	if (ret) {
		fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
		exit(EXIT_FAILURE);
	}

	/* Create sockets... */
	xsks[num_socks++] = xsk_configure(NULL);

#if RR_LB
	/* Round-robin load balancing: extra sockets share xsks[0]'s UMEM. */
	for (i = 0; i < MAX_SOCKS - 1; i++)
		xsks[num_socks++] = xsk_configure(xsks[0]->umem);
#endif

	/* ...and insert them into the map. */
	for (i = 0; i < num_socks; i++) {
		key = i;
		ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
		if (ret) {
			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
			exit(EXIT_FAILURE);
		}
	}

	/* Dump final stats and detach the XDP program on termination. */
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);
	signal(SIGABRT, int_exit);

	/* Enable locale-aware thousands separators in the stats output. */
	setlocale(LC_ALL, "");

	/* Background thread printing stats every opt_interval seconds. */
	ret = pthread_create(&pt, NULL, poller, NULL);
	lassert(ret == 0);

	prev_time = get_nsecs();

	/* Run the selected benchmark; none of these return. */
	if (opt_bench == BENCH_RXDROP)
		rx_drop_all();
	else if (opt_bench == BENCH_TXONLY)
		tx_only(xsks[0]);
	else
		l2fwd(xsks[0]);

	return 0;
}