btrfs: don't initialize 'offset' in map_private_extent_buffer()
[sfrench/cifs-2.6.git] / samples / bpf / xdpsock_user.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017 - 2018 Intel Corporation. */
3
4 #include <assert.h>
5 #include <errno.h>
6 #include <getopt.h>
7 #include <libgen.h>
8 #include <linux/bpf.h>
9 #include <linux/if_link.h>
10 #include <linux/if_xdp.h>
11 #include <linux/if_ether.h>
12 #include <net/if.h>
13 #include <signal.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <net/ethernet.h>
19 #include <sys/resource.h>
20 #include <sys/socket.h>
21 #include <sys/mman.h>
22 #include <time.h>
23 #include <unistd.h>
24 #include <pthread.h>
25 #include <locale.h>
26 #include <sys/types.h>
27 #include <poll.h>
28
29 #include "bpf/libbpf.h"
30 #include "bpf_util.h"
31 #include <bpf/bpf.h>
32
33 #include "xdpsock.h"
34
/* Fallback definitions for systems whose libc headers predate AF_XDP. */
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define NUM_FRAMES 131072	/* number of frames in the umem */
#define FRAME_HEADROOM 0	/* headroom reserved in each frame */
#define FRAME_SHIFT 11		/* log2(FRAME_SIZE): frame index -> byte offset */
#define FRAME_SIZE 2048		/* bytes per umem frame */
#define NUM_DESCS 1024		/* entries in each rx/tx descriptor ring */
#define BATCH_SIZE 16		/* descriptors handled per enqueue/dequeue pass */

#define FQ_NUM_DESCS 1024	/* fill queue entries */
#define CQ_NUM_DESCS 1024	/* completion queue entries */

#define DEBUG_HEXDUMP 0		/* set non-zero to hex-dump every packet */
58
typedef __u64 u64;
typedef __u32 u32;

/* Timestamp (ns) of the previous statistics dump; used to compute rates. */
static unsigned long prev_time;

/* Which benchmark this run executes; selected on the command line. */
enum benchmark_type {
	BENCH_RXDROP = 0,	/* receive and discard packets */
	BENCH_TXONLY = 1,	/* transmit a canned frame forever */
	BENCH_L2FWD = 2,	/* MAC-swap and retransmit received frames */
};

/* Command-line options; filled in by parse_command_line(). */
static enum benchmark_type opt_bench = BENCH_RXDROP;
static u32 opt_xdp_flags;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_shared_packet_buffer;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags;

/*
 * Userspace view of a umem fill/completion ring: cached copies of the
 * producer/consumer indices plus pointers into the kernel-shared mmap.
 */
struct xdp_umem_uqueue {
	u32 cached_prod;	/* local copy of *producer */
	u32 cached_cons;	/* local copy of *consumer (+ size, for free-space math) */
	u32 mask;		/* ring size - 1, used to wrap indices */
	u32 size;
	u32 *producer;		/* shared producer index inside the mmap */
	u32 *consumer;		/* shared consumer index inside the mmap */
	u64 *ring;		/* ring of umem frame addresses */
	void *map;		/* base of the mmap'ed ring region */
};

/* A umem: the packet buffer plus its fill and completion rings. */
struct xdp_umem {
	char *frames;		/* NUM_FRAMES * FRAME_SIZE packet buffer */
	struct xdp_umem_uqueue fq;
	struct xdp_umem_uqueue cq;
	int fd;			/* socket that owns the umem registration */
};

/* Userspace view of an rx or tx ring of struct xdp_desc entries. */
struct xdp_uqueue {
	u32 cached_prod;
	u32 cached_cons;
	u32 mask;
	u32 size;
	u32 *producer;
	u32 *consumer;
	struct xdp_desc *ring;
	void *map;
};

/* One AF_XDP socket with its rings, umem and per-socket statistics. */
struct xdpsock {
	struct xdp_uqueue rx;
	struct xdp_uqueue tx;
	int sfd;		/* the AF_XDP socket fd */
	struct xdp_umem *umem;
	u32 outstanding_tx;	/* tx descriptors posted but not yet completed */
	unsigned long rx_npkts;
	unsigned long tx_npkts;
	unsigned long prev_rx_npkts;	/* snapshots for rate computation */
	unsigned long prev_tx_npkts;
};

static int num_socks;
struct xdpsock *xsks[MAX_SOCKS];
123
124 static unsigned long get_nsecs(void)
125 {
126         struct timespec ts;
127
128         clock_gettime(CLOCK_MONOTONIC, &ts);
129         return ts.tv_sec * 1000000000UL + ts.tv_nsec;
130 }
131
/* Forward declaration: lassert() dumps statistics before aborting. */
static void dump_stats(void);

/*
 * Assert that survives -DNDEBUG: on failure, print the location and
 * errno, dump the statistics gathered so far, then exit.
 */
#define lassert(expr)							\
	do {								\
		if (!(expr)) {						\
			fprintf(stderr, "%s:%s:%i: Assertion failed: "	\
				#expr ": errno: %d/\"%s\"\n",		\
				__FILE__, __func__, __LINE__,		\
				errno, strerror(errno));		\
			dump_stats();					\
			exit(EXIT_FAILURE);				\
		}							\
	} while (0)

/* Compiler barrier and the SMP load/store barriers the rings require. */
#define barrier() __asm__ __volatile__("": : :"memory")
#ifdef __aarch64__
#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
#else
/* On x86 the strong memory model makes a compiler barrier sufficient. */
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
156
/* Canned Ethernet/IPv4/UDP frame transmitted by the txonly benchmark. */
static const char pkt_data[] =
	"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
	"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
	"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
	"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
162
163 static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
164 {
165         u32 free_entries = q->cached_cons - q->cached_prod;
166
167         if (free_entries >= nb)
168                 return free_entries;
169
170         /* Refresh the local tail pointer */
171         q->cached_cons = *q->consumer + q->size;
172
173         return q->cached_cons - q->cached_prod;
174 }
175
176 static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
177 {
178         u32 free_entries = q->cached_cons - q->cached_prod;
179
180         if (free_entries >= ndescs)
181                 return free_entries;
182
183         /* Refresh the local tail pointer */
184         q->cached_cons = *q->consumer + q->size;
185         return q->cached_cons - q->cached_prod;
186 }
187
188 static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
189 {
190         u32 entries = q->cached_prod - q->cached_cons;
191
192         if (entries == 0) {
193                 q->cached_prod = *q->producer;
194                 entries = q->cached_prod - q->cached_cons;
195         }
196
197         return (entries > nb) ? nb : entries;
198 }
199
200 static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
201 {
202         u32 entries = q->cached_prod - q->cached_cons;
203
204         if (entries == 0) {
205                 q->cached_prod = *q->producer;
206                 entries = q->cached_prod - q->cached_cons;
207         }
208
209         return (entries > ndescs) ? ndescs : entries;
210 }
211
/*
 * Post the frame addresses of @nb descriptors onto the fill queue so
 * the kernel can use them for rx.  Returns 0 on success or -ENOSPC if
 * the queue lacks room for all @nb entries.
 */
static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
					 struct xdp_desc *d,
					 size_t nb)
{
	u32 i;

	if (umem_nb_free(fq, nb) < nb)
		return -ENOSPC;

	for (i = 0; i < nb; i++) {
		u32 idx = fq->cached_prod++ & fq->mask;

		fq->ring[idx] = d[i].addr;
	}

	/* Make the ring writes visible before publishing the new index. */
	u_smp_wmb();

	*fq->producer = fq->cached_prod;

	return 0;
}
233
/*
 * Post @nb raw frame addresses from @d onto the fill queue.  Returns 0
 * on success or -ENOSPC if fewer than @nb slots are free.
 */
static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d,
				      size_t nb)
{
	u32 i;

	if (umem_nb_free(fq, nb) < nb)
		return -ENOSPC;

	for (i = 0; i < nb; i++) {
		u32 idx = fq->cached_prod++ & fq->mask;

		fq->ring[idx] = d[i];
	}

	/* Make the ring writes visible before publishing the new index. */
	u_smp_wmb();

	*fq->producer = fq->cached_prod;

	return 0;
}
254
/*
 * Harvest up to @nb completed tx frame addresses from the completion
 * queue into @d.  Returns the number of entries actually copied.
 */
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
					       u64 *d, size_t nb)
{
	u32 idx, i, entries = umem_nb_avail(cq, nb);

	/* Read the ring entries only after the producer index. */
	u_smp_rmb();

	for (i = 0; i < entries; i++) {
		idx = cq->cached_cons++ & cq->mask;
		d[i] = cq->ring[idx];
	}

	if (entries > 0) {
		/* Order the ring reads before releasing the slots. */
		u_smp_wmb();

		*cq->consumer = cq->cached_cons;
	}

	return entries;
}
275
276 static inline void *xq_get_data(struct xdpsock *xsk, u64 addr)
277 {
278         return &xsk->umem->frames[addr];
279 }
280
/*
 * Enqueue @ndescs descriptors onto a tx ring.  Returns 0 on success or
 * -ENOSPC if the ring cannot hold all of them.
 */
static inline int xq_enq(struct xdp_uqueue *uq,
			 const struct xdp_desc *descs,
			 unsigned int ndescs)
{
	struct xdp_desc *r = uq->ring;
	unsigned int i;

	if (xq_nb_free(uq, ndescs) < ndescs)
		return -ENOSPC;

	for (i = 0; i < ndescs; i++) {
		u32 idx = uq->cached_prod++ & uq->mask;

		r[idx].addr = descs[i].addr;
		r[idx].len = descs[i].len;
	}

	/* Publish the descriptors only after their contents are written. */
	u_smp_wmb();

	*uq->producer = uq->cached_prod;
	return 0;
}
303
/*
 * Enqueue @ndescs tx descriptors pointing at consecutive umem frames
 * starting with frame index @id, each sized to the canned packet.
 * Returns 0 on success or -ENOSPC if the ring is too full.
 */
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
				 unsigned int id, unsigned int ndescs)
{
	struct xdp_desc *r = uq->ring;
	unsigned int i;

	if (xq_nb_free(uq, ndescs) < ndescs)
		return -ENOSPC;

	for (i = 0; i < ndescs; i++) {
		u32 idx = uq->cached_prod++ & uq->mask;

		/* Frame index -> byte address within the umem. */
		r[idx].addr	= (id + i) << FRAME_SHIFT;
		r[idx].len	= sizeof(pkt_data) - 1;
	}

	/* Publish the descriptors only after they are fully written. */
	u_smp_wmb();

	*uq->producer = uq->cached_prod;
	return 0;
}
325
/*
 * Dequeue up to @ndescs received descriptors from an rx ring into
 * @descs.  Returns the number dequeued, possibly 0.
 */
static inline int xq_deq(struct xdp_uqueue *uq,
			 struct xdp_desc *descs,
			 int ndescs)
{
	struct xdp_desc *r = uq->ring;
	unsigned int idx;
	int i, entries;

	entries = xq_nb_avail(uq, ndescs);

	/* Read the descriptors only after the producer index. */
	u_smp_rmb();

	for (i = 0; i < entries; i++) {
		idx = uq->cached_cons++ & uq->mask;
		descs[i] = r[idx];
	}

	if (entries > 0) {
		/* Release the ring slots only after copying them out. */
		u_smp_wmb();

		*uq->consumer = uq->cached_cons;
	}

	return entries;
}
351
/* Exchange the source and destination MAC addresses of an Ethernet frame. */
static void swap_mac_addresses(void *data)
{
	struct ether_header *hdr = (struct ether_header *)data;
	unsigned char saved[ETHER_ADDR_LEN];

	memcpy(saved, hdr->ether_shost, ETHER_ADDR_LEN);
	memcpy(hdr->ether_shost, hdr->ether_dhost, ETHER_ADDR_LEN);
	memcpy(hdr->ether_dhost, saved, ETHER_ADDR_LEN);
}
363
/*
 * Debug helper: hex-dump @length bytes of @pkt, 32 bytes per line with
 * a printable-ASCII column on the right.  Compiled out (early return)
 * unless DEBUG_HEXDUMP is non-zero.
 */
static void hex_dump(void *pkt, size_t length, u64 addr)
{
	const unsigned char *address = (unsigned char *)pkt;
	const unsigned char *line = address;
	size_t line_size = 32;
	unsigned char c;
	char buf[32];
	int i = 0;

	if (!DEBUG_HEXDUMP)
		return;

	sprintf(buf, "addr=%llu", addr);
	printf("length = %zu\n", length);
	printf("%s | ", buf);
	while (length-- > 0) {
		printf("%02X ", *address++);
		/* End of a full line, or the final (partial) line. */
		if (!(++i % line_size) || (length == 0 && i % line_size)) {
			if (length == 0) {
				/* Pad the last line so columns stay aligned. */
				while (i++ % line_size)
					printf("__ ");
			}
			printf(" | ");	/* right close */
			while (line < address) {
				c = *line++;
				/* Show non-printable bytes as '.'. */
				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
			}
			printf("\n");
			if (length > 0)
				printf("%s | ", buf);
		}
	}
	printf("\n");
}
398
399 static size_t gen_eth_frame(char *frame)
400 {
401         memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
402         return sizeof(pkt_data) - 1;
403 }
404
/*
 * Register a NUM_FRAMES * FRAME_SIZE packet buffer as the umem for
 * socket @sfd and mmap its fill and completion rings.  Aborts via
 * lassert() on any failure; returns the configured umem otherwise.
 */
static struct xdp_umem *xdp_umem_configure(int sfd)
{
	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
	struct xdp_mmap_offsets off;
	struct xdp_umem_reg mr;
	struct xdp_umem *umem;
	socklen_t optlen;
	void *bufs;

	umem = calloc(1, sizeof(*umem));
	lassert(umem);

	lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
			       NUM_FRAMES * FRAME_SIZE) == 0);

	/* Register the buffer with the kernel and size the two rings. */
	mr.addr = (__u64)bufs;
	mr.len = NUM_FRAMES * FRAME_SIZE;
	mr.chunk_size = FRAME_SIZE;
	mr.headroom = FRAME_HEADROOM;

	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
			   sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
			   sizeof(int)) == 0);

	/* Ask the kernel where each ring lives within its mmap region. */
	optlen = sizeof(off);
	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
			   &optlen) == 0);

	/* Map the fill queue. */
	umem->fq.map = mmap(0, off.fr.desc +
			    FQ_NUM_DESCS * sizeof(u64),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_UMEM_PGOFF_FILL_RING);
	lassert(umem->fq.map != MAP_FAILED);

	umem->fq.mask = FQ_NUM_DESCS - 1;
	umem->fq.size = FQ_NUM_DESCS;
	umem->fq.producer = umem->fq.map + off.fr.producer;
	umem->fq.consumer = umem->fq.map + off.fr.consumer;
	umem->fq.ring = umem->fq.map + off.fr.desc;
	/* Start with the whole fill ring counted as free. */
	umem->fq.cached_cons = FQ_NUM_DESCS;

	/* Map the completion queue. */
	umem->cq.map = mmap(0, off.cr.desc +
			     CQ_NUM_DESCS * sizeof(u64),
			     PROT_READ | PROT_WRITE,
			     MAP_SHARED | MAP_POPULATE, sfd,
			     XDP_UMEM_PGOFF_COMPLETION_RING);
	lassert(umem->cq.map != MAP_FAILED);

	umem->cq.mask = CQ_NUM_DESCS - 1;
	umem->cq.size = CQ_NUM_DESCS;
	umem->cq.producer = umem->cq.map + off.cr.producer;
	umem->cq.consumer = umem->cq.map + off.cr.consumer;
	umem->cq.ring = umem->cq.map + off.cr.desc;

	umem->frames = bufs;
	umem->fd = sfd;

	/* txonly transmits the same canned frame from every umem slot. */
	if (opt_bench == BENCH_TXONLY) {
		int i;

		for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE)
			(void)gen_eth_frame(&umem->frames[i]);
	}

	return umem;
}
474
/*
 * Create and configure one AF_XDP socket.  If @umem is NULL a fresh
 * umem is created and its fill queue pre-populated; otherwise the
 * socket shares @umem and is bound with XDP_SHARED_UMEM.  Aborts via
 * lassert() on any failure.
 */
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	int sfd, ndescs = NUM_DESCS;
	struct xdpsock *xsk;
	bool shared = true;
	socklen_t optlen;
	u64 i;

	sfd = socket(PF_XDP, SOCK_RAW, 0);
	lassert(sfd >= 0);

	xsk = calloc(1, sizeof(*xsk));
	lassert(xsk);

	xsk->sfd = sfd;
	xsk->outstanding_tx = 0;

	if (!umem) {
		shared = false;
		xsk->umem = xdp_umem_configure(sfd);
	} else {
		xsk->umem = umem;
	}

	/* Size the rx/tx rings, then query their mmap offsets. */
	lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
			   &ndescs, sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
			   &ndescs, sizeof(int)) == 0);
	optlen = sizeof(off);
	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
			   &optlen) == 0);

	/* Rx */
	xsk->rx.map = mmap(NULL,
			   off.rx.desc +
			   NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, sfd,
			   XDP_PGOFF_RX_RING);
	lassert(xsk->rx.map != MAP_FAILED);

	if (!shared) {
		/* Seed the fill queue with NUM_DESCS frame addresses. */
		for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE)
			lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
				== 0);
	}

	/* Tx */
	xsk->tx.map = mmap(NULL,
			   off.tx.desc +
			   NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, sfd,
			   XDP_PGOFF_TX_RING);
	lassert(xsk->tx.map != MAP_FAILED);

	xsk->rx.mask = NUM_DESCS - 1;
	xsk->rx.size = NUM_DESCS;
	xsk->rx.producer = xsk->rx.map + off.rx.producer;
	xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
	xsk->rx.ring = xsk->rx.map + off.rx.desc;

	xsk->tx.mask = NUM_DESCS - 1;
	xsk->tx.size = NUM_DESCS;
	xsk->tx.producer = xsk->tx.map + off.tx.producer;
	xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
	xsk->tx.ring = xsk->tx.map + off.tx.desc;
	/* The whole tx ring starts out free. */
	xsk->tx.cached_cons = NUM_DESCS;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = opt_ifindex;
	sxdp.sxdp_queue_id = opt_queue;

	if (shared) {
		sxdp.sxdp_flags = XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = opt_xdp_bind_flags;
	}

	lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);

	return xsk;
}
561
562 static void print_benchmark(bool running)
563 {
564         const char *bench_str = "INVALID";
565
566         if (opt_bench == BENCH_RXDROP)
567                 bench_str = "rxdrop";
568         else if (opt_bench == BENCH_TXONLY)
569                 bench_str = "txonly";
570         else if (opt_bench == BENCH_L2FWD)
571                 bench_str = "l2fwd";
572
573         printf("%s:%d %s ", opt_if, opt_queue, bench_str);
574         if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
575                 printf("xdp-skb ");
576         else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
577                 printf("xdp-drv ");
578         else
579                 printf("        ");
580
581         if (opt_poll)
582                 printf("poll() ");
583
584         if (running) {
585                 printf("running...");
586                 fflush(stdout);
587         }
588 }
589
590 static void dump_stats(void)
591 {
592         unsigned long now = get_nsecs();
593         long dt = now - prev_time;
594         int i;
595
596         prev_time = now;
597
598         for (i = 0; i < num_socks && xsks[i]; i++) {
599                 char *fmt = "%-15s %'-11.0f %'-11lu\n";
600                 double rx_pps, tx_pps;
601
602                 rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
603                          1000000000. / dt;
604                 tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
605                          1000000000. / dt;
606
607                 printf("\n sock%d@", i);
608                 print_benchmark(false);
609                 printf("\n");
610
611                 printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
612                        dt / 1000000000.);
613                 printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
614                 printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
615
616                 xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
617                 xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
618         }
619 }
620
621 static void *poller(void *arg)
622 {
623         (void)arg;
624         for (;;) {
625                 sleep(opt_interval);
626                 dump_stats();
627         }
628
629         return NULL;
630 }
631
632 static void int_exit(int sig)
633 {
634         (void)sig;
635         dump_stats();
636         bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
637         exit(EXIT_SUCCESS);
638 }
639
/* Long-option table; mirrors the "rtli:q:psSNn:cz" short-option string. */
static struct option long_options[] = {
	{"rxdrop", no_argument, 0, 'r'},
	{"txonly", no_argument, 0, 't'},
	{"l2fwd", no_argument, 0, 'l'},
	{"interface", required_argument, 0, 'i'},
	{"queue", required_argument, 0, 'q'},
	{"poll", no_argument, 0, 'p'},
	{"shared-buffer", no_argument, 0, 's'},
	{"xdp-skb", no_argument, 0, 'S'},
	{"xdp-native", no_argument, 0, 'N'},
	{"interval", required_argument, 0, 'n'},
	{"zero-copy", no_argument, 0, 'z'},
	{"copy", no_argument, 0, 'c'},
	{0, 0, 0, 0}	/* sentinel required by getopt_long() */
};
655
/*
 * Print the command-line help text to stderr and exit with failure
 * status.  Never returns.
 */
static void usage(const char *prog)
{
	const char *str =
		"  Usage: %s [OPTIONS]\n"
		"  Options:\n"
		"  -r, --rxdrop         Discard all incoming packets (default)\n"
		"  -t, --txonly         Only send packets\n"
		"  -l, --l2fwd          MAC swap L2 forwarding\n"
		"  -i, --interface=n    Run on interface n\n"
		"  -q, --queue=n        Use queue n (default 0)\n"
		"  -p, --poll           Use poll syscall\n"
		"  -s, --shared-buffer  Use shared packet buffer\n"
		"  -S, --xdp-skb=n      Use XDP skb-mod\n"
		"  -N, --xdp-native=n   Enforce XDP native mode\n"
		"  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
		"  -z, --zero-copy      Force zero-copy mode.\n"
		"  -c, --copy           Force copy mode.\n"
		"\n";
	fprintf(stderr, str, prog);
	exit(EXIT_FAILURE);
}
677
678 static void parse_command_line(int argc, char **argv)
679 {
680         int option_index, c;
681
682         opterr = 0;
683
684         for (;;) {
685                 c = getopt_long(argc, argv, "rtli:q:psSNn:cz", long_options,
686                                 &option_index);
687                 if (c == -1)
688                         break;
689
690                 switch (c) {
691                 case 'r':
692                         opt_bench = BENCH_RXDROP;
693                         break;
694                 case 't':
695                         opt_bench = BENCH_TXONLY;
696                         break;
697                 case 'l':
698                         opt_bench = BENCH_L2FWD;
699                         break;
700                 case 'i':
701                         opt_if = optarg;
702                         break;
703                 case 'q':
704                         opt_queue = atoi(optarg);
705                         break;
706                 case 's':
707                         opt_shared_packet_buffer = 1;
708                         break;
709                 case 'p':
710                         opt_poll = 1;
711                         break;
712                 case 'S':
713                         opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
714                         opt_xdp_bind_flags |= XDP_COPY;
715                         break;
716                 case 'N':
717                         opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
718                         break;
719                 case 'n':
720                         opt_interval = atoi(optarg);
721                         break;
722                 case 'z':
723                         opt_xdp_bind_flags |= XDP_ZEROCOPY;
724                         break;
725                 case 'c':
726                         opt_xdp_bind_flags |= XDP_COPY;
727                         break;
728                 default:
729                         usage(basename(argv[0]));
730                 }
731         }
732
733         opt_ifindex = if_nametoindex(opt_if);
734         if (!opt_ifindex) {
735                 fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
736                         opt_if);
737                 usage(basename(argv[0]));
738         }
739 }
740
741 static void kick_tx(int fd)
742 {
743         int ret;
744
745         ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
746         if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
747                 return;
748         lassert(0);
749 }
750
751 static inline void complete_tx_l2fwd(struct xdpsock *xsk)
752 {
753         u64 descs[BATCH_SIZE];
754         unsigned int rcvd;
755         size_t ndescs;
756
757         if (!xsk->outstanding_tx)
758                 return;
759
760         kick_tx(xsk->sfd);
761         ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
762                  xsk->outstanding_tx;
763
764         /* re-add completed Tx buffers */
765         rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
766         if (rcvd > 0) {
767                 umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
768                 xsk->outstanding_tx -= rcvd;
769                 xsk->tx_npkts += rcvd;
770         }
771 }
772
773 static inline void complete_tx_only(struct xdpsock *xsk)
774 {
775         u64 descs[BATCH_SIZE];
776         unsigned int rcvd;
777
778         if (!xsk->outstanding_tx)
779                 return;
780
781         kick_tx(xsk->sfd);
782
783         rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
784         if (rcvd > 0) {
785                 xsk->outstanding_tx -= rcvd;
786                 xsk->tx_npkts += rcvd;
787         }
788 }
789
790 static void rx_drop(struct xdpsock *xsk)
791 {
792         struct xdp_desc descs[BATCH_SIZE];
793         unsigned int rcvd, i;
794
795         rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
796         if (!rcvd)
797                 return;
798
799         for (i = 0; i < rcvd; i++) {
800                 char *pkt = xq_get_data(xsk, descs[i].addr);
801
802                 hex_dump(pkt, descs[i].len, descs[i].addr);
803         }
804
805         xsk->rx_npkts += rcvd;
806
807         umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
808 }
809
810 static void rx_drop_all(void)
811 {
812         struct pollfd fds[MAX_SOCKS + 1];
813         int i, ret, timeout, nfds = 1;
814
815         memset(fds, 0, sizeof(fds));
816
817         for (i = 0; i < num_socks; i++) {
818                 fds[i].fd = xsks[i]->sfd;
819                 fds[i].events = POLLIN;
820                 timeout = 1000; /* 1sn */
821         }
822
823         for (;;) {
824                 if (opt_poll) {
825                         ret = poll(fds, nfds, timeout);
826                         if (ret <= 0)
827                                 continue;
828                 }
829
830                 for (i = 0; i < num_socks; i++)
831                         rx_drop(xsks[i]);
832         }
833 }
834
835 static void tx_only(struct xdpsock *xsk)
836 {
837         int timeout, ret, nfds = 1;
838         struct pollfd fds[nfds + 1];
839         unsigned int idx = 0;
840
841         memset(fds, 0, sizeof(fds));
842         fds[0].fd = xsk->sfd;
843         fds[0].events = POLLOUT;
844         timeout = 1000; /* 1sn */
845
846         for (;;) {
847                 if (opt_poll) {
848                         ret = poll(fds, nfds, timeout);
849                         if (ret <= 0)
850                                 continue;
851
852                         if (fds[0].fd != xsk->sfd ||
853                             !(fds[0].revents & POLLOUT))
854                                 continue;
855                 }
856
857                 if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
858                         lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);
859
860                         xsk->outstanding_tx += BATCH_SIZE;
861                         idx += BATCH_SIZE;
862                         idx %= NUM_FRAMES;
863                 }
864
865                 complete_tx_only(xsk);
866         }
867 }
868
869 static void l2fwd(struct xdpsock *xsk)
870 {
871         for (;;) {
872                 struct xdp_desc descs[BATCH_SIZE];
873                 unsigned int rcvd, i;
874                 int ret;
875
876                 for (;;) {
877                         complete_tx_l2fwd(xsk);
878
879                         rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
880                         if (rcvd > 0)
881                                 break;
882                 }
883
884                 for (i = 0; i < rcvd; i++) {
885                         char *pkt = xq_get_data(xsk, descs[i].addr);
886
887                         swap_mac_addresses(pkt);
888
889                         hex_dump(pkt, descs[i].len, descs[i].addr);
890                 }
891
892                 xsk->rx_npkts += rcvd;
893
894                 ret = xq_enq(&xsk->tx, descs, rcvd);
895                 lassert(ret == 0);
896                 xsk->outstanding_tx += rcvd;
897         }
898 }
899
/*
 * Entry point: load the companion XDP program, create the AF_XDP
 * socket(s), publish them in the BPF maps, start the statistics
 * thread, then run the selected benchmark loop (which never returns
 * normally).
 */
int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type	= BPF_PROG_TYPE_XDP,
	};
	int prog_fd, qidconf_map, xsks_map;
	struct bpf_object *obj;
	char xdp_filename[256];
	struct bpf_map *map;
	int i, ret, key = 0;
	pthread_t pt;

	parse_command_line(argc, argv);

	/* The umem must be lockable in memory; lift RLIMIT_MEMLOCK. */
	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* The BPF object is expected next to the binary as <prog>_kern.o. */
	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
	prog_load_attr.file = xdp_filename;

	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
		exit(EXIT_FAILURE);
	if (prog_fd < 0) {
		/* NOTE(review): prog_fd is a negative fd here, not an
		 * errno value, so strerror() may print an unrelated
		 * message — verify against libbpf's error convention. */
		fprintf(stderr, "ERROR: no program found: %s\n",
			strerror(prog_fd));
		exit(EXIT_FAILURE);
	}

	map = bpf_object__find_map_by_name(obj, "qidconf_map");
	qidconf_map = bpf_map__fd(map);
	if (qidconf_map < 0) {
		fprintf(stderr, "ERROR: no qidconf map found: %s\n",
			strerror(qidconf_map));
		exit(EXIT_FAILURE);
	}

	map = bpf_object__find_map_by_name(obj, "xsks_map");
	xsks_map = bpf_map__fd(map);
	if (xsks_map < 0) {
		fprintf(stderr, "ERROR: no xsks map found: %s\n",
			strerror(xsks_map));
		exit(EXIT_FAILURE);
	}

	/* Attach the XDP program to the chosen interface. */
	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
		fprintf(stderr, "ERROR: link set xdp fd failed\n");
		exit(EXIT_FAILURE);
	}

	/* Tell the BPF program which hardware queue to redirect from. */
	ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
	if (ret) {
		fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
		exit(EXIT_FAILURE);
	}

	/* Create sockets... */
	xsks[num_socks++] = xsk_configure(NULL);

	/* Compile-time option: extra sockets sharing the first umem. */
#if RR_LB
	for (i = 0; i < MAX_SOCKS - 1; i++)
		xsks[num_socks++] = xsk_configure(xsks[0]->umem);
#endif

	/* ...and insert them into the map. */
	for (i = 0; i < num_socks; i++) {
		key = i;
		ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
		if (ret) {
			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
			exit(EXIT_FAILURE);
		}
	}

	/* Detach the XDP program on any normal termination signal. */
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);
	signal(SIGABRT, int_exit);

	/* Locale enables the %' thousands grouping in dump_stats(). */
	setlocale(LC_ALL, "");

	/* Background thread that prints statistics periodically. */
	ret = pthread_create(&pt, NULL, poller, NULL);
	lassert(ret == 0);

	prev_time = get_nsecs();

	if (opt_bench == BENCH_RXDROP)
		rx_drop_all();
	else if (opt_bench == BENCH_TXONLY)
		tx_only(xsks[0]);
	else
		l2fwd(xsks[0]);

	return 0;
}