Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author		David S. Miller <davem@davemloft.net>
		Thu, 23 Jan 2020 07:10:16 +0000 (08:10 +0100)
committer	David S. Miller <davem@davemloft.net>
		Thu, 23 Jan 2020 07:10:16 +0000 (08:10 +0100)
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-01-22

The following pull-request contains BPF updates for your *net-next* tree.

We've added 92 non-merge commits during the last 16 days, which contain
a total of 320 files changed, 7532 insertions(+) and 1448 deletions(-).

The main changes are:

1) Function-by-function verification and program extensions from Alexei.

2) Massive cleanup of selftests/bpf from Toke and Andrii.

3) Batched BPF map operations from Brian and Yonghong.

4) TCP congestion control in BPF from Martin.

5) Bulking for non-map xdp_redirect from Toke.

6) bpf_send_signal_thread helper from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
320 files changed:
arch/x86/net/bpf_jit_comp.c
drivers/net/tun.c
drivers/net/veth.c
drivers/net/virtio_net.c
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/bpf_verifier.h
include/linux/btf.h
include/linux/filter.h
include/linux/netdevice.h
include/net/sock.h
include/net/tcp.h
include/trace/events/xdp.h
include/uapi/linux/bpf.h
include/uapi/linux/btf.h
kernel/bpf/Makefile
kernel/bpf/arraymap.c
kernel/bpf/bpf_struct_ops.c [new file with mode: 0644]
kernel/bpf/bpf_struct_ops_types.h [new file with mode: 0644]
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/devmap.c
kernel/bpf/hashtab.c
kernel/bpf/helpers.c
kernel/bpf/inode.c
kernel/bpf/map_in_map.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/core/dev.c
net/core/filter.c
net/core/sock.c
net/ipv4/Makefile
net/ipv4/bpf_tcp_ca.c [new file with mode: 0644]
net/ipv4/tcp_cong.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/xdp/xdp_umem.c
net/xdp/xsk.c
samples/bpf/Makefile
samples/bpf/cpustat_kern.c
samples/bpf/fds_example.c
samples/bpf/hbm.c
samples/bpf/hbm_kern.h
samples/bpf/ibumad_kern.c
samples/bpf/ibumad_user.c
samples/bpf/lathist_kern.c
samples/bpf/lwt_len_hist_kern.c
samples/bpf/map_perf_test_kern.c
samples/bpf/offwaketime_kern.c
samples/bpf/offwaketime_user.c
samples/bpf/parse_ldabs.c
samples/bpf/parse_simple.c
samples/bpf/parse_varlen.c
samples/bpf/sampleip_kern.c
samples/bpf/sampleip_user.c
samples/bpf/sock_flags_kern.c
samples/bpf/sockex1_kern.c
samples/bpf/sockex1_user.c
samples/bpf/sockex2_kern.c
samples/bpf/sockex2_user.c
samples/bpf/sockex3_kern.c
samples/bpf/spintest_kern.c
samples/bpf/spintest_user.c
samples/bpf/syscall_tp_kern.c
samples/bpf/task_fd_query_kern.c
samples/bpf/task_fd_query_user.c
samples/bpf/tc_l2_redirect_kern.c
samples/bpf/tcbpf1_kern.c
samples/bpf/tcp_basertt_kern.c
samples/bpf/tcp_bufs_kern.c
samples/bpf/tcp_clamp_kern.c
samples/bpf/tcp_cong_kern.c
samples/bpf/tcp_dumpstats_kern.c
samples/bpf/tcp_iw_kern.c
samples/bpf/tcp_rwnd_kern.c
samples/bpf/tcp_synrto_kern.c
samples/bpf/tcp_tos_reflect_kern.c
samples/bpf/test_cgrp2_tc_kern.c
samples/bpf/test_current_task_under_cgroup_kern.c
samples/bpf/test_lwt_bpf.c
samples/bpf/test_map_in_map_kern.c
samples/bpf/test_overhead_kprobe_kern.c
samples/bpf/test_overhead_raw_tp_kern.c
samples/bpf/test_overhead_tp_kern.c
samples/bpf/test_probe_write_user_kern.c
samples/bpf/trace_event_kern.c
samples/bpf/trace_event_user.c
samples/bpf/trace_output_kern.c
samples/bpf/trace_output_user.c
samples/bpf/tracex1_kern.c
samples/bpf/tracex2_kern.c
samples/bpf/tracex3_kern.c
samples/bpf/tracex4_kern.c
samples/bpf/tracex5_kern.c
samples/bpf/tracex6_kern.c
samples/bpf/tracex7_kern.c
samples/bpf/xdp1_kern.c
samples/bpf/xdp1_user.c
samples/bpf/xdp2_kern.c
samples/bpf/xdp2skb_meta_kern.c
samples/bpf/xdp_adjust_tail_kern.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdp_fwd_kern.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_monitor_kern.c
samples/bpf/xdp_redirect_cpu_kern.c
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdp_redirect_kern.c
samples/bpf/xdp_redirect_map_kern.c
samples/bpf/xdp_redirect_map_user.c
samples/bpf/xdp_redirect_user.c
samples/bpf/xdp_router_ipv4_kern.c
samples/bpf/xdp_router_ipv4_user.c
samples/bpf/xdp_rxq_info_kern.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_sample_pkts_kern.c
samples/bpf/xdp_sample_pkts_user.c
samples/bpf/xdp_tx_iptunnel_kern.c
samples/bpf/xdp_tx_iptunnel_user.c
samples/bpf/xdpsock_kern.c
samples/bpf/xdpsock_user.c
scripts/bpf_helpers_doc.py
scripts/link-vmlinux.sh
tools/bpf/Makefile
tools/bpf/bpftool/Documentation/bpftool-gen.rst
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/btf_dumper.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/jit_disasm.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/map.c
tools/bpf/bpftool/map_perf_ring.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/netlink_dumper.c
tools/bpf/bpftool/perf.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/xlated_dumper.c
tools/bpf/runqslower/.gitignore [new file with mode: 0644]
tools/bpf/runqslower/Makefile [new file with mode: 0644]
tools/bpf/runqslower/runqslower.bpf.c [new file with mode: 0644]
tools/bpf/runqslower/runqslower.c [new file with mode: 0644]
tools/bpf/runqslower/runqslower.h [new file with mode: 0644]
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/btf.h
tools/include/uapi/linux/if_link.h
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_prog_linfo.c
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/hashmap.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_errno.c
tools/lib/bpf/libbpf_probes.c
tools/lib/bpf/netlink.c
tools/lib/bpf/nlattr.c
tools/lib/bpf/str_error.c
tools/lib/bpf/xsk.c
tools/perf/examples/bpf/5sec.c
tools/perf/examples/bpf/empty.c
tools/perf/examples/bpf/sys_enter_openat.c
tools/perf/include/bpf/pid_filter.h
tools/perf/include/bpf/stdio.h
tools/perf/include/bpf/unistd.h
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_tcp_helpers.h [new file with mode: 0644]
tools/testing/selftests/bpf/bpf_trace_helpers.h
tools/testing/selftests/bpf/bpf_util.h
tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c [new file with mode: 0644]
tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
tools/testing/selftests/bpf/prog_tests/cpu_mask.c
tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/perf_buffer.c
tools/testing/selftests/bpf/prog_tests/send_signal.c
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
tools/testing/selftests/bpf/prog_tests/test_global_funcs.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/test_overhead.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_cubic.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_dctcp.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_flow.c
tools/testing/selftests/bpf/progs/connect4_prog.c
tools/testing/selftests/bpf/progs/connect6_prog.c
tools/testing/selftests/bpf/progs/dev_cgroup.c
tools/testing/selftests/bpf/progs/fentry_test.c
tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
tools/testing/selftests/bpf/progs/fexit_test.c
tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
tools/testing/selftests/bpf/progs/kfree_skb.c
tools/testing/selftests/bpf/progs/loop1.c
tools/testing/selftests/bpf/progs/loop2.c
tools/testing/selftests/bpf/progs/loop3.c
tools/testing/selftests/bpf/progs/loop4.c
tools/testing/selftests/bpf/progs/loop5.c
tools/testing/selftests/bpf/progs/netcnt_prog.c
tools/testing/selftests/bpf/progs/pyperf.h
tools/testing/selftests/bpf/progs/pyperf_global.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sample_map_ret0.c
tools/testing/selftests/bpf/progs/sendmsg4_prog.c
tools/testing/selftests/bpf/progs/sendmsg6_prog.c
tools/testing/selftests/bpf/progs/socket_cookie_prog.c
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
tools/testing/selftests/bpf/progs/sockopt_inherit.c
tools/testing/selftests/bpf/progs/sockopt_multi.c
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/strobemeta.h
tools/testing/selftests/bpf/progs/tailcall1.c
tools/testing/selftests/bpf/progs/tailcall2.c
tools/testing/selftests/bpf/progs/tailcall3.c
tools/testing/selftests/bpf/progs/tailcall4.c
tools/testing/selftests/bpf/progs/tailcall5.c
tools/testing/selftests/bpf/progs/tcp_rtt.c
tools/testing/selftests/bpf/progs/test_adjust_tail.c
tools/testing/selftests/bpf/progs/test_attach_probe.c
tools/testing/selftests/bpf/progs/test_btf_haskv.c
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_btf_nokv.c
tools/testing/selftests/bpf/progs/test_core_extern.c
tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c
tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
tools/testing/selftests/bpf/progs/test_core_reloc_misc.c
tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
tools/testing/selftests/bpf/progs/test_core_reloc_size.c
tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
tools/testing/selftests/bpf/progs/test_global_data.c
tools/testing/selftests/bpf/progs/test_global_func1.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_global_func2.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_global_func3.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_global_func4.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_global_func5.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_global_func6.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_global_func7.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_l4lb.c
tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c
tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
tools/testing/selftests/bpf/progs/test_map_in_map.c
tools/testing/selftests/bpf/progs/test_map_lock.c
tools/testing/selftests/bpf/progs/test_mmap.c
tools/testing/selftests/bpf/progs/test_obj_id.c
tools/testing/selftests/bpf/progs/test_overhead.c
tools/testing/selftests/bpf/progs/test_perf_buffer.c
tools/testing/selftests/bpf/progs/test_pinning.c
tools/testing/selftests/bpf/progs/test_pinning_invalid.c
tools/testing/selftests/bpf/progs/test_pkt_access.c
tools/testing/selftests/bpf/progs/test_pkt_md_access.c
tools/testing/selftests/bpf/progs/test_probe_user.c
tools/testing/selftests/bpf/progs/test_queue_stack_map.h
tools/testing/selftests/bpf/progs/test_rdonly_maps.c
tools/testing/selftests/bpf/progs/test_seg6_loop.c
tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
tools/testing/selftests/bpf/progs/test_send_signal_kern.c
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
tools/testing/selftests/bpf/progs/test_skb_ctx.c
tools/testing/selftests/bpf/progs/test_skeleton.c
tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
tools/testing/selftests/bpf/progs/test_spin_lock.c
tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
tools/testing/selftests/bpf/progs/test_stacktrace_map.c
tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
tools/testing/selftests/bpf/progs/test_sysctl_prog.c
tools/testing/selftests/bpf/progs/test_tc_edt.c
tools/testing/selftests/bpf/progs/test_tc_tunnel.c
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
tools/testing/selftests/bpf/progs/test_tcp_estats.c
tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
tools/testing/selftests/bpf/progs/test_tracepoint.c
tools/testing/selftests/bpf/progs/test_tunnel_kern.c
tools/testing/selftests/bpf/progs/test_verif_scale1.c
tools/testing/selftests/bpf/progs/test_verif_scale2.c
tools/testing/selftests/bpf/progs/test_verif_scale3.c
tools/testing/selftests/bpf/progs/test_xdp.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_loop.c
tools/testing/selftests/bpf/progs/test_xdp_meta.c
tools/testing/selftests/bpf/progs/test_xdp_noinline.c
tools/testing/selftests/bpf/progs/test_xdp_redirect.c
tools/testing/selftests/bpf/progs/test_xdp_vlan.c
tools/testing/selftests/bpf/progs/xdp_dummy.c
tools/testing/selftests/bpf/progs/xdp_redirect_map.c
tools/testing/selftests/bpf/progs/xdp_tx.c
tools/testing/selftests/bpf/progs/xdping_kern.c
tools/testing/selftests/bpf/test_btf.c
tools/testing/selftests/bpf/test_cpp.cpp
tools/testing/selftests/bpf/test_hashmap.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_sock.c
tools/testing/selftests/bpf/test_sockmap_kern.h
tools/testing/selftests/bpf/test_sysctl.c
tools/testing/selftests/bpf/trace_helpers.h

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4c8a2d1f84700dc97ac24efc260155d89487a963..9ba08e9abc0944b361a749d95e0d8e38d275180c 100644
@@ -1328,7 +1328,7 @@ emit_jmp:
        return proglen;
 }
 
-static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
                      int stack_size)
 {
        int i;
@@ -1344,7 +1344,7 @@ static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
                         -(stack_size - i * 8));
 }
 
-static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
                         int stack_size)
 {
        int i;
@@ -1361,7 +1361,7 @@ static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
                         -(stack_size - i * 8));
 }
 
-static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
                      struct bpf_prog **progs, int prog_cnt, int stack_size)
 {
        u8 *prog = *pprog;
@@ -1456,7 +1456,8 @@ static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
  * add rsp, 8                      // skip eth_type_trans's frame
  * ret                             // return to its caller
  */
-int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+int arch_prepare_bpf_trampoline(void *image, void *image_end,
+                               const struct btf_func_model *m, u32 flags,
                                struct bpf_prog **fentry_progs, int fentry_cnt,
                                struct bpf_prog **fexit_progs, int fexit_cnt,
                                void *orig_call)
@@ -1523,13 +1524,10 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
                /* skip our return address and return to parent */
                EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
        EMIT1(0xC3); /* ret */
-       /* One half of the page has active running trampoline.
-        * Another half is an area for next trampoline.
-        * Make sure the trampoline generation logic doesn't overflow.
-        */
-       if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+       /* Make sure the trampoline generation logic doesn't overflow */
+       if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY))
                return -EFAULT;
-       return 0;
+       return prog - (u8 *)image;
 }
 
 static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 683d371e6e82063bec7ade102a3b40aa38b1b6af..3a5a6c655ddae7b8dffd83658332c01b60422f3a 100644
@@ -1718,7 +1718,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
                if (err < 0)
                        goto err_xdp;
                if (err == XDP_REDIRECT)
-                       xdp_do_flush_map();
+                       xdp_do_flush();
                if (err != XDP_PASS)
                        goto out;
 
@@ -2549,7 +2549,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
                }
 
                if (flush)
-                       xdp_do_flush_map();
+                       xdp_do_flush();
 
                rcu_read_unlock();
                local_bh_enable();
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a552df37a347c72fed84909c45188e5dd1201df7..1c89017beebbab5456307cc108ecc7e5e63c5082 100644
@@ -769,7 +769,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
        if (xdp_xmit & VETH_XDP_TX)
                veth_xdp_flush(rq->dev, &bq);
        if (xdp_xmit & VETH_XDP_REDIR)
-               xdp_do_flush_map();
+               xdp_do_flush();
        xdp_clear_return_frame_no_direct();
 
        return done;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4d7d5434cc5ded2eb7ed18ebf96af7cc9b4cb657..c458cd313281e0afc8437b8dd88c75b23e0747f6 100644
@@ -1432,7 +1432,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
                virtqueue_napi_complete(napi, rq->vq, received);
 
        if (xdp_xmit & VIRTIO_XDP_REDIR)
-               xdp_do_flush_map();
+               xdp_do_flush();
 
        if (xdp_xmit & VIRTIO_XDP_TX) {
                sq = virtnet_xdp_sq(vi);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b14e51d56a82e1225807b2808b2dbdd52aedeb1c..a9687861fd7e1de6a7ac76a6d18907cb766f9403 100644
@@ -17,6 +17,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/mutex.h>
+#include <linux/module.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -43,6 +44,15 @@ struct bpf_map_ops {
        int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
        void (*map_release_uref)(struct bpf_map *map);
        void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
+       int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
+                               union bpf_attr __user *uattr);
+       int (*map_lookup_and_delete_batch)(struct bpf_map *map,
+                                          const union bpf_attr *attr,
+                                          union bpf_attr __user *uattr);
+       int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
+                               union bpf_attr __user *uattr);
+       int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
+                               union bpf_attr __user *uattr);
 
        /* funcs callable from userspace and from eBPF programs */
        void *(*map_lookup_elem)(struct bpf_map *map, void *key);
@@ -106,6 +116,7 @@ struct bpf_map {
        struct btf *btf;
        struct bpf_map_memory memory;
        char name[BPF_OBJ_NAME_LEN];
+       u32 btf_vmlinux_value_type_id;
        bool unpriv_array;
        bool frozen; /* write-once; write-protected by freeze_mutex */
        /* 22 bytes hole */
@@ -183,7 +194,8 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
 
 static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 {
-       return map->btf && map->ops->map_seq_show_elem;
+       return (map->btf_value_type_id || map->btf_vmlinux_value_type_id) &&
+               map->ops->map_seq_show_elem;
 }
 
 int map_check_no_btf(const struct bpf_map *map,
@@ -349,6 +361,10 @@ struct bpf_verifier_ops {
                                  const struct bpf_insn *src,
                                  struct bpf_insn *dst,
                                  struct bpf_prog *prog, u32 *target_size);
+       int (*btf_struct_access)(struct bpf_verifier_log *log,
+                                const struct btf_type *t, int off, int size,
+                                enum bpf_access_type atype,
+                                u32 *next_btf_id);
 };
 
 struct bpf_prog_offload_ops {
@@ -437,7 +453,8 @@ struct btf_func_model {
  *      fentry = a set of program to run before calling original function
  *      fexit = a set of program to run after original function
  */
-int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+int arch_prepare_bpf_trampoline(void *image, void *image_end,
+                               const struct btf_func_model *m, u32 flags,
                                struct bpf_prog **fentry_progs, int fentry_cnt,
                                struct bpf_prog **fexit_progs, int fexit_cnt,
                                void *orig_call);
@@ -448,7 +465,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
 enum bpf_tramp_prog_type {
        BPF_TRAMP_FENTRY,
        BPF_TRAMP_FEXIT,
-       BPF_TRAMP_MAX
+       BPF_TRAMP_MAX,
+       BPF_TRAMP_REPLACE, /* more than MAX */
 };
 
 struct bpf_trampoline {
@@ -463,6 +481,11 @@ struct bpf_trampoline {
                void *addr;
                bool ftrace_managed;
        } func;
+       /* if !NULL this is BPF_PROG_TYPE_EXT program that extends another BPF
+        * program by replacing one of its functions. func.addr is the address
+        * of the function it replaced.
+        */
+       struct bpf_prog *extension_prog;
        /* list of BPF programs using this trampoline */
        struct hlist_head progs_hlist[BPF_TRAMP_MAX];
        /* Number of attached programs. A counter per kind. */
@@ -558,6 +581,7 @@ static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
 #endif
 
 struct bpf_func_info_aux {
+       u16 linkage;
        bool unreliable;
 };
 
@@ -668,6 +692,73 @@ struct bpf_array_aux {
        struct work_struct work;
 };
 
+struct bpf_struct_ops_value;
+struct btf_type;
+struct btf_member;
+
+#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
+struct bpf_struct_ops {
+       const struct bpf_verifier_ops *verifier_ops;
+       int (*init)(struct btf *btf);
+       int (*check_member)(const struct btf_type *t,
+                           const struct btf_member *member);
+       int (*init_member)(const struct btf_type *t,
+                          const struct btf_member *member,
+                          void *kdata, const void *udata);
+       int (*reg)(void *kdata);
+       void (*unreg)(void *kdata);
+       const struct btf_type *type;
+       const struct btf_type *value_type;
+       const char *name;
+       struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
+       u32 type_id;
+       u32 value_id;
+};
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
+const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
+void bpf_struct_ops_init(struct btf *btf);
+bool bpf_struct_ops_get(const void *kdata);
+void bpf_struct_ops_put(const void *kdata);
+int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
+                                      void *value);
+static inline bool bpf_try_module_get(const void *data, struct module *owner)
+{
+       if (owner == BPF_MODULE_OWNER)
+               return bpf_struct_ops_get(data);
+       else
+               return try_module_get(owner);
+}
+static inline void bpf_module_put(const void *data, struct module *owner)
+{
+       if (owner == BPF_MODULE_OWNER)
+               bpf_struct_ops_put(data);
+       else
+               module_put(owner);
+}
+#else
+static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
+{
+       return NULL;
+}
+static inline void bpf_struct_ops_init(struct btf *btf) { }
+static inline bool bpf_try_module_get(const void *data, struct module *owner)
+{
+       return try_module_get(owner);
+}
+static inline void bpf_module_put(const void *data, struct module *owner)
+{
+       module_put(owner);
+}
+static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
+                                                    void *key,
+                                                    void *value)
+{
+       return -EINVAL;
+}
+#endif
+
 struct bpf_array {
        struct bpf_map map;
        u32 elem_size;
@@ -906,6 +997,15 @@ void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
+int  generic_map_lookup_batch(struct bpf_map *map,
+                             const union bpf_attr *attr,
+                             union bpf_attr __user *uattr);
+int  generic_map_update_batch(struct bpf_map *map,
+                             const union bpf_attr *attr,
+                             union bpf_attr __user *uattr);
+int  generic_map_delete_batch(struct bpf_map *map,
+                             const union bpf_attr *attr,
+                             union bpf_attr __user *uattr);
 
 extern int sysctl_unprivileged_bpf_disabled;
 
@@ -962,7 +1062,9 @@ struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
-void __dev_map_flush(void);
+void __dev_flush(void);
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+                   struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
@@ -1006,7 +1108,13 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
                           const char *func_name,
                           struct btf_func_model *m);
 
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
+struct bpf_reg_state;
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
+                            struct bpf_reg_state *regs);
+int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
+                         struct bpf_reg_state *reg);
+int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
+                        struct btf *btf, const struct btf_type *t);
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
 
@@ -1071,13 +1179,20 @@ static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map
        return NULL;
 }
 
-static inline void __dev_map_flush(void)
+static inline void __dev_flush(void)
 {
 }
 
 struct xdp_buff;
 struct bpf_dtab_netdev;
 
+static inline
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
+{
+       return 0;
+}
+
 static inline
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
                    struct net_device *dev_rx)
@@ -1299,6 +1414,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
 extern const struct bpf_func_proto bpf_strtol_proto;
 extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_jiffies64_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 93740b3614d77f54cfbc7f398d917ace264abe2d..c81d4ece79a499d92a2b38276943f10c91821327 100644
@@ -65,6 +65,12 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
              struct sk_reuseport_md, struct sk_reuseport_kern)
 #endif
+#if defined(CONFIG_BPF_JIT)
+BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,
+             void *, void *)
+BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension,
+             void *, void *)
+#endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
@@ -105,3 +111,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
+#if defined(CONFIG_BPF_JIT)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
+#endif
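
The BPF_PROG_TYPE_EXT entry above backs the program extensions in point 1 of
the summary: an EXT program replaces one BTF-typed global function of an
already-loaded BPF program, via the trampoline's new BPF_TRAMP_REPLACE mode.
A hypothetical sketch of the BPF side (the SEC("freplace/...") convention and
all names here are illustrative, loosely modeled on the fexit_bpf2bpf
selftests; the loader is expected to point attach_prog_fd at the target
program when opening the object):

/* Replace the global function "handle_packet" inside an already-loaded
 * XDP program.  The signature must match the function being replaced. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("freplace/handle_packet")
int new_handle_packet(struct xdp_md *ctx)
{
	/* new behavior for the replaced function: pass everything */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";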
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 26e40de9ef55f881a2282291670e810bff8318be..5406e6e96585ccd89acb98f9cc3efb3359d09f60 100644
@@ -304,11 +304,13 @@ struct bpf_insn_aux_data {
        u64 map_key_state; /* constant (32 bit) key tracking for maps */
        int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
        int sanitize_stack_off; /* stack slot to be cleared */
-       bool seen; /* this insn was processed by the verifier */
+       u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
        bool zext_dst; /* this insn zero extends dst reg */
        u8 alu_state; /* used in combination with alu_limit */
-       bool prune_point;
+
+       /* below fields are initialized once */
        unsigned int orig_idx; /* original instruction index */
+       bool prune_point;
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -379,6 +381,7 @@ struct bpf_verifier_env {
                int *insn_stack;
                int cur_stack;
        } cfg;
+       u32 pass_cnt; /* number of times do_check() was called */
        u32 subprog_cnt;
        /* number of instructions analyzed by the verifier */
        u32 prev_insn_processed, insn_processed;
@@ -428,4 +431,7 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
 void
 bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
+int check_ctx_reg(struct bpf_verifier_env *env,
+                 const struct bpf_reg_state *reg, int regno);
+
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 79d4abc2556a1959fe719d1a5af33b4378109862..5c1ea99b480fa7164766454be9871b7d247d0d40 100644
@@ -7,6 +7,8 @@
 #include <linux/types.h>
 #include <uapi/linux/btf.h>
 
+#define BTF_TYPE_EMIT(type) ((void)(type *)0)
+
 struct btf;
 struct btf_member;
 struct btf_type;
@@ -53,6 +55,22 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
                           u32 expected_offset, u32 expected_size);
 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
+s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
+const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
+                                              u32 id, u32 *res_id);
+const struct btf_type *btf_type_resolve_ptr(const struct btf *btf,
+                                           u32 id, u32 *res_id);
+const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
+                                                u32 id, u32 *res_id);
+const struct btf_type *
+btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+                u32 *type_size, const struct btf_type **elem_type,
+                u32 *total_nelems);
+
+#define for_each_member(i, struct_type, member)                        \
+       for (i = 0, member = btf_type_member(struct_type);      \
+            i < btf_type_vlen(struct_type);                    \
+            i++, member++)
 
 static inline bool btf_type_is_ptr(const struct btf_type *t)
 {
@@ -84,6 +102,40 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
        return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
 }
 
+static inline u16 btf_type_vlen(const struct btf_type *t)
+{
+       return BTF_INFO_VLEN(t->info);
+}
+
+static inline u16 btf_func_linkage(const struct btf_type *t)
+{
+       return BTF_INFO_VLEN(t->info);
+}
+
+static inline bool btf_type_kflag(const struct btf_type *t)
+{
+       return BTF_INFO_KFLAG(t->info);
+}
+
+static inline u32 btf_member_bit_offset(const struct btf_type *struct_type,
+                                       const struct btf_member *member)
+{
+       return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset)
+                                          : member->offset;
+}
+
+static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type,
+                                          const struct btf_member *member)
+{
+       return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset)
+                                          : 0;
+}
+
+static inline const struct btf_member *btf_type_member(const struct btf_type *t)
+{
+       return (const struct btf_member *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 70e6dd960bcafc54ce44b194f9041f0ae4e1a37d..f349e2c0884c4cace4337b1ff730ff5002cc942e 100644
@@ -843,6 +843,8 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
                              bpf_aux_classic_check_t trans, bool save_orig);
 void bpf_prog_destroy(struct bpf_prog *fp);
+const struct bpf_func_proto *
+bpf_base_func_proto(enum bpf_func_id func_id);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
@@ -916,7 +918,7 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
        return 0;
 }
 
-/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
+/* The pair of xdp_do_redirect and xdp_do_flush MUST be called in the
  * same cpu context. Further for best results no more than a single map
  * for the do_redirect/do_flush pair should be used. This limitation is
  * because we only track one map and force a flush when the map changes.
@@ -927,7 +929,13 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 int xdp_do_redirect(struct net_device *dev,
                    struct xdp_buff *xdp,
                    struct bpf_prog *prog);
-void xdp_do_flush_map(void);
+void xdp_do_flush(void);
+
+/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
+ * it is no longer only flushing maps. Keep this define for compatibility
+ * until all drivers are updated - do not use xdp_do_flush_map() in new code!
+ */
+#define xdp_do_flush_map xdp_do_flush
 
 void bpf_warn_invalid_xdp_action(u32 act);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2741aa35bec6cc53441e4bf8ea2fa59606cda84a..5ec3537fbdb17fafcbac65cc925042f316c23136 100644
@@ -876,6 +876,7 @@ enum bpf_netdev_command {
 struct bpf_prog_offload_ops;
 struct netlink_ext_ack;
 struct xdp_umem;
+struct xdp_dev_bulk_queue;
 
 struct netdev_bpf {
        enum bpf_netdev_command command;
@@ -1986,12 +1987,10 @@ struct net_device {
        unsigned int            num_tx_queues;
        unsigned int            real_num_tx_queues;
        struct Qdisc            *qdisc;
-#ifdef CONFIG_NET_SCHED
-       DECLARE_HASHTABLE       (qdisc_hash, 4);
-#endif
        unsigned int            tx_queue_len;
        spinlock_t              tx_global_lock;
-       int                     watchdog_timeo;
+
+       struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
 
 #ifdef CONFIG_XPS
        struct xps_dev_maps __rcu *xps_cpus_map;
@@ -2001,11 +2000,15 @@ struct net_device {
        struct mini_Qdisc __rcu *miniq_egress;
 #endif
 
+#ifdef CONFIG_NET_SCHED
+       DECLARE_HASHTABLE       (qdisc_hash, 4);
+#endif
        /* These may be needed for future network-power-down code. */
        struct timer_list       watchdog_timer;
+       int                     watchdog_timeo;
 
-       int __percpu            *pcpu_refcnt;
        struct list_head        todo_list;
+       int __percpu            *pcpu_refcnt;
 
        struct list_head        link_watch_list;
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 432ff73d20f391459c0786988350932b6e0a74fb..02162b0378f73f9221aec78e7adedd7124ef652b 100644
@@ -2597,4 +2597,6 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
        return false;
 }
 
+void sock_def_readable(struct sock *sk);
+
 #endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2869d28ed5d8401f182171629821f8dfd378f5e0..a5ea27df3c2b25ad5e159d28a968ce94d3036ff7 100644
@@ -1019,6 +1019,7 @@ enum tcp_ca_ack_event_flags {
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN     0x2
+#define TCP_CONG_MASK  (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
 
 union tcp_cc_info;
 
@@ -1113,6 +1114,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
+struct tcp_congestion_ops *tcp_ca_find(const char *name);
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
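
tcp_ca_find() and TCP_CONG_MASK are exposed above so that the new
net/ipv4/bpf_tcp_ca.c can validate and register BPF-implemented congestion
controls (point 4 of the summary). For flavor, a heavily trimmed and
hypothetical sketch of such a program, loosely modeled on the new
bpf_cubic.c/bpf_dctcp.c selftests and assuming their helper headers
(bpf_tcp_helpers.h with the BPF_PROG convenience macro):

/* A deliberately dumb fixed-window congestion control, expressed as a
 * BPF_MAP_TYPE_STRUCT_OPS map wrapping struct tcp_congestion_ops. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_tcp_helpers.h"

char _license[] SEC("license") = "GPL";

SEC("struct_ops/sample_ssthresh")
__u32 BPF_PROG(sample_ssthresh, struct sock *sk)
{
	return 10;	/* fixed slow-start threshold, purely illustrative */
}

SEC("struct_ops/sample_undo_cwnd")
__u32 BPF_PROG(sample_undo_cwnd, struct sock *sk)
{
	return 10;
}

SEC("struct_ops/sample_cong_avoid")
void BPF_PROG(sample_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	/* leave snd_cwnd untouched: a fixed window */
}

SEC(".struct_ops")
struct tcp_congestion_ops sample_ca = {
	.ssthresh	= (void *)sample_ssthresh,
	.undo_cwnd	= (void *)sample_undo_cwnd,
	.cong_avoid	= (void *)sample_cong_avoid,
	.name		= "bpf_sample_ca",
};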
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index a7378bcd99281c21d069e0108774b66b8c57693e..b95d65e8c62847050900b7280c16b33b445c865f 100644
@@ -79,14 +79,26 @@ TRACE_EVENT(xdp_bulk_tx,
                  __entry->sent, __entry->drops, __entry->err)
 );
 
+#ifndef __DEVMAP_OBJ_TYPE
+#define __DEVMAP_OBJ_TYPE
+struct _bpf_dtab_netdev {
+       struct net_device *dev;
+};
+#endif /* __DEVMAP_OBJ_TYPE */
+
+#define devmap_ifindex(tgt, map)                               \
+       (((map->map_type == BPF_MAP_TYPE_DEVMAP ||      \
+                 map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
+         ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
+
 DECLARE_EVENT_CLASS(xdp_redirect_template,
 
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp,
-                int to_ifindex, int err,
-                const struct bpf_map *map, u32 map_index),
+                const void *tgt, int err,
+                const struct bpf_map *map, u32 index),
 
-       TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
+       TP_ARGS(dev, xdp, tgt, err, map, index),
 
        TP_STRUCT__entry(
                __field(int, prog_id)
@@ -103,90 +115,65 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
                __entry->act            = XDP_REDIRECT;
                __entry->ifindex        = dev->ifindex;
                __entry->err            = err;
-               __entry->to_ifindex     = to_ifindex;
+               __entry->to_ifindex     = map ? devmap_ifindex(tgt, map) :
+                                               index;
                __entry->map_id         = map ? map->id : 0;
-               __entry->map_index      = map_index;
+               __entry->map_index      = map ? index : 0;
        ),
 
-       TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d",
+       TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
+                 " map_id=%d map_index=%d",
                  __entry->prog_id,
                  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
                  __entry->ifindex, __entry->to_ifindex,
-                 __entry->err)
+                 __entry->err, __entry->map_id, __entry->map_index)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp,
-                int to_ifindex, int err,
-                const struct bpf_map *map, u32 map_index),
-       TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
+                const void *tgt, int err,
+                const struct bpf_map *map, u32 index),
+       TP_ARGS(dev, xdp, tgt, err, map, index)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp,
-                int to_ifindex, int err,
-                const struct bpf_map *map, u32 map_index),
-       TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
+                const void *tgt, int err,
+                const struct bpf_map *map, u32 index),
+       TP_ARGS(dev, xdp, tgt, err, map, index)
 );
 
 #define _trace_xdp_redirect(dev, xdp, to)              \
-        trace_xdp_redirect(dev, xdp, to, 0, NULL, 0);
+        trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to);
 
 #define _trace_xdp_redirect_err(dev, xdp, to, err)     \
-        trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0);
+        trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to);
 
-DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map,
+#define _trace_xdp_redirect_map(dev, xdp, to, map, index)              \
+        trace_xdp_redirect(dev, xdp, to, 0, map, index);
+
+#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err)     \
+        trace_xdp_redirect_err(dev, xdp, to, err, map, index);
+
+/* not used anymore, but kept around so as not to break old programs */
+DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp,
-                int to_ifindex, int err,
-                const struct bpf_map *map, u32 map_index),
-       TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
-       TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
-                 " map_id=%d map_index=%d",
-                 __entry->prog_id,
-                 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
-                 __entry->ifindex, __entry->to_ifindex,
-                 __entry->err,
-                 __entry->map_id, __entry->map_index)
+                const void *tgt, int err,
+                const struct bpf_map *map, u32 index),
+       TP_ARGS(dev, xdp, tgt, err, map, index)
 );
 
-DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
+DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp,
-                int to_ifindex, int err,
-                const struct bpf_map *map, u32 map_index),
-       TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
-       TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
-                 " map_id=%d map_index=%d",
-                 __entry->prog_id,
-                 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
-                 __entry->ifindex, __entry->to_ifindex,
-                 __entry->err,
-                 __entry->map_id, __entry->map_index)
+                const void *tgt, int err,
+                const struct bpf_map *map, u32 index),
+       TP_ARGS(dev, xdp, tgt, err, map, index)
 );
 
-#ifndef __DEVMAP_OBJ_TYPE
-#define __DEVMAP_OBJ_TYPE
-struct _bpf_dtab_netdev {
-       struct net_device *dev;
-};
-#endif /* __DEVMAP_OBJ_TYPE */
-
-#define devmap_ifindex(fwd, map)                               \
-       ((map->map_type == BPF_MAP_TYPE_DEVMAP ||               \
-         map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ?          \
-         ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)
-
-#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)               \
-        trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),     \
-                               0, map, idx)
-
-#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err)      \
-        trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
-                                   err, map, idx)
-
 TRACE_EVENT(xdp_cpumap_kthread,
 
        TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
@@ -259,43 +246,38 @@ TRACE_EVENT(xdp_cpumap_enqueue,
 
 TRACE_EVENT(xdp_devmap_xmit,
 
-       TP_PROTO(const struct bpf_map *map, u32 map_index,
-                int sent, int drops,
-                const struct net_device *from_dev,
-                const struct net_device *to_dev, int err),
+       TP_PROTO(const struct net_device *from_dev,
+                const struct net_device *to_dev,
+                int sent, int drops, int err),
 
-       TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
+       TP_ARGS(from_dev, to_dev, sent, drops, err),
 
        TP_STRUCT__entry(
-               __field(int, map_id)
+               __field(int, from_ifindex)
                __field(u32, act)
-               __field(u32, map_index)
+               __field(int, to_ifindex)
                __field(int, drops)
                __field(int, sent)
-               __field(int, from_ifindex)
-               __field(int, to_ifindex)
                __field(int, err)
        ),
 
        TP_fast_assign(
-               __entry->map_id         = map->id;
+               __entry->from_ifindex   = from_dev->ifindex;
                __entry->act            = XDP_REDIRECT;
-               __entry->map_index      = map_index;
+               __entry->to_ifindex     = to_dev->ifindex;
                __entry->drops          = drops;
                __entry->sent           = sent;
-               __entry->from_ifindex   = from_dev->ifindex;
-               __entry->to_ifindex     = to_dev->ifindex;
                __entry->err            = err;
        ),
 
        TP_printk("ndo_xdp_xmit"
-                 " map_id=%d map_index=%d action=%s"
+                 " from_ifindex=%d to_ifindex=%d action=%s"
                  " sent=%d drops=%d"
-                 " from_ifindex=%d to_ifindex=%d err=%d",
-                 __entry->map_id, __entry->map_index,
+                 " err=%d",
+                 __entry->from_ifindex, __entry->to_ifindex,
                  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
                  __entry->sent, __entry->drops,
-                 __entry->from_ifindex, __entry->to_ifindex, __entry->err)
+                 __entry->err)
 );
 
 /* Expect users already include <net/xdp.h>, but not xdp_priv.h */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7df436da542d76ddd73eb9a83851071e6e2f4fe3..f1d74a2bd23493635afcbd6a7336c2fd2806f471 100644
@@ -107,6 +107,10 @@ enum bpf_cmd {
        BPF_MAP_LOOKUP_AND_DELETE_ELEM,
        BPF_MAP_FREEZE,
        BPF_BTF_GET_NEXT_ID,
+       BPF_MAP_LOOKUP_BATCH,
+       BPF_MAP_LOOKUP_AND_DELETE_BATCH,
+       BPF_MAP_UPDATE_BATCH,
+       BPF_MAP_DELETE_BATCH,
 };
 
 enum bpf_map_type {
@@ -136,6 +140,7 @@ enum bpf_map_type {
        BPF_MAP_TYPE_STACK,
        BPF_MAP_TYPE_SK_STORAGE,
        BPF_MAP_TYPE_DEVMAP_HASH,
+       BPF_MAP_TYPE_STRUCT_OPS,
 };
 
 /* Note that tracing related programs such as
@@ -174,6 +179,8 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
        BPF_PROG_TYPE_CGROUP_SOCKOPT,
        BPF_PROG_TYPE_TRACING,
+       BPF_PROG_TYPE_STRUCT_OPS,
+       BPF_PROG_TYPE_EXT,
 };
 
 enum bpf_attach_type {
@@ -357,7 +364,12 @@ enum bpf_attach_type {
 /* Enable memory-mapping BPF map */
 #define BPF_F_MMAPABLE         (1U << 10)
 
-/* flags for BPF_PROG_QUERY */
+/* Flags for BPF_PROG_QUERY. */
+
+/* Query effective (directly attached + inherited from ancestor cgroups)
+ * programs that will be executed for events within a cgroup.
+ * attach_flags with this flag are returned only for directly attached programs.
+ */
 #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
 
 enum bpf_stack_build_id_status {
@@ -397,6 +409,10 @@ union bpf_attr {
                __u32   btf_fd;         /* fd pointing to a BTF type data */
                __u32   btf_key_type_id;        /* BTF type_id of the key */
                __u32   btf_value_type_id;      /* BTF type_id of the value */
+               __u32   btf_vmlinux_value_type_id;/* BTF type_id of a kernel-
+                                                  * struct stored as the
+                                                  * map value
+                                                  */
        };
 
        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -409,6 +425,23 @@ union bpf_attr {
                __u64           flags;
        };
 
+       struct { /* struct used by BPF_MAP_*_BATCH commands */
+               __aligned_u64   in_batch;       /* start batch,
+                                                * NULL to start from beginning
+                                                */
+               __aligned_u64   out_batch;      /* output: next start batch */
+               __aligned_u64   keys;
+               __aligned_u64   values;
+               __u32           count;          /* input/output:
+                                                * input: # of key/value
+                                                * elements
+                                                * output: # of filled elements
+                                                */
+               __u32           map_fd;
+               __u64           elem_flags;
+               __u64           flags;
+       } batch;
+
        struct { /* anonymous struct used by BPF_PROG_LOAD command */
                __u32           prog_type;      /* one of enum bpf_prog_type */
                __u32           insn_cnt;
@@ -2703,7 +2736,8 @@ union bpf_attr {
  *
  * int bpf_send_signal(u32 sig)
  *     Description
- *             Send signal *sig* to the current task.
+ *             Send signal *sig* to the process of the current task.
+ *             The signal may be delivered to any of this process's threads.
  *     Return
  *             0 on success or successfully queued.
  *
@@ -2831,6 +2865,33 @@ union bpf_attr {
  *     Return
  *             On success, the strictly positive length of the string, including
  *             the trailing NUL character. On error, a negative value.
+ *
+ * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ *     Description
+ *             Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ *             *rcv_nxt* is the ack_seq to be sent out.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_send_signal_thread(u32 sig)
+ *     Description
+ *             Send signal *sig* to the thread corresponding to the current task.
+ *     Return
+ *             0 on success or successfully queued.
+ *
+ *             **-EBUSY** if work queue under nmi is full.
+ *
+ *             **-EINVAL** if *sig* is invalid.
+ *
+ *             **-EPERM** if no permission to send the *sig*.
+ *
+ *             **-EAGAIN** if bpf program can try again.
+ *
+ * u64 bpf_jiffies64(void)
+ *     Description
+ *             Obtain the 64bit jiffies
+ *     Return
+ *             The 64 bit jiffies
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2948,7 +3009,10 @@ union bpf_attr {
        FN(probe_read_user),            \
        FN(probe_read_kernel),          \
        FN(probe_read_user_str),        \
-       FN(probe_read_kernel_str),
+       FN(probe_read_kernel_str),      \
+       FN(tcp_send_ack),               \
+       FN(send_signal_thread),         \
+       FN(jiffies64),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3349,7 +3413,7 @@ struct bpf_map_info {
        __u32 map_flags;
        char  name[BPF_OBJ_NAME_LEN];
        __u32 ifindex;
-       __u32 :32;
+       __u32 btf_vmlinux_value_type_id;
        __u64 netns_dev;
        __u64 netns_ino;
        __u32 btf_id;
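
The helper documentation above distinguishes bpf_send_signal(), which may
deliver to any thread of the current process, from the new
bpf_send_signal_thread(), which targets the exact current thread. A small
hypothetical use, with the tracepoint and the pid filter chosen purely for
illustration (target_tgid would be filled in by the loader):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

const volatile __u32 target_tgid;	/* set by the loader before load */

SEC("tracepoint/syscalls/sys_enter_nanosleep")
int notify_thread(void *ctx)
{
	/* upper 32 bits of the pid_tgid value are the process id (tgid) */
	if ((bpf_get_current_pid_tgid() >> 32) == target_tgid)
		bpf_send_signal_thread(10 /* SIGUSR1 */);
	return 0;
}

char _license[] SEC("license") = "GPL";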
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 1a2898c482eef3cbb88bd760ff3e6caa8d9816b0..5a667107ad2cce1980e01d3c7bd6273de8439990 100644
@@ -146,6 +146,12 @@ enum {
        BTF_VAR_GLOBAL_EXTERN = 2,
 };
 
+enum btf_func_linkage {
+       BTF_FUNC_STATIC = 0,
+       BTF_FUNC_GLOBAL = 1,
+       BTF_FUNC_EXTERN = 2,
+};
+
 /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
  * additional information related to the variable such as its linkage.
  */
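
BTF_FUNC_GLOBAL above is what enables point 1's function-by-function
verification: a non-static function is checked once against its own BTF
signature, and calls to it are then verified against that signature rather
than by re-walking the callee with the caller's concrete state. A small
sketch in the style of the new test_global_funcs selftests (all names made
up):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Non-static and non-inlined, so it is emitted as a BTF global function
 * and verified independently of its callers. */
__attribute__((noinline))
int lower_bound(int x, int floor)
{
	return x < floor ? floor : x;
}

SEC("socket")
int main_prog(struct __sk_buff *skb)
{
	return lower_bound(skb->len, 64);
}

char _license[] SEC("license") = "GPL";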
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index d4f330351f87d67099e18dac673dfc97bf9d7847..046ce5d980331cbc4407e82b95fa25ddf45c7e9b 100644
@@ -27,3 +27,6 @@ endif
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
 endif
+ifeq ($(CONFIG_BPF_JIT),y)
+obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
+endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index f0d19bbb9211e108498a74f20cb1709d0da0f8f1..95d77770353c9bb5af0311f5813471e11d70c86e 100644
@@ -503,6 +503,8 @@ const struct bpf_map_ops array_map_ops = {
        .map_mmap = array_map_mmap,
        .map_seq_show_elem = array_map_seq_show_elem,
        .map_check_btf = array_map_check_btf,
+       .map_lookup_batch = generic_map_lookup_batch,
+       .map_update_batch = generic_map_update_batch,
 };
 
 const struct bpf_map_ops percpu_array_map_ops = {
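
With array_map_ops now wired to the generic batch implementations above, an
entire map can be read in one BPF_MAP_LOOKUP_BATCH syscall instead of a
get_next_key/lookup loop. A hedged userspace sketch, assuming the
bpf_map_lookup_batch() wrapper this series adds to libbpf (error handling
follows that era's libbpf convention, where errno ENOENT marks the end of
the map):

#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Fetch up to max_entries key/value pairs in a single syscall.  Passing a
 * NULL in_batch means "start from the beginning of the map". */
static int dump_map(int map_fd, __u32 *keys, __u64 *vals, __u32 max_entries)
{
	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts);
	__u32 out_batch, count = max_entries;
	int err;

	err = bpf_map_lookup_batch(map_fd, NULL, &out_batch,
				   keys, vals, &count, &opts);
	if (err && errno != ENOENT)	/* ENOENT: whole map consumed */
		return -errno;
	return count;			/* number of elements filled */
}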
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
new file mode 100644
index 0000000..8ad1c9e
--- /dev/null
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/slab.h>
+#include <linux/numa.h>
+#include <linux/seq_file.h>
+#include <linux/refcount.h>
+#include <linux/mutex.h>
+
+enum bpf_struct_ops_state {
+       BPF_STRUCT_OPS_STATE_INIT,
+       BPF_STRUCT_OPS_STATE_INUSE,
+       BPF_STRUCT_OPS_STATE_TOBEFREE,
+};
+
+#define BPF_STRUCT_OPS_COMMON_VALUE                    \
+       refcount_t refcnt;                              \
+       enum bpf_struct_ops_state state
+
+struct bpf_struct_ops_value {
+       BPF_STRUCT_OPS_COMMON_VALUE;
+       char data[0] ____cacheline_aligned_in_smp;
+};
+
+struct bpf_struct_ops_map {
+       struct bpf_map map;
+       const struct bpf_struct_ops *st_ops;
+       /* protect map_update */
+       struct mutex lock;
+       /* progs has all the bpf_prog that is populated
+        * to the func ptr of the kernel's struct
+        * (in kvalue.data).
+        */
+       struct bpf_prog **progs;
+       /* image is a page that has all the trampolines
+        * that stores the func args before calling the bpf_prog.
+        * A PAGE_SIZE "image" is enough to store all trampoline for
+        * "progs[]".
+        */
+       void *image;
+       /* uvalue->data stores the kernel struct
+        * (e.g. tcp_congestion_ops) that is more useful
+        * to userspace than the kvalue.  For example,
+        * the bpf_prog's id is stored instead of the kernel
+        * address of a func ptr.
+        */
+       struct bpf_struct_ops_value *uvalue;
+       /* kvalue.data stores the actual kernel's struct
+        * (e.g. tcp_congestion_ops) that will be
+        * registered to the kernel subsystem.
+        */
+       struct bpf_struct_ops_value kvalue;
+};
+
+#define VALUE_PREFIX "bpf_struct_ops_"
+#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
+
+/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
+ * the map's value exposed to the userspace and its btf-type-id is
+ * stored at the map->btf_vmlinux_value_type_id.
+ *
+ */
+#define BPF_STRUCT_OPS_TYPE(_name)                             \
+extern struct bpf_struct_ops bpf_##_name;                      \
+                                                               \
+struct bpf_struct_ops_##_name {                                                \
+       BPF_STRUCT_OPS_COMMON_VALUE;                            \
+       struct _name data ____cacheline_aligned_in_smp;         \
+};
+#include "bpf_struct_ops_types.h"
+#undef BPF_STRUCT_OPS_TYPE
+
+enum {
+#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
+#include "bpf_struct_ops_types.h"
+#undef BPF_STRUCT_OPS_TYPE
+       __NR_BPF_STRUCT_OPS_TYPE,
+};
+
+static struct bpf_struct_ops * const bpf_struct_ops[] = {
+#define BPF_STRUCT_OPS_TYPE(_name)                             \
+       [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
+#include "bpf_struct_ops_types.h"
+#undef BPF_STRUCT_OPS_TYPE
+};
+
+const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
+};
+
+const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
+};
+
+static const struct btf_type *module_type;
+
+void bpf_struct_ops_init(struct btf *btf)
+{
+       s32 type_id, value_id, module_id;
+       const struct btf_member *member;
+       struct bpf_struct_ops *st_ops;
+       struct bpf_verifier_log log = {};
+       const struct btf_type *t;
+       char value_name[128];
+       const char *mname;
+       u32 i, j;
+
+       /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
+#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
+#include "bpf_struct_ops_types.h"
+#undef BPF_STRUCT_OPS_TYPE
+
+       module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
+       if (module_id < 0) {
+               pr_warn("Cannot find struct module in btf_vmlinux\n");
+               return;
+       }
+       module_type = btf_type_by_id(btf, module_id);
+
+       for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
+               st_ops = bpf_struct_ops[i];
+
+               if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
+                   sizeof(value_name)) {
+                       pr_warn("struct_ops name %s is too long\n",
+                               st_ops->name);
+                       continue;
+               }
+               sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);
+
+               value_id = btf_find_by_name_kind(btf, value_name,
+                                                BTF_KIND_STRUCT);
+               if (value_id < 0) {
+                       pr_warn("Cannot find struct %s in btf_vmlinux\n",
+                               value_name);
+                       continue;
+               }
+
+               type_id = btf_find_by_name_kind(btf, st_ops->name,
+                                               BTF_KIND_STRUCT);
+               if (type_id < 0) {
+                       pr_warn("Cannot find struct %s in btf_vmlinux\n",
+                               st_ops->name);
+                       continue;
+               }
+               t = btf_type_by_id(btf, type_id);
+               if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
+                       pr_warn("Cannot support #%u members in struct %s\n",
+                               btf_type_vlen(t), st_ops->name);
+                       continue;
+               }
+
+               for_each_member(j, t, member) {
+                       const struct btf_type *func_proto;
+
+                       mname = btf_name_by_offset(btf, member->name_off);
+                       if (!*mname) {
+                               pr_warn("anon member in struct %s is not supported\n",
+                                       st_ops->name);
+                               break;
+                       }
+
+                       if (btf_member_bitfield_size(t, member)) {
+                               pr_warn("bit field member %s in struct %s is not supported\n",
+                                       mname, st_ops->name);
+                               break;
+                       }
+
+                       func_proto = btf_type_resolve_func_ptr(btf,
+                                                              member->type,
+                                                              NULL);
+                       if (func_proto &&
+                           btf_distill_func_proto(&log, btf,
+                                                  func_proto, mname,
+                                                  &st_ops->func_models[j])) {
+                               pr_warn("Error in parsing func ptr %s in struct %s\n",
+                                       mname, st_ops->name);
+                               break;
+                       }
+               }
+
+               if (j == btf_type_vlen(t)) {
+                       if (st_ops->init(btf)) {
+                               pr_warn("Error in init bpf_struct_ops %s\n",
+                                       st_ops->name);
+                       } else {
+                               st_ops->type_id = type_id;
+                               st_ops->type = t;
+                               st_ops->value_id = value_id;
+                               st_ops->value_type = btf_type_by_id(btf,
+                                                                   value_id);
+                       }
+               }
+       }
+}
+
+extern struct btf *btf_vmlinux;
+
+static const struct bpf_struct_ops *
+bpf_struct_ops_find_value(u32 value_id)
+{
+       unsigned int i;
+
+       if (!value_id || !btf_vmlinux)
+               return NULL;
+
+       for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
+               if (bpf_struct_ops[i]->value_id == value_id)
+                       return bpf_struct_ops[i];
+       }
+
+       return NULL;
+}
+
+const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
+{
+       unsigned int i;
+
+       if (!type_id || !btf_vmlinux)
+               return NULL;
+
+       for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
+               if (bpf_struct_ops[i]->type_id == type_id)
+                       return bpf_struct_ops[i];
+       }
+
+       return NULL;
+}
+
+static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
+                                          void *next_key)
+{
+       if (key && *(u32 *)key == 0)
+               return -ENOENT;
+
+       *(u32 *)next_key = 0;
+       return 0;
+}
+
+int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
+                                      void *value)
+{
+       struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+       struct bpf_struct_ops_value *uvalue, *kvalue;
+       enum bpf_struct_ops_state state;
+
+       if (unlikely(*(u32 *)key != 0))
+               return -ENOENT;
+
+       kvalue = &st_map->kvalue;
+       /* Pair with smp_store_release() during map_update */
+       state = smp_load_acquire(&kvalue->state);
+       if (state == BPF_STRUCT_OPS_STATE_INIT) {
+               memset(value, 0, map->value_size);
+               return 0;
+       }
+
+       /* No lock is needed.  state and refcnt do not need
+        * to be read together atomically.
+        */
+       uvalue = (struct bpf_struct_ops_value *)value;
+       memcpy(uvalue, st_map->uvalue, map->value_size);
+       uvalue->state = state;
+       refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));
+
+       return 0;
+}
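
Userspace reads element 0 back through the lookup path above; the func ptr slots of the returned value then carry prog ids rather than kernel addresses. A minimal sketch using libbpf's syscall wrapper (map_fd and obtaining the value struct layout from vmlinux BTF are assumed for illustration):

        __u32 key = 0;
        /* layout assumed to mirror "struct bpf_struct_ops_tcp_congestion_ops"
         * as dumped from vmlinux BTF
         */
        struct bpf_struct_ops_tcp_congestion_ops val;

        if (!bpf_map_lookup_elem(map_fd, &key, &val))
                printf("state=%u\n", (unsigned int)val.state);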
+
+static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       return ERR_PTR(-EINVAL);
+}
+
+static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
+{
+       const struct btf_type *t = st_map->st_ops->type;
+       u32 i;
+
+       for (i = 0; i < btf_type_vlen(t); i++) {
+               if (st_map->progs[i]) {
+                       bpf_prog_put(st_map->progs[i]);
+                       st_map->progs[i] = NULL;
+               }
+       }
+}
+
+static int check_zero_holes(const struct btf_type *t, void *data)
+{
+       const struct btf_member *member;
+       u32 i, moff, msize, prev_mend = 0;
+       const struct btf_type *mtype;
+
+       for_each_member(i, t, member) {
+               moff = btf_member_bit_offset(t, member) / 8;
+               if (moff > prev_mend &&
+                   memchr_inv(data + prev_mend, 0, moff - prev_mend))
+                       return -EINVAL;
+
+               mtype = btf_type_by_id(btf_vmlinux, member->type);
+               mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
+                                        NULL, NULL);
+               if (IS_ERR(mtype))
+                       return PTR_ERR(mtype);
+               prev_mend = moff + msize;
+       }
+
+       if (t->size > prev_mend &&
+           memchr_inv(data + prev_mend, 0, t->size - prev_mend))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
+                                         void *value, u64 flags)
+{
+       struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+       const struct bpf_struct_ops *st_ops = st_map->st_ops;
+       struct bpf_struct_ops_value *uvalue, *kvalue;
+       const struct btf_member *member;
+       const struct btf_type *t = st_ops->type;
+       void *udata, *kdata;
+       int prog_fd, err = 0;
+       void *image;
+       u32 i;
+
+       if (flags)
+               return -EINVAL;
+
+       if (*(u32 *)key != 0)
+               return -E2BIG;
+
+       err = check_zero_holes(st_ops->value_type, value);
+       if (err)
+               return err;
+
+       uvalue = (struct bpf_struct_ops_value *)value;
+       err = check_zero_holes(t, uvalue->data);
+       if (err)
+               return err;
+
+       if (uvalue->state || refcount_read(&uvalue->refcnt))
+               return -EINVAL;
+
+       uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
+       kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
+
+       mutex_lock(&st_map->lock);
+
+       if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
+               err = -EBUSY;
+               goto unlock;
+       }
+
+       memcpy(uvalue, value, map->value_size);
+
+       udata = &uvalue->data;
+       kdata = &kvalue->data;
+       image = st_map->image;
+
+       for_each_member(i, t, member) {
+               const struct btf_type *mtype, *ptype;
+               struct bpf_prog *prog;
+               u32 moff;
+
+               moff = btf_member_bit_offset(t, member) / 8;
+               ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
+               if (ptype == module_type) {
+                       if (*(void **)(udata + moff))
+                               goto reset_unlock;
+                       *(void **)(kdata + moff) = BPF_MODULE_OWNER;
+                       continue;
+               }
+
+               err = st_ops->init_member(t, member, kdata, udata);
+               if (err < 0)
+                       goto reset_unlock;
+
+               /* The ->init_member() has handled this member */
+               if (err > 0)
+                       continue;
+
+               /* If st_ops->init_member does not handle it,
+                * we will only handle func ptrs and zero-initialized
+                * members here.  Reject everything else.
+                */
+
+               /* All non-func-ptr members must be 0 */
+               if (!ptype || !btf_type_is_func_proto(ptype)) {
+                       u32 msize;
+
+                       mtype = btf_type_by_id(btf_vmlinux, member->type);
+                       mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
+                                                NULL, NULL);
+                       if (IS_ERR(mtype)) {
+                               err = PTR_ERR(mtype);
+                               goto reset_unlock;
+                       }
+
+                       if (memchr_inv(udata + moff, 0, msize)) {
+                               err = -EINVAL;
+                               goto reset_unlock;
+                       }
+
+                       continue;
+               }
+
+               prog_fd = (int)(*(unsigned long *)(udata + moff));
+               /* Similar check as the attr->attach_prog_fd */
+               if (!prog_fd)
+                       continue;
+
+               prog = bpf_prog_get(prog_fd);
+               if (IS_ERR(prog)) {
+                       err = PTR_ERR(prog);
+                       goto reset_unlock;
+               }
+               st_map->progs[i] = prog;
+
+               if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
+                   prog->aux->attach_btf_id != st_ops->type_id ||
+                   prog->expected_attach_type != i) {
+                       err = -EINVAL;
+                       goto reset_unlock;
+               }
+
+               err = arch_prepare_bpf_trampoline(image,
+                                                 st_map->image + PAGE_SIZE,
+                                                 &st_ops->func_models[i], 0,
+                                                 &prog, 1, NULL, 0, NULL);
+               if (err < 0)
+                       goto reset_unlock;
+
+               *(void **)(kdata + moff) = image;
+               image += err;
+
+               /* store the prog id in udata */
+               *(unsigned long *)(udata + moff) = prog->aux->id;
+       }
+
+       refcount_set(&kvalue->refcnt, 1);
+       bpf_map_inc(map);
+
+       set_memory_ro((long)st_map->image, 1);
+       set_memory_x((long)st_map->image, 1);
+       err = st_ops->reg(kdata);
+       if (likely(!err)) {
+               /* Pair with smp_load_acquire() during lookup_elem().
+                * It ensures the above udata updates (e.g. prog->aux->id)
+                * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
+                */
+               smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
+               goto unlock;
+       }
+
+       /* Error during st_ops->reg().  It is very unlikely since
+        * the above init_member() should have caught it earlier
+        * before reg().  The only possibility is a race in
+        * registering the same struct_ops name with the subsystem
+        * through different struct_ops maps.
+        */
+       set_memory_nx((long)st_map->image, 1);
+       set_memory_rw((long)st_map->image, 1);
+       bpf_map_put(map);
+
+reset_unlock:
+       bpf_struct_ops_map_put_progs(st_map);
+       memset(uvalue, 0, map->value_size);
+       memset(kvalue, 0, map->value_size);
+unlock:
+       mutex_unlock(&st_map->lock);
+       return err;
+}
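
The expected flow into the update path above, from userspace: load the struct_ops progs, place their fds in the func ptr slots of the value, then update key 0 exactly once (registration happens inside the update). A rough sketch; the prog fd variables are illustrative, field names follow tcp_congestion_ops, and the libbpf struct_ops support added elsewhere in this series automates these steps:

        __u32 key = 0;
        struct bpf_struct_ops_tcp_congestion_ops val = {};

        /* the kernel reads each slot as a prog fd and rewrites it
         * with the prog id on success
         */
        val.data.ssthresh = (void *)(unsigned long)ssthresh_prog_fd;
        val.data.cong_avoid = (void *)(unsigned long)cong_avoid_prog_fd;
        memcpy(val.data.name, "bpf_cc", sizeof("bpf_cc"));

        err = bpf_map_update_elem(map_fd, &key, &val, 0); /* flags must be 0 */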
+
+static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
+{
+       enum bpf_struct_ops_state prev_state;
+       struct bpf_struct_ops_map *st_map;
+
+       st_map = (struct bpf_struct_ops_map *)map;
+       prev_state = cmpxchg(&st_map->kvalue.state,
+                            BPF_STRUCT_OPS_STATE_INUSE,
+                            BPF_STRUCT_OPS_STATE_TOBEFREE);
+       if (prev_state == BPF_STRUCT_OPS_STATE_INUSE) {
+               st_map->st_ops->unreg(&st_map->kvalue.data);
+               if (refcount_dec_and_test(&st_map->kvalue.refcnt))
+                       bpf_map_put(map);
+       }
+
+       return 0;
+}
+
+static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
+                                            struct seq_file *m)
+{
+       void *value;
+       int err;
+
+       value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
+       if (!value)
+               return;
+
+       err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+       if (!err) {
+               btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
+                                 value, m);
+               seq_puts(m, "\n");
+       }
+
+       kfree(value);
+}
+
+static void bpf_struct_ops_map_free(struct bpf_map *map)
+{
+       struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+
+       if (st_map->progs)
+               bpf_struct_ops_map_put_progs(st_map);
+       bpf_map_area_free(st_map->progs);
+       bpf_jit_free_exec(st_map->image);
+       bpf_map_area_free(st_map->uvalue);
+       bpf_map_area_free(st_map);
+}
+
+static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
+{
+       if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
+           attr->map_flags || !attr->btf_vmlinux_value_type_id)
+               return -EINVAL;
+       return 0;
+}
+
+static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
+{
+       const struct bpf_struct_ops *st_ops;
+       size_t map_total_size, st_map_size;
+       struct bpf_struct_ops_map *st_map;
+       const struct btf_type *t, *vt;
+       struct bpf_map_memory mem;
+       struct bpf_map *map;
+       int err;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
+       if (!st_ops)
+               return ERR_PTR(-ENOTSUPP);
+
+       vt = st_ops->value_type;
+       if (attr->value_size != vt->size)
+               return ERR_PTR(-EINVAL);
+
+       t = st_ops->type;
+
+       st_map_size = sizeof(*st_map) +
+               /* kvalue stores the
+                * struct bpf_struct_ops_tcp_congestion_ops
+                */
+               (vt->size - sizeof(struct bpf_struct_ops_value));
+       map_total_size = st_map_size +
+               /* uvalue */
+               vt->size +
+               /* struct bpf_prog **progs */
+               btf_type_vlen(t) * sizeof(struct bpf_prog *);
+       err = bpf_map_charge_init(&mem, map_total_size);
+       if (err < 0)
+               return ERR_PTR(err);
+
+       st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
+       if (!st_map) {
+               bpf_map_charge_finish(&mem);
+               return ERR_PTR(-ENOMEM);
+       }
+       st_map->st_ops = st_ops;
+       map = &st_map->map;
+
+       st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
+       st_map->progs =
+               bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *),
+                                  NUMA_NO_NODE);
+       st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
+       if (!st_map->uvalue || !st_map->progs || !st_map->image) {
+               bpf_struct_ops_map_free(map);
+               bpf_map_charge_finish(&mem);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       mutex_init(&st_map->lock);
+       set_vm_flush_reset_perms(st_map->image);
+       bpf_map_init_from_attr(map, attr);
+       bpf_map_charge_move(&map->memory, &mem);
+
+       return map;
+}
+
+const struct bpf_map_ops bpf_struct_ops_map_ops = {
+       .map_alloc_check = bpf_struct_ops_map_alloc_check,
+       .map_alloc = bpf_struct_ops_map_alloc,
+       .map_free = bpf_struct_ops_map_free,
+       .map_get_next_key = bpf_struct_ops_map_get_next_key,
+       .map_lookup_elem = bpf_struct_ops_map_lookup_elem,
+       .map_delete_elem = bpf_struct_ops_map_delete_elem,
+       .map_update_elem = bpf_struct_ops_map_update_elem,
+       .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
+};
+
+/* "const void *" because some subsystem is
+ * passing a const (e.g. const struct tcp_congestion_ops *)
+ */
+bool bpf_struct_ops_get(const void *kdata)
+{
+       struct bpf_struct_ops_value *kvalue;
+
+       kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+
+       return refcount_inc_not_zero(&kvalue->refcnt);
+}
+
+void bpf_struct_ops_put(const void *kdata)
+{
+       struct bpf_struct_ops_value *kvalue;
+
+       kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+       if (refcount_dec_and_test(&kvalue->refcnt)) {
+               struct bpf_struct_ops_map *st_map;
+
+               st_map = container_of(kvalue, struct bpf_struct_ops_map,
+                                     kvalue);
+               bpf_map_put(&st_map->map);
+       }
+}
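
Elsewhere in this series (include/linux/bpf.h) these two helpers are folded into the subsystems' existing module refcounting, roughly:

        static inline bool bpf_try_module_get(const void *data, struct module *owner)
        {
                if (owner == BPF_MODULE_OWNER)
                        return bpf_struct_ops_get(data);
                else
                        return try_module_get(owner);
        }

        static inline void bpf_module_put(const void *data, struct module *owner)
        {
                if (owner == BPF_MODULE_OWNER)
                        bpf_struct_ops_put(data);
                else
                        module_put(owner);
        }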
diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
new file mode 100644 (file)
index 0000000..066d83e
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* internal file - do not include directly */
+
+#ifdef CONFIG_BPF_JIT
+#ifdef CONFIG_INET
+#include <net/tcp.h>
+BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
+#endif
+#endif
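
Exposing another kernel struct through this mechanism means adding one BPF_STRUCT_OPS_TYPE() line to this header and defining the matching bpf_<name> descriptor in the owning subsystem. A hypothetical sketch (all names invented for illustration):

        /* bpf_struct_ops_types.h */
        BPF_STRUCT_OPS_TYPE(my_ops)

        /* owning subsystem */
        static int my_ops_init(struct btf *btf) { return 0; }
        /* vet or copy members that are not func ptrs; >0 means handled */
        static int my_ops_init_member(const struct btf_type *t,
                                      const struct btf_member *member,
                                      void *kdata, const void *udata);
        static int my_ops_reg(void *kdata);    /* called once from map_update */
        static void my_ops_unreg(void *kdata); /* called from map_delete */

        struct bpf_struct_ops bpf_my_ops = {
                .verifier_ops   = &bpf_my_ops_verifier_ops,
                .init           = my_ops_init,
                .init_member    = my_ops_init_member,
                .reg            = my_ops_reg,
                .unreg          = my_ops_unreg,
                .name           = "my_ops",
        };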
index ed20758847247c988c41cbd5610368160b22b358..32963b6d5a9c4e29272d8846eba5f3a171700427 100644 (file)
  */
 #define BTF_MAX_SIZE (16 * 1024 * 1024)
 
-#define for_each_member(i, struct_type, member)                        \
-       for (i = 0, member = btf_type_member(struct_type);      \
-            i < btf_type_vlen(struct_type);                    \
-            i++, member++)
-
 #define for_each_member_from(i, from, struct_type, member)             \
        for (i = from, member = btf_type_member(struct_type) + from;    \
             i < btf_type_vlen(struct_type);                            \
@@ -281,6 +276,11 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
        [BTF_KIND_DATASEC]      = "DATASEC",
 };
 
+static const char *btf_type_str(const struct btf_type *t)
+{
+       return btf_kind_str[BTF_INFO_KIND(t->info)];
+}
+
 struct btf_kind_operations {
        s32 (*check_meta)(struct btf_verifier_env *env,
                          const struct btf_type *t,
@@ -382,6 +382,65 @@ static bool btf_type_is_datasec(const struct btf_type *t)
        return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
 }
 
+s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
+{
+       const struct btf_type *t;
+       const char *tname;
+       u32 i;
+
+       for (i = 1; i <= btf->nr_types; i++) {
+               t = btf->types[i];
+               if (BTF_INFO_KIND(t->info) != kind)
+                       continue;
+
+               tname = btf_name_by_offset(btf, t->name_off);
+               if (!strcmp(tname, name))
+                       return i;
+       }
+
+       return -ENOENT;
+}
+
+const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
+                                              u32 id, u32 *res_id)
+{
+       const struct btf_type *t = btf_type_by_id(btf, id);
+
+       while (btf_type_is_modifier(t)) {
+               id = t->type;
+               t = btf_type_by_id(btf, t->type);
+       }
+
+       if (res_id)
+               *res_id = id;
+
+       return t;
+}
+
+const struct btf_type *btf_type_resolve_ptr(const struct btf *btf,
+                                           u32 id, u32 *res_id)
+{
+       const struct btf_type *t;
+
+       t = btf_type_skip_modifiers(btf, id, NULL);
+       if (!btf_type_is_ptr(t))
+               return NULL;
+
+       return btf_type_skip_modifiers(btf, t->type, res_id);
+}
+
+const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
+                                                u32 id, u32 *res_id)
+{
+       const struct btf_type *ptype;
+
+       ptype = btf_type_resolve_ptr(btf, id, res_id);
+       if (ptype && btf_type_is_func_proto(ptype))
+               return ptype;
+
+       return NULL;
+}
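
A short sketch of how the resolvers above compose, mirroring their use in bpf_struct_ops_init(): for a member such as "void (*set_state)(struct sock *sk, u8 new_state);", member->type is a PTR whose modifier-skipped target is a FUNC_PROTO:

        const struct btf_type *func_proto;

        /* NULL when the member is not a pointer to a function */
        func_proto = btf_type_resolve_func_ptr(btf, member->type, NULL);
        if (func_proto)
                /* record arg/ret sizes so a trampoline can be built later */
                err = btf_distill_func_proto(log, btf, func_proto, mname,
                                             &st_ops->func_models[j]);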
+
 /* Types that act only as a source, not sink or intermediate
  * type when resolving.
  */
@@ -446,30 +505,6 @@ static const char *btf_int_encoding_str(u8 encoding)
                return "UNKN";
 }
 
-static u16 btf_type_vlen(const struct btf_type *t)
-{
-       return BTF_INFO_VLEN(t->info);
-}
-
-static bool btf_type_kflag(const struct btf_type *t)
-{
-       return BTF_INFO_KFLAG(t->info);
-}
-
-static u32 btf_member_bit_offset(const struct btf_type *struct_type,
-                            const struct btf_member *member)
-{
-       return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset)
-                                          : member->offset;
-}
-
-static u32 btf_member_bitfield_size(const struct btf_type *struct_type,
-                                   const struct btf_member *member)
-{
-       return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset)
-                                          : 0;
-}
-
 static u32 btf_type_int(const struct btf_type *t)
 {
        return *(u32 *)(t + 1);
@@ -480,11 +515,6 @@ static const struct btf_array *btf_type_array(const struct btf_type *t)
        return (const struct btf_array *)(t + 1);
 }
 
-static const struct btf_member *btf_type_member(const struct btf_type *t)
-{
-       return (const struct btf_member *)(t + 1);
-}
-
 static const struct btf_enum *btf_type_enum(const struct btf_type *t)
 {
        return (const struct btf_enum *)(t + 1);
@@ -1057,7 +1087,7 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
  * *elem_type: same as return type ("struct X")
  * *total_nelems: 1
  */
-static const struct btf_type *
+const struct btf_type *
 btf_resolve_size(const struct btf *btf, const struct btf_type *type,
                 u32 *type_size, const struct btf_type **elem_type,
                 u32 *total_nelems)
@@ -1111,8 +1141,10 @@ resolved:
                return ERR_PTR(-EINVAL);
 
        *type_size = nelems * size;
-       *total_nelems = nelems;
-       *elem_type = type;
+       if (total_nelems)
+               *total_nelems = nelems;
+       if (elem_type)
+               *elem_type = type;
 
        return array_type ? : type;
 }
@@ -1826,7 +1858,10 @@ static void btf_modifier_seq_show(const struct btf *btf,
                                  u32 type_id, void *data,
                                  u8 bits_offset, struct seq_file *m)
 {
-       t = btf_type_id_resolve(btf, &type_id);
+       if (btf->resolved_ids)
+               t = btf_type_id_resolve(btf, &type_id);
+       else
+               t = btf_type_skip_modifiers(btf, type_id, NULL);
 
        btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
 }
@@ -2621,8 +2656,8 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env,
                return -EINVAL;
        }
 
-       if (btf_type_vlen(t)) {
-               btf_verifier_log_type(env, t, "vlen != 0");
+       if (btf_type_vlen(t) > BTF_FUNC_GLOBAL) {
+               btf_verifier_log_type(env, t, "Invalid func linkage");
                return -EINVAL;
        }
 
@@ -3476,7 +3511,8 @@ static u8 bpf_ctx_convert_map[] = {
 
 static const struct btf_member *
 btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
-                     const struct btf_type *t, enum bpf_prog_type prog_type)
+                     const struct btf_type *t, enum bpf_prog_type prog_type,
+                     int arg)
 {
        const struct btf_type *conv_struct;
        const struct btf_type *ctx_struct;
@@ -3497,12 +3533,13 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
                 * is not supported yet.
                 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
                 */
-               bpf_log(log, "BPF program ctx type is not a struct\n");
+               if (log->level & BPF_LOG_LEVEL)
+                       bpf_log(log, "arg#%d type is not a struct\n", arg);
                return NULL;
        }
        tname = btf_name_by_offset(btf, t->name_off);
        if (!tname) {
-               bpf_log(log, "BPF program ctx struct doesn't have a name\n");
+               bpf_log(log, "arg#%d struct doesn't have a name\n", arg);
                return NULL;
        }
        /* prog_type is valid bpf program type. No need for bounds check. */
@@ -3535,11 +3572,12 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
 static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
                                     struct btf *btf,
                                     const struct btf_type *t,
-                                    enum bpf_prog_type prog_type)
+                                    enum bpf_prog_type prog_type,
+                                    int arg)
 {
        const struct btf_member *prog_ctx_type, *kern_ctx_type;
 
-       prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type);
+       prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg);
        if (!prog_ctx_type)
                return -ENOENT;
        kern_ctx_type = prog_ctx_type + 1;
@@ -3605,6 +3643,8 @@ struct btf *btf_parse_vmlinux(void)
                goto errout;
        }
 
+       bpf_struct_ops_init(btf);
+
        btf_verifier_env_free(env);
        refcount_set(&btf->refcnt, 1);
        return btf;
@@ -3677,7 +3717,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
        /* skip modifiers */
        while (btf_type_is_modifier(t))
                t = btf_type_by_id(btf, t->type);
-       if (btf_type_is_int(t))
+       if (btf_type_is_int(t) || btf_type_is_enum(t))
                /* accessing a scalar */
                return true;
        if (!btf_type_is_ptr(t)) {
@@ -3697,10 +3737,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 
        /* this is a pointer to another type */
        info->reg_type = PTR_TO_BTF_ID;
-       info->btf_id = t->type;
 
        if (tgt_prog) {
-               ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type);
+               ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
                if (ret > 0) {
                        info->btf_id = ret;
                        return true;
@@ -3708,10 +3747,14 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                        return false;
                }
        }
+
+       info->btf_id = t->type;
        t = btf_type_by_id(btf, t->type);
        /* skip modifiers */
-       while (btf_type_is_modifier(t))
+       while (btf_type_is_modifier(t)) {
+               info->btf_id = t->type;
                t = btf_type_by_id(btf, t->type);
+       }
        if (!btf_type_is_struct(t)) {
                bpf_log(log,
                        "func '%s' arg%d type %s is not a struct\n",
@@ -3737,23 +3780,57 @@ int btf_struct_access(struct bpf_verifier_log *log,
 again:
        tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
        if (!btf_type_is_struct(t)) {
-               bpf_log(log, "Type '%s' is not a struct", tname);
+               bpf_log(log, "Type '%s' is not a struct\n", tname);
                return -EINVAL;
        }
 
-       for_each_member(i, t, member) {
-               if (btf_member_bitfield_size(t, member))
-                       /* bitfields are not supported yet */
-                       continue;
+       if (off + size > t->size) {
+               bpf_log(log, "access beyond struct %s at off %u size %u\n",
+                       tname, off, size);
+               return -EACCES;
+       }
 
+       for_each_member(i, t, member) {
                /* offset of the field in bytes */
                moff = btf_member_bit_offset(t, member) / 8;
                if (off + size <= moff)
                        /* won't find anything, field is already too far */
                        break;
+
+               if (btf_member_bitfield_size(t, member)) {
+                       u32 end_bit = btf_member_bit_offset(t, member) +
+                               btf_member_bitfield_size(t, member);
+
+                       /* off <= moff instead of off == moff because clang
+                        * does not generate a BTF member for anonymous
+                        * bitfield like the ":16" here:
+                        * struct {
+                        *      int :16;
+                        *      int x:8;
+                        * };
+                        */
+                       if (off <= moff &&
+                           BITS_ROUNDUP_BYTES(end_bit) <= off + size)
+                               return SCALAR_VALUE;
+
+                       /* off may be accessing a following member
+                        *
+                        * or
+                        *
+                        * Doing partial access at either end of this
+                        * bitfield.  Continue on this case also to
+                        * treat it as not accessing this bitfield
+                        * and eventually error out as field not
+                        * found to keep it simple.
+                        * It could be relaxed if there was a legit
+                        * partial access case later.
+                        */
+                       continue;
+               }
+
                /* In case of "off" is pointing to holes of a struct */
                if (off < moff)
-                       continue;
+                       break;
 
                /* type of the field */
                mtype = btf_type_by_id(btf_vmlinux, member->type);
@@ -4043,11 +4120,158 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
        return 0;
 }
 
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog)
+/* Compare BTFs of two functions assuming only scalars and pointers to context.
+ * t1 points to BTF_KIND_FUNC in btf1
+ * t2 points to BTF_KIND_FUNC in btf2
+ * Returns:
+ * EINVAL - function prototype mismatch
+ * EFAULT - verifier bug
+ * 0 - 99% match. The last 1% is validated by the verifier.
+ */
+int btf_check_func_type_match(struct bpf_verifier_log *log,
+                             struct btf *btf1, const struct btf_type *t1,
+                             struct btf *btf2, const struct btf_type *t2)
+{
+       const struct btf_param *args1, *args2;
+       const char *fn1, *fn2, *s1, *s2;
+       u32 nargs1, nargs2, i;
+
+       fn1 = btf_name_by_offset(btf1, t1->name_off);
+       fn2 = btf_name_by_offset(btf2, t2->name_off);
+
+       if (btf_func_linkage(t1) != BTF_FUNC_GLOBAL) {
+               bpf_log(log, "%s() is not a global function\n", fn1);
+               return -EINVAL;
+       }
+       if (btf_func_linkage(t2) != BTF_FUNC_GLOBAL) {
+               bpf_log(log, "%s() is not a global function\n", fn2);
+               return -EINVAL;
+       }
+
+       t1 = btf_type_by_id(btf1, t1->type);
+       if (!t1 || !btf_type_is_func_proto(t1))
+               return -EFAULT;
+       t2 = btf_type_by_id(btf2, t2->type);
+       if (!t2 || !btf_type_is_func_proto(t2))
+               return -EFAULT;
+
+       args1 = (const struct btf_param *)(t1 + 1);
+       nargs1 = btf_type_vlen(t1);
+       args2 = (const struct btf_param *)(t2 + 1);
+       nargs2 = btf_type_vlen(t2);
+
+       if (nargs1 != nargs2) {
+               bpf_log(log, "%s() has %d args while %s() has %d args\n",
+                       fn1, nargs1, fn2, nargs2);
+               return -EINVAL;
+       }
+
+       t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
+       t2 = btf_type_skip_modifiers(btf2, t2->type, NULL);
+       if (t1->info != t2->info) {
+               bpf_log(log,
+                       "Return type %s of %s() doesn't match type %s of %s()\n",
+                       btf_type_str(t1), fn1,
+                       btf_type_str(t2), fn2);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < nargs1; i++) {
+               t1 = btf_type_skip_modifiers(btf1, args1[i].type, NULL);
+               t2 = btf_type_skip_modifiers(btf2, args2[i].type, NULL);
+
+               if (t1->info != t2->info) {
+                       bpf_log(log, "arg%d in %s() is %s while %s() has %s\n",
+                               i, fn1, btf_type_str(t1),
+                               fn2, btf_type_str(t2));
+                       return -EINVAL;
+               }
+               if (btf_type_has_size(t1) && t1->size != t2->size) {
+                       bpf_log(log,
+                               "arg%d in %s() has size %d while %s() has %d\n",
+                               i, fn1, t1->size,
+                               fn2, t2->size);
+                       return -EINVAL;
+               }
+
+               /* global functions are validated with scalars and pointers
+                * to context only. And only global functions can be replaced.
+                * Hence type check only those types.
+                */
+               if (btf_type_is_int(t1) || btf_type_is_enum(t1))
+                       continue;
+               if (!btf_type_is_ptr(t1)) {
+                       bpf_log(log,
+                               "arg%d in %s() has unrecognized type\n",
+                               i, fn1);
+                       return -EINVAL;
+               }
+               t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
+               t2 = btf_type_skip_modifiers(btf2, t2->type, NULL);
+               if (!btf_type_is_struct(t1)) {
+                       bpf_log(log,
+                               "arg%d in %s() is not a pointer to context\n",
+                               i, fn1);
+                       return -EINVAL;
+               }
+               if (!btf_type_is_struct(t2)) {
+                       bpf_log(log,
+                               "arg%d in %s() is not a pointer to context\n",
+                               i, fn2);
+                       return -EINVAL;
+               }
+               /* This is an optional check to make program writing easier.
+                * Compare names of structs and report an error to the user.
+                * btf_prepare_func_args() already checked that t2 struct
+                * is a context type. btf_prepare_func_args() will check
+                * later that t1 struct is a context type as well.
+                */
+               s1 = btf_name_by_offset(btf1, t1->name_off);
+               s2 = btf_name_by_offset(btf2, t2->name_off);
+               if (strcmp(s1, s2)) {
+                       bpf_log(log,
+                               "arg%d %s(struct %s *) doesn't match %s(struct %s *)\n",
+                               i, fn1, s1, fn2, s2);
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
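
Program extensions (BPF_PROG_TYPE_EXT, also part of this pull) depend on this check when replacing a global function in another program. A BPF C sketch of a pair that would pass, same arg count, scalar sizes, and ctx struct (function names hypothetical):

        /* target program: global (non-static) function with BTF */
        __attribute__((noinline))
        int do_check(struct xdp_md *ctx, __u32 flags)
        {
                return flags & 1;
        }

        /* extension program: the prototype must match the target's */
        SEC("freplace/do_check")
        int do_check_new(struct xdp_md *ctx, __u32 flags)
        {
                return flags & 3;
        }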
+
+/* Compare BTFs of given program with BTF of target program */
+int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
+                        struct btf *btf2, const struct btf_type *t2)
+{
+       struct btf *btf1 = prog->aux->btf;
+       const struct btf_type *t1;
+       u32 btf_id = 0;
+
+       if (!prog->aux->func_info) {
+               bpf_log(&env->log, "Program extension requires BTF\n");
+               return -EINVAL;
+       }
+
+       btf_id = prog->aux->func_info[0].type_id;
+       if (!btf_id)
+               return -EFAULT;
+
+       t1 = btf_type_by_id(btf1, btf_id);
+       if (!t1 || !btf_type_is_func(t1))
+               return -EFAULT;
+
+       return btf_check_func_type_match(&env->log, btf1, t1, btf2, t2);
+}
+
+/* Compare BTF of a function with given bpf_reg_state.
+ * Returns:
+ * EFAULT - there is a verifier bug. Abort verification.
+ * EINVAL - there is a type mismatch or BTF is not available.
+ * 0 - BTF matches with what bpf_reg_state expects.
+ * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
+ */
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
+                            struct bpf_reg_state *reg)
 {
-       struct bpf_verifier_state *st = env->cur_state;
-       struct bpf_func_state *func = st->frame[st->curframe];
-       struct bpf_reg_state *reg = func->regs;
        struct bpf_verifier_log *log = &env->log;
        struct bpf_prog *prog = env->prog;
        struct btf *btf = prog->aux->btf;
@@ -4057,27 +4281,30 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog)
        const char *tname;
 
        if (!prog->aux->func_info)
-               return 0;
+               return -EINVAL;
 
        btf_id = prog->aux->func_info[subprog].type_id;
        if (!btf_id)
-               return 0;
+               return -EFAULT;
 
        if (prog->aux->func_info_aux[subprog].unreliable)
-               return 0;
+               return -EINVAL;
 
        t = btf_type_by_id(btf, btf_id);
        if (!t || !btf_type_is_func(t)) {
-               bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n",
+               /* These checks were already done by the verifier while loading
+                * struct bpf_func_info
+                */
+               bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
                        subprog);
-               return -EINVAL;
+               return -EFAULT;
        }
        tname = btf_name_by_offset(btf, t->name_off);
 
        t = btf_type_by_id(btf, t->type);
        if (!t || !btf_type_is_func_proto(t)) {
-               bpf_log(log, "Invalid type of func %s\n", tname);
-               return -EINVAL;
+               bpf_log(log, "Invalid BTF of func %s\n", tname);
+               return -EFAULT;
        }
        args = (const struct btf_param *)(t + 1);
        nargs = btf_type_vlen(t);
@@ -4103,25 +4330,130 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog)
                                bpf_log(log, "R%d is not a pointer\n", i + 1);
                                goto out;
                        }
-                       /* If program is passing PTR_TO_CTX into subprogram
-                        * check that BTF type matches.
+                       /* If function expects ctx type in BTF check that caller
+                        * is passing PTR_TO_CTX.
                         */
-                       if (reg[i + 1].type == PTR_TO_CTX &&
-                           !btf_get_prog_ctx_type(log, btf, t, prog->type))
-                               goto out;
-                       /* All other pointers are ok */
-                       continue;
+                       if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) {
+                               if (reg[i + 1].type != PTR_TO_CTX) {
+                                       bpf_log(log,
+                                               "arg#%d expected pointer to ctx, but got %s\n",
+                                               i, btf_kind_str[BTF_INFO_KIND(t->info)]);
+                                       goto out;
+                               }
+                               if (check_ctx_reg(env, &reg[i + 1], i + 1))
+                                       goto out;
+                               continue;
+                       }
                }
-               bpf_log(log, "Unrecognized argument type %s\n",
-                       btf_kind_str[BTF_INFO_KIND(t->info)]);
+               bpf_log(log, "Unrecognized arg#%d type %s\n",
+                       i, btf_kind_str[BTF_INFO_KIND(t->info)]);
                goto out;
        }
        return 0;
 out:
-       /* LLVM optimizations can remove arguments from static functions. */
-       bpf_log(log,
-               "Type info disagrees with actual arguments due to compiler optimizations\n");
+       /* Compiler optimizations can remove arguments from static functions,
+        * or a mismatched type can be passed into a global function.
+        * In such cases, mark the function as unreliable from the BTF
+        * point of view.
+        */
        prog->aux->func_info_aux[subprog].unreliable = true;
+       return -EINVAL;
+}
+
+/* Convert BTF of a function into bpf_reg_state if possible
+ * Returns:
+ * EFAULT - there is a verifier bug. Abort verification.
+ * EINVAL - cannot convert BTF.
+ * 0 - Successfully converted BTF into bpf_reg_state
+ * (either PTR_TO_CTX or SCALAR_VALUE).
+ */
+int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
+                         struct bpf_reg_state *reg)
+{
+       struct bpf_verifier_log *log = &env->log;
+       struct bpf_prog *prog = env->prog;
+       enum bpf_prog_type prog_type = prog->type;
+       struct btf *btf = prog->aux->btf;
+       const struct btf_param *args;
+       const struct btf_type *t;
+       u32 i, nargs, btf_id;
+       const char *tname;
+
+       if (!prog->aux->func_info ||
+           prog->aux->func_info_aux[subprog].linkage != BTF_FUNC_GLOBAL) {
+               bpf_log(log, "Verifier bug\n");
+               return -EFAULT;
+       }
+
+       btf_id = prog->aux->func_info[subprog].type_id;
+       if (!btf_id) {
+               bpf_log(log, "Global functions need valid BTF\n");
+               return -EFAULT;
+       }
+
+       t = btf_type_by_id(btf, btf_id);
+       if (!t || !btf_type_is_func(t)) {
+               /* These checks were already done by the verifier while loading
+                * struct bpf_func_info
+                */
+               bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
+                       subprog);
+               return -EFAULT;
+       }
+       tname = btf_name_by_offset(btf, t->name_off);
+
+       if (log->level & BPF_LOG_LEVEL)
+               bpf_log(log, "Validating %s() func#%d...\n",
+                       tname, subprog);
+
+       if (prog->aux->func_info_aux[subprog].unreliable) {
+               bpf_log(log, "Verifier bug in function %s()\n", tname);
+               return -EFAULT;
+       }
+       if (prog_type == BPF_PROG_TYPE_EXT)
+               prog_type = prog->aux->linked_prog->type;
+
+       t = btf_type_by_id(btf, t->type);
+       if (!t || !btf_type_is_func_proto(t)) {
+               bpf_log(log, "Invalid type of function %s()\n", tname);
+               return -EFAULT;
+       }
+       args = (const struct btf_param *)(t + 1);
+       nargs = btf_type_vlen(t);
+       if (nargs > 5) {
+               bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n",
+                       tname, nargs);
+               return -EINVAL;
+       }
+       /* check that function returns int */
+       t = btf_type_by_id(btf, t->type);
+       while (btf_type_is_modifier(t))
+               t = btf_type_by_id(btf, t->type);
+       if (!btf_type_is_int(t) && !btf_type_is_enum(t)) {
+               bpf_log(log,
+                       "Global function %s() doesn't return scalar. Only those are supported.\n",
+                       tname);
+               return -EINVAL;
+       }
+       /* Convert BTF function arguments into verifier types.
+        * Only PTR_TO_CTX and SCALAR are supported atm.
+        */
+       for (i = 0; i < nargs; i++) {
+               t = btf_type_by_id(btf, args[i].type);
+               while (btf_type_is_modifier(t))
+                       t = btf_type_by_id(btf, t->type);
+               if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+                       reg[i + 1].type = SCALAR_VALUE;
+                       continue;
+               }
+               if (btf_type_is_ptr(t) &&
+                   btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
+                       reg[i + 1].type = PTR_TO_CTX;
+                       continue;
+               }
+               bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n",
+                       i, btf_kind_str[BTF_INFO_KIND(t->info)], tname);
+               return -EINVAL;
+       }
        return 0;
 }
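
In BPF C terms, the conversion above currently accepts global functions whose args are scalars or a pointer to the program's ctx and whose return type is a scalar, e.g. (a hypothetical sketch):

        __attribute__((noinline))
        int pick_queue(struct xdp_md *ctx, __u32 hash)
        {
                /* ctx becomes PTR_TO_CTX, hash becomes SCALAR_VALUE */
                return (ctx->rx_queue_index + hash) & 7;
        }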
 
index 29d47aae0dd1ddd9df0330abdcefede6f40fbe81..973a20d4974981fcd7bca0d6f56c94e5c65bdee9 100644 (file)
@@ -2137,6 +2137,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
 const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
 const struct bpf_func_proto bpf_spin_lock_proto __weak;
 const struct bpf_func_proto bpf_spin_unlock_proto __weak;
+const struct bpf_func_proto bpf_jiffies64_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
index da9c832fc5c84320d179d24f5df4bbba51048ec5..de630f9802825503a114946b53457fd3dda94963 100644 (file)
        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
 #define DEV_MAP_BULK_SIZE 16
-struct bpf_dtab_netdev;
-
-struct xdp_bulk_queue {
+struct xdp_dev_bulk_queue {
        struct xdp_frame *q[DEV_MAP_BULK_SIZE];
        struct list_head flush_node;
+       struct net_device *dev;
        struct net_device *dev_rx;
-       struct bpf_dtab_netdev *obj;
        unsigned int count;
 };
 
@@ -67,9 +65,8 @@ struct bpf_dtab_netdev {
        struct net_device *dev; /* must be first member, due to tracepoint */
        struct hlist_node index_hlist;
        struct bpf_dtab *dtab;
-       struct xdp_bulk_queue __percpu *bulkq;
        struct rcu_head rcu;
-       unsigned int idx; /* keep track of map index for tracepoint */
+       unsigned int idx;
 };
 
 struct bpf_dtab {
@@ -84,7 +81,7 @@ struct bpf_dtab {
        u32 n_buckets;
 };
 
-static DEFINE_PER_CPU(struct list_head, dev_map_flush_list);
+static DEFINE_PER_CPU(struct list_head, dev_flush_list);
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
@@ -219,7 +216,6 @@ static void dev_map_free(struct bpf_map *map)
 
                        hlist_for_each_entry_safe(dev, next, head, index_hlist) {
                                hlist_del_rcu(&dev->index_hlist);
-                               free_percpu(dev->bulkq);
                                dev_put(dev->dev);
                                kfree(dev);
                        }
@@ -234,7 +230,6 @@ static void dev_map_free(struct bpf_map *map)
                        if (!dev)
                                continue;
 
-                       free_percpu(dev->bulkq);
                        dev_put(dev->dev);
                        kfree(dev);
                }
@@ -320,10 +315,9 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
        return -ENOENT;
 }
 
-static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
+static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
-       struct bpf_dtab_netdev *obj = bq->obj;
-       struct net_device *dev = obj->dev;
+       struct net_device *dev = bq->dev;
        int sent = 0, drops = 0, err = 0;
        int i;
 
@@ -346,8 +340,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
 out:
        bq->count = 0;
 
-       trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx,
-                             sent, drops, bq->dev_rx, dev, err);
+       trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
        bq->dev_rx = NULL;
        __list_del_clearprev(&bq->flush_node);
        return 0;
@@ -364,17 +357,17 @@ error:
        goto out;
 }
 
-/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+/* __dev_flush is called from xdp_do_flush() which _must_ be signaled
  * from the driver before returning from its napi->poll() routine. The poll()
  * routine is called either from busy_poll context or net_rx_action signaled
  * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
  * net device can be torn down. On devmap tear down we ensure the flush list
  * is empty before completing to ensure all flush operations have completed.
  */
-void __dev_map_flush(void)
+void __dev_flush(void)
 {
-       struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
-       struct xdp_bulk_queue *bq, *tmp;
+       struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+       struct xdp_dev_bulk_queue *bq, *tmp;
 
        rcu_read_lock();
        list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
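
For context on the renamed flush path: a driver's poll() routine is expected to drain the per-cpu flush list before returning, roughly like this hypothetical driver:

        static int mydrv_napi_poll(struct napi_struct *napi, int budget)
        {
                /* RX processing may run the XDP prog and call xdp_do_redirect() */
                int work_done = mydrv_clean_rx(napi, budget);

                /* drain frames queued for XDP_REDIRECT on this cpu;
                 * device targets end up in __dev_flush() above
                 */
                xdp_do_flush();

                if (work_done < budget)
                        napi_complete_done(napi, work_done);
                return work_done;
        }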
@@ -401,12 +394,11 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
+static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
                      struct net_device *dev_rx)
-
 {
-       struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
-       struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+       struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+       struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
 
        if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
                bq_xmit_all(bq, 0);
@@ -426,10 +418,9 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
        return 0;
 }
 
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
-                   struct net_device *dev_rx)
+static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+                              struct net_device *dev_rx)
 {
-       struct net_device *dev = dst->dev;
        struct xdp_frame *xdpf;
        int err;
 
@@ -444,7 +435,21 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        if (unlikely(!xdpf))
                return -EOVERFLOW;
 
-       return bq_enqueue(dst, xdpf, dev_rx);
+       return bq_enqueue(dev, xdpf, dev_rx);
+}
+
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
+{
+       return __xdp_enqueue(dev, xdp, dev_rx);
+}
+
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
+{
+       struct net_device *dev = dst->dev;
+
+       return __xdp_enqueue(dev, xdp, dev_rx);
 }
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
@@ -483,7 +488,6 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
        struct bpf_dtab_netdev *dev;
 
        dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
-       free_percpu(dev->bulkq);
        dev_put(dev->dev);
        kfree(dev);
 }
@@ -538,30 +542,15 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
                                                    u32 ifindex,
                                                    unsigned int idx)
 {
-       gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
        struct bpf_dtab_netdev *dev;
-       struct xdp_bulk_queue *bq;
-       int cpu;
 
-       dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+       dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
+                          dtab->map.numa_node);
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-                                       sizeof(void *), gfp);
-       if (!dev->bulkq) {
-               kfree(dev);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for_each_possible_cpu(cpu) {
-               bq = per_cpu_ptr(dev->bulkq, cpu);
-               bq->obj = dev;
-       }
-
        dev->dev = dev_get_by_index(net, ifindex);
        if (!dev->dev) {
-               free_percpu(dev->bulkq);
                kfree(dev);
                return ERR_PTR(-EINVAL);
        }
@@ -721,9 +710,23 @@ static int dev_map_notification(struct notifier_block *notifier,
 {
        struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
        struct bpf_dtab *dtab;
-       int i;
+       int i, cpu;
 
        switch (event) {
+       case NETDEV_REGISTER:
+               if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
+                       break;
+
+               /* will be freed in free_netdev() */
+               netdev->xdp_bulkq =
+                       __alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
+                                          sizeof(void *), GFP_ATOMIC);
+               if (!netdev->xdp_bulkq)
+                       return NOTIFY_BAD;
+
+               for_each_possible_cpu(cpu)
+                       per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
+               break;
        case NETDEV_UNREGISTER:
                /* This rcu_read_lock/unlock pair is needed because
                 * dev_map_list is an RCU list AND to ensure a delete
@@ -771,7 +774,7 @@ static int __init dev_map_init(void)
        register_netdevice_notifier(&dev_map_notifier);
 
        for_each_possible_cpu(cpu)
-               INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu));
+               INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
        return 0;
 }
 
index 22066a62c8c976b093c836391a21629d9b166f61..2d182c4ee9d9964a6ec55ea102a5de9c7b6fc811 100644 (file)
        (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |    \
         BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
 
+#define BATCH_OPS(_name)                       \
+       .map_lookup_batch =                     \
+       _name##_map_lookup_batch,               \
+       .map_lookup_and_delete_batch =          \
+       _name##_map_lookup_and_delete_batch,    \
+       .map_update_batch =                     \
+       generic_map_update_batch,               \
+       .map_delete_batch =                     \
+       generic_map_delete_batch
+
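For example, BATCH_OPS(htab) in htab_map_ops expands to:

        .map_lookup_batch = htab_map_lookup_batch,
        .map_lookup_and_delete_batch = htab_map_lookup_and_delete_batch,
        .map_update_batch = generic_map_update_batch,
        .map_delete_batch = generic_map_delete_batch
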
 struct bucket {
        struct hlist_nulls_head head;
        raw_spinlock_t lock;
@@ -1232,6 +1242,256 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
        rcu_read_unlock();
 }
 
+static int
+__htab_map_lookup_and_delete_batch(struct bpf_map *map,
+                                  const union bpf_attr *attr,
+                                  union bpf_attr __user *uattr,
+                                  bool do_delete, bool is_lru_map,
+                                  bool is_percpu)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
+       void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
+       void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+       void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+       void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+       u32 batch, max_count, size, bucket_size;
+       u64 elem_map_flags, map_flags;
+       struct hlist_nulls_head *head;
+       struct hlist_nulls_node *n;
+       unsigned long flags;
+       struct htab_elem *l;
+       struct bucket *b;
+       int ret = 0;
+
+       elem_map_flags = attr->batch.elem_flags;
+       if ((elem_map_flags & ~BPF_F_LOCK) ||
+           ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+               return -EINVAL;
+
+       map_flags = attr->batch.flags;
+       if (map_flags)
+               return -EINVAL;
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       if (put_user(0, &uattr->batch.count))
+               return -EFAULT;
+
+       batch = 0;
+       if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
+               return -EFAULT;
+
+       if (batch >= htab->n_buckets)
+               return -ENOENT;
+
+       key_size = htab->map.key_size;
+       roundup_key_size = round_up(htab->map.key_size, 8);
+       value_size = htab->map.value_size;
+       size = round_up(value_size, 8);
+       if (is_percpu)
+               value_size = size * num_possible_cpus();
+       total = 0;
+       /* While experimenting with hash tables with sizes ranging from 10 to
+        * 1000, it was observed that a bucket can have up to 5 entries.
+        */
+       bucket_size = 5;
+
+alloc:
+       /* We cannot do copy_from_user or copy_to_user inside
+        * the rcu_read_lock. Allocate enough space here.
+        */
+       keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
+       values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
+       if (!keys || !values) {
+               ret = -ENOMEM;
+               goto after_loop;
+       }
+
+again:
+       preempt_disable();
+       this_cpu_inc(bpf_prog_active);
+       rcu_read_lock();
+again_nocopy:
+       dst_key = keys;
+       dst_val = values;
+       b = &htab->buckets[batch];
+       head = &b->head;
+       raw_spin_lock_irqsave(&b->lock, flags);
+
+       bucket_cnt = 0;
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+               bucket_cnt++;
+
+       if (bucket_cnt > (max_count - total)) {
+               if (total == 0)
+                       ret = -ENOSPC;
+               raw_spin_unlock_irqrestore(&b->lock, flags);
+               rcu_read_unlock();
+               this_cpu_dec(bpf_prog_active);
+               preempt_enable();
+               goto after_loop;
+       }
+
+       if (bucket_cnt > bucket_size) {
+               bucket_size = bucket_cnt;
+               raw_spin_unlock_irqrestore(&b->lock, flags);
+               rcu_read_unlock();
+               this_cpu_dec(bpf_prog_active);
+               preempt_enable();
+               kvfree(keys);
+               kvfree(values);
+               goto alloc;
+       }
+
+       hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+               memcpy(dst_key, l->key, key_size);
+
+               if (is_percpu) {
+                       int off = 0, cpu;
+                       void __percpu *pptr;
+
+                       pptr = htab_elem_get_ptr(l, map->key_size);
+                       for_each_possible_cpu(cpu) {
+                               bpf_long_memcpy(dst_val + off,
+                                               per_cpu_ptr(pptr, cpu), size);
+                               off += size;
+                       }
+               } else {
+                       value = l->key + roundup_key_size;
+                       if (elem_map_flags & BPF_F_LOCK)
+                               copy_map_value_locked(map, dst_val, value,
+                                                     true);
+                       else
+                               copy_map_value(map, dst_val, value);
+                       check_and_init_map_lock(map, dst_val);
+               }
+               if (do_delete) {
+                       hlist_nulls_del_rcu(&l->hash_node);
+                       if (is_lru_map)
+                               bpf_lru_push_free(&htab->lru, &l->lru_node);
+                       else
+                               free_htab_elem(htab, l);
+               }
+               dst_key += key_size;
+               dst_val += value_size;
+       }
+
+       raw_spin_unlock_irqrestore(&b->lock, flags);
+       /* If we are not copying data, we can go to the next bucket and
+        * avoid unlocking the RCU read lock.
+        */
+       if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
+               batch++;
+               goto again_nocopy;
+       }
+
+       rcu_read_unlock();
+       this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
+           key_size * bucket_cnt) ||
+           copy_to_user(uvalues + total * value_size, values,
+           value_size * bucket_cnt))) {
+               ret = -EFAULT;
+               goto after_loop;
+       }
+
+       total += bucket_cnt;
+       batch++;
+       if (batch >= htab->n_buckets) {
+               ret = -ENOENT;
+               goto after_loop;
+       }
+       goto again;
+
+after_loop:
+       if (ret == -EFAULT)
+               goto out;
+
+       /* copy # of entries and next batch */
+       ubatch = u64_to_user_ptr(attr->batch.out_batch);
+       if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
+           put_user(total, &uattr->batch.count))
+               ret = -EFAULT;
+
+out:
+       kvfree(keys);
+       kvfree(values);
+       return ret;
+}
+
+static int
+htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+                                                 false, true);
+}
+
+static int
+htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+                                       const union bpf_attr *attr,
+                                       union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+                                                 false, true);
+}
+
+static int
+htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+                     union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+                                                 false, false);
+}
+
+static int
+htab_map_lookup_and_delete_batch(struct bpf_map *map,
+                                const union bpf_attr *attr,
+                                union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+                                                 false, false);
+}
+
+static int
+htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
+                                const union bpf_attr *attr,
+                                union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+                                                 true, true);
+}
+
+static int
+htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+                                           const union bpf_attr *attr,
+                                           union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+                                                 true, true);
+}
+
+static int
+htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+                         union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+                                                 true, false);
+}
+
+static int
+htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
+                                    const union bpf_attr *attr,
+                                    union bpf_attr __user *uattr)
+{
+       return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+                                                 true, false);
+}
+
 const struct bpf_map_ops htab_map_ops = {
        .map_alloc_check = htab_map_alloc_check,
        .map_alloc = htab_map_alloc,
@@ -1242,6 +1502,7 @@ const struct bpf_map_ops htab_map_ops = {
        .map_delete_elem = htab_map_delete_elem,
        .map_gen_lookup = htab_map_gen_lookup,
        .map_seq_show_elem = htab_map_seq_show_elem,
+       BATCH_OPS(htab),
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
@@ -1255,6 +1516,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
        .map_delete_elem = htab_lru_map_delete_elem,
        .map_gen_lookup = htab_lru_map_gen_lookup,
        .map_seq_show_elem = htab_map_seq_show_elem,
+       BATCH_OPS(htab_lru),
 };
 
 /* Called from eBPF program */
@@ -1368,6 +1630,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
        .map_update_elem = htab_percpu_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
        .map_seq_show_elem = htab_percpu_map_seq_show_elem,
+       BATCH_OPS(htab_percpu),
 };
 
 const struct bpf_map_ops htab_lru_percpu_map_ops = {
@@ -1379,6 +1642,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
        .map_update_elem = htab_lru_percpu_map_update_elem,
        .map_delete_elem = htab_lru_map_delete_elem,
        .map_seq_show_elem = htab_percpu_map_seq_show_elem,
+       BATCH_OPS(htab_lru_percpu),
 };
 
 static int fd_htab_map_alloc_check(union bpf_attr *attr)
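
From userspace, the new hash-map batch operations are driven through the bpf(2) syscall's batch attributes. Below is a minimal sketch, assuming a uapi header new enough to carry BPF_MAP_LOOKUP_AND_DELETE_BATCH and the batch fields; the bpf() wrapper and drain_map() are hypothetical helpers, not part of the patch:

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

/* Drain up to @count entries; returns entries copied or -errno. */
static int drain_map(int map_fd, void *keys, void *values, __u32 count)
{
	union bpf_attr attr;
	__u32 out_tok = 0;	/* for hash maps, a bucket index */

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd = map_fd;
	attr.batch.in_batch = 0;	/* NULL: start from the first bucket */
	attr.batch.out_batch = (__u64)(unsigned long)&out_tok;
	attr.batch.keys = (__u64)(unsigned long)keys;
	attr.batch.values = (__u64)(unsigned long)values;
	attr.batch.count = count;	/* in: capacity, out: copied */

	if (bpf(BPF_MAP_LOOKUP_AND_DELETE_BATCH, &attr) && errno != ENOENT)
		return -errno;
	/* ENOENT past the last bucket just means iteration finished. */
	return attr.batch.count;
}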
index cada974c9f4e38efd6b9c11635f87a87042634c3..d8b7b110a1c5e397ffb2af11705a255d80587736 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/uidgid.h>
 #include <linux/filter.h>
 #include <linux/ctype.h>
+#include <linux/jiffies.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -312,6 +313,17 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
        preempt_enable();
 }
 
+BPF_CALL_0(bpf_jiffies64)
+{
+       return get_jiffies_64();
+}
+
+const struct bpf_func_proto bpf_jiffies64_proto = {
+       .func           = bpf_jiffies64,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
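
On the BPF side, bpf_jiffies64() gives programs a cheap, coarse clock. A hedged sketch of a tracepoint program that uses it to rate-limit output, assuming a bpf_helpers.h that exposes bpf_jiffies64 and bpf_printk, global-data support in the loader, and CONFIG_HZ=1000 on the target kernel:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define RATE_HZ 1000	/* assumption: target kernel runs CONFIG_HZ=1000 */

__u64 last_jiffies;	/* global data; assumes kernel/libbpf support it */

SEC("tracepoint/syscalls/sys_enter_execve")
int log_execve(void *ctx)
{
	__u64 now = bpf_jiffies64();

	if (now - last_jiffies < RATE_HZ)
		return 0;	/* less than ~1s since the last message */
	last_jiffies = now;
	bpf_printk("execve seen at jiffies %llu\n", now);
	return 0;
}

char _license[] SEC("license") = "GPL";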
index ecf42bec38c001178563e11feab3ded018e0a312..e11059b99f1883b1f509dfbf96e68f644436fb58 100644 (file)
@@ -380,7 +380,7 @@ static const struct inode_operations bpf_dir_iops = {
        .unlink         = simple_unlink,
 };
 
-static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
+static int bpf_obj_do_pin(const char __user *pathname, void *raw,
                          enum bpf_type type)
 {
        struct dentry *dentry;
@@ -389,7 +389,7 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
        umode_t mode;
        int ret;
 
-       dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
+       dentry = user_path_create(AT_FDCWD, pathname, &path, 0);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
@@ -422,30 +422,22 @@ out:
 
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 {
-       struct filename *pname;
        enum bpf_type type;
        void *raw;
        int ret;
 
-       pname = getname(pathname);
-       if (IS_ERR(pname))
-               return PTR_ERR(pname);
-
        raw = bpf_fd_probe_obj(ufd, &type);
-       if (IS_ERR(raw)) {
-               ret = PTR_ERR(raw);
-               goto out;
-       }
+       if (IS_ERR(raw))
+               return PTR_ERR(raw);
 
-       ret = bpf_obj_do_pin(pname, raw, type);
+       ret = bpf_obj_do_pin(pathname, raw, type);
        if (ret != 0)
                bpf_any_put(raw, type);
-out:
-       putname(pname);
+
        return ret;
 }
 
-static void *bpf_obj_do_get(const struct filename *pathname,
+static void *bpf_obj_do_get(const char __user *pathname,
                            enum bpf_type *type, int flags)
 {
        struct inode *inode;
@@ -453,7 +445,7 @@ static void *bpf_obj_do_get(const struct filename *pathname,
        void *raw;
        int ret;
 
-       ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
+       ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path);
        if (ret)
                return ERR_PTR(ret);
 
@@ -480,36 +472,27 @@ out:
 int bpf_obj_get_user(const char __user *pathname, int flags)
 {
        enum bpf_type type = BPF_TYPE_UNSPEC;
-       struct filename *pname;
-       int ret = -ENOENT;
        int f_flags;
        void *raw;
+       int ret;
 
        f_flags = bpf_get_file_flag(flags);
        if (f_flags < 0)
                return f_flags;
 
-       pname = getname(pathname);
-       if (IS_ERR(pname))
-               return PTR_ERR(pname);
-
-       raw = bpf_obj_do_get(pname, &type, f_flags);
-       if (IS_ERR(raw)) {
-               ret = PTR_ERR(raw);
-               goto out;
-       }
+       raw = bpf_obj_do_get(pathname, &type, f_flags);
+       if (IS_ERR(raw))
+               return PTR_ERR(raw);
 
        if (type == BPF_TYPE_PROG)
                ret = bpf_prog_new_fd(raw);
        else if (type == BPF_TYPE_MAP)
                ret = bpf_map_new_fd(raw, f_flags);
        else
-               goto out;
+               return -ENOENT;
 
        if (ret < 0)
                bpf_any_put(raw, type);
-out:
-       putname(pname);
        return ret;
 }
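
The pinning path now takes the user pathname directly, so a caller simply passes the string's address through the attr. A small sketch using the same raw-syscall wrapper as the batch example above; pin_obj() is a hypothetical helper:

#include <errno.h>
#include <string.h>
#include <linux/bpf.h>

int bpf(int cmd, union bpf_attr *attr);	/* wrapper from the earlier sketch */

/* Hypothetical: pin a prog or map fd at @path (e.g. under /sys/fs/bpf). */
static int pin_obj(int fd, const char *path)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.bpf_fd = fd;
	attr.pathname = (__u64)(unsigned long)path;

	return bpf(BPF_OBJ_PIN, &attr) ? -errno : 0;
}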
 
index 5e9366b33f0f41108a83a53c169407ab37ea43da..b3c48d1533cb5c9cabfc899afd09c2ff9eaca5fb 100644 (file)
@@ -22,7 +22,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
         */
        if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
            inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
-           inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+           inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
+           inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
                fdput(f);
                return ERR_PTR(-ENOTSUPP);
        }
index 81ee8595dfee9cc5c1de24e0b6e5a5558c088917..a91ad518c050335be6c751296c42149752e7c2fe 100644 (file)
@@ -129,6 +129,152 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
        return map;
 }
 
+static u32 bpf_map_value_size(struct bpf_map *map)
+{
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+           map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+               return round_up(map->value_size, 8) * num_possible_cpus();
+       else if (IS_FD_MAP(map))
+               return sizeof(u32);
+       else
+               return map->value_size;
+}
+
+static void maybe_wait_bpf_programs(struct bpf_map *map)
+{
+       /* Wait for any running BPF programs to complete so that
+        * userspace, when we return to it, knows that all programs
+        * that could be running use the new map value.
+        */
+       if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
+           map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+               synchronize_rcu();
+}
+
+static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
+                               void *value, __u64 flags)
+{
+       int err;
+
+       /* Need to create a kthread, thus must support schedule */
+       if (bpf_map_is_dev_bound(map)) {
+               return bpf_map_offload_update_elem(map, key, value, flags);
+       } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+                  map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+                  map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+                  map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               return map->ops->map_update_elem(map, key, value, flags);
+       } else if (IS_FD_PROG_ARRAY(map)) {
+               return bpf_fd_array_map_update_elem(map, f.file, key, value,
+                                                   flags);
+       }
+
+       /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+        * inside bpf map update or delete; otherwise deadlocks are possible
+        */
+       preempt_disable();
+       __this_cpu_inc(bpf_prog_active);
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               err = bpf_percpu_hash_update(map, key, value, flags);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+               err = bpf_percpu_array_update(map, key, value, flags);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+               err = bpf_percpu_cgroup_storage_update(map, key, value,
+                                                      flags);
+       } else if (IS_FD_ARRAY(map)) {
+               rcu_read_lock();
+               err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+                                                  flags);
+               rcu_read_unlock();
+       } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+               rcu_read_lock();
+               err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+                                                 flags);
+               rcu_read_unlock();
+       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+               /* rcu_read_lock() is not needed */
+               err = bpf_fd_reuseport_array_update_elem(map, key, value,
+                                                        flags);
+       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+                  map->map_type == BPF_MAP_TYPE_STACK) {
+               err = map->ops->map_push_elem(map, value, flags);
+       } else {
+               rcu_read_lock();
+               err = map->ops->map_update_elem(map, key, value, flags);
+               rcu_read_unlock();
+       }
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       maybe_wait_bpf_programs(map);
+
+       return err;
+}
+
+static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
+                             __u64 flags)
+{
+       void *ptr;
+       int err;
+
+       if (bpf_map_is_dev_bound(map))
+               return bpf_map_offload_lookup_elem(map, key, value);
+
+       preempt_disable();
+       this_cpu_inc(bpf_prog_active);
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               err = bpf_percpu_hash_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+               err = bpf_percpu_array_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+               err = bpf_percpu_cgroup_storage_copy(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
+               err = bpf_stackmap_copy(map, key, value);
+       } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
+               err = bpf_fd_array_map_lookup_elem(map, key, value);
+       } else if (IS_FD_HASH(map)) {
+               err = bpf_fd_htab_map_lookup_elem(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+               err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+                  map->map_type == BPF_MAP_TYPE_STACK) {
+               err = map->ops->map_peek_elem(map, value);
+       } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               /* struct_ops map requires directly updating "value" */
+               err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+       } else {
+               rcu_read_lock();
+               if (map->ops->map_lookup_elem_sys_only)
+                       ptr = map->ops->map_lookup_elem_sys_only(map, key);
+               else
+                       ptr = map->ops->map_lookup_elem(map, key);
+               if (IS_ERR(ptr)) {
+                       err = PTR_ERR(ptr);
+               } else if (!ptr) {
+                       err = -ENOENT;
+               } else {
+                       err = 0;
+                       if (flags & BPF_F_LOCK)
+                               /* lock 'ptr' and copy everything but lock */
+                               copy_map_value_locked(map, value, ptr, true);
+                       else
+                               copy_map_value(map, value, ptr);
+                       /* mask lock, since value wasn't zero inited */
+                       check_and_init_map_lock(map, value);
+               }
+               rcu_read_unlock();
+       }
+
+       this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       maybe_wait_bpf_programs(map);
+
+       return err;
+}
+
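
bpf_map_value_size() above is what dictates how big a userspace value buffer must be: per-CPU maps need one 8-byte-rounded value slot per possible CPU. A hedged userspace sketch using libbpf's libbpf_num_possible_cpus() (present in contemporary libbpf); alloc_percpu_value_buf() is a made-up helper name:

#include <stdlib.h>
#include <bpf/libbpf.h>

static void *alloc_percpu_value_buf(unsigned int value_size, int *ncpus)
{
	size_t per_cpu = (value_size + 7) & ~7UL;	/* round_up(, 8) */

	*ncpus = libbpf_num_possible_cpus();
	if (*ncpus < 0)
		return NULL;
	return calloc(*ncpus, per_cpu);	/* one slot per possible CPU */
}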
 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
 {
        /* We really just want to fail instead of triggering OOM killer
@@ -628,7 +774,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
        return ret;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -642,6 +788,14 @@ static int map_create(union bpf_attr *attr)
        if (err)
                return -EINVAL;
 
+       if (attr->btf_vmlinux_value_type_id) {
+               if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
+                   attr->btf_key_type_id || attr->btf_value_type_id)
+                       return -EINVAL;
+       } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+               return -EINVAL;
+       }
+
        f_flags = bpf_get_file_flag(attr->map_flags);
        if (f_flags < 0)
                return f_flags;
@@ -664,32 +818,35 @@ static int map_create(union bpf_attr *attr)
        atomic64_set(&map->usercnt, 1);
        mutex_init(&map->freeze_mutex);
 
-       if (attr->btf_key_type_id || attr->btf_value_type_id) {
+       map->spin_lock_off = -EINVAL;
+       if (attr->btf_key_type_id || attr->btf_value_type_id ||
+           /* Even if the map's value is a kernel struct,
+            * the bpf_prog.o must have BTF to begin with
+            * to figure out the corresponding kernel
+            * counterpart.  Thus, attr->btf_fd has
+            * to be valid as well.
+            */
+           attr->btf_vmlinux_value_type_id) {
                struct btf *btf;
 
-               if (!attr->btf_value_type_id) {
-                       err = -EINVAL;
-                       goto free_map;
-               }
-
                btf = btf_get_by_fd(attr->btf_fd);
                if (IS_ERR(btf)) {
                        err = PTR_ERR(btf);
                        goto free_map;
                }
+               map->btf = btf;
 
-               err = map_check_btf(map, btf, attr->btf_key_type_id,
-                                   attr->btf_value_type_id);
-               if (err) {
-                       btf_put(btf);
-                       goto free_map;
+               if (attr->btf_value_type_id) {
+                       err = map_check_btf(map, btf, attr->btf_key_type_id,
+                                           attr->btf_value_type_id);
+                       if (err)
+                               goto free_map;
                }
 
-               map->btf = btf;
                map->btf_key_type_id = attr->btf_key_type_id;
                map->btf_value_type_id = attr->btf_value_type_id;
-       } else {
-               map->spin_lock_off = -EINVAL;
+               map->btf_vmlinux_value_type_id =
+                       attr->btf_vmlinux_value_type_id;
        }
 
        err = security_bpf_map_alloc(map);
@@ -816,7 +973,7 @@ static int map_lookup_elem(union bpf_attr *attr)
        void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
-       void *key, *value, *ptr;
+       void *key, *value;
        u32 value_size;
        struct fd f;
        int err;
@@ -848,72 +1005,14 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto err_put;
        }
 
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-           map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
-               value_size = round_up(map->value_size, 8) * num_possible_cpus();
-       else if (IS_FD_MAP(map))
-               value_size = sizeof(u32);
-       else
-               value_size = map->value_size;
+       value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
-       if (bpf_map_is_dev_bound(map)) {
-               err = bpf_map_offload_lookup_elem(map, key, value);
-               goto done;
-       }
-
-       preempt_disable();
-       this_cpu_inc(bpf_prog_active);
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-               err = bpf_percpu_hash_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-               err = bpf_percpu_array_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
-               err = bpf_percpu_cgroup_storage_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
-               err = bpf_stackmap_copy(map, key, value);
-       } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
-               err = bpf_fd_array_map_lookup_elem(map, key, value);
-       } else if (IS_FD_HASH(map)) {
-               err = bpf_fd_htab_map_lookup_elem(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
-               err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-                  map->map_type == BPF_MAP_TYPE_STACK) {
-               err = map->ops->map_peek_elem(map, value);
-       } else {
-               rcu_read_lock();
-               if (map->ops->map_lookup_elem_sys_only)
-                       ptr = map->ops->map_lookup_elem_sys_only(map, key);
-               else
-                       ptr = map->ops->map_lookup_elem(map, key);
-               if (IS_ERR(ptr)) {
-                       err = PTR_ERR(ptr);
-               } else if (!ptr) {
-                       err = -ENOENT;
-               } else {
-                       err = 0;
-                       if (attr->flags & BPF_F_LOCK)
-                               /* lock 'ptr' and copy everything but lock */
-                               copy_map_value_locked(map, value, ptr, true);
-                       else
-                               copy_map_value(map, value, ptr);
-                       /* mask lock, since value wasn't zero inited */
-                       check_and_init_map_lock(map, value);
-               }
-               rcu_read_unlock();
-       }
-       this_cpu_dec(bpf_prog_active);
-       preempt_enable();
-
-done:
+       err = bpf_map_copy_value(map, key, value, attr->flags);
        if (err)
                goto free_value;
 
@@ -932,16 +1031,6 @@ err_put:
        return err;
 }
 
-static void maybe_wait_bpf_programs(struct bpf_map *map)
-{
-       /* Wait for any running BPF programs to complete so that
-        * userspace, when we return to it, knows that all programs
-        * that could be running use the new map value.
-        */
-       if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
-           map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
-               synchronize_rcu();
-}
 
 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 
@@ -997,60 +1086,8 @@ static int map_update_elem(union bpf_attr *attr)
        if (copy_from_user(value, uvalue, value_size) != 0)
                goto free_value;
 
-       /* Need to create a kthread, thus must support schedule */
-       if (bpf_map_is_dev_bound(map)) {
-               err = bpf_map_offload_update_elem(map, key, value, attr->flags);
-               goto out;
-       } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
-                  map->map_type == BPF_MAP_TYPE_SOCKHASH ||
-                  map->map_type == BPF_MAP_TYPE_SOCKMAP) {
-               err = map->ops->map_update_elem(map, key, value, attr->flags);
-               goto out;
-       } else if (IS_FD_PROG_ARRAY(map)) {
-               err = bpf_fd_array_map_update_elem(map, f.file, key, value,
-                                                  attr->flags);
-               goto out;
-       }
+       err = bpf_map_update_value(map, f, key, value, attr->flags);
 
-       /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
-        * inside bpf map update or delete otherwise deadlocks are possible
-        */
-       preempt_disable();
-       __this_cpu_inc(bpf_prog_active);
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-               err = bpf_percpu_hash_update(map, key, value, attr->flags);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-               err = bpf_percpu_array_update(map, key, value, attr->flags);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
-               err = bpf_percpu_cgroup_storage_update(map, key, value,
-                                                      attr->flags);
-       } else if (IS_FD_ARRAY(map)) {
-               rcu_read_lock();
-               err = bpf_fd_array_map_update_elem(map, f.file, key, value,
-                                                  attr->flags);
-               rcu_read_unlock();
-       } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-               rcu_read_lock();
-               err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
-                                                 attr->flags);
-               rcu_read_unlock();
-       } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
-               /* rcu_read_lock() is not needed */
-               err = bpf_fd_reuseport_array_update_elem(map, key, value,
-                                                        attr->flags);
-       } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-                  map->map_type == BPF_MAP_TYPE_STACK) {
-               err = map->ops->map_push_elem(map, value, attr->flags);
-       } else {
-               rcu_read_lock();
-               err = map->ops->map_update_elem(map, key, value, attr->flags);
-               rcu_read_unlock();
-       }
-       __this_cpu_dec(bpf_prog_active);
-       preempt_enable();
-       maybe_wait_bpf_programs(map);
-out:
 free_value:
        kfree(value);
 free_key:
@@ -1092,7 +1129,9 @@ static int map_delete_elem(union bpf_attr *attr)
        if (bpf_map_is_dev_bound(map)) {
                err = bpf_map_offload_delete_elem(map, key);
                goto out;
-       } else if (IS_FD_PROG_ARRAY(map)) {
+       } else if (IS_FD_PROG_ARRAY(map) ||
+                  map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               /* These maps require sleepable context */
                err = map->ops->map_delete_elem(map, key);
                goto out;
        }
@@ -1179,6 +1218,220 @@ err_put:
        return err;
 }
 
+int generic_map_delete_batch(struct bpf_map *map,
+                            const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       void __user *keys = u64_to_user_ptr(attr->batch.keys);
+       u32 cp, max_count;
+       int err = 0;
+       void *key;
+
+       if (attr->batch.elem_flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
+       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               return -EINVAL;
+       }
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       if (!key)
+               return -ENOMEM;
+
+       for (cp = 0; cp < max_count; cp++) {
+               err = -EFAULT;
+               if (copy_from_user(key, keys + cp * map->key_size,
+                                  map->key_size))
+                       break;
+
+               if (bpf_map_is_dev_bound(map)) {
+                       err = bpf_map_offload_delete_elem(map, key);
+                       break;
+               }
+
+               preempt_disable();
+               __this_cpu_inc(bpf_prog_active);
+               rcu_read_lock();
+               err = map->ops->map_delete_elem(map, key);
+               rcu_read_unlock();
+               __this_cpu_dec(bpf_prog_active);
+               preempt_enable();
+               maybe_wait_bpf_programs(map);
+               if (err)
+                       break;
+       }
+       if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+               err = -EFAULT;
+
+       kfree(key);
+       return err;
+}
+
+int generic_map_update_batch(struct bpf_map *map,
+                            const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       void __user *values = u64_to_user_ptr(attr->batch.values);
+       void __user *keys = u64_to_user_ptr(attr->batch.keys);
+       u32 value_size, cp, max_count;
+       int ufd = attr->map_fd;
+       void *key, *value;
+       struct fd f;
+       int err = 0;
+
+       if (attr->batch.elem_flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
+       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               return -EINVAL;
+       }
+
+       value_size = bpf_map_value_size(map);
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       f = fdget(ufd); /* f.file is consumed by bpf_map_update_value() */
+       key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       if (!key) {
+               fdput(f);
+               return -ENOMEM;
+       }
+
+       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       if (!value) {
+               kfree(key);
+               fdput(f);
+               return -ENOMEM;
+       }
+
+       for (cp = 0; cp < max_count; cp++) {
+               err = -EFAULT;
+               if (copy_from_user(key, keys + cp * map->key_size,
+                   map->key_size) ||
+                   copy_from_user(value, values + cp * value_size, value_size))
+                       break;
+
+               err = bpf_map_update_value(map, f, key, value,
+                                          attr->batch.elem_flags);
+
+               if (err)
+                       break;
+       }
+
+       if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+               err = -EFAULT;
+
+       kfree(value);
+       kfree(key);
+       fdput(f);
+       return err;
+}
+
+#define MAP_LOOKUP_RETRIES 3
+
+int generic_map_lookup_batch(struct bpf_map *map,
+                                   const union bpf_attr *attr,
+                                   union bpf_attr __user *uattr)
+{
+       void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
+       void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+       void __user *values = u64_to_user_ptr(attr->batch.values);
+       void __user *keys = u64_to_user_ptr(attr->batch.keys);
+       void *buf, *buf_prevkey, *prev_key, *key, *value;
+       int err, retry = MAP_LOOKUP_RETRIES;
+       u32 value_size, cp, max_count;
+
+       if (attr->batch.elem_flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
+       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map))
+               return -EINVAL;
+
+       value_size = bpf_map_value_size(map);
+
+       max_count = attr->batch.count;
+       if (!max_count)
+               return 0;
+
+       if (put_user(0, &uattr->batch.count))
+               return -EFAULT;
+
+       buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       if (!buf_prevkey)
+               return -ENOMEM;
+
+       buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+       if (!buf) {
+               kfree(buf_prevkey);
+               return -ENOMEM;
+       }
+
+       err = -EFAULT;
+       prev_key = NULL;
+       if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
+               goto free_buf;
+       key = buf;
+       value = key + map->key_size;
+       if (ubatch)
+               prev_key = buf_prevkey;
+
+       for (cp = 0; cp < max_count;) {
+               rcu_read_lock();
+               err = map->ops->map_get_next_key(map, prev_key, key);
+               rcu_read_unlock();
+               if (err)
+                       break;
+               err = bpf_map_copy_value(map, key, value,
+                                        attr->batch.elem_flags);
+
+               if (err == -ENOENT) {
+                       if (retry) {
+                               retry--;
+                               continue;
+                       }
+                       err = -EINTR;
+                       break;
+               }
+
+               if (err)
+                       goto free_buf;
+
+               if (copy_to_user(keys + cp * map->key_size, key,
+                                map->key_size)) {
+                       err = -EFAULT;
+                       goto free_buf;
+               }
+               if (copy_to_user(values + cp * value_size, value, value_size)) {
+                       err = -EFAULT;
+                       goto free_buf;
+               }
+
+               if (!prev_key)
+                       prev_key = buf_prevkey;
+
+               swap(prev_key, key);
+               retry = MAP_LOOKUP_RETRIES;
+               cp++;
+       }
+
+       if (err == -EFAULT)
+               goto free_buf;
+
+       if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
+           (cp && copy_to_user(uobatch, prev_key, map->key_size)))
+               err = -EFAULT;
+
+free_buf:
+       kfree(buf_prevkey);
+       kfree(buf);
+       return err;
+}
+
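
generic_map_lookup_batch() resumes from whatever key the previous call left in out_batch, so a full dump is a loop that feeds each out_batch token back in as in_batch until the kernel reports ENOENT. A sketch under those assumptions; dump_map() and the bpf() wrapper are hypothetical, and the token buffer is sized for the generic path, where it holds a key (hash maps use a u32 bucket index, which also fits when key_size >= 4):

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <linux/bpf.h>

int bpf(int cmd, union bpf_attr *attr);	/* wrapper from the earlier sketch */

static int dump_map(int map_fd, __u32 key_size, void *keys, void *values,
		    __u32 chunk)
{
	void *in = NULL, *out = malloc(key_size);
	union bpf_attr attr;
	int err = 0;

	if (!out)
		return -ENOMEM;
	for (;;) {
		memset(&attr, 0, sizeof(attr));
		attr.batch.map_fd = map_fd;
		attr.batch.in_batch = (__u64)(unsigned long)in;
		attr.batch.out_batch = (__u64)(unsigned long)out;
		attr.batch.keys = (__u64)(unsigned long)keys;
		attr.batch.values = (__u64)(unsigned long)values;
		attr.batch.count = chunk;

		err = bpf(BPF_MAP_LOOKUP_BATCH, &attr);
		if (err && errno != ENOENT) {
			err = -errno;
			break;
		}
		/* ... consume attr.batch.count entries from keys/values ... */
		if (err) {		/* ENOENT: no more entries */
			err = 0;
			break;
		}
		in = out;		/* resume from the returned token */
	}
	free(out);
	return err;
}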
 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
 
 static int map_lookup_and_delete_elem(union bpf_attr *attr)
@@ -1672,17 +1925,24 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                           enum bpf_attach_type expected_attach_type,
                           u32 btf_id, u32 prog_fd)
 {
-       switch (prog_type) {
-       case BPF_PROG_TYPE_TRACING:
+       if (btf_id) {
                if (btf_id > BTF_MAX_TYPE)
                        return -EINVAL;
-               break;
-       default:
-               if (btf_id || prog_fd)
+
+               switch (prog_type) {
+               case BPF_PROG_TYPE_TRACING:
+               case BPF_PROG_TYPE_STRUCT_OPS:
+               case BPF_PROG_TYPE_EXT:
+                       break;
+               default:
                        return -EINVAL;
-               break;
+               }
        }
 
+       if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
+           prog_type != BPF_PROG_TYPE_EXT)
+               return -EINVAL;
+
        switch (prog_type) {
        case BPF_PROG_TYPE_CGROUP_SOCK:
                switch (expected_attach_type) {
@@ -1723,6 +1983,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                default:
                        return -EINVAL;
                }
+       case BPF_PROG_TYPE_EXT:
+               if (expected_attach_type)
+                       return -EINVAL;
+               /* fallthrough */
        default:
                return 0;
        }
@@ -1925,7 +2189,8 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
        int tr_fd, err;
 
        if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
-           prog->expected_attach_type != BPF_TRACE_FEXIT) {
+           prog->expected_attach_type != BPF_TRACE_FEXIT &&
+           prog->type != BPF_PROG_TYPE_EXT) {
                err = -EINVAL;
                goto out_put_prog;
        }
@@ -1992,12 +2257,14 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 
        if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
            prog->type != BPF_PROG_TYPE_TRACING &&
+           prog->type != BPF_PROG_TYPE_EXT &&
            prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
                err = -EINVAL;
                goto out_put_prog;
        }
 
-       if (prog->type == BPF_PROG_TYPE_TRACING) {
+       if (prog->type == BPF_PROG_TYPE_TRACING ||
+           prog->type == BPF_PROG_TYPE_EXT) {
                if (attr->raw_tracepoint.name) {
                        /* The attach point for this category of programs
                         * should be specified via btf_id during program load.
@@ -2817,6 +3084,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
                info.btf_key_type_id = map->btf_key_type_id;
                info.btf_value_type_id = map->btf_value_type_id;
        }
+       info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
 
        if (bpf_map_is_dev_bound(map)) {
                err = bpf_map_offload_info_fill(&info, map);
@@ -3029,6 +3297,61 @@ out:
        return err;
 }
 
+#define BPF_MAP_BATCH_LAST_FIELD batch.flags
+
+#define BPF_DO_BATCH(fn)                       \
+       do {                                    \
+               if (!fn) {                      \
+                       err = -ENOTSUPP;        \
+                       goto err_put;           \
+               }                               \
+               err = fn(map, attr, uattr);     \
+       } while (0)
+
+static int bpf_map_do_batch(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr,
+                           int cmd)
+{
+       struct bpf_map *map;
+       int err, ufd;
+       struct fd f;
+
+       if (CHECK_ATTR(BPF_MAP_BATCH))
+               return -EINVAL;
+
+       ufd = attr->batch.map_fd;
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       if ((cmd == BPF_MAP_LOOKUP_BATCH ||
+            cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
+           !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+               err = -EPERM;
+               goto err_put;
+       }
+
+       if (cmd != BPF_MAP_LOOKUP_BATCH &&
+           !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+               err = -EPERM;
+               goto err_put;
+       }
+
+       if (cmd == BPF_MAP_LOOKUP_BATCH)
+               BPF_DO_BATCH(map->ops->map_lookup_batch);
+       else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
+               BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
+       else if (cmd == BPF_MAP_UPDATE_BATCH)
+               BPF_DO_BATCH(map->ops->map_update_batch);
+       else
+               BPF_DO_BATCH(map->ops->map_delete_batch);
+
+err_put:
+       fdput(f);
+       return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -3126,6 +3449,19 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
                err = map_lookup_and_delete_elem(&attr);
                break;
+       case BPF_MAP_LOOKUP_BATCH:
+               err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
+               break;
+       case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
+               err = bpf_map_do_batch(&attr, uattr,
+                                      BPF_MAP_LOOKUP_AND_DELETE_BATCH);
+               break;
+       case BPF_MAP_UPDATE_BATCH:
+               err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
+               break;
+       case BPF_MAP_DELETE_BATCH:
+               err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
+               break;
        default:
                err = -EINVAL;
                break;
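
The new syscall commands above dispatch into bpf_map_do_batch(); bulk-populating a map is symmetrical to the lookup side. A hypothetical sketch reusing the earlier bpf() wrapper:

#include <errno.h>
#include <string.h>
#include <linux/bpf.h>

int bpf(int cmd, union bpf_attr *attr);	/* wrapper from the earlier sketch */

/* Hypothetical: bulk-populate @count entries in one syscall. */
static int update_batch(int map_fd, void *keys, void *values, __u32 count)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd = map_fd;
	attr.batch.keys = (__u64)(unsigned long)keys;
	attr.batch.values = (__u64)(unsigned long)values;
	attr.batch.count = count;	/* out: entries actually updated */

	if (bpf(BPF_MAP_UPDATE_BATCH, &attr))
		return -errno;	/* attr.batch.count still holds progress */
	return attr.batch.count;
}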
index 505f4e4b31d243c46a531871206be2d0f747f6e5..eb64c245052b427720490c5cc79ddfead714f14b 100644 (file)
@@ -5,6 +5,12 @@
 #include <linux/filter.h>
 #include <linux/ftrace.h>
 
+/* Dummy _ops. The verifier will operate on the target program's ops. */
+const struct bpf_verifier_ops bpf_extension_verifier_ops = {
+};
+const struct bpf_prog_ops bpf_extension_prog_ops = {
+};
+
 /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
 #define TRAMPOLINE_HASH_BITS 10
 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
@@ -160,11 +166,20 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
        if (fexit_cnt)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
-       err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags,
+       /* Though the second half of the trampoline page is unused, a task
+        * could be preempted in the middle of the first half of the
+        * trampoline, and two updates to the trampoline would change the
+        * code from underneath the preempted task. Hence wait for tasks to
+        * voluntarily schedule or go to userspace.
+        */
+       synchronize_rcu_tasks();
+
+       err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+                                         &tr->func.model, flags,
                                          fentry, fentry_cnt,
                                          fexit, fexit_cnt,
                                          tr->func.addr);
-       if (err)
+       if (err < 0)
                goto out;
 
        if (tr->selector)
@@ -185,8 +200,10 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
        switch (t) {
        case BPF_TRACE_FENTRY:
                return BPF_TRAMP_FENTRY;
-       default:
+       case BPF_TRACE_FEXIT:
                return BPF_TRAMP_FEXIT;
+       default:
+               return BPF_TRAMP_REPLACE;
        }
 }
 
@@ -195,12 +212,31 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err = 0;
+       int cnt;
 
        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
        mutex_lock(&tr->mutex);
-       if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]
-           >= BPF_MAX_TRAMP_PROGS) {
+       if (tr->extension_prog) {
+               /* cannot attach fentry/fexit if extension prog is attached.
+                * cannot overwrite extension prog either.
+                */
+               err = -EBUSY;
+               goto out;
+       }
+       cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
+       if (kind == BPF_TRAMP_REPLACE) {
+               /* Cannot attach extension if fentry/fexit are in use. */
+               if (cnt) {
+                       err = -EBUSY;
+                       goto out;
+               }
+               tr->extension_prog = prog;
+               err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
+                                        prog->bpf_func);
+               goto out;
+       }
+       if (cnt >= BPF_MAX_TRAMP_PROGS) {
                err = -E2BIG;
                goto out;
        }
@@ -231,9 +267,17 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
        mutex_lock(&tr->mutex);
+       if (kind == BPF_TRAMP_REPLACE) {
+               WARN_ON_ONCE(!tr->extension_prog);
+               err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
+                                        tr->extension_prog->bpf_func, NULL);
+               tr->extension_prog = NULL;
+               goto out;
+       }
        hlist_del(&prog->aux->tramp_hlist);
        tr->progs_cnt[kind]--;
        err = bpf_trampoline_update(prog->aux->trampoline);
+out:
        mutex_unlock(&tr->mutex);
        return err;
 }
@@ -250,6 +294,8 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
                goto out;
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                goto out;
+       /* wait for tasks to get out of trampoline before freeing it */
+       synchronize_rcu_tasks();
        bpf_jit_free_exec(tr->image);
        hlist_del(&tr->hlist);
        kfree(tr);
@@ -296,7 +342,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 }
 
 int __weak
-arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+arch_prepare_bpf_trampoline(void *image, void *image_end,
+                           const struct btf_func_model *m, u32 flags,
                            struct bpf_prog **fentry_progs, int fentry_cnt,
                            struct bpf_prog **fexit_progs, int fexit_cnt,
                            void *orig_call)
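
Program extensions (BPF_PROG_TYPE_EXT) use this trampoline plumbing to replace a global function in an already-loaded program. A hedged BPF C sketch: the freplace section convention follows this series, and the target function name do_sample() is invented for illustration; the replacement's signature must match the function being replaced.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Replaces the global function do_sample() of a target XDP program. */
SEC("freplace/do_sample")
int new_do_sample(struct xdp_md *ctx)
{
	return XDP_PASS;	/* new body for the replaced function */
}

char _license[] SEC("license") = "GPL";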
index 7d530ce8719d8d06efc9498f70e13f35c700ccd9..1cc945daa9c8a7812acce1710f9e7c5b6ef3c1b6 100644 (file)
@@ -1122,10 +1122,6 @@ static void init_reg_state(struct bpf_verifier_env *env,
        regs[BPF_REG_FP].type = PTR_TO_STACK;
        mark_reg_known_zero(env, regs, BPF_REG_FP);
        regs[BPF_REG_FP].frameno = state->frameno;
-
-       /* 1st arg to a function */
-       regs[BPF_REG_1].type = PTR_TO_CTX;
-       mark_reg_known_zero(env, regs, BPF_REG_1);
 }
 
 #define BPF_MAIN_FUNC (-1)
@@ -1916,6 +1912,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
        case PTR_TO_TCP_SOCK:
        case PTR_TO_TCP_SOCK_OR_NULL:
        case PTR_TO_XDP_SOCK:
+       case PTR_TO_BTF_ID:
                return true;
        default:
                return false;
@@ -2738,8 +2735,8 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
 }
 #endif
 
-static int check_ctx_reg(struct bpf_verifier_env *env,
-                        const struct bpf_reg_state *reg, int regno)
+int check_ctx_reg(struct bpf_verifier_env *env,
+                 const struct bpf_reg_state *reg, int regno)
 {
        /* Access to ctx or passing it to a helper is only allowed in
         * its original, unmodified form.
@@ -2858,11 +2855,6 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
        u32 btf_id;
        int ret;
 
-       if (atype != BPF_READ) {
-               verbose(env, "only read is supported\n");
-               return -EACCES;
-       }
-
        if (off < 0) {
                verbose(env,
                        "R%d is ptr_%s invalid negative access: off=%d\n",
@@ -2879,17 +2871,32 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
                return -EACCES;
        }
 
-       ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+       if (env->ops->btf_struct_access) {
+               ret = env->ops->btf_struct_access(&env->log, t, off, size,
+                                                 atype, &btf_id);
+       } else {
+               if (atype != BPF_READ) {
+                       verbose(env, "only read is supported\n");
+                       return -EACCES;
+               }
+
+               ret = btf_struct_access(&env->log, t, off, size, atype,
+                                       &btf_id);
+       }
+
        if (ret < 0)
                return ret;
 
-       if (ret == SCALAR_VALUE) {
-               mark_reg_unknown(env, regs, value_regno);
-               return 0;
+       if (atype == BPF_READ) {
+               if (ret == SCALAR_VALUE) {
+                       mark_reg_unknown(env, regs, value_regno);
+                       return 0;
+               }
+               mark_reg_known_zero(env, regs, value_regno);
+               regs[value_regno].type = PTR_TO_BTF_ID;
+               regs[value_regno].btf_id = btf_id;
        }
-       mark_reg_known_zero(env, regs, value_regno);
-       regs[value_regno].type = PTR_TO_BTF_ID;
-       regs[value_regno].btf_id = btf_id;
+
        return 0;
 }
 
@@ -3945,12 +3952,26 @@ static int release_reference(struct bpf_verifier_env *env,
        return 0;
 }
 
+static void clear_caller_saved_regs(struct bpf_verifier_env *env,
+                                   struct bpf_reg_state *regs)
+{
+       int i;
+
+       /* after the call registers r0 - r5 were scratched */
+       for (i = 0; i < CALLER_SAVED_REGS; i++) {
+               mark_reg_not_init(env, regs, caller_saved[i]);
+               check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+       }
+}
+
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                           int *insn_idx)
 {
        struct bpf_verifier_state *state = env->cur_state;
+       struct bpf_func_info_aux *func_info_aux;
        struct bpf_func_state *caller, *callee;
        int i, err, subprog, target_insn;
+       bool is_global = false;
 
        if (state->curframe + 1 >= MAX_CALL_FRAMES) {
                verbose(env, "the call stack of %d frames is too deep\n",
@@ -3973,6 +3994,32 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                return -EFAULT;
        }
 
+       func_info_aux = env->prog->aux->func_info_aux;
+       if (func_info_aux)
+               is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
+       err = btf_check_func_arg_match(env, subprog, caller->regs);
+       if (err == -EFAULT)
+               return err;
+       if (is_global) {
+               if (err) {
+                       verbose(env, "Caller passes invalid args into func#%d\n",
+                               subprog);
+                       return err;
+               } else {
+                       if (env->log.level & BPF_LOG_LEVEL)
+                               verbose(env,
+                                       "Func#%d is global and valid. Skipping.\n",
+                                       subprog);
+                       clear_caller_saved_regs(env, caller->regs);
+
+                       /* All global functions return SCALAR_VALUE */
+                       mark_reg_unknown(env, caller->regs, BPF_REG_0);
+
+                       /* continue with next insn after call */
+                       return 0;
+               }
+       }
+
        callee = kzalloc(sizeof(*callee), GFP_KERNEL);
        if (!callee)
                return -ENOMEM;
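
Function-by-function verification means a global (non-static) subprogram is checked once against its own BTF signature, and each call site is only checked for type-compatible arguments rather than re-analyzed. A hedged BPF C sketch; function and section names are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Global (non-static) function: verified independently, so the
 * verifier skips re-analyzing its body at every call site.
 */
__attribute__((noinline))
int allow_port(__u32 port)
{
	return port == 80 || port == 443;
}

SEC("xdp")
int xdp_filter(struct xdp_md *ctx)
{
	/* Parsing elided: a real program would read the packet here. */
	return allow_port(80) ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";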
@@ -3999,18 +4046,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
        for (i = BPF_REG_1; i <= BPF_REG_5; i++)
                callee->regs[i] = caller->regs[i];
 
-       /* after the call registers r0 - r5 were scratched */
-       for (i = 0; i < CALLER_SAVED_REGS; i++) {
-               mark_reg_not_init(env, caller->regs, caller_saved[i]);
-               check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
-       }
+       clear_caller_saved_regs(env, caller->regs);
 
        /* only increment it after check_reg_arg() finished */
        state->curframe++;
 
-       if (btf_check_func_arg_match(env, subprog))
-               return -EINVAL;
-
        /* and go analyze first insn of the callee */
        *insn_idx = target_insn;
 
@@ -6360,8 +6400,30 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 static int check_return_code(struct bpf_verifier_env *env)
 {
        struct tnum enforce_attach_type_range = tnum_unknown;
+       const struct bpf_prog *prog = env->prog;
        struct bpf_reg_state *reg;
        struct tnum range = tnum_range(0, 1);
+       int err;
+
+       /* The struct_ops func-ptr's return type could be "void" */
+       if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
+           !prog->aux->attach_func_proto->type)
+               return 0;
+
+       /* eBPF calling convention is such that R0 is used
+        * to return the value from eBPF program.
+        * Make sure that it's readable at this time
+        * of bpf_exit, which means that program wrote
+        * something into it earlier
+        */
+       err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+       if (err)
+               return err;
+
+       if (is_pointer_value(env, BPF_REG_0)) {
+               verbose(env, "R0 leaks addr as return value\n");
+               return -EACCES;
+       }
 
        switch (env->prog->type) {
        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
@@ -6750,12 +6812,13 @@ static int check_btf_func(struct bpf_verifier_env *env,
 
                /* check type_id */
                type = btf_type_by_id(btf, krecord[i].type_id);
-               if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
+               if (!type || !btf_type_is_func(type)) {
                        verbose(env, "invalid type id %d in func info",
                                krecord[i].type_id);
                        ret = -EINVAL;
                        goto err_free;
                }
+               info_aux[i].linkage = BTF_INFO_VLEN(type->info);
                prev_offset = krecord[i].insn_off;
                urecord += urec_size;
        }
@@ -7735,35 +7798,13 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
 
 static int do_check(struct bpf_verifier_env *env)
 {
-       struct bpf_verifier_state *state;
+       struct bpf_verifier_state *state = env->cur_state;
        struct bpf_insn *insns = env->prog->insnsi;
        struct bpf_reg_state *regs;
        int insn_cnt = env->prog->len;
        bool do_print_state = false;
        int prev_insn_idx = -1;
 
-       env->prev_linfo = NULL;
-
-       state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
-       if (!state)
-               return -ENOMEM;
-       state->curframe = 0;
-       state->speculative = false;
-       state->branches = 1;
-       state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
-       if (!state->frame[0]) {
-               kfree(state);
-               return -ENOMEM;
-       }
-       env->cur_state = state;
-       init_func_state(env, state->frame[0],
-                       BPF_MAIN_FUNC /* callsite */,
-                       0 /* frameno */,
-                       0 /* subprogno, zero == main subprog */);
-
-       if (btf_check_func_arg_match(env, 0))
-               return -EINVAL;
-
        for (;;) {
                struct bpf_insn *insn;
                u8 class;
@@ -7841,7 +7882,7 @@ static int do_check(struct bpf_verifier_env *env)
                }
 
                regs = cur_regs(env);
-               env->insn_aux_data[env->insn_idx].seen = true;
+               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
                prev_insn_idx = env->insn_idx;
 
                if (class == BPF_ALU || class == BPF_ALU64) {
@@ -8027,21 +8068,6 @@ static int do_check(struct bpf_verifier_env *env)
                                if (err)
                                        return err;
 
-                               /* eBPF calling convetion is such that R0 is used
-                                * to return the value from eBPF program.
-                                * Make sure that it's readable at this time
-                                * of bpf_exit, which means that program wrote
-                                * something into it earlier
-                                */
-                               err = check_reg_arg(env, BPF_REG_0, SRC_OP);
-                               if (err)
-                                       return err;
-
-                               if (is_pointer_value(env, BPF_REG_0)) {
-                                       verbose(env, "R0 leaks addr as return value\n");
-                                       return -EACCES;
-                               }
-
                                err = check_return_code(env);
                                if (err)
                                        return err;
@@ -8076,7 +8102,7 @@ process_bpf_exit:
                                        return err;
 
                                env->insn_idx++;
-                               env->insn_aux_data[env->insn_idx].seen = true;
+                               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
                        } else {
                                verbose(env, "invalid BPF_LD mode\n");
                                return -EINVAL;
@@ -8089,7 +8115,6 @@ process_bpf_exit:
                env->insn_idx++;
        }
 
-       env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
        return 0;
 }
 
@@ -8149,6 +8174,11 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
                return -EINVAL;
        }
 
+       if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+               verbose(env, "bpf_struct_ops map cannot be used in prog\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
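
A struct_ops map is registered from userspace and consumed by the kernel subsystem it implements; program code is never supposed to reference it, which is what the check above enforces. A hedged sketch of the declaration side, using the ".struct_ops" ELF section convention libbpf gains in this series; passing such a map to a map helper like bpf_map_lookup_elem() is exactly what would now be rejected:

    SEC(".struct_ops")
    struct tcp_congestion_ops my_cc = {
            /* function-pointer members point at BPF programs; the map
             * itself is only touched via the map syscalls and
             * bpf_map__attach_struct_ops(), never from program code.
             */
            .name = "bpf_my_cc",
    };
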
@@ -8361,7 +8391,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
        memcpy(new_data + off + cnt - 1, old_data + off,
               sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
        for (i = off; i < off + cnt - 1; i++) {
-               new_data[i].seen = true;
+               new_data[i].seen = env->pass_cnt;
                new_data[i].zext_dst = insn_has_def32(env, insn + i);
        }
        env->insn_aux_data = new_data;
@@ -8840,12 +8870,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                        convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
                        break;
                case PTR_TO_BTF_ID:
-                       if (type == BPF_WRITE) {
+                       if (type == BPF_READ) {
+                               insn->code = BPF_LDX | BPF_PROBE_MEM |
+                                       BPF_SIZE((insn)->code);
+                               env->prog->aux->num_exentries++;
+                       } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
                                verbose(env, "Writes through BTF pointers are not allowed\n");
                                return -EINVAL;
                        }
-                       insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
-                       env->prog->aux->num_exentries++;
                        continue;
                default:
                        continue;
@@ -9425,6 +9457,30 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                        goto patch_call_imm;
                }
 
+               if (prog->jit_requested && BITS_PER_LONG == 64 &&
+                   insn->imm == BPF_FUNC_jiffies64) {
+                       struct bpf_insn ld_jiffies_addr[2] = {
+                               BPF_LD_IMM64(BPF_REG_0,
+                                            (unsigned long)&jiffies),
+                       };
+
+                       insn_buf[0] = ld_jiffies_addr[0];
+                       insn_buf[1] = ld_jiffies_addr[1];
+                       insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
+                                                 BPF_REG_0, 0);
+                       cnt = 3;
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+                                                      cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta    += cnt - 1;
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       continue;
+               }
+
 patch_call_imm:
                fn = env->ops->get_func_proto(insn->imm, env->prog);
                /* all functions that have prototype and verifier allowed
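
The hunk above teaches fixup_bpf_calls() to inline bpf_jiffies64() on 64-bit JITs as a BPF_LD_IMM64 of &jiffies followed by a single BPF_LDX, avoiding the helper call entirely. From BPF C the helper is simply called; a hedged sketch, assuming the helper declaration generated from this series' UAPI additions:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("tracepoint/syscalls/sys_enter_nanosleep")
    int probe(void *ctx)
    {
            /* patched into a direct load of the jiffies variable
             * when JITed on a 64-bit host
             */
            __u64 now = bpf_jiffies64();

            bpf_printk("jiffies64=%llu\n", now);
            return 0;
    }

    char _license[] SEC("license") = "GPL";
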
@@ -9471,6 +9527,7 @@ static void free_states(struct bpf_verifier_env *env)
                kfree(sl);
                sl = sln;
        }
+       env->free_list = NULL;
 
        if (!env->explored_states)
                return;
@@ -9484,11 +9541,164 @@ static void free_states(struct bpf_verifier_env *env)
                        kfree(sl);
                        sl = sln;
                }
+               env->explored_states[i] = NULL;
        }
+}
 
-       kvfree(env->explored_states);
+/* The verifier uses insn_aux_data[] to store temporary data during
+ * verification and to store information for passes that run after the
+ * verification, like dead code sanitization. do_check_common() for subprogram N
+ * may analyze many other subprograms. sanitize_insn_aux_data() clears all
+ * temporary data after do_check_common() finds that subprogram N cannot be
+ * verified independently. pass_cnt counts the number of times
+ * do_check_common() was run and insn->aux->seen records the pass number in
+ * which insn_aux_data was touched. The two are compared to clear temporary
+ * data from a failed pass. For testing and experiments do_check_common() can
+ * be run multiple times even when a prior attempt to verify was unsuccessful.
+ */
+static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
+{
+       struct bpf_insn *insn = env->prog->insnsi;
+       struct bpf_insn_aux_data *aux;
+       int i, class;
+
+       for (i = 0; i < env->prog->len; i++) {
+               class = BPF_CLASS(insn[i].code);
+               if (class != BPF_LDX && class != BPF_STX)
+                       continue;
+               aux = &env->insn_aux_data[i];
+               if (aux->seen != env->pass_cnt)
+                       continue;
+               memset(aux, 0, offsetof(typeof(*aux), orig_idx));
+       }
 }
 
+static int do_check_common(struct bpf_verifier_env *env, int subprog)
+{
+       struct bpf_verifier_state *state;
+       struct bpf_reg_state *regs;
+       int ret, i;
+
+       env->prev_linfo = NULL;
+       env->pass_cnt++;
+
+       state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
+       if (!state)
+               return -ENOMEM;
+       state->curframe = 0;
+       state->speculative = false;
+       state->branches = 1;
+       state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+       if (!state->frame[0]) {
+               kfree(state);
+               return -ENOMEM;
+       }
+       env->cur_state = state;
+       init_func_state(env, state->frame[0],
+                       BPF_MAIN_FUNC /* callsite */,
+                       0 /* frameno */,
+                       subprog);
+
+       regs = state->frame[state->curframe]->regs;
+       if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
+               ret = btf_prepare_func_args(env, subprog, regs);
+               if (ret)
+                       goto out;
+               for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
+                       if (regs[i].type == PTR_TO_CTX)
+                               mark_reg_known_zero(env, regs, i);
+                       else if (regs[i].type == SCALAR_VALUE)
+                               mark_reg_unknown(env, regs, i);
+               }
+       } else {
+               /* 1st arg to a function */
+               regs[BPF_REG_1].type = PTR_TO_CTX;
+               mark_reg_known_zero(env, regs, BPF_REG_1);
+               ret = btf_check_func_arg_match(env, subprog, regs);
+               if (ret == -EFAULT)
+                       /* Unlikely verifier bug. Abort.
+                        * ret == 0 and ret < 0 are sadly both acceptable
+                        * for the main() function due to backward
+                        * compatibility. E.g., a socket filter program may
+                        * be written as:
+                        * int bpf_prog(struct pt_regs *ctx)
+                        * and never dereference that ctx in the program.
+                        * 'struct pt_regs' is a type mismatch for a socket
+                        * filter, which should be using 'struct __sk_buff'.
+                        */
+                       goto out;
+       }
+
+       ret = do_check(env);
+out:
+       /* check for NULL is necessary, since cur_state can be freed inside
+        * do_check() under memory pressure.
+        */
+       if (env->cur_state) {
+               free_verifier_state(env->cur_state, true);
+               env->cur_state = NULL;
+       }
+       while (!pop_stack(env, NULL, NULL));
+       free_states(env);
+       if (ret)
+               /* clean aux data in case subprog was rejected */
+               sanitize_insn_aux_data(env);
+       return ret;
+}
+
+/* Verify all global functions in a BPF program one by one based on their BTF.
+ * All global functions must pass verification. Otherwise the whole program is rejected.
+ * Consider:
+ * int bar(int);
+ * int foo(int f)
+ * {
+ *    return bar(f);
+ * }
+ * int bar(int b)
+ * {
+ *    ...
+ * }
+ * foo() will be verified first for R1=any_scalar_value. During verification it
+ * will be assumed that bar() already verified successfully and call to bar()
+ * from foo() will be checked for type match only. Later bar() will be verified
+ * independently to check that it's safe for R1=any_scalar_value.
+ */
+static int do_check_subprogs(struct bpf_verifier_env *env)
+{
+       struct bpf_prog_aux *aux = env->prog->aux;
+       int i, ret;
+
+       if (!aux->func_info)
+               return 0;
+
+       for (i = 1; i < env->subprog_cnt; i++) {
+               if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
+                       continue;
+               env->insn_idx = env->subprog_info[i].start;
+               WARN_ON_ONCE(env->insn_idx == 0);
+               ret = do_check_common(env, i);
+               if (ret) {
+                       return ret;
+               } else if (env->log.level & BPF_LOG_LEVEL) {
+                       verbose(env,
+                               "Func#%d is safe for any args that match its prototype\n",
+                               i);
+               }
+       }
+       return 0;
+}
+
+static int do_check_main(struct bpf_verifier_env *env)
+{
+       int ret;
+
+       env->insn_idx = 0;
+       ret = do_check_common(env, 0);
+       if (!ret)
+               env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+       return ret;
+}
+
 static void print_verification_stats(struct bpf_verifier_env *env)
 {
        int i;
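
Concretely, the foo()/bar() case from the comment above looks as follows in BPF C: a non-static, non-inlined function gets BTF_FUNC_GLOBAL linkage, so do_check_subprogs() verifies it once against its prototype and every call site is only type-checked. A hedged sketch with illustrative names:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* global function: verified independently for R1 = any scalar */
    __attribute__((noinline))
    int clamp_len(int len)
    {
            return len > 1500 ? 1500 : len;
    }

    SEC("classifier")
    int main_prog(struct __sk_buff *skb)
    {
            /* this call is checked for prototype match only */
            return clamp_len(skb->len);
    }

    char _license[] SEC("license") = "GPL";
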
@@ -9513,9 +9723,62 @@ static void print_verification_stats(struct bpf_verifier_env *env)
                env->peak_states, env->longest_mark_read_walk);
 }
 
+static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
+{
+       const struct btf_type *t, *func_proto;
+       const struct bpf_struct_ops *st_ops;
+       const struct btf_member *member;
+       struct bpf_prog *prog = env->prog;
+       u32 btf_id, member_idx;
+       const char *mname;
+
+       btf_id = prog->aux->attach_btf_id;
+       st_ops = bpf_struct_ops_find(btf_id);
+       if (!st_ops) {
+               verbose(env, "attach_btf_id %u is not a supported struct\n",
+                       btf_id);
+               return -ENOTSUPP;
+       }
+
+       t = st_ops->type;
+       member_idx = prog->expected_attach_type;
+       if (member_idx >= btf_type_vlen(t)) {
+               verbose(env, "attach to invalid member idx %u of struct %s\n",
+                       member_idx, st_ops->name);
+               return -EINVAL;
+       }
+
+       member = &btf_type_member(t)[member_idx];
+       mname = btf_name_by_offset(btf_vmlinux, member->name_off);
+       func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
+                                              NULL);
+       if (!func_proto) {
+               verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
+                       mname, member_idx, st_ops->name);
+               return -EINVAL;
+       }
+
+       if (st_ops->check_member) {
+               int err = st_ops->check_member(t, member);
+
+               if (err) {
+                       verbose(env, "attach to unsupported member %s of struct %s\n",
+                               mname, st_ops->name);
+                       return err;
+               }
+       }
+
+       prog->aux->attach_func_proto = func_proto;
+       prog->aux->attach_func_name = mname;
+       env->ops = st_ops->verifier_ops;
+
+       return 0;
+}
+
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
        struct bpf_prog *prog = env->prog;
+       bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
        struct bpf_prog *tgt_prog = prog->aux->linked_prog;
        u32 btf_id = prog->aux->attach_btf_id;
        const char prefix[] = "btf_trace_";
@@ -9528,7 +9791,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
        long addr;
        u64 key;
 
-       if (prog->type != BPF_PROG_TYPE_TRACING)
+       if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+               return check_struct_ops_btf_id(env);
+
+       if (prog->type != BPF_PROG_TYPE_TRACING && !prog_extension)
                return 0;
 
        if (!btf_id) {
@@ -9564,8 +9830,59 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                        return -EINVAL;
                }
                conservative = aux->func_info_aux[subprog].unreliable;
+               if (prog_extension) {
+                       if (conservative) {
+                               verbose(env,
+                                       "Cannot replace static functions\n");
+                               return -EINVAL;
+                       }
+                       if (!prog->jit_requested) {
+                               verbose(env,
+                                       "Extension programs should be JITed\n");
+                               return -EINVAL;
+                       }
+                       env->ops = bpf_verifier_ops[tgt_prog->type];
+               }
+               if (!tgt_prog->jited) {
+                       verbose(env, "Can attach to only JITed progs\n");
+                       return -EINVAL;
+               }
+               if (tgt_prog->type == prog->type) {
+                       /* Cannot fentry/fexit another fentry/fexit program.
+                        * Cannot attach program extension to another extension.
+                        * It's ok to attach fentry/fexit to extension program.
+                        */
+                       verbose(env, "Cannot recursively attach\n");
+                       return -EINVAL;
+               }
+               if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
+                   prog_extension &&
+                   (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
+                    tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
+                       /* Program extensions can extend all program types
+                        * except fentry/fexit, for the following reason:
+                        * fentry/fexit programs are used for performance
+                        * analysis and stats, and can be attached to any
+                        * program type except themselves. When an extension
+                        * program replaces an XDP function, it is necessary
+                        * to allow performance analysis of all functions,
+                        * both the original XDP program and its program
+                        * extension. Hence attaching fentry/fexit to
+                        * BPF_PROG_TYPE_EXT is allowed. If extending
+                        * fentry/fexit were allowed, it would be possible
+                        * to create a long call chain
+                        * fentry->extension->fentry->extension beyond a
+                        * reasonable stack size. Hence extending fentry is
+                        * not allowed.
+                        */
+                       verbose(env, "Cannot extend fentry/fexit\n");
+                       return -EINVAL;
+               }
                key = ((u64)aux->id) << 32 | btf_id;
        } else {
+               if (prog_extension) {
+                       verbose(env, "Cannot replace kernel functions\n");
+                       return -EINVAL;
+               }
                key = btf_id;
        }
 
@@ -9603,6 +9920,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                prog->aux->attach_func_proto = t;
                prog->aux->attach_btf_trace = true;
                return 0;
+       default:
+               if (!prog_extension)
+                       return -EINVAL;
+               /* fallthrough */
        case BPF_TRACE_FENTRY:
        case BPF_TRACE_FEXIT:
                if (!btf_type_is_func(t)) {
@@ -9610,6 +9931,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                                btf_id);
                        return -EINVAL;
                }
+               if (prog_extension &&
+                   btf_check_type_match(env, prog, btf, t))
+                       return -EINVAL;
                t = btf_type_by_id(btf, t->type);
                if (!btf_type_is_func_proto(t))
                        return -EINVAL;
@@ -9633,18 +9957,6 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
                if (ret < 0)
                        goto out;
                if (tgt_prog) {
-                       if (!tgt_prog->jited) {
-                               /* for now */
-                               verbose(env, "Can trace only JITed BPF progs\n");
-                               ret = -EINVAL;
-                               goto out;
-                       }
-                       if (tgt_prog->type == BPF_PROG_TYPE_TRACING) {
-                               /* prevent cycles */
-                               verbose(env, "Cannot recursively attach\n");
-                               ret = -EINVAL;
-                               goto out;
-                       }
                        if (subprog == 0)
                                addr = (long) tgt_prog->bpf_func;
                        else
@@ -9666,8 +9978,6 @@ out:
                if (ret)
                        bpf_trampoline_put(tr);
                return ret;
-       default:
-               return -EINVAL;
        }
 }
 
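
The BPF_PROG_TYPE_EXT path added above is what libbpf exposes as "freplace" programs: they replace one global function inside an already loaded, JITed program, and per the checks above cannot target static functions, other extensions, or fentry/fexit programs. A hedged sketch; the target function name and the userspace call are illustrative assumptions:

    /* Replaces the global function "clamp_len" in a loaded program.
     * Userspace would select the target before load, e.g. with
     * bpf_program__set_attach_target(prog, tgt_prog_fd, "clamp_len")
     * (hypothetical usage).
     */
    SEC("freplace/clamp_len")
    int clamp_len_jumbo(int len)
    {
            return len > 9000 ? 9000 : len;
    }
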
@@ -9737,10 +10047,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
                goto skip_full_check;
        }
 
-       ret = check_attach_btf_id(env);
-       if (ret)
-               goto skip_full_check;
-
        env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
                env->strict_alignment = true;
@@ -9777,22 +10083,22 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
        if (ret < 0)
                goto skip_full_check;
 
+       ret = check_attach_btf_id(env);
+       if (ret)
+               goto skip_full_check;
+
        ret = check_cfg(env);
        if (ret < 0)
                goto skip_full_check;
 
-       ret = do_check(env);
-       if (env->cur_state) {
-               free_verifier_state(env->cur_state, true);
-               env->cur_state = NULL;
-       }
+       ret = do_check_subprogs(env);
+       ret = ret ?: do_check_main(env);
 
        if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
                ret = bpf_prog_offload_finalize(env);
 
 skip_full_check:
-       while (!pop_stack(env, NULL, NULL));
-       free_states(env);
+       kvfree(env->explored_states);
 
        if (ret == 0)
                ret = check_max_stack_depth(env);
index e5ef4ae9edb5060c78470826e28c452e687ce2fa..19e793aa441a8edb3b2f1292dec878344f7258fb 100644 (file)
@@ -703,6 +703,7 @@ struct send_signal_irq_work {
        struct irq_work irq_work;
        struct task_struct *task;
        u32 sig;
+       enum pid_type type;
 };
 
 static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
@@ -712,10 +713,10 @@ static void do_bpf_send_signal(struct irq_work *entry)
        struct send_signal_irq_work *work;
 
        work = container_of(entry, struct send_signal_irq_work, irq_work);
-       group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
+       group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
 }
 
-BPF_CALL_1(bpf_send_signal, u32, sig)
+static int bpf_send_signal_common(u32 sig, enum pid_type type)
 {
        struct send_signal_irq_work *work = NULL;
 
@@ -748,11 +749,17 @@ BPF_CALL_1(bpf_send_signal, u32, sig)
                 */
                work->task = current;
                work->sig = sig;
+               work->type = type;
                irq_work_queue(&work->irq_work);
                return 0;
        }
 
-       return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
+       return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
+}
+
+BPF_CALL_1(bpf_send_signal, u32, sig)
+{
+       return bpf_send_signal_common(sig, PIDTYPE_TGID);
 }
 
 static const struct bpf_func_proto bpf_send_signal_proto = {
@@ -762,6 +769,18 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
        .arg1_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_send_signal_thread, u32, sig)
+{
+       return bpf_send_signal_common(sig, PIDTYPE_PID);
+}
+
+static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+       .func           = bpf_send_signal_thread,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -822,6 +841,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #endif
        case BPF_FUNC_send_signal:
                return &bpf_send_signal_proto;
+       case BPF_FUNC_send_signal_thread:
+               return &bpf_send_signal_thread_proto;
        default:
                return NULL;
        }
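
The new helper mirrors bpf_send_signal() but passes PIDTYPE_PID, so the signal goes only to the thread that triggered the program rather than to the whole thread group. A hedged tracing sketch, assuming the helper declaration generated from this series' UAPI additions:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("tracepoint/syscalls/sys_enter_mmap")
    int on_mmap(void *ctx)
    {
            /* SIGUSR1 (10 on x86) to the current thread only */
            bpf_send_signal_thread(10);
            return 0;
    }

    char _license[] SEC("license") = "GPL";
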
index 6368c94c9e0a21fddc3c9d7551afde58976ff885..4dcc1b3906673ca72779d04fed1be4ef1f3f1797 100644 (file)
@@ -9835,6 +9835,8 @@ void free_netdev(struct net_device *dev)
 
        free_percpu(dev->pcpu_refcnt);
        dev->pcpu_refcnt = NULL;
+       free_percpu(dev->xdp_bulkq);
+       dev->xdp_bulkq = NULL;
 
        netdev_unregister_lockdep_key(dev);
 
index 1012b70a7c84d2542315cc9b16b17ccbfc106538..792e3744b915f64619499feba9dd3747d3ac9eca 100644 (file)
@@ -3459,58 +3459,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
-static int __bpf_tx_xdp(struct net_device *dev,
-                       struct bpf_map *map,
-                       struct xdp_buff *xdp,
-                       u32 index)
-{
-       struct xdp_frame *xdpf;
-       int err, sent;
-
-       if (!dev->netdev_ops->ndo_xdp_xmit) {
-               return -EOPNOTSUPP;
-       }
-
-       err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
-       if (unlikely(err))
-               return err;
-
-       xdpf = convert_to_xdp_frame(xdp);
-       if (unlikely(!xdpf))
-               return -EOVERFLOW;
-
-       sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
-       if (sent <= 0)
-               return sent;
-       return 0;
-}
-
-static noinline int
-xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
-                    struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
-{
-       struct net_device *fwd;
-       u32 index = ri->tgt_index;
-       int err;
-
-       fwd = dev_get_by_index_rcu(dev_net(dev), index);
-       ri->tgt_index = 0;
-       if (unlikely(!fwd)) {
-               err = -EINVAL;
-               goto err;
-       }
-
-       err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
-       if (unlikely(err))
-               goto err;
-
-       _trace_xdp_redirect(dev, xdp_prog, index);
-       return 0;
-err:
-       _trace_xdp_redirect_err(dev, xdp_prog, index, err);
-       return err;
-}
-
 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
                            struct bpf_map *map, struct xdp_buff *xdp)
 {
@@ -3523,18 +3471,18 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
        case BPF_MAP_TYPE_XSKMAP:
                return __xsk_map_redirect(fwd, xdp);
        default:
-               break;
+               return -EBADRQC;
        }
        return 0;
 }
 
-void xdp_do_flush_map(void)
+void xdp_do_flush(void)
 {
-       __dev_map_flush();
+       __dev_flush();
        __cpu_map_flush();
        __xsk_map_flush();
 }
-EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+EXPORT_SYMBOL_GPL(xdp_do_flush);
 
 static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 {
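
With the bulking rework, xdp_do_flush_map() becomes xdp_do_flush() and now also drains the map-less device redirect path, so a driver issues exactly one flush per NAPI cycle no matter how the XDP program redirected. A hedged sketch of the driver-side pattern; the mydrv_* names are hypothetical:

    static int mydrv_napi_poll(struct napi_struct *napi, int budget)
    {
            /* RX processing may call xdp_do_redirect() per frame */
            int done = mydrv_clean_rx(napi, budget);

            /* one flush drains devmap, cpumap, xskmap and plain
             * device redirect queues alike
             */
            xdp_do_flush();
            return done;
    }
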
@@ -3569,10 +3517,11 @@ void bpf_clear_redirect_map(struct bpf_map *map)
        }
 }
 
-static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
-                              struct bpf_prog *xdp_prog, struct bpf_map *map,
-                              struct bpf_redirect_info *ri)
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+                   struct bpf_prog *xdp_prog)
 {
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       struct bpf_map *map = READ_ONCE(ri->map);
        u32 index = ri->tgt_index;
        void *fwd = ri->tgt_value;
        int err;
@@ -3581,7 +3530,18 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
        ri->tgt_value = NULL;
        WRITE_ONCE(ri->map, NULL);
 
-       err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+       if (unlikely(!map)) {
+               fwd = dev_get_by_index_rcu(dev_net(dev), index);
+               if (unlikely(!fwd)) {
+                       err = -EINVAL;
+                       goto err;
+               }
+
+               err = dev_xdp_enqueue(fwd, xdp, dev);
+       } else {
+               err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+       }
+
        if (unlikely(err))
                goto err;
 
@@ -3591,18 +3551,6 @@ err:
        _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
        return err;
 }
-
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
-                   struct bpf_prog *xdp_prog)
-{
-       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-       struct bpf_map *map = READ_ONCE(ri->map);
-
-       if (likely(map))
-               return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
-
-       return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
-}
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
 static int xdp_do_generic_redirect_map(struct net_device *dev,
@@ -5935,7 +5883,7 @@ bool bpf_helper_changes_pkt_data(void *func)
        return false;
 }
 
-static const struct bpf_func_proto *
+const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
        switch (func_id) {
@@ -5975,6 +5923,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_spin_unlock_proto;
        case BPF_FUNC_trace_printk:
                return bpf_get_trace_printk_proto();
+       case BPF_FUNC_jiffies64:
+               return &bpf_jiffies64_proto;
        default:
                return NULL;
        }
index 8459ad579f735ce724b559f7114d1b77f360e5b2..a4c8fac781ff3ceba7fb6c85f636e18a0de66891 100644 (file)
@@ -2786,7 +2786,7 @@ static void sock_def_error_report(struct sock *sk)
        rcu_read_unlock();
 }
 
-static void sock_def_readable(struct sock *sk)
+void sock_def_readable(struct sock *sk)
 {
        struct socket_wq *wq;
 
index d57ecfaf89d48c73f00bf7f2a151365648c1001c..9d97bace13c83bbbf13efb9c1c790d480f77808b 100644 (file)
@@ -65,3 +65,7 @@ obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
                      xfrm4_output.o xfrm4_protocol.o
+
+ifeq ($(CONFIG_BPF_JIT),y)
+obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o
+endif
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
new file mode 100644 (file)
index 0000000..574972b
--- /dev/null
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook  */
+
+#include <linux/types.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <net/tcp.h>
+
+static u32 optional_ops[] = {
+       offsetof(struct tcp_congestion_ops, init),
+       offsetof(struct tcp_congestion_ops, release),
+       offsetof(struct tcp_congestion_ops, set_state),
+       offsetof(struct tcp_congestion_ops, cwnd_event),
+       offsetof(struct tcp_congestion_ops, in_ack_event),
+       offsetof(struct tcp_congestion_ops, pkts_acked),
+       offsetof(struct tcp_congestion_ops, min_tso_segs),
+       offsetof(struct tcp_congestion_ops, sndbuf_expand),
+       offsetof(struct tcp_congestion_ops, cong_control),
+};
+
+static u32 unsupported_ops[] = {
+       offsetof(struct tcp_congestion_ops, get_info),
+};
+
+static const struct btf_type *tcp_sock_type;
+static u32 tcp_sock_id, sock_id;
+
+static int bpf_tcp_ca_init(struct btf *btf)
+{
+       s32 type_id;
+
+       type_id = btf_find_by_name_kind(btf, "sock", BTF_KIND_STRUCT);
+       if (type_id < 0)
+               return -EINVAL;
+       sock_id = type_id;
+
+       type_id = btf_find_by_name_kind(btf, "tcp_sock", BTF_KIND_STRUCT);
+       if (type_id < 0)
+               return -EINVAL;
+       tcp_sock_id = type_id;
+       tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
+
+       return 0;
+}
+
+static bool is_optional(u32 member_offset)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
+               if (member_offset == optional_ops[i])
+                       return true;
+       }
+
+       return false;
+}
+
+static bool is_unsupported(u32 member_offset)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(unsupported_ops); i++) {
+               if (member_offset == unsupported_ops[i])
+                       return true;
+       }
+
+       return false;
+}
+
+extern struct btf *btf_vmlinux;
+
+static bool bpf_tcp_ca_is_valid_access(int off, int size,
+                                      enum bpf_access_type type,
+                                      const struct bpf_prog *prog,
+                                      struct bpf_insn_access_aux *info)
+{
+       if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
+               return false;
+       if (type != BPF_READ)
+               return false;
+       if (off % size != 0)
+               return false;
+
+       if (!btf_ctx_access(off, size, type, prog, info))
+               return false;
+
+       if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id)
+               /* promote it to tcp_sock */
+               info->btf_id = tcp_sock_id;
+
+       return true;
+}
+
+static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
+                                       const struct btf_type *t, int off,
+                                       int size, enum bpf_access_type atype,
+                                       u32 *next_btf_id)
+{
+       size_t end;
+
+       if (atype == BPF_READ)
+               return btf_struct_access(log, t, off, size, atype, next_btf_id);
+
+       if (t != tcp_sock_type) {
+               bpf_log(log, "only read is supported\n");
+               return -EACCES;
+       }
+
+       switch (off) {
+       case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
+               end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
+               break;
+       case offsetof(struct inet_connection_sock, icsk_ack.pending):
+               end = offsetofend(struct inet_connection_sock,
+                                 icsk_ack.pending);
+               break;
+       case offsetof(struct tcp_sock, snd_cwnd):
+               end = offsetofend(struct tcp_sock, snd_cwnd);
+               break;
+       case offsetof(struct tcp_sock, snd_cwnd_cnt):
+               end = offsetofend(struct tcp_sock, snd_cwnd_cnt);
+               break;
+       case offsetof(struct tcp_sock, snd_ssthresh):
+               end = offsetofend(struct tcp_sock, snd_ssthresh);
+               break;
+       case offsetof(struct tcp_sock, ecn_flags):
+               end = offsetofend(struct tcp_sock, ecn_flags);
+               break;
+       default:
+               bpf_log(log, "no write support to tcp_sock at off %d\n", off);
+               return -EACCES;
+       }
+
+       if (off + size > end) {
+               bpf_log(log,
+                       "write access at off %d with size %d beyond the member of tcp_sock ended at %zu\n",
+                       off, size, end);
+               return -EACCES;
+       }
+
+       return NOT_INIT;
+}
+
+BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt)
+{
+       /* bpf_tcp_ca prog cannot have NULL tp */
+       __tcp_send_ack((struct sock *)tp, rcv_nxt);
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
+       .func           = bpf_tcp_send_ack,
+       .gpl_only       = false,
+       /* In case we want to report an error later */
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg2_type      = ARG_ANYTHING,
+       .btf_id         = &tcp_sock_id,
+};
+
+static const struct bpf_func_proto *
+bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
+                         const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_tcp_send_ack:
+               return &bpf_tcp_send_ack_proto;
+       default:
+               return bpf_base_func_proto(func_id);
+       }
+}
+
+static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
+       .get_func_proto         = bpf_tcp_ca_get_func_proto,
+       .is_valid_access        = bpf_tcp_ca_is_valid_access,
+       .btf_struct_access      = bpf_tcp_ca_btf_struct_access,
+};
+
+static int bpf_tcp_ca_init_member(const struct btf_type *t,
+                                 const struct btf_member *member,
+                                 void *kdata, const void *udata)
+{
+       const struct tcp_congestion_ops *utcp_ca;
+       struct tcp_congestion_ops *tcp_ca;
+       size_t tcp_ca_name_len;
+       int prog_fd;
+       u32 moff;
+
+       utcp_ca = (const struct tcp_congestion_ops *)udata;
+       tcp_ca = (struct tcp_congestion_ops *)kdata;
+
+       moff = btf_member_bit_offset(t, member) / 8;
+       switch (moff) {
+       case offsetof(struct tcp_congestion_ops, flags):
+               if (utcp_ca->flags & ~TCP_CONG_MASK)
+                       return -EINVAL;
+               tcp_ca->flags = utcp_ca->flags;
+               return 1;
+       case offsetof(struct tcp_congestion_ops, name):
+               tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name));
+               if (!tcp_ca_name_len ||
+                   tcp_ca_name_len == sizeof(utcp_ca->name))
+                       return -EINVAL;
+               if (tcp_ca_find(utcp_ca->name))
+                       return -EEXIST;
+               memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name));
+               return 1;
+       }
+
+       if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
+               return 0;
+
+       /* Ensure bpf_prog is provided for compulsory func ptr */
+       prog_fd = (int)(*(unsigned long *)(udata + moff));
+       if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int bpf_tcp_ca_check_member(const struct btf_type *t,
+                                  const struct btf_member *member)
+{
+       if (is_unsupported(btf_member_bit_offset(t, member) / 8))
+               return -ENOTSUPP;
+       return 0;
+}
+
+static int bpf_tcp_ca_reg(void *kdata)
+{
+       return tcp_register_congestion_control(kdata);
+}
+
+static void bpf_tcp_ca_unreg(void *kdata)
+{
+       tcp_unregister_congestion_control(kdata);
+}
+
+/* Avoid sparse warning.  It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+
+struct bpf_struct_ops bpf_tcp_congestion_ops = {
+       .verifier_ops = &bpf_tcp_ca_verifier_ops,
+       .reg = bpf_tcp_ca_reg,
+       .unreg = bpf_tcp_ca_unreg,
+       .check_member = bpf_tcp_ca_check_member,
+       .init_member = bpf_tcp_ca_init_member,
+       .init = bpf_tcp_ca_init,
+       .name = "tcp_congestion_ops",
+};
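
Tying the pieces of bpf_tcp_ca.c together: a BPF congestion control is a ".struct_ops" map whose function-pointer members are struct_ops programs, with arguments arriving as PTR_TO_BTF_ID and writes limited to the fields whitelisted in bpf_tcp_ca_btf_struct_access() above. A hedged sketch modelled loosely on this series' selftests; the BPF_PROG wrapper comes from the selftest helpers, and a registrable ops would also need .ssthresh and .undo_cwnd, omitted here for brevity:

    SEC("struct_ops/my_cc_cong_avoid")
    void BPF_PROG(my_cc_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
    {
            /* sk is promoted to tcp_sock by bpf_tcp_ca_is_valid_access() */
            struct tcp_sock *tp = (struct tcp_sock *)sk;

            tp->snd_cwnd += acked;  /* snd_cwnd writes are whitelisted above */
    }

    SEC(".struct_ops")
    struct tcp_congestion_ops my_cc = {
            .cong_avoid = (void *)my_cc_cong_avoid,
            .name       = "bpf_my_cc",
    };

    char _license[] SEC("license") = "GPL";
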
index 3737ec096650271b49456de9906ccf26465c7b02..3172e31987be4232af90e7b204742c5bb09ef6ca 100644 (file)
@@ -21,7 +21,7 @@ static DEFINE_SPINLOCK(tcp_cong_list_lock);
 static LIST_HEAD(tcp_cong_list);
 
 /* Simple linear search, don't expect many entries! */
-static struct tcp_congestion_ops *tcp_ca_find(const char *name)
+struct tcp_congestion_ops *tcp_ca_find(const char *name)
 {
        struct tcp_congestion_ops *e;
 
@@ -162,7 +162,7 @@ void tcp_assign_congestion_control(struct sock *sk)
 
        rcu_read_lock();
        ca = rcu_dereference(net->ipv4.tcp_congestion_control);
-       if (unlikely(!try_module_get(ca->owner)))
+       if (unlikely(!bpf_try_module_get(ca, ca->owner)))
                ca = &tcp_reno;
        icsk->icsk_ca_ops = ca;
        rcu_read_unlock();
@@ -208,7 +208,7 @@ void tcp_cleanup_congestion_control(struct sock *sk)
 
        if (icsk->icsk_ca_ops->release)
                icsk->icsk_ca_ops->release(sk);
-       module_put(icsk->icsk_ca_ops->owner);
+       bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
 }
 
 /* Used by sysctl to change default congestion control */
@@ -222,12 +222,12 @@ int tcp_set_default_congestion_control(struct net *net, const char *name)
        ca = tcp_ca_find_autoload(net, name);
        if (!ca) {
                ret = -ENOENT;
-       } else if (!try_module_get(ca->owner)) {
+       } else if (!bpf_try_module_get(ca, ca->owner)) {
                ret = -EBUSY;
        } else {
                prev = xchg(&net->ipv4.tcp_congestion_control, ca);
                if (prev)
-                       module_put(prev->owner);
+                       bpf_module_put(prev, prev->owner);
 
                ca->flags |= TCP_CONG_NON_RESTRICTED;
                ret = 0;
@@ -366,19 +366,19 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
        } else if (!load) {
                const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
 
-               if (try_module_get(ca->owner)) {
+               if (bpf_try_module_get(ca, ca->owner)) {
                        if (reinit) {
                                tcp_reinit_congestion_control(sk, ca);
                        } else {
                                icsk->icsk_ca_ops = ca;
-                               module_put(old_ca->owner);
+                               bpf_module_put(old_ca, old_ca->owner);
                        }
                } else {
                        err = -EBUSY;
                }
        } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
                err = -EPERM;
-       } else if (!try_module_get(ca->owner)) {
+       } else if (!bpf_try_module_get(ca, ca->owner)) {
                err = -EBUSY;
        } else {
                tcp_reinit_congestion_control(sk, ca);
index fedb537839ec466329afc302b7d94d37a15c42e9..df1166b76126742c21cdbde8c09f76268c2f42cb 100644 (file)
@@ -2678,7 +2678,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
        int cpu;
 
        if (net->ipv4.tcp_congestion_control)
-               module_put(net->ipv4.tcp_congestion_control->owner);
+               bpf_module_put(net->ipv4.tcp_congestion_control,
+                              net->ipv4.tcp_congestion_control->owner);
 
        for_each_possible_cpu(cpu)
                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
@@ -2785,7 +2786,8 @@ static int __net_init tcp_sk_init(struct net *net)
 
        /* Reno is always built in */
        if (!net_eq(net, &init_net) &&
-           try_module_get(init_net.ipv4.tcp_congestion_control->owner))
+           bpf_try_module_get(init_net.ipv4.tcp_congestion_control,
+                              init_net.ipv4.tcp_congestion_control->owner))
                net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
        else
                net->ipv4.tcp_congestion_control = &tcp_reno;
index c802bc80c4006f82c2e9189ef1fc11b8f321e70d..ad3b56d9fa7156f724f7558abccb1367fb5ea8d3 100644 (file)
@@ -414,7 +414,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
 
                rcu_read_lock();
                ca = tcp_ca_find_key(ca_key);
-               if (likely(ca && try_module_get(ca->owner))) {
+               if (likely(ca && bpf_try_module_get(ca, ca->owner))) {
                        icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
                        icsk->icsk_ca_ops = ca;
                        ca_got_dst = true;
@@ -425,7 +425,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
        /* If no valid choice made yet, assign current system default ca. */
        if (!ca_got_dst &&
            (!icsk->icsk_ca_setsockopt ||
-            !try_module_get(icsk->icsk_ca_ops->owner)))
+            !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner)))
                tcp_assign_congestion_control(sk);
 
        tcp_set_ca_state(sk, TCP_CA_Open);
index 05109d0c675b814e4975614d6eef8b074e6ed95a..786978cb2db78b744c7301c113328c5387c09ee9 100644 (file)
@@ -3372,8 +3372,8 @@ static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
 
        rcu_read_lock();
        ca = tcp_ca_find_key(ca_key);
-       if (likely(ca && try_module_get(ca->owner))) {
-               module_put(icsk->icsk_ca_ops->owner);
+       if (likely(ca && bpf_try_module_get(ca, ca->owner))) {
+               bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
                icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
                icsk->icsk_ca_ops = ca;
        }
index 3049af269fbf6b3fc3fc0f8d40651ba39d187ed8..f93e917e09298b39bb3f1146775221a9adc8fecc 100644 (file)
@@ -249,7 +249,7 @@ static void xdp_umem_release(struct xdp_umem *umem)
        xdp_umem_unmap_pages(umem);
        xdp_umem_unpin_pages(umem);
 
-       kfree(umem->pages);
+       kvfree(umem->pages);
        umem->pages = NULL;
 
        xdp_umem_unaccount_pages(umem);
@@ -409,7 +409,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
        if (err)
                goto out_account;
 
-       umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
+       umem->pages = kvcalloc(umem->npgs, sizeof(*umem->pages),
+                              GFP_KERNEL_ACCOUNT);
        if (!umem->pages) {
                err = -ENOMEM;
                goto out_pin;
@@ -419,7 +420,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
        if (!err)
                return 0;
 
-       kfree(umem->pages);
+       kvfree(umem->pages);
 
 out_pin:
        xdp_umem_unpin_pages(umem);
index 02ada7ab8c6eb08a0e2ca56353dd0dc62dd55911..df600487a68d513c3f7abcb1b584d2f4c229a7dd 100644 (file)
@@ -217,7 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 static void xsk_flush(struct xdp_sock *xs)
 {
        xskq_prod_submit(xs->rx);
-       xs->sk.sk_data_ready(&xs->sk);
+       sock_def_readable(&xs->sk);
 }
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
index 5b89c0370f33213c55488c75f586168b70baa583..b0e8adf7eb01b5aeaed83f9982493aa9937a62f3 100644 (file)
@@ -184,7 +184,6 @@ TPROGS_CFLAGS += -Wmissing-prototypes
 TPROGS_CFLAGS += -Wstrict-prototypes
 
 TPROGS_CFLAGS += -I$(objtree)/usr/include
-TPROGS_CFLAGS += -I$(srctree)/tools/lib/bpf/
 TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
 TPROGS_CFLAGS += -I$(srctree)/tools/lib/
 TPROGS_CFLAGS += -I$(srctree)/tools/include
@@ -254,7 +253,7 @@ all:
 
 clean:
        $(MAKE) -C ../../ M=$(CURDIR) clean
-       @rm -f *~
+       @find $(CURDIR) -type f -name '*~' -delete
 
 $(LIBBPF): FORCE
 # Fix up variables inherited from Kbuild that tools/ build system won't like
@@ -305,7 +304,7 @@ $(obj)/%.o: $(src)/%.c
        @echo "  CLANG-bpf " $@
        $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
                -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
-               -I$(srctree)/tools/lib/bpf/ \
+               -I$(srctree)/tools/lib/ \
                -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
                -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
                -Wno-gnu-variable-sized-type-not-at-end \
index 68c84da065b1515a5fbe8141011fc37c1f55193c..a86a19d5f0333141bd1bd446faebad94389e70db 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/version.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /*
  * The CPU number, cstate number and pstate number are based
index 2d4b717726b645b9956dc65f672027cc071158b9..d5992f787232808691dbfe7da9445c7edfe902ba 100644 (file)
@@ -14,7 +14,7 @@
 
 #include <bpf/bpf.h>
 
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "bpf_insn.h"
 #include "sock_example.h"
 
index 829b68d8768770b053e2695d9dd4c4a9a27c9001..7d71537776785c9537d1ae66ea09a1d2a1e15432 100644 (file)
@@ -50,8 +50,8 @@
 #include "cgroup_helpers.h"
 #include "hbm.h"
 #include "bpf_util.h"
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 bool outFlag = true;
 int minRate = 1000;            /* cgroup rate limit in Mbps */
index 4edaf47876ca77d75ca25886749cdb2e8d89932c..e00f26f6afbaf2f26700d757b878500574240ae1 100644 (file)
@@ -22,8 +22,8 @@
 #include <uapi/linux/pkt_cls.h>
 #include <net/ipv6.h>
 #include <net/inet_ecn.h>
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
 #include "hbm.h"
 
 #define DROP_PKT       0
index f281df7e0089d85b285a785c47e22883245a953c..3a91b4c1989aa81710be6e0b57d88f7938c48294 100644 (file)
@@ -13,7 +13,7 @@
 #define KBUILD_MODNAME "ibumad_count_pkts_by_class"
 #include <uapi/linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 
 struct bpf_map_def SEC("maps") read_count = {
index cb5a8f994849b60b1211d39dafad2e5cec5e6795..fa06eef31a846fc9b4c26a5f4b273b8614aa8437 100644 (file)
@@ -25,7 +25,7 @@
 
 #include "bpf_load.h"
 #include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 
 static void dump_counts(int fd)
 {
index 18fa088473cdd5c10399782d4a7d10a93545cce1..ca9c2e4e69aa2019626f6ba9e9e16ba176c458e3 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/version.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define MAX_ENTRIES    20
 #define MAX_CPU                4
index df75383280f9efc250f15cb92eb8d47200fce3be..9ed63e10e17095f45e179625073593f9d2b9ed6c 100644 (file)
@@ -14,7 +14,7 @@
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/ip.h>
 #include <uapi/linux/in.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 # define printk(fmt, ...)                                              \
                ({                                                      \
index 281bcdaee58e2ee7c12ee4b0e82ec877558bf4bc..12e91ae64d4d9deddcfcb8e692b4144219e72569 100644 (file)
@@ -8,9 +8,9 @@
 #include <linux/netdevice.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_tracing.h>
 
 #define MAX_ENTRIES 1000
 #define MAX_NR_CPUS 1024
index 9cb5207a692f294aa9b23edcf4b34d9177f9924f..c4ec10dbfc3b446c50bd56aef2eca86bc53a2034 100644 (file)
@@ -5,8 +5,8 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 #include <uapi/linux/ptrace.h>
 #include <uapi/linux/perf_event.h>
 #include <linux/version.h>
index fc8767d001f6f3b03a24bc8cbbd328eadef880fa..51c7da5341cc699840d92e445b0bf42d02cdd3dd 100644 (file)
@@ -12,7 +12,7 @@
 #include <assert.h>
 #include <stdbool.h>
 #include <sys/resource.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "bpf_load.h"
 #include "trace_helpers.h"
 
index ef5892377beba44578f77f168b2d7a4f827b8ca9..c6f65f90a0973afe711c033f75e04beb5f33416d 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
 #define DEFAULT_PKTGEN_UDP_PORT        9
index 10af53d33cc2925a928fe496fdc1047629b998e3..4a486cb1e0df139446ea88324ca4d95ad09f930d 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/udp.h>
 #include <uapi/linux/bpf.h>
 #include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define DEFAULT_PKTGEN_UDP_PORT 9
 
index 0b6f22feb2c9ce37787ea5384276c85a4e1171eb..d8623846e810df86f9a3e00d1dd243ff9211aebf 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/udp.h>
 #include <uapi/linux/bpf.h>
 #include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define DEFAULT_PKTGEN_UDP_PORT 9
 #define DEBUG 0
index 4a190893894f9e69b1be103c053b8c1a513bf1ab..e504dc308371b7ce5ca42b171330b7609f41a59d 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/bpf_perf_event.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 #define MAX_IPS                8192
 
index 6b5dc26d97012536ba14b4ee40ae9a1fa84331fc..b0f115f938bc914654551f077393fb89e7062a41 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <sys/ioctl.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "bpf_load.h"
 #include "perf-sys.h"
 #include "trace_helpers.h"
index 05dcdf8a4baab23ac039668f5ca73a6bb3e00a0c..6d0ac7569d6f486e9556b32cb592a9df17a3d7ca 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/net.h>
 #include <uapi/linux/in.h>
 #include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("cgroup/sock1")
 int bpf_prog1(struct bpf_sock *sk)
index 2408dbfb7a216f2cda086cf555a890eb5d652042..431c956460ad4a51f1ea8f3ad62f185d103e88a3 100644 (file)
@@ -2,7 +2,7 @@
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
 struct {
index a219442afbeef683d95592f90f8a1cd00ec12336..3c83722877dc58aebd7e15f4a037f721e82cc2f4 100644 (file)
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "sock_example.h"
 #include <unistd.h>
 #include <arpa/inet.h>
index a7bcd03bf5294bff885c0013967a499b5dfc10d8..a41dd520bc53a16161feca25a708a9576c85b3aa 100644 (file)
@@ -1,5 +1,5 @@
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
index 6de383ddd08ba7a198b3e2c81d6710dc43a53811..af925a5afd1da7edceb49e6801d2d2e59610a91e 100644 (file)
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "sock_example.h"
 #include <unistd.h>
 #include <arpa/inet.h>
index 151dd842ecc0b492df8eb8acae495a3420ee3b4b..36d4dac2354933e3616e8787bfa9de1f592f4b11 100644 (file)
@@ -5,7 +5,7 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
index 6e9478aa2938f40a54b4a1aaff34677e2dcec428..f508af357251b5e180f8fabfbc6d4da603e6cff6 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_HASH,
index 2556af2d9b3e8e2725bfc2c062f99d588b7f727a..fb430ea2ef5109a0f81c5dec868e5686c62946b8 100644 (file)
@@ -5,7 +5,7 @@
 #include <string.h>
 #include <assert.h>
 #include <sys/resource.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "bpf_load.h"
 #include "trace_helpers.h"
 
index 630ce8c4d5a23c25406de1724b0ba53fb5867259..5a62b03b1f88f367090134f5d82fe575ff0d11d0 100644 (file)
@@ -2,7 +2,7 @@
 /* Copyright (c) 2017 Facebook
  */
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct syscalls_enter_open_args {
        unsigned long long unused;
index fb56fc2a3e5d6b40d7f7e4895dff463fc673e549..278ade5427c83644a5a6eadc4c11ec59b37cfbd6 100644 (file)
@@ -2,7 +2,7 @@
 #include <linux/version.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("kprobe/blk_mq_start_request")
 int bpf_prog1(struct pt_regs *ctx)
index 4c31b305e6efc08448fd7c4a63ce1a36e7fbee1d..ff2e9c1c7266a23491b1f6cf40f8b554c2e5a78f 100644 (file)
@@ -15,7 +15,7 @@
 #include <sys/stat.h>
 #include <linux/perf_event.h>
 
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "bpf_load.h"
 #include "bpf_util.h"
 #include "perf-sys.h"
index 7ef2a12b25b244b2700ab7feee961fb91b435970..fd2fa00043305a7de0d3a3d51061c5a0d6833046 100644 (file)
@@ -15,7 +15,7 @@
 #include <uapi/linux/filter.h>
 #include <uapi/linux/pkt_cls.h>
 #include <net/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define _htonl __builtin_bswap32
 
index ff43341bdfce12152b236119891fb420786eafde..e9356130f84e5323eb51ab4b28486f4c1319c419 100644 (file)
@@ -7,7 +7,7 @@
 #include <uapi/linux/tcp.h>
 #include <uapi/linux/filter.h>
 #include <uapi/linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
 /* compiler workaround */
index 9dba48c2b92074130c12fd3423c52a4dd6c48f30..8dfe09a92feca7cd3beedb73bef242a3a0e85571 100644 (file)
@@ -16,8 +16,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index af8486f33771ccb8ddbbb3834a26126a31579ca9..6a80d08952ad8ef05c1bd226e29e2df1ceb00118 100644 (file)
@@ -17,8 +17,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index 26c0fd091f3cf2fa567595dde3ef2fa16c38878d..e88bd9ab0695ed6f9a2a60dcfa3e0c90459e985c 100644 (file)
@@ -17,8 +17,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index 6d4dc4c7dd1e4a32fe5cf927b14093c2c8243403..2311fc9dde854caaa257d4982388fca0d1faa3ec 100644 (file)
@@ -16,8 +16,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index 8557913106a0fab6a98ed32bb50c8c835675635c..e80d3afd24bd36b98f996a5c153248133a0ca4cc 100644 (file)
@@ -4,8 +4,8 @@
  */
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define INTERVAL                       1000000000ULL
 
index da61d53378b377895a1f56f73022b99c3accaa3a..d1444557358e70ca088b6c2a46d4e2269abf01a4 100644 (file)
@@ -17,8 +17,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index d011e38b80d298b39346fb5fcb36a80f82d95c87..223d9c23b10ccfc01dd2c102e8e75f91fdc470c6 100644 (file)
@@ -16,8 +16,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index 720d1950322d20d8c9fcd4fdf3f8e4ccad84ae8b..d58004eef124733b7f601e66f31c059a4ccea261 100644 (file)
@@ -16,8 +16,8 @@
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index 369faca70a15d40a8a600d223ec802b9706d2e9e..953fedc79ce1170f1ae97f402be88731d049d265 100644 (file)
@@ -15,8 +15,8 @@
 #include <uapi/linux/ipv6.h>
 #include <uapi/linux/in.h>
 #include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define DEBUG 1
 
index 1547b36a7b7b9bd5251dd1be6ae2a451cdca0a29..4dd532a312b9dd6a257cefbebf2e988b10ceb2ce 100644 (file)
@@ -10,7 +10,7 @@
 #include <uapi/linux/ipv6.h>
 #include <uapi/linux/pkt_cls.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* copy of 'struct ethhdr' without __packed */
 struct eth_hdr {
index 86b28d7d6c998553a264e0872fd2d6a9d1949de9..6dc4f41bb6cba981f6f12a90f469e60132df47cf 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include <uapi/linux/utsname.h>
 
 struct bpf_map_def SEC("maps") cgroup_map = {
index bacc8013436b0fe304ea10896f829c93968c56f4..1b568575ad11aae31b44f908e596bf3bffe97491 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/udp.h>
 #include <linux/icmpv6.h>
 #include <linux/if_ether.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include <string.h>
 
 # define printk(fmt, ...)                                              \
index 32ee752f19df6032e6f1bff274eb1a557986106b..6cee61e8ce9b6fdcb4b28a1d5837b3482d8a97c7 100644 (file)
@@ -10,9 +10,9 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_tracing.h>
 
 #define MAX_NR_PORTS 65536
 
index 8d2518e68db91530ed406116bc753f39dd22841f..8b811c29dc7996272c987a3f7a76ddaeff009aaf 100644 (file)
@@ -7,8 +7,8 @@
 #include <linux/version.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
 
index d2af8bc1c8051bfa4600f8821c4b7dd804a89c6b..8763181a32f37a419dfe6a530acf7a9e308c080d 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2018 Facebook */
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("raw_tracepoint/task_rename")
 int prog(struct bpf_raw_tracepoint_args *ctx)
index 38f5c0b9da9f450847324aeb4c0f6c6fc72d621e..eaa32693f8fc1ab1d0d181eb425d42949e7e0646 100644 (file)
@@ -5,7 +5,7 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* from /sys/kernel/debug/tracing/events/task/task_rename/format */
 struct task_rename {
index b7c48f37132c3a83f8e6492597649eb977cefe12..f033f36a13a389514d2bda36438c0b1a27b8136a 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/netdevice.h>
 #include <uapi/linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct bpf_map_def SEC("maps") dnat_map = {
        .type = BPF_MAP_TYPE_HASH,
index 8dc18d233a2775d78c11501ce0811f6c3eee0f3b..da1d69e206452731f613665e971b6cc1da65a3dc 100644 (file)
@@ -9,8 +9,8 @@
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/bpf_perf_event.h>
 #include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct key_t {
        char comm[TASK_COMM_LEN];
index 749a50f2f9f32cc608e290179d0ac543598b08b0..356171bc392bf0583ce18c0545f1cc3a1ccaaa8e 100644 (file)
@@ -15,7 +15,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <sys/resource.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include "bpf_load.h"
 #include "perf-sys.h"
 #include "trace_helpers.h"
index 9b96f4fb8cea64f9b98c835b045bd8c840719433..1d7d422cae6fa426f9330dd2e5774b148a4f450b 100644 (file)
@@ -1,7 +1,7 @@
 #include <linux/ptrace.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
index 8ee47699a870a3b2c7b278c256e74fb95fa4b5f3..60a17dd053454d1ac675397593ce0d1f3e1f1ebf 100644 (file)
@@ -15,7 +15,7 @@
 #include <sys/mman.h>
 #include <time.h>
 #include <signal.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 #include "bpf_load.h"
 #include "perf-sys.h"
 
index 1a15f6605129e3d9208e48e21c37a4cb8746bf18..8e2610e14475edc099fb00833217feb3f1e20522 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/netdevice.h>
 #include <uapi/linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
 
index d70b3ea79ea70461a6752571085438f86d3413d0..d865bb309bcb536ed9a34c66988e0a5a35f2cf43 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/netdevice.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_HASH,
index 9af546bebfa9a84716b700b74487d7dd343400e6..fe21c14feb8da583a3656a81f4d77d2593b0b3b3 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/netdevice.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct bpf_map_def SEC("maps") my_map = {
        .type = BPF_MAP_TYPE_HASH,
index 2a02cbe9d9a13a5d3fe05143ed2866b766df8711..b1bb9df88f8e1609f56719e0bfa6ddb484b51496 100644 (file)
@@ -7,8 +7,8 @@
 #include <linux/ptrace.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct pair {
        u64 val;
index b3557b21a8fe8711c0e8e5f4c9787ac40ed0e5f3..481790fde8645510d10379e95e47ffefde1be7b2 100644 (file)
@@ -10,8 +10,8 @@
 #include <uapi/linux/seccomp.h>
 #include <uapi/linux/unistd.h>
 #include "syscall_nrs.h"
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
 
index 46c557afac73a62cd8a4be0bd504afa53d946270..96c234efa852fc189cd4c3a3973f8c398547140f 100644 (file)
@@ -1,7 +1,7 @@
 #include <linux/ptrace.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct bpf_map_def SEC("maps") counters = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
index 1ab308a43e0ff0dd99d156c05536eef41717a2be..c5a92df8ac311c781cc2f89c615d28b63f2e12c7 100644 (file)
@@ -1,7 +1,7 @@
 #include <uapi/linux/ptrace.h>
 #include <uapi/linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("kprobe/open_ctree")
 int bpf_prog1(struct pt_regs *ctx)
index db6870aee42c0f0b09abca2ece222550d6565d31..34b64394ed9cafdcda9336a95ebc4993ccc025cd 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
index 38a8852cb57f51688f3b404d7d294ec907f0f04d..c447ad9e3a1d4f8c11c4ad7cbd0de1819abb4153 100644 (file)
@@ -15,8 +15,8 @@
 #include <net/if.h>
 
 #include "bpf_util.h"
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 static int ifindex;
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
index c74b52c6d945924cffceb645154e5fe1629998c0..c787f4b496462605a9aae8511cade3e21a39bd36 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
index 0c12048ac79fa9f9e2dc184b508880ae0b72878b..9b783316e8604c19b21db5c9d50e05211134b90c 100644 (file)
@@ -12,7 +12,7 @@
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/pkt_cls.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /*
  * This struct is stored in the XDP 'data_meta' area, which is located
index 0f707e0fb37558a0c73ec49e137f8d9a98317088..ffdd548627f0a42df9d64e2f7b72ea16957852a5 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/icmp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define DEFAULT_TTL 64
 #define MAX_PCKT_SIZE 600
index 008789eb6adad060c8330adb030d7d399a4460cf..ba482dc3da3368e83e7fa10c30ff4e5ab7cc0fa7 100644 (file)
@@ -19,8 +19,8 @@
 #include <netinet/ether.h>
 #include <unistd.h>
 #include <time.h>
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 #define STATS_INTERVAL_S 2U
 #define MAX_PCKT_SIZE 600
index d013029aeaa2a8f1a2d507f6a803c6625e47703d..54c099cbd63930c1fddf57eb77638752e94ee7c9 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define IPV6_FLOWINFO_MASK              cpu_to_be32(0x0FFFFFFF)
 
index c30f9acfdb84ddcda784ad57be0d662f9e34c574..74a4583d0d866435dd85979ed2893fc2576abd9e 100644 (file)
@@ -24,7 +24,7 @@
 #include <fcntl.h>
 #include <libgen.h>
 
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include <bpf/bpf.h>
 
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
index ad10fe700d7d37e4a6f786403aa79cf71d189c09..3d33cca2d48a8283a24c15794e46dd50675e43db 100644 (file)
@@ -4,7 +4,7 @@
  * XDP monitor tool, based on tracepoints
  */
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct bpf_map_def SEC("maps") redirect_err_cnt = {
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
@@ -222,14 +222,12 @@ struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
  */
 struct devmap_xmit_ctx {
        u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int map_id;             //      offset:8;  size:4; signed:1;
+       int from_ifindex;       //      offset:8;  size:4; signed:1;
        u32 act;                //      offset:12; size:4; signed:0;
-       u32 map_index;          //      offset:16; size:4; signed:0;
+       int to_ifindex;         //      offset:16; size:4; signed:1;
        int drops;              //      offset:20; size:4; signed:1;
        int sent;               //      offset:24; size:4; signed:1;
-       int from_ifindex;       //      offset:28; size:4; signed:1;
-       int to_ifindex;         //      offset:32; size:4; signed:1;
-       int err;                //      offset:36; size:4; signed:1;
+       int err;                //      offset:28; size:4; signed:1;
 };
 
 SEC("tracepoint/xdp/xdp_devmap_xmit")
index cfcc31e51197846d06ecdfb70da46c0a10b37f92..313a8fe6d125cfb19a574b6b070683a52e08b8d0 100644 (file)
@@ -12,7 +12,7 @@
 #include <uapi/linux/udp.h>
 
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "hash_func01.h"
 
 #define MAX_CPUS 64 /* WARNING - sync with _user.c */
index 79a2fb7d16cb79a45d563b297089af50a13b7c70..15bdf047a2221218c78bc4678f4be8f60269ad59 100644 (file)
@@ -30,7 +30,7 @@ static const char *__doc__ =
 #define MAX_PROG 6
 
 #include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 
 #include "bpf_util.h"
 
index 1f0b7d05abb2fa0ef2c1eacb066aef2b08784381..d26ec3aa215e215538b26e23e1d43db73c36849c 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
index 4631b484c43202333a8b579dc0b3bee629f3c501..6489352ab7a4065940927677b35ed2581c76e77a 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_DEVMAP);
index cc840661faabc909d41f9dd679db903129ad3467..35e16dee613ec703936e1de024a18dff2368bbd9 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "bpf_util.h"
 #include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 
 static int ifindex_in;
 static int ifindex_out;
index 71dff8e3382a884f0097b776ed6322191f2003a2..9ca2bf457cdaefe387e49b458e05defed7d9ba37 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "bpf_util.h"
 #include <bpf/bpf.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 
 static int ifindex_in;
 static int ifindex_out;
index bf11efc8e9494b5aa63c3ff028c88b4421497c2c..b37ca2b1306387b69ea1779e7519ed17f65e7edd 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include <linux/slab.h>
 #include <net/ip_fib.h>
 
index fef286c5add2d8b62c88fdb5c5048e06c93ec8e3..c2da1b51ff950ec8e41c4feff3634e39d38ecfcc 100644 (file)
@@ -21,7 +21,7 @@
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include "bpf_util.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include <sys/resource.h>
 #include <libgen.h>
 
index 272d0f82a6b5687bca3c7cb61001eff1b7ca5a04..5e7459f9bf3e545f1bb4742560e197a4f6a90b30 100644 (file)
@@ -6,7 +6,7 @@
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/in.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* Config setup from with userspace
  *
index fc4983fd69590af0c45d1db9087f01aa99c8c56d..4fe47502ebed4778e0126b2984f7dc232b451e83 100644 (file)
@@ -22,8 +22,8 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#include "bpf.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include "bpf_util.h"
 
 static int ifindex = -1;
index 6c7c7e0aaedacdd3f7967778a96a7aaab8d7d29b..33377289e2a8519522a2e9739c1c4d358840eb5d 100644 (file)
@@ -2,7 +2,7 @@
 #include <linux/ptrace.h>
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define SAMPLE_SIZE 64ul
 #define MAX_CPUS 128
index 8c1af1b7372d4c733c9d39865456a5969e7fb4d8..991ef6f0880b2226e732412c8a3e1199f6b356f1 100644 (file)
@@ -10,7 +10,7 @@
 #include <sys/sysinfo.h>
 #include <sys/ioctl.h>
 #include <signal.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 #include <bpf/bpf.h>
 #include <sys/resource.h>
 #include <libgen.h>
index 6db450a5c1ca3e444c93f206f8d096c3015ff1b2..575d57e4b8d6333ad374a0ad01919df634ad0223 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "xdp_tx_iptunnel_common.h"
 
 struct {
index 5f33b553003238259f85ece33521d103f6387426..a419bee151a85c60ded0a55943a2a6da43f38080 100644 (file)
@@ -15,7 +15,7 @@
 #include <netinet/ether.h>
 #include <unistd.h>
 #include <time.h>
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 #include <bpf/bpf.h>
 #include "bpf_util.h"
 #include "xdp_tx_iptunnel_common.h"
index a06177c262cd691d7682e8c694b5b691cda330a5..05430484375c1d488df40f670e0b88b632bed017 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "xdpsock.h"
 
 /* This XDP program is only needed for the XDP_SHARED_UMEM mode.
index d74c4c83fc939e7f04bb5db2cd5a1ea327e1a879..0b5acd7223062aaf98d553552246efebfd30bd84 100644 (file)
 #include <time.h>
 #include <unistd.h>
 
-#include "libbpf.h"
-#include "xsk.h"
-#include "xdpsock.h"
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
 #include <bpf/bpf.h>
+#include "xdpsock.h"
 
 #ifndef SOL_XDP
 #define SOL_XDP 283
index 7548569e8076f1b3caff45df7d32ef8fe4ceaaec..90baf7d70911a6a96bc349aff2881717a7ca6c82 100755 (executable)
@@ -158,8 +158,6 @@ class HeaderParser(object):
                 break
 
         self.reader.close()
-        print('Parsed description of %d helper function(s)' % len(self.helpers),
-              file=sys.stderr)
 
 ###############################################################################
 
index 436379940356130eb1cd4b2a20ded511e4a8b4c8..408b5c0b99b1b88c8048fbc6103429a8c7689856 100755 (executable)
@@ -108,13 +108,13 @@ gen_btf()
        local bin_arch
 
        if ! [ -x "$(command -v ${PAHOLE})" ]; then
-               info "BTF" "${1}: pahole (${PAHOLE}) is not available"
+               echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
                return 1
        fi
 
        pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
        if [ "${pahole_ver}" -lt "113" ]; then
-               info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
+               echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
                return 1
        fi
 
index 5535650800ab29607ae825a2e1d81ba32a2ac3ad..f897eeeb0b4fb250dc4dca55aba112579771edfd 100644 (file)
@@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args
 FEATURE_DISPLAY = libbfd disassembler-four-args
 
 check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean
 ifdef MAKECMDGOALS
 ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
   check_feat := 0
@@ -73,7 +73,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
 
 PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
 
-all: $(PROGS) bpftool
+all: $(PROGS) bpftool runqslower
 
 $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
 $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
@@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
 $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
 $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
 
-clean: bpftool_clean
+clean: bpftool_clean runqslower_clean
        $(call QUIET_CLEAN, bpf-progs)
        $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
               $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
@@ -97,7 +97,7 @@ clean: bpftool_clean
        $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf
        $(Q)$(RM) -r -- $(OUTPUT)feature
 
-install: $(PROGS) bpftool_install
+install: $(PROGS) bpftool_install runqslower_install
        $(call QUIET_INSTALL, bpf_jit_disasm)
        $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
        $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
@@ -115,4 +115,14 @@ bpftool_install:
 bpftool_clean:
        $(call descend,bpftool,clean)
 
-.PHONY: all install clean bpftool bpftool_install bpftool_clean
+runqslower:
+       $(call descend,runqslower)
+
+runqslower_install:
+       $(call descend,runqslower,install)
+
+runqslower_clean:
+       $(call descend,runqslower,clean)
+
+.PHONY: all install clean bpftool bpftool_install bpftool_clean \
+       runqslower runqslower_install runqslower_clean
index 86a87da97d0bb69cde1e55aa73921a7c2279ace8..94d91322895ab735f11447ae078363b2012c3aed 100644 (file)
@@ -196,7 +196,7 @@ and global variables.
   #define __EXAMPLE_SKEL_H__
 
   #include <stdlib.h>
-  #include <libbpf.h>
+  #include <bpf/libbpf.h>
 
   struct example {
        struct bpf_object_skeleton *skeleton;
index 39bc6f0f4f0bb839ade8b6962bff515729f318ea..c4e8103358104efcd792791e6dc2d89b171773c5 100644 (file)
@@ -45,7 +45,7 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
        -I$(srctree)/kernel/bpf/ \
        -I$(srctree)/tools/include \
        -I$(srctree)/tools/include/uapi \
-       -I$(srctree)/tools/lib/bpf \
+       -I$(srctree)/tools/lib \
        -I$(srctree)/tools/perf
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
 ifneq ($(EXTRA_CFLAGS),)
index e5bc97b71ceb67bb5e714be2750bb45baf82ed18..4ba90d81b6a1856bcc0f94e69a95622e24f1dd30 100644 (file)
@@ -8,15 +8,15 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <bpf.h>
-#include <libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
 #include <linux/btf.h>
 #include <linux/hashtable.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
-#include "btf.h"
 #include "json_writer.h"
 #include "main.h"
 
@@ -370,6 +370,10 @@ static int dump_btf_c(const struct btf *btf,
        if (IS_ERR(d))
                return PTR_ERR(d);
 
+       printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
+       printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
+       printf("#endif\n\n");
+
        if (root_type_cnt) {
                for (i = 0; i < root_type_cnt; i++) {
                        err = btf_dump__dump_type(d, root_type_ids[i]);
@@ -386,6 +390,10 @@ static int dump_btf_c(const struct btf *btf,
                }
        }
 
+       printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
+       printf("#pragma clang attribute pop\n");
+       printf("#endif\n");
+
 done:
        btf_dump__free(d);
        return err;
@@ -524,7 +532,7 @@ static int do_dump(int argc, char **argv)
                if (IS_ERR(btf)) {
                        err = PTR_ERR(btf);
                        btf = NULL;
-                       p_err("failed to load BTF from %s: %s", 
+                       p_err("failed to load BTF from %s: %s",
                              *argv, strerror(err));
                        goto done;
                }
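Editorial note: with the pragma pair added to dump_btf_c() above, every header produced by "bpftool btf dump file ... format c" (including the vmlinux.h that runqslower generates below) takes this shape, so field accesses through its types become CO-RE relocations unless the consumer defines BPF_NO_PRESERVE_ACCESS_INDEX:

    #ifndef BPF_NO_PRESERVE_ACCESS_INDEX
    #pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)
    #endif

    struct task_struct {
            /* ... members dumped from BTF ... */
    };

    #ifndef BPF_NO_PRESERVE_ACCESS_INDEX
    #pragma clang attribute pop
    #endif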
index 397e5716ab6d869f51496d29bd218f1a30eadf10..01cc52b834fabbd2b16229b82125b69c8b94a548 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/bitops.h>
 #include <linux/btf.h>
 #include <linux/err.h>
+#include <bpf/btf.h>
 
-#include "btf.h"
 #include "json_writer.h"
 #include "main.h"
 
index 2f017caa678dc973d6c697a0d70a29ca22d49d5f..62c6a1d7cd18b66261f17d9d8b14571f85bcd8e1 100644 (file)
@@ -14,7 +14,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 #include "main.h"
 
index 88264abaa738ad1dbab60b407ba584dac617a0b6..b75b8ec5469c207cce1dc4c72f8e21bcea5a7ce9 100644 (file)
@@ -20,8 +20,8 @@
 #include <sys/stat.h>
 #include <sys/vfs.h>
 
-#include <bpf.h>
-#include <libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
 
 #include "main.h"
 
index 03bdc5b3ac49e11b9a2790b7c867d20178afc9d9..446ba891f1e23311799726bc9ce1082e9acc3dc3 100644 (file)
@@ -12,8 +12,8 @@
 #include <linux/filter.h>
 #include <linux/limits.h>
 
-#include <bpf.h>
-#include <libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include <zlib.h>
 
 #include "main.h"
@@ -572,6 +572,18 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
                printf("\n");
 }
 
+static void
+probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
+{
+       bool res;
+
+       res = bpf_probe_large_insn_limit(ifindex);
+       print_bool_feature("have_large_insn_limit",
+                          "Large program size limit",
+                          "HAVE_LARGE_INSN_LIMIT",
+                          res, define_prefix);
+}
+
 static int do_probe(int argc, char **argv)
 {
        enum probe_component target = COMPONENT_UNSPEC;
@@ -724,6 +736,12 @@ static int do_probe(int argc, char **argv)
                probe_helpers_for_progtype(i, supported_types[i],
                                           define_prefix, ifindex);
 
+       print_end_then_start_section("misc",
+                                    "Scanning miscellaneous eBPF features...",
+                                    "/*** eBPF misc features ***/",
+                                    define_prefix);
+       probe_large_insn_limit(define_prefix, ifindex);
+
 exit_close_json:
        if (json_output) {
                /* End current "section" of probes */
index 7ce09a9a69994c666cc2733b9f844372b63a70b1..f8113b3646f52b328c1ce409fe4da09384dfdf5d 100644 (file)
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <bpf.h>
-#include <libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#include <bpf/btf.h>
 
-#include "btf.h"
-#include "libbpf_internal.h"
+#include "bpf/libbpf_internal.h"
 #include "json_writer.h"
 #include "main.h"
 
@@ -333,7 +333,7 @@ static int do_skeleton(int argc, char **argv)
                #define %2$s                                                \n\
                                                                            \n\
                #include <stdlib.h>                                         \n\
-               #include <libbpf.h>                                         \n\
+               #include <bpf/libbpf.h>                                     \n\
                                                                            \n\
                struct %1$s {                                               \n\
                        struct bpf_object_skeleton *skeleton;               \n\
index bfed711258cedd26ab93da98bab17ea1b91fdc9e..f7f5885aa3ba2bae338bbe23e8ece4ce337812b8 100644 (file)
@@ -24,7 +24,7 @@
 #include <dis-asm.h>
 #include <sys/stat.h>
 #include <limits.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 
 #include "json_writer.h"
 #include "main.h"
index 1fe91c55850833da48cbea721a40d7de08bcf0d1..6d41bbfc645937debb45753501c34f71802ffd2d 100644 (file)
@@ -9,8 +9,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include <bpf.h>
-#include <libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 #include "main.h"
 
index c01f76fa687619b1b5c7be9d9ce7a59a0d13c6c6..e6c85680b34d70d810b287926a1a77b4e265c027 100644 (file)
@@ -15,9 +15,9 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
-#include <bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
 
-#include "btf.h"
 #include "json_writer.h"
 #include "main.h"
 
@@ -48,6 +48,7 @@ const char * const map_type_name[] = {
        [BPF_MAP_TYPE_QUEUE]                    = "queue",
        [BPF_MAP_TYPE_STACK]                    = "stack",
        [BPF_MAP_TYPE_SK_STORAGE]               = "sk_storage",
+       [BPF_MAP_TYPE_STRUCT_OPS]               = "struct_ops",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -251,6 +252,7 @@ static int do_dump_btf(const struct btf_dumper *d,
                       struct bpf_map_info *map_info, void *key,
                       void *value)
 {
+       __u32 value_id;
        int ret;
 
        /* start of key-value pair */
@@ -264,9 +266,12 @@ static int do_dump_btf(const struct btf_dumper *d,
                        goto err_end_obj;
        }
 
+       value_id = map_info->btf_vmlinux_value_type_id ?
+               : map_info->btf_value_type_id;
+
        if (!map_is_per_cpu(map_info->type)) {
                jsonw_name(d->jw, "value");
-               ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
+               ret = btf_dumper_type(d, value_id, value);
        } else {
                unsigned int i, n, step;
 
@@ -278,8 +283,7 @@ static int do_dump_btf(const struct btf_dumper *d,
                        jsonw_start_object(d->jw);
                        jsonw_int_field(d->jw, "cpu", i);
                        jsonw_name(d->jw, "value");
-                       ret = btf_dumper_type(d, map_info->btf_value_type_id,
-                                             value + i * step);
+                       ret = btf_dumper_type(d, value_id, value + i * step);
                        jsonw_end_object(d->jw);
                        if (ret)
                                break;
@@ -915,37 +919,63 @@ static int maps_have_btf(int *fds, int nb_fds)
 {
        struct bpf_map_info info = {};
        __u32 len = sizeof(info);
-       struct btf *btf = NULL;
        int err, i;
 
        for (i = 0; i < nb_fds; i++) {
                err = bpf_obj_get_info_by_fd(fds[i], &info, &len);
                if (err) {
                        p_err("can't get map info: %s", strerror(errno));
-                       goto err_close;
-               }
-
-               err = btf__get_from_id(info.btf_id, &btf);
-               if (err) {
-                       p_err("failed to get btf");
-                       goto err_close;
+                       return -1;
                }
 
-               if (!btf)
+               if (!info.btf_id)
                        return 0;
        }
 
        return 1;
+}
 
-err_close:
-       for (; i < nb_fds; i++)
-               close(fds[i]);
-       return -1;
+static struct btf *btf_vmlinux;
+
+static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
+{
+       struct btf *btf = NULL;
+
+       if (info->btf_vmlinux_value_type_id) {
+               if (!btf_vmlinux) {
+                       btf_vmlinux = libbpf_find_kernel_btf();
+                       if (IS_ERR(btf_vmlinux))
+                               p_err("failed to get kernel btf");
+               }
+               return btf_vmlinux;
+       } else if (info->btf_value_type_id) {
+               int err;
+
+               err = btf__get_from_id(info->btf_id, &btf);
+               if (err || !btf) {
+                       p_err("failed to get btf");
+                       btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+               }
+       }
+
+       return btf;
+}
+
+static void free_map_kv_btf(struct btf *btf)
+{
+       if (!IS_ERR(btf) && btf != btf_vmlinux)
+               btf__free(btf);
+}
+
+static void free_btf_vmlinux(void)
+{
+       if (!IS_ERR(btf_vmlinux))
+               btf__free(btf_vmlinux);
 }
 
 static int
 map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
-        bool enable_btf, bool show_header)
+        bool show_header)
 {
        void *key, *value, *prev_key;
        unsigned int num_elems = 0;
@@ -962,18 +992,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
 
        prev_key = NULL;
 
-       if (enable_btf) {
-               err = btf__get_from_id(info->btf_id, &btf);
-               if (err || !btf) {
-                       /* enable_btf is true only if we've already checked
-                        * that all maps have BTF information.
-                        */
-                       p_err("failed to get btf");
+       if (wtr) {
+               btf = get_map_kv_btf(info);
+               if (IS_ERR(btf)) {
+                       err = PTR_ERR(btf);
                        goto exit_free;
                }
-       }
 
-       if (wtr) {
                if (show_header) {
                        jsonw_start_object(wtr);        /* map object */
                        show_map_header_json(info, wtr);
@@ -1012,7 +1037,7 @@ exit_free:
        free(key);
        free(value);
        close(fd);
-       btf__free(btf);
+       free_map_kv_btf(btf);
 
        return err;
 }
@@ -1021,7 +1046,7 @@ static int do_dump(int argc, char **argv)
 {
        json_writer_t *wtr = NULL, *btf_wtr = NULL;
        struct bpf_map_info info = {};
-       int nb_fds, i = 0, btf = 0;
+       int nb_fds, i = 0;
        __u32 len = sizeof(info);
        int *fds = NULL;
        int err = -1;
@@ -1041,17 +1066,17 @@ static int do_dump(int argc, char **argv)
        if (json_output) {
                wtr = json_wtr;
        } else {
-               btf = maps_have_btf(fds, nb_fds);
-               if (btf < 0)
+               int do_plain_btf;
+
+               do_plain_btf = maps_have_btf(fds, nb_fds);
+               if (do_plain_btf < 0)
                        goto exit_close;
-               if (btf) {
+
+               if (do_plain_btf) {
                        btf_wtr = get_btf_writer();
-                       if (btf_wtr) {
-                               wtr = btf_wtr;
-                       } else {
+                       wtr = btf_wtr;
+                       if (!btf_wtr)
                                p_info("failed to create json writer for btf. falling back to plain output");
-                               btf = 0;
-                       }
                }
        }
 
@@ -1062,7 +1087,7 @@ static int do_dump(int argc, char **argv)
                        p_err("can't get map info: %s", strerror(errno));
                        break;
                }
-               err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1);
+               err = map_dump(fds[i], &info, wtr, nb_fds > 1);
                if (!wtr && i != nb_fds - 1)
                        printf("\n");
 
@@ -1073,13 +1098,14 @@ static int do_dump(int argc, char **argv)
        if (wtr && nb_fds > 1)
                jsonw_end_array(wtr);   /* root array */
 
-       if (btf)
+       if (btf_wtr)
                jsonw_destroy(&btf_wtr);
 exit_close:
        for (; i < nb_fds; i++)
                close(fds[i]);
 exit_free:
        free(fds);
+       free_btf_vmlinux();
        return err;
 }
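Editorial note on the rework above: kernel BTF is relatively expensive to load, so get_map_kv_btf() caches it in the file-scope btf_vmlinux, free_map_kv_btf() deliberately skips freeing it, and free_btf_vmlinux() tears it down once after all maps are dumped. The per-fd lifetime, in sketch form using the helpers introduced above:

    for (i = 0; i < nb_fds; i++) {
            struct btf *btf = get_map_kv_btf(&info); /* shared for struct_ops */

            /* ... dump one map ... */
            free_map_kv_btf(btf);   /* no-op for the cached vmlinux BTF */
    }
    free_btf_vmlinux();             /* single teardown */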
 
index 4c5531d1a45002e79f2e0b2de48317bb408d9988..d9b29c17fbb8e53f01c4b0807811812b38a5c54f 100644 (file)
@@ -6,7 +6,7 @@
  */
 #include <errno.h>
 #include <fcntl.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 #include <poll.h>
 #include <signal.h>
 #include <stdbool.h>
@@ -21,7 +21,7 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 #include <perf-sys.h>
 
 #include "main.h"
index d93bee298e54c320030290302f66b853128f9c8f..c5e3895b7c8b6a94a84758774eae400eb40432c6 100644 (file)
@@ -7,7 +7,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include <net/if.h>
 #include <linux/if.h>
 #include <linux/rtnetlink.h>
@@ -16,9 +17,8 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include <bpf.h>
-#include <nlattr.h>
-#include "libbpf_internal.h"
+#include "bpf/nlattr.h"
+#include "bpf/libbpf_internal.h"
 #include "main.h"
 #include "netlink_dumper.h"
 
index 550a0f537eedd8b0fc0a1b0df7d448916a24690d..5f65140b003b2e6a2d8460b64d2cb47700174014 100644 (file)
@@ -3,11 +3,11 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 #include <linux/rtnetlink.h>
 #include <linux/tc_act/tc_bpf.h>
 
-#include <nlattr.h>
+#include "bpf/nlattr.h"
 #include "main.h"
 #include "netlink_dumper.h"
 
index b2046f33e23f1b719a617f7a66f96d5bb13cb934..3341aa14acdacced1a52fc136d0b8fc146c1030c 100644 (file)
@@ -13,7 +13,7 @@
 #include <unistd.h>
 #include <ftw.h>
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 #include "main.h"
 
index 47a61ac42dc05648634bc63b94f024053cc364f9..a3521deca86943aa93e24560b5aa7a47d1fb983d 100644 (file)
@@ -17,9 +17,9 @@
 #include <linux/err.h>
 #include <linux/sizes.h>
 
-#include <bpf.h>
-#include <btf.h>
-#include <libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
 
 #include "cfg.h"
 #include "main.h"
index 5b91ee65a080260e857187a30b921687c1532ab4..8608cd68cdd0794ba711925e0ca71e6bf98f33ff 100644 (file)
@@ -7,7 +7,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 
 #include "disasm.h"
 #include "json_writer.h"
diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore
new file mode 100644 (file)
index 0000000..90a456a
--- /dev/null
@@ -0,0 +1 @@
+/.output
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
new file mode 100644 (file)
index 0000000..faf5418
--- /dev/null
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+OUTPUT := .output
+CLANG := clang
+LLC := llc
+LLVM_STRIP := llvm-strip
+DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+LIBBPF_SRC := $(abspath ../../lib/bpf)
+BPFOBJ := $(OUTPUT)/libbpf.a
+BPF_INCLUDE := $(OUTPUT)
+INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib)
+CFLAGS := -g -Wall
+
+# Try to detect best kernel BTF source
+KERNEL_REL := $(shell uname -r)
+VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword                           \
+                                         $(wildcard $(VMLINUX_BTF_PATHS))))
+
+abs_out := $(abspath $(OUTPUT))
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf '  %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
+endif
+
+.DELETE_ON_ERROR:
+
+.PHONY: all clean runqslower
+all: runqslower
+
+runqslower: $(OUTPUT)/runqslower
+
+clean:
+       $(call msg,CLEAN)
+       $(Q)rm -rf $(OUTPUT) runqslower
+
+$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
+       $(call msg,BINARY,$@)
+       $(Q)$(CC) $(CFLAGS) -lelf -lz $^ -o $@
+
+$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h             \
+                       $(OUTPUT)/runqslower.bpf.o
+
+$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h
+
+$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
+       $(call msg,GEN-SKEL,$@)
+       $(Q)$(BPFTOOL) gen skeleton $< > $@
+
+$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
+       $(call msg,BPF,$@)
+       $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES)                           \
+                -c $(filter %.c,$^) -o $@ &&                                 \
+       $(LLVM_STRIP) -g $@
+
+$(OUTPUT)/%.o: %.c | $(OUTPUT)
+       $(call msg,CC,$@)
+       $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+
+$(OUTPUT):
+       $(call msg,MKDIR,$@)
+       $(Q)mkdir -p $(OUTPUT)
+
+$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
+       $(call msg,GEN,$@)
+       $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
+               echo "Couldn't find kernel BTF; set VMLINUX_BTF to"            \
+                       "specify its location." >&2;                           \
+               exit 1;\
+       fi
+       $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+
+$(BPFOBJ): | $(OUTPUT)
+       $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)                         \
+                   OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+
+$(DEFAULT_BPFTOOL):
+       $(Q)$(MAKE) $(submake_extras) -C ../bpftool                           \
+                   prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
new file mode 100644 (file)
index 0000000..48a39f7
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "runqslower.h"
+
+#define TASK_RUNNING 0
+
+#define BPF_F_INDEX_MASK               0xffffffffULL
+#define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+
+const volatile __u64 min_us = 0;
+const volatile pid_t targ_pid = 0;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 10240);
+       __type(key, u32);
+       __type(value, u64);
+} start SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(u32));
+       __uint(value_size, sizeof(u32));
+} events SEC(".maps");
+
+/* record enqueue timestamp */
+__always_inline
+static int trace_enqueue(u32 tgid, u32 pid)
+{
+       u64 ts;
+
+       if (!pid || (targ_pid && targ_pid != pid))
+               return 0;
+
+       ts = bpf_ktime_get_ns();
+       bpf_map_update_elem(&start, &pid, &ts, 0);
+       return 0;
+}
+
+SEC("tp_btf/sched_wakeup")
+int handle__sched_wakeup(u64 *ctx)
+{
+       /* TP_PROTO(struct task_struct *p) */
+       struct task_struct *p = (void *)ctx[0];
+
+       return trace_enqueue(p->tgid, p->pid);
+}
+
+SEC("tp_btf/sched_wakeup_new")
+int handle__sched_wakeup_new(u64 *ctx)
+{
+       /* TP_PROTO(struct task_struct *p) */
+       struct task_struct *p = (void *)ctx[0];
+
+       return trace_enqueue(p->tgid, p->pid);
+}
+
+SEC("tp_btf/sched_switch")
+int handle__sched_switch(u64 *ctx)
+{
+       /* TP_PROTO(bool preempt, struct task_struct *prev,
+        *          struct task_struct *next)
+        */
+       struct task_struct *prev = (struct task_struct *)ctx[1];
+       struct task_struct *next = (struct task_struct *)ctx[2];
+       struct event event = {};
+       u64 *tsp, delta_us;
+       long state;
+       u32 pid;
+
+       /* ivcsw: treat like an enqueue event and store timestamp */
+       if (prev->state == TASK_RUNNING)
+               trace_enqueue(prev->tgid, prev->pid);
+
+       pid = next->pid;
+
+       /* fetch timestamp and calculate delta */
+       tsp = bpf_map_lookup_elem(&start, &pid);
+       if (!tsp)
+               return 0;   /* missed enqueue */
+
+       delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
+       if (min_us && delta_us <= min_us)
+               return 0;
+
+       event.pid = pid;
+       event.delta_us = delta_us;
+       bpf_get_current_comm(&event.task, sizeof(event.task));
+
+       /* output */
+       bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
+                             &event, sizeof(event));
+
+       bpf_map_delete_elem(&start, &pid);
+       return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
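Editorial note on the program above: direct dereferences such as prev->state work across kernel versions because the generated vmlinux.h marks every record with preserve_access_index (see the bpftool change earlier in this series), so each field access becomes a CO-RE relocation resolved at load time. The explicit spelling of the same read, as a sketch (bpf_core_read.h ships with libbpf):

    #include <bpf/bpf_core_read.h>

    /* equivalent to 'prev->state' under the vmlinux.h pragma */
    long state = BPF_CORE_READ(prev, state);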
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
new file mode 100644 (file)
index 0000000..d897158
--- /dev/null
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+#include <argp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <time.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "runqslower.h"
+#include "runqslower.skel.h"
+
+struct env {
+       pid_t pid;
+       __u64 min_us;
+       bool verbose;
+} env = {
+       .min_us = 10000,
+};
+
+const char *argp_program_version = "runqslower 0.1";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"runqslower    Trace long process scheduling delays.\n"
+"              For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n"
+"\n"
+"This script traces high scheduling delays between tasks being\n"
+"ready to run and them running on CPU after that.\n"
+"\n"
+"USAGE: runqslower [-p PID] [min_us]\n"
+"\n"
+"EXAMPLES:\n"
+"    runqslower         # trace run queue latency higher than 10000 us (default)\n"
+"    runqslower 1000    # trace run queue latency higher than 1000 us\n"
+"    runqslower -p 123  # trace pid 123 only\n";
+
+static const struct argp_option opts[] = {
+       { "pid", 'p', "PID", 0, "Process PID to trace"},
+       { "verbose", 'v', NULL, 0, "Verbose debug output" },
+       {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       static int pos_args;
+       int pid;
+       long long min_us;
+
+       switch (key) {
+       case 'v':
+               env.verbose = true;
+               break;
+       case 'p':
+               errno = 0;
+               pid = strtol(arg, NULL, 10);
+               if (errno || pid <= 0) {
+                       fprintf(stderr, "Invalid PID: %s\n", arg);
+                       argp_usage(state);
+               }
+               env.pid = pid;
+               break;
+       case ARGP_KEY_ARG:
+               if (pos_args++) {
+                       fprintf(stderr,
+                               "Unrecognized positional argument: %s\n", arg);
+                       argp_usage(state);
+               }
+               errno = 0;
+               min_us = strtoll(arg, NULL, 10);
+               if (errno || min_us <= 0) {
+                       fprintf(stderr, "Invalid delay (in us): %s\n", arg);
+                       argp_usage(state);
+               }
+               env.min_us = min_us;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+int libbpf_print_fn(enum libbpf_print_level level,
+                   const char *format, va_list args)
+{
+       if (level == LIBBPF_DEBUG && !env.verbose)
+               return 0;
+       return vfprintf(stderr, format, args);
+}
+
+static int bump_memlock_rlimit(void)
+{
+       struct rlimit rlim_new = {
+               .rlim_cur       = RLIM_INFINITY,
+               .rlim_max       = RLIM_INFINITY,
+       };
+
+       return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
+{
+       const struct event *e = data;
+       struct tm *tm;
+       char ts[32];
+       time_t t;
+
+       time(&t);
+       tm = localtime(&t);
+       strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+       printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us);
+}
+
+void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
+{
+       printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu);
+}
+
+int main(int argc, char **argv)
+{
+       static const struct argp argp = {
+               .options = opts,
+               .parser = parse_arg,
+               .doc = argp_program_doc,
+       };
+       struct perf_buffer_opts pb_opts = {}; /* zero ctx; callbacks set below */
+       struct perf_buffer *pb = NULL;
+       struct runqslower_bpf *obj;
+       int err;
+
+       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+       if (err)
+               return err;
+
+       libbpf_set_print(libbpf_print_fn);
+
+       err = bump_memlock_rlimit();
+       if (err) {
+               fprintf(stderr, "failed to increase rlimit: %d\n", err);
+               return 1;
+       }
+
+       obj = runqslower_bpf__open();
+       if (!obj) {
+               fprintf(stderr, "failed to open and/or load BPF object\n");
+               return 1;
+       }
+
+       /* initialize global data (filtering options) */
+       obj->rodata->targ_pid = env.pid;
+       obj->rodata->min_us = env.min_us;
+
+       err = runqslower_bpf__load(obj);
+       if (err) {
+               fprintf(stderr, "failed to load BPF object: %d\n", err);
+               goto cleanup;
+       }
+
+       err = runqslower_bpf__attach(obj);
+       if (err) {
+               fprintf(stderr, "failed to attach BPF programs\n");
+               goto cleanup;
+       }
+
+       printf("Tracing run queue latency higher than %llu us\n", env.min_us);
+       printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
+
+       pb_opts.sample_cb = handle_event;
+       pb_opts.lost_cb = handle_lost_events;
+       pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts);
+       err = libbpf_get_error(pb);
+       if (err) {
+               pb = NULL;
+               fprintf(stderr, "failed to open perf buffer: %d\n", err);
+               goto cleanup;
+       }
+
+       while ((err = perf_buffer__poll(pb, 100)) >= 0)
+               ;
+       printf("Error polling perf buffer: %d\n", err);
+
+cleanup:
+       perf_buffer__free(pb);
+       runqslower_bpf__destroy(obj);
+
+       return err != 0;
+}
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
new file mode 100644 (file)
index 0000000..9db2254
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __RUNQSLOWER_H
+#define __RUNQSLOWER_H
+
+#define TASK_COMM_LEN 16
+
+struct event {
+       char task[TASK_COMM_LEN];
+       __u64 delta_us;
+       pid_t pid;
+};
+
+#endif /* __RUNQSLOWER_H */
index 7df436da542d76ddd73eb9a83851071e6e2f4fe3..f1d74a2bd23493635afcbd6a7336c2fd2806f471 100644 (file)
@@ -107,6 +107,10 @@ enum bpf_cmd {
        BPF_MAP_LOOKUP_AND_DELETE_ELEM,
        BPF_MAP_FREEZE,
        BPF_BTF_GET_NEXT_ID,
+       BPF_MAP_LOOKUP_BATCH,
+       BPF_MAP_LOOKUP_AND_DELETE_BATCH,
+       BPF_MAP_UPDATE_BATCH,
+       BPF_MAP_DELETE_BATCH,
 };
 
 enum bpf_map_type {
@@ -136,6 +140,7 @@ enum bpf_map_type {
        BPF_MAP_TYPE_STACK,
        BPF_MAP_TYPE_SK_STORAGE,
        BPF_MAP_TYPE_DEVMAP_HASH,
+       BPF_MAP_TYPE_STRUCT_OPS,
 };
 
 /* Note that tracing related programs such as
@@ -174,6 +179,8 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
        BPF_PROG_TYPE_CGROUP_SOCKOPT,
        BPF_PROG_TYPE_TRACING,
+       BPF_PROG_TYPE_STRUCT_OPS,
+       BPF_PROG_TYPE_EXT,
 };
 
 enum bpf_attach_type {
@@ -357,7 +364,12 @@ enum bpf_attach_type {
 /* Enable memory-mapping BPF map */
 #define BPF_F_MMAPABLE         (1U << 10)
 
-/* flags for BPF_PROG_QUERY */
+/* Flags for BPF_PROG_QUERY. */
+
+/* Query effective (directly attached + inherited from ancestor cgroups)
+ * programs that will be executed for events within a cgroup.
+ * attach_flags with this flag are returned only for directly attached programs.
+ */
 #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
 
 enum bpf_stack_build_id_status {
@@ -397,6 +409,10 @@ union bpf_attr {
                __u32   btf_fd;         /* fd pointing to a BTF type data */
                __u32   btf_key_type_id;        /* BTF type_id of the key */
                __u32   btf_value_type_id;      /* BTF type_id of the value */
+               __u32   btf_vmlinux_value_type_id;/* BTF type_id of a kernel-
+                                                  * struct stored as the
+                                                  * map value
+                                                  */
        };
 
        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -409,6 +425,23 @@ union bpf_attr {
                __u64           flags;
        };
 
+       struct { /* struct used by BPF_MAP_*_BATCH commands */
+               __aligned_u64   in_batch;       /* start batch,
+                                                * NULL to start from beginning
+                                                */
+               __aligned_u64   out_batch;      /* output: next start batch */
+               __aligned_u64   keys;
+               __aligned_u64   values;
+               __u32           count;          /* input/output:
+                                                * input: # of key/value
+                                                * elements
+                                                * output: # of filled elements
+                                                */
+               __u32           map_fd;
+               __u64           elem_flags;
+               __u64           flags;
+       } batch;
+
        struct { /* anonymous struct used by BPF_PROG_LOAD command */
                __u32           prog_type;      /* one of enum bpf_prog_type */
                __u32           insn_cnt;
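Editorial note: the batch struct above backs the four new BPF_MAP_*_BATCH commands; the matching libbpf wrappers (bpf_map_lookup_batch() and friends) appear later in this series. A usage sketch for walking a hash map, assuming an open fd with __u32 keys and values — MAX_ENTRIES and the fd are illustrative, and passing NULL opts leaves elem_flags/flags at 0:

    #include <errno.h>
    #include <bpf/bpf.h>

    #define MAX_ENTRIES 1024    /* illustrative map capacity */

    /* fd: an open BPF_MAP_TYPE_HASH with __u32 keys and values */
    __u32 keys[MAX_ENTRIES], vals[MAX_ENTRIES];
    __u32 batch, count, total = 0;
    int err = 0;

    while (!err) {
            count = MAX_ENTRIES - total;
            err = bpf_map_lookup_batch(fd, total ? &batch : NULL, &batch,
                                       keys + total, vals + total,
                                       &count, NULL);
            if (err && errno != ENOENT)
                    break;          /* real failure */
            total += count;         /* count returns # of filled elements */
    }
    /* errno == ENOENT: the whole map has been walked; the in/out
     * batch cursors are opaque to the caller */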
@@ -2703,7 +2736,8 @@ union bpf_attr {
  *
  * int bpf_send_signal(u32 sig)
  *     Description
- *             Send signal *sig* to the current task.
+ *             Send signal *sig* to the process of the current task.
+ *             The signal may be delivered to any of this process's threads.
  *     Return
  *             0 on success or successfully queued.
  *
@@ -2831,6 +2865,33 @@ union bpf_attr {
  *     Return
  *             On success, the strictly positive length of the string, including
  *             the trailing NUL character. On error, a negative value.
+ *
+ * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ *     Description
+ *             Send out a TCP ACK. *tp* is the in-kernel struct tcp_sock.
+ *             *rcv_nxt* is the ack_seq to be sent out.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_send_signal_thread(u32 sig)
+ *     Description
+ *             Send signal *sig* to the thread corresponding to the current task.
+ *     Return
+ *             0 on success or successfully queued.
+ *
+ *             **-EBUSY** if the work queue under NMI is full.
+ *
+ *             **-EINVAL** if *sig* is invalid.
+ *
+ *             **-EPERM** if there is no permission to send the *sig*.
+ *
+ *             **-EAGAIN** if the bpf program can try again.
+ *
+ * u64 bpf_jiffies64(void)
+ *     Description
+ *             Obtain the 64-bit jiffies counter.
+ *     Return
+ *             The 64-bit jiffies value.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2948,7 +3009,10 @@ union bpf_attr {
        FN(probe_read_user),            \
        FN(probe_read_kernel),          \
        FN(probe_read_user_str),        \
-       FN(probe_read_kernel_str),
+       FN(probe_read_kernel_str),      \
+       FN(tcp_send_ack),               \
+       FN(send_signal_thread),         \
+       FN(jiffies64),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
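Editorial note: a hedged sketch exercising two of the helpers documented above. The attach point, rate limit, and signal number are illustrative assumptions; the helper declarations come via the generated <bpf/bpf_helpers.h>, and the file-scope variable relies on libbpf global-data support:

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include <bpf/bpf_helpers.h>

    static __u64 last_jiffies;   /* illustrative rate limiter */

    SEC("kprobe/blk_mq_start_request")   /* illustrative attach point */
    int notify(struct pt_regs *ctx)
    {
            __u64 now = bpf_jiffies64();  /* cheap 64-bit clock source */

            if (now - last_jiffies < 100)
                    return 0;
            last_jiffies = now;

            /* unlike bpf_send_signal(), this targets the calling thread
             * itself rather than any thread of the current process */
            bpf_send_signal_thread(10 /* SIGUSR1, illustrative */);
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";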
@@ -3349,7 +3413,7 @@ struct bpf_map_info {
        __u32 map_flags;
        char  name[BPF_OBJ_NAME_LEN];
        __u32 ifindex;
-       __u32 :32;
+       __u32 btf_vmlinux_value_type_id;
        __u64 netns_dev;
        __u64 netns_ino;
        __u32 btf_id;
index 1a2898c482eef3cbb88bd760ff3e6caa8d9816b0..5a667107ad2cce1980e01d3c7bd6273de8439990 100644 (file)
@@ -146,6 +146,12 @@ enum {
        BTF_VAR_GLOBAL_EXTERN = 2,
 };
 
+enum btf_func_linkage {
+       BTF_FUNC_STATIC = 0,
+       BTF_FUNC_GLOBAL = 1,
+       BTF_FUNC_EXTERN = 2,
+};
+
 /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
  * additional information related to the variable such as its linkage.
  */
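Editorial note: BTF_FUNC_GLOBAL underpins the function-by-function verification in this series — a non-static function in a BPF object is emitted with global linkage and verified once against its own BTF prototype, instead of being re-verified inline in every caller. A hedged sketch (assumes a clang new enough to emit BTF function linkage):

    /* emitted as BTF_FUNC_GLOBAL; verified independently of callers,
     * which are checked against this prototype at each call site */
    __attribute__((noinline))
    int allow_port(__u32 port)
    {
            return port == 80 || port == 443;
    }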
index 42efdb84d18982ce577b4f5baaf17c9290402402..024af2d1d0af4059cba0c40211b1095ecd55387e 100644 (file)
@@ -169,6 +169,7 @@ enum {
        IFLA_MAX_MTU,
        IFLA_PROP_LIST,
        IFLA_ALT_IFNAME, /* Alternative ifname */
+       IFLA_PERM_ADDRESS,
        __IFLA_MAX
 };
 
index d87830e7ea6326891779cf9107400f029b066d3b..aee7f1a83c7755cfd4966cd190350b035bb789e4 100644 (file)
@@ -183,7 +183,7 @@ $(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
        $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
 $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
-       $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header              \
+       $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
                --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
 
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
@@ -273,10 +273,11 @@ config-clean:
        $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
 clean:
-       $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
-               *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
-               *.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
-               $(SHARED_OBJDIR) $(STATIC_OBJDIR)
+       $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS)                 \
+               *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS)              \
+               $(SHARED_OBJDIR) $(STATIC_OBJDIR)                            \
+               $(addprefix $(OUTPUT),                                       \
+                           *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc)
        $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
 
index a787d53699c8193492b2919dfbc551fd2ee29db3..c6dafe56317659b8573d5e12bfed9def3e004e6d 100644 (file)
@@ -32,6 +32,9 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
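(Once poisoned, any later use of these names, e.g. a stray "u32 fd;", aborts
the build with GCC's "attempt to use poisoned" diagnostic, so kernel-only
typedefs cannot leak into the userspace library.)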
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
  * required to be defined explicitly.
@@ -95,7 +98,11 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
        attr.btf_key_type_id = create_attr->btf_key_type_id;
        attr.btf_value_type_id = create_attr->btf_value_type_id;
        attr.map_ifindex = create_attr->map_ifindex;
-       attr.inner_map_fd = create_attr->inner_map_fd;
+       if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
+               attr.btf_vmlinux_value_type_id =
+                       create_attr->btf_vmlinux_value_type_id;
+       else
+               attr.inner_map_fd = create_attr->inner_map_fd;
 
        return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
@@ -228,7 +235,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = load_attr->prog_type;
        attr.expected_attach_type = load_attr->expected_attach_type;
-       if (attr.prog_type == BPF_PROG_TYPE_TRACING) {
+       if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
+               attr.attach_btf_id = load_attr->attach_btf_id;
+       } else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
+                  attr.prog_type == BPF_PROG_TYPE_EXT) {
                attr.attach_btf_id = load_attr->attach_btf_id;
                attr.attach_prog_fd = load_attr->attach_prog_fd;
        } else {
@@ -443,6 +453,64 @@ int bpf_map_freeze(int fd)
        return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
 }
 
+static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
+                               void *out_batch, void *keys, void *values,
+                               __u32 *count,
+                               const struct bpf_map_batch_opts *opts)
+{
+       union bpf_attr attr;
+       int ret;
+
+       if (!OPTS_VALID(opts, bpf_map_batch_opts))
+               return -EINVAL;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.batch.map_fd = fd;
+       attr.batch.in_batch = ptr_to_u64(in_batch);
+       attr.batch.out_batch = ptr_to_u64(out_batch);
+       attr.batch.keys = ptr_to_u64(keys);
+       attr.batch.values = ptr_to_u64(values);
+       attr.batch.count = *count;
+       attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
+       attr.batch.flags = OPTS_GET(opts, flags, 0);
+
+       ret = sys_bpf(cmd, &attr, sizeof(attr));
+       *count = attr.batch.count;
+
+       return ret;
+}
+
+int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+                        const struct bpf_map_batch_opts *opts)
+{
+       return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
+                                   NULL, keys, NULL, count, opts);
+}
+
+int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
+                        void *values, __u32 *count,
+                        const struct bpf_map_batch_opts *opts)
+{
+       return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
+                                   out_batch, keys, values, count, opts);
+}
+
+int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
+                                   void *keys, void *values, __u32 *count,
+                                   const struct bpf_map_batch_opts *opts)
+{
+       return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
+                                   fd, in_batch, out_batch, keys, values,
+                                   count, opts);
+}
+
+int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+                        const struct bpf_map_batch_opts *opts)
+{
+       return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
+                                   keys, values, count, opts);
+}
+
 int bpf_obj_pin(int fd, const char *pathname)
 {
        union bpf_attr attr;
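For the reader following along, here is how the four batch calls compose in
userspace: a sketch of draining a hash map via the batched
lookup-and-delete flow. The map fd, the __u32 keys/values and the batch size
of 64 are assumptions for illustration; the kernel signals the final batch
with -ENOENT:

    #include <errno.h>
    #include <bpf/bpf.h>

    static int drain_map(int map_fd)
    {
            struct bpf_map_batch_opts opts = { .sz = sizeof(opts) };
            __u32 keys[64], vals[64], count, batch;
            void *in_batch = NULL;  /* NULL selects the first batch */
            int err;

            do {
                    count = 64;
                    err = bpf_map_lookup_and_delete_batch(map_fd, in_batch,
                                                          &batch, keys, vals,
                                                          &count, &opts);
                    if (err && errno != ENOENT)
                            return -errno;
                    /* ... consume 'count' key/value pairs here ... */
                    in_batch = &batch;  /* resume where the kernel stopped */
            } while (!err);

            return 0;
    }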
index f0ab8519986ec05b6274118395adaf7a568f8da9..b976e77316ccac33a506fed237a0d8bbdf6a4341 100644 (file)
@@ -46,7 +46,10 @@ struct bpf_create_map_attr {
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 map_ifindex;
-       __u32 inner_map_fd;
+       union {
+               __u32 inner_map_fd;
+               __u32 btf_vmlinux_value_type_id;
+       };
 };
 
 LIBBPF_API int
@@ -124,6 +127,28 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
+
+struct bpf_map_batch_opts {
+       size_t sz; /* size of this struct for forward/backward compatibility */
+       __u64 elem_flags;
+       __u64 flags;
+};
+#define bpf_map_batch_opts__last_field flags
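The sz field follows libbpf's opts convention for forward/backward
compatibility: OPTS_VALID() checks it before any field is read. A caller
built against this version would initialize the struct along these lines
(the elem_flags value is just an example):

    struct bpf_map_batch_opts opts = {
            .sz = sizeof(opts),        /* mandatory, checked by OPTS_VALID() */
            .elem_flags = BPF_F_LOCK,  /* e.g. per-element locked access */
    };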
+
+LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+                                   __u32 *count,
+                                   const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
+                                   void *keys, void *values, __u32 *count,
+                                   const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
+                                       void *out_batch, void *keys,
+                                       void *values, __u32 *count,
+                                       const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+                                   __u32 *count,
+                                   const struct bpf_map_batch_opts *opts);
+
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
 LIBBPF_API int bpf_obj_get(const char *pathname);
 
index 3ed1a27b5f7ce2cf9589c12a5dd8a3da3ad33844..bafca49cb1e65de5ef8c918ab405971c6d746b6d 100644 (file)
@@ -8,6 +8,9 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 struct bpf_prog_linfo {
        void *raw_linfo;
        void *raw_jited_linfo;
index 5f04f56e1eb650a07567b7363af12bab387c6ddf..3d1c25fc97aefcfeafe62bbc9ad145cb69a67fd4 100644 (file)
@@ -8,6 +8,10 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
+#include <sys/utsname.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/btf.h>
 #include <gelf.h>
 #include "libbpf_internal.h"
 #include "hashmap.h"
 
-#define BTF_MAX_NR_TYPES 0x7fffffff
-#define BTF_MAX_STR_OFFSET 0x7fffffff
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+#define BTF_MAX_NR_TYPES 0x7fffffffU
+#define BTF_MAX_STR_OFFSET 0x7fffffffU
 
 static struct btf_type btf_void;
 
@@ -50,7 +57,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t)
                if (btf->types_size == BTF_MAX_NR_TYPES)
                        return -E2BIG;
 
-               expand_by = max(btf->types_size >> 2, 16);
+               expand_by = max(btf->types_size >> 2, 16U);
                new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
 
                new_types = realloc(btf->types, sizeof(*new_types) * new_size);
@@ -286,7 +293,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
        switch (kind) {
        case BTF_KIND_INT:
        case BTF_KIND_ENUM:
-               return min(sizeof(void *), t->size);
+               return min(sizeof(void *), (size_t)t->size);
        case BTF_KIND_PTR:
                return sizeof(void *);
        case BTF_KIND_TYPEDEF:
@@ -1398,7 +1405,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d,
        if (d->hypot_cnt == d->hypot_cap) {
                __u32 *new_list;
 
-               d->hypot_cap += max(16, d->hypot_cap / 2);
+               d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
                new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
                if (!new_list)
                        return -ENOMEM;
@@ -1694,7 +1701,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
                if (strs.cnt + 1 > strs.cap) {
                        struct btf_str_ptr *new_ptrs;
 
-                       strs.cap += max(strs.cnt / 2, 16);
+                       strs.cap += max(strs.cnt / 2, 16U);
                        new_ptrs = realloc(strs.ptrs,
                                           sizeof(strs.ptrs[0]) * strs.cap);
                        if (!new_ptrs) {
@@ -2928,3 +2935,89 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
        }
        return 0;
 }
+
+static struct btf *btf_load_raw(const char *path)
+{
+       struct btf *btf;
+       size_t read_cnt;
+       struct stat st;
+       void *data;
+       FILE *f;
+
+       if (stat(path, &st))
+               return ERR_PTR(-errno);
+
+       data = malloc(st.st_size);
+       if (!data)
+               return ERR_PTR(-ENOMEM);
+
+       f = fopen(path, "rb");
+       if (!f) {
+               btf = ERR_PTR(-errno);
+               goto cleanup;
+       }
+
+       read_cnt = fread(data, 1, st.st_size, f);
+       fclose(f);
+       if (read_cnt < st.st_size) {
+               btf = ERR_PTR(-EBADF);
+               goto cleanup;
+       }
+
+       btf = btf__new(data, read_cnt);
+
+cleanup:
+       free(data);
+       return btf;
+}
+
+/*
+ * Probe a few well-known locations for the vmlinux kernel image and try to
+ * load BTF data out of it, for use as the target BTF.
+ */
+struct btf *libbpf_find_kernel_btf(void)
+{
+       struct {
+               const char *path_fmt;
+               bool raw_btf;
+       } locations[] = {
+               /* try canonical vmlinux BTF through sysfs first */
+               { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
+               /* fall back to trying to find vmlinux ELF on disk otherwise */
+               { "/boot/vmlinux-%1$s" },
+               { "/lib/modules/%1$s/vmlinux-%1$s" },
+               { "/lib/modules/%1$s/build/vmlinux" },
+               { "/usr/lib/modules/%1$s/kernel/vmlinux" },
+               { "/usr/lib/debug/boot/vmlinux-%1$s" },
+               { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
+               { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+       };
+       char path[PATH_MAX + 1];
+       struct utsname buf;
+       struct btf *btf;
+       int i;
+
+       uname(&buf);
+
+       for (i = 0; i < ARRAY_SIZE(locations); i++) {
+               snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+
+               if (access(path, R_OK))
+                       continue;
+
+               if (locations[i].raw_btf)
+                       btf = btf_load_raw(path);
+               else
+                       btf = btf__parse_elf(path, NULL);
+
+               pr_debug("loading kernel BTF '%s': %ld\n",
+                        path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
+               if (IS_ERR(btf))
+                       continue;
+
+               return btf;
+       }
+
+       pr_warn("failed to find valid kernel BTF\n");
+       return ERR_PTR(-ESRCH);
+}
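Since the function is now public (see the LIBBPF_API declaration in btf.h
below), callers outside of CO-RE relocation can reuse it; a sketch, using
libbpf_get_error() to unwrap the ERR_PTR-style return:

    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    struct btf *kern_btf = libbpf_find_kernel_btf();
    long err = libbpf_get_error(kern_btf);

    if (err)
            return err;     /* e.g. -ESRCH: no location had valid BTF */

    /* look up a kernel type by name and kind ... */
    __s32 id = btf__find_by_name_kind(kern_btf, "tcp_congestion_ops",
                                      BTF_KIND_STRUCT);
    /* ... use 'id', then drop the BTF data */
    btf__free(kern_btf);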
index 8d73f7f5551ff311b422f131bd0cbc8520f55130..70c1b7ec2bd0352d6a0193bae0a694403c31d62a 100644 (file)
@@ -102,6 +102,8 @@ LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
 LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
 struct btf_dedup_opts {
        unsigned int dedup_table_size;
        bool dont_resolve_fwds;
index e95f7710f21017864ffad3cd60d5a44dde9f15a8..bd09ed1710f12ee06e921591d042b8354cb4e0e1 100644 (file)
@@ -18,6 +18,9 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
 static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
 
@@ -139,6 +142,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
        if (IS_ERR(d->type_names)) {
                err = PTR_ERR(d->type_names);
                d->type_names = NULL;
+               goto err;
        }
        d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
        if (IS_ERR(d->ident_names)) {
index 6122272943e6285798fd49559fe13049bdb4650e..54c30c8020705e9569366bfcdc2f57138eb204be 100644 (file)
@@ -12,6 +12,9 @@
 #include <linux/err.h>
 #include "hashmap.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 /* start with 4 buckets */
 #define HASHMAP_MIN_CAP_BITS 2
 
index 7513165b104f5ad618d3e500f8aa04ee0d0280ec..ae34b681ae820b53b45b703d3a91eaa0854d9d2b 100644 (file)
@@ -55,6 +55,9 @@
 #include "libbpf_internal.h"
 #include "hashmap.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 #ifndef EM_BPF
 #define EM_BPF 247
 #endif
 
 #define __printf(a, b) __attribute__((format(printf, a, b)))
 
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
+static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj,
+                                                       int idx);
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
 static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
 {
@@ -166,6 +175,8 @@ struct bpf_capabilities {
        __u32 btf_datasec:1;
        /* BPF_F_MMAPABLE is supported for arrays */
        __u32 array_mmap:1;
+       /* BTF_FUNC_GLOBAL is supported */
+       __u32 btf_func_global:1;
 };
 
 enum reloc_type {
@@ -229,10 +240,32 @@ struct bpf_program {
        __u32 prog_flags;
 };
 
+struct bpf_struct_ops {
+       const char *tname;
+       const struct btf_type *type;
+       struct bpf_program **progs;
+       __u32 *kern_func_off;
+       /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
+       void *data;
+       /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
+        *      btf_vmlinux's format.
+        * struct bpf_struct_ops_tcp_congestion_ops {
+        *      [... some other kernel fields ...]
+        *      struct tcp_congestion_ops data;
+        * }
+        * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
+        * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
+        * from "data".
+        */
+       void *kern_vdata;
+       __u32 type_id;
+};
+
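For orientation, the "data" side above originates from a BPF-C global
variable placed in the new ".struct_ops" section; a trimmed sketch in the
style of the selftests added by this series (member set abbreviated, the
dctcp_* function bodies live elsewhere in the same object):

    SEC(".struct_ops")
    struct tcp_congestion_ops dctcp = {
            .init      = (void *)dctcp_init,  /* a SEC("struct_ops/...") prog */
            .ssthresh  = (void *)dctcp_ssthresh,
            .name      = "bpf_dctcp",
    };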
 #define DATA_SEC ".data"
 #define BSS_SEC ".bss"
 #define RODATA_SEC ".rodata"
 #define KCONFIG_SEC ".kconfig"
+#define STRUCT_OPS_SEC ".struct_ops"
 
 enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
@@ -259,10 +292,12 @@ struct bpf_map {
        struct bpf_map_def def;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
+       __u32 btf_vmlinux_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
+       struct bpf_struct_ops *st_ops;
        char *pin_path;
        bool pinned;
        bool reused;
@@ -326,6 +361,7 @@ struct bpf_object {
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
+               Elf_Data *st_ops_data;
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
@@ -339,6 +375,7 @@ struct bpf_object {
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
+               int st_ops_shndx;
        } efile;
        /*
         * All loaded bpf_object is linked in a list, which is
@@ -348,6 +385,10 @@ struct bpf_object {
        struct list_head list;
 
        struct btf *btf;
+       /* Parse and load BTF vmlinux if any of the programs in the object need
+        * it at load time.
+        */
+       struct btf *btf_vmlinux;
        struct btf_ext *btf_ext;
 
        void *priv;
@@ -566,6 +607,348 @@ static __u32 get_kernel_version(void)
        return KERNEL_VERSION(major, minor, patch);
 }
 
+static const struct btf_member *
+find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
+{
+       struct btf_member *m;
+       int i;
+
+       for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+               if (btf_member_bit_offset(t, i) == bit_offset)
+                       return m;
+       }
+
+       return NULL;
+}
+
+static const struct btf_member *
+find_member_by_name(const struct btf *btf, const struct btf_type *t,
+                   const char *name)
+{
+       struct btf_member *m;
+       int i;
+
+       for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+               if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
+                       return m;
+       }
+
+       return NULL;
+}
+
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
+                                  const char *name, __u32 kind);
+
+static int
+find_struct_ops_kern_types(const struct btf *btf, const char *tname,
+                          const struct btf_type **type, __u32 *type_id,
+                          const struct btf_type **vtype, __u32 *vtype_id,
+                          const struct btf_member **data_member)
+{
+       const struct btf_type *kern_type, *kern_vtype;
+       const struct btf_member *kern_data_member;
+       __s32 kern_vtype_id, kern_type_id;
+       __u32 i;
+
+       kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+       if (kern_type_id < 0) {
+               pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
+                       tname);
+               return kern_type_id;
+       }
+       kern_type = btf__type_by_id(btf, kern_type_id);
+
+       /* Find the corresponding "map_value" type that will be used
+        * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
+        * find "struct bpf_struct_ops_tcp_congestion_ops" from the
+        * btf_vmlinux.
+        */
+       kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
+                                               tname, BTF_KIND_STRUCT);
+       if (kern_vtype_id < 0) {
+               pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
+                       STRUCT_OPS_VALUE_PREFIX, tname);
+               return kern_vtype_id;
+       }
+       kern_vtype = btf__type_by_id(btf, kern_vtype_id);
+
+       /* Find "struct tcp_congestion_ops" from
+        * struct bpf_struct_ops_tcp_congestion_ops {
+        *      [ ... ]
+        *      struct tcp_congestion_ops data;
+        * }
+        */
+       kern_data_member = btf_members(kern_vtype);
+       for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
+               if (kern_data_member->type == kern_type_id)
+                       break;
+       }
+       if (i == btf_vlen(kern_vtype)) {
+               pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
+                       tname, STRUCT_OPS_VALUE_PREFIX, tname);
+               return -EINVAL;
+       }
+
+       *type = kern_type;
+       *type_id = kern_type_id;
+       *vtype = kern_vtype;
+       *vtype_id = kern_vtype_id;
+       *data_member = kern_data_member;
+
+       return 0;
+}
+
+static bool bpf_map__is_struct_ops(const struct bpf_map *map)
+{
+       return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
+}
+
+/* Init the map's fields that depend on kern_btf */
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
+                                        const struct btf *btf,
+                                        const struct btf *kern_btf)
+{
+       const struct btf_member *member, *kern_member, *kern_data_member;
+       const struct btf_type *type, *kern_type, *kern_vtype;
+       __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
+       struct bpf_struct_ops *st_ops;
+       void *data, *kern_data;
+       const char *tname;
+       int err;
+
+       st_ops = map->st_ops;
+       type = st_ops->type;
+       tname = st_ops->tname;
+       err = find_struct_ops_kern_types(kern_btf, tname,
+                                        &kern_type, &kern_type_id,
+                                        &kern_vtype, &kern_vtype_id,
+                                        &kern_data_member);
+       if (err)
+               return err;
+
+       pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
+                map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
+
+       map->def.value_size = kern_vtype->size;
+       map->btf_vmlinux_value_type_id = kern_vtype_id;
+
+       st_ops->kern_vdata = calloc(1, kern_vtype->size);
+       if (!st_ops->kern_vdata)
+               return -ENOMEM;
+
+       data = st_ops->data;
+       kern_data_off = kern_data_member->offset / 8;
+       kern_data = st_ops->kern_vdata + kern_data_off;
+
+       member = btf_members(type);
+       for (i = 0; i < btf_vlen(type); i++, member++) {
+               const struct btf_type *mtype, *kern_mtype;
+               __u32 mtype_id, kern_mtype_id;
+               void *mdata, *kern_mdata;
+               __s64 msize, kern_msize;
+               __u32 moff, kern_moff;
+               __u32 kern_member_idx;
+               const char *mname;
+
+               mname = btf__name_by_offset(btf, member->name_off);
+               kern_member = find_member_by_name(kern_btf, kern_type, mname);
+               if (!kern_member) {
+                       pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
+                               map->name, mname);
+                       return -ENOTSUP;
+               }
+
+               kern_member_idx = kern_member - btf_members(kern_type);
+               if (btf_member_bitfield_size(type, i) ||
+                   btf_member_bitfield_size(kern_type, kern_member_idx)) {
+                       pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
+                               map->name, mname);
+                       return -ENOTSUP;
+               }
+
+               moff = member->offset / 8;
+               kern_moff = kern_member->offset / 8;
+
+               mdata = data + moff;
+               kern_mdata = kern_data + kern_moff;
+
+               mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
+               kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
+                                                   &kern_mtype_id);
+               if (BTF_INFO_KIND(mtype->info) !=
+                   BTF_INFO_KIND(kern_mtype->info)) {
+                       pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
+                               map->name, mname, BTF_INFO_KIND(mtype->info),
+                               BTF_INFO_KIND(kern_mtype->info));
+                       return -ENOTSUP;
+               }
+
+               if (btf_is_ptr(mtype)) {
+                       struct bpf_program *prog;
+
+                       mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id);
+                       kern_mtype = skip_mods_and_typedefs(kern_btf,
+                                                           kern_mtype->type,
+                                                           &kern_mtype_id);
+                       if (!btf_is_func_proto(mtype) ||
+                           !btf_is_func_proto(kern_mtype)) {
+                               pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n",
+                                       map->name, mname);
+                               return -ENOTSUP;
+                       }
+
+                       prog = st_ops->progs[i];
+                       if (!prog) {
+                               pr_debug("struct_ops init_kern %s: func ptr %s is not set\n",
+                                        map->name, mname);
+                               continue;
+                       }
+
+                       prog->attach_btf_id = kern_type_id;
+                       prog->expected_attach_type = kern_member_idx;
+
+                       st_ops->kern_func_off[i] = kern_data_off + kern_moff;
+
+                       pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
+                                map->name, mname, prog->name, moff,
+                                kern_moff);
+
+                       continue;
+               }
+
+               msize = btf__resolve_size(btf, mtype_id);
+               kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
+               if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
+                       pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
+                               map->name, mname, (ssize_t)msize,
+                               (ssize_t)kern_msize);
+                       return -ENOTSUP;
+               }
+
+               pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
+                        map->name, mname, (unsigned int)msize,
+                        moff, kern_moff);
+               memcpy(kern_mdata, mdata, msize);
+       }
+
+       return 0;
+}
+
+static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
+{
+       struct bpf_map *map;
+       size_t i;
+       int err;
+
+       for (i = 0; i < obj->nr_maps; i++) {
+               map = &obj->maps[i];
+
+               if (!bpf_map__is_struct_ops(map))
+                       continue;
+
+               err = bpf_map__init_kern_struct_ops(map, obj->btf,
+                                                   obj->btf_vmlinux);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
+{
+       const struct btf_type *type, *datasec;
+       const struct btf_var_secinfo *vsi;
+       struct bpf_struct_ops *st_ops;
+       const char *tname, *var_name;
+       __s32 type_id, datasec_id;
+       const struct btf *btf;
+       struct bpf_map *map;
+       __u32 i;
+
+       if (obj->efile.st_ops_shndx == -1)
+               return 0;
+
+       btf = obj->btf;
+       datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
+                                           BTF_KIND_DATASEC);
+       if (datasec_id < 0) {
+               pr_warn("struct_ops init: DATASEC %s not found\n",
+                       STRUCT_OPS_SEC);
+               return -EINVAL;
+       }
+
+       datasec = btf__type_by_id(btf, datasec_id);
+       vsi = btf_var_secinfos(datasec);
+       for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
+               type = btf__type_by_id(obj->btf, vsi->type);
+               var_name = btf__name_by_offset(obj->btf, type->name_off);
+
+               type_id = btf__resolve_type(obj->btf, vsi->type);
+               if (type_id < 0) {
+                       pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
+                               vsi->type, STRUCT_OPS_SEC);
+                       return -EINVAL;
+               }
+
+               type = btf__type_by_id(obj->btf, type_id);
+               tname = btf__name_by_offset(obj->btf, type->name_off);
+               if (!tname[0]) {
+                       pr_warn("struct_ops init: anonymous type is not supported\n");
+                       return -ENOTSUP;
+               }
+               if (!btf_is_struct(type)) {
+                       pr_warn("struct_ops init: %s is not a struct\n", tname);
+                       return -EINVAL;
+               }
+
+               map = bpf_object__add_map(obj);
+               if (IS_ERR(map))
+                       return PTR_ERR(map);
+
+               map->sec_idx = obj->efile.st_ops_shndx;
+               map->sec_offset = vsi->offset;
+               map->name = strdup(var_name);
+               if (!map->name)
+                       return -ENOMEM;
+
+               map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
+               map->def.key_size = sizeof(int);
+               map->def.value_size = type->size;
+               map->def.max_entries = 1;
+
+               map->st_ops = calloc(1, sizeof(*map->st_ops));
+               if (!map->st_ops)
+                       return -ENOMEM;
+               st_ops = map->st_ops;
+               st_ops->data = malloc(type->size);
+               st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
+               st_ops->kern_func_off = malloc(btf_vlen(type) *
+                                              sizeof(*st_ops->kern_func_off));
+               if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
+                       return -ENOMEM;
+
+               if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
+                       pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
+                               var_name, STRUCT_OPS_SEC);
+                       return -EINVAL;
+               }
+
+               memcpy(st_ops->data,
+                      obj->efile.st_ops_data->d_buf + vsi->offset,
+                      type->size);
+               st_ops->tname = tname;
+               st_ops->type = type;
+               st_ops->type_id = type_id;
+
+               pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
+                        tname, type_id, var_name, vsi->offset);
+       }
+
+       return 0;
+}
+
 static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
@@ -607,6 +990,7 @@ static struct bpf_object *bpf_object__new(const char *path,
        obj->efile.data_shndx = -1;
        obj->efile.rodata_shndx = -1;
        obj->efile.bss_shndx = -1;
+       obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;
 
        obj->kern_version = get_kernel_version();
@@ -630,6 +1014,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
        obj->efile.data = NULL;
        obj->efile.rodata = NULL;
        obj->efile.bss = NULL;
+       obj->efile.st_ops_data = NULL;
 
        zfree(&obj->efile.reloc_sects);
        obj->efile.nr_reloc_sects = 0;
@@ -735,16 +1120,6 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
        return 0;
 }
 
-static int compare_bpf_map(const void *_a, const void *_b)
-{
-       const struct bpf_map *a = _a;
-       const struct bpf_map *b = _b;
-
-       if (a->sec_idx != b->sec_idx)
-               return a->sec_idx - b->sec_idx;
-       return a->sec_offset - b->sec_offset;
-}
-
 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 {
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
@@ -815,6 +1190,9 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
        } else if (!strcmp(name, RODATA_SEC)) {
                if (obj->efile.rodata)
                        *size = obj->efile.rodata->d_size;
+       } else if (!strcmp(name, STRUCT_OPS_SEC)) {
+               if (obj->efile.st_ops_data)
+                       *size = obj->efile.st_ops_data->d_size;
        } else {
                ret = bpf_object_search_section_size(obj, name, &d_size);
                if (!ret)
@@ -898,7 +1276,7 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map)
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;
 
-       map_sz = roundup(map->def.value_size, 8) * map->def.max_entries;
+       map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
 }
@@ -1440,6 +1818,20 @@ skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
        return t;
 }
 
+static const struct btf_type *
+resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+       const struct btf_type *t;
+
+       t = skip_mods_and_typedefs(btf, id, NULL);
+       if (!btf_is_ptr(t))
+               return NULL;
+
+       t = skip_mods_and_typedefs(btf, t->type, res_id);
+
+       return btf_is_func_proto(t) ? t : NULL;
+}
+
 /*
  * Fetch integer attribute of BTF map definition. Such attributes are
  * represented using a pointer to an array, in which dimensionality of array
@@ -1787,13 +2179,10 @@ static int bpf_object__init_maps(struct bpf_object *obj,
        err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
        err = err ?: bpf_object__init_global_data_maps(obj);
        err = err ?: bpf_object__init_kconfig_map(obj);
+       err = err ?: bpf_object__init_struct_ops_maps(obj);
        if (err)
                return err;
 
-       if (obj->nr_maps) {
-               qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
-                     compare_bpf_map);
-       }
        return 0;
 }
 
@@ -1817,13 +2206,14 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
 
 static void bpf_object__sanitize_btf(struct bpf_object *obj)
 {
+       bool has_func_global = obj->caps.btf_func_global;
        bool has_datasec = obj->caps.btf_datasec;
        bool has_func = obj->caps.btf_func;
        struct btf *btf = obj->btf;
        struct btf_type *t;
        int i, j, vlen;
 
-       if (!obj->btf || (has_func && has_datasec))
+       if (!obj->btf || (has_func && has_datasec && has_func_global))
                return;
 
        for (i = 1; i <= btf__get_nr_types(btf); i++) {
@@ -1871,6 +2261,9 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj)
                } else if (!has_func && btf_is_func(t)) {
                        /* replace FUNC with TYPEDEF */
                        t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
+               } else if (!has_func_global && btf_is_func(t)) {
+                       /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
+                       t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
                }
        }
 }
@@ -1889,23 +2282,26 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
 static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
 {
        return obj->efile.btf_maps_shndx >= 0 ||
-              obj->nr_extern > 0;
+               obj->efile.st_ops_shndx >= 0 ||
+               obj->nr_extern > 0;
 }
 
 static int bpf_object__init_btf(struct bpf_object *obj,
                                Elf_Data *btf_data,
                                Elf_Data *btf_ext_data)
 {
-       bool btf_required = bpf_object__is_btf_mandatory(obj);
-       int err = 0;
+       int err = -ENOENT;
 
        if (btf_data) {
                obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
                if (IS_ERR(obj->btf)) {
+                       err = PTR_ERR(obj->btf);
+                       obj->btf = NULL;
                        pr_warn("Error loading ELF section %s: %d.\n",
                                BTF_ELF_SEC, err);
                        goto out;
                }
+               err = 0;
        }
        if (btf_ext_data) {
                if (!obj->btf) {
@@ -1923,18 +2319,9 @@ static int bpf_object__init_btf(struct bpf_object *obj,
                }
        }
 out:
-       if (err || IS_ERR(obj->btf)) {
-               if (btf_required)
-                       err = err ? : PTR_ERR(obj->btf);
-               else
-                       err = 0;
-               if (!IS_ERR_OR_NULL(obj->btf))
-                       btf__free(obj->btf);
-               obj->btf = NULL;
-       }
-       if (btf_required && !obj->btf) {
+       if (err && bpf_object__is_btf_mandatory(obj)) {
                pr_warn("BTF is required, but is missing or corrupted.\n");
-               return err == 0 ? -ENOENT : err;
+               return err;
        }
        return 0;
 }
@@ -1963,6 +2350,41 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
        return 0;
 }
 
+static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
+{
+       if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+               return true;
+
+       /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
+        * also need vmlinux BTF
+        */
+       if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
+               return true;
+
+       return false;
+}
+
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
+{
+       struct bpf_program *prog;
+       int err;
+
+       bpf_object__for_each_program(prog, obj) {
+               if (libbpf_prog_needs_vmlinux_btf(prog)) {
+                       obj->btf_vmlinux = libbpf_find_kernel_btf();
+                       if (IS_ERR(obj->btf_vmlinux)) {
+                               err = PTR_ERR(obj->btf_vmlinux);
+                               pr_warn("Error loading vmlinux BTF: %d\n", err);
+                               obj->btf_vmlinux = NULL;
+                               return err;
+                       }
+                       return 0;
+               }
+       }
+
+       return 0;
+}
+
 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 {
        int err = 0;
@@ -2088,6 +2510,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                        } else if (strcmp(name, RODATA_SEC) == 0) {
                                obj->efile.rodata = data;
                                obj->efile.rodata_shndx = idx;
+                       } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
+                               obj->efile.st_ops_data = data;
+                               obj->efile.st_ops_shndx = idx;
                        } else {
                                pr_debug("skip section(%d) %s\n", idx, name);
                        }
@@ -2097,7 +2522,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                        int sec = sh.sh_info; /* points to other section */
 
                        /* Only do relo for section with exec instructions */
-                       if (!section_have_execinstr(obj, sec)) {
+                       if (!section_have_execinstr(obj, sec) &&
+                           strcmp(name, ".rel" STRUCT_OPS_SEC)) {
                                pr_debug("skip relo %s(%d) for section(%d)\n",
                                         name, idx, sec);
                                continue;
@@ -2599,8 +3025,12 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
        __u32 key_type_id = 0, value_type_id = 0;
        int ret;
 
-       /* if it's BTF-defined map, we don't need to search for type IDs */
-       if (map->sec_idx == obj->efile.btf_maps_shndx)
+       /* if it's BTF-defined map, we don't need to search for type IDs.
+        * For struct_ops map, it does not need btf_key_type_id and
+        * btf_value_type_id.
+        */
+       if (map->sec_idx == obj->efile.btf_maps_shndx ||
+           bpf_map__is_struct_ops(map))
                return 0;
 
        if (!bpf_map__is_internal(map)) {
@@ -2804,6 +3234,32 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj)
        return 0;
 }
 
+static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
+{
+       static const char strs[] = "\0int\0x\0a";
+       /* static void x(int a) {} */
+       __u32 types[] = {
+               /* int */
+               BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+               /* FUNC_PROTO */                                /* [2] */
+               BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+               BTF_PARAM_ENC(7, 1),
+               /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
+               BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
+       };
+       int btf_fd;
+
+       btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
+                                     strs, sizeof(strs));
+       if (btf_fd >= 0) {
+               obj->caps.btf_func_global = 1;
+               close(btf_fd);
+               return 1;
+       }
+
+       return 0;
+}
+
 static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
 {
        static const char strs[] = "\0x\0.data";
@@ -2859,6 +3315,7 @@ bpf_object__probe_caps(struct bpf_object *obj)
                bpf_object__probe_name,
                bpf_object__probe_global_data,
                bpf_object__probe_btf_func,
+               bpf_object__probe_btf_func_global,
                bpf_object__probe_btf_datasec,
                bpf_object__probe_array_mmap,
        };
@@ -3025,6 +3482,9 @@ bpf_object__create_maps(struct bpf_object *obj)
                if (bpf_map_type__is_map_in_map(def->type) &&
                    map->inner_map_fd >= 0)
                        create_attr.inner_map_fd = map->inner_map_fd;
+               if (bpf_map__is_struct_ops(map))
+                       create_attr.btf_vmlinux_value_type_id =
+                               map->btf_vmlinux_value_type_id;
 
                if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
                        create_attr.btf_fd = btf__fd(obj->btf);
@@ -3860,92 +4320,6 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
        return 0;
 }
 
-static struct btf *btf_load_raw(const char *path)
-{
-       struct btf *btf;
-       size_t read_cnt;
-       struct stat st;
-       void *data;
-       FILE *f;
-
-       if (stat(path, &st))
-               return ERR_PTR(-errno);
-
-       data = malloc(st.st_size);
-       if (!data)
-               return ERR_PTR(-ENOMEM);
-
-       f = fopen(path, "rb");
-       if (!f) {
-               btf = ERR_PTR(-errno);
-               goto cleanup;
-       }
-
-       read_cnt = fread(data, 1, st.st_size, f);
-       fclose(f);
-       if (read_cnt < st.st_size) {
-               btf = ERR_PTR(-EBADF);
-               goto cleanup;
-       }
-
-       btf = btf__new(data, read_cnt);
-
-cleanup:
-       free(data);
-       return btf;
-}
-
-/*
- * Probe few well-known locations for vmlinux kernel image and try to load BTF
- * data out of it to use for target BTF.
- */
-static struct btf *bpf_core_find_kernel_btf(void)
-{
-       struct {
-               const char *path_fmt;
-               bool raw_btf;
-       } locations[] = {
-               /* try canonical vmlinux BTF through sysfs first */
-               { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
-               /* fall back to trying to find vmlinux ELF on disk otherwise */
-               { "/boot/vmlinux-%1$s" },
-               { "/lib/modules/%1$s/vmlinux-%1$s" },
-               { "/lib/modules/%1$s/build/vmlinux" },
-               { "/usr/lib/modules/%1$s/kernel/vmlinux" },
-               { "/usr/lib/debug/boot/vmlinux-%1$s" },
-               { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
-               { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
-       };
-       char path[PATH_MAX + 1];
-       struct utsname buf;
-       struct btf *btf;
-       int i;
-
-       uname(&buf);
-
-       for (i = 0; i < ARRAY_SIZE(locations); i++) {
-               snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
-
-               if (access(path, R_OK))
-                       continue;
-
-               if (locations[i].raw_btf)
-                       btf = btf_load_raw(path);
-               else
-                       btf = btf__parse_elf(path, NULL);
-
-               pr_debug("loading kernel BTF '%s': %ld\n",
-                        path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
-               if (IS_ERR(btf))
-                       continue;
-
-               return btf;
-       }
-
-       pr_warn("failed to find valid kernel BTF\n");
-       return ERR_PTR(-ESRCH);
-}
-
 /* Output spec definition in the format:
  * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
  * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
@@ -4180,7 +4554,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
        if (targ_btf_path)
                targ_btf = btf__parse_elf(targ_btf_path, NULL);
        else
-               targ_btf = bpf_core_find_kernel_btf();
+               targ_btf = libbpf_find_kernel_btf();
        if (IS_ERR(targ_btf)) {
                pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
                return PTR_ERR(targ_btf);
@@ -4252,13 +4626,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
        size_t new_cnt;
        int err;
 
-       if (prog->idx == obj->efile.text_shndx) {
-               pr_warn("relo in .text insn %d into off %d (insn #%d)\n",
-                       relo->insn_idx, relo->sym_off, relo->sym_off / 8);
-               return -LIBBPF_ERRNO__RELOC;
-       }
-
-       if (prog->main_prog_cnt == 0) {
+       if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) {
                text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
                if (!text) {
                        pr_warn("no .text section found yet relo into text exist\n");
@@ -4288,6 +4656,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
                         text->insns_cnt, text->section_name,
                         prog->section_name);
        }
+
        insn = &prog->insns[relo->insn_idx];
        insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
        return 0;
@@ -4367,8 +4736,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
                        return err;
                }
        }
+       /* ensure .text is relocated first, as it's going to be copied as-is
+        * later for sub-program calls
+        */
+       for (i = 0; i < obj->nr_programs; i++) {
+               prog = &obj->programs[i];
+               if (prog->idx != obj->efile.text_shndx)
+                       continue;
+
+               err = bpf_program__relocate(prog, obj);
+               if (err) {
+                       pr_warn("failed to relocate '%s'\n", prog->section_name);
+                       return err;
+               }
+               break;
+       }
+       /* now relocate everything but .text, which by now is relocated
+        * properly, so we can safely copy raw sub-program instructions as-is
+        */
        for (i = 0; i < obj->nr_programs; i++) {
                prog = &obj->programs[i];
+               if (prog->idx == obj->efile.text_shndx)
+                       continue;
 
                err = bpf_program__relocate(prog, obj);
                if (err) {
@@ -4379,6 +4768,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
        return 0;
 }
 
+static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
+                                                   GElf_Shdr *shdr,
+                                                   Elf_Data *data);
+
 static int bpf_object__collect_reloc(struct bpf_object *obj)
 {
        int i, err;
@@ -4399,6 +4792,15 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
                        return -LIBBPF_ERRNO__INTERNAL;
                }
 
+               if (idx == obj->efile.st_ops_shndx) {
+                       err = bpf_object__collect_struct_ops_map_reloc(obj,
+                                                                      shdr,
+                                                                      data);
+                       if (err)
+                               return err;
+                       continue;
+               }
+
                prog = bpf_object__find_prog_by_idx(obj, idx);
                if (!prog) {
                        pr_warn("relocation failed: no section(%d)\n", idx);
@@ -4433,7 +4835,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
        load_attr.insns = insns;
        load_attr.insns_cnt = insns_cnt;
        load_attr.license = license;
-       if (prog->type == BPF_PROG_TYPE_TRACING) {
+       if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+               load_attr.attach_btf_id = prog->attach_btf_id;
+       } else if (prog->type == BPF_PROG_TYPE_TRACING ||
+                  prog->type == BPF_PROG_TYPE_EXT) {
                load_attr.attach_prog_fd = prog->attach_prog_fd;
                load_attr.attach_btf_id = prog->attach_btf_id;
        } else {
@@ -4508,18 +4913,15 @@ out:
        return ret;
 }
 
-static int libbpf_find_attach_btf_id(const char *name,
-                                    enum bpf_attach_type attach_type,
-                                    __u32 attach_prog_fd);
+static int libbpf_find_attach_btf_id(struct bpf_program *prog);
 
 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
        int err = 0, fd, i, btf_id;
 
-       if (prog->type == BPF_PROG_TYPE_TRACING) {
-               btf_id = libbpf_find_attach_btf_id(prog->section_name,
-                                                  prog->expected_attach_type,
-                                                  prog->attach_prog_fd);
+       if (prog->type == BPF_PROG_TYPE_TRACING ||
+           prog->type == BPF_PROG_TYPE_EXT) {
+               btf_id = libbpf_find_attach_btf_id(prog);
                if (btf_id <= 0)
                        return btf_id;
                prog->attach_btf_id = btf_id;
@@ -4679,6 +5081,9 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
                enum bpf_prog_type prog_type;
                enum bpf_attach_type attach_type;
 
+               if (prog->type != BPF_PROG_TYPE_UNSPEC)
+                       continue;
+
                err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
                                               &attach_type);
                if (err == -ESRCH)
@@ -4689,7 +5094,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 
                bpf_program__set_type(prog, prog_type);
                bpf_program__set_expected_attach_type(prog, attach_type);
-               if (prog_type == BPF_PROG_TYPE_TRACING)
+               if (prog_type == BPF_PROG_TYPE_TRACING ||
+                   prog_type == BPF_PROG_TYPE_EXT)
                        prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
        }
 
@@ -4774,8 +5180,11 @@ int bpf_object__unload(struct bpf_object *obj)
        if (!obj)
                return -EINVAL;
 
-       for (i = 0; i < obj->nr_maps; i++)
+       for (i = 0; i < obj->nr_maps; i++) {
                zclose(obj->maps[i].fd);
+               if (obj->maps[i].st_ops)
+                       zfree(&obj->maps[i].st_ops->kern_vdata);
+       }
 
        for (i = 0; i < obj->nr_programs; i++)
                bpf_program__unload(&obj->programs[i]);
@@ -4891,9 +5300,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
        err = err ? : bpf_object__sanitize_and_load_btf(obj);
        err = err ? : bpf_object__sanitize_maps(obj);
+       err = err ? : bpf_object__load_vmlinux_btf(obj);
+       err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
        err = err ? : bpf_object__create_maps(obj);
        err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
        err = err ? : bpf_object__load_progs(obj, attr->log_level);
+
+       btf__free(obj->btf_vmlinux);
+       obj->btf_vmlinux = NULL;
+
        if (err)
                goto out;
 
@@ -5478,6 +5893,13 @@ void bpf_object__close(struct bpf_object *obj)
                        map->mmaped = NULL;
                }
 
+               if (map->st_ops) {
+                       zfree(&map->st_ops->data);
+                       zfree(&map->st_ops->progs);
+                       zfree(&map->st_ops->kern_func_off);
+                       zfree(&map->st_ops);
+               }
+
                zfree(&map->name);
                zfree(&map->pin_path);
        }
@@ -5746,6 +6168,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
+BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
+BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
 
 enum bpf_attach_type
 bpf_program__get_expected_attach_type(struct bpf_program *prog)
@@ -5845,6 +6269,9 @@ static const struct bpf_sec_def section_defs[] = {
                .expected_attach_type = BPF_TRACE_FEXIT,
                .is_attach_btf = true,
                .attach_fn = attach_trace),
+       SEC_DEF("freplace/", EXT,
+               .is_attach_btf = true,
+               .attach_fn = attach_trace),
        BPF_PROG_SEC("xdp",                     BPF_PROG_TYPE_XDP),
        BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
        BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
@@ -5899,6 +6326,7 @@ static const struct bpf_sec_def section_defs[] = {
                                                BPF_CGROUP_GETSOCKOPT),
        BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
                                                BPF_CGROUP_SETSOCKOPT),
+       BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
 };
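Hedged sketches of how the two new section names are consumed (the freplace
target name is hypothetical; BPF_PROG is the selftests' convenience macro
from bpf_trace_helpers.h):

    /* BPF_PROG_TYPE_EXT: replace a global function in an already-loaded
     * program; the target prog fd is passed via the new attach_prog_fd
     * open option.
     */
    SEC("freplace/handle_packet")
    int new_handle_packet(struct xdp_md *ctx)
    {
            return XDP_PASS;
    }

    /* BPF_PROG_TYPE_STRUCT_OPS: implements one member of a ".struct_ops"
     * map, matched to the member by the part after "struct_ops/".
     */
    SEC("struct_ops/dctcp_init")
    void BPF_PROG(dctcp_init, struct sock *sk)
    {
            /* ... */
    }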
 
 #undef BPF_PROG_SEC_IMPL
@@ -5975,34 +6403,182 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
        return -ESRCH;
 }
 
-#define BTF_PREFIX "btf_trace_"
+static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
+                                                    size_t offset)
+{
+       struct bpf_map *map;
+       size_t i;
+
+       for (i = 0; i < obj->nr_maps; i++) {
+               map = &obj->maps[i];
+               if (!bpf_map__is_struct_ops(map))
+                       continue;
+               if (map->sec_offset <= offset &&
+                   offset - map->sec_offset < map->def.value_size)
+                       return map;
+       }
+
+       return NULL;
+}
+
+/* Collect the reloc from ELF and populate the st_ops->progs[] */
+static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
+                                                   GElf_Shdr *shdr,
+                                                   Elf_Data *data)
+{
+       const struct btf_member *member;
+       struct bpf_struct_ops *st_ops;
+       struct bpf_program *prog;
+       unsigned int shdr_idx;
+       const struct btf *btf;
+       struct bpf_map *map;
+       Elf_Data *symbols;
+       unsigned int moff;
+       const char *name;
+       __u32 member_idx;
+       GElf_Sym sym;
+       GElf_Rel rel;
+       int i, nrels;
+
+       symbols = obj->efile.symbols;
+       btf = obj->btf;
+       nrels = shdr->sh_size / shdr->sh_entsize;
+       for (i = 0; i < nrels; i++) {
+               if (!gelf_getrel(data, i, &rel)) {
+                       pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
+                       return -LIBBPF_ERRNO__FORMAT;
+               }
+
+               if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+                       pr_warn("struct_ops reloc: symbol %zx not found\n",
+                               (size_t)GELF_R_SYM(rel.r_info));
+                       return -LIBBPF_ERRNO__FORMAT;
+               }
+
+               name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+                                 sym.st_name) ? : "<?>";
+               map = find_struct_ops_map_by_offset(obj, rel.r_offset);
+               if (!map) {
+                       pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
+                               (size_t)rel.r_offset);
+                       return -EINVAL;
+               }
+
+               moff = rel.r_offset - map->sec_offset;
+               shdr_idx = sym.st_shndx;
+               st_ops = map->st_ops;
+               pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
+                        map->name,
+                        (long long)(rel.r_info >> 32),
+                        (long long)sym.st_value,
+                        shdr_idx, (size_t)rel.r_offset,
+                        map->sec_offset, sym.st_name, name);
+
+               if (shdr_idx >= SHN_LORESERVE) {
+                       pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
+                               map->name, (size_t)rel.r_offset, shdr_idx);
+                       return -LIBBPF_ERRNO__RELOC;
+               }
+
+               member = find_member_by_offset(st_ops->type, moff * 8);
+               if (!member) {
+                       pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
+                               map->name, moff);
+                       return -EINVAL;
+               }
+               member_idx = member - btf_members(st_ops->type);
+               name = btf__name_by_offset(btf, member->name_off);
+
+               if (!resolve_func_ptr(btf, member->type, NULL)) {
+                       pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
+                               map->name, name);
+                       return -EINVAL;
+               }
+
+               prog = bpf_object__find_prog_by_idx(obj, shdr_idx);
+               if (!prog) {
+                       pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
+                               map->name, shdr_idx, name);
+                       return -EINVAL;
+               }
+
+               if (prog->type == BPF_PROG_TYPE_UNSPEC) {
+                       const struct bpf_sec_def *sec_def;
+
+                       sec_def = find_sec_def(prog->section_name);
+                       if (sec_def &&
+                           sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
+                               /* for pr_warn */
+                               prog->type = sec_def->prog_type;
+                               goto invalid_prog;
+                       }
+
+                       prog->type = BPF_PROG_TYPE_STRUCT_OPS;
+                       prog->attach_btf_id = st_ops->type_id;
+                       prog->expected_attach_type = member_idx;
+               } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
+                          prog->attach_btf_id != st_ops->type_id ||
+                          prog->expected_attach_type != member_idx) {
+                       goto invalid_prog;
+               }
+               st_ops->progs[member_idx] = prog;
+       }
+
+       return 0;
+
+invalid_prog:
+       pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
+               map->name, prog->name, prog->section_name, prog->type,
+               prog->attach_btf_id, prog->expected_attach_type, name);
+       return -EINVAL;
+}
+
+#define BTF_TRACE_PREFIX "btf_trace_"
+#define BTF_MAX_NAME_SIZE 128
+
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
+                                  const char *name, __u32 kind)
+{
+       char btf_type_name[BTF_MAX_NAME_SIZE];
+       int ret;
+
+       ret = snprintf(btf_type_name, sizeof(btf_type_name),
+                      "%s%s", prefix, name);
+       /* snprintf returns the number of characters written excluding the
+        * terminating null. So, if >= BTF_MAX_NAME_SIZE characters are
+        * written, it indicates truncation.
+        */
+       if (ret < 0 || ret >= sizeof(btf_type_name))
+               return -ENAMETOOLONG;
+       return btf__find_by_name_kind(btf, btf_type_name, kind);
+}
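+/* For example, following the kernel convention of prefixing raw tracepoint
+ * types with "btf_trace_", the BTF id of the raw tracepoint "xdp_exception"
+ * would be resolved as (a sketch):
+ *
+ *     id = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, "xdp_exception",
+ *                                  BTF_KIND_TYPEDEF);
+ */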
+
+static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
+                                       enum bpf_attach_type attach_type)
+{
+       int err;
+
+       if (attach_type == BPF_TRACE_RAW_TP)
+               err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
+                                             BTF_KIND_TYPEDEF);
+       else
+               err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+
+       return err;
+}
+
 int libbpf_find_vmlinux_btf_id(const char *name,
                               enum bpf_attach_type attach_type)
 {
-       struct btf *btf = bpf_core_find_kernel_btf();
-       char raw_tp_btf[128] = BTF_PREFIX;
-       char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
-       const char *btf_name;
-       int err = -EINVAL;
-       __u32 kind;
+       struct btf *btf;
 
+       btf = libbpf_find_kernel_btf();
        if (IS_ERR(btf)) {
                pr_warn("vmlinux BTF is not found\n");
                return -EINVAL;
        }
 
-       if (attach_type == BPF_TRACE_RAW_TP) {
-               /* prepend "btf_trace_" prefix per kernel convention */
-               strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
-               btf_name = raw_tp_btf;
-               kind = BTF_KIND_TYPEDEF;
-       } else {
-               btf_name = name;
-               kind = BTF_KIND_FUNC;
-       }
-       err = btf__find_by_name_kind(btf, btf_name, kind);
-       btf__free(btf);
-       return err;
+       return __find_vmlinux_btf_id(btf, name, attach_type);
 }
 
 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -6038,10 +6614,11 @@ out:
        return err;
 }
 
-static int libbpf_find_attach_btf_id(const char *name,
-                                    enum bpf_attach_type attach_type,
-                                    __u32 attach_prog_fd)
+static int libbpf_find_attach_btf_id(struct bpf_program *prog)
 {
+       enum bpf_attach_type attach_type = prog->expected_attach_type;
+       __u32 attach_prog_fd = prog->attach_prog_fd;
+       const char *name = prog->section_name;
        int i, err;
 
        if (!name)
@@ -6056,8 +6633,9 @@ static int libbpf_find_attach_btf_id(const char *name,
                        err = libbpf_find_prog_btf_id(name + section_defs[i].len,
                                                      attach_prog_fd);
                else
-                       err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len,
-                                                        attach_type);
+                       err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
+                                                   name + section_defs[i].len,
+                                                   attach_type);
                if (err <= 0)
                        pr_warn("%s is not found in vmlinux BTF\n", name);
                return err;
@@ -6805,6 +7383,58 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
        return sec_def->attach_fn(sec_def, prog);
 }
 
+static int bpf_link__detach_struct_ops(struct bpf_link *link)
+{
+       struct bpf_link_fd *l = (void *)link;
+       __u32 zero = 0;
+
+       if (bpf_map_delete_elem(l->fd, &zero))
+               return -errno;
+
+       return 0;
+}
+
+struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
+{
+       struct bpf_struct_ops *st_ops;
+       struct bpf_link_fd *link;
+       __u32 i, zero = 0;
+       int err;
+
+       if (!bpf_map__is_struct_ops(map) || map->fd == -1)
+               return ERR_PTR(-EINVAL);
+
+       link = calloc(1, sizeof(*link));
+       if (!link)
+               return ERR_PTR(-EINVAL);
+
+       st_ops = map->st_ops;
+       for (i = 0; i < btf_vlen(st_ops->type); i++) {
+               struct bpf_program *prog = st_ops->progs[i];
+               void *kern_data;
+               int prog_fd;
+
+               if (!prog)
+                       continue;
+
+               prog_fd = bpf_program__fd(prog);
+               kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
+               *(unsigned long *)kern_data = prog_fd;
+       }
+
+       err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
+       if (err) {
+               err = -errno;
+               free(link);
+               return ERR_PTR(err);
+       }
+
+       link->link.detach = bpf_link__detach_struct_ops;
+       link->fd = map->fd;
+
+       return (struct bpf_link *)link;
+}
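+/* Usage sketch (hypothetical object and map names): after bpf_object__load(),
+ * attaching the struct_ops map registers the kernel struct (e.g. a
+ * tcp_congestion_ops) with its subsystem, and destroying the link
+ * unregisters it again:
+ *
+ *     struct bpf_map *map = bpf_object__find_map_by_name(obj, "sketch_reno");
+ *     struct bpf_link *link = bpf_map__attach_struct_ops(map);
+ *
+ *     if (libbpf_get_error(link))
+ *             return -EINVAL;
+ *     ...
+ *     bpf_link__destroy(link);
+ */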
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
                           void **copy_mem, size_t *copy_size,
index fe592ef48f1bfaf2dcb2d2bdb3374f543c7aeab2..2a5e3b087002e4b0e83959b9dedda18add97dee7 100644 (file)
@@ -239,6 +239,8 @@ bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_trace(struct bpf_program *prog);
+struct bpf_map;
+LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
 struct bpf_insn;
 
 /*
@@ -315,6 +317,8 @@ LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
 
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -335,6 +339,8 @@ LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -354,7 +360,6 @@ struct bpf_map_def {
  * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
  * so no need to worry about a name clash.
  */
-struct bpf_map;
 LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
 
@@ -521,6 +526,7 @@ LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type,
 LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
 LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
                                 enum bpf_prog_type prog_type, __u32 ifindex);
+LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
 
 /*
  * Get bpf_prog_info in continuous memory
index e9713a57424325c6e69092e0390c42f89bd17f45..b035122142bb9f372eee148856a4f5edde0b2793 100644 (file)
@@ -213,14 +213,25 @@ LIBBPF_0.0.7 {
        global:
                btf_dump__emit_type_decl;
                bpf_link__disconnect;
+               bpf_map__attach_struct_ops;
+               bpf_map_delete_batch;
+               bpf_map_lookup_and_delete_batch;
+               bpf_map_lookup_batch;
+               bpf_map_update_batch;
                bpf_object__find_program_by_name;
                bpf_object__attach_skeleton;
                bpf_object__destroy_skeleton;
                bpf_object__detach_skeleton;
                bpf_object__load_skeleton;
                bpf_object__open_skeleton;
+               bpf_probe_large_insn_limit;
                bpf_prog_attach_xattr;
                bpf_program__attach;
                bpf_program__name;
+               bpf_program__is_extension;
+               bpf_program__is_struct_ops;
+               bpf_program__set_extension;
+               bpf_program__set_struct_ops;
                btf__align_of;
+               libbpf_find_kernel_btf;
 } LIBBPF_0.0.6;
index 4343e40588c6688419dfea2012a1b14a02dfc185..0afb51f7a91941dbd63e634905d498841c2283b7 100644 (file)
@@ -13,6 +13,9 @@
 
 #include "libbpf.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
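+/* The poison pragma turns any later use of these identifiers into a hard
+ * compile error, e.g. (a sketch of the effect):
+ *
+ *     u32 a;          <-- error: attempt to use poisoned "u32"
+ *     __u32 b;        <-- fine: the UAPI __u32 typedef is not poisoned
+ */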
 #define ERRNO_OFFSET(e)                ((e) - __LIBBPF_ERRNO__START)
 #define ERRCODE_OFFSET(c)      ERRNO_OFFSET(LIBBPF_ERRNO__##c)
 #define NR_ERRNO       (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
index a9eb8b322671fb7ab1b9bb459e5419c799dfd008..b782ebef6ac9265269ab47db41b3584b1ef326ba 100644 (file)
@@ -17,6 +17,9 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 static bool grep(const char *buffer, const char *pattern)
 {
        return !!strstr(buffer, pattern);
@@ -103,6 +106,8 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
        case BPF_PROG_TYPE_CGROUP_SYSCTL:
        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
        case BPF_PROG_TYPE_TRACING:
+       case BPF_PROG_TYPE_STRUCT_OPS:
+       case BPF_PROG_TYPE_EXT:
        default:
                break;
        }
@@ -251,6 +256,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
        case BPF_MAP_TYPE_XSKMAP:
        case BPF_MAP_TYPE_SOCKHASH:
        case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+       case BPF_MAP_TYPE_STRUCT_OPS:
        default:
                break;
        }
@@ -321,3 +327,24 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
 
        return res;
 }
+
+/*
+ * Probe for availability of kernel commit (5.3):
+ *
+ * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
+ */
+bool bpf_probe_large_insn_limit(__u32 ifindex)
+{
+       struct bpf_insn insns[BPF_MAXINSNS + 1];
+       int i;
+
+       for (i = 0; i < BPF_MAXINSNS; i++)
+               insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
+       insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
+
+       errno = 0;
+       probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
+                  ifindex);
+
+       return errno != E2BIG && errno != EINVAL;
+}
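+/* Usage sketch: feature-probe before loading a program larger than the old
+ * BPF_MAXINSNS (4096) limit; ifindex 0 probes the host kernel rather than
+ * an offload device:
+ *
+ *     if (!bpf_probe_large_insn_limit(0))
+ *             split_or_shrink_prog();  (hypothetical fallback)
+ */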
index 5065c1aa1061a3ccbc8799fab4eec5188f0eef6f..431bd25c6cdb5abb036860c972eb348acc2d77a4 100644 (file)
@@ -15,6 +15,9 @@
 #include "libbpf_internal.h"
 #include "nlattr.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 #ifndef SOL_NETLINK
 #define SOL_NETLINK 270
 #endif
index 8db44bbfc66ddb7f02a062cc7a6f589577e429ba..0ad41dfea8eb2b9152dd9ecb4eeb80cde791a064 100644 (file)
@@ -13,6 +13,9 @@
 #include <string.h>
 #include <stdio.h>
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
        [LIBBPF_NLA_U8]         = sizeof(uint8_t),
        [LIBBPF_NLA_U16]        = sizeof(uint16_t),
index b8064eedc177006ee0d8d9bba2fd0b91051ae8d1..146da01979c7fc52b3c5dae00909b5d53663d04c 100644 (file)
@@ -4,6 +4,9 @@
 #include <stdio.h>
 #include "str_error.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 /*
  * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
  * libc, while checking strerror_r() return to avoid having to check this in
index 8e0ffa800a71362fb6e93b4d9804cae20dfb79f7..9807903f121e8e92fb0718270ad162a416b7c940 100644 (file)
@@ -32,6 +32,9 @@
 #include "libbpf_internal.h"
 #include "xsk.h"
 
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 #ifndef SOL_XDP
  #define SOL_XDP 283
 #endif
index b9c203219691a4758eff6578bd00ee7c09bec10b..49f4f84da48593b0b43030f846d75857972e9eab 100644 (file)
@@ -39,7 +39,7 @@
    Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
 */
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
 {
index 3776d26db9e79ed2644506c4a932a0ccd69f3e8b..7d7fb0c9fe76b79ced9f0069616362588d7e8530 100644 (file)
@@ -1,3 +1,3 @@
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 license(GPL);
index 9cd124b09392c45a60e547988014b1d65b2feaf6..c4481c390d232d7582bd182392aaa3152383e9f0 100644 (file)
@@ -14,7 +14,7 @@
  * the return value.
  */
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 struct syscall_enter_openat_args {
        unsigned long long unused;
index 6e61c4bdf54826ce6538c98984f2751d6ddff755..607189a315b2cbd32f36878d0fcdfb97b1001685 100644 (file)
@@ -3,7 +3,7 @@
 #ifndef _PERF_BPF_PID_FILTER_
 #define _PERF_BPF_PID_FILTER_
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 #define pid_filter(name) pid_map(name, bool)
 
index 316af5b2ff3516b3aba5365423c69e24f3f5dee5..7ca6fa5463eea9faebb2dda6e561e98654da9692 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 struct bpf_map SEC("maps") __bpf_stdout__ = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
index ca7877f9a976fbcaf55beb5196eedcc8eed9351b..d1a35b6c649dc7b860ccb661f3e71ba0b595ce69 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: LGPL-2.1
 
-#include <bpf.h>
+#include <bpf/bpf.h>
 
 static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
 
index 301ac12d5d6929312d2cb86feda66d29299792a4..ec464859c6b61e4fe8926790cf7d6ee255f1bee1 100644 (file)
@@ -22,17 +22,13 @@ get_cgroup_id_user
 test_skb_cgroup_id_user
 test_socket_cookie
 test_cgroup_storage
-test_select_reuseport
 test_flow_dissector
 flow_dissector_load
 test_netcnt
-test_section_names
 test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
 test_sysctl
-libbpf.pc
-libbpf.so.*
 test_hashmap
 test_btf_dump
 xdping
@@ -41,4 +37,4 @@ test_cpp
 /no_alu32
 /bpf_gcc
 /tools
-bpf_helper_defs.h
+
index f1f949cd8ed9af2904f64ac2e21709fda37c5297..5f41f5bd8033fa22449e3e1c62e1d1ccbbc85749 100644 (file)
@@ -20,8 +20,8 @@ CLANG         ?= clang
 LLC            ?= llc
 LLVM_OBJCOPY   ?= llvm-objcopy
 BPF_GCC                ?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \
-         -I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR)                      \
+CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)             \
+         -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)     \
          -Dbpf_prog_load=bpf_prog_test_load                            \
          -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lz -lrt -lpthread
@@ -73,7 +73,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-       test_lirc_mode2_user xdping test_cpp
+       test_lirc_mode2_user xdping test_cpp runqslower
 
 TEST_CUSTOM_PROGS = urandom_read
 
@@ -83,20 +83,29 @@ TEST_CUSTOM_PROGS = urandom_read
 # $3 - target (assumed to be file); only file name will be emitted;
 # $4 - optional extra arg, emitted as-is, if provided.
 ifeq ($(V),1)
+Q =
 msg =
 else
-msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4))
+Q = @
+msg = @printf '  %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
 endif
 
 # override lib.mk's default rules
 OVERRIDE_TARGETS := 1
 override define CLEAN
-       $(call msg,    CLEAN)
+       $(call msg,CLEAN)
        $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 endef
 
 include ../lib.mk
 
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+INCLUDE_DIR := $(SCRATCH_DIR)/include
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets
 # to build individual tests.
 # NOTE: Semicolon at the end is critical to override lib.mk's default static
@@ -108,18 +117,25 @@ $(notdir $(TEST_GEN_PROGS)                                                \
         $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
 
 $(OUTPUT)/%:%.c
-       $(call msg,     BINARY,,$@)
+       $(call msg,BINARY,,$@)
        $(LINK.c) $^ $(LDLIBS) -o $@
 
 $(OUTPUT)/urandom_read: urandom_read.c
-       $(call msg,     BINARY,,$@)
-       $(CC) -o $@ $< -Wl,--build-id
+       $(call msg,BINARY,,$@)
+       $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
 
-$(OUTPUT)/test_stub.o: test_stub.c
-       $(call msg,         CC,,$@)
+$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
+       $(call msg,CC,,$@)
        $(CC) -c $(CFLAGS) -o $@ $<
 
-BPFOBJ := $(OUTPUT)/libbpf.a
+VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux)                    \
+                              /sys/kernel/btf/vmlinux                  \
+                              /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF := $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))
+$(OUTPUT)/runqslower: $(BPFOBJ)
+       $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower     \
+                   OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF)   \
+                   BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
 
@@ -137,23 +153,22 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 
-.PHONY: force
-
-# force a rebuild of BPFOBJ when its dependencies are updated
-force:
-
-DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool
+DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
+$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BUILD_DIR)/bpftool
+       $(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)                 \
+                   OUTPUT=$(BUILD_DIR)/bpftool/                        \
+                   prefix= DESTDIR=$(SCRATCH_DIR)/ install
 
-$(DEFAULT_BPFTOOL): force
-       $(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install
-
-$(BPFOBJ): force
-       $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.c $(BPFDIR)/*.h $(BPFDIR)/Makefile)          \
+          ../../../include/uapi/linux/bpf.h                                   \
+          | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
+       $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+               DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
 
-BPF_HELPERS := $(OUTPUT)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h)
-$(OUTPUT)/bpf_helper_defs.h:
-       $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h
+$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
+       $(call msg,MKDIR,,$@)
+       mkdir -p $@
 
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,
@@ -173,8 +188,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
 BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)                  \
-            -I. -I./include/uapi -I$(APIDIR)                           \
-            -I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include)
+            -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi      \
+            -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include)
 
 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
               -Wno-compare-distinct-pointer-types
@@ -190,28 +205,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
 # $3 - CFLAGS
 # $4 - LDFLAGS
 define CLANG_BPF_BUILD_RULE
-       $(call msg,  CLANG-LLC,$(TRUNNER_BINARY),$2)
+       $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
        ($(CLANG) $3 -O2 -target bpf -emit-llvm                         \
                -c $1 -o - || echo "BPF obj compilation failed") |      \
        $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
 define CLANG_NOALU32_BPF_BUILD_RULE
-       $(call msg,  CLANG-LLC,$(TRUNNER_BINARY),$2)
+       $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
        ($(CLANG) $3 -O2 -target bpf -emit-llvm                         \
                -c $1 -o - || echo "BPF obj compilation failed") |      \
        $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
 define CLANG_NATIVE_BPF_BUILD_RULE
-       $(call msg,  CLANG-BPF,$(TRUNNER_BINARY),$2)
+       $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
        ($(CLANG) $3 -O2 -emit-llvm                                     \
                -c $1 -o - || echo "BPF obj compilation failed") |      \
        $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
 endef
 # Build BPF object using GCC
 define GCC_BPF_BUILD_RULE
-       $(call msg,    GCC-BPF,$(TRUNNER_BINARY),$2)
+       $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
        $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
 endef
 
@@ -252,6 +267,7 @@ define DEFINE_TEST_RUNNER_RULES
 ifeq ($($(TRUNNER_OUTPUT)-dir),)
 $(TRUNNER_OUTPUT)-dir := y
 $(TRUNNER_OUTPUT):
+       $$(call msg,MKDIR,,$$@)
        mkdir -p $$@
 endif
 
@@ -262,7 +278,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
 $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:                            \
                     $(TRUNNER_BPF_PROGS_DIR)/%.c                       \
                     $(TRUNNER_BPF_PROGS_DIR)/*.h                       \
-                    $$(BPF_HELPERS) | $(TRUNNER_OUTPUT)
+                    $$(BPFOBJ) | $(TRUNNER_OUTPUT)
        $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,                      \
                                          $(TRUNNER_BPF_CFLAGS),        \
                                          $(TRUNNER_BPF_LDFLAGS))
@@ -270,7 +286,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:                         \
 $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h:                      \
                      $(TRUNNER_OUTPUT)/%.o                             \
                      | $(BPFTOOL) $(TRUNNER_OUTPUT)
-       $$(call msg,   GEN-SKEL,$(TRUNNER_BINARY),$$@)
+       $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
        $$(BPFTOOL) gen skeleton $$< > $$@
 endif
 
@@ -278,7 +294,7 @@ endif
 ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
 $(TRUNNER_TESTS_DIR)-tests-hdr := y
 $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
-       $$(call msg,   TEST-HDR,$(TRUNNER_BINARY),$$@)
+       $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@)
        $$(shell ( cd $(TRUNNER_TESTS_DIR);                             \
                  echo '/* Generated header, do not edit */';           \
                  ls *.c 2> /dev/null |                                 \
@@ -294,7 +310,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:                   \
                      $(TRUNNER_BPF_OBJS)                               \
                      $(TRUNNER_BPF_SKELS)                              \
                      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
-       $$(call msg,   TEST-OBJ,$(TRUNNER_BINARY),$$@)
+       $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
        cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
 
 $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:                          \
@@ -302,20 +318,20 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:                             \
                       $(TRUNNER_EXTRA_HDRS)                            \
                       $(TRUNNER_TESTS_HDR)                             \
                       $$(BPFOBJ) | $(TRUNNER_OUTPUT)
-       $$(call msg,  EXTRA-OBJ,$(TRUNNER_BINARY),$$@)
+       $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
        $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
 
 # only copy extra resources if in flavored build
 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
 ifneq ($2,)
-       $$(call msg,  EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
+       $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
        cp -a $$^ $(TRUNNER_OUTPUT)/
 endif
 
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)                      \
                             $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ)           \
                             | $(TRUNNER_BINARY)-extras
-       $$(call msg,     BINARY,,$$@)
+       $$(call msg,BINARY,,$$@)
        $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
 
 endef
@@ -328,7 +344,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c      \
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read                          \
                       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := -I. -I$(OUTPUT) $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
 TRUNNER_BPF_LDFLAGS := -mattr=+alu32
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
@@ -367,15 +383,15 @@ verifier/tests.h: verifier/*.c
                  echo '#endif' \
                ) > verifier/tests.h)
 $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
-       $(call msg,     BINARY,,$@)
+       $(call msg,BINARY,,$@)
        $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
 # Make sure we are able to include and link libbpf against c++.
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
-       $(call msg,        CXX,,$@)
+       $(call msg,CXX,,$@)
        $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
 
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)                                    \
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)                     \
        prog_tests/tests.h map_tests/tests.h verifier/tests.h           \
-       feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc      \
-       tools *.skel.h
+       feature                                                         \
+       $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc)
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
new file mode 100644 (file)
index 0000000..8f21965
--- /dev/null
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TCP_HELPERS_H
+#define __BPF_TCP_HELPERS_H
+
+#include <stdbool.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_trace_helpers.h"
+
+#define BPF_STRUCT_OPS(name, args...) \
+SEC("struct_ops/"#name) \
+BPF_PROG(name, args)
+
+#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+
+struct sock_common {
+       unsigned char   skc_state;
+} __attribute__((preserve_access_index));
+
+enum sk_pacing {
+       SK_PACING_NONE          = 0,
+       SK_PACING_NEEDED        = 1,
+       SK_PACING_FQ            = 2,
+};
+
+struct sock {
+       struct sock_common      __sk_common;
+       unsigned long           sk_pacing_rate;
+       __u32                   sk_pacing_status; /* see enum sk_pacing */
+} __attribute__((preserve_access_index));
+
+struct inet_sock {
+       struct sock             sk;
+} __attribute__((preserve_access_index));
+
+struct inet_connection_sock {
+       struct inet_sock          icsk_inet;
+       __u8                      icsk_ca_state:6,
+                                 icsk_ca_setsockopt:1,
+                                 icsk_ca_dst_locked:1;
+       struct {
+               __u8              pending;
+       } icsk_ack;
+       __u64                     icsk_ca_priv[104 / sizeof(__u64)];
+} __attribute__((preserve_access_index));
+
+struct tcp_sock {
+       struct inet_connection_sock     inet_conn;
+
+       __u32   rcv_nxt;
+       __u32   snd_nxt;
+       __u32   snd_una;
+       __u8    ecn_flags;
+       __u32   delivered;
+       __u32   delivered_ce;
+       __u32   snd_cwnd;
+       __u32   snd_cwnd_cnt;
+       __u32   snd_cwnd_clamp;
+       __u32   snd_ssthresh;
+       __u8    syn_data:1,     /* SYN includes data */
+               syn_fastopen:1, /* SYN includes Fast Open option */
+               syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
+               syn_fastopen_ch:1, /* Active TFO re-enabling probe */
+               syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+               save_syn:1,     /* Save headers of SYN packet */
+               is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
+               syn_smc:1;      /* SYN includes SMC */
+       __u32   max_packets_out;
+       __u32   lsndtime;
+       __u32   prior_cwnd;
+       __u64   tcp_mstamp;     /* most recent packet received/sent */
+} __attribute__((preserve_access_index));
+
+static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+       return (struct inet_connection_sock *)sk;
+}
+
+static __always_inline void *inet_csk_ca(const struct sock *sk)
+{
+       return (void *)inet_csk(sk)->icsk_ca_priv;
+}
+
+static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+       return (struct tcp_sock *)sk;
+}
+
+static __always_inline bool before(__u32 seq1, __u32 seq2)
+{
+       return (__s32)(seq1-seq2) < 0;
+}
+#define after(seq2, seq1)      before(seq1, seq2)
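+/* The signed 32-bit difference keeps the comparison correct across sequence
+ * number wraparound: e.g. before(0xfffffff0, 0x10) is true, because
+ * (__s32)(0xfffffff0 - 0x10) is negative.
+ */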
+
+#define        TCP_ECN_OK              1
+#define        TCP_ECN_QUEUE_CWR       2
+#define        TCP_ECN_DEMAND_CWR      4
+#define        TCP_ECN_SEEN            8
+
+enum inet_csk_ack_state_t {
+       ICSK_ACK_SCHED  = 1,
+       ICSK_ACK_TIMER  = 2,
+       ICSK_ACK_PUSHED = 4,
+       ICSK_ACK_PUSHED2 = 8,
+       ICSK_ACK_NOW = 16       /* Send the next ACK immediately (once) */
+};
+
+enum tcp_ca_event {
+       CA_EVENT_TX_START = 0,
+       CA_EVENT_CWND_RESTART = 1,
+       CA_EVENT_COMPLETE_CWR = 2,
+       CA_EVENT_LOSS = 3,
+       CA_EVENT_ECN_NO_CE = 4,
+       CA_EVENT_ECN_IS_CE = 5,
+};
+
+enum tcp_ca_state {
+       TCP_CA_Open = 0,
+       TCP_CA_Disorder = 1,
+       TCP_CA_CWR = 2,
+       TCP_CA_Recovery = 3,
+       TCP_CA_Loss = 4
+};
+
+struct ack_sample {
+       __u32 pkts_acked;
+       __s32 rtt_us;
+       __u32 in_flight;
+} __attribute__((preserve_access_index));
+
+struct rate_sample {
+       __u64  prior_mstamp; /* starting timestamp for interval */
+       __u32  prior_delivered; /* tp->delivered at "prior_mstamp" */
+       __s32  delivered;               /* number of packets delivered over interval */
+       long interval_us;       /* time for tp->delivered to incr "delivered" */
+       __u32 snd_interval_us;  /* snd interval for delivered packets */
+       __u32 rcv_interval_us;  /* rcv interval for delivered packets */
+       long rtt_us;            /* RTT of last (S)ACKed packet (or -1) */
+       int  losses;            /* number of packets marked lost upon ACK */
+       __u32  acked_sacked;    /* number of packets newly (S)ACKed upon ACK */
+       __u32  prior_in_flight; /* in flight before this ACK */
+       bool is_app_limited;    /* is sample from packet with bubble in pipe? */
+       bool is_retrans;        /* is sample from retransmission? */
+       bool is_ack_delayed;    /* is this (likely) a delayed ACK? */
+} __attribute__((preserve_access_index));
+
+#define TCP_CA_NAME_MAX                16
+#define TCP_CONG_NEEDS_ECN     0x2
+
+struct tcp_congestion_ops {
+       char name[TCP_CA_NAME_MAX];
+       __u32 flags;
+
+       /* initialize private data (optional) */
+       void (*init)(struct sock *sk);
+       /* cleanup private data  (optional) */
+       void (*release)(struct sock *sk);
+
+       /* return slow start threshold (required) */
+       __u32 (*ssthresh)(struct sock *sk);
+       /* do new cwnd calculation (required) */
+       void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
+       /* call before changing ca_state (optional) */
+       void (*set_state)(struct sock *sk, __u8 new_state);
+       /* call when cwnd event occurs (optional) */
+       void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+       /* call when ack arrives (optional) */
+       void (*in_ack_event)(struct sock *sk, __u32 flags);
+       /* new value of cwnd after loss (required) */
+       __u32  (*undo_cwnd)(struct sock *sk);
+       /* hook for packet ack accounting (optional) */
+       void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+       /* override sysctl_tcp_min_tso_segs */
+       __u32 (*min_tso_segs)(struct sock *sk);
+       /* returns the multiplier used in tcp_sndbuf_expand (optional) */
+       __u32 (*sndbuf_expand)(struct sock *sk);
+       /* call when packets are delivered to update cwnd and pacing rate,
+        * after all the ca_state processing. (optional)
+        */
+       void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+};
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min_not_zero(x, y) ({                  \
+       typeof(x) __x = (x);                    \
+       typeof(y) __y = (y);                    \
+       __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
+{
+       __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+
+       acked -= cwnd - tp->snd_cwnd;
+       tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+
+       return acked;
+}
+
+static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+       return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
+static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+
+       /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+       if (tcp_in_slow_start(tp))
+               return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+       return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
+}
+
+static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
+{
+       /* If credits accumulated at a higher w, apply them gently now. */
+       if (tp->snd_cwnd_cnt >= w) {
+               tp->snd_cwnd_cnt = 0;
+               tp->snd_cwnd++;
+       }
+
+       tp->snd_cwnd_cnt += acked;
+       if (tp->snd_cwnd_cnt >= w) {
+               __u32 delta = tp->snd_cwnd_cnt / w;
+
+               tp->snd_cwnd_cnt -= delta * w;
+               tp->snd_cwnd += delta;
+       }
+       tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+}
+
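+/* A minimal usage sketch (hypothetical names), mirroring the selftests'
+ * pattern: each callback becomes a SEC("struct_ops/...") program via
+ * BPF_STRUCT_OPS, and a SEC(".struct_ops") variable of the ops type becomes
+ * a struct_ops map that bpf_map__attach_struct_ops() registers:
+ *
+ *     __u32 BPF_STRUCT_OPS(sketch_ssthresh, struct sock *sk)
+ *     {
+ *             return tcp_sk(sk)->snd_ssthresh;
+ *     }
+ *
+ *     void BPF_STRUCT_OPS(sketch_cong_avoid, struct sock *sk, __u32 ack,
+ *                         __u32 acked)
+ *     {
+ *             struct tcp_sock *tp = tcp_sk(sk);
+ *
+ *             if (tcp_in_slow_start(tp))
+ *                     acked = tcp_slow_start(tp, acked);
+ *             if (acked)
+ *                     tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ *     }
+ *
+ *     __u32 BPF_STRUCT_OPS(sketch_undo_cwnd, struct sock *sk)
+ *     {
+ *             return tcp_sk(sk)->snd_cwnd;
+ *     }
+ *
+ *     SEC(".struct_ops")
+ *     struct tcp_congestion_ops sketch_reno = {
+ *             .ssthresh       = (void *)sketch_ssthresh,
+ *             .cong_avoid     = (void *)sketch_cong_avoid,
+ *             .undo_cwnd      = (void *)sketch_undo_cwnd,
+ *             .name           = "bpf_sketch",
+ *     };
+ */
+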
+#endif
index c76a214a53b0504a03b22e1595f4b02d4dcd1236..c6f1354d93fb112cc987d8e9171e7406f8efa334 100644 (file)
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 #ifndef __BPF_TRACE_HELPERS_H
 #define __BPF_TRACE_HELPERS_H
 
-#include "bpf_helpers.h"
-
-#define __BPF_MAP_0(i, m, v, ...) v
-#define __BPF_MAP_1(i, m, v, t, a, ...) m(t, a, ctx[i])
-#define __BPF_MAP_2(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_1(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_3(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_2(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_4(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_3(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_5(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_4(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_6(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_5(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_7(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_6(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_8(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_7(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_9(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_8(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_10(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_9(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_11(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_10(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP_12(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_11(i+1, m, v, __VA_ARGS__)
-#define __BPF_MAP(n, ...) __BPF_MAP_##n(0, __VA_ARGS__)
-
-/* BPF sizeof(void *) is always 8, so no need to cast to long first
- * for ptr to avoid compiler warning.
+#include <bpf/bpf_helpers.h>
+
+#define ___bpf_concat(a, b) a ## b
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#define ___bpf_narg(...) \
+       ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_empty(...) \
+       ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
+#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
+#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
+#define ___bpf_ctx_cast(args...) \
+       ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+
+/*
+ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
+ * similar kinds of BPF programs that accept their input arguments as a
+ * single pointer to an untyped u64 array, where each u64 can actually be
+ * a typed pointer or an integer of a different size. Instead of requiring
+ * the user to write manual casts and work with array elements by index,
+ * the BPF_PROG macro lets the user declare a list of named, typed input
+ * arguments in the same syntax as for a normal C function. All the
+ * casting is hidden and performed transparently, so user code can simply
+ * work with function arguments of the specified type and name.
+ *
+ * The original raw context argument is preserved as well, as the 'ctx'
+ * argument. This is useful with BPF helpers that expect the original
+ * context as one of their parameters (e.g., bpf_perf_event_output()).
  */
-#define __BPF_CAST(t, a, ctx) (t) ctx
-#define __BPF_V void
-#define __BPF_N
-
-#define __BPF_DECL_ARGS(t, a, ctx) t a
-
-#define BPF_TRACE_x(x, sec_name, fname, ret_type, ...)                 \
-static __always_inline ret_type                                                \
-____##fname(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));      \
-                                                                       \
-SEC(sec_name)                                                          \
-ret_type fname(__u64 *ctx)                                             \
-{                                                                      \
-       return ____##fname(__BPF_MAP(x, __BPF_CAST, __BPF_N, __VA_ARGS__));\
-}                                                                      \
-                                                                       \
-static __always_inline                                                 \
-ret_type ____##fname(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
-
-#define BPF_TRACE_0(sec, fname, ...)  BPF_TRACE_x(0, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_1(sec, fname, ...)  BPF_TRACE_x(1, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_2(sec, fname, ...)  BPF_TRACE_x(2, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_3(sec, fname, ...)  BPF_TRACE_x(3, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_4(sec, fname, ...)  BPF_TRACE_x(4, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_5(sec, fname, ...)  BPF_TRACE_x(5, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_6(sec, fname, ...)  BPF_TRACE_x(6, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_7(sec, fname, ...)  BPF_TRACE_x(7, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_8(sec, fname, ...)  BPF_TRACE_x(8, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_9(sec, fname, ...)  BPF_TRACE_x(9, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_10(sec, fname, ...)  BPF_TRACE_x(10, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_11(sec, fname, ...)  BPF_TRACE_x(11, sec, fname, int, __VA_ARGS__)
-#define BPF_TRACE_12(sec, fname, ...)  BPF_TRACE_x(12, sec, fname, int, __VA_ARGS__)
+#define BPF_PROG(name, args...)                                                    \
+name(unsigned long long *ctx);                                             \
+static __always_inline typeof(name(0))                                     \
+____##name(unsigned long long *ctx, ##args);                               \
+typeof(name(0)) name(unsigned long long *ctx)                              \
+{                                                                          \
+       _Pragma("GCC diagnostic push")                                      \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")              \
+       return ____##name(___bpf_ctx_cast(args));                           \
+       _Pragma("GCC diagnostic pop")                                       \
+}                                                                          \
+static __always_inline typeof(name(0))                                     \
+____##name(unsigned long long *ctx, ##args)
+
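+/* A usage sketch (hypothetical program, attached to the kernel's
+ * bpf_fentry_test2() test function, which takes (int, u64)):
+ *
+ *     SEC("fentry/bpf_fentry_test2")
+ *     int BPF_PROG(trace_test2, int a, __u64 b)
+ *     {
+ *             return 0;
+ *     }
+ *
+ * 'a' and 'b' are transparently cast from ctx[0] and ctx[1], while the raw
+ * u64 array remains available as 'ctx'.
+ */
+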
+struct pt_regs;
+
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) \
+       ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) \
+       ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) \
+       ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) \
+       ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) \
+       ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) \
+       ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
 
+/*
+ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
+ * low-level way of getting kprobe input arguments from struct pt_regs, and
+ * provides familiar, typed and named function argument syntax and
+ * semantics for accessing kprobe input parameters.
+ *
+ * The original struct pt_regs * context is preserved as the 'ctx' argument.
+ * This might be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE(name, args...)                                          \
+name(struct pt_regs *ctx);                                                 \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
+typeof(name(0)) name(struct pt_regs *ctx)                                  \
+{                                                                          \
+       _Pragma("GCC diagnostic push")                                      \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")              \
+       return ____##name(___bpf_kprobe_args(args));                        \
+       _Pragma("GCC diagnostic pop")                                       \
+}                                                                          \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+
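+/* A usage sketch (hypothetical handler for do_sys_open(int dfd,
+ * const char *filename, int flags, umode_t mode)):
+ *
+ *     SEC("kprobe/do_sys_open")
+ *     int BPF_KPROBE(handle_open, int dfd, const char *filename, int flags)
+ *     {
+ *             return 0;
+ *     }
+ *
+ * Each argument is fetched via the matching PT_REGS_PARMn(ctx), and the
+ * original struct pt_regs * stays available as 'ctx'.
+ */
+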
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_argsN(x, args...) \
+       ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx)
+#define ___bpf_kretprobe_args(args...) \
+       ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args)
+
+/*
+ * BPF_KRETPROBE is similar to BPF_KPROBE, except that, in addition to
+ * listing all input kprobe arguments, one last extra argument has to be
+ * specified, which captures the kprobe's return value.
+ */
+#define BPF_KRETPROBE(name, args...)                                       \
+name(struct pt_regs *ctx);                                                 \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
+typeof(name(0)) name(struct pt_regs *ctx)                                  \
+{                                                                          \
+       _Pragma("GCC diagnostic push")                                      \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")              \
+       return ____##name(___bpf_kretprobe_args(args));                     \
+       _Pragma("GCC diagnostic pop")                                       \
+}                                                                          \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
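+
+/* A usage sketch (hypothetical handler; the trailing argument receives the
+ * return value via PT_REGS_RET(ctx)):
+ *
+ *     SEC("kretprobe/do_sys_open")
+ *     int BPF_KRETPROBE(handle_open_ret, long ret)
+ *     {
+ *             return 0;
+ *     }
+ */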
 #endif
index ec219f84e0415715c0d572e209f39f706587d0b5..a3352a64c067fc50fbe6ef19d532aa3382babd99 100644 (file)
@@ -6,7 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
-#include <libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
 
 static inline unsigned int bpf_num_possible_cpus(void)
 {
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
new file mode 100644 (file)
index 0000000..f0a64d8
--- /dev/null
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
+                            int *values)
+{
+       int i, err;
+       DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+               .elem_flags = 0,
+               .flags = 0,
+       );
+
+       for (i = 0; i < max_entries; i++) {
+               keys[i] = i;
+               values[i] = i + 1;
+       }
+
+       err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+       CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
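+/* Note: bpf_map_update_batch() takes the element count by pointer because
+ * it is in/out: on input, the number of elements supplied; on output, the
+ * number the kernel actually processed (smaller on a partial failure).
+ */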
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+                            int *keys, int *values)
+{
+       int i;
+
+       memset(visited, 0, max_entries * sizeof(*visited));
+       for (i = 0; i < max_entries; i++) {
+               CHECK(keys[i] + 1 != values[i], "key/value checking",
+                     "error: i %d key %d value %d\n", i, keys[i], values[i]);
+               visited[i] = 1;
+       }
+       for (i = 0; i < max_entries; i++) {
+               CHECK(visited[i] != 1, "visited checking",
+                     "error: keys array at index %d missing\n", i);
+       }
+}
+
+void test_array_map_batch_ops(void)
+{
+       struct bpf_create_map_attr xattr = {
+               .name = "array_map",
+               .map_type = BPF_MAP_TYPE_ARRAY,
+               .key_size = sizeof(int),
+               .value_size = sizeof(int),
+       };
+       int map_fd, *keys, *values, *visited;
+       __u32 count, total, total_success;
+       const __u32 max_entries = 10;
+       bool nospace_err;
+       __u64 batch = 0;
+       int err, step;
+       DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+               .elem_flags = 0,
+               .flags = 0,
+       );
+
+       xattr.max_entries = max_entries;
+       map_fd = bpf_create_map_xattr(&xattr);
+       CHECK(map_fd == -1,
+             "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+
+       keys = malloc(max_entries * sizeof(int));
+       values = malloc(max_entries * sizeof(int));
+       visited = malloc(max_entries * sizeof(int));
+       CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+             strerror(errno));
+
+       /* populate elements to the map */
+       map_batch_update(map_fd, max_entries, keys, values);
+
+       /* test 1: lookup in a loop with various steps. */
+       total_success = 0;
+       for (step = 1; step < max_entries; step++) {
+               map_batch_update(map_fd, max_entries, keys, values);
+               map_batch_verify(visited, max_entries, keys, values);
+               memset(keys, 0, max_entries * sizeof(*keys));
+               memset(values, 0, max_entries * sizeof(*values));
+               batch = 0;
+               total = 0;
+               /* iteratively lookup elements, 'step'
+                * elements at a time.
+                */
+               count = step;
+               nospace_err = false;
+               while (true) {
+                       err = bpf_map_lookup_batch(map_fd,
+                                               total ? &batch : NULL, &batch,
+                                               keys + total,
+                                               values + total,
+                                               &count, &opts);
+
+                       CHECK((err && errno != ENOENT), "lookup with steps",
+                             "error: %s\n", strerror(errno));
+
+                       total += count;
+                       if (err)
+                               break;
+
+               }
+
+               if (nospace_err == true)
+                       continue;
+
+               CHECK(total != max_entries, "lookup with steps",
+                     "total = %u, max_entries = %u\n", total, max_entries);
+
+               map_batch_verify(visited, max_entries, keys, values);
+
+               total_success++;
+       }
+
+       CHECK(total_success == 0, "check total_success",
+             "unexpected failure\n");
+
+       printf("%s:PASS\n", __func__);
+
+       free(keys);
+       free(values);
+       free(visited);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
new file mode 100644 (file)
index 0000000..976bf41
--- /dev/null
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook  */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <bpf_util.h>
+#include <test_maps.h>
+
+static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
+                            void *values, bool is_pcpu)
+{
+       typedef BPF_DECLARE_PERCPU(int, value);
+       value *v = NULL;
+       int i, j, err;
+       DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+               .elem_flags = 0,
+               .flags = 0,
+       );
+
+       if (is_pcpu)
+               v = (value *)values;
+
+       for (i = 0; i < max_entries; i++) {
+               keys[i] = i + 1;
+               if (is_pcpu)
+                       for (j = 0; j < bpf_num_possible_cpus(); j++)
+                               bpf_percpu(v[i], j) = i + 2 + j;
+               else
+                       ((int *)values)[i] = i + 2;
+       }
+
+       err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+       CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+                            int *keys, void *values, bool is_pcpu)
+{
+       typedef BPF_DECLARE_PERCPU(int, value);
+       value *v = NULL;
+       int i, j;
+
+       if (is_pcpu)
+               v = (value *)values;
+
+       memset(visited, 0, max_entries * sizeof(*visited));
+       for (i = 0; i < max_entries; i++) {
+
+               if (is_pcpu) {
+                       for (j = 0; j < bpf_num_possible_cpus(); j++) {
+                               CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j),
+                                     "key/value checking",
+                                     "error: i %d j %d key %d value %d\n",
+                                     i, j, keys[i], bpf_percpu(v[i],  j));
+                       }
+               } else {
+                       CHECK(keys[i] + 1 != ((int *)values)[i],
+                             "key/value checking",
+                             "error: i %d key %d value %d\n", i, keys[i],
+                             ((int *)values)[i]);
+               }
+
+               visited[i] = 1;
+
+       }
+       for (i = 0; i < max_entries; i++) {
+               CHECK(visited[i] != 1, "visited checking",
+                     "error: keys array at index %d missing\n", i);
+       }
+}
+
+void __test_map_lookup_and_delete_batch(bool is_pcpu)
+{
+       __u32 batch, count, total, total_success;
+       typedef BPF_DECLARE_PERCPU(int, value);
+       int map_fd, *keys, *visited, key;
+       const __u32 max_entries = 10;
+       value pcpu_values[max_entries];
+       int err, step, value_size;
+       bool nospace_err;
+       void *values;
+       struct bpf_create_map_attr xattr = {
+               .name = "hash_map",
+               .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH :
+                           BPF_MAP_TYPE_HASH,
+               .key_size = sizeof(int),
+               .value_size = sizeof(int),
+       };
+       DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+               .elem_flags = 0,
+               .flags = 0,
+       );
+
+       xattr.max_entries = max_entries;
+       map_fd = bpf_create_map_xattr(&xattr);
+       CHECK(map_fd == -1,
+             "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+
+       value_size = is_pcpu ? sizeof(value) : sizeof(int);
+       keys = malloc(max_entries * sizeof(int));
+       if (is_pcpu)
+               values = pcpu_values;
+       else
+               values = malloc(max_entries * sizeof(int));
+       visited = malloc(max_entries * sizeof(int));
+       CHECK(!keys || !values || !visited, "malloc()",
+             "error:%s\n", strerror(errno));
+
+       /* test 1: lookup/delete an empty hash table, -ENOENT */
+       count = max_entries;
+       err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
+                                             values, &count, &opts);
+       CHECK((err && errno != ENOENT), "empty map",
+             "error: %s\n", strerror(errno));
+
+       /* populate elements to the map */
+       map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+
+       /* test 2: lookup/delete with count = 0, success */
+       count = 0;
+       err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
+                                             values, &count, &opts);
+       CHECK(err, "count = 0", "error: %s\n", strerror(errno));
+
+       /* test 3: lookup/delete with count = max_entries, success */
+       memset(keys, 0, max_entries * sizeof(*keys));
+       memset(values, 0, max_entries * value_size);
+       count = max_entries;
+       err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
+                                             values, &count, &opts);
+       CHECK((err && errno != ENOENT), "count = max_entries",
+              "error: %s\n", strerror(errno));
+       CHECK(count != max_entries, "count = max_entries",
+             "count = %u, max_entries = %u\n", count, max_entries);
+       map_batch_verify(visited, max_entries, keys, values, is_pcpu);
+
+       /* bpf_map_get_next_key() should return -ENOENT for an empty map. */
+       err = bpf_map_get_next_key(map_fd, NULL, &key);
+       CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", strerror(errno));
+
+       /* test 4: lookup/delete in a loop with various steps. */
+       total_success = 0;
+       for (step = 1; step < max_entries; step++) {
+               map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+               memset(keys, 0, max_entries * sizeof(*keys));
+               memset(values, 0, max_entries * value_size);
+               total = 0;
+               /* iteratively lookup/delete elements with 'step'
+                * elements each
+                */
+               count = step;
+               nospace_err = false;
+               while (true) {
+                       err = bpf_map_lookup_batch(map_fd,
+                                                  total ? &batch : NULL,
+                                                  &batch, keys + total,
+                                                  values +
+                                                  total * value_size,
+                                                  &count, &opts);
+                       /* It is possible that we are failing because the
+                        * buffer is not big enough. In that case just exit
+                        * and retry with larger steps. Note that a buffer
+                        * sized for max_entries should always work.
+                        */
+                       if (err && errno == ENOSPC) {
+                               nospace_err = true;
+                               break;
+                       }
+
+                       CHECK((err && errno != ENOENT), "lookup with steps",
+                             "error: %s\n", strerror(errno));
+
+                       total += count;
+                       if (err)
+                               break;
+               }
+               if (nospace_err)
+                       continue;
+
+               CHECK(total != max_entries, "lookup with steps",
+                     "total = %u, max_entries = %u\n", total, max_entries);
+               map_batch_verify(visited, max_entries, keys, values, is_pcpu);
+
+               total = 0;
+               count = step;
+               while (total < max_entries) {
+                       if (max_entries - total < step)
+                               count = max_entries - total;
+                       err = bpf_map_delete_batch(map_fd,
+                                                  keys + total,
+                                                  &count, &opts);
+                       CHECK((err && errno != ENOENT), "delete batch",
+                             "error: %s\n", strerror(errno));
+                       total += count;
+                       if (err)
+                               break;
+               }
+               CHECK(total != max_entries, "delete with steps",
+                     "total = %u, max_entries = %u\n", total, max_entries);
+
+               /* check that the map is empty, errno == ENOENT */
+               err = bpf_map_get_next_key(map_fd, NULL, &key);
+               CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+                     "error: %s\n", strerror(errno));
+
+               /* iteratively lookup/delete elements with 'step'
+                * elements each
+                */
+               map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+               memset(keys, 0, max_entries * sizeof(*keys));
+               memset(values, 0, max_entries * value_size);
+               total = 0;
+               count = step;
+               nospace_err = false;
+               while (true) {
+                       err = bpf_map_lookup_and_delete_batch(map_fd,
+                                                       total ? &batch : NULL,
+                                                       &batch, keys + total,
+                                                       values +
+                                                       total * value_size,
+                                                       &count, &opts);
+                       /* It is possible that we are failing because the
+                        * buffer is not big enough. In that case just exit
+                        * and retry with larger steps. Note that a buffer
+                        * sized for max_entries should always work.
+                        */
+                       if (err && errno == ENOSPC) {
+                               nospace_err = true;
+                               break;
+                       }
+
+                       CHECK((err && errno != ENOENT), "lookup with steps",
+                             "error: %s\n", strerror(errno));
+
+                       total += count;
+                       if (err)
+                               break;
+               }
+
+               if (nospace_err)
+                       continue;
+
+               CHECK(total != max_entries, "lookup/delete with steps",
+                     "total = %u, max_entries = %u\n", total, max_entries);
+
+               map_batch_verify(visited, max_entries, keys, values, is_pcpu);
+               err = bpf_map_get_next_key(map_fd, NULL, &key);
+               CHECK(!err, "bpf_map_get_next_key()", "error: %s\n",
+                     strerror(errno));
+
+               total_success++;
+       }
+
+       CHECK(total_success == 0, "check total_success",
+             "unexpected failure\n");
+       free(keys);
+       free(visited);
+       if (!is_pcpu)
+               free(values);
+}
+
+void htab_map_batch_ops(void)
+{
+       __test_map_lookup_and_delete_batch(false);
+       printf("test_%s:PASS\n", __func__);
+}
+
+void htab_percpu_map_batch_ops(void)
+{
+       __test_map_lookup_and_delete_batch(true);
+       printf("test_%s:PASS\n", __func__);
+}
+
+void test_htab_map_batch_ops(void)
+{
+       htab_map_batch_ops();
+       htab_percpu_map_batch_ops();
+}
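
For reference, the iteration pattern the loops above exercise reduces to the
following minimal sketch (assuming map_fd, keys, values, opts and STEP are set
up as in the test; error handling trimmed):

    /* Drain a hash map with the batched lookup/delete API: pass NULL as
     * in_batch on the first call, then feed the out_batch token back in.
     * ENOENT means the map is exhausted; ENOSPC means the caller's buffer
     * cannot hold one full hash bucket and should be enlarged.
     */
    __u32 batch, count = STEP;
    bool first = true;
    int err;

    do {
            err = bpf_map_lookup_and_delete_batch(map_fd,
                                                  first ? NULL : &batch,
                                                  &batch, keys, values,
                                                  &count, &opts);
            first = false;
            if (err && errno != ENOENT)
                    break;  /* ENOSPC or a real failure */
            /* 'count' now holds the number of elements copied out */
    } while (!err);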
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
new file mode 100644 (file)
index 0000000..8482bbc
--- /dev/null
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/err.h>
+#include <test_progs.h>
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+static const struct timeval timeo_sec = { .tv_sec = 10 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+static int stop, duration;
+
+static int settimeo(int fd)
+{
+       int err;
+
+       err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+                        timeo_optlen);
+       if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
+                 errno))
+               return -1;
+
+       err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
+                        timeo_optlen);
+       if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
+                 errno))
+               return -1;
+
+       return 0;
+}
+
+static int settcpca(int fd, const char *tcp_ca)
+{
+       int err;
+
+       err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
+       if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
+                 errno))
+               return -1;
+
+       return 0;
+}
+
+static void *server(void *arg)
+{
+       int lfd = (int)(long)arg, err = 0, fd;
+       ssize_t nr_sent = 0, bytes = 0;
+       char batch[1500];
+
+       fd = accept(lfd, NULL, NULL);
+       while (fd == -1) {
+               if (errno == EINTR)
+                       continue;
+               err = -errno;
+               goto done;
+       }
+
+       if (settimeo(fd)) {
+               err = -errno;
+               goto done;
+       }
+
+       while (bytes < total_bytes && !READ_ONCE(stop)) {
+               nr_sent = send(fd, &batch,
+                              min(total_bytes - bytes, sizeof(batch)), 0);
+               if (nr_sent == -1 && errno == EINTR)
+                       continue;
+               if (nr_sent == -1) {
+                       err = -errno;
+                       break;
+               }
+               bytes += nr_sent;
+       }
+
+       CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
+             bytes, total_bytes, nr_sent, errno);
+
+done:
+       if (fd != -1)
+               close(fd);
+       if (err) {
+               WRITE_ONCE(stop, 1);
+               return ERR_PTR(err);
+       }
+       return NULL;
+}
+
+static void do_test(const char *tcp_ca)
+{
+       struct sockaddr_in6 sa6 = {};
+       ssize_t nr_recv = 0, bytes = 0;
+       int lfd = -1, fd = -1;
+       pthread_t srv_thread;
+       socklen_t addrlen = sizeof(sa6);
+       void *thread_ret;
+       char batch[1500];
+       int err;
+
+       WRITE_ONCE(stop, 0);
+
+       lfd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+               return;
+       fd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+               close(lfd);
+               return;
+       }
+
+       if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
+           settimeo(lfd) || settimeo(fd))
+               goto done;
+
+       /* bind, listen and start server thread to accept */
+       sa6.sin6_family = AF_INET6;
+       sa6.sin6_addr = in6addr_loopback;
+       err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
+       if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+               goto done;
+       err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
+       if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+               goto done;
+       err = listen(lfd, 1);
+       if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+               goto done;
+       err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
+       if (CHECK(err != 0, "pthread_create", "err:%d\n", err))
+               goto done;
+
+       /* connect to server */
+       err = connect(fd, (struct sockaddr *)&sa6, addrlen);
+       if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+               goto wait_thread;
+
+       /* recv total_bytes */
+       while (bytes < total_bytes && !READ_ONCE(stop)) {
+               nr_recv = recv(fd, &batch,
+                              min(total_bytes - bytes, sizeof(batch)), 0);
+               if (nr_recv == -1 && errno == EINTR)
+                       continue;
+               if (nr_recv == -1)
+                       break;
+               bytes += nr_recv;
+       }
+
+       CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
+             bytes, total_bytes, nr_recv, errno);
+
+wait_thread:
+       WRITE_ONCE(stop, 1);
+       pthread_join(srv_thread, &thread_ret);
+       CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
+             PTR_ERR(thread_ret));
+done:
+       close(lfd);
+       close(fd);
+}
+
+static void test_cubic(void)
+{
+       struct bpf_cubic *cubic_skel;
+       struct bpf_link *link;
+
+       cubic_skel = bpf_cubic__open_and_load();
+       if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+               return;
+
+       link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+       if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
+                 PTR_ERR(link))) {
+               bpf_cubic__destroy(cubic_skel);
+               return;
+       }
+
+       do_test("bpf_cubic");
+
+       bpf_link__destroy(link);
+       bpf_cubic__destroy(cubic_skel);
+}
+
+static void test_dctcp(void)
+{
+       struct bpf_dctcp *dctcp_skel;
+       struct bpf_link *link;
+
+       dctcp_skel = bpf_dctcp__open_and_load();
+       if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+               return;
+
+       link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+       if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
+                 PTR_ERR(link))) {
+               bpf_dctcp__destroy(dctcp_skel);
+               return;
+       }
+
+       do_test("bpf_dctcp");
+
+       bpf_link__destroy(link);
+       bpf_dctcp__destroy(dctcp_skel);
+}
+
+void test_bpf_tcp_ca(void)
+{
+       if (test__start_subtest("dctcp"))
+               test_dctcp();
+       if (test__start_subtest("cubic"))
+               test_cubic();
+}
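
Once bpf_map__attach_struct_ops() has registered the struct_ops map (as in
do_test() above), any TCP socket can opt in by name. A minimal sketch,
assuming the "bpf_cubic" struct_ops above is still attached:

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Select the BPF congestion control for one socket; fails with
     * ENOENT if no CC with this name is currently registered.
     */
    static int use_bpf_cubic(int fd)
    {
            const char *ca = "bpf_cubic";   /* .name of the struct_ops */

            return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
                              ca, strlen(ca));
    }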
index 9486c13af6b2cb697107de857a86d27d0809f987..e9f2f12ba06b54c31d4cddf62a38417ca4bfddab 100644 (file)
@@ -48,6 +48,8 @@ void test_bpf_verif_scale(void)
                { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS },
                { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
 
+               { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+
                /* full unroll by llvm */
                { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
                { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
index 1fa1bdbaffa9d7a655061e06149ecff3779f19de..f7c7e25232beda9ed1b11662181d81b0f95ed24e 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include <bpf/btf.h>
-#include "libbpf_internal.h"
+#include "bpf/libbpf_internal.h"
 
 static int duration = 0;
 
index b426bf2f97e42d41fa7e0ded6de7d7e91c6591d1..db5c74d2ce6d0245e6a7446e4369817c7cf67cfe 100644 (file)
@@ -26,7 +26,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 
        link = calloc(sizeof(struct bpf_link *), prog_cnt);
        prog = calloc(sizeof(struct bpf_program *), prog_cnt);
-       result = malloc(prog_cnt * sizeof(u64));
+       result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64));
        if (CHECK(!link || !prog || !result, "alloc_memory",
                  "failed to alloc memory"))
                goto close_prog;
@@ -98,6 +98,24 @@ static void test_target_yes_callees(void)
                "fexit/test_pkt_access",
                "fexit/test_pkt_access_subprog1",
                "fexit/test_pkt_access_subprog2",
+               "fexit/test_pkt_access_subprog3",
+       };
+       test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
+                                 "./test_pkt_access.o",
+                                 ARRAY_SIZE(prog_name),
+                                 prog_name);
+}
+
+static void test_func_replace(void)
+{
+       const char *prog_name[] = {
+               "fexit/test_pkt_access",
+               "fexit/test_pkt_access_subprog1",
+               "fexit/test_pkt_access_subprog2",
+               "fexit/test_pkt_access_subprog3",
+               "freplace/get_skb_len",
+               "freplace/get_skb_ifindex",
+               "freplace/get_constant",
        };
        test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
                                  "./test_pkt_access.o",
@@ -109,4 +127,5 @@ void test_fexit_bpf2bpf(void)
 {
        test_target_no_callees();
        test_target_yes_callees();
+       test_func_replace();
 }
index cf6c87936c69dec7a1e15ca06caf7fabccb2af78..1450ea2dd4cc947e7367816a250f1a8f6f5fd8b2 100644 (file)
@@ -4,7 +4,7 @@
 #include <sched.h>
 #include <sys/socket.h>
 #include <test_progs.h>
-#include "libbpf_internal.h"
+#include "bpf/libbpf_internal.h"
 
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
index b607112c64e7ac90781b38b16c90921132f61dc6..504abb7bfb959701cefb846c766b30af31df565a 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include "test_send_signal_kern.skel.h"
 
 static volatile int sigusr1_received = 0;
 
@@ -9,17 +10,15 @@ static void sigusr1_handler(int signum)
 }
 
 static void test_send_signal_common(struct perf_event_attr *attr,
-                                   int prog_type,
+                                   bool signal_thread,
                                    const char *test_name)
 {
-       int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
-       const char *file = "./test_send_signal_kern.o";
-       struct bpf_object *obj = NULL;
+       struct test_send_signal_kern *skel;
        int pipe_c2p[2], pipe_p2c[2];
-       __u32 key = 0, duration = 0;
+       int err = -1, pmu_fd = -1;
+       __u32 duration = 0;
        char buf[256];
        pid_t pid;
-       __u64 val;
 
        if (CHECK(pipe(pipe_c2p), test_name,
                  "pipe pipe_c2p error: %s\n", strerror(errno)))
@@ -73,45 +72,39 @@ static void test_send_signal_common(struct perf_event_attr *attr,
        close(pipe_c2p[1]); /* close write */
        close(pipe_p2c[0]); /* close read */
 
-       err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
-       if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
-                 strerror(errno)))
-               goto prog_load_failure;
-
-       pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
-                        -1 /* group id */, 0 /* flags */);
-       if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
-                 strerror(errno))) {
-               err = -1;
-               goto close_prog;
-       }
+       skel = test_send_signal_kern__open_and_load();
+       if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+               goto skel_open_load_failure;
 
-       err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-       if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
-                 strerror(errno)))
-               goto disable_pmu;
-
-       err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-       if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
-                 strerror(errno)))
-               goto disable_pmu;
-
-       err = -1;
-       info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
-       if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
-               goto disable_pmu;
+       if (!attr) {
+               err = test_send_signal_kern__attach(skel);
+               if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+                       err = -1;
+                       goto destroy_skel;
+               }
+       } else {
+               pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+                                -1 /* group id */, 0 /* flags */);
+               if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
+                       strerror(errno))) {
+                       err = -1;
+                       goto destroy_skel;
+               }
 
-       status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
-       if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
-               goto disable_pmu;
+               skel->links.send_signal_perf =
+                       bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
+               if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
+                         "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+                       goto disable_pmu;
+       }
 
        /* wait until child signal handler installed */
        read(pipe_c2p[0], buf, 1);
 
        /* trigger the bpf send_signal */
-       key = 0;
-       val = (((__u64)(SIGUSR1)) << 32) | pid;
-       bpf_map_update_elem(info_map_fd, &key, &val, 0);
+       skel->bss->pid = pid;
+       skel->bss->sig = SIGUSR1;
+       skel->bss->signal_thread = signal_thread;
 
        /* notify child that bpf program can send_signal now */
        write(pipe_p2c[1], buf, 1);
@@ -132,46 +125,20 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 
 disable_pmu:
        close(pmu_fd);
-close_prog:
-       bpf_object__close(obj);
-prog_load_failure:
+destroy_skel:
+       test_send_signal_kern__destroy(skel);
+skel_open_load_failure:
        close(pipe_c2p[0]);
        close(pipe_p2c[1]);
        wait(NULL);
 }
 
-static void test_send_signal_tracepoint(void)
+static void test_send_signal_tracepoint(bool signal_thread)
 {
-       const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
-       struct perf_event_attr attr = {
-               .type = PERF_TYPE_TRACEPOINT,
-               .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
-               .sample_period = 1,
-               .wakeup_events = 1,
-       };
-       __u32 duration = 0;
-       int bytes, efd;
-       char buf[256];
-
-       efd = open(id_path, O_RDONLY, 0);
-       if (CHECK(efd < 0, "tracepoint",
-                 "open syscalls/sys_enter_nanosleep/id failure: %s\n",
-                 strerror(errno)))
-               return;
-
-       bytes = read(efd, buf, sizeof(buf));
-       close(efd);
-       if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
-                 "read syscalls/sys_enter_nanosleep/id failure: %s\n",
-                 strerror(errno)))
-               return;
-
-       attr.config = strtol(buf, NULL, 0);
-
-       test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+       test_send_signal_common(NULL, signal_thread, "tracepoint");
 }
 
-static void test_send_signal_perf(void)
+static void test_send_signal_perf(bool signal_thread)
 {
        struct perf_event_attr attr = {
                .sample_period = 1,
@@ -179,15 +146,13 @@ static void test_send_signal_perf(void)
                .config = PERF_COUNT_SW_CPU_CLOCK,
        };
 
-       test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
-                               "perf_sw_event");
+       test_send_signal_common(&attr, signal_thread, "perf_sw_event");
 }
 
-static void test_send_signal_nmi(void)
+static void test_send_signal_nmi(bool signal_thread)
 {
        struct perf_event_attr attr = {
-               .sample_freq = 50,
-               .freq = 1,
+               .sample_period = 1,
                .type = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
        };
@@ -210,16 +175,21 @@ static void test_send_signal_nmi(void)
                close(pmu_fd);
        }
 
-       test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
-                               "perf_hw_event");
+       test_send_signal_common(&attr, signal_thread, "perf_hw_event");
 }
 
 void test_send_signal(void)
 {
        if (test__start_subtest("send_signal_tracepoint"))
-               test_send_signal_tracepoint();
+               test_send_signal_tracepoint(false);
        if (test__start_subtest("send_signal_perf"))
-               test_send_signal_perf();
+               test_send_signal_perf(false);
        if (test__start_subtest("send_signal_nmi"))
-               test_send_signal_nmi();
+               test_send_signal_nmi(false);
+       if (test__start_subtest("send_signal_tracepoint_thread"))
+               test_send_signal_tracepoint(true);
+       if (test__start_subtest("send_signal_perf_thread"))
+               test_send_signal_perf(true);
+       if (test__start_subtest("send_signal_nmi_thread"))
+               test_send_signal_nmi(true);
 }
index 8974450a4bdb0990a949a0cdd5a7e6a54ab1c4ae..f002e3090d92adcde2aaafbd6a2048abd6f31a5e 100644 (file)
@@ -49,8 +49,12 @@ retry:
        pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
                         0 /* cpu 0 */, -1 /* group id */,
                         0 /* flags */);
-       if (CHECK(pmu_fd < 0, "perf_event_open",
-                 "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+       if (pmu_fd < 0 && errno == ENOENT) {
+               printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+               test__skip();
+               goto cleanup;
+       }
+       if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
                  pmu_fd, errno))
                goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
new file mode 100644 (file)
index 0000000..25b0685
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+
+const char *err_str;
+bool found;
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+                             const char *format, va_list args)
+{
+       char *log_buf;
+
+       if (level != LIBBPF_WARN ||
+           strcmp(format, "libbpf: \n%s\n")) {
+               vprintf(format, args);
+               return 0;
+       }
+
+       log_buf = va_arg(args, char *);
+       if (!log_buf)
+               goto out;
+       /* 'found' flags that the expected error string is missing */
+       if (err_str && strstr(log_buf, err_str) == NULL)
+               found = true;
+out:
+       printf(format, log_buf);
+       return 0;
+}
+
+extern int extra_prog_load_log_flags;
+
+static int check_load(const char *file)
+{
+       struct bpf_prog_load_attr attr;
+       struct bpf_object *obj = NULL;
+       int err, prog_fd;
+
+       memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+       attr.file = file;
+       attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+       attr.log_level = extra_prog_load_log_flags;
+       attr.prog_flags = BPF_F_TEST_RND_HI32;
+       found = false;
+       err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+       bpf_object__close(obj);
+       return err;
+}
+
+struct test_def {
+       const char *file;
+       const char *err_str;
+};
+
+void test_test_global_funcs(void)
+{
+       struct test_def tests[] = {
+               { "test_global_func1.o", "combined stack size of 4 calls is 544" },
+               { "test_global_func2.o" },
+               { "test_global_func3.o", "the call stack of 8 frames" },
+               { "test_global_func4.o" },
+               { "test_global_func5.o", "expected pointer to ctx, but got PTR" },
+               { "test_global_func6.o", "modified ctx ptr R2" },
+               { "test_global_func7.o", "foo() doesn't return scalar" },
+       };
+       libbpf_print_fn_t old_print_fn = NULL;
+       int err, i, duration = 0;
+
+       old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+       for (i = 0; i < ARRAY_SIZE(tests); i++) {
+               const struct test_def *test = &tests[i];
+
+               if (!test__start_subtest(test->file))
+                       continue;
+
+               err_str = test->err_str;
+               err = check_load(test->file);
+               CHECK_FAIL(!!err ^ !!err_str);
+               if (err_str)
+                       CHECK(found, "", "expected string '%s'\n", err_str);
+       }
+       libbpf_set_print(old_print_fn);
+}
index c32aa28bd93f30220422aad7ef3712171f99d521..465b371a561d89040a9c697b58626fa4edc83c7c 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 Facebook */
 #define _GNU_SOURCE
 #include <sched.h>
+#include <sys/prctl.h>
 #include <test_progs.h>
 
 #define MAX_CNT 100000
@@ -17,7 +18,7 @@ static __u64 time_get_ns(void)
 static int test_task_rename(const char *prog)
 {
        int i, fd, duration = 0, err;
-       char buf[] = "test\n";
+       char buf[] = "test_overhead";
        __u64 start_time;
 
        fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
@@ -66,6 +67,10 @@ void test_test_overhead(void)
        struct bpf_object *obj;
        struct bpf_link *link;
        int err, duration = 0;
+       char comm[16] = {};
+
+       if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
+               return;
 
        obj = bpf_object__open_file("./test_overhead.o", NULL);
        if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
@@ -138,5 +143,6 @@ void test_test_overhead(void)
        test_run("fexit");
        bpf_link__destroy(link);
 cleanup:
+       prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
        bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
new file mode 100644 (file)
index 0000000..6b56bdc
--- /dev/null
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <net/if.h>
+#include "test_xdp.skel.h"
+#include "test_xdp_bpf2bpf.skel.h"
+
+void test_xdp_bpf2bpf(void)
+{
+       __u32 duration = 0, retval, size;
+       char buf[128];
+       int err, pkt_fd, map_fd;
+       struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+       struct iptnl_info value4 = {.family = AF_INET};
+       struct test_xdp *pkt_skel = NULL;
+       struct test_xdp_bpf2bpf *ftrace_skel = NULL;
+       struct vip key4 = {.protocol = 6, .family = AF_INET};
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+       /* Load XDP program to introspect */
+       pkt_skel = test_xdp__open_and_load();
+       if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+               return;
+
+       pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
+
+       map_fd = bpf_map__fd(pkt_skel->maps.vip2tnl);
+       bpf_map_update_elem(map_fd, &key4, &value4, 0);
+
+       /* Load trace program */
+       opts.attach_prog_fd = pkt_fd,
+       ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts);
+       if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+               goto out;
+
+       err = test_xdp_bpf2bpf__load(ftrace_skel);
+       if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+               goto out;
+
+       err = test_xdp_bpf2bpf__attach(ftrace_skel);
+       if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+               goto out;
+
+       /* Run test program */
+       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               buf, &size, &retval, &duration);
+
+       if (CHECK(err || retval != XDP_TX || size != 74 ||
+                 iph->protocol != IPPROTO_IPIP, "ipv4",
+                 "err %d errno %d retval %d size %d\n",
+                 err, errno, retval, size))
+               goto out;
+
+       /* Verify test results */
+       if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
+                 "result", "fentry failed err %llu\n",
+                 ftrace_skel->bss->test_result_fentry))
+               goto out;
+
+       CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
+             "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
+
+out:
+       test_xdp__destroy(pkt_skel);
+       test_xdp_bpf2bpf__destroy(ftrace_skel);
+}
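
The BPF side of this test (test_xdp_bpf2bpf.c, not part of this diff) pairs
fentry/fexit programs with the XDP program selected via attach_prog_fd. A
hedged sketch of the shape such a tracing program takes; the globals and field
accesses below are illustrative assumptions, not a copy of the actual file:

    __u64 test_result_fentry, test_result_fexit;

    SEC("fentry/FUNC")      /* FUNC resolves to _xdp_tx_iptunnel here */
    int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
    {
            /* runs before the traced XDP program */
            test_result_fentry = xdp->rxq->dev->ifindex;
            return 0;
    }

    SEC("fexit/FUNC")
    int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
    {
            /* ret is the verdict the traced program returned (XDP_TX) */
            test_result_fexit = ret;
            return 0;
    }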
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
new file mode 100644 (file)
index 0000000..7897c8f
--- /dev/null
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* WARNING: This implementation is not necessarily the same
+ * as tcp_cubic.c.  The purpose is mainly to test
+ * the kernel BPF logic.
+ *
+ * Highlights:
+ * 1. The CONFIG_HZ .kconfig map is used.
+ * 2. In bictcp_update(), the calculation is changed to use usec
+ *    resolution (i.e. USEC_PER_JIFFY) instead of jiffies.
+ *    Thus, usecs_to_jiffies() is not used in bpf_cubic.c.
+ * 3. In bictcp_update() [under tcp_friendliness], the original
+ *    "while (ca->ack_cnt > delta)" loop is changed to the equivalent
+ *    "ca->ack_cnt / delta" operation.
+ */
+
+#include <linux/bpf.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+
+#define BICTCP_BETA_SCALE    1024      /* Scale factor for beta calculation:
+                                        * max_cwnd = snd_cwnd * beta
+                                        */
+#define        BICTCP_HZ               10      /* BIC HZ 2^10 = 1024 */
+
+/* Two methods of hybrid slow start */
+#define HYSTART_ACK_TRAIN      0x1
+#define HYSTART_DELAY          0x2
+
+/* Number of delay samples for detecting the increase of delay */
+#define HYSTART_MIN_SAMPLES    8
+#define HYSTART_DELAY_MIN      (4000U) /* 4ms */
+#define HYSTART_DELAY_MAX      (16000U)        /* 16 ms */
+#define HYSTART_DELAY_THRESH(x)        clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
+
+static int fast_convergence = 1;
+static const int beta = 717;   /* = 717/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh;
+static const int bic_scale = 41;
+static int tcp_friendliness = 1;
+
+static int hystart = 1;
+static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY;
+static int hystart_low_window = 16;
+static int hystart_ack_delta_us = 2000;
+
+static const __u32 cube_rtt_scale = (bic_scale * 10);  /* 1024*c/rtt */
+static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
+                               / (BICTCP_BETA_SCALE - beta);
+/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+ *  so K = cubic_root( (wmax-cwnd)*rtt/c )
+ * the unit of K is bictcp_HZ=2^10, not HZ
+ *
+ *  c = bic_scale >> 10
+ *  rtt = 100ms
+ *
+ * the following code has been designed and tested for
+ * cwnd < 1 million packets
+ * RTT < 100 seconds
+ * HZ < 100,000,000 (corresponding to a 10 nanosecond jiffy)
+ */
+
+/* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */
+static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
+                               / (bic_scale * 10);
+
+/* BIC TCP Parameters */
+struct bictcp {
+       __u32   cnt;            /* increase cwnd by 1 after this many ACKs */
+       __u32   last_max_cwnd;  /* last maximum snd_cwnd */
+       __u32   last_cwnd;      /* the last snd_cwnd */
+       __u32   last_time;      /* time when updated last_cwnd */
+       __u32   bic_origin_point;/* origin point of bic function */
+       __u32   bic_K;          /* time to origin point
+                                  from the beginning of the current epoch */
+       __u32   delay_min;      /* min delay (usec) */
+       __u32   epoch_start;    /* beginning of an epoch */
+       __u32   ack_cnt;        /* number of acks */
+       __u32   tcp_cwnd;       /* estimated tcp cwnd */
+       __u16   unused;
+       __u8    sample_cnt;     /* number of samples to decide curr_rtt */
+       __u8    found;          /* whether the hystart exit point was found */
+       __u32   round_start;    /* beginning of each round */
+       __u32   end_seq;        /* end_seq of the round */
+       __u32   last_ack;       /* last time when the ACK spacing is close */
+       __u32   curr_rtt;       /* the minimum rtt of current round */
+};
+
+static inline void bictcp_reset(struct bictcp *ca)
+{
+       ca->cnt = 0;
+       ca->last_max_cwnd = 0;
+       ca->last_cwnd = 0;
+       ca->last_time = 0;
+       ca->bic_origin_point = 0;
+       ca->bic_K = 0;
+       ca->delay_min = 0;
+       ca->epoch_start = 0;
+       ca->ack_cnt = 0;
+       ca->tcp_cwnd = 0;
+       ca->found = 0;
+}
+
+extern unsigned long CONFIG_HZ __kconfig;
+#define HZ CONFIG_HZ
+#define USEC_PER_MSEC  1000UL
+#define USEC_PER_SEC   1000000UL
+#define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
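+/* e.g. with CONFIG_HZ=1000 a jiffy is 1000 usec; with CONFIG_HZ=250 it is 4000 */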
+
+static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+{
+       return dividend / divisor;
+}
+
+#define div64_ul div64_u64
+
+#define BITS_PER_U64 (sizeof(__u64) * 8)
+static __always_inline int fls64(__u64 x)
+{
+       int num = BITS_PER_U64 - 1;
+
+       if (x == 0)
+               return 0;
+
+       if (!(x & (~0ull << (BITS_PER_U64-32)))) {
+               num -= 32;
+               x <<= 32;
+       }
+       if (!(x & (~0ull << (BITS_PER_U64-16)))) {
+               num -= 16;
+               x <<= 16;
+       }
+       if (!(x & (~0ull << (BITS_PER_U64-8)))) {
+               num -= 8;
+               x <<= 8;
+       }
+       if (!(x & (~0ull << (BITS_PER_U64-4)))) {
+               num -= 4;
+               x <<= 4;
+       }
+       if (!(x & (~0ull << (BITS_PER_U64-2)))) {
+               num -= 2;
+               x <<= 2;
+       }
+       if (!(x & (~0ull << (BITS_PER_U64-1))))
+               num -= 1;
+
+       return num + 1;
+}
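+/* e.g. fls64(0) == 0, fls64(1) == 1, fls64(1000) == 10, fls64(~0ull) == 64 */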
+
+static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
+{
+       return tcp_sk(sk)->tcp_mstamp;
+}
+
+static __always_inline void bictcp_hystart_reset(struct sock *sk)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
+
+       ca->round_start = ca->last_ack = bictcp_clock_us(sk);
+       ca->end_seq = tp->snd_nxt;
+       ca->curr_rtt = ~0U;
+       ca->sample_cnt = 0;
+}
+
+/* "struct_ops/" prefix is not a requirement
+ * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS
+ * as long as it is used in one of the func ptr
+ * under SEC(".struct_ops").
+ */
+SEC("struct_ops/bictcp_init")
+void BPF_PROG(bictcp_init, struct sock *sk)
+{
+       struct bictcp *ca = inet_csk_ca(sk);
+
+       bictcp_reset(ca);
+
+       if (hystart)
+               bictcp_hystart_reset(sk);
+
+       if (!hystart && initial_ssthresh)
+               tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+}
+
+/* A SEC name without a prefix also works.
+ * The remaining tcp-cubic functions use the easier
+ * BPF_STRUCT_OPS macro instead.
+ */
+SEC("no-sec-prefix-bictcp_cwnd_event")
+void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+       if (event == CA_EVENT_TX_START) {
+               struct bictcp *ca = inet_csk_ca(sk);
+               __u32 now = tcp_jiffies32;
+               __s32 delta;
+
+               delta = now - tcp_sk(sk)->lsndtime;
+
+               /* We were application limited (idle) for a while.
+                * Shift epoch_start to keep cwnd growth to cubic curve.
+                */
+               if (ca->epoch_start && delta > 0) {
+                       ca->epoch_start += delta;
+                       if (after(ca->epoch_start, now))
+                               ca->epoch_start = now;
+               }
+               return;
+       }
+}
+
+/*
+ * cbrt(x) MSB values for x MSB values in [0..63].
+ * Precomputed then refined by hand - Willy Tarreau
+ *
+ * For x in [0..63],
+ *   v = cbrt(x << 18) - 1
+ *   cbrt(x) = (v[x] + 10) >> 6
+ */
+static const __u8 v[] = {
+       /* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
+       /* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
+       /* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
+       /* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
+       /* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
+       /* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
+       /* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
+       /* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
+};
+
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
+ */
+static __always_inline __u32 cubic_root(__u64 a)
+{
+       __u32 x, b, shift;
+
+       if (a < 64) {
+               /* a in [0..63] */
+               return ((__u32)v[(__u32)a] + 35) >> 6;
+       }
+
+       b = fls64(a);
+       b = ((b * 84) >> 8) - 1;
+       shift = (a >> (b * 3));
+
+       /* needed to satisfy the verifier's bounds check on v */
+       if (shift >= 64)
+               return 0;
+
+       x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6;
+
+       /*
+        * Newton-Raphson iteration
+        *                         2
+        * x    = ( 2 * x  +  a / x  ) / 3
+        *  k+1          k         k
+        */
+       x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1)));
+       x = ((x * 341) >> 10);
+       return x;
+}
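+/* Worked example: cubic_root(1000) -> fls64(1000) = 10, b = 2,
+ * shift = 1000 >> 6 = 15, seed x = ((v[15] + 10) << 2) >> 6 = 10,
+ * and the Newton-Raphson step keeps x = 10, the exact cube root.
+ */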
+
+/*
+ * Compute congestion window to use.
+ */
+static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
+                                         __u32 acked)
+{
+       __u32 delta, bic_target, max_cnt;
+       __u64 offs, t;
+
+       ca->ack_cnt += acked;   /* count the number of ACKed packets */
+
+       if (ca->last_cwnd == cwnd &&
+           (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
+               return;
+
+       /* The CUBIC function can update ca->cnt at most once per jiffy.
+        * On all cwnd reduction events, ca->epoch_start is set to 0,
+        * which will force a recalculation of ca->cnt.
+        */
+       if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
+               goto tcp_friendliness;
+
+       ca->last_cwnd = cwnd;
+       ca->last_time = tcp_jiffies32;
+
+       if (ca->epoch_start == 0) {
+               ca->epoch_start = tcp_jiffies32;        /* record beginning */
+               ca->ack_cnt = acked;                    /* start counting */
+               ca->tcp_cwnd = cwnd;                    /* sync with cubic */
+
+               if (ca->last_max_cwnd <= cwnd) {
+                       ca->bic_K = 0;
+                       ca->bic_origin_point = cwnd;
+               } else {
+                       /* Compute new K based on
+                        * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
+                        */
+                       ca->bic_K = cubic_root(cube_factor
+                                              * (ca->last_max_cwnd - cwnd));
+                       ca->bic_origin_point = ca->last_max_cwnd;
+               }
+       }
+
+       /* cubic function - calc */
+       /* calculate c * time^3 / rtt,
+        * while considering overflow in the calculation of time^3
+        * (so time^3 is computed using 64 bit)
+        * and without support for division of 64bit numbers
+        * (so all divisions are done using 32 bit)
+        * also NOTE the units of these variables:
+        *        time = (t - K) / 2^bictcp_HZ
+        *        c    = bic_scale >> 10
+        *        rtt  = (srtt >> 3) / HZ
+        * !!! The following code does not have overflow problems
+        * if the cwnd < 1 million packets !!!
+        */
+
+       t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY;
+       t += ca->delay_min;
+       /* change the unit from usec to bictcp_HZ */
+       t <<= BICTCP_HZ;
+       t /= USEC_PER_SEC;
+
+       if (t < ca->bic_K)              /* t - K */
+               offs = ca->bic_K - t;
+       else
+               offs = t - ca->bic_K;
+
+       /* c/rtt * (t-K)^3 */
+       delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
+       if (t < ca->bic_K)                            /* below origin*/
+               bic_target = ca->bic_origin_point - delta;
+       else                                          /* above origin*/
+               bic_target = ca->bic_origin_point + delta;
+
+       /* cubic function - calc bictcp_cnt*/
+       if (bic_target > cwnd) {
+               ca->cnt = cwnd / (bic_target - cwnd);
+       } else {
+               ca->cnt = 100 * cwnd;              /* very small increment*/
+       }
+
+       /*
+        * The initial growth of cubic function may be too conservative
+        * when the available bandwidth is still unknown.
+        */
+       if (ca->last_max_cwnd == 0 && ca->cnt > 20)
+               ca->cnt = 20;   /* increase cwnd 5% per RTT */
+
+tcp_friendliness:
+       /* TCP Friendly */
+       if (tcp_friendliness) {
+               __u32 scale = beta_scale;
+               __u32 n;
+
+               /* update tcp cwnd */
+               delta = (cwnd * scale) >> 3;
+               if (ca->ack_cnt > delta && delta) {
+                       n = ca->ack_cnt / delta;
+                       ca->ack_cnt -= n * delta;
+                       ca->tcp_cwnd += n;
+               }
+
+               if (ca->tcp_cwnd > cwnd) {      /* if bic is slower than tcp */
+                       delta = ca->tcp_cwnd - cwnd;
+                       max_cnt = cwnd / delta;
+                       if (ca->cnt > max_cnt)
+                               ca->cnt = max_cnt;
+               }
+       }
+
+       /* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+        * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+        */
+       ca->cnt = max(ca->cnt, 2U);
+}
+
+/* Or simply use BPF_STRUCT_OPS to avoid the SEC boilerplate. */
+void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
+
+       if (!tcp_is_cwnd_limited(sk))
+               return;
+
+       if (tcp_in_slow_start(tp)) {
+               if (hystart && after(ack, ca->end_seq))
+                       bictcp_hystart_reset(sk);
+               acked = tcp_slow_start(tp, acked);
+               if (!acked)
+                       return;
+       }
+       bictcp_update(ca, tp->snd_cwnd, acked);
+       tcp_cong_avoid_ai(tp, ca->cnt, acked);
+}
+
+__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
+
+       ca->epoch_start = 0;    /* end of epoch */
+
+       /* Wmax and fast convergence */
+       if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+               ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+                       / (2 * BICTCP_BETA_SCALE);
+       else
+               ca->last_max_cwnd = tp->snd_cwnd;
+
+       return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+}
+
+void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+{
+       if (new_state == TCP_CA_Loss) {
+               bictcp_reset(inet_csk_ca(sk));
+               bictcp_hystart_reset(sk);
+       }
+}
+
+#define GSO_MAX_SIZE           65536
+
+/* Account for TSO/GRO delays.
+ * Otherwise short RTT flows could get too small ssthresh, since during
+ * slow start we begin with small TSO packets and ca->delay_min would
+ * not account for long aggregation delay when TSO packets get bigger.
+ * Ideally even with a very small RTT we would like to have at least one
+ * TSO packet being sent and received by GRO, and another one in qdisc layer.
+ * We apply another 100% factor because @rate is doubled at this point.
+ * We cap the cushion to 1ms.
+ */
+static __always_inline __u32 hystart_ack_delay(struct sock *sk)
+{
+       unsigned long rate;
+
+       rate = sk->sk_pacing_rate;
+       if (!rate)
+               return 0;
+       return min((__u64)USEC_PER_MSEC,
+                  div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+}
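+/* e.g. at sk_pacing_rate = 1250000000 (~10 Gbit/s) the cushion is
+ * 65536 * 4 * 1000000 / 1250000000 ~= 209 usec; slower rates hit the 1 ms cap.
+ */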
+
+static __always_inline void hystart_update(struct sock *sk, __u32 delay)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
+       __u32 threshold;
+
+       CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld\n",
+             PTR_ERR(thread_ret));
+
+               /* first detection parameter - ack-train detection */
+               if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
+                       ca->last_ack = now;
+
+                       threshold = ca->delay_min + hystart_ack_delay(sk);
+
+                       /* Hystart ack train triggers if we get ack past
+                        * ca->delay_min/2.
+                        * Pacing might have delayed packets up to RTT/2
+                        * during slow start.
+                        */
+                       if (sk->sk_pacing_status == SK_PACING_NONE)
+                               threshold >>= 1;
+
+                       if ((__s32)(now - ca->round_start) > threshold) {
+                               ca->found = 1;
+                               tp->snd_ssthresh = tp->snd_cwnd;
+                       }
+               }
+       }
+
+       if (hystart_detect & HYSTART_DELAY) {
+               /* obtain the minimum delay of more than sampling packets */
+               if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
+                       if (ca->curr_rtt > delay)
+                               ca->curr_rtt = delay;
+
+                       ca->sample_cnt++;
+               } else {
+                       if (ca->curr_rtt > ca->delay_min +
+                           HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
+                               ca->found = 1;
+                               tp->snd_ssthresh = tp->snd_cwnd;
+                       }
+               }
+       }
+}
+
+void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+                   const struct ack_sample *sample)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct bictcp *ca = inet_csk_ca(sk);
+       __u32 delay;
+
+       /* Some calls are for duplicates without timestamps */
+       if (sample->rtt_us < 0)
+               return;
+
+       /* Discard delay samples right after fast recovery */
+       if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
+               return;
+
+       delay = sample->rtt_us;
+       if (delay == 0)
+               delay = 1;
+
+       /* first time call or link delay decreases */
+       if (ca->delay_min == 0 || ca->delay_min > delay)
+               ca->delay_min = delay;
+
+       /* hystart triggers when cwnd is larger than some threshold */
+       if (!ca->found && tcp_in_slow_start(tp) && hystart &&
+           tp->snd_cwnd >= hystart_low_window)
+               hystart_update(sk, delay);
+}
+
+__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+
+       return max(tp->snd_cwnd, tp->prior_cwnd);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops cubic = {
+       .init           = (void *)bictcp_init,
+       .ssthresh       = (void *)bictcp_recalc_ssthresh,
+       .cong_avoid     = (void *)bictcp_cong_avoid,
+       .set_state      = (void *)bictcp_state,
+       .undo_cwnd      = (void *)tcp_reno_undo_cwnd,
+       .cwnd_event     = (void *)bictcp_cwnd_event,
+       .pkts_acked     = (void *)bictcp_acked,
+       .name           = "bpf_cubic",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
new file mode 100644 (file)
index 0000000..b631fb5
--- /dev/null
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+/* WARNING: This implementation is not necessarily the same
+ * as tcp_dctcp.c.  The purpose is mainly to test
+ * the kernel BPF logic.
+ */
+
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define DCTCP_MAX_ALPHA        1024U
+
+struct dctcp {
+       __u32 old_delivered;
+       __u32 old_delivered_ce;
+       __u32 prior_rcv_nxt;
+       __u32 dctcp_alpha;
+       __u32 next_seq;
+       __u32 ce_state;
+       __u32 loss_cwnd;
+};
+
+static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
+static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
+
+static __always_inline void dctcp_reset(const struct tcp_sock *tp,
+                                       struct dctcp *ca)
+{
+       ca->next_seq = tp->snd_nxt;
+
+       ca->old_delivered = tp->delivered;
+       ca->old_delivered_ce = tp->delivered_ce;
+}
+
+SEC("struct_ops/dctcp_init")
+void BPF_PROG(dctcp_init, struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct dctcp *ca = inet_csk_ca(sk);
+
+       ca->prior_rcv_nxt = tp->rcv_nxt;
+       ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+       ca->loss_cwnd = 0;
+       ca->ce_state = 0;
+
+       dctcp_reset(tp, ca);
+}
+
+SEC("struct_ops/dctcp_ssthresh")
+__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
+{
+       struct dctcp *ca = inet_csk_ca(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       ca->loss_cwnd = tp->snd_cwnd;
+       return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+}
+
+SEC("struct_ops/dctcp_update_alpha")
+void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct dctcp *ca = inet_csk_ca(sk);
+
+       /* Expired RTT */
+       if (!before(tp->snd_una, ca->next_seq)) {
+               __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
+               __u32 alpha = ca->dctcp_alpha;
+
+               /* alpha = (1 - g) * alpha + g * F */
+
+               alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
+               if (delivered_ce) {
+                       __u32 delivered = tp->delivered - ca->old_delivered;
+
+                       /* If dctcp_shift_g == 1, a 32bit value would overflow
+                        * after 8 M packets.
+                        */
+                       delivered_ce <<= (10 - dctcp_shift_g);
+                       delivered_ce /= max(1U, delivered);
+
+                       alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
+               }
+               ca->dctcp_alpha = alpha;
+               dctcp_reset(tp, ca);
+       }
+}
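+/* Worked example: with dctcp_shift_g = 4 (g = 1/16), alpha = 1024, and an
+ * RTT that delivered 100 segments of which 10 were CE-marked:
+ * alpha -= 1024 >> 4 -> 960; (10 << 6) / 100 = 6; alpha becomes 966.
+ */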
+
+static __always_inline void dctcp_react_to_loss(struct sock *sk)
+{
+       struct dctcp *ca = inet_csk_ca(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       ca->loss_cwnd = tp->snd_cwnd;
+       tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+SEC("struct_ops/dctcp_state")
+void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
+{
+       if (new_state == TCP_CA_Recovery &&
+           new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state))
+               dctcp_react_to_loss(sk);
+       /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+        * one loss-adjustment per RTT.
+        */
+}
+
+static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (ce_state == 1)
+               tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+       else
+               tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+/* Minimal DCTCP CE state machine:
+ *
+ * S:  0 <- last pkt was non-CE
+ *     1 <- last pkt was CE
+ */
+static __always_inline
+void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+                         __u32 *prior_rcv_nxt, __u32 *ce_state)
+{
+       __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
+
+       if (*ce_state != new_ce_state) {
+               /* CE state has changed, force an immediate ACK to
+                * reflect the new CE state. If an ACK was delayed,
+                * send that first to reflect the prior CE state.
+                */
+               if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+                       dctcp_ece_ack_cwr(sk, *ce_state);
+                       bpf_tcp_send_ack(sk, *prior_rcv_nxt);
+               }
+               inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+       }
+       *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
+       *ce_state = new_ce_state;
+       dctcp_ece_ack_cwr(sk, new_ce_state);
+}
+
+SEC("struct_ops/dctcp_cwnd_event")
+void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
+{
+       struct dctcp *ca = inet_csk_ca(sk);
+
+       switch (ev) {
+       case CA_EVENT_ECN_IS_CE:
+       case CA_EVENT_ECN_NO_CE:
+               dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
+               break;
+       case CA_EVENT_LOSS:
+               dctcp_react_to_loss(sk);
+               break;
+       default:
+               /* Don't care for the rest. */
+               break;
+       }
+}
+
+SEC("struct_ops/dctcp_cwnd_undo")
+__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
+{
+       const struct dctcp *ca = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
+SEC("struct_ops/tcp_reno_cong_avoid")
+void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (!tcp_is_cwnd_limited(sk))
+               return;
+
+       /* In "safe" area, increase. */
+       if (tcp_in_slow_start(tp)) {
+               acked = tcp_slow_start(tp, acked);
+               if (!acked)
+                       return;
+       }
+       /* In dangerous area, increase slowly. */
+       tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_nouse = {
+       .init           = (void *)dctcp_init,
+       .set_state      = (void *)dctcp_state,
+       .flags          = TCP_CONG_NEEDS_ECN,
+       .name           = "bpf_dctcp_nouse",
+};
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp = {
+       .init           = (void *)dctcp_init,
+       .in_ack_event   = (void *)dctcp_update_alpha,
+       .cwnd_event     = (void *)dctcp_cwnd_event,
+       .ssthresh       = (void *)dctcp_ssthresh,
+       .cong_avoid     = (void *)tcp_reno_cong_avoid,
+       .undo_cwnd      = (void *)dctcp_cwnd_undo,
+       .set_state      = (void *)dctcp_state,
+       .flags          = TCP_CONG_NEEDS_ECN,
+       .name           = "bpf_dctcp",
+};
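+
+/* Minimal user-space attach sketch (an editorial assumption, not part of
+ * this patch): with libbpf, the ".struct_ops" map above is registered by
+ * attaching it, after which "bpf_dctcp" becomes selectable through the
+ * net.ipv4.tcp_congestion_control sysctl or the TCP_CONGESTION sockopt.
+ */
+#if 0 /* illustrative only; needs <bpf/libbpf.h> and a built bpf_dctcp.o */
+#include <bpf/libbpf.h>
+
+static int attach_dctcp(void)
+{
+       struct bpf_object *obj = bpf_object__open_file("bpf_dctcp.o", NULL);
+       struct bpf_map *map;
+
+       if (libbpf_get_error(obj) || bpf_object__load(obj))
+               return -1;
+       map = bpf_object__find_map_by_name(obj, "dctcp");
+       /* registers the congestion-control ops under .name ("bpf_dctcp") */
+       return libbpf_get_error(bpf_map__attach_struct_ops(map)) ? -1 : 0;
+}
+#endif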
index 040a44206f29f88ab0b4a541faec1965f19e3a59..9941f0ba471e72f1b893b68172d43f518cf41ae9 100644 (file)
@@ -16,8 +16,8 @@
 #include <sys/socket.h>
 #include <linux/if_tunnel.h>
 #include <linux/mpls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 #define PROG(F) SEC(#F) int bpf_func_##F
index 1fd244d35ba93558fc0d5a3d8f07e08c2a45c4b2..75085119c5bbf739d9c6bf9441ab65093fa93e56 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/in6.h>
 #include <sys/socket.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define SRC_REWRITE_IP4                0x7f000004U
 #define DST_REWRITE_IP4                0x7f000001U
index 26397ab7b3c784f807947b165b0fe7a7dc7a9bd5..506d0f81a375699ba7aefaeb83e32b862b6e8594 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/in6.h>
 #include <sys/socket.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define SRC_REWRITE_IP6_0      0
 #define SRC_REWRITE_IP6_1      0
index ce41a3475f27e3e8c6d272bb11efc5a695980691..8924e06bdef0634a0bb2763c287c2fb118e5f118 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("cgroup/dev")
 int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
index 615f7c6bca770cbbf0cae7199d4126bf478b8427..38d3a82144ca89e640fe3ee9b925697ad9f7dac4 100644 (file)
@@ -1,43 +1,46 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_trace_helpers.h"
 
 char _license[] SEC("license") = "GPL";
 
 __u64 test1_result = 0;
-BPF_TRACE_1("fentry/bpf_fentry_test1", test1, int, a)
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
 {
        test1_result = a == 1;
        return 0;
 }
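+
+/* For reference (a paraphrase, not the exact bpf_trace_helpers.h text):
+ * BPF_PROG(test1, int a) roughly expands to
+ *
+ *     int test1(unsigned long long *ctx)
+ *     {
+ *             return ____test1(ctx, (int)ctx[0]);
+ *     }
+ *     static __always_inline int ____test1(unsigned long long *ctx, int a)
+ *
+ * i.e. the typed arguments are pulled out of the raw context array, which
+ * is what lets these definitions drop the old BPF_TRACE_<n> macros that
+ * passed types and names as separate parameters.
+ */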
 
 __u64 test2_result = 0;
-BPF_TRACE_2("fentry/bpf_fentry_test2", test2, int, a, __u64, b)
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test2, int a, __u64 b)
 {
        test2_result = a == 2 && b == 3;
        return 0;
 }
 
 __u64 test3_result = 0;
-BPF_TRACE_3("fentry/bpf_fentry_test3", test3, char, a, int, b, __u64, c)
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG(test3, char a, int b, __u64 c)
 {
        test3_result = a == 4 && b == 5 && c == 6;
        return 0;
 }
 
 __u64 test4_result = 0;
-BPF_TRACE_4("fentry/bpf_fentry_test4", test4,
-           void *, a, char, b, int, c, __u64, d)
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG(test4, void *a, char b, int c, __u64 d)
 {
        test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10;
        return 0;
 }
 
 __u64 test5_result = 0;
-BPF_TRACE_5("fentry/bpf_fentry_test5", test5,
-           __u64, a, void *, b, short, c, int, d, __u64, e)
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e)
 {
        test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
                e == 15;
@@ -45,8 +48,8 @@ BPF_TRACE_5("fentry/bpf_fentry_test5", test5,
 }
 
 __u64 test6_result = 0;
-BPF_TRACE_6("fentry/bpf_fentry_test6", test6,
-           __u64, a, void *, b, short, c, int, d, void *, e, __u64, f)
+SEC("fentry/bpf_fentry_test6")
+int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f)
 {
        test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
                e == (void *)20 && f == 21;
index 2d211ee98a1c92b99afb47cad0a216ca792edb65..c329fccf9842c24a4a028d1ac88bdabed21a009e 100644 (file)
@@ -1,7 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 #include "bpf_trace_helpers.h"
 
 struct sk_buff {
@@ -9,8 +12,8 @@ struct sk_buff {
 };
 
 __u64 test_result = 0;
-BPF_TRACE_2("fexit/test_pkt_access", test_main,
-           struct sk_buff *, skb, int, ret)
+SEC("fexit/test_pkt_access")
+int BPF_PROG(test_main, struct sk_buff *skb, int ret)
 {
        int len;
 
@@ -24,8 +27,8 @@ BPF_TRACE_2("fexit/test_pkt_access", test_main,
 }
 
 __u64 test_result_subprog1 = 0;
-BPF_TRACE_2("fexit/test_pkt_access_subprog1", test_subprog1,
-           struct sk_buff *, skb, int, ret)
+SEC("fexit/test_pkt_access_subprog1")
+int BPF_PROG(test_subprog1, struct sk_buff *skb, int ret)
 {
        int len;
 
@@ -79,4 +82,73 @@ int test_subprog2(struct args_subprog2 *ctx)
        test_result_subprog2 = 1;
        return 0;
 }
+
+__u64 test_result_subprog3 = 0;
+SEC("fexit/test_pkt_access_subprog3")
+int BPF_PROG(test_subprog3, int val, struct sk_buff *skb, int ret)
+{
+       int len;
+
+       __builtin_preserve_access_index(({
+               len = skb->len;
+       }));
+       if (len != 74 || ret != 74 * val || val != 3)
+               return 0;
+       test_result_subprog3 = 1;
+       return 0;
+}
+
+__u64 test_get_skb_len = 0;
+SEC("freplace/get_skb_len")
+int new_get_skb_len(struct __sk_buff *skb)
+{
+       int len = skb->len;
+
+       if (len != 74)
+               return 0;
+       test_get_skb_len = 1;
+       return 74; /* original get_skb_len() returns skb->len */
+}
+
+__u64 test_get_skb_ifindex = 0;
+SEC("freplace/get_skb_ifindex")
+int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var)
+{
+       void *data_end = (void *)(long)skb->data_end;
+       void *data = (void *)(long)skb->data;
+       struct ipv6hdr ip6, *ip6p;
+       int ifindex = skb->ifindex;
+       __u32 eth_proto;
+       __u32 nh_off;
+
+       /* check that BPF extension can read packet via direct packet access */
+       if (data + 14 + sizeof(ip6) > data_end)
+               return 0;
+       ip6p = data + 14;
+
+       if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123))
+               return 0;
+
+       /* check that legacy packet access helper works too */
+       if (bpf_skb_load_bytes(skb, 14, &ip6, sizeof(ip6)) < 0)
+               return 0;
+       ip6p = &ip6;
+       if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123))
+               return 0;
+
+       if (ifindex != 1 || val != 3 || var != 1)
+               return 0;
+       test_get_skb_ifindex = 1;
+       return 3; /* original get_skb_ifindex() returns val * ifindex * var */
+}
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int new_get_constant(long val)
+{
+       if (val != 123)
+               return 0;
+       test_get_constant = 1;
+       return test_get_constant; /* original get_constant() returns val - 122 */
+}
 char _license[] SEC("license") = "GPL";
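+
+/* How the freplace programs above are wired up from user space (a sketch
+ * based on libbpf as of this series; treat names and flow as assumptions):
+ */
+#if 0 /* illustrative only */
+#include <bpf/libbpf.h>
+
+static int attach_skb_len_replacement(int target_prog_fd)
+{
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+                           .attach_prog_fd = target_prog_fd);
+       struct bpf_object *obj = bpf_object__open_file("fexit_bpf2bpf.o", &opts);
+       struct bpf_program *prog;
+
+       if (libbpf_get_error(obj) || bpf_object__load(obj))
+               return -1;
+       prog = bpf_object__find_program_by_title(obj, "freplace/get_skb_len");
+       /* replaces the target's get_skb_len() subprog with new_get_skb_len() */
+       return libbpf_get_error(bpf_program__attach_trace(prog)) ? -1 : 0;
+}
+#endif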
index ebc0ab7f0f5ca39724ba90766058d056e1797993..92f3fa47cf4016781b0ad1c03a4203719e078440 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_trace_helpers.h"
 
 struct sk_buff {
@@ -9,8 +9,9 @@ struct sk_buff {
 };
 
 __u64 test_result = 0;
-BPF_TRACE_2("fexit/test_pkt_md_access", test_main2,
-           struct sk_buff *, skb, int, ret)
+
+SEC("fexit/test_pkt_md_access")
+int BPF_PROG(test_main2, struct sk_buff *skb, int ret)
 {
        int len;
 
index 86db0d60fb6ed7bb5e2b1d8635cf4ab24bf1779f..348109b9ea07865452e5b99026851fdc962ae58e 100644 (file)
@@ -1,45 +1,47 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_trace_helpers.h"
 
 char _license[] SEC("license") = "GPL";
 
 __u64 test1_result = 0;
-BPF_TRACE_2("fexit/bpf_fentry_test1", test1, int, a, int, ret)
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(test1, int a, int ret)
 {
        test1_result = a == 1 && ret == 2;
        return 0;
 }
 
 __u64 test2_result = 0;
-BPF_TRACE_3("fexit/bpf_fentry_test2", test2, int, a, __u64, b, int, ret)
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a, __u64 b, int ret)
 {
        test2_result = a == 2 && b == 3 && ret == 5;
        return 0;
 }
 
 __u64 test3_result = 0;
-BPF_TRACE_4("fexit/bpf_fentry_test3", test3, char, a, int, b, __u64, c, int, ret)
+SEC("fexit/bpf_fentry_test3")
+int BPF_PROG(test3, char a, int b, __u64 c, int ret)
 {
        test3_result = a == 4 && b == 5 && c == 6 && ret == 15;
        return 0;
 }
 
 __u64 test4_result = 0;
-BPF_TRACE_5("fexit/bpf_fentry_test4", test4,
-           void *, a, char, b, int, c, __u64, d, int, ret)
+SEC("fexit/bpf_fentry_test4")
+int BPF_PROG(test4, void *a, char b, int c, __u64 d, int ret)
 {
-
        test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10 &&
                ret == 34;
        return 0;
 }
 
 __u64 test5_result = 0;
-BPF_TRACE_6("fexit/bpf_fentry_test5", test5,
-           __u64, a, void *, b, short, c, int, d, __u64, e, int, ret)
+SEC("fexit/bpf_fentry_test5")
+int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e, int ret)
 {
        test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
                e == 15 && ret == 65;
@@ -47,9 +49,8 @@ BPF_TRACE_6("fexit/bpf_fentry_test5", test5,
 }
 
 __u64 test6_result = 0;
-BPF_TRACE_7("fexit/bpf_fentry_test6", test6,
-           __u64, a, void *, b, short, c, int, d, void *, e, __u64, f,
-           int, ret)
+SEC("fexit/bpf_fentry_test6")
+int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret)
 {
        test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
                e == (void *)20 && f == 21 && ret == 111;
index 16c54ade6888c78930877bc373937d0dadc141c9..6b42db2fe391a435f6428d66e64d024bc6a3d956 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (c) 2018 Facebook
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
index 974d6f3bb3199425268d33a9fe4ecb46edaccd91..8f48a909f079bd70bb20b95d4c5d4e35b3b0325d 100644 (file)
@@ -2,8 +2,8 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include <stdbool.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 #include "bpf_trace_helpers.h"
 
 char _license[] SEC("license") = "GPL";
@@ -57,8 +57,8 @@ struct meta {
 /* TRACE_EVENT(kfree_skb,
  *         TP_PROTO(struct sk_buff *skb, void *location),
  */
-BPF_TRACE_2("tp_btf/kfree_skb", trace_kfree_skb,
-           struct sk_buff *, skb, void *, location)
+SEC("tp_btf/kfree_skb")
+int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
 {
        struct net_device *dev;
        struct callback_head *ptr;
@@ -114,9 +114,9 @@ static volatile struct {
        bool fexit_test_ok;
 } result;
 
-BPF_TRACE_3("fentry/eth_type_trans", fentry_eth_type_trans,
-           struct sk_buff *, skb, struct net_device *, dev,
-           unsigned short, protocol)
+SEC("fentry/eth_type_trans")
+int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
+            unsigned short protocol)
 {
        int len, ifindex;
 
@@ -132,9 +132,9 @@ BPF_TRACE_3("fentry/eth_type_trans", fentry_eth_type_trans,
        return 0;
 }
 
-BPF_TRACE_3("fexit/eth_type_trans", fexit_eth_type_trans,
-           struct sk_buff *, skb, struct net_device *, dev,
-           unsigned short, protocol)
+SEC("fexit/eth_type_trans")
+int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
+            unsigned short protocol)
 {
        int len, ifindex;
 
index 40ac722a9da5e850363af912a264b2b6de4abdb3..50e66772c0467c67e18a91cddcb3f4b9745857ce 100644 (file)
@@ -6,8 +6,8 @@
 #include <stddef.h>
 #include <stdbool.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index bb80f29aa7f7fd746a1ea460227e8347f52975bd..947bb7e988c21d9c6a83591fc3477c8b98fde783 100644 (file)
@@ -6,8 +6,8 @@
 #include <stddef.h>
 #include <stdbool.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 2b9165a7afe1903b6daebb2e9b8b30bc63c00249..76e93b31c14b95308d6995e685a7fb5e57d13986 100644 (file)
@@ -6,8 +6,8 @@
 #include <stddef.h>
 #include <stdbool.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 650859022771b580fbbc53c056ada3993e3e0567..b35337926d665b3a07e49850ea8644403e0c83a4 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
index 28d1d668f07ce522ee67a9d780a595bf91658ec5..913791923fa3249b584611e2e69628811bc1beeb 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #define barrier() __asm__ __volatile__("": : :"memory")
 
 char _license[] SEC("license") = "GPL";
index 38a997852cadb2cad2e094b8415d7527a2d0ab80..d071adf178bd914bbbd0295f7024afbec65f1561 100644 (file)
@@ -2,7 +2,7 @@
 #include <linux/bpf.h>
 #include <linux/version.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "netcnt_common.h"
 
 #define MAX_BPS        (3 * 1024 * 1024)
index 71d383cc9b85f4465b37fcf1f633da412e4fe694..cc615b82b56e046081a8856ed4deb81b24b30597 100644 (file)
@@ -6,7 +6,7 @@
 #include <stddef.h>
 #include <stdbool.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define FUNCTION_NAME_LEN 64
 #define FILE_NAME_LEN 128
@@ -154,7 +154,12 @@ struct {
        __uint(value_size, sizeof(long long) * 127);
 } stackmap SEC(".maps");
 
-static __always_inline int __on_event(struct pt_regs *ctx)
+#ifdef GLOBAL_FUNC
+__attribute__((noinline))
+#else
+static __always_inline
+#endif
+int __on_event(struct bpf_raw_tracepoint_args *ctx)
 {
        uint64_t pid_tgid = bpf_get_current_pid_tgid();
        pid_t pid = (pid_t)(pid_tgid >> 32);
@@ -254,7 +259,7 @@ static __always_inline int __on_event(struct pt_regs *ctx)
 }
 
 SEC("raw_tracepoint/kfree_skb")
-int on_event(struct pt_regs* ctx)
+int on_event(struct bpf_raw_tracepoint_args *ctx)
 {
        int i, ret = 0;
        ret |= __on_event(ctx);
diff --git a/tools/testing/selftests/bpf/progs/pyperf_global.c b/tools/testing/selftests/bpf/progs/pyperf_global.c
new file mode 100644 (file)
index 0000000..079e78a
--- /dev/null
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define GLOBAL_FUNC
+#include "pyperf.h"
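+
+/* Editorial note: defining GLOBAL_FUNC makes pyperf.h compile __on_event()
+ * as a non-inlined global function (see the #ifdef above), exercising the
+ * verifier's new function-by-function verification instead of forcing the
+ * body to be inlined into on_event().
+ */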
index 0756303676aca1d19b6bd491ecb36db4bba3d5be..1612a32007b6955ced5373a44b5ce3c2ff173dd1 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct bpf_map_def SEC("maps") htab = {
        .type = BPF_MAP_TYPE_HASH,
index a91536b1c47e37b7ffcac9d7a23b549708cdf2f5..092d9da536f3db6a755642eff0fea7a010e0c59e 100644 (file)
@@ -5,8 +5,8 @@
 #include <linux/bpf.h>
 #include <sys/socket.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define SRC1_IP4               0xAC100001U /* 172.16.0.1 */
 #define SRC2_IP4               0x00000000U
index a680628204108b41d0c046a5371cec9f2cfedc0b..255a432bc163873af3918b9c994fa20df56f182d 100644 (file)
@@ -5,8 +5,8 @@
 #include <linux/bpf.h>
 #include <sys/socket.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define SRC_REWRITE_IP6_0      0
 #define SRC_REWRITE_IP6_1      0
index e4440fdd94cbe7729b4e050c19a7ade8f34e9d74..0cb5656a22b010800e7927b04ef19dd618a7f564 100644 (file)
@@ -4,8 +4,8 @@
 #include <linux/bpf.h>
 #include <sys/socket.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 struct socket_cookie {
        __u64 cookie_key;
index 9390e02442595a39ddfda628b398a91b5b78eede..a5c6d5903b223f551620cdf1261a20223ee97352 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 
index e80484d98a1a52acaee62d1527353ad723de25d2..fdb4bf4408fa782c6f0e91aafd500d9905b0e7f5 100644 (file)
@@ -1,7 +1,7 @@
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 
index 433e23918a6217bbd96d37f3bf77c4f9ba9cd624..4797dc9850645f5b01895f82cc184b99c30a6c20 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 
index dede0fcd610234e0bcb7c52b7e692f28e17ccf77..c6d428a8d78574acb69e32cd53621e19105b099d 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 __u32 _version SEC("version") = 1;
index 4afd2595c08ec9f5933b842fcabf1665fe90f86a..9d8c212dde9f5f8bf60af4fee88361ef413d9986 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <netinet/in.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 __u32 _version SEC("version") = 1;
index 1bafbb944e37405191416406fe878ac2dffe5956..d5a5eeb5fb52798f2b989545fd2dff8e56ecbf0a 100644 (file)
@@ -3,7 +3,7 @@
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 __u32 _version SEC("version") = 1;
index 4bf16e0a1b0e63475166d01a04160fe6fe112993..ad61b722a9ded0c7e705dd90dc6407f477115660 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/types.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 typedef uint32_t pid_t;
 struct task_struct {};
index 63531e1a9fa4ee7c4e9db94ba68518a4c2230f11..1f407e65ae5256181bd675b585a677314917c2e8 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
index 21c85c477210b5ecddac5ee5ef744e4f89aad810..a093e739cf0ed7b8313996819db73b8dd17a05ab 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
index 1ecae198b8c10ff302853185d2e240382aa7a84a..cabda877cf0ab97f039ea17025b4cd16f9b574ac 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
index 49938875811933b086ab2a9080db473ea3f40057..f82075b47d7dc86e4ce77be01982b91a5ac270ac 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
index 49c64eb53f19017ec12a80899d7d7fa9a9cdab47..ce5450744fd4c98dcb7748432a25ecf61b2fda77 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
index 2cf813a06b836245e97322728c8951aca36a9405..0cb3204ddb1829fcd5fa85c9df643c90602d83e9 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 __u32 _version SEC("version") = 1;
index 4cd5e860c9035e16cf140694ae3186a2b8008eea..b7fc85769bdcf8e7b157a71800a98130533a30e5 100644 (file)
@@ -7,7 +7,7 @@
  */
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 221b69700625df25e68f452ef6f7c876a3938344..dd8fae6660ab6316c571ddfecc20f163da3c3f89 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int kprobe_res = 0;
 int kretprobe_res = 0;
index 62ad7e22105e269e855a86e37a25f3556450cbf3..88b0566da13d2654a5a682089f4f9054b7038a11 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2018 Facebook */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
 int _version SEC("version") = 1;
index fb8d91a1dbe0988938c7effe3305bc7c60f68aaa..a924e53c8e9d8b32ac9f8afa0f19e90fbc1a79fa 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2018 Facebook */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
 int _version SEC("version") = 1;
index 3f442204475947a123793a6d5577507544a848c7..983aedd1c07252b4b69ae1b100b6dc86cb2c509f 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2018 Facebook */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 9bfc91d9d00440b8a865e9b021cd16ec39d42d5a..3ac3603ad53d3fe3e6fbbc8d76023e5dd4b0ec63 100644 (file)
@@ -5,7 +5,7 @@
 #include <stdbool.h>
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* non-existent BPF helper, used to test dead code elimination */
 static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
index 053b86f6b53f43546925008030bd64fdf7839280..51b3f79df5235708220e66b9b926be8c714e3c32 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index edc0f7c9e56d7a555050b46d5bd5c6a6279d189e..56aec20212b59a5a0602e309fd990baab5554026 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 6c20e433558be96a7d71a97ba7c0209daabd5b87..ab1e647aeb317923fcc7c72e0a38fc29e6d2338c 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 1b7f0ae49cfbb314a4170123b6fc55a2248e864b..7e45e2bdf6cd23a275ef8af810f2792f6438f0bb 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index b5dbeef540fdfcae73f6087d0e6e2f639315328a..525acc2f841b8b913001046c97c1829db0aef777 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index c78ab6d28a14d81f6734f9ed42755dfd266ac63b..6b529073980621fe90f29fc23726ab9596066ecf 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 270de441b60aeefc365df224b6e57fb31e861f45..aba928fd60d34f2ddcac47f677e340846bad9b31 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 292a5c4ee76a159bbc8109d74e0991d10125896c..d5756dbdef82abb7f777262289a3e0d3e6ea6bf7 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 0b28bfacc8fd304f8d55b94602adbc63d533bed9..8b533db4a7a5ede26527feea541e05690f6b0e80 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 39279bf0c9db9271c03ea280d942bdd65c38f496..2b4b6d49c677ea11bf86b0090183290595dc4658 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index ea57973cdd19e47b2d5023b88fc408f5541a084b..2a8975678aa6d924fdfa817bc90038f320231079 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index d1eb59d4ea64ae2fe4a45b813385d0868ef2fbbf..ca61a5183b8811cb4a9dd2e8489f4e8364440811 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 9e091124d3bd8b938772db5a5eaab3e3166986d7..d7fb6cfc7891228ac7643dd5dae3181800b5afce 100644 (file)
@@ -3,8 +3,8 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
-#include "bpf_core_read.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char _license[] SEC("license") = "GPL";
 
index 6a4a8f57f17406b5fa03311dd6740971a0958583..29817a703984dee9883a86c5bb11db17fd9d0e35 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* Permit pretty deep stack traces */
 #define MAX_STACK_RAWTP 100
index 32a6073acb9900be48e750eb69089d9a9f519fa9..dd7a4d3dbc0d43376398063f728649090de4f1e9 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/pkt_cls.h>
 #include <string.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
new file mode 100644 (file)
index 0000000..880260f
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#ifndef MAX_STACK
+#define MAX_STACK (512 - 3 * 32 + 8)
+#endif
+
+static __attribute__ ((noinline))
+int f0(int var, struct __sk_buff *skb)
+{
+       return skb->len;
+}
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+       volatile char buf[MAX_STACK] = {};
+
+       return f0(0, skb) + skb->len;
+}
+
+int f3(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+       return f1(skb) + f3(val, skb, 1);
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+       volatile char buf[MAX_STACK] = {};
+
+       return skb->ifindex * val * var;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+       return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
+}
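+
+/* Stack arithmetic behind MAX_STACK (editorial note): the verifier caps
+ * the combined stack of a call chain at 512 bytes.  Assuming roughly a
+ * 32-byte frame for each of the three functions on the deepest path plus
+ * the buf[] array, this variant needs
+ *
+ *     (512 - 3 * 32 + 8) + 3 * 32 = 520 bytes,
+ *
+ * 8 bytes over the limit, so it is expected to be rejected, while
+ * test_global_func2.c (which drops the "+ 8") is expected to load.
+ */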
diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c
new file mode 100644 (file)
index 0000000..2c18d82
--- /dev/null
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#define MAX_STACK (512 - 3 * 32)
+#include "test_global_func1.c"
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
new file mode 100644 (file)
index 0000000..86f0ecb
--- /dev/null
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+       return skb->len;
+}
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+       return f1(skb) + val;
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+       return f2(var, skb) + val;
+}
+
+__attribute__ ((noinline))
+int f4(struct __sk_buff *skb)
+{
+       return f3(1, skb, 2);
+}
+
+__attribute__ ((noinline))
+int f5(struct __sk_buff *skb)
+{
+       return f4(skb);
+}
+
+__attribute__ ((noinline))
+int f6(struct __sk_buff *skb)
+{
+       return f5(skb);
+}
+
+__attribute__ ((noinline))
+int f7(struct __sk_buff *skb)
+{
+       return f6(skb);
+}
+
+#ifndef NO_FN8
+__attribute__ ((noinline))
+int f8(struct __sk_buff *skb)
+{
+       return f7(skb);
+}
+#endif
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+#ifndef NO_FN8
+       return f8(skb);
+#else
+       return f7(skb);
+#endif
+}
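+
+/* Depth arithmetic (editorial note): the verifier limits a call chain to
+ * 8 frames (MAX_CALL_FRAMES).  With f8 present the chain
+ * test_cls -> f8 -> f7 -> ... -> f1 is 9 frames deep and should be
+ * rejected; test_global_func4.c defines NO_FN8 to trim the chain to
+ * 8 frames, which should load.
+ */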
diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c
new file mode 100644 (file)
index 0000000..610f75e
--- /dev/null
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#define NO_FN8
+#include "test_global_func3.c"
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
new file mode 100644 (file)
index 0000000..260c25b
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+       return skb->len;
+}
+
+int f3(int, struct __sk_buff *skb);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+       return f1(skb) + f3(val, (void *)&val); /* type mismatch */
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb)
+{
+       return skb->ifindex * val;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+       return f1(skb) + f2(2, skb) + f3(3, skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
new file mode 100644 (file)
index 0000000..69e19c6
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+       return skb->len;
+}
+
+int f3(int, struct __sk_buff *skb);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+       return f1(skb) + f3(val, skb + 1); /* type mismatch */
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb)
+{
+       return skb->ifindex * val;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+       return f1(skb) + f2(2, skb) + f3(3, skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
new file mode 100644 (file)
index 0000000..309b3f6
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+void foo(struct __sk_buff *skb)
+{
+       skb->tc_index = 0;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+       foo(skb);
+       return 0;
+}
index 1d652ee8e73d7f35344fc3698070281e75f65400..33493911d87a4fdf007f40131ef84770aa724e2b 100644 (file)
@@ -17,9 +17,9 @@
 #include <linux/icmpv6.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "test_iptunnel_common.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 
index 2e4efe70b1e57bf546e6c601c46ce708e1e36c47..28351936a438edef17bec29cfa7da5b21aef8811 100644 (file)
@@ -13,9 +13,9 @@
 #include <linux/icmpv6.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #include "test_iptunnel_common.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 
index 4147130cc3b7bc3d845838baa764f7a595cabbd4..7a6620671a8333cc8d4d31110591170d0df45c2b 100644 (file)
@@ -5,7 +5,7 @@
 
 #include <linux/bpf.h>
 #include <linux/lirc.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("lirc_mode2")
 int bpf_decoder(unsigned int *sample)
index c957d6dfe6d7d99714d1f1ad466e6187cf0a5d89..d6cb986e7533fe70a05336a643da5372d2db5a6d 100644 (file)
@@ -4,8 +4,8 @@
 #include <linux/bpf.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 struct grehdr {
        __be16 flags;
index 41a3ebcd593dc766cd81d81108a4b9888a3cf34b..48ff2b2ad5e767445444b784255a99957a3050ea 100644 (file)
@@ -3,8 +3,8 @@
 #include <errno.h>
 #include <linux/seg6_local.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 /* Packet parsing state machine helpers. */
 #define cursor_advance(_cursor, _len) \
index 113226115365a5c5a6a00c43816370ee21c0d972..1cfeb940cf9fb259da3eb9fa864465b21e72794d 100644 (file)
@@ -3,7 +3,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <linux/types.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
index bb7ce35f691b388a71f6735ed0780ec808e34f91..b5c07ae7b68f01d1012633bf49f80f23afdd8fef 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define VAR_NUM 16
 
index e808791b7047c78a86abba639076f7f37568807b..6239596cd14e635866829ff7f9668191b3b3f714 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
index 3d30c02bdae9fd765671c92f564935ff60b32804..98b9de2fafd00bc2886cf7142fc35c43ca3ed489 100644 (file)
@@ -4,7 +4,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* It is a deliberately simple bpf program, so it must have no
  * issue being loaded, since testing the verifier is
index 96c0124a04ba51310ec732e71c3299dbe65a9fdd..bfe9fbcb968416685c29aeff79537723651f8e98 100644 (file)
@@ -1,39 +1,45 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
+#include <stdbool.h>
+#include <stddef.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <linux/ptrace.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 #include "bpf_trace_helpers.h"
 
+struct task_struct;
+
 SEC("kprobe/__set_task_comm")
-int prog1(struct pt_regs *ctx)
+int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec)
 {
-       return 0;
+       return !tsk;
 }
 
 SEC("kretprobe/__set_task_comm")
-int prog2(struct pt_regs *ctx)
+int BPF_KRETPROBE(prog2,
+                 struct task_struct *tsk, const char *buf, bool exec,
+                 int ret)
 {
-       return 0;
+       return !PT_REGS_PARM1(ctx) && ret;
 }
 
 SEC("raw_tp/task_rename")
 int prog3(struct bpf_raw_tracepoint_args *ctx)
 {
-       return 0;
+       return !ctx->args[0];
 }
 
-struct task_struct;
-BPF_TRACE_3("fentry/__set_task_comm", prog4,
-           struct task_struct *, tsk, const char *, buf, __u8, exec)
+SEC("fentry/__set_task_comm")
+int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec)
 {
-       return 0;
+       return !tsk;
 }
 
-BPF_TRACE_3("fexit/__set_task_comm", prog5,
-           struct task_struct *, tsk, const char *, buf, __u8, exec)
+SEC("fexit/__set_task_comm")
+int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
 {
-       return 0;
+       return !tsk;
 }
 
 char _license[] SEC("license") = "GPL";
index 07c09ca5546a651651e342b33b1b48c0d6b36620..ebfcc9f50c35bf1aa828330c8cb07d68f7a5d0a9 100644 (file)
@@ -3,7 +3,8 @@
 
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
 
 struct {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -12,7 +13,7 @@ struct {
 } perf_buf_map SEC(".maps");
 
 SEC("kprobe/sys_nanosleep")
-int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+int BPF_KPROBE(handle_sys_nanosleep_entry)
 {
        int cpu = bpf_get_smp_processor_id();
 
index f20e7e00373f34270ee1db0bfca4e93b16f6cacf..4ef2630292b261de9ea78e41583bbe7fe337af28 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 51b38abe7ba1d4b449c227fa6d761bc161db3929..5412e0c732c7601c98600e4cea95e2336fcbf37e 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 3a7b4b607ed3175abd5983861aca55e5a72e0adb..e72eba4a93d21fc44481e364f236c585380fdefe 100644 (file)
@@ -11,8 +11,8 @@
 #include <linux/in.h>
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define barrier() __asm__ __volatile__("": : :"memory")
 int _version SEC("version") = 1;
@@ -47,6 +47,38 @@ int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
        return skb->len * val;
 }
 
+#define MAX_STACK (512 - 2 * 32)
+
+__attribute__ ((noinline))
+int get_skb_len(struct __sk_buff *skb)
+{
+       volatile char buf[MAX_STACK] = {};
+
+       return skb->len;
+}
+
+__attribute__ ((noinline))
+int get_constant(long val)
+{
+       return val - 122;
+}
+
+int get_skb_ifindex(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int test_pkt_access_subprog3(int val, struct __sk_buff *skb)
+{
+       return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123));
+}
+
+__attribute__ ((noinline))
+int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
+{
+       volatile char buf[MAX_STACK] = {};
+
+       return skb->ifindex * val * var;
+}
+
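+/* Result arithmetic (editorial note): get_constant(123) returns 1, so
+ * test_pkt_access_subprog3(val, skb) evaluates to
+ *
+ *     skb->len * (skb->ifindex * val * 1) = skb->len * val * skb->ifindex,
+ *
+ * which is exactly what test_pkt_access() checks below for val == 3.
+ */
+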
 SEC("classifier/test_pkt_access")
 int test_pkt_access(struct __sk_buff *skb)
 {
@@ -82,6 +114,8 @@ int test_pkt_access(struct __sk_buff *skb)
                return TC_ACT_SHOT;
        if (test_pkt_access_subprog2(2, skb) != skb->len * 2)
                return TC_ACT_SHOT;
+       if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
+               return TC_ACT_SHOT;
        if (tcp) {
                if (((void *)(tcp) + 20) > data_end || proto != 6)
                        return TC_ACT_SHOT;
index 1db2623021ad484b7dbb6e5b5a9ba7fbdb2b11b3..610c74ea9f648a30209897e8195eac542b5ab075 100644 (file)
@@ -5,7 +5,7 @@
 #include <string.h>
 #include <linux/bpf.h>
 #include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 1871e2ece0c40012cec4ea52b86a1fe8500d3ee5..d556b1572cc66dbd9d3885ab1024a7f328cfde1d 100644 (file)
@@ -5,13 +5,14 @@
 
 #include <netinet/in.h>
 
-#include "bpf_helpers.h"
-#include "bpf_tracing.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_trace_helpers.h"
 
 static struct sockaddr_in old;
 
 SEC("kprobe/__sys_connect")
-int handle_sys_connect(struct pt_regs *ctx)
+int BPF_KPROBE(handle_sys_connect)
 {
        void *ptr = (void *)PT_REGS_PARM2(ctx);
        struct sockaddr_in new;
index 0e014d3b2b36ccc56d94a87f9bf175bdde9b2fff..4dd9806ad73b5bae9c9153c43daa280a1b0f821a 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/if_ether.h>
 #include <linux/ip.h>
 #include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 52d94e8b214dc836c79a7becbd47184d23917b70..ecbeea2df25989faa3ea0682ff5ce13d06d0218f 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 static volatile const struct {
        unsigned a[4];
index 69880c1e7700cf620c06fab0b994fe14508447f1..a7278f064368e15e5354d6092b7e10deedddaaf7 100644 (file)
@@ -3,8 +3,8 @@
 #include <errno.h>
 #include <linux/seg6_local.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 /* Packet parsing state machine helpers. */
 #define cursor_advance(_cursor, _len) \
index b1f09f5bb1cfbd123c508c02c29a4369beacd96f..d69a1f2bbbfde5a074aa52819324d386035cafd6 100644 (file)
@@ -11,8 +11,8 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
 #include "test_select_reuseport_common.h"
 
 int _version SEC("version") = 1;
index 0e6be01157e682d91bc0da1e16bc5bdfef6912f8..1acc91e87bfc591da317fb96fb97bcf160079ae9 100644 (file)
@@ -2,46 +2,39 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, 1);
-       __type(key, __u32);
-       __type(value, __u64);
-} info_map SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, 1);
-       __type(key, __u32);
-       __type(value, __u64);
-} status_map SEC(".maps");
-
-SEC("send_signal_demo")
-int bpf_send_signal_test(void *ctx)
+#include <bpf/bpf_helpers.h>
+
+__u32 sig = 0, pid = 0, status = 0, signal_thread = 0;
+
+static __always_inline int bpf_send_signal_test(void *ctx)
 {
-       __u64 *info_val, *status_val;
-       __u32 key = 0, pid, sig;
        int ret;
 
-       status_val = bpf_map_lookup_elem(&status_map, &key);
-       if (!status_val || *status_val != 0)
-               return 0;
-
-       info_val = bpf_map_lookup_elem(&info_map, &key);
-       if (!info_val || *info_val == 0)
+       if (status != 0 || sig == 0 || pid == 0)
                return 0;
 
-       sig = *info_val >> 32;
-       pid = *info_val & 0xffffFFFF;
-
        if ((bpf_get_current_pid_tgid() >> 32) == pid) {
-               ret = bpf_send_signal(sig);
+               if (signal_thread)
+                       ret = bpf_send_signal_thread(sig);
+               else
+                       ret = bpf_send_signal(sig);
                if (ret == 0)
-                       *status_val = 1;
+                       status = 1;
        }
 
        return 0;
 }
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int send_signal_tp(void *ctx)
+{
+       return bpf_send_signal_test(ctx);
+}
+
+SEC("perf_event")
+int send_signal_perf(void *ctx)
+{
+       return bpf_send_signal_test(ctx);
+}
+
 char __license[] SEC("license") = "GPL";
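+
+/* With the maps replaced by plain globals, user space can drive this test
+ * directly through the generated skeleton's bss (a sketch, assuming a
+ * bpftool-generated test_send_signal_kern.skel.h):
+ *
+ *     struct test_send_signal_kern *skel;
+ *
+ *     skel = test_send_signal_kern__open_and_load();
+ *     skel->bss->pid = getpid();
+ *     skel->bss->sig = SIGUSR1;
+ *     skel->bss->signal_thread = 1;   (exercises bpf_send_signal_thread)
+ */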
index cb49ccb707d1fb6ef9c439abe1e896594ad6bc56..d2b38fa6a5b0fccc54829e20ee499fa066ea3b25 100644 (file)
@@ -12,8 +12,8 @@
 #include <linux/pkt_cls.h>
 #include <linux/tcp.h>
 #include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
index 68cf9829f5a7ed4ea6828aabd486ab24460b1f2a..552f2090665c504b9c7274115982a98c59f9f819 100644 (file)
@@ -6,7 +6,7 @@
 
 #include <string.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define NUM_CGROUP_LEVELS      4
 
index e18da87fe84fa2c9bbe1e0f663ef263ef40ab841..202de3938494e35466d8941086fa24e7e54fbedb 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
index 4f69aac5635f1ada6f3e0932cb01b62e5df4ace1..de03a90f78caa13eba7fd369bee74c51b701ca57 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <stdbool.h>
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct s {
        int a;
index a47b003623efb41deea171aa3049468cb1a1863c..9bcaa37f476aa68be463a0fa3fba72272d69e9d6 100644 (file)
@@ -5,8 +5,8 @@
 #include <netinet/in.h>
 #include <stdbool.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 enum bpf_addr_array_idx {
        ADDR_SRV_IDX,
index a43b999c8da2ef1b584e35ae91d1d60b445d628e..0d31a3b3505f50ae35435e56f67fdbff858d1102 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct hmap_elem {
        volatile int cnt;
index f5638e26865d030804f60df8ea706ed81abdea76..0cf0134631b4ba6ed2a1a475f9d97b817616a24b 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (c) 2018 Facebook
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #ifndef PERF_MAX_STACK_DEPTH
 #define PERF_MAX_STACK_DEPTH         127
index 3b7e1dca882937d182c3b901993c7a9873d5a919..00ed486726204b8d45bb1d0bc4d8a1d7eab4649f 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (c) 2018 Facebook
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #ifndef PERF_MAX_STACK_DEPTH
 #define PERF_MAX_STACK_DEPTH         127
index d22e438198cf7724faf3013ff45548c904e1fdb9..458b0d69133e48b722c9feb252d897519ba37d46 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/stddef.h>
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
index cb201cbe11e775dd4cf26bf6414fe1a0dcc91797..b2e6f9b0894d8833965a5e358471d72af1c9bc65 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/stddef.h>
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
index 5cbbff416998c78fb889129bf90a5f7aafc53a7c..2d0b0b82a78a2174637b5443ffcd13b54609fa80 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/stddef.h>
 #include <linux/bpf.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
 #define MAX_ULONG_STR_LEN 0xF
index 0961415ba4773ee9d396b9246ed6f88d051cddc3..bf28814bfde5597a7a63b8ef93ed6051e30fe288 100644 (file)
@@ -7,8 +7,8 @@
 #include <linux/ip.h>
 #include <linux/pkt_cls.h>
 #include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 /* the maximum delay we are willing to add (drop packets beyond that) */
 #define TIME_HORIZON_NS (2000 * 1000 * 1000)
index 74370e7e286d5fe2f6c0b244a7fa7382242d2dfa..37bce7a7c3944663297d56c3b0a72f7c01f427b1 100644 (file)
@@ -17,8 +17,8 @@
 #include <linux/pkt_cls.h>
 #include <linux/types.h>
 
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
 
 static const int cfg_port = 8000;
 
index d8803dfa8d32f0016327c62dbffd35061e1c2598..47cbe2eeae431b85d407dd2e11e21e0e8316b58a 100644 (file)
@@ -13,8 +13,8 @@
 #include <sys/socket.h>
 #include <linux/tcp.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 struct bpf_map_def SEC("maps") results = {
        .type = BPF_MAP_TYPE_ARRAY,
index 87b7d934ce73d0cd0a57ca2e198db84bb8d06d28..adc83a54c35270deeae584363f06895a78eebff4 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/ipv6.h>
 #include <linux/version.h>
 #include <sys/socket.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
 #define TCP_ESTATS_MAGIC 0xBAADBEEF
index 7fa4595d2b66b777dec629c97d9a524cf96472be..1f1966e86e9f9e9a815a00e27b79eb83509cf4af 100644 (file)
@@ -10,8 +10,8 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 #include "test_tcpbpf.h"
 
 struct {
index 08346e7765d52c4b2bbb64d83f122ee9e749b7d4..ac63410bb541369b209921a3a50be09e5640b28b 100644 (file)
@@ -10,8 +10,8 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 #include "test_tcpnotify.h"
 
 struct {
index 04bf084517e0d045485276808558037739df16b0..4b825ee122cf8cc5d674e841214f42132e53368d 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (c) 2017 Facebook
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
index 504df69c83df42381e5e1508ce3873bf388d0364..f48dbfe24ddc8bd9990157f3858effe2ce1fc094 100644 (file)
@@ -19,8 +19,8 @@
 #include <linux/socket.h>
 #include <linux/pkt_cls.h>
 #include <linux/erspan.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define ERROR(ret) do {\
                char fmt[] = "ERROR line:%d ret:%d\n";\
index f3236ce35f313492e3e10e5c46cafbaa9aa08021..d38153dab3ddb3452f7b36d57b7d88dc18c6d742 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #define ATTR __attribute__((noinline))
 #include "test_jhash.h"
 
index 9897150ed5167ff2bb19143d25683fe2065a7f86..f024154c7be7d9a54335121393e466c9d44acd84 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #define ATTR __always_inline
 #include "test_jhash.h"
 
index 1848da04ea412237d8432bb54f4ff532c4412d42..9beb5bf80373c64e83307cd6547f00e567cdfb50 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 #define ATTR __attribute__((noinline))
 #include "test_jhash.h"
 
index 0941c655b07bb13693675371332c2359e6c66127..31f9bce37491a6ca842f0ac3eca3c8ad49759e6b 100644 (file)
@@ -16,8 +16,8 @@
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
 #include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 #include "test_iptunnel_common.h"
 
 int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
new file mode 100644 (file)
index 0000000..cb8a04a
--- /dev/null
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+
+struct net_device {
+       /* Structure does not need to contain all entries,
+        * as "preserve_access_index" will use BTF to fix this...
+        */
+       int ifindex;
+} __attribute__((preserve_access_index));
+
+struct xdp_rxq_info {
+       /* Structure does not need to contain all entries,
+        * as "preserve_access_index" will use BTF to fix this...
+        */
+       struct net_device *dev;
+       __u32 queue_index;
+} __attribute__((preserve_access_index));
+
+struct xdp_buff {
+       void *data;
+       void *data_end;
+       void *data_meta;
+       void *data_hard_start;
+       unsigned long handle;
+       struct xdp_rxq_info *rxq;
+} __attribute__((preserve_access_index));
+
+__u64 test_result_fentry = 0;
+SEC("fentry/_xdp_tx_iptunnel")
+int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
+{
+       test_result_fentry = xdp->rxq->dev->ifindex;
+       return 0;
+}
+
+__u64 test_result_fexit = 0;
+SEC("fexit/_xdp_tx_iptunnel")
+int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
+{
+       test_result_fexit = ret;
+       return 0;
+}
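+
+/* These fentry/fexit programs trace another BPF program (an XDP program)
+ * rather than a kernel function; the expectation (an assumption about the
+ * matching user-space test) is that this object is opened with the target
+ * program's fd, e.g.:
+ *
+ *     DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ *                         .attach_prog_fd = xdp_prog_fd);
+ *     obj = bpf_object__open_file("test_xdp_bpf2bpf.o", &opts);
+ */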
index 97175f73c3fea4b55cf11a11b757d463cfced321..fcabcda30ba32086a1594ed21f5fbb77443ae475 100644 (file)
@@ -12,8 +12,8 @@
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
 #include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 #include "test_iptunnel_common.h"
 
 int _version SEC("version") = 1;
index 8d01826506530917554278584cd8da09365d1d35..a7c4a7d49fe6b54b5841e36bcbe9ae9c8577df38 100644 (file)
@@ -2,7 +2,7 @@
 #include <linux/if_ether.h>
 #include <linux/pkt_cls.h>
 
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 #define __round_mask(x, y) ((__typeof__(x))((y) - 1))
 #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
index e88d7b9d65abf808eb839b60b72998ff4fc67256..8beecec166d9d78a94f1a482522b06e750434892 100644 (file)
@@ -13,8 +13,8 @@
 #include <linux/icmpv6.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 static __u32 rol32(__u32 word, unsigned int shift)
 {
@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
        return c;
 }
 
-static __attribute__ ((noinline))
+__attribute__ ((noinline))
 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 {
        a += initval;
@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
        return c;
 }
 
-static __attribute__ ((noinline))
+__attribute__ ((noinline))
 u32 jhash_2words(u32 a, u32 b, u32 initval)
 {
        return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
index ef9e704be140972a94f3669092ede0c35e5bcb69..a5337cd9400bebae5dcb1915e4f29e433bac2d86 100644 (file)
@@ -10,7 +10,7 @@
  * General Public License for more details.
  */
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 int _version SEC("version") = 1;
 
index 365a7d2d9f5c46ea4de8848549ada8c28527665d..134768f6b788d52617dba9aa866184fd9f604d08 100644 (file)
@@ -22,8 +22,8 @@
 #include <linux/in.h>
 #include <linux/pkt_cls.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 /* linux/if_vlan.h has not exposed this as UAPI, thus mirror some here
  *
index 43b0ef1001ede9c415eb43854956ade4987fa317..ea25e88819928dda30a8a81b21adaba1d07b3919 100644 (file)
@@ -2,7 +2,7 @@
 
 #define KBUILD_MODNAME "xdp_dummy"
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("xdp_dummy")
 int xdp_dummy_prog(struct xdp_md *ctx)
index 1c5f298d7196185c0a270cd2212bb865f45cebdf..d037262c893711764c6a1beb1deaae63ffc6803f 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 struct {
        __uint(type, BPF_MAP_TYPE_DEVMAP);
index 57912e7c94b0abd395e110b35d58341b2d21382f..94e6c2b281cb6ba98ea98bcda587976ad1427a5d 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
 
 SEC("tx")
 int xdp_tx(struct xdp_md *xdp)
index 112a2857f4e274d7a9878303698ac90c3046d980..6b9ca40bd1f4f8a80c9c8b93f6f59dfc8f76d5a8 100644 (file)
@@ -12,8 +12,8 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #include "xdping.h"
 
index 3d617e806054803e332d8530447c6de1ecfbbc95..93040ca83e604963044dc685634654af831274c6 100644 (file)
@@ -4148,10 +4148,6 @@ static int do_test_file(unsigned int test_num)
        if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
                return PTR_ERR(obj);
 
-       err = bpf_object__btf_fd(obj);
-       if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
-               goto done;
-
        prog = bpf_program__next(NULL, obj);
        if (CHECK(!prog, "Cannot find bpf_prog")) {
                err = -1;
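
The test no longer asserts on bpf_object__btf_fd() right after opening the object. If a test still needs the object's BTF file descriptor, one way to get it (a sketch assuming the object actually carries BTF, not code taken from this patch) is:

#include <bpf/libbpf.h>
#include <bpf/btf.h>

/* Hedged helper sketch: return the object's BTF fd, or -1 when the
 * object has no BTF. */
static int object_btf_fd(struct bpf_object *obj)
{
	struct btf *btf = bpf_object__btf(obj);

	return btf ? btf__fd(btf) : -1;
}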
index 6fe23a10d48aa5f58e6e78456b8a32eed5a7eaf3..a8d2e9a87fbfa6a38f121728d7d9fa23a93abdce 100644 (file)
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 #include <iostream>
-#include "libbpf.h"
-#include "bpf.h"
-#include "btf.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
 #include "test_core_extern.skel.h"
 
 /* do nothing, just make sure we can link successfully */
index b64094c981e394b7872cccbbcf7181d605421f15..c490e012c23f7658f48200fba2121e679ab8f100 100644 (file)
@@ -8,7 +8,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <linux/err.h>
-#include "hashmap.h"
+#include "bpf/hashmap.h"
 
 #define CHECK(condition, format...) ({                                 \
        int __ret = !!(condition);                                      \
index 7fa7d08a8104d3359124f753c5a533e7c22d3070..bab1e6f1d8f139f301acc057fb28804d1ca56410 100644 (file)
@@ -8,7 +8,7 @@
 #include <string.h>
 
 /* defined in test_progs.h */
-struct test_env env;
+struct test_env env = {};
 
 struct prog_test_def {
        const char *test_name;
@@ -29,10 +29,19 @@ struct prog_test_def {
 
 static bool should_run(struct test_selector *sel, int num, const char *name)
 {
-       if (sel->name && sel->name[0] && !strstr(name, sel->name))
-               return false;
+       int i;
+
+       for (i = 0; i < sel->blacklist.cnt; i++) {
+               if (strstr(name, sel->blacklist.strs[i]))
+                       return false;
+       }
 
-       if (!sel->num_set)
+       for (i = 0; i < sel->whitelist.cnt; i++) {
+               if (strstr(name, sel->whitelist.strs[i]))
+                       return true;
+       }
+
+       if (!sel->whitelist.cnt && !sel->num_set)
                return true;
 
        return num < sel->num_set_len && sel->num_set[num];
@@ -334,6 +343,7 @@ const char argp_program_doc[] = "BPF selftests test runner";
 enum ARG_KEYS {
        ARG_TEST_NUM = 'n',
        ARG_TEST_NAME = 't',
+       ARG_TEST_NAME_BLACKLIST = 'b',
        ARG_VERIFIER_STATS = 's',
        ARG_VERBOSE = 'v',
 };
@@ -341,8 +351,10 @@ enum ARG_KEYS {
 static const struct argp_option opts[] = {
        { "num", ARG_TEST_NUM, "NUM", 0,
          "Run test number NUM only " },
-       { "name", ARG_TEST_NAME, "NAME", 0,
-         "Run tests with names containing NAME" },
+       { "name", ARG_TEST_NAME, "NAMES", 0,
+         "Run tests with names containing any string from NAMES list" },
+       { "name-blacklist", ARG_TEST_NAME_BLACKLIST, "NAMES", 0,
+         "Don't run tests with names containing any string from NAMES list" },
        { "verifier-stats", ARG_VERIFIER_STATS, NULL, 0,
          "Output verifier statistics", },
        { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
@@ -359,6 +371,41 @@ static int libbpf_print_fn(enum libbpf_print_level level,
        return 0;
 }
 
+static int parse_str_list(const char *s, struct str_set *set)
+{
+       char *input, *state = NULL, *next, **tmp, **strs = NULL;
+       int cnt = 0;
+
+       input = strdup(s);
+       if (!input)
+               return -ENOMEM;
+
+       set->cnt = 0;
+       set->strs = NULL;
+
+       while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+               tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
+               if (!tmp)
+                       goto err;
+               strs = tmp;
+
+               strs[cnt] = strdup(next);
+               if (!strs[cnt])
+                       goto err;
+
+               cnt++;
+       }
+
+       set->cnt = cnt;
+       set->strs = (const char **)strs;
+       free(input);
+       return 0;
+err:
+       free(strs);
+       free(input);
+       return -ENOMEM;
+}
+
 int parse_num_list(const char *s, struct test_selector *sel)
 {
        int i, set_len = 0, num, start = 0, end = -1;
@@ -449,12 +496,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 
                if (subtest_str) {
                        *subtest_str = '\0';
-                       env->subtest_selector.name = strdup(subtest_str + 1);
-                       if (!env->subtest_selector.name)
+                       if (parse_str_list(subtest_str + 1,
+                                          &env->subtest_selector.whitelist))
+                               return -ENOMEM;
+               }
+               if (parse_str_list(arg, &env->test_selector.whitelist))
+                       return -ENOMEM;
+               break;
+       }
+       case ARG_TEST_NAME_BLACKLIST: {
+               char *subtest_str = strchr(arg, '/');
+
+               if (subtest_str) {
+                       *subtest_str = '\0';
+                       if (parse_str_list(subtest_str + 1,
+                                          &env->subtest_selector.blacklist))
                                return -ENOMEM;
                }
-               env->test_selector.name = strdup(arg);
-               if (!env->test_selector.name)
+               if (parse_str_list(arg, &env->test_selector.blacklist))
                        return -ENOMEM;
                break;
        }
@@ -617,7 +676,11 @@ int main(int argc, char **argv)
        printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
               env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
+       free(env.test_selector.blacklist.strs);
+       free(env.test_selector.whitelist.strs);
        free(env.test_selector.num_set);
+       free(env.subtest_selector.blacklist.strs);
+       free(env.subtest_selector.whitelist.strs);
        free(env.subtest_selector.num_set);
 
        return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
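
With the selector rework above, -t and the new -b (name-blacklist) options accept comma-separated lists, split with strtok_r() in parse_str_list(); for example, ./test_progs -t fentry,fexit -b flow runs tests matching either whitelist string while skipping anything whose name contains "flow". A standalone sketch of the same strtok_r idiom (not part of the patch):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* same "first call gets the buffer, later calls get NULL" idiom
	 * as parse_str_list() above */
	char *input = strdup("fentry,fexit,trampoline");
	char *state = NULL, *next;

	while ((next = strtok_r(state ? NULL : input, ",", &state)))
		printf("selector: %s\n", next);

	free(input);
	return 0;
}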
index de1fdaa4e7b4309b4c832642b34ff0770c6dee4d..bcfa9ef23fda9f78f4d3ff40a3d7e3173c5b772d 100644 (file)
@@ -35,7 +35,7 @@ typedef __u16 __sum16;
 
 #include "test_iptunnel_common.h"
 #include "bpf_util.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
 #include "trace_helpers.h"
 #include "flow_dissector_load.h"
 
@@ -46,8 +46,14 @@ enum verbosity {
        VERBOSE_SUPER,
 };
 
+struct str_set {
+       const char **strs;
+       int cnt;
+};
+
 struct test_selector {
-       const char *name;
+       struct str_set whitelist;
+       struct str_set blacklist;
        bool *num_set;
        int num_set_len;
 };
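
Each selector now carries separate whitelist and blacklist string sets, and should_run() (in the test_progs.c hunks above) consults the blacklist first, so a name matching both lists is skipped. A self-contained sketch of that precedence, with hypothetical lists:

#include <stdio.h>
#include <string.h>

static const char *deny[]  = { "flow" };
static const char *allow[] = { "fentry", "fexit" };

/* blacklist wins over whitelist, mirroring should_run() */
static int name_selected(const char *name)
{
	unsigned int i;

	for (i = 0; i < sizeof(deny) / sizeof(deny[0]); i++)
		if (strstr(name, deny[i]))
			return 0;
	for (i = 0; i < sizeof(allow) / sizeof(allow[0]); i++)
		if (strstr(name, allow[i]))
			return 1;
	return 0;
}

int main(void)
{
	printf("%d\n", name_selected("fentry_fexit"));   /* 1 */
	printf("%d\n", name_selected("flow_dissector")); /* 0 */
	return 0;
}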
index 0e6652733462377cfe7bdc0fa57b5548c39f2e0f..52bf14955797d18d8f220a92a475b19c4191d4e2 100644 (file)
@@ -13,7 +13,7 @@
 #include <bpf/bpf.h>
 
 #include "cgroup_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
 
index d008b41b7d8d98daea38b4a579e1f38bc5e800f8..9b4d3a68a91a2f94d9c7e861c865567597ba5077 100644 (file)
@@ -12,8 +12,8 @@
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
 #include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 /* Sockmap sample program connects a client and a backend together
  * using cgroups.
index 40bd93a6e7aeb312fddb050dab65c0fe8a390f3b..d196e2a4a6e04d3be7b090bf4723218f57ad2f01 100644 (file)
@@ -13,7 +13,7 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
index aa4dcfe180508dd3b2fef12cf9d7cf32ccc5d13a..0383c9b8adc187271af96204e62251874baccdf3 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef __TRACE_HELPER_H
 #define __TRACE_HELPER_H
 
-#include <libbpf.h>
+#include <bpf/libbpf.h>
 
 struct ksym {
        long addr;