bpf: Implement minimal BPF perf link
author Andrii Nakryiko <andrii@kernel.org>
Sun, 15 Aug 2021 07:05:57 +0000 (00:05 -0700)
committer Daniel Borkmann <daniel@iogearbox.net>
Mon, 16 Aug 2021 22:45:07 +0000 (00:45 +0200)
Introduce a new type of BPF link - BPF perf link. This brings perf_event-based
BPF program attachments (perf_event, tracepoints, kprobes, and uprobes) into
the common BPF link infrastructure. That makes it possible to list all active
perf_event-based attachments, to auto-detach the BPF program from the
perf_event when the link's FD is closed, and to use the generic BPF link
fdinfo/get_info functionality.

BPF_LINK_CREATE command expects perf_event's FD as target_fd. No extra flags
are currently supported.

Force-detaching and atomic BPF program updates are not yet implemented, but
with perf_event-based BPF links we now have a common framework for this without
the need to extend the ioctl()-based perf_event interface.

One interesting consideration is a new value for bpf_attach_type, which
BPF_LINK_CREATE command expects. Generally, it's either 1-to-1 mapping from
bpf_attach_type to bpf_prog_type, or many-to-1 mapping from a subset of
bpf_attach_types to one bpf_prog_type (e.g., see BPF_PROG_TYPE_SK_SKB or
BPF_PROG_TYPE_CGROUP_SOCK). In this case, though, we have three different
program types (KPROBE, TRACEPOINT, PERF_EVENT) using the same perf_event-based
mechanism, so it's many bpf_prog_types to one bpf_attach_type. I chose to
define a single BPF_PERF_EVENT attach type for all of them and adjust
link_create()'s logic for checking correspondence between attach type and
program type.

The alternative would be to define three new attach types (e.g., BPF_KPROBE,
BPF_TRACEPOINT, and BPF_PERF_EVENT), but that seemed like unnecessary overkill,
and BPF_KPROBE would cause naming conflicts with the BPF_KPROBE() macro defined
by libbpf. I chose not to do this, to avoid unnecessary proliferation of
bpf_attach_type enum values and to not have to deal with naming conflicts.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/bpf/20210815070609.987780-5-andrii@kernel.org
include/linux/bpf_types.h
include/linux/trace_events.h
include/uapi/linux/bpf.h
kernel/bpf/syscall.c
kernel/events/core.c
tools/include/uapi/linux/bpf.h

index ae3ac3a2018caef521a75c7c4b4b49bccd73cc7f..9c81724e4b9850657ac484c9f5322b2fba905429 100644 (file)
@@ -136,3 +136,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
 BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
 BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
 #endif
+#ifdef CONFIG_PERF_EVENTS
+BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
+#endif
index ad413b382a3cab47a38263c3be6cd5c1a6ade72e..8ac92560d3a3a540aa3aa9bba0757ca9db98c007 100644 (file)
@@ -803,6 +803,9 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
 void perf_trace_buf_update(void *record, u16 type);
 void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
 
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+void perf_event_free_bpf_prog(struct perf_event *event);
+
 void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
 void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
 void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
index 2db6925e04f4ffb33cead69b9774b0cf7ad77660..94fe8329b28f77e0675b2c6521105809f51b570d 100644 (file)
@@ -993,6 +993,7 @@ enum bpf_attach_type {
        BPF_SK_SKB_VERDICT,
        BPF_SK_REUSEPORT_SELECT,
        BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+       BPF_PERF_EVENT,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -1006,6 +1007,7 @@ enum bpf_link_type {
        BPF_LINK_TYPE_ITER = 4,
        BPF_LINK_TYPE_NETNS = 5,
        BPF_LINK_TYPE_XDP = 6,
+       BPF_LINK_TYPE_PERF_EVENT = 7,
 
        MAX_BPF_LINK_TYPE,
 };
index 9a2068e39d232061cb5b6b958f310ba18d4efff6..80c03bedd6e6312dcaa0c1d4624d70321463da52 100644 (file)
@@ -2906,6 +2906,79 @@ static const struct bpf_link_ops bpf_raw_tp_link_lops = {
        .fill_link_info = bpf_raw_tp_link_fill_link_info,
 };
 
+#ifdef CONFIG_PERF_EVENTS
+struct bpf_perf_link { /* BPF link state for a perf_event-based attachment */
+       struct bpf_link link; /* embedded generic link; the link ops recover the wrapper via container_of() */
+       struct file *perf_file; /* perf_event file stored by bpf_perf_link_attach(); fput() in bpf_perf_link_release() */
+};
+
+static void bpf_perf_link_release(struct bpf_link *link) /* .release op: free the event's BPF prog and put the perf_event file */
+{
+       struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+       struct perf_event *event = perf_link->perf_file->private_data; /* the file's private_data is used as the struct perf_event */
+
+       perf_event_free_bpf_prog(event);
+       fput(perf_link->perf_file); /* releases the file stored in perf_link at attach time */
+}
+
+static void bpf_perf_link_dealloc(struct bpf_link *link) /* .dealloc op: free the enclosing bpf_perf_link */
+{
+       struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+
+       kfree(perf_link); /* pairs with the kzalloc() in bpf_perf_link_attach() */
+}
+
+static const struct bpf_link_ops bpf_perf_link_lops = { /* ops table handed to bpf_link_init() for perf links */
+       .release = bpf_perf_link_release,
+       .dealloc = bpf_perf_link_dealloc,
+};
+
+static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) /* create a perf link attaching prog to the perf_event given by link_create.target_fd */
+{ /* returns bpf_link_settle()'s result on success, otherwise a negative error code */
+       struct bpf_link_primer link_primer;
+       struct bpf_perf_link *link;
+       struct perf_event *event;
+       struct file *perf_file;
+       int err;
+
+       if (attr->link_create.flags) /* no flags are accepted; any non-zero value is rejected */
+               return -EINVAL;
+
+       perf_file = perf_event_get(attr->link_create.target_fd); /* yields a struct file * or an ERR_PTR-encoded error */
+       if (IS_ERR(perf_file))
+               return PTR_ERR(perf_file);
+
+       link = kzalloc(sizeof(*link), GFP_USER);
+       if (!link) {
+               err = -ENOMEM;
+               goto out_put_file;
+       }
+       bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
+       link->perf_file = perf_file; /* kept so bpf_perf_link_release() can reach the event and fput() the file */
+
+       err = bpf_link_prime(&link->link, &link_primer);
+       if (err) {
+               kfree(link); /* priming failed: the link is freed directly here */
+               goto out_put_file;
+       }
+
+       event = perf_file->private_data;
+       err = perf_event_set_bpf_prog(event, prog);
+       if (err) {
+               bpf_link_cleanup(&link_primer); /* once primed, teardown goes through bpf_link_cleanup(), not kfree(); NOTE(review): presumably frees link via .dealloc - confirm */
+               goto out_put_file;
+       }
+       /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
+       bpf_prog_inc(prog);
+
+       return bpf_link_settle(&link_primer);
+
+out_put_file: /* shared error exit: put the perf_event file obtained above */
+       fput(perf_file);
+       return err;
+}
+#endif /* CONFIG_PERF_EVENTS */
+
 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
 
 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
@@ -4147,15 +4220,26 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
        if (ret)
                goto out;
 
-       if (prog->type == BPF_PROG_TYPE_EXT) {
+       switch (prog->type) {
+       case BPF_PROG_TYPE_EXT:
                ret = tracing_bpf_link_attach(attr, uattr, prog);
                goto out;
-       }
-
-       ptype = attach_type_to_prog_type(attr->link_create.attach_type);
-       if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
-               ret = -EINVAL;
-               goto out;
+       case BPF_PROG_TYPE_PERF_EVENT:
+       case BPF_PROG_TYPE_KPROBE:
+       case BPF_PROG_TYPE_TRACEPOINT:
+               if (attr->link_create.attach_type != BPF_PERF_EVENT) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               ptype = prog->type;
+               break;
+       default:
+               ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+               if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               break;
        }
 
        switch (ptype) {
@@ -4179,6 +4263,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
        case BPF_PROG_TYPE_XDP:
                ret = bpf_xdp_link_attach(attr, prog);
                break;
+#endif
+#ifdef CONFIG_PERF_EVENTS
+       case BPF_PROG_TYPE_PERF_EVENT:
+       case BPF_PROG_TYPE_TRACEPOINT:
+       case BPF_PROG_TYPE_KPROBE:
+               ret = bpf_perf_link_attach(attr, prog);
+               break;
 #endif
        default:
                ret = -EINVAL;
index 2f07718bd41ca5af3e81657bc503a355d64d5344..9fd65667bcb2df458fc1e5d2f0f6fb131b3e9aba 100644 (file)
@@ -4697,7 +4697,6 @@ errout:
 }
 
 static void perf_event_free_filter(struct perf_event *event);
-static void perf_event_free_bpf_prog(struct perf_event *event);
 
 static void free_event_rcu(struct rcu_head *head)
 {
@@ -5574,7 +5573,6 @@ static inline int perf_fget_light(int fd, struct fd *p)
 static int perf_event_set_output(struct perf_event *event,
                                 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
-static int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
 static int perf_copy_attr(struct perf_event_attr __user *uattr,
                          struct perf_event_attr *attr);
 
@@ -10013,7 +10011,7 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
        return false;
 }
 
-static int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
 {
        bool is_kprobe, is_tracepoint, is_syscall_tp;
 
@@ -10047,7 +10045,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *pr
        return perf_event_attach_bpf_prog(event, prog);
 }
 
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
 {
        if (!perf_event_is_tracing(event)) {
                perf_event_free_bpf_handler(event);
@@ -10066,12 +10064,12 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
-static int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog) /* stub variant: always returns -ENOENT; NOTE(review): presumably the !CONFIG_EVENT_TRACING fallback - confirm */
 {
        return -ENOENT;
 }
 
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event) /* stub variant: empty body, does nothing */
 {
 }
 #endif /* CONFIG_EVENT_TRACING */
index 2db6925e04f4ffb33cead69b9774b0cf7ad77660..94fe8329b28f77e0675b2c6521105809f51b570d 100644 (file)
@@ -993,6 +993,7 @@ enum bpf_attach_type {
        BPF_SK_SKB_VERDICT,
        BPF_SK_REUSEPORT_SELECT,
        BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+       BPF_PERF_EVENT,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -1006,6 +1007,7 @@ enum bpf_link_type {
        BPF_LINK_TYPE_ITER = 4,
        BPF_LINK_TYPE_NETNS = 5,
        BPF_LINK_TYPE_XDP = 6,
+       BPF_LINK_TYPE_PERF_EVENT = 7,
 
        MAX_BPF_LINK_TYPE,
 };