Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[sfrench/cifs-2.6.git] / tools / lib / bpf / libbpf.c
index 0f9f06df49bce5bf16c6138ce403b21b1e4b2579..cbdf34a6fb9379fa241629a7cd34d3b48ba3ebb4 100644 (file)
@@ -31,6 +31,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
+#include <perf-sys.h>
 #include <asm/unistd.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -45,6 +46,7 @@
 
 #include "libbpf.h"
 #include "bpf.h"
+#include "btf.h"
 
 #ifndef EM_BPF
 #define EM_BPF 247
@@ -176,6 +178,7 @@ struct bpf_program {
        /* Index in elf obj file, for relocation use. */
        int idx;
        char *name;
+       int prog_ifindex;
        char *section_name;
        struct bpf_insn *insns;
        size_t insns_cnt, main_prog_cnt;
@@ -211,7 +214,10 @@ struct bpf_map {
        int fd;
        char *name;
        size_t offset;
+       int map_ifindex;
        struct bpf_map_def def;
+       uint32_t btf_key_id;
+       uint32_t btf_value_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
 };
@@ -256,6 +262,8 @@ struct bpf_object {
         */
        struct list_head list;
 
+       struct btf *btf;
+
        void *priv;
        bpf_object_clear_priv_t clear_priv;
 
@@ -819,7 +827,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                                                        data->d_size);
                else if (strcmp(name, "maps") == 0)
                        obj->efile.maps_shndx = idx;
-               else if (sh.sh_type == SHT_SYMTAB) {
+               else if (strcmp(name, BTF_ELF_SEC) == 0) {
+                       obj->btf = btf__new(data->d_buf, data->d_size,
+                                           __pr_debug);
+                       if (IS_ERR(obj->btf)) {
+                               pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+                                          BTF_ELF_SEC, PTR_ERR(obj->btf));
+                               obj->btf = NULL;
+                       }
+               } else if (sh.sh_type == SHT_SYMTAB) {
                        if (obj->efile.symbols) {
                                pr_warning("bpf: multiple SYMTAB in %s\n",
                                           obj->path);
@@ -996,33 +1012,127 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
        return 0;
 }
 
+static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+{
+       struct bpf_map_def *def = &map->def;
+       const size_t max_name = 256;
+       int64_t key_size, value_size;
+       int32_t key_id, value_id;
+       char name[max_name];
+
+       /* Find key type by name from BTF */
+       if (snprintf(name, max_name, "%s_key", map->name) == max_name) {
+               pr_warning("map:%s length of BTF key_type:%s_key is too long\n",
+                          map->name, map->name);
+               return -EINVAL;
+       }
+
+       key_id = btf__find_by_name(btf, name);
+       if (key_id < 0) {
+               pr_debug("map:%s key_type:%s cannot be found in BTF\n",
+                        map->name, name);
+               return key_id;
+       }
+
+       key_size = btf__resolve_size(btf, key_id);
+       if (key_size < 0) {
+               pr_warning("map:%s key_type:%s cannot get the BTF type_size\n",
+                          map->name, name);
+               return key_size;
+       }
+
+       if (def->key_size != key_size) {
+               pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
+                          map->name, name, key_size, def->key_size);
+               return -EINVAL;
+       }
+
+       /* Find value type from BTF */
+       if (snprintf(name, max_name, "%s_value", map->name) == max_name) {
+               pr_warning("map:%s length of BTF value_type:%s_value is too long\n",
+                         map->name, map->name);
+               return -EINVAL;
+       }
+
+       value_id = btf__find_by_name(btf, name);
+       if (value_id < 0) {
+               pr_debug("map:%s value_type:%s cannot be found in BTF\n",
+                        map->name, name);
+               return value_id;
+       }
+
+       value_size = btf__resolve_size(btf, value_id);
+       if (value_size < 0) {
+               pr_warning("map:%s value_type:%s cannot get the BTF type_size\n",
+                          map->name, name);
+               return value_size;
+       }
+
+       if (def->value_size != value_size) {
+               pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
+                          map->name, name, value_size, def->value_size);
+               return -EINVAL;
+       }
+
+       map->btf_key_id = key_id;
+       map->btf_value_id = value_id;
+
+       return 0;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
+       struct bpf_create_map_attr create_attr = {};
        unsigned int i;
+       int err;
 
        for (i = 0; i < obj->nr_maps; i++) {
-               struct bpf_map_def *def = &obj->maps[i].def;
-               int *pfd = &obj->maps[i].fd;
-
-               *pfd = bpf_create_map_name(def->type,
-                                          obj->maps[i].name,
-                                          def->key_size,
-                                          def->value_size,
-                                          def->max_entries,
-                                          def->map_flags);
+               struct bpf_map *map = &obj->maps[i];
+               struct bpf_map_def *def = &map->def;
+               int *pfd = &map->fd;
+
+               create_attr.name = map->name;
+               create_attr.map_ifindex = map->map_ifindex;
+               create_attr.map_type = def->type;
+               create_attr.map_flags = def->map_flags;
+               create_attr.key_size = def->key_size;
+               create_attr.value_size = def->value_size;
+               create_attr.max_entries = def->max_entries;
+               create_attr.btf_fd = 0;
+               create_attr.btf_key_id = 0;
+               create_attr.btf_value_id = 0;
+
+               if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+                       create_attr.btf_fd = btf__fd(obj->btf);
+                       create_attr.btf_key_id = map->btf_key_id;
+                       create_attr.btf_value_id = map->btf_value_id;
+               }
+
+               *pfd = bpf_create_map_xattr(&create_attr);
+               if (*pfd < 0 && create_attr.btf_key_id) {
+                       pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+                                  map->name, strerror(errno), errno);
+                       create_attr.btf_fd = 0;
+                       create_attr.btf_key_id = 0;
+                       create_attr.btf_value_id = 0;
+                       map->btf_key_id = 0;
+                       map->btf_value_id = 0;
+                       *pfd = bpf_create_map_xattr(&create_attr);
+               }
+
                if (*pfd < 0) {
                        size_t j;
-                       int err = *pfd;
 
+                       err = *pfd;
                        pr_warning("failed to create map (name: '%s'): %s\n",
-                                  obj->maps[i].name,
+                                  map->name,
                                   strerror(errno));
                        for (j = 0; j < i; j++)
                                zclose(obj->maps[j].fd);
                        return err;
                }
-               pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd);
+               pr_debug("create map %s: fd=%d\n", map->name, *pfd);
        }
 
        return 0;
@@ -1166,7 +1276,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 static int
 load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
             const char *name, struct bpf_insn *insns, int insns_cnt,
-            char *license, u32 kern_version, int *pfd)
+            char *license, u32 kern_version, int *pfd, int prog_ifindex)
 {
        struct bpf_load_program_attr load_attr;
        char *log_buf;
@@ -1180,6 +1290,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
        load_attr.insns_cnt = insns_cnt;
        load_attr.license = license;
        load_attr.kern_version = kern_version;
+       load_attr.prog_ifindex = prog_ifindex;
 
        if (!load_attr.insns || !load_attr.insns_cnt)
                return -EINVAL;
@@ -1261,7 +1372,8 @@ bpf_program__load(struct bpf_program *prog,
                }
                err = load_program(prog->type, prog->expected_attach_type,
                                   prog->name, prog->insns, prog->insns_cnt,
-                                  license, kern_version, &fd);
+                                  license, kern_version, &fd,
+                                  prog->prog_ifindex);
                if (!err)
                        prog->instances.fds[0] = fd;
                goto out;
@@ -1292,7 +1404,8 @@ bpf_program__load(struct bpf_program *prog,
                err = load_program(prog->type, prog->expected_attach_type,
                                   prog->name, result.new_insn_ptr,
                                   result.new_insn_cnt,
-                                  license, kern_version, &fd);
+                                  license, kern_version, &fd,
+                                  prog->prog_ifindex);
 
                if (err) {
                        pr_warning("Loading the %dth instance of program '%s' failed\n",
@@ -1331,9 +1444,37 @@ bpf_object__load_progs(struct bpf_object *obj)
        return 0;
 }
 
-static int bpf_object__validate(struct bpf_object *obj)
+static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
+{
+       switch (type) {
+       case BPF_PROG_TYPE_SOCKET_FILTER:
+       case BPF_PROG_TYPE_SCHED_CLS:
+       case BPF_PROG_TYPE_SCHED_ACT:
+       case BPF_PROG_TYPE_XDP:
+       case BPF_PROG_TYPE_CGROUP_SKB:
+       case BPF_PROG_TYPE_CGROUP_SOCK:
+       case BPF_PROG_TYPE_LWT_IN:
+       case BPF_PROG_TYPE_LWT_OUT:
+       case BPF_PROG_TYPE_LWT_XMIT:
+       case BPF_PROG_TYPE_SOCK_OPS:
+       case BPF_PROG_TYPE_SK_SKB:
+       case BPF_PROG_TYPE_CGROUP_DEVICE:
+       case BPF_PROG_TYPE_SK_MSG:
+       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+               return false;
+       case BPF_PROG_TYPE_UNSPEC:
+       case BPF_PROG_TYPE_KPROBE:
+       case BPF_PROG_TYPE_TRACEPOINT:
+       case BPF_PROG_TYPE_PERF_EVENT:
+       case BPF_PROG_TYPE_RAW_TRACEPOINT:
+       default:
+               return true;
+       }
+}
+
+static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
 {
-       if (obj->kern_version == 0) {
+       if (needs_kver && obj->kern_version == 0) {
                pr_warning("%s doesn't provide kernel version\n",
                           obj->path);
                return -LIBBPF_ERRNO__KVERSION;
@@ -1342,7 +1483,8 @@ static int bpf_object__validate(struct bpf_object *obj)
 }
 
 static struct bpf_object *
-__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
+__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
+                  bool needs_kver)
 {
        struct bpf_object *obj;
        int err;
@@ -1360,7 +1502,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
        CHECK_ERR(bpf_object__check_endianness(obj), err, out);
        CHECK_ERR(bpf_object__elf_collect(obj), err, out);
        CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
-       CHECK_ERR(bpf_object__validate(obj), err, out);
+       CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
 
        bpf_object__elf_finish(obj);
        return obj;
@@ -1377,7 +1519,7 @@ struct bpf_object *bpf_object__open(const char *path)
 
        pr_debug("loading %s\n", path);
 
-       return __bpf_object__open(path, NULL, 0);
+       return __bpf_object__open(path, NULL, 0, true);
 }
 
 struct bpf_object *bpf_object__open_buffer(void *obj_buf,
@@ -1400,7 +1542,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
        pr_debug("loading object '%s' from buffer\n",
                 name);
 
-       return __bpf_object__open(name, obj_buf, obj_buf_sz);
+       return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -1641,6 +1783,7 @@ void bpf_object__close(struct bpf_object *obj)
 
        bpf_object__elf_finish(obj);
        bpf_object__unload(obj);
+       btf__free(obj->btf);
 
        for (i = 0; i < obj->nr_maps; i++) {
                zfree(&obj->maps[i].name);
@@ -1692,6 +1835,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
        return obj ? obj->kern_version : 0;
 }
 
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+       return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
                         bpf_object_clear_priv_t clear_priv)
 {
@@ -1845,11 +1993,12 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
-                                                enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+                                          enum bpf_attach_type type)
 {
        prog->expected_attach_type = type;
 }
@@ -1859,6 +2008,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 
 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
 
+#define BPF_S_PROG_SEC(string, ptype) \
+       BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
+
 #define BPF_SA_PROG_SEC(string, ptype) \
        BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
 
@@ -1874,6 +2026,7 @@ static const struct {
        BPF_PROG_SEC("classifier",      BPF_PROG_TYPE_SCHED_CLS),
        BPF_PROG_SEC("action",          BPF_PROG_TYPE_SCHED_ACT),
        BPF_PROG_SEC("tracepoint/",     BPF_PROG_TYPE_TRACEPOINT),
+       BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
        BPF_PROG_SEC("xdp",             BPF_PROG_TYPE_XDP),
        BPF_PROG_SEC("perf_event",      BPF_PROG_TYPE_PERF_EVENT),
        BPF_PROG_SEC("cgroup/skb",      BPF_PROG_TYPE_CGROUP_SKB),
@@ -1889,10 +2042,13 @@ static const struct {
        BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
        BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
        BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+       BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
+       BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
 };
 
 #undef BPF_PROG_SEC
 #undef BPF_PROG_SEC_FULL
+#undef BPF_S_PROG_SEC
 #undef BPF_SA_PROG_SEC
 
 static int bpf_program__identify_section(struct bpf_program *prog)
@@ -1929,6 +2085,16 @@ const char *bpf_map__name(struct bpf_map *map)
        return map ? map->name : NULL;
 }
 
+uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
+{
+       return map ? map->btf_key_id : 0;
+}
+
+uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
+{
+       return map ? map->btf_value_id : 0;
+}
+
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
                     bpf_map_clear_priv_t clear_priv)
 {
@@ -2028,13 +2194,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        enum bpf_attach_type expected_attach_type;
        enum bpf_prog_type prog_type;
        struct bpf_object *obj;
+       struct bpf_map *map;
        int section_idx;
        int err;
 
        if (!attr)
                return -EINVAL;
+       if (!attr->file)
+               return -EINVAL;
 
-       obj = bpf_object__open(attr->file);
+       obj = __bpf_object__open(attr->file, NULL, 0,
+                                bpf_prog_type__needs_kver(attr->prog_type));
        if (IS_ERR_OR_NULL(obj))
                return -ENOENT;
 
@@ -2044,6 +2214,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                 * section name.
                 */
                prog_type = attr->prog_type;
+               prog->prog_ifindex = attr->ifindex;
                expected_attach_type = attr->expected_attach_type;
                if (prog_type == BPF_PROG_TYPE_UNSPEC) {
                        section_idx = bpf_program__identify_section(prog);
@@ -2064,6 +2235,10 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                        first_prog = prog;
        }
 
+       bpf_map__for_each(map, obj) {
+               map->map_ifindex = attr->ifindex;
+       }
+
        if (!first_prog) {
                pr_warning("object file doesn't contain bpf program\n");
                bpf_object__close(obj);
@@ -2080,3 +2255,63 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        *prog_fd = bpf_program__fd(first_prog);
        return 0;
 }
+
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mem, unsigned long size,
+                          unsigned long page_size, void **buf, size_t *buf_len,
+                          bpf_perf_event_print_t fn, void *priv)
+{
+       volatile struct perf_event_mmap_page *header = mem;
+       __u64 data_tail = header->data_tail;
+       __u64 data_head = header->data_head;
+       void *base, *begin, *end;
+       int ret;
+
+       asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+       if (data_head == data_tail)
+               return LIBBPF_PERF_EVENT_CONT;
+
+       base = ((char *)header) + page_size;
+
+       begin = base + data_tail % size;
+       end = base + data_head % size;
+
+       while (begin != end) {
+               struct perf_event_header *ehdr;
+
+               ehdr = begin;
+               if (begin + ehdr->size > base + size) {
+                       long len = base + size - begin;
+
+                       if (*buf_len < ehdr->size) {
+                               free(*buf);
+                               *buf = malloc(ehdr->size);
+                               if (!*buf) {
+                                       ret = LIBBPF_PERF_EVENT_ERROR;
+                                       break;
+                               }
+                               *buf_len = ehdr->size;
+                       }
+
+                       memcpy(*buf, begin, len);
+                       memcpy(*buf + len, base, ehdr->size - len);
+                       ehdr = (void *)*buf;
+                       begin = base + ehdr->size - len;
+               } else if (begin + ehdr->size == base + size) {
+                       begin = base;
+               } else {
+                       begin += ehdr->size;
+               }
+
+               ret = fn(ehdr, priv);
+               if (ret != LIBBPF_PERF_EVENT_CONT)
+                       break;
+
+               data_tail += ehdr->size;
+       }
+
+       __sync_synchronize(); /* smp_mb() */
+       header->data_tail = data_tail;
+
+       return ret;
+}