Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author David S. Miller <davem@davemloft.net>
Sun, 17 Feb 2019 06:56:34 +0000 (22:56 -0800)
committer David S. Miller <davem@davemloft.net>
Sun, 17 Feb 2019 06:56:34 +0000 (22:56 -0800)
Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-02-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Numerous libbpf API improvements, from Andrii, Andrey and Yonghong.

2) Testing of all BPF programs in alu32 mode, from Jiong.

3) skb->sk access and the bpf_sk_fullsock() and bpf_tcp_sock() helpers,
   from Martin (a usage sketch follows the sign-off below).

4) Support for IP encap in LWT BPF programs, from Peter.

5) Removal of the dead XDP_QUERY_XSK_UMEM code, from Jan.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
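For illustration, a minimal sketch (not part of this series) of how a
cgroup-skb program can use the new skb->sk access together with the
bpf_sk_fullsock() and bpf_tcp_sock() helpers from (3). Program and section
names are illustrative, and the helper declarations are assumed to come from
tools/testing/selftests/bpf/bpf_helpers.h as extended by this series:

/* Hypothetical cgroup/skb egress program. Each sock pointer must be
 * NULL-checked before use; the verifier enforces this through the new
 * PTR_TO_SOCK_COMMON_OR_NULL and PTR_TO_TCP_SOCK_OR_NULL reg types.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("cgroup_skb/egress")
int read_tcp_fields(struct __sk_buff *skb)
{
        struct bpf_sock *sk = skb->sk;  /* may be NULL */
        struct bpf_tcp_sock *tp;

        if (!sk)
                return 1;               /* 1 == allow the packet */
        sk = bpf_sk_fullsock(sk);       /* full bpf_sock, or NULL */
        if (!sk)
                return 1;
        tp = bpf_tcp_sock(sk);          /* bpf_tcp_sock, or NULL if not TCP */
        if (!tp)
                return 1;
        /* All bpf_tcp_sock fields are readable here, e.g.: */
        if (tp->snd_cwnd == 0)
                return 0;               /* drop; arbitrary example policy */
        return 1;
}

char _license[] SEC("license") = "GPL";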
92 files changed:
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/i40e/i40e_xsk.h
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/offload.c
drivers/net/netdevsim/bpf.c
include/linux/bpf.h
include/linux/netdevice.h
include/net/addrconf.h
include/net/lwtunnel.h
include/uapi/linux/bpf.h
kernel/bpf/offload.c
kernel/bpf/verifier.c
net/Kconfig
net/core/filter.c
net/core/lwt_bpf.c
net/ipv6/addrconf_core.c
net/ipv6/af_inet6.c
tools/bpf/bpftool/Documentation/bpftool.rst
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/if_link.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_helpers.h
tools/testing/selftests/bpf/bpf_util.h
tools/testing/selftests/bpf/progs/bpf_flow.c [moved from tools/testing/selftests/bpf/bpf_flow.c with 100% similarity]
tools/testing/selftests/bpf/progs/connect4_prog.c [moved from tools/testing/selftests/bpf/connect4_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/connect6_prog.c [moved from tools/testing/selftests/bpf/connect6_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/dev_cgroup.c [moved from tools/testing/selftests/bpf/dev_cgroup.c with 100% similarity]
tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c [moved from tools/testing/selftests/bpf/get_cgroup_id_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/netcnt_prog.c [moved from tools/testing/selftests/bpf/netcnt_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/sample_map_ret0.c [moved from tools/testing/selftests/bpf/sample_map_ret0.c with 100% similarity]
tools/testing/selftests/bpf/progs/sample_ret0.c [moved from tools/testing/selftests/bpf/sample_ret0.c with 100% similarity]
tools/testing/selftests/bpf/progs/sendmsg4_prog.c [moved from tools/testing/selftests/bpf/sendmsg4_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/sendmsg6_prog.c [moved from tools/testing/selftests/bpf/sendmsg6_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/socket_cookie_prog.c [moved from tools/testing/selftests/bpf/socket_cookie_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c [moved from tools/testing/selftests/bpf/sockmap_parse_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c [moved from tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c [moved from tools/testing/selftests/bpf/sockmap_verdict_prog.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_adjust_tail.c [moved from tools/testing/selftests/bpf/test_adjust_tail.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_btf_haskv.c [moved from tools/testing/selftests/bpf/test_btf_haskv.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_btf_nokv.c [moved from tools/testing/selftests/bpf/test_btf_nokv.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c [moved from tools/testing/selftests/bpf/test_get_stack_rawtp.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_l4lb.c [moved from tools/testing/selftests/bpf/test_l4lb.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_l4lb_noinline.c [moved from tools/testing/selftests/bpf/test_l4lb_noinline.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c [moved from tools/testing/selftests/bpf/test_lirc_mode2_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_lwt_seg6local.c [moved from tools/testing/selftests/bpf/test_lwt_seg6local.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_map_in_map.c [moved from tools/testing/selftests/bpf/test_map_in_map.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_map_lock.c [moved from tools/testing/selftests/bpf/test_map_lock.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_obj_id.c [moved from tools/testing/selftests/bpf/test_obj_id.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_pkt_access.c [moved from tools/testing/selftests/bpf/test_pkt_access.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_pkt_md_access.c [moved from tools/testing/selftests/bpf/test_pkt_md_access.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_queue_map.c [moved from tools/testing/selftests/bpf/test_queue_map.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c [moved from tools/testing/selftests/bpf/test_select_reuseport_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c [moved from tools/testing/selftests/bpf/test_sk_lookup_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c [moved from tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_sock_fields_kern.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sockhash_kern.c [moved from tools/testing/selftests/bpf/test_sockhash_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_sockmap_kern.c [moved from tools/testing/selftests/bpf/test_sockmap_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_spin_lock.c [moved from tools/testing/selftests/bpf/test_spin_lock.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_stack_map.c [moved from tools/testing/selftests/bpf/test_stack_map.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c [moved from tools/testing/selftests/bpf/test_stacktrace_build_id.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_stacktrace_map.c [moved from tools/testing/selftests/bpf/test_stacktrace_map.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_tcp_estats.c [moved from tools/testing/selftests/bpf/test_tcp_estats.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c [moved from tools/testing/selftests/bpf/test_tcpbpf_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c [moved from tools/testing/selftests/bpf/test_tcpnotify_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_tracepoint.c [moved from tools/testing/selftests/bpf/test_tracepoint.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_tunnel_kern.c [moved from tools/testing/selftests/bpf/test_tunnel_kern.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_xdp.c [moved from tools/testing/selftests/bpf/test_xdp.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_xdp_meta.c [moved from tools/testing/selftests/bpf/test_xdp_meta.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_xdp_noinline.c [moved from tools/testing/selftests/bpf/test_xdp_noinline.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_xdp_redirect.c [moved from tools/testing/selftests/bpf/test_xdp_redirect.c with 100% similarity]
tools/testing/selftests/bpf/progs/test_xdp_vlan.c [moved from tools/testing/selftests/bpf/test_xdp_vlan.c with 100% similarity]
tools/testing/selftests/bpf/progs/xdp_dummy.c [moved from tools/testing/selftests/bpf/xdp_dummy.c with 100% similarity]
tools/testing/selftests/bpf/test_btf.c
tools/testing/selftests/bpf/test_lwt_ip_encap.sh [new file with mode: 0755]
tools/testing/selftests/bpf/test_sock.c
tools/testing/selftests/bpf/test_sock_fields.c [new file with mode: 0644]
tools/testing/selftests/bpf/verifier/ref_tracking.c
tools/testing/selftests/bpf/verifier/sock.c [new file with mode: 0644]
tools/testing/selftests/bpf/verifier/unpriv.c

index 44856a84738da6afb9c62a5072d8d46a4b13c9e8..5e74a512784938891d70ed2340ddafd09e0e336f 100644
@@ -12128,9 +12128,6 @@ static int i40e_xdp(struct net_device *dev,
        case XDP_QUERY_PROG:
                xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
                return 0;
-       case XDP_QUERY_XSK_UMEM:
-               return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
-                                          xdp->xsk.queue_id);
        case XDP_SETUP_XSK_UMEM:
                return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
                                           xdp->xsk.queue_id);
index 96d849460d9babb8f9f12d28bf204f21f8e8f0a5..e190a2c2b9ff412528b8e52c2f47e3092aaa5c80 100644
@@ -154,34 +154,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
        return 0;
 }
 
-/**
- * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM
- * @vsi: Current VSI
- * @umem: UMEM associated to the ring, if any
- * @qid: Rx ring to associate UMEM to
- *
- * This function will store, if any, the UMEM associated to certain ring.
- *
- * Returns 0 on success, <0 on failure
- **/
-int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
-                       u16 qid)
-{
-       struct net_device *netdev = vsi->netdev;
-       struct xdp_umem *queried_umem;
-
-       if (vsi->type != I40E_VSI_MAIN)
-               return -EINVAL;
-
-       queried_umem = xdp_get_umem_from_qid(netdev, qid);
-
-       if (!queried_umem)
-               return -EINVAL;
-
-       *umem = queried_umem;
-       return 0;
-}
-
 /**
  * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
  * @vsi: Current VSI
index 9038c5d5cf08334659aba347d6329071bc764d22..8cc0a2e7d9a2fa3253ceaec272099ace792d9e44 100644
@@ -10,8 +10,6 @@ struct zero_copy_allocator;
 
 int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
 int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
-int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
-                       u16 qid);
 int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
                        u16 qid);
 void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
index b53087a980ef481ef4ca05a1d4d7cf4e04269164..38c430b94ae3d2e278601dc428fe59a0147d0cea 100644
@@ -10280,9 +10280,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                xdp->prog_id = adapter->xdp_prog ?
                        adapter->xdp_prog->aux->id : 0;
                return 0;
-       case XDP_QUERY_XSK_UMEM:
-               return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem,
-                                           xdp->xsk.queue_id);
        case XDP_SETUP_XSK_UMEM:
                return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
                                            xdp->xsk.queue_id);
index 53d4089f5644958103a28df739f605c850d69d21..d93a690aff74f404549d067947e1faf0a4d08a02 100644
@@ -30,8 +30,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
 
 struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
                                struct ixgbe_ring *ring);
-int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
-                        u16 qid);
 int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
                         u16 qid);
 
index 65c3e2c979d4d89775d0d3fe9afad63a3046d075..98870707b51ad82b65eee2ebed8431a0e83d5ae4 100644
@@ -174,23 +174,6 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
        return 0;
 }
 
-int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
-                        u16 qid)
-{
-       if (qid >= adapter->num_rx_queues)
-               return -EINVAL;
-
-       if (adapter->xsk_umems) {
-               if (qid >= adapter->num_xsk_umems)
-                       return -EINVAL;
-               *umem = adapter->xsk_umems[qid];
-               return 0;
-       }
-
-       *umem = NULL;
-       return 0;
-}
-
 int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
                         u16 qid)
 {
index dccae03192045f302091a92945e4e09047c62125..275de9f4c61c635c69c5a6ac657e22511762998a 100644
@@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app)
                app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
        }
 
-       bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops);
+       bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf);
        err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
        if (err)
                goto err_free_neutral_maps;
index 55c7dbf8b4214be40d1e3b85d4a2a80df2637a41..15dce97650a5af251d0be613e66735221270a195 100644
@@ -185,8 +185,6 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
 
 static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
 {
-       struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
-       struct nfp_app *app = nn->app;
        struct nfp_prog *nfp_prog;
        int ret;
 
@@ -197,7 +195,7 @@ static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
 
        INIT_LIST_HEAD(&nfp_prog->insns);
        nfp_prog->type = prog->type;
-       nfp_prog->bpf = app->priv;
+       nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev);
 
        ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len);
        if (ret)
index 172b271c8bd269d9bb7d7a84669f69d63fae0fc4..f92c43453ec67e1408860561d6a9fe221158b7b3 100644
@@ -248,7 +248,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 
 static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
 {
-       struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev);
+       struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev);
 
        if (!ns->bpf_bind_accept)
                return -EOPNOTSUPP;
@@ -589,7 +589,8 @@ int nsim_bpf_init(struct netdevsim *ns)
                if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
                        return -ENOMEM;
 
-               ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops);
+               ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops,
+                                                          ns);
                err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
                if (err)
                        return err;
index bd169a7bcc93f251776f2274aad134e03084a9d4..de18227b3d95941c800bfbaaffb68b976d2e9b62 100644
@@ -194,6 +194,7 @@ enum bpf_arg_type {
        ARG_ANYTHING,           /* any (initialized) argument is ok */
        ARG_PTR_TO_SOCKET,      /* pointer to bpf_sock */
        ARG_PTR_TO_SPIN_LOCK,   /* pointer to bpf_spin_lock */
+       ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
 };
 
 /* type of values returned from helper functions */
@@ -203,6 +204,7 @@ enum bpf_return_type {
        RET_PTR_TO_MAP_VALUE,           /* returns a pointer to map elem value */
        RET_PTR_TO_MAP_VALUE_OR_NULL,   /* returns a pointer to map elem value or NULL */
        RET_PTR_TO_SOCKET_OR_NULL,      /* returns a pointer to a socket or NULL */
+       RET_PTR_TO_TCP_SOCK_OR_NULL,    /* returns a pointer to a tcp_sock or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -256,6 +258,10 @@ enum bpf_reg_type {
        PTR_TO_FLOW_KEYS,        /* reg points to bpf_flow_keys */
        PTR_TO_SOCKET,           /* reg points to struct bpf_sock */
        PTR_TO_SOCKET_OR_NULL,   /* reg points to struct bpf_sock or NULL */
+       PTR_TO_SOCK_COMMON,      /* reg points to sock_common */
+       PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
+       PTR_TO_TCP_SOCK,         /* reg points to struct tcp_sock */
+       PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -767,8 +773,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
 struct bpf_offload_dev *
-bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops);
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv);
 void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
+void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev);
 int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
                                    struct net_device *netdev);
 void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
@@ -920,6 +927,9 @@ void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 #if defined(CONFIG_NET)
+bool bpf_sock_common_is_valid_access(int off, int size,
+                                    enum bpf_access_type type,
+                                    struct bpf_insn_access_aux *info);
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
                              struct bpf_insn_access_aux *info);
 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
@@ -928,6 +938,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
                                struct bpf_prog *prog,
                                u32 *target_size);
 #else
+static inline bool bpf_sock_common_is_valid_access(int off, int size,
+                                                  enum bpf_access_type type,
+                                                  struct bpf_insn_access_aux *info)
+{
+       return false;
+}
 static inline bool bpf_sock_is_valid_access(int off, int size,
                                            enum bpf_access_type type,
                                            struct bpf_insn_access_aux *info)
@@ -944,4 +960,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 }
 #endif
 
+#ifdef CONFIG_INET
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+                                 struct bpf_insn_access_aux *info);
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+                                   const struct bpf_insn *si,
+                                   struct bpf_insn *insn_buf,
+                                   struct bpf_prog *prog,
+                                   u32 *target_size);
+#else
+static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
+                                               enum bpf_access_type type,
+                                               struct bpf_insn_access_aux *info)
+{
+       return false;
+}
+
+static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+                                                 const struct bpf_insn *si,
+                                                 struct bpf_insn *insn_buf,
+                                                 struct bpf_prog *prog,
+                                                 u32 *target_size)
+{
+       return 0;
+}
+#endif /* CONFIG_INET */
+
 #endif /* _LINUX_BPF_H */
index 1fb733f38a47a0ee757f26a78438809f87d34173..aab4d9f6613dc41e8cabcd6be1f427e1ad31a631 100644
@@ -868,7 +868,6 @@ enum bpf_netdev_command {
        /* BPF program for offload callbacks, invoked at program load time. */
        BPF_OFFLOAD_MAP_ALLOC,
        BPF_OFFLOAD_MAP_FREE,
-       XDP_QUERY_XSK_UMEM,
        XDP_SETUP_XSK_UMEM,
 };
 
@@ -895,10 +894,10 @@ struct netdev_bpf {
                struct {
                        struct bpf_offloaded_map *offmap;
                };
-               /* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
+               /* XDP_SETUP_XSK_UMEM */
                struct {
-                       struct xdp_umem *umem; /* out for query*/
-                       u16 queue_id; /* in for query */
+                       struct xdp_umem *umem;
+                       u16 queue_id;
                } xsk;
        };
 };
index 20d523ee2fec3d69819a4587cc044fcadc3c040c..269ec27385e91f3ba15710b07e5e6caab21f5169 100644
@@ -248,6 +248,7 @@ struct ipv6_stub {
                                 const struct in6_addr *addr);
        int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6);
+       int (*ipv6_route_input)(struct sk_buff *skb);
 
        struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
        struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
index 33fd9ba7e0e5a29d5dd1705112a9213f08531612..671113bcb2cc20e2b4ff56f4160a9241cebce477 100644
@@ -126,6 +126,8 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
 int lwtunnel_xmit(struct sk_buff *skb);
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+                         bool ingress);
 
 static inline void lwtunnel_set_redirect(struct dst_entry *dst)
 {
index 1777fa0c61e4a2dd9f15b1908a91a8582c8cad98..bcdd2474eee7549655358698cb509b95b7babc27 100644
@@ -2016,6 +2016,19 @@ union bpf_attr {
  *                     Only works if *skb* contains an IPv6 packet. Insert a
  *                     Segment Routing Header (**struct ipv6_sr_hdr**) inside
  *                     the IPv6 header.
+ *             **BPF_LWT_ENCAP_IP**
+ *                     IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ *                     must be IPv4 or IPv6, followed by zero or more
+ *                     additional headers, up to LWT_BPF_MAX_HEADROOM total
+ *                     bytes in all prepended headers. Please note that
+ *                     if skb_is_gso(skb) is true, no more than two headers
+ *                     can be prepended, and the inner header, if present,
+ *                     should be either GRE or UDP/GUE.
+ *
+ *             **BPF_LWT_ENCAP_SEG6**\* types can be called by BPF programs of
+ *             type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called
+ *             by BPF programs of types BPF_PROG_TYPE_LWT_IN and
+ *             BPF_PROG_TYPE_LWT_XMIT.
  *
 *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2329,6 +2342,23 @@ union bpf_attr {
  *             "**y**".
  *     Return
  *             0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ *     Description
+ *             This helper gets a **struct bpf_sock** pointer such
+ *             that all the fields in bpf_sock can be accessed.
+ *     Return
+ *             A **struct bpf_sock** pointer on success, or NULL in
+ *             case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ *     Description
+ *             This helper gets a **struct bpf_tcp_sock** pointer from a
+ *             **struct bpf_sock** pointer.
+ *
+ *     Return
+ *             A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *             case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2425,7 +2455,9 @@ union bpf_attr {
        FN(msg_pop_data),               \
        FN(rc_pointer_rel),             \
        FN(spin_lock),                  \
-       FN(spin_unlock),
+       FN(spin_unlock),                \
+       FN(sk_fullsock),                \
+       FN(tcp_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2498,7 +2530,8 @@ enum bpf_hdr_start_off {
 /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
 enum bpf_lwt_encap_mode {
        BPF_LWT_ENCAP_SEG6,
-       BPF_LWT_ENCAP_SEG6_INLINE
+       BPF_LWT_ENCAP_SEG6_INLINE,
+       BPF_LWT_ENCAP_IP,
 };
 
 #define __bpf_md_ptr(type, name)       \
@@ -2545,6 +2578,7 @@ struct __sk_buff {
        __u64 tstamp;
        __u32 wire_len;
        __u32 gso_segs;
+       __bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 struct bpf_tunnel_key {
@@ -2586,7 +2620,15 @@ enum bpf_ret_code {
        BPF_DROP = 2,
        /* 3-6 reserved */
        BPF_REDIRECT = 7,
-       /* >127 are reserved for prog type specific return codes */
+       /* >127 are reserved for prog type specific return codes.
+        *
+        * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
+        *    BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
+        *    changed and should be routed based on its new L3 header.
+        *    (This is an L3 redirect, as opposed to L2 redirect
+        *    represented by BPF_REDIRECT above).
+        */
+       BPF_LWT_REROUTE = 128,
 };
 
 struct bpf_sock {
@@ -2596,14 +2638,52 @@ struct bpf_sock {
        __u32 protocol;
        __u32 mark;
        __u32 priority;
-       __u32 src_ip4;          /* Allows 1,2,4-byte read.
-                                * Stored in network byte order.
+       /* IP address also allows 1 and 2 bytes access */
+       __u32 src_ip4;
+       __u32 src_ip6[4];
+       __u32 src_port;         /* host byte order */
+       __u32 dst_port;         /* network byte order */
+       __u32 dst_ip4;
+       __u32 dst_ip6[4];
+       __u32 state;
+};
+
+struct bpf_tcp_sock {
+       __u32 snd_cwnd;         /* Sending congestion window            */
+       __u32 srtt_us;          /* smoothed round trip time << 3 in usecs */
+       __u32 rtt_min;
+       __u32 snd_ssthresh;     /* Slow start size threshold            */
+       __u32 rcv_nxt;          /* What we want to receive next         */
+       __u32 snd_nxt;          /* Next sequence we send                */
+       __u32 snd_una;          /* First byte we want an ack for        */
+       __u32 mss_cache;        /* Cached effective mss, not including SACKS */
+       __u32 ecn_flags;        /* ECN status bits.                     */
+       __u32 rate_delivered;   /* saved rate sample: packets delivered */
+       __u32 rate_interval_us; /* saved rate sample: time elapsed */
+       __u32 packets_out;      /* Packets which are "in flight"        */
+       __u32 retrans_out;      /* Retransmitted packets out            */
+       __u32 total_retrans;    /* Total retransmits for entire connection */
+       __u32 segs_in;          /* RFC4898 tcpEStatsPerfSegsIn
+                                * total number of segments in.
                                 */
-       __u32 src_ip6[4];       /* Allows 1,2,4-byte read.
-                                * Stored in network byte order.
+       __u32 data_segs_in;     /* RFC4898 tcpEStatsPerfDataSegsIn
+                                * total number of data segments in.
+                                */
+       __u32 segs_out;         /* RFC4898 tcpEStatsPerfSegsOut
+                                * The total number of segments sent.
+                                */
+       __u32 data_segs_out;    /* RFC4898 tcpEStatsPerfDataSegsOut
+                                * total number of data segments sent.
+                                */
+       __u32 lost_out;         /* Lost packets                 */
+       __u32 sacked_out;       /* SACK'd packets                       */
+       __u64 bytes_received;   /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+                                * sum(delta(rcv_nxt)), or how many bytes
+                                * were acked.
                                 */
-       __u32 src_port;         /* Allows 4-byte read.
-                                * Stored in host byte order
+       __u64 bytes_acked;      /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+                                * sum(delta(snd_una)), or how many bytes
+                                * were acked.
                                 */
 };
 
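To make the new BPF_LWT_ENCAP_IP mode and BPF_LWT_REROUTE return code above
concrete, a hedged sketch (not part of this series) of a BPF_PROG_TYPE_LWT_XMIT
program that prepends an outer IPv4 header and asks the stack to re-route the
packet. The addresses are made up, and a real program must also fill in
tot_len and the IP checksum:

/* Hypothetical lwt_xmit program using the new BPF_LWT_ENCAP_IP mode. */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/in.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"

SEC("lwt_xmit")
int encap_ipip(struct __sk_buff *skb)
{
        struct iphdr hdr = {
                .version  = 4,
                .ihl      = 5,
                .ttl      = 64,
                .protocol = IPPROTO_IPIP,               /* plain IP-in-IP */
                .saddr    = bpf_htonl(0x0a000001),      /* 10.0.0.1, illustrative */
                .daddr    = bpf_htonl(0x0a000002),      /* 10.0.0.2, illustrative */
        };

        if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
                return BPF_DROP;

        /* The skb now carries a new outer L3 header, so return
         * BPF_LWT_REROUTE to have it routed again based on that header
         * (an L3 redirect, as opposed to the L2 redirect of BPF_REDIRECT).
         */
        return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";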
index 39dba8c9033128d75817307accd7b8d277637fc6..ba635209ae9a8bca1c6d59b0e77814c07f7cc67c 100644
@@ -35,6 +35,7 @@ static DECLARE_RWSEM(bpf_devs_lock);
 struct bpf_offload_dev {
        const struct bpf_prog_offload_ops *ops;
        struct list_head netdevs;
+       void *priv;
 };
 
 struct bpf_offload_netdev {
@@ -669,7 +670,7 @@ unlock:
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
 
 struct bpf_offload_dev *
-bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
 {
        struct bpf_offload_dev *offdev;
        int err;
@@ -688,6 +689,7 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
                return ERR_PTR(-ENOMEM);
 
        offdev->ops = ops;
+       offdev->priv = priv;
        INIT_LIST_HEAD(&offdev->netdevs);
 
        return offdev;
@@ -700,3 +702,9 @@ void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
        kfree(offdev);
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
+
+void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
+{
+       return offdev->priv;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);
index b63bc77af2d135f44bf2737439d73dc50ef0deea..1b9496c413833ad1f88c98eeee387f349cf241e0 100644
@@ -331,10 +331,19 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
               type == PTR_TO_PACKET_META;
 }
 
+static bool type_is_sk_pointer(enum bpf_reg_type type)
+{
+       return type == PTR_TO_SOCKET ||
+               type == PTR_TO_SOCK_COMMON ||
+               type == PTR_TO_TCP_SOCK;
+}
+
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
        return type == PTR_TO_MAP_VALUE_OR_NULL ||
-              type == PTR_TO_SOCKET_OR_NULL;
+              type == PTR_TO_SOCKET_OR_NULL ||
+              type == PTR_TO_SOCK_COMMON_OR_NULL ||
+              type == PTR_TO_TCP_SOCK_OR_NULL;
 }
 
 static bool type_is_refcounted(enum bpf_reg_type type)
@@ -377,6 +386,12 @@ static bool is_release_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_sk_release;
 }
 
+static bool is_acquire_function(enum bpf_func_id func_id)
+{
+       return func_id == BPF_FUNC_sk_lookup_tcp ||
+               func_id == BPF_FUNC_sk_lookup_udp;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
        [NOT_INIT]              = "?",
@@ -392,6 +407,10 @@ static const char * const reg_type_str[] = {
        [PTR_TO_FLOW_KEYS]      = "flow_keys",
        [PTR_TO_SOCKET]         = "sock",
        [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
+       [PTR_TO_SOCK_COMMON]    = "sock_common",
+       [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
+       [PTR_TO_TCP_SOCK]       = "tcp_sock",
+       [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 };
 
 static char slot_type_char[] = {
@@ -618,13 +637,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 }
 
 /* release function corresponding to acquire_reference_state(). Idempotent. */
-static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 {
        int i, last_idx;
 
-       if (!ptr_id)
-               return -EFAULT;
-
        last_idx = state->acquired_refs - 1;
        for (i = 0; i < state->acquired_refs; i++) {
                if (state->refs[i].id == ptr_id) {
@@ -636,21 +652,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
                        return 0;
                }
        }
-       return -EFAULT;
-}
-
-/* variation on the above for cases where we expect that there must be an
- * outstanding reference for the specified ptr_id.
- */
-static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
-{
-       struct bpf_func_state *state = cur_func(env);
-       int err;
-
-       err = __release_reference_state(state, ptr_id);
-       if (WARN_ON_ONCE(err != 0))
-               verbose(env, "verifier internal error: can't release reference\n");
-       return err;
+       return -EINVAL;
 }
 
 static int transfer_reference_state(struct bpf_func_state *dst,
@@ -1209,6 +1211,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
        case CONST_PTR_TO_MAP:
        case PTR_TO_SOCKET:
        case PTR_TO_SOCKET_OR_NULL:
+       case PTR_TO_SOCK_COMMON:
+       case PTR_TO_SOCK_COMMON_OR_NULL:
+       case PTR_TO_TCP_SOCK:
+       case PTR_TO_TCP_SOCK_OR_NULL:
                return true;
        default:
                return false;
@@ -1640,12 +1646,14 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
        return 0;
 }
 
-static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
-                            int size, enum bpf_access_type t)
+static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
+                            u32 regno, int off, int size,
+                            enum bpf_access_type t)
 {
        struct bpf_reg_state *regs = cur_regs(env);
        struct bpf_reg_state *reg = &regs[regno];
-       struct bpf_insn_access_aux info;
+       struct bpf_insn_access_aux info = {};
+       bool valid;
 
        if (reg->smin_value < 0) {
                verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1653,13 +1661,31 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
                return -EACCES;
        }
 
-       if (!bpf_sock_is_valid_access(off, size, t, &info)) {
-               verbose(env, "invalid bpf_sock access off=%d size=%d\n",
-                       off, size);
-               return -EACCES;
+       switch (reg->type) {
+       case PTR_TO_SOCK_COMMON:
+               valid = bpf_sock_common_is_valid_access(off, size, t, &info);
+               break;
+       case PTR_TO_SOCKET:
+               valid = bpf_sock_is_valid_access(off, size, t, &info);
+               break;
+       case PTR_TO_TCP_SOCK:
+               valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
+               break;
+       default:
+               valid = false;
        }
 
-       return 0;
+
+       if (valid) {
+               env->insn_aux_data[insn_idx].ctx_field_size =
+                       info.ctx_field_size;
+               return 0;
+       }
+
+       verbose(env, "R%d invalid %s access off=%d size=%d\n",
+               regno, reg_type_str[reg->type], off, size);
+
+       return -EACCES;
 }
 
 static bool __is_pointer_value(bool allow_ptr_leaks,
@@ -1685,8 +1711,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
        const struct bpf_reg_state *reg = reg_state(env, regno);
 
-       return reg->type == PTR_TO_CTX ||
-              reg->type == PTR_TO_SOCKET;
+       return reg->type == PTR_TO_CTX;
+}
+
+static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
+{
+       const struct bpf_reg_state *reg = reg_state(env, regno);
+
+       return type_is_sk_pointer(reg->type);
 }
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1797,6 +1829,12 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
        case PTR_TO_SOCKET:
                pointer_desc = "sock ";
                break;
+       case PTR_TO_SOCK_COMMON:
+               pointer_desc = "sock_common ";
+               break;
+       case PTR_TO_TCP_SOCK:
+               pointer_desc = "tcp_sock ";
+               break;
        default:
                break;
        }
@@ -2000,11 +2038,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                         * PTR_TO_PACKET[_META,_END]. In the latter
                         * case, we know the offset is zero.
                         */
-                       if (reg_type == SCALAR_VALUE)
+                       if (reg_type == SCALAR_VALUE) {
                                mark_reg_unknown(env, regs, value_regno);
-                       else
+                       } else {
                                mark_reg_known_zero(env, regs,
                                                    value_regno);
+                               if (reg_type_may_be_null(reg_type))
+                                       regs[value_regno].id = ++env->id_gen;
+                       }
                        regs[value_regno].type = reg_type;
                }
 
@@ -2050,12 +2091,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                err = check_flow_keys_access(env, off, size);
                if (!err && t == BPF_READ && value_regno >= 0)
                        mark_reg_unknown(env, regs, value_regno);
-       } else if (reg->type == PTR_TO_SOCKET) {
+       } else if (type_is_sk_pointer(reg->type)) {
                if (t == BPF_WRITE) {
-                       verbose(env, "cannot write into socket\n");
+                       verbose(env, "R%d cannot write into %s\n",
+                               regno, reg_type_str[reg->type]);
                        return -EACCES;
                }
-               err = check_sock_access(env, regno, off, size, t);
+               err = check_sock_access(env, insn_idx, regno, off, size, t);
                if (!err && value_regno >= 0)
                        mark_reg_unknown(env, regs, value_regno);
        } else {
@@ -2099,7 +2141,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 
        if (is_ctx_reg(env, insn->dst_reg) ||
            is_pkt_reg(env, insn->dst_reg) ||
-           is_flow_key_reg(env, insn->dst_reg)) {
+           is_flow_key_reg(env, insn->dst_reg) ||
+           is_sk_reg(env, insn->dst_reg)) {
                verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
                        insn->dst_reg,
                        reg_type_str[reg_state(env, insn->dst_reg)->type]);
@@ -2366,6 +2409,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                err = check_ctx_reg(env, reg, regno);
                if (err < 0)
                        return err;
+       } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
+               expected_type = PTR_TO_SOCK_COMMON;
+               /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
+               if (!type_is_sk_pointer(type))
+                       goto err_type;
        } else if (arg_type == ARG_PTR_TO_SOCKET) {
                expected_type = PTR_TO_SOCKET;
                if (type != expected_type)
@@ -2780,7 +2828,7 @@ static int release_reference(struct bpf_verifier_env *env,
        for (i = 0; i <= vstate->curframe; i++)
                release_reg_references(env, vstate->frame[i], meta->ptr_id);
 
-       return release_reference_state(env, meta->ptr_id);
+       return release_reference_state(cur_func(env), meta->ptr_id);
 }
 
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3046,8 +3094,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                }
        } else if (is_release_function(func_id)) {
                err = release_reference(env, &meta);
-               if (err)
+               if (err) {
+                       verbose(env, "func %s#%d reference has not been acquired before\n",
+                               func_id_name(func_id), func_id);
                        return err;
+               }
        }
 
        regs = cur_regs(env);
@@ -3096,12 +3147,23 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                        regs[BPF_REG_0].id = ++env->id_gen;
                }
        } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
-               int id = acquire_reference_state(env, insn_idx);
-               if (id < 0)
-                       return id;
                mark_reg_known_zero(env, regs, BPF_REG_0);
                regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-               regs[BPF_REG_0].id = id;
+               if (is_acquire_function(func_id)) {
+                       int id = acquire_reference_state(env, insn_idx);
+
+                       if (id < 0)
+                               return id;
+                       /* For release_reference() */
+                       regs[BPF_REG_0].id = id;
+               } else {
+                       /* For mark_ptr_or_null_reg() */
+                       regs[BPF_REG_0].id = ++env->id_gen;
+               }
+       } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
+               mark_reg_known_zero(env, regs, BPF_REG_0);
+               regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
+               regs[BPF_REG_0].id = ++env->id_gen;
        } else {
                verbose(env, "unknown return type %d of func %s#%d\n",
                        fn->ret_type, func_id_name(func_id), func_id);
@@ -3361,6 +3423,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case PTR_TO_PACKET_END:
        case PTR_TO_SOCKET:
        case PTR_TO_SOCKET_OR_NULL:
+       case PTR_TO_SOCK_COMMON:
+       case PTR_TO_SOCK_COMMON_OR_NULL:
+       case PTR_TO_TCP_SOCK:
+       case PTR_TO_TCP_SOCK_OR_NULL:
                verbose(env, "R%d pointer arithmetic on %s prohibited\n",
                        dst, reg_type_str[ptr_reg->type]);
                return -EACCES;
@@ -4594,6 +4660,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
                        }
                } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
                        reg->type = PTR_TO_SOCKET;
+               } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
+                       reg->type = PTR_TO_SOCK_COMMON;
+               } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
+                       reg->type = PTR_TO_TCP_SOCK;
                }
                if (is_null || !(reg_is_refcounted(reg) ||
                                 reg_may_point_to_spin_lock(reg))) {
@@ -4618,7 +4688,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
        int i, j;
 
        if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-               __release_reference_state(state, id);
+               release_reference_state(state, id);
 
        for (i = 0; i < MAX_BPF_REG; i++)
                mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -5787,6 +5857,10 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
        case PTR_TO_FLOW_KEYS:
        case PTR_TO_SOCKET:
        case PTR_TO_SOCKET_OR_NULL:
+       case PTR_TO_SOCK_COMMON:
+       case PTR_TO_SOCK_COMMON_OR_NULL:
+       case PTR_TO_TCP_SOCK:
+       case PTR_TO_TCP_SOCK_OR_NULL:
                /* Only valid matches are exact, which memcmp() above
                 * would have accepted
                 */
@@ -6107,6 +6181,10 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
        case PTR_TO_CTX:
        case PTR_TO_SOCKET:
        case PTR_TO_SOCKET_OR_NULL:
+       case PTR_TO_SOCK_COMMON:
+       case PTR_TO_SOCK_COMMON_OR_NULL:
+       case PTR_TO_TCP_SOCK:
+       case PTR_TO_TCP_SOCK_OR_NULL:
                return false;
        default:
                return true;
@@ -7109,8 +7187,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                        convert_ctx_access = ops->convert_ctx_access;
                        break;
                case PTR_TO_SOCKET:
+               case PTR_TO_SOCK_COMMON:
                        convert_ctx_access = bpf_sock_convert_ctx_access;
                        break;
+               case PTR_TO_TCP_SOCK:
+                       convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+                       break;
                default:
                        continue;
                }
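The practical effect of the verifier changes above is that programs must
NULL-check the new sock pointers before dereferencing them. A sketch in the
spirit of the new tools/testing/selftests/bpf/verifier/sock.c tests follows;
the program, its policy and the quoted verifier message are illustrative:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"

SEC("classifier")
int drop_ipv4_socks(struct __sk_buff *skb)
{
        struct bpf_sock *sk = skb->sk;  /* PTR_TO_SOCK_COMMON_OR_NULL */

        /* Skipping this NULL check makes the verifier reject the load with
         * something like: "R1 invalid mem access 'sock_common_or_null'".
         */
        if (!sk)
                return TC_ACT_OK;

        /* sk is PTR_TO_SOCK_COMMON here: sock_common fields such as family
         * are readable, while writes, BPF_XADD and pointer arithmetic on it
         * are rejected.
         */
        return sk->family == 2 /* AF_INET */ ? TC_ACT_SHOT : TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";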
index 5cb9de1aaf886a7f6d0dd1eb883241d5f5104ff6..62da6148e9f8347f97384f7cc26d98901b1ad9fa 100644
@@ -403,7 +403,7 @@ config LWTUNNEL
 
 config LWTUNNEL_BPF
        bool "Execute BPF program as route nexthop action"
-       depends on LWTUNNEL
+       depends on LWTUNNEL && INET
        default y if LWTUNNEL=y
        ---help---
          Allows to run BPF programs as a nexthop action following a route
index b5a002d7b26390f59a1e58d3ea89e51e3d1dfd23..b584cb42a8037301e81ba3eea5cada8c18ad82bb 100644
@@ -73,6 +73,7 @@
 #include <linux/seg6_local.h>
 #include <net/seg6.h>
 #include <net/seg6_local.h>
+#include <net/lwtunnel.h>
 
 /**
  *     sk_filter_trim_cap - run a packet through a socket filter
@@ -1793,6 +1794,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+       sk = sk_to_full_sk(sk);
+
+       return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+       .func           = bpf_sk_fullsock,
+       .gpl_only       = false,
+       .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+};
+
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
                                           unsigned int write_len)
 {
@@ -4803,7 +4818,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
 }
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
-BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+                            bool ingress)
+{
+       return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
+}
+#endif
+
+BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
           u32, len)
 {
        switch (type) {
@@ -4811,14 +4834,41 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
        case BPF_LWT_ENCAP_SEG6:
        case BPF_LWT_ENCAP_SEG6_INLINE:
                return bpf_push_seg6_encap(skb, type, hdr, len);
+#endif
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+       case BPF_LWT_ENCAP_IP:
+               return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
 #endif
        default:
                return -EINVAL;
        }
 }
 
-static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
-       .func           = bpf_lwt_push_encap,
+BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
+          void *, hdr, u32, len)
+{
+       switch (type) {
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+       case BPF_LWT_ENCAP_IP:
+               return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
+#endif
+       default:
+               return -EINVAL;
+       }
+}
+
+static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
+       .func           = bpf_lwt_in_push_encap,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_MEM,
+       .arg4_type      = ARG_CONST_SIZE
+};
+
+static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
+       .func           = bpf_lwt_xmit_push_encap,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_CTX,
@@ -5018,6 +5068,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 };
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
+#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT)               \
+do {                                                                   \
+       switch (si->off) {                                              \
+       case offsetof(md_type, snd_cwnd):                               \
+               CONVERT(snd_cwnd); break;                               \
+       case offsetof(md_type, srtt_us):                                \
+               CONVERT(srtt_us); break;                                \
+       case offsetof(md_type, snd_ssthresh):                           \
+               CONVERT(snd_ssthresh); break;                           \
+       case offsetof(md_type, rcv_nxt):                                \
+               CONVERT(rcv_nxt); break;                                \
+       case offsetof(md_type, snd_nxt):                                \
+               CONVERT(snd_nxt); break;                                \
+       case offsetof(md_type, snd_una):                                \
+               CONVERT(snd_una); break;                                \
+       case offsetof(md_type, mss_cache):                              \
+               CONVERT(mss_cache); break;                              \
+       case offsetof(md_type, ecn_flags):                              \
+               CONVERT(ecn_flags); break;                              \
+       case offsetof(md_type, rate_delivered):                         \
+               CONVERT(rate_delivered); break;                         \
+       case offsetof(md_type, rate_interval_us):                       \
+               CONVERT(rate_interval_us); break;                       \
+       case offsetof(md_type, packets_out):                            \
+               CONVERT(packets_out); break;                            \
+       case offsetof(md_type, retrans_out):                            \
+               CONVERT(retrans_out); break;                            \
+       case offsetof(md_type, total_retrans):                          \
+               CONVERT(total_retrans); break;                          \
+       case offsetof(md_type, segs_in):                                \
+               CONVERT(segs_in); break;                                \
+       case offsetof(md_type, data_segs_in):                           \
+               CONVERT(data_segs_in); break;                           \
+       case offsetof(md_type, segs_out):                               \
+               CONVERT(segs_out); break;                               \
+       case offsetof(md_type, data_segs_out):                          \
+               CONVERT(data_segs_out); break;                          \
+       case offsetof(md_type, lost_out):                               \
+               CONVERT(lost_out); break;                               \
+       case offsetof(md_type, sacked_out):                             \
+               CONVERT(sacked_out); break;                             \
+       case offsetof(md_type, bytes_received):                         \
+               CONVERT(bytes_received); break;                         \
+       case offsetof(md_type, bytes_acked):                            \
+               CONVERT(bytes_acked); break;                            \
+       }                                                               \
+} while (0)
+
 #ifdef CONFIG_INET
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
                              int dif, int sdif, u8 family, u8 proto)
@@ -5255,6 +5353,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+                                 struct bpf_insn_access_aux *info)
+{
+       if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
+               return false;
+
+       if (off % size != 0)
+               return false;
+
+       switch (off) {
+       case offsetof(struct bpf_tcp_sock, bytes_received):
+       case offsetof(struct bpf_tcp_sock, bytes_acked):
+               return size == sizeof(__u64);
+       default:
+               return size == sizeof(__u32);
+       }
+}
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+                                   const struct bpf_insn *si,
+                                   struct bpf_insn *insn_buf,
+                                   struct bpf_prog *prog, u32 *target_size)
+{
+       struct bpf_insn *insn = insn_buf;
+
+#define BPF_TCP_SOCK_GET_COMMON(FIELD)                                 \
+       do {                                                            \
+               BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) >     \
+                            FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
+                                     si->dst_reg, si->src_reg,         \
+                                     offsetof(struct tcp_sock, FIELD)); \
+       } while (0)
+
+       CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
+                                      BPF_TCP_SOCK_GET_COMMON);
+
+       if (insn > insn_buf)
+               return insn - insn_buf;
+
+       switch (si->off) {
+       case offsetof(struct bpf_tcp_sock, rtt_min):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+                            sizeof(struct minmax));
+               BUILD_BUG_ON(sizeof(struct minmax) <
+                            sizeof(struct minmax_sample));
+
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+                                     offsetof(struct tcp_sock, rtt_min) +
+                                     offsetof(struct minmax_sample, v));
+               break;
+       }
+
+       return insn - insn_buf;
+}
+
+BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
+{
+       sk = sk_to_full_sk(sk);
+
+       if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+               return (unsigned long)sk;
+
+       return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_tcp_sock_proto = {
+       .func           = bpf_tcp_sock,
+       .gpl_only       = false,
+       .ret_type       = RET_PTR_TO_TCP_SOCK_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5284,7 +5455,8 @@ bool bpf_helper_changes_pkt_data(void *func)
            func == bpf_lwt_seg6_adjust_srh ||
            func == bpf_lwt_seg6_action ||
 #endif
-           func == bpf_lwt_push_encap)
+           func == bpf_lwt_in_push_encap ||
+           func == bpf_lwt_xmit_push_encap)
                return true;
 
        return false;
@@ -5408,6 +5580,12 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        switch (func_id) {
        case BPF_FUNC_get_local_storage:
                return &bpf_get_local_storage_proto;
+       case BPF_FUNC_sk_fullsock:
+               return &bpf_sk_fullsock_proto;
+#ifdef CONFIG_INET
+       case BPF_FUNC_tcp_sock:
+               return &bpf_tcp_sock_proto;
+#endif
        default:
                return sk_filter_func_proto(func_id, prog);
        }
@@ -5479,6 +5657,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_socket_uid_proto;
        case BPF_FUNC_fib_lookup:
                return &bpf_skb_fib_lookup_proto;
+       case BPF_FUNC_sk_fullsock:
+               return &bpf_sk_fullsock_proto;
 #ifdef CONFIG_XFRM
        case BPF_FUNC_skb_get_xfrm_state:
                return &bpf_skb_get_xfrm_state_proto;
@@ -5496,6 +5676,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sk_lookup_udp_proto;
        case BPF_FUNC_sk_release:
                return &bpf_sk_release_proto;
+       case BPF_FUNC_tcp_sock:
+               return &bpf_tcp_sock_proto;
 #endif
        default:
                return bpf_base_func_proto(func_id);
@@ -5672,7 +5854,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
        switch (func_id) {
        case BPF_FUNC_lwt_push_encap:
-               return &bpf_lwt_push_encap_proto;
+               return &bpf_lwt_in_push_encap_proto;
        default:
                return lwt_out_func_proto(func_id, prog);
        }
@@ -5708,6 +5890,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_l4_csum_replace_proto;
        case BPF_FUNC_set_hash_invalid:
                return &bpf_set_hash_invalid_proto;
+       case BPF_FUNC_lwt_push_encap:
+               return &bpf_lwt_xmit_push_encap_proto;
        default:
                return lwt_out_func_proto(func_id, prog);
        }
@@ -5766,6 +5950,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
                if (size != sizeof(__u64))
                        return false;
                break;
+       case offsetof(struct __sk_buff, sk):
+               if (type == BPF_WRITE || size != sizeof(__u64))
+                       return false;
+               info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+               break;
        default:
                /* Only narrow read access allowed for now. */
                if (type == BPF_WRITE) {
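
For illustration, the new __sk_buff->sk field composes with the two new
helpers as in this minimal cgroup-skb sketch (not part of the series; the
open-coded helper declarations and the section name follow the selftests'
conventions, and uapi headers from this series are assumed):

    #include <linux/bpf.h>

    static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
            (void *) BPF_FUNC_sk_fullsock;
    static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
            (void *) BPF_FUNC_tcp_sock;

    __attribute__((section("cgroup_skb/egress"), used))
    int read_tcp_state(struct __sk_buff *skb)
    {
            struct bpf_tcp_sock *tp;
            struct bpf_sock *sk;

            sk = skb->sk;             /* PTR_TO_SOCK_COMMON_OR_NULL */
            if (!sk)
                    return 1;
            sk = bpf_sk_fullsock(sk); /* NULL unless sk is a full sock */
            if (!sk)
                    return 1;
            tp = bpf_tcp_sock(sk);    /* NULL unless sk is TCP */
            if (!tp)
                    return 1;
            return tp->snd_cwnd > 0;  /* 1 == let the packet through */
    }

The verifier enforces each NULL check before the corresponding pointer may
be dereferenced, which is exactly what the OR_NULL register types above
encode.
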
@@ -5937,31 +6126,44 @@ full_access:
        return true;
 }
 
-static bool __sock_filter_check_size(int off, int size,
+bool bpf_sock_common_is_valid_access(int off, int size,
+                                    enum bpf_access_type type,
                                     struct bpf_insn_access_aux *info)
 {
-       const int size_default = sizeof(__u32);
-
        switch (off) {
-       case bpf_ctx_range(struct bpf_sock, src_ip4):
-       case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
-               bpf_ctx_record_field_size(info, size_default);
-               return bpf_ctx_narrow_access_ok(off, size, size_default);
+       case bpf_ctx_range_till(struct bpf_sock, type, priority):
+               return false;
+       default:
+               return bpf_sock_is_valid_access(off, size, type, info);
        }
-
-       return size == size_default;
 }
 
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
                              struct bpf_insn_access_aux *info)
 {
+       const int size_default = sizeof(__u32);
+
        if (off < 0 || off >= sizeof(struct bpf_sock))
                return false;
        if (off % size != 0)
                return false;
-       if (!__sock_filter_check_size(off, size, info))
-               return false;
-       return true;
+
+       switch (off) {
+       case offsetof(struct bpf_sock, state):
+       case offsetof(struct bpf_sock, family):
+       case offsetof(struct bpf_sock, type):
+       case offsetof(struct bpf_sock, protocol):
+       case offsetof(struct bpf_sock, dst_port):
+       case offsetof(struct bpf_sock, src_port):
+       case bpf_ctx_range(struct bpf_sock, src_ip4):
+       case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+       case bpf_ctx_range(struct bpf_sock, dst_ip4):
+       case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+               bpf_ctx_record_field_size(info, size_default);
+               return bpf_ctx_narrow_access_ok(off, size, size_default);
+       }
+
+       return size == size_default;
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
@@ -6750,6 +6952,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                off += offsetof(struct qdisc_skb_cb, pkt_len);
                *target_size = 4;
                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
+               break;
+
+       case offsetof(struct __sk_buff, sk):
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct sk_buff, sk));
+               break;
        }
 
        return insn - insn_buf;
@@ -6798,24 +7007,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
                break;
 
        case offsetof(struct bpf_sock, family):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
-
-               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-                                     offsetof(struct sock, sk_family));
+               *insn++ = BPF_LDX_MEM(
+                       BPF_FIELD_SIZEOF(struct sock_common, skc_family),
+                       si->dst_reg, si->src_reg,
+                       bpf_target_off(struct sock_common,
+                                      skc_family,
+                                      FIELD_SIZEOF(struct sock_common,
+                                                   skc_family),
+                                      target_size));
                break;
 
        case offsetof(struct bpf_sock, type):
+               BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
                                      offsetof(struct sock, __sk_flags_offset));
                *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
                *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+               *target_size = 2;
                break;
 
        case offsetof(struct bpf_sock, protocol):
+               BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
                                      offsetof(struct sock, __sk_flags_offset));
                *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
                *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+               *target_size = 1;
                break;
 
        case offsetof(struct bpf_sock, src_ip4):
@@ -6827,6 +7044,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
                                       target_size));
                break;
 
+       case offsetof(struct bpf_sock, dst_ip4):
+               *insn++ = BPF_LDX_MEM(
+                       BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+                       bpf_target_off(struct sock_common, skc_daddr,
+                                      FIELD_SIZEOF(struct sock_common,
+                                                   skc_daddr),
+                                      target_size));
+               break;
+
        case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 #if IS_ENABLED(CONFIG_IPV6)
                off = si->off;
@@ -6845,6 +7071,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 #endif
                break;
 
+       case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+               off = si->off;
+               off -= offsetof(struct bpf_sock, dst_ip6[0]);
+               *insn++ = BPF_LDX_MEM(
+                       BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+                       bpf_target_off(struct sock_common,
+                                      skc_v6_daddr.s6_addr32[0],
+                                      FIELD_SIZEOF(struct sock_common,
+                                                   skc_v6_daddr.s6_addr32[0]),
+                                      target_size) + off);
+#else
+               *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+               *target_size = 4;
+#endif
+               break;
+
        case offsetof(struct bpf_sock, src_port):
                *insn++ = BPF_LDX_MEM(
                        BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@@ -6854,6 +7097,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
                                                    skc_num),
                                       target_size));
                break;
+
+       case offsetof(struct bpf_sock, dst_port):
+               *insn++ = BPF_LDX_MEM(
+                       BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
+                       si->dst_reg, si->src_reg,
+                       bpf_target_off(struct sock_common, skc_dport,
+                                      FIELD_SIZEOF(struct sock_common,
+                                                   skc_dport),
+                                      target_size));
+               break;
+
+       case offsetof(struct bpf_sock, state):
+               *insn++ = BPF_LDX_MEM(
+                       BPF_FIELD_SIZEOF(struct sock_common, skc_state),
+                       si->dst_reg, si->src_reg,
+                       bpf_target_off(struct sock_common, skc_state,
+                                      FIELD_SIZEOF(struct sock_common,
+                                                   skc_state),
+                                      target_size));
+               break;
        }
 
        return insn - insn_buf;
@@ -7101,6 +7364,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
        struct bpf_insn *insn = insn_buf;
        int off;
 
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                        \
+       do {                                                                  \
+               BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >                   \
+                            FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
+                                               struct bpf_sock_ops_kern,     \
+                                               is_fullsock),                 \
+                                     si->dst_reg, si->src_reg,               \
+                                     offsetof(struct bpf_sock_ops_kern,      \
+                                              is_fullsock));                 \
+               *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);            \
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
+                                               struct bpf_sock_ops_kern, sk),\
+                                     si->dst_reg, si->src_reg,               \
+                                     offsetof(struct bpf_sock_ops_kern, sk));\
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,                   \
+                                                      OBJ_FIELD),            \
+                                     si->dst_reg, si->dst_reg,               \
+                                     offsetof(OBJ, OBJ_FIELD));              \
+       } while (0)
+
+#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
+               SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers: dst_reg, which contains a
+ * pointer to ctx (context), and src_reg, which contains the value that
+ * should be stored. However, we need an additional register since we
+ * cannot overwrite dst_reg, because it may be used later in the program.
+ * Instead we "borrow" one of the other registers: we first save its
+ * value into a new (temp) field in bpf_sock_ops_kern, use it, and then
+ * restore it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                        \
+       do {                                                                  \
+               int reg = BPF_REG_9;                                          \
+               BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >                   \
+                            FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
+               if (si->dst_reg == reg || si->src_reg == reg)                 \
+                       reg--;                                                \
+               if (si->dst_reg == reg || si->src_reg == reg)                 \
+                       reg--;                                                \
+               *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,               \
+                                     offsetof(struct bpf_sock_ops_kern,      \
+                                              temp));                        \
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
+                                               struct bpf_sock_ops_kern,     \
+                                               is_fullsock),                 \
+                                     reg, si->dst_reg,                       \
+                                     offsetof(struct bpf_sock_ops_kern,      \
+                                              is_fullsock));                 \
+               *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);                    \
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
+                                               struct bpf_sock_ops_kern, sk),\
+                                     reg, si->dst_reg,                       \
+                                     offsetof(struct bpf_sock_ops_kern, sk));\
+               *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),       \
+                                     reg, si->src_reg,                       \
+                                     offsetof(OBJ, OBJ_FIELD));              \
+               *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,               \
+                                     offsetof(struct bpf_sock_ops_kern,      \
+                                              temp));                        \
+       } while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)           \
+       do {                                                                  \
+               if (TYPE == BPF_WRITE)                                        \
+                       SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
+               else                                                          \
+                       SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
+       } while (0)
+
+       CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
+                                      SOCK_OPS_GET_TCP_SOCK_FIELD);
+
+       if (insn > insn_buf)
+               return insn - insn_buf;
+
        switch (si->off) {
        case offsetof(struct bpf_sock_ops, op) ...
             offsetof(struct bpf_sock_ops, replylong[3]):
@@ -7258,175 +7600,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
                                      FIELD_SIZEOF(struct minmax_sample, t));
                break;
 
-/* Helper macro for adding read access to tcp_sock or sock fields. */
-#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                        \
-       do {                                                                  \
-               BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >                   \
-                            FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
-               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
-                                               struct bpf_sock_ops_kern,     \
-                                               is_fullsock),                 \
-                                     si->dst_reg, si->src_reg,               \
-                                     offsetof(struct bpf_sock_ops_kern,      \
-                                              is_fullsock));                 \
-               *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);            \
-               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
-                                               struct bpf_sock_ops_kern, sk),\
-                                     si->dst_reg, si->src_reg,               \
-                                     offsetof(struct bpf_sock_ops_kern, sk));\
-               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,                   \
-                                                      OBJ_FIELD),            \
-                                     si->dst_reg, si->dst_reg,               \
-                                     offsetof(OBJ, OBJ_FIELD));              \
-       } while (0)
-
-/* Helper macro for adding write access to tcp_sock or sock fields.
- * The macro is called with two registers, dst_reg which contains a pointer
- * to ctx (context) and src_reg which contains the value that should be
- * stored. However, we need an additional register since we cannot overwrite
- * dst_reg because it may be used later in the program.
- * Instead we "borrow" one of the other register. We first save its value
- * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
- * it at the end of the macro.
- */
-#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                        \
-       do {                                                                  \
-               int reg = BPF_REG_9;                                          \
-               BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >                   \
-                            FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
-               if (si->dst_reg == reg || si->src_reg == reg)                 \
-                       reg--;                                                \
-               if (si->dst_reg == reg || si->src_reg == reg)                 \
-                       reg--;                                                \
-               *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,               \
-                                     offsetof(struct bpf_sock_ops_kern,      \
-                                              temp));                        \
-               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
-                                               struct bpf_sock_ops_kern,     \
-                                               is_fullsock),                 \
-                                     reg, si->dst_reg,                       \
-                                     offsetof(struct bpf_sock_ops_kern,      \
-                                              is_fullsock));                 \
-               *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);                    \
-               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
-                                               struct bpf_sock_ops_kern, sk),\
-                                     reg, si->dst_reg,                       \
-                                     offsetof(struct bpf_sock_ops_kern, sk));\
-               *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),       \
-                                     reg, si->src_reg,                       \
-                                     offsetof(OBJ, OBJ_FIELD));              \
-               *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,               \
-                                     offsetof(struct bpf_sock_ops_kern,      \
-                                              temp));                        \
-       } while (0)
-
-#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)           \
-       do {                                                                  \
-               if (TYPE == BPF_WRITE)                                        \
-                       SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
-               else                                                          \
-                       SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
-       } while (0)
-
-       case offsetof(struct bpf_sock_ops, snd_cwnd):
-               SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, srtt_us):
-               SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
-               break;
-
        case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
                SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
                                   struct tcp_sock);
                break;
 
-       case offsetof(struct bpf_sock_ops, snd_ssthresh):
-               SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, rcv_nxt):
-               SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, snd_nxt):
-               SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, snd_una):
-               SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, mss_cache):
-               SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, ecn_flags):
-               SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, rate_delivered):
-               SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
-                                  struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, rate_interval_us):
-               SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
-                                  struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, packets_out):
-               SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, retrans_out):
-               SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, total_retrans):
-               SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
-                                  struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, segs_in):
-               SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, data_segs_in):
-               SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, segs_out):
-               SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, data_segs_out):
-               SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
-                                  struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, lost_out):
-               SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, sacked_out):
-               SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
-               break;
-
        case offsetof(struct bpf_sock_ops, sk_txhash):
                SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
                                          struct sock, type);
                break;
-
-       case offsetof(struct bpf_sock_ops, bytes_received):
-               SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
-                                  struct tcp_sock);
-               break;
-
-       case offsetof(struct bpf_sock_ops, bytes_acked):
-               SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
-               break;
-
        }
        return insn - insn_buf;
 }
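
From the program side nothing changes with this hoisting: reads of the
common TCP fields still go through the guarded loads emitted by
SOCK_OPS_GET_TCP_SOCK_FIELD, and writes through SOCK_OPS_SET_FIELD. A
hypothetical sock_ops sketch touching one field of each kind:

    #include <linux/bpf.h>

    __attribute__((section("sockops"), used))
    int sockops_sample(struct bpf_sock_ops *skops)
    {
            /* Read: rewritten via SOCK_OPS_GET_TCP_SOCK_FIELD; the
             * emitted code loads 0 when is_fullsock is 0.
             */
            __u32 cwnd = skops->snd_cwnd;

            /* Write: rewritten via SOCK_OPS_SET_FIELD, which borrows a
             * scratch register and parks it in bpf_sock_ops_kern->temp.
             */
            skops->sk_txhash = cwnd + 1;
            return 1;
    }
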
index a648568c5e8fed087769f70c71e9c6300aa8002c..a5c8c79d468a3b7e142f73bb84899c37eb1441db 100644 (file)
@@ -16,6 +16,8 @@
 #include <linux/types.h>
 #include <linux/bpf.h>
 #include <net/lwtunnel.h>
+#include <net/gre.h>
+#include <net/ip6_route.h>
 
 struct bpf_lwt_prog {
        struct bpf_prog *prog;
@@ -55,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 
        switch (ret) {
        case BPF_OK:
+       case BPF_LWT_REROUTE:
                break;
 
        case BPF_REDIRECT:
@@ -87,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
        return ret;
 }
 
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+       int err = -EINVAL;
+
+       if (skb->protocol == htons(ETH_P_IP)) {
+               struct iphdr *iph = ip_hdr(skb);
+
+               err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+                                          iph->tos, skb_dst(skb)->dev);
+       } else if (skb->protocol == htons(ETH_P_IPV6)) {
+               err = ipv6_stub->ipv6_route_input(skb);
+       } else {
+               err = -EAFNOSUPPORT;
+       }
+
+       if (err)
+               goto err;
+       return dst_input(skb);
+
+err:
+       kfree_skb(skb);
+       return err;
+}
+
 static int bpf_input(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
@@ -98,11 +125,11 @@ static int bpf_input(struct sk_buff *skb)
                ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
                if (ret < 0)
                        return ret;
+               if (ret == BPF_LWT_REROUTE)
+                       return bpf_lwt_input_reroute(skb);
        }
 
        if (unlikely(!dst->lwtstate->orig_input)) {
-               pr_warn_once("orig_input not set on dst for prog %s\n",
-                            bpf->out.name);
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -147,6 +174,102 @@ static int xmit_check_hhlen(struct sk_buff *skb)
        return 0;
 }
 
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+       struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+       int oif = l3mdev ? l3mdev->ifindex : 0;
+       struct dst_entry *dst = NULL;
+       int err = -EAFNOSUPPORT;
+       struct sock *sk;
+       struct net *net;
+       bool ipv4;
+
+       if (skb->protocol == htons(ETH_P_IP))
+               ipv4 = true;
+       else if (skb->protocol == htons(ETH_P_IPV6))
+               ipv4 = false;
+       else
+               goto err;
+
+       sk = sk_to_full_sk(skb->sk);
+       if (sk) {
+               if (sk->sk_bound_dev_if)
+                       oif = sk->sk_bound_dev_if;
+               net = sock_net(sk);
+       } else {
+               net = dev_net(skb_dst(skb)->dev);
+       }
+
+       if (ipv4) {
+               struct iphdr *iph = ip_hdr(skb);
+               struct flowi4 fl4 = {};
+               struct rtable *rt;
+
+               fl4.flowi4_oif = oif;
+               fl4.flowi4_mark = skb->mark;
+               fl4.flowi4_uid = sock_net_uid(net, sk);
+               fl4.flowi4_tos = RT_TOS(iph->tos);
+               fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+               fl4.flowi4_proto = iph->protocol;
+               fl4.daddr = iph->daddr;
+               fl4.saddr = iph->saddr;
+
+               rt = ip_route_output_key(net, &fl4);
+               if (IS_ERR(rt)) {
+                       err = PTR_ERR(rt);
+                       goto err;
+               }
+               dst = &rt->dst;
+       } else {
+               struct ipv6hdr *iph6 = ipv6_hdr(skb);
+               struct flowi6 fl6 = {};
+
+               fl6.flowi6_oif = oif;
+               fl6.flowi6_mark = skb->mark;
+               fl6.flowi6_uid = sock_net_uid(net, sk);
+               fl6.flowlabel = ip6_flowinfo(iph6);
+               fl6.flowi6_proto = iph6->nexthdr;
+               fl6.daddr = iph6->daddr;
+               fl6.saddr = iph6->saddr;
+
+               err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
+               if (unlikely(err))
+                       goto err;
+               if (IS_ERR(dst)) {
+                       err = PTR_ERR(dst);
+                       goto err;
+               }
+       }
+       if (unlikely(dst->error)) {
+               err = dst->error;
+               dst_release(dst);
+               goto err;
+       }
+
+       /* Although headroom was reserved in bpf_lwt_push_ip_encap(), that
+        * was done for the previous dst, so reserve it again here in case
+        * the new dst needs more space. The call below is a no-op if skb
+        * already has enough headroom.
+        */
+       err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+       if (unlikely(err))
+               goto err;
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+       if (unlikely(err))
+               goto err;
+
+       /* ip[6]_finish_output2 understands LWTUNNEL_XMIT_DONE */
+       return LWTUNNEL_XMIT_DONE;
+
+err:
+       kfree_skb(skb);
+       return err;
+}
+
 static int bpf_xmit(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
@@ -154,11 +277,20 @@ static int bpf_xmit(struct sk_buff *skb)
 
        bpf = bpf_lwt_lwtunnel(dst->lwtstate);
        if (bpf->xmit.prog) {
+               __be16 proto = skb->protocol;
                int ret;
 
                ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
                switch (ret) {
                case BPF_OK:
+                       /* If the program changed the header, e.g. via
+                        * bpf_lwt_push_encap, and the L3 protocol changed
+                        * with it, the program must return BPF_LWT_REROUTE
+                        * (handled below) instead of BPF_OK.
+                        */
+                       if (skb->protocol != proto) {
+                               kfree_skb(skb);
+                               return -EINVAL;
+                       }
                        /* If the header was expanded, headroom might be too
                         * small for L2 header to come, expand as needed.
                         */
@@ -169,6 +301,8 @@ static int bpf_xmit(struct sk_buff *skb)
                        return LWTUNNEL_XMIT_CONTINUE;
                case BPF_REDIRECT:
                        return LWTUNNEL_XMIT_DONE;
+               case BPF_LWT_REROUTE:
+                       return bpf_lwt_xmit_reroute(skb);
                default:
                        return ret;
                }
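
A hypothetical lwt_xmit program exercising the new contract: it prepends an
outer IPv4 header with bpf_lwt_push_encap() and returns BPF_LWT_REROUTE so
the stack re-routes on the new header. Addresses and tot_len are elided;
per bpf_lwt_push_ip_encap() below, iph->check is computed by the kernel
when left zero. The inner packet is assumed to be IPv6 (IPv6-in-IPv4), so
skb->protocol changes and BPF_OK would be rejected by the check above:

    #include <linux/bpf.h>
    #include <linux/ip.h>
    #include <linux/in.h>

    static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type,
                                     void *hdr, __u32 len) =
            (void *) BPF_FUNC_lwt_push_encap;

    __attribute__((section("lwt_xmit"), used))
    int encap_and_reroute(struct __sk_buff *skb)
    {
            struct iphdr hdr = {
                    .version  = 4,
                    .ihl      = sizeof(hdr) / 4,
                    .ttl      = 64,
                    .protocol = IPPROTO_IPV6, /* inner packet is IPv6 */
                    /* .saddr, .daddr, .tot_len left to the reader */
            };

            if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
                    return BPF_DROP;
            return BPF_LWT_REROUTE; /* re-route based on the new header */
    }
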
@@ -390,6 +524,133 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
        .owner          = THIS_MODULE,
 };
 
+static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
+                          int encap_len)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+       gso_type |= SKB_GSO_DODGY;
+       shinfo->gso_type |= gso_type;
+       skb_decrease_gso_size(shinfo, encap_len);
+       shinfo->gso_segs = 0;
+       return 0;
+}
+
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+       int next_hdr_offset;
+       void *next_hdr;
+       __u8 protocol;
+
+       /* SCTP and UDP_L4 GSO need more nuanced handling than what
+        * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
+        * So, at the moment, only TCP GSO packets are let through.
+        */
+       if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+               return -ENOTSUPP;
+
+       if (ipv4) {
+               protocol = ip_hdr(skb)->protocol;
+               next_hdr_offset = sizeof(struct iphdr);
+               next_hdr = skb_network_header(skb) + next_hdr_offset;
+       } else {
+               protocol = ipv6_hdr(skb)->nexthdr;
+               next_hdr_offset = sizeof(struct ipv6hdr);
+               next_hdr = skb_network_header(skb) + next_hdr_offset;
+       }
+
+       switch (protocol) {
+       case IPPROTO_GRE:
+               next_hdr_offset += sizeof(struct gre_base_hdr);
+               if (next_hdr_offset > encap_len)
+                       return -EINVAL;
+
+               if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
+                       return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
+                                              encap_len);
+               return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
+
+       case IPPROTO_UDP:
+               next_hdr_offset += sizeof(struct udphdr);
+               if (next_hdr_offset > encap_len)
+                       return -EINVAL;
+
+               if (((struct udphdr *)next_hdr)->check)
+                       return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
+                                              encap_len);
+               return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
+
+       case IPPROTO_IP:
+       case IPPROTO_IPV6:
+               if (ipv4)
+                       return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
+               else
+                       return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
+
+       default:
+               return -EPROTONOSUPPORT;
+       }
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+       struct iphdr *iph;
+       bool ipv4;
+       int err;
+
+       if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+               return -EINVAL;
+
+       /* validate protocol and length */
+       iph = (struct iphdr *)hdr;
+       if (iph->version == 4) {
+               ipv4 = true;
+               if (unlikely(len < iph->ihl * 4))
+                       return -EINVAL;
+       } else if (iph->version == 6) {
+               ipv4 = false;
+               if (unlikely(len < sizeof(struct ipv6hdr)))
+                       return -EINVAL;
+       } else {
+               return -EINVAL;
+       }
+
+       if (ingress)
+               err = skb_cow_head(skb, len + skb->mac_len);
+       else
+               err = skb_cow_head(skb,
+                                  len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+       if (unlikely(err))
+               return err;
+
+       /* push the encap headers and fix pointers */
+       skb_reset_inner_headers(skb);
+       skb->encapsulation = 1;
+       skb_push(skb, len);
+       if (ingress)
+               skb_postpush_rcsum(skb, iph, len);
+       skb_reset_network_header(skb);
+       memcpy(skb_network_header(skb), hdr, len);
+       bpf_compute_data_pointers(skb);
+       skb_clear_hash(skb);
+
+       if (ipv4) {
+               skb->protocol = htons(ETH_P_IP);
+               iph = ip_hdr(skb);
+
+               if (!iph->check)
+                       iph->check = ip_fast_csum((unsigned char *)iph,
+                                                 iph->ihl);
+       } else {
+               skb->protocol = htons(ETH_P_IPV6);
+       }
+
+       if (skb_is_gso(skb))
+               return handle_gso_encap(skb, ipv4, len);
+
+       return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
        return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
index 5cd0029d930e2d2cd940cd1c618e5d5b81b952cc..6c79af056d9b7b7a4595d8a8764839d2204e9ac5 100644 (file)
@@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
        return -EAFNOSUPPORT;
 }
 
+static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+{
+       return -EAFNOSUPPORT;
+}
+
 static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
 {
        return NULL;
@@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
 
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
        .ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
+       .ipv6_route_input  = eafnosupport_ipv6_route_input,
        .fib6_get_table    = eafnosupport_fib6_get_table,
        .fib6_table_lookup = eafnosupport_fib6_table_lookup,
        .fib6_lookup       = eafnosupport_fib6_lookup,
index d99753b5e39b243ba4567b93b2f990f8728103c6..2f45d2a3e3a33e7e43a3384d1a8fe7a683580919 100644 (file)
@@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = {
        .exit = inet6_net_exit,
 };
 
+static int ipv6_route_input(struct sk_buff *skb)
+{
+       ip6_route_input(skb);
+       return skb_dst(skb)->error;
+}
+
 static const struct ipv6_stub ipv6_stub_impl = {
        .ipv6_sock_mc_join = ipv6_sock_mc_join,
        .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
        .ipv6_dst_lookup   = ip6_dst_lookup,
+       .ipv6_route_input  = ipv6_route_input,
        .fib6_get_table    = fib6_get_table,
        .fib6_table_lookup = fib6_table_lookup,
        .fib6_lookup       = fib6_lookup,
index 27153bb816ac40b58c512ec593e5193adef148e0..4f2188845dd8338f435db3c7ac07e742e9b3ecbd 100644 (file)
@@ -16,7 +16,7 @@ SYNOPSIS
 
        **bpftool** **version**
 
-       *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** }
+       *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
 
        *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
        | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -34,6 +34,8 @@ SYNOPSIS
 
        *NET-COMMANDS* := { **show** | **list** | **help** }
 
+       *FEATURE-COMMANDS* := { **probe** | **help** }
+
 DESCRIPTION
 ===========
        *bpftool* allows for inspection and simple modification of BPF objects
index 1777fa0c61e4a2dd9f15b1908a91a8582c8cad98..bcdd2474eee7549655358698cb509b95b7babc27 100644 (file)
@@ -2016,6 +2016,19 @@ union bpf_attr {
  *                     Only works if *skb* contains an IPv6 packet. Insert a
  *                     Segment Routing Header (**struct ipv6_sr_hdr**) inside
  *                     the IPv6 header.
+ *             **BPF_LWT_ENCAP_IP**
+ *                     IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ *                     must be IPv4 or IPv6, followed by zero or more
+ *                     additional headers, up to LWT_BPF_MAX_HEADROOM total
+ *                     bytes in all prepended headers. Please note that
+ *                     if skb_is_gso(skb) is true, no more than two headers
+ *                     can be prepended, and the inner header, if present,
+ *                     should be either GRE or UDP/GUE.
+ *
+ *             **BPF_LWT_ENCAP_SEG6**\* types can be called by BPF programs
+ *             of type **BPF_PROG_TYPE_LWT_IN**; the **BPF_LWT_ENCAP_IP**
+ *             type can be called by BPF programs of types
+ *             **BPF_PROG_TYPE_LWT_IN** and **BPF_PROG_TYPE_LWT_XMIT**.
  *
 *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2329,6 +2342,23 @@ union bpf_attr {
  *             "**y**".
  *     Return
  *             0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ *     Description
+ *             This helper gets a **struct bpf_sock** pointer such
+ *             that all the fields in bpf_sock can be accessed.
+ *     Return
+ *             A **struct bpf_sock** pointer on success, or NULL in
+ *             case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ *     Description
+ *             This helper gets a **struct bpf_tcp_sock** pointer from a
+ *             **struct bpf_sock** pointer.
+ *
+ *     Return
+ *             A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *             case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -2425,7 +2455,9 @@ union bpf_attr {
        FN(msg_pop_data),               \
        FN(rc_pointer_rel),             \
        FN(spin_lock),                  \
-       FN(spin_unlock),
+       FN(spin_unlock),                \
+       FN(sk_fullsock),                \
+       FN(tcp_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2498,7 +2530,8 @@ enum bpf_hdr_start_off {
 /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
 enum bpf_lwt_encap_mode {
        BPF_LWT_ENCAP_SEG6,
-       BPF_LWT_ENCAP_SEG6_INLINE
+       BPF_LWT_ENCAP_SEG6_INLINE,
+       BPF_LWT_ENCAP_IP,
 };
 
 #define __bpf_md_ptr(type, name)       \
@@ -2545,6 +2578,7 @@ struct __sk_buff {
        __u64 tstamp;
        __u32 wire_len;
        __u32 gso_segs;
+       __bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 struct bpf_tunnel_key {
@@ -2586,7 +2620,15 @@ enum bpf_ret_code {
        BPF_DROP = 2,
        /* 3-6 reserved */
        BPF_REDIRECT = 7,
-       /* >127 are reserved for prog type specific return codes */
+       /* >127 are reserved for prog type specific return codes.
+        *
+        * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
+        *    BPF_PROG_TYPE_LWT_XMIT to indicate that the skb has been
+        *    changed and should be routed based on its new L3 header.
+        *    (This is an L3 redirect, as opposed to the L2 redirect
+        *    represented by BPF_REDIRECT above.)
+        */
+       BPF_LWT_REROUTE = 128,
 };
 
 struct bpf_sock {
@@ -2596,14 +2638,52 @@ struct bpf_sock {
        __u32 protocol;
        __u32 mark;
        __u32 priority;
-       __u32 src_ip4;          /* Allows 1,2,4-byte read.
-                                * Stored in network byte order.
+       /* IP address fields also allow 1- and 2-byte access */
+       __u32 src_ip4;
+       __u32 src_ip6[4];
+       __u32 src_port;         /* host byte order */
+       __u32 dst_port;         /* network byte order */
+       __u32 dst_ip4;
+       __u32 dst_ip6[4];
+       __u32 state;
+};
+
+struct bpf_tcp_sock {
+       __u32 snd_cwnd;         /* Sending congestion window            */
+       __u32 srtt_us;          /* smoothed round trip time << 3 in usecs */
+       __u32 rtt_min;
+       __u32 snd_ssthresh;     /* Slow start size threshold            */
+       __u32 rcv_nxt;          /* What we want to receive next         */
+       __u32 snd_nxt;          /* Next sequence we send                */
+       __u32 snd_una;          /* First byte we want an ack for        */
+       __u32 mss_cache;        /* Cached effective mss, not including SACKS */
+       __u32 ecn_flags;        /* ECN status bits.                     */
+       __u32 rate_delivered;   /* saved rate sample: packets delivered */
+       __u32 rate_interval_us; /* saved rate sample: time elapsed */
+       __u32 packets_out;      /* Packets which are "in flight"        */
+       __u32 retrans_out;      /* Retransmitted packets out            */
+       __u32 total_retrans;    /* Total retransmits for entire connection */
+       __u32 segs_in;          /* RFC4898 tcpEStatsPerfSegsIn
+                                * total number of segments in.
                                 */
-       __u32 src_ip6[4];       /* Allows 1,2,4-byte read.
-                                * Stored in network byte order.
+       __u32 data_segs_in;     /* RFC4898 tcpEStatsPerfDataSegsIn
+                                * total number of data segments in.
+                                */
+       __u32 segs_out;         /* RFC4898 tcpEStatsPerfSegsOut
+                                * The total number of segments sent.
+                                */
+       __u32 data_segs_out;    /* RFC4898 tcpEStatsPerfDataSegsOut
+                                * total number of data segments sent.
+                                */
+       __u32 lost_out;         /* Lost packets                 */
+       __u32 sacked_out;       /* SACK'd packets                       */
+       __u64 bytes_received;   /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+                                * sum(delta(rcv_nxt)), or how many bytes
+                                * were received.
                                 */
-       __u32 src_port;         /* Allows 4-byte read.
-                                * Stored in host byte order
+       __u64 bytes_acked;      /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+                                * sum(delta(snd_una)), or how many bytes
+                                * were acked.
                                 */
 };
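
The mixed byte orders above are easy to trip over: src_port is host order,
while dst_port and the addresses stay in network order. A hypothetical
fragment comparing them correctly (byte-swap macros open-coded here under a
little-endian assumption; the selftests' bpf_endian.h provides proper ones):

    #define bpf_htons(x) __builtin_bswap16(x) /* little-endian build */
    #define bpf_htonl(x) __builtin_bswap32(x)

    /* Match connections to 192.0.2.1:443, given an already
     * NULL-checked full socket.
     */
    static int is_to_web(struct bpf_sock *sk)
    {
            if (sk->dst_ip4 != bpf_htonl(0xc0000201)) /* 192.0.2.1 */
                    return 0;
            if (sk->dst_port != bpf_htons(443))       /* network order */
                    return 0;
            return sk->src_port > 1023;               /* host order */
    }
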
 
index d6533828123a6728958d2127ab81c2b5fa30f07f..5b225ff63b483145878e977cf0a1be7d1fe53cd9 100644 (file)
@@ -925,6 +925,7 @@ enum {
 enum {
        LINK_XSTATS_TYPE_UNSPEC,
        LINK_XSTATS_TYPE_BRIDGE,
+       LINK_XSTATS_TYPE_BOND,
        __LINK_XSTATS_TYPE_MAX
 };
 #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
index 3defad77dc7aca37e7496e2aee6e6b386f44a802..9cd015574e83828d20ace576fe7190f68a924829 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include <stdlib.h>
+#include <string.h>
 #include <memory.h>
 #include <unistd.h>
 #include <asm/unistd.h>
@@ -214,23 +215,35 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 {
        void *finfo = NULL, *linfo = NULL;
        union bpf_attr attr;
+       __u32 log_level;
        __u32 name_len;
        int fd;
 
-       if (!load_attr)
+       if (!load_attr || !log_buf != !log_buf_sz)
+               return -EINVAL;
+
+       log_level = load_attr->log_level;
+       if (log_level > 2 || (log_level && !log_buf))
                return -EINVAL;
 
        name_len = load_attr->name ? strlen(load_attr->name) : 0;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.prog_type = load_attr->prog_type;
        attr.expected_attach_type = load_attr->expected_attach_type;
        attr.insn_cnt = (__u32)load_attr->insns_cnt;
        attr.insns = ptr_to_u64(load_attr->insns);
        attr.license = ptr_to_u64(load_attr->license);
-       attr.log_buf = ptr_to_u64(NULL);
-       attr.log_size = 0;
-       attr.log_level = 0;
+
+       attr.log_level = log_level;
+       if (log_level) {
+               attr.log_buf = ptr_to_u64(log_buf);
+               attr.log_size = log_buf_sz;
+       } else {
+               attr.log_buf = ptr_to_u64(NULL);
+               attr.log_size = 0;
+       }
+
        attr.kern_version = load_attr->kern_version;
        attr.prog_ifindex = load_attr->prog_ifindex;
        attr.prog_btf_fd = load_attr->prog_btf_fd;
@@ -286,7 +299,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
                        goto done;
        }
 
-       if (!log_buf || !log_buf_sz)
+       if (log_level || !log_buf)
                goto done;
 
        /* Try again with log */
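
Callers opt in per load; a minimal sketch of the new knob (include path and
instruction setup assumed, error handling trimmed):

    #include <stdio.h>
    #include <bpf/bpf.h>

    int load_with_log(const struct bpf_insn *insns, size_t insns_cnt)
    {
            struct bpf_load_program_attr attr = {};
            static char log_buf[1 << 16];
            int fd;

            attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
            attr.insns     = insns;
            attr.insns_cnt = insns_cnt;
            attr.license   = "GPL";
            attr.log_level = 1; /* 1 or 2: log even on success */

            fd = bpf_load_program_xattr(&attr, log_buf, sizeof(log_buf));
            if (fd < 0)
                    fprintf(stderr, "verifier log:\n%s\n", log_buf);
            return fd;
    }

Note the pairing rules enforced above: log_buf and log_buf_sz must be
passed together, and a non-zero log_level requires a buffer.
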
@@ -327,7 +340,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.prog_type = type;
        attr.insn_cnt = (__u32)insns_cnt;
        attr.insns = ptr_to_u64(insns);
@@ -347,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
@@ -360,7 +373,7 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
@@ -372,7 +385,7 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
@@ -385,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
@@ -397,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
 
@@ -408,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.next_key = ptr_to_u64(next_key);
@@ -420,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.pathname = ptr_to_u64((void *)pathname);
        attr.bpf_fd = fd;
 
@@ -431,7 +444,7 @@ int bpf_obj_get(const char *pathname)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.pathname = ptr_to_u64((void *)pathname);
 
        return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
@@ -442,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.target_fd     = target_fd;
        attr.attach_bpf_fd = prog_fd;
        attr.attach_type   = type;
@@ -455,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.target_fd   = target_fd;
        attr.attach_type = type;
 
@@ -466,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.target_fd   = target_fd;
        attr.attach_bpf_fd = prog_fd;
        attr.attach_type = type;
@@ -480,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
        union bpf_attr attr;
        int ret;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.query.target_fd    = target_fd;
        attr.query.attach_type  = type;
        attr.query.query_flags  = query_flags;
@@ -501,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
        union bpf_attr attr;
        int ret;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = prog_fd;
        attr.test.data_in = ptr_to_u64(data);
        attr.test.data_out = ptr_to_u64(data_out);
@@ -526,7 +539,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
        if (!test_attr->data_out && test_attr->data_size_out > 0)
                return -EINVAL;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = test_attr->prog_fd;
        attr.test.data_in = ptr_to_u64(test_attr->data_in);
        attr.test.data_out = ptr_to_u64(test_attr->data_out);
@@ -546,7 +559,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
        union bpf_attr attr;
        int err;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.start_id = start_id;
 
        err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
@@ -561,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
        union bpf_attr attr;
        int err;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.start_id = start_id;
 
        err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
@@ -575,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.prog_id = id;
 
        return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
@@ -585,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.map_id = id;
 
        return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
@@ -595,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.btf_id = id;
 
        return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
@@ -606,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
        union bpf_attr attr;
        int err;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.info.bpf_fd = prog_fd;
        attr.info.info_len = *info_len;
        attr.info.info = ptr_to_u64(info);
@@ -622,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 {
        union bpf_attr attr;
 
-       bzero(&attr, sizeof(attr));
+       memset(&attr, 0, sizeof(attr));
        attr.raw_tracepoint.name = ptr_to_u64(name);
        attr.raw_tracepoint.prog_fd = prog_fd;
 
index ed09eed2dc3ba6e6a3b2ebf0d4241066e2d04447..6ffdd79bea89df1e8e2d015130f15a93e504ff94 100644 (file)
@@ -85,6 +85,7 @@ struct bpf_load_program_attr {
        __u32 line_info_rec_size;
        const void *line_info;
        __u32 line_info_cnt;
+       __u32 log_level;
 };
 
 /* Flags to direct loading requirements */
index ab6528c935a1005c1a22afa2cf3c958ad0cffe27..68b50e9bbde18e45b61c2321e798181116a04b5b 100644 (file)
@@ -16,7 +16,8 @@
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
-#define BTF_MAX_NR_TYPES 65535
+#define BTF_MAX_NR_TYPES 0x7fffffff
+#define BTF_MAX_STR_OFFSET 0x7fffffff
 
 #define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \
                ((k) == BTF_KIND_VOLATILE) || \
@@ -41,9 +42,8 @@ struct btf {
 
 struct btf_ext_info {
        /*
-        * info points to a deep copy of the individual info section
-        * (e.g. func_info and line_info) from the .BTF.ext.
-        * It does not include the __u32 rec_size.
+        * info points to the individual info section (e.g. func_info and
+        * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
         */
        void *info;
        __u32 rec_size;
@@ -51,8 +51,13 @@ struct btf_ext_info {
 };
 
 struct btf_ext {
+       union {
+               struct btf_ext_header *hdr;
+               void *data;
+       };
        struct btf_ext_info func_info;
        struct btf_ext_info line_info;
+       __u32 data_size;
 };
 
 struct btf_ext_info_sec {
@@ -171,7 +176,7 @@ static int btf_parse_str_sec(struct btf *btf)
        const char *start = btf->nohdr_data + hdr->str_off;
        const char *end = start + btf->hdr->str_len;
 
-       if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+       if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
            start[0] || end[-1]) {
                pr_debug("Invalid BTF string section\n");
                return -EINVAL;
@@ -366,8 +371,6 @@ void btf__free(struct btf *btf)
 
 struct btf *btf__new(__u8 *data, __u32 size)
 {
-       __u32 log_buf_size = 0;
-       char *log_buf = NULL;
        struct btf *btf;
        int err;
 
@@ -377,15 +380,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
 
        btf->fd = -1;
 
-       log_buf = malloc(BPF_LOG_BUF_SIZE);
-       if (!log_buf) {
-               err = -ENOMEM;
-               goto done;
-       }
-
-       *log_buf = 0;
-       log_buf_size = BPF_LOG_BUF_SIZE;
-
        btf->data = malloc(size);
        if (!btf->data) {
                err = -ENOMEM;
@@ -395,17 +389,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
        memcpy(btf->data, data, size);
        btf->data_size = size;
 
-       btf->fd = bpf_load_btf(btf->data, btf->data_size,
-                              log_buf, log_buf_size, false);
-
-       if (btf->fd == -1) {
-               err = -errno;
-               pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
-               if (log_buf && *log_buf)
-                       pr_warning("%s\n", log_buf);
-               goto done;
-       }
-
        err = btf_parse_hdr(btf);
        if (err)
                goto done;
@@ -417,8 +400,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
        err = btf_parse_type_sec(btf);
 
 done:
-       free(log_buf);
-
        if (err) {
                btf__free(btf);
                return ERR_PTR(err);
@@ -427,16 +408,45 @@ done:
        return btf;
 }
 
+int btf__load(struct btf *btf)
+{
+       __u32 log_buf_size = BPF_LOG_BUF_SIZE;
+       char *log_buf = NULL;
+       int err = 0;
+
+       if (btf->fd >= 0)
+               return -EEXIST;
+
+       log_buf = malloc(log_buf_size);
+       if (!log_buf)
+               return -ENOMEM;
+
+       *log_buf = 0;
+
+       btf->fd = bpf_load_btf(btf->data, btf->data_size,
+                              log_buf, log_buf_size, false);
+       if (btf->fd < 0) {
+               err = -errno;
+               pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+               if (*log_buf)
+                       pr_warning("%s\n", log_buf);
+               goto done;
+       }
+
+done:
+       free(log_buf);
+       return err;
+}
+
 int btf__fd(const struct btf *btf)
 {
        return btf->fd;
 }
 
-void btf__get_strings(const struct btf *btf, const char **strings,
-                     __u32 *str_len)
+const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
 {
-       *strings = btf->strings;
-       *str_len = btf->hdr->str_len;
+       *size = btf->data_size;
+       return btf->data;
 }
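
The removed strings getter stays reachable through the raw-data accessor: the blob starts with a struct btf_header whose section offsets are relative to the end of the header. A sketch of the equivalent lookup, given a parsed struct btf *btf (mirroring the updated do_test_dedup() further down):

	__u32 raw_size;
	const void *raw = btf__get_raw_data(btf, &raw_size);
	const struct btf_header *hdr = raw;
	const char *strs = raw + hdr->hdr_len + hdr->str_off;
	__u32 strs_len = hdr->str_len;	/* what btf__get_strings() used to return */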
 
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -474,7 +484,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
                goto exit_free;
        }
 
-       bzero(ptr, last_size);
+       memset(ptr, 0, last_size);
        btf_info.btf = ptr_to_u64(ptr);
        err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 
@@ -488,7 +498,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
                        goto exit_free;
                }
                ptr = temp_ptr;
-               bzero(ptr, last_size);
+               memset(ptr, 0, last_size);
                btf_info.btf = ptr_to_u64(ptr);
                err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
        }
@@ -583,7 +593,7 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
        return 0;
 }
 
-struct btf_ext_sec_copy_param {
+struct btf_ext_sec_setup_param {
        __u32 off;
        __u32 len;
        __u32 min_rec_size;
@@ -591,20 +601,14 @@ struct btf_ext_sec_copy_param {
        const char *desc;
 };
 
-static int btf_ext_copy_info(struct btf_ext *btf_ext,
-                            __u8 *data, __u32 data_size,
-                            struct btf_ext_sec_copy_param *ext_sec)
+static int btf_ext_setup_info(struct btf_ext *btf_ext,
+                             struct btf_ext_sec_setup_param *ext_sec)
 {
-       const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
        const struct btf_ext_info_sec *sinfo;
        struct btf_ext_info *ext_info;
        __u32 info_left, record_size;
        /* The start of the info sec (including the __u32 record_size). */
-       const void *info;
-
-       /* data and data_size do not include btf_ext_header from now on */
-       data = data + hdr->hdr_len;
-       data_size -= hdr->hdr_len;
+       void *info;
 
        if (ext_sec->off & 0x03) {
                pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
@@ -612,16 +616,15 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
                return -EINVAL;
        }
 
-       if (data_size < ext_sec->off ||
-           ext_sec->len > data_size - ext_sec->off) {
+       info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
+       info_left = ext_sec->len;
+
+       if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
                pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
-                    ext_sec->desc, ext_sec->off, ext_sec->len);
+                        ext_sec->desc, ext_sec->off, ext_sec->len);
                return -EINVAL;
        }
 
-       info = data + ext_sec->off;
-       info_left = ext_sec->len;
-
        /* At least a record size */
        if (info_left < sizeof(__u32)) {
                pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
@@ -633,7 +636,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
        if (record_size < ext_sec->min_rec_size ||
            record_size & 0x03) {
                pr_debug("%s section in .BTF.ext has invalid record size %u\n",
-                    ext_sec->desc, record_size);
+                        ext_sec->desc, record_size);
                return -EINVAL;
        }
 
@@ -679,42 +682,35 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
        ext_info = ext_sec->ext_info;
        ext_info->len = ext_sec->len - sizeof(__u32);
        ext_info->rec_size = record_size;
-       ext_info->info = malloc(ext_info->len);
-       if (!ext_info->info)
-               return -ENOMEM;
-       memcpy(ext_info->info, info + sizeof(__u32), ext_info->len);
+       ext_info->info = info + sizeof(__u32);
 
        return 0;
 }
 
-static int btf_ext_copy_func_info(struct btf_ext *btf_ext,
-                                 __u8 *data, __u32 data_size)
+static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
 {
-       const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
-       struct btf_ext_sec_copy_param param = {
-               .off = hdr->func_info_off,
-               .len = hdr->func_info_len,
+       struct btf_ext_sec_setup_param param = {
+               .off = btf_ext->hdr->func_info_off,
+               .len = btf_ext->hdr->func_info_len,
                .min_rec_size = sizeof(struct bpf_func_info_min),
                .ext_info = &btf_ext->func_info,
                .desc = "func_info"
        };
 
-       return btf_ext_copy_info(btf_ext, data, data_size, &param);
+       return btf_ext_setup_info(btf_ext, &param);
 }
 
-static int btf_ext_copy_line_info(struct btf_ext *btf_ext,
-                                 __u8 *data, __u32 data_size)
+static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
 {
-       const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
-       struct btf_ext_sec_copy_param param = {
-               .off = hdr->line_info_off,
-               .len = hdr->line_info_len,
+       struct btf_ext_sec_setup_param param = {
+               .off = btf_ext->hdr->line_info_off,
+               .len = btf_ext->hdr->line_info_len,
                .min_rec_size = sizeof(struct bpf_line_info_min),
                .ext_info = &btf_ext->line_info,
                .desc = "line_info",
        };
 
-       return btf_ext_copy_info(btf_ext, data, data_size, &param);
+       return btf_ext_setup_info(btf_ext, &param);
 }
 
 static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
@@ -754,9 +750,7 @@ void btf_ext__free(struct btf_ext *btf_ext)
 {
        if (!btf_ext)
                return;
-
-       free(btf_ext->func_info.info);
-       free(btf_ext->line_info.info);
+       free(btf_ext->data);
        free(btf_ext);
 }
 
@@ -773,13 +767,23 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
        if (!btf_ext)
                return ERR_PTR(-ENOMEM);
 
-       err = btf_ext_copy_func_info(btf_ext, data, size);
-       if (err) {
-               btf_ext__free(btf_ext);
-               return ERR_PTR(err);
+       btf_ext->data_size = size;
+       btf_ext->data = malloc(size);
+       if (!btf_ext->data) {
+               err = -ENOMEM;
+               goto done;
        }
+       memcpy(btf_ext->data, data, size);
 
-       err = btf_ext_copy_line_info(btf_ext, data, size);
+       err = btf_ext_setup_func_info(btf_ext);
+       if (err)
+               goto done;
+
+       err = btf_ext_setup_line_info(btf_ext);
+       if (err)
+               goto done;
+
+done:
        if (err) {
                btf_ext__free(btf_ext);
                return ERR_PTR(err);
@@ -788,6 +792,12 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
        return btf_ext;
 }
 
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+{
+       *size = btf_ext->data_size;
+       return btf_ext->data;
+}
+
 static int btf_ext_reloc_info(const struct btf *btf,
                              const struct btf_ext_info *ext_info,
                              const char *sec_name, __u32 insns_cnt,
@@ -836,7 +846,8 @@ static int btf_ext_reloc_info(const struct btf *btf,
        return -ENOENT;
 }
 
-int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext,
+int btf_ext__reloc_func_info(const struct btf *btf,
+                            const struct btf_ext *btf_ext,
                             const char *sec_name, __u32 insns_cnt,
                             void **func_info, __u32 *cnt)
 {
@@ -844,7 +855,8 @@ int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ex
                                  insns_cnt, func_info, cnt);
 }
 
-int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext,
+int btf_ext__reloc_line_info(const struct btf *btf,
+                            const struct btf_ext *btf_ext,
                             const char *sec_name, __u32 insns_cnt,
                             void **line_info, __u32 *cnt)
 {
@@ -1871,7 +1883,7 @@ static int btf_dedup_prim_types(struct btf_dedup *d)
  */
 static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
 {
-       return d->map[type_id] <= BTF_MAX_TYPE;
+       return d->map[type_id] <= BTF_MAX_NR_TYPES;
 }
 
 /*
@@ -2022,7 +2034,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
        canon_id = resolve_fwd_id(d, canon_id);
 
        hypot_type_id = d->hypot_map[canon_id];
-       if (hypot_type_id <= BTF_MAX_TYPE)
+       if (hypot_type_id <= BTF_MAX_NR_TYPES)
                return hypot_type_id == cand_id;
 
        if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
@@ -2241,7 +2253,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
        __u32 h;
 
        /* already deduped or is in process of deduping (loop detected) */
-       if (d->map[type_id] <= BTF_MAX_TYPE)
+       if (d->map[type_id] <= BTF_MAX_NR_TYPES)
                return 0;
 
        t = d->btf->types[type_id];
@@ -2318,7 +2330,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 
        if (d->map[type_id] == BTF_IN_PROGRESS_ID)
                return -ELOOP;
-       if (d->map[type_id] <= BTF_MAX_TYPE)
+       if (d->map[type_id] <= BTF_MAX_NR_TYPES)
                return resolve_type_id(d, type_id);
 
        t = d->btf->types[type_id];
@@ -2498,7 +2510,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
 
        resolved_type_id = resolve_type_id(d, type_id);
        new_type_id = d->hypot_map[resolved_type_id];
-       if (new_type_id > BTF_MAX_TYPE)
+       if (new_type_id > BTF_MAX_NR_TYPES)
                return -EINVAL;
        return new_type_id;
 }
index b393da90cc8511a1cf24cb3954b6b23d9f5d8d91..94bbc249b0f1ddcbeb8dc7504fc29dc2449e8041 100644 (file)
@@ -57,6 +57,7 @@ struct btf_ext_header {
 
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
                                   const char *type_name);
 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
@@ -65,8 +66,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
-LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings,
-                                __u32 *str_len);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
@@ -76,6 +76,8 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
 LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
+                                            __u32 *size);
 LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
                                        const struct btf_ext *btf_ext,
                                        const char *sec_name, __u32 insns_cnt,
index 47969aa0faf80733ab2f05fb9ef5ea9b09b1681e..b38dcbe7460ab9947c133f4349fb81e334cc0c17 100644 (file)
@@ -307,7 +307,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
                return -EINVAL;
        }
 
-       bzero(prog, sizeof(*prog));
+       memset(prog, 0, sizeof(*prog));
 
        prog->section_name = strdup(section_name);
        if (!prog->section_name) {
@@ -835,7 +835,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
                        obj->efile.maps_shndx = idx;
                else if (strcmp(name, BTF_ELF_SEC) == 0) {
                        obj->btf = btf__new(data->d_buf, data->d_size);
-                       if (IS_ERR(obj->btf)) {
+                       if (IS_ERR(obj->btf) || btf__load(obj->btf)) {
                                pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
                                           BTF_ELF_SEC, PTR_ERR(obj->btf));
                                obj->btf = NULL;
@@ -1113,6 +1113,20 @@ err_free_new_name:
        return -errno;
 }
 
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+       if (!map || !max_entries)
+               return -EINVAL;
+
+       /* If map already created, its attributes can't be changed. */
+       if (map->fd >= 0)
+               return -EBUSY;
+
+       map->def.max_entries = max_entries;
+
+       return 0;
+}
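
A sketch of the intended use, between open and load ("prog.o" and "my_map" are hypothetical; error checks elided):

	struct bpf_object *obj = bpf_object__open("prog.o");
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");

	/* must happen before bpf_object__load(); returns -EBUSY once the fd exists */
	if (!map || bpf_map__resize(map, 1024))
		return -1;

	bpf_object__load(obj);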
+
 static int
 bpf_object__probe_name(struct bpf_object *obj)
 {
@@ -1576,7 +1590,7 @@ bpf_program__load(struct bpf_program *prog,
                struct bpf_prog_prep_result result;
                bpf_program_prep_t preprocessor = prog->preprocessor;
 
-               bzero(&result, sizeof(result));
+               memset(&result, 0, sizeof(result));
                err = preprocessor(prog, i, prog->insns,
                                   prog->insns_cnt, &result);
                if (err) {
@@ -2317,6 +2331,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
        return obj ? obj->kern_version : 0;
 }
 
+struct btf *bpf_object__btf(struct bpf_object *obj)
+{
+       return obj ? obj->btf : NULL;
+}
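
Unlike bpf_object__btf_fd(), this hands back the whole struct btf, so it composes with the btf__* getters; for example (sketch):

	struct btf *btf = bpf_object__btf(obj);	/* NULL when the object has no .BTF */

	if (btf) {
		__u32 size;
		const void *raw = btf__get_raw_data(btf, &size);
		/* inspect the raw BTF, or keep using btf__fd(btf) as before */
	}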
+
 int bpf_object__btf_fd(const struct bpf_object *obj)
 {
        return obj->btf ? btf__fd(obj->btf) : -1;
index 69a7c25eacccf27c1af7ea3bc138a906a1f459d1..6c0168f8bba5ceb519a25f89383e4ea7f47a01b2 100644 (file)
@@ -89,6 +89,9 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj);
 LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
 LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
 LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
+
+struct btf;
+LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj);
 LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
 
 LIBBPF_API struct bpf_program *
@@ -294,6 +297,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
                                 bpf_map_clear_priv_t clear_priv);
 LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
index 89c1149e32eece93378bbcdc4be8c807f8f26807..99dfa710c8180f5a8dd82cba59ce9b02651dc343 100644 (file)
@@ -130,15 +130,19 @@ LIBBPF_0.0.2 {
                bpf_probe_helper;
                bpf_probe_map_type;
                bpf_probe_prog_type;
+               bpf_map__resize;
                bpf_map_lookup_elem_flags;
+               bpf_object__btf;
                bpf_object__find_map_fd_by_name;
                bpf_get_link_xdp_id;
                btf__dedup;
                btf__get_map_kv_tids;
                btf__get_nr_types;
-               btf__get_strings;
+               btf__get_raw_data;
+               btf__load;
                btf_ext__free;
                btf_ext__func_info_rec_size;
+               btf_ext__get_raw_data;
                btf_ext__line_info_rec_size;
                btf_ext__new;
                btf_ext__reloc_func_info;
index dd093bd91aa980e68e706437e6d8c6cec41a2c4c..e47168d1257d9437f5a84fe8daf10b83a19bd091 100644 (file)
@@ -29,3 +29,4 @@ test_netcnt
 test_section_names
 test_tcpnotify_user
 test_libbpf
+alu32
index 383d2ff13fc7b73cb1f4c76aae64911df402a4ef..ccffaa0a0787e27482a55c6f392066cfc955a296 100644 (file)
@@ -23,42 +23,19 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
        test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
        test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
-       test_netcnt test_tcpnotify_user
-
-BPF_OBJ_FILES = \
-       test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
-       sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \
-       test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \
-       sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \
-       test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \
-       test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \
-       sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
-       get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-       test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \
-       xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o
-
-# Objects are built with default compilation flags and with sub-register
-# code-gen enabled.
-BPF_OBJ_FILES_DUAL_COMPILE = \
-       test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \
-       test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \
-       test_obj_id.o test_pkt_md_access.o test_tracepoint.o \
-       test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \
-       test_stacktrace_build_id.o test_get_stack_rawtp.o \
-       test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \
-       test_queue_map.o test_stack_map.o
-
-TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE)
-
-# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor
-# support which is the first version to contain both ALU32 and JMP32
-# instructions.
+       test_netcnt test_tcpnotify_user test_sock_fields
+
+BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
+TEST_GEN_FILES = $(BPF_OBJ_FILES)
+
+# Also test sub-register code-gen if LLVM has eBPF v3 processor support, which
+# adds both the ALU32 and JMP32 instruction classes.
 SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \
                        $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \
-                       $(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \
+                       $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \
                        grep 'if w')
 ifneq ($(SUBREG_CODEGEN),)
-TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE))
+TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES))
 endif
 
 # Order correspond to 'make run_tests' order
@@ -73,7 +50,8 @@ TEST_PROGS := test_kmod.sh \
        test_lirc_mode2.sh \
        test_skb_cgroup_id.sh \
        test_flow_dissector.sh \
-       test_xdp_vlan.sh
+       test_xdp_vlan.sh \
+       test_lwt_ip_encap.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
        with_tunnels.sh \
@@ -111,6 +89,7 @@ $(OUTPUT)/test_progs: trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
 $(OUTPUT)/test_netcnt: cgroup_helpers.c
+$(OUTPUT)/test_sock_fields: cgroup_helpers.c
 
 .PHONY: force
 
@@ -188,7 +167,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \
        $(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \
                trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS)
 
-$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32
+$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \
+                                       $(ALU32_BUILD_DIR)/test_progs_32
        $(CLANG) $(CLANG_FLAGS) \
                 -O2 -target bpf -emit-llvm -c $< -o - |      \
        $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
@@ -200,7 +180,7 @@ endif
 
 # Have one program compiled without "-target bpf" to test whether libbpf loads
 # it successfully
-$(OUTPUT)/test_xdp.o: test_xdp.c
+$(OUTPUT)/test_xdp.o: progs/test_xdp.c
        $(CLANG) $(CLANG_FLAGS) \
                -O2 -emit-llvm -c $< -o - | \
        $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
@@ -208,7 +188,7 @@ ifeq ($(DWARF2BTF),y)
        $(BTF_PAHOLE) -J $@
 endif
 
-$(OUTPUT)/%.o: %.c
+$(OUTPUT)/%.o: progs/%.c
        $(CLANG) $(CLANG_FLAGS) \
                 -O2 -target bpf -emit-llvm -c $< -o - |      \
        $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
index 6a0ce0f055c5bda7aba08807f84c44d0b28bc21c..d9999f1ed1d2a3399eb52f1bdae621b48689bcff 100644 (file)
@@ -176,6 +176,10 @@ static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
        (void *) BPF_FUNC_spin_lock;
 static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
        (void *) BPF_FUNC_spin_unlock;
+static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
+       (void *) BPF_FUNC_sk_fullsock;
+static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
+       (void *) BPF_FUNC_tcp_sock;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
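
These wrap the new BPF_FUNC_sk_fullsock/BPF_FUNC_tcp_sock helpers; the verifier insists on the NULL checks shown in this sketch (the program name is hypothetical, the shape mirrors test_sock_fields_kern.c below):

	SEC("cgroup_skb/egress")
	int snd_cwnd_probe(struct __sk_buff *skb)
	{
		struct bpf_sock *sk = skb->sk;	/* may be NULL; must be checked */
		struct bpf_tcp_sock *tp;

		if (!sk)
			return 1;
		sk = bpf_sk_fullsock(sk);	/* NULL for request/timewait socks */
		if (!sk)
			return 1;
		tp = bpf_tcp_sock(sk);		/* NULL unless a full TCP socket */
		if (!tp)
			return 1;
		/* bpf_tcp_sock fields are read-only, e.g.: */
		return tp->snd_cwnd > 0;
	}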
index 84fd6f1bf33e7fd2b38d89c9c876bd35139eb94c..a29206ebbd1303cd60e9300d57b8ab12514959be 100644 (file)
@@ -58,4 +58,13 @@ static inline unsigned int bpf_num_possible_cpus(void)
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
 
+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+#ifndef offsetofend
+#define offsetofend(TYPE, MEMBER) \
+       (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+#endif
+
 #endif /* __BPF_UTIL__ */
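
offsetofend() evaluates to the first byte past MEMBER, which suits "how many bytes of this struct are meaningful" checks; e.g. (sketch):

	/* bytes up to and including bpf_sock.dst_port */
	size_t n = offsetofend(struct bpf_sock, dst_port);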
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
new file mode 100644 (file)
index 0000000..c957d6d
--- /dev/null
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct grehdr {
+       __be16 flags;
+       __be16 protocol;
+};
+
+SEC("encap_gre")
+int bpf_lwt_encap_gre(struct __sk_buff *skb)
+{
+       struct encap_hdr {
+               struct iphdr iph;
+               struct grehdr greh;
+       } hdr;
+       int err;
+
+       memset(&hdr, 0, sizeof(struct encap_hdr));
+
+       hdr.iph.ihl = 5;
+       hdr.iph.version = 4;
+       hdr.iph.ttl = 0x40;
+       hdr.iph.protocol = 47;  /* IPPROTO_GRE */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       hdr.iph.saddr = 0x640110ac;  /* 172.16.1.100 */
+       hdr.iph.daddr = 0x641010ac;  /* 172.16.16.100 */
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       hdr.iph.saddr = 0xac100164;  /* 172.16.1.100 */
+       hdr.iph.daddr = 0xac101064;  /* 172.16.16.100 */
+#else
+#error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+       hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr));
+
+       hdr.greh.protocol = skb->protocol;
+
+       err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
+                                sizeof(struct encap_hdr));
+       if (err)
+               return BPF_DROP;
+
+       return BPF_LWT_REROUTE;
+}
+
+SEC("encap_gre6")
+int bpf_lwt_encap_gre6(struct __sk_buff *skb)
+{
+       struct encap_hdr {
+               struct ipv6hdr ip6hdr;
+               struct grehdr greh;
+       } hdr;
+       int err;
+
+       memset(&hdr, 0, sizeof(struct encap_hdr));
+
+       hdr.ip6hdr.version = 6;
+       hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr));
+       hdr.ip6hdr.nexthdr = 47;  /* IPPROTO_GRE */
+       hdr.ip6hdr.hop_limit = 0x40;
+       /* fb01::1 */
+       hdr.ip6hdr.saddr.s6_addr[0] = 0xfb;
+       hdr.ip6hdr.saddr.s6_addr[1] = 1;
+       hdr.ip6hdr.saddr.s6_addr[15] = 1;
+       /* fb10::1 */
+       hdr.ip6hdr.daddr.s6_addr[0] = 0xfb;
+       hdr.ip6hdr.daddr.s6_addr[1] = 0x10;
+       hdr.ip6hdr.daddr.s6_addr[15] = 1;
+
+       hdr.greh.protocol = skb->protocol;
+
+       err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
+                                sizeof(struct encap_hdr));
+       if (err)
+               return BPF_DROP;
+
+       return BPF_LWT_REROUTE;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
new file mode 100644 (file)
index 0000000..de1a43e
--- /dev/null
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+enum bpf_array_idx {
+       SRV_IDX,
+       CLI_IDX,
+       __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") addr_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct sockaddr_in6),
+       .max_entries = __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") sock_result_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct bpf_sock),
+       .max_entries = __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") tcp_sock_result_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct bpf_tcp_sock),
+       .max_entries = __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") linum_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u32),
+       .max_entries = 1,
+};
+
+static bool is_loopback6(__u32 *a6)
+{
+       return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
+}
+
+static void skcpy(struct bpf_sock *dst,
+                 const struct bpf_sock *src)
+{
+       dst->bound_dev_if = src->bound_dev_if;
+       dst->family = src->family;
+       dst->type = src->type;
+       dst->protocol = src->protocol;
+       dst->mark = src->mark;
+       dst->priority = src->priority;
+       dst->src_ip4 = src->src_ip4;
+       dst->src_ip6[0] = src->src_ip6[0];
+       dst->src_ip6[1] = src->src_ip6[1];
+       dst->src_ip6[2] = src->src_ip6[2];
+       dst->src_ip6[3] = src->src_ip6[3];
+       dst->src_port = src->src_port;
+       dst->dst_ip4 = src->dst_ip4;
+       dst->dst_ip6[0] = src->dst_ip6[0];
+       dst->dst_ip6[1] = src->dst_ip6[1];
+       dst->dst_ip6[2] = src->dst_ip6[2];
+       dst->dst_ip6[3] = src->dst_ip6[3];
+       dst->dst_port = src->dst_port;
+       dst->state = src->state;
+}
+
+static void tpcpy(struct bpf_tcp_sock *dst,
+                 const struct bpf_tcp_sock *src)
+{
+       dst->snd_cwnd = src->snd_cwnd;
+       dst->srtt_us = src->srtt_us;
+       dst->rtt_min = src->rtt_min;
+       dst->snd_ssthresh = src->snd_ssthresh;
+       dst->rcv_nxt = src->rcv_nxt;
+       dst->snd_nxt = src->snd_nxt;
+       dst->snd_una = src->snd_una;
+       dst->mss_cache = src->mss_cache;
+       dst->ecn_flags = src->ecn_flags;
+       dst->rate_delivered = src->rate_delivered;
+       dst->rate_interval_us = src->rate_interval_us;
+       dst->packets_out = src->packets_out;
+       dst->retrans_out = src->retrans_out;
+       dst->total_retrans = src->total_retrans;
+       dst->segs_in = src->segs_in;
+       dst->data_segs_in = src->data_segs_in;
+       dst->segs_out = src->segs_out;
+       dst->data_segs_out = src->data_segs_out;
+       dst->lost_out = src->lost_out;
+       dst->sacked_out = src->sacked_out;
+       dst->bytes_received = src->bytes_received;
+       dst->bytes_acked = src->bytes_acked;
+}
+
+#define RETURN { /* record the failing line in linum_map */   \
+       linum = __LINE__;                                       \
+       bpf_map_update_elem(&linum_map, &idx0, &linum, 0);      \
+       return 1;                                               \
+}
+
+SEC("cgroup_skb/egress")
+int read_sock_fields(struct __sk_buff *skb)
+{
+       __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
+       struct sockaddr_in6 *srv_sa6, *cli_sa6;
+       struct bpf_tcp_sock *tp, *tp_ret;
+       struct bpf_sock *sk, *sk_ret;
+       __u32 linum, idx0 = 0;
+
+       sk = skb->sk;
+       if (!sk || sk->state == 10) /* 10 == TCP_LISTEN */
+               RETURN;
+
+       sk = bpf_sk_fullsock(sk);
+       if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
+           !is_loopback6(sk->src_ip6))
+               RETURN;
+
+       tp = bpf_tcp_sock(sk);
+       if (!tp)
+               RETURN;
+
+       srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
+       cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
+       if (!srv_sa6 || !cli_sa6)
+               RETURN;
+
+       if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
+               idx = srv_idx;
+       else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
+               idx = cli_idx;
+       else
+               RETURN;
+
+       sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
+       tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
+       if (!sk_ret || !tp_ret)
+               RETURN;
+
+       skcpy(sk_ret, sk);
+       tpcpy(tp_ret, tp);
+
+       RETURN;
+}
+
+char _license[] SEC("license") = "GPL";
index ee723774015a1f259f1a521fa44971305e6a4389..02d314383a9c8fae85b318807921a9a59f78fb7e 100644 (file)
@@ -5879,15 +5879,17 @@ static void dump_btf_strings(const char *strs, __u32 len)
 static int do_test_dedup(unsigned int test_num)
 {
        const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
-       int err = 0, i;
-       __u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len;
-       void *raw_btf;
-       unsigned int raw_btf_size;
+       __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
+       const struct btf_header *test_hdr, *expect_hdr;
        struct btf *test_btf = NULL, *expect_btf = NULL;
+       const void *test_btf_data, *expect_btf_data;
        const char *ret_test_next_str, *ret_expect_next_str;
        const char *test_strs, *expect_strs;
        const char *test_str_cur, *test_str_end;
        const char *expect_str_cur, *expect_str_end;
+       unsigned int raw_btf_size;
+       void *raw_btf;
+       int err = 0, i;
 
        fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
 
@@ -5924,23 +5926,34 @@ static int do_test_dedup(unsigned int test_num)
                goto done;
        }
 
-       btf__get_strings(test_btf, &test_strs, &test_str_len);
-       btf__get_strings(expect_btf, &expect_strs, &expect_str_len);
-       if (CHECK(test_str_len != expect_str_len,
-                 "test_str_len:%u != expect_str_len:%u",
-                 test_str_len, expect_str_len)) {
+       test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
+       expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
+       if (CHECK(test_btf_size != expect_btf_size,
+                 "test_btf_size:%u != expect_btf_size:%u",
+                 test_btf_size, expect_btf_size)) {
+               err = -1;
+               goto done;
+       }
+
+       test_hdr = test_btf_data;
+       test_strs = test_btf_data + test_hdr->hdr_len + test_hdr->str_off;
+       expect_hdr = expect_btf_data;
+       expect_strs = expect_btf_data + expect_hdr->hdr_len + expect_hdr->str_off;
+       if (CHECK(test_hdr->str_len != expect_hdr->str_len,
+                 "test_hdr->str_len:%u != expect_hdr->str_len:%u",
+                 test_hdr->str_len, expect_hdr->str_len)) {
                fprintf(stderr, "\ntest strings:\n");
-               dump_btf_strings(test_strs, test_str_len);
+               dump_btf_strings(test_strs, test_hdr->str_len);
                fprintf(stderr, "\nexpected strings:\n");
-               dump_btf_strings(expect_strs, expect_str_len);
+               dump_btf_strings(expect_strs, expect_hdr->str_len);
                err = -1;
                goto done;
        }
 
        test_str_cur = test_strs;
-       test_str_end = test_strs + test_str_len;
+       test_str_end = test_strs + test_hdr->str_len;
        expect_str_cur = expect_strs;
-       expect_str_end = expect_strs + expect_str_len;
+       expect_str_end = expect_strs + expect_hdr->str_len;
        while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
                size_t test_len, expect_len;
 
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
new file mode 100755 (executable)
index 0000000..612632c
--- /dev/null
@@ -0,0 +1,376 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Setup/topology:
+#
+#    NS1             NS2             NS3
+#   veth1 <---> veth2   veth3 <---> veth4 (the top route)
+#   veth5 <---> veth6   veth7 <---> veth8 (the bottom route)
+#
+#   each vethN gets IPv[4|6]_N address
+#
+#   IPv*_SRC = IPv*_1
+#   IPv*_DST = IPv*_4
+#
+#   all tests test pings from IPv*_SRC to IPv*_DST
+#
+#   by default, routes are configured to allow packets to go
+#   IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
+#
+#   a GRE device is installed in NS3 with IPv*_GRE, and
+#   NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
+#   (the bottom route)
+#
+# Tests:
+#
+#   1. routes NS2->IPv*_DST are brought down, so the only way a ping
+#      from IP*_SRC to IP*_DST can work is via IPv*_GRE
+#
+#   2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
+#       that encaps the packets with an IP/GRE header to route to IPv*_GRE
+#
+#       ping: SRC->[encap at veth1:egress]->GRE:decap->DST
+#       ping replies go DST->SRC directly
+#
+#   2b. in an ingress test, a bpf LWT_IN program is installed on veth2
+#       that encaps the packets with an IP/GRE header to route to IPv*_GRE
+#
+#       ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
+#       ping replies go DST->SRC directly
+
+if [[ $EUID -ne 0 ]]; then
+       echo "This script must be run as root"
+       echo "FAIL"
+       exit 1
+fi
+
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
+
+readonly IPv4_1="172.16.1.100"
+readonly IPv4_2="172.16.2.100"
+readonly IPv4_3="172.16.3.100"
+readonly IPv4_4="172.16.4.100"
+readonly IPv4_5="172.16.5.100"
+readonly IPv4_6="172.16.6.100"
+readonly IPv4_7="172.16.7.100"
+readonly IPv4_8="172.16.8.100"
+readonly IPv4_GRE="172.16.16.100"
+
+readonly IPv4_SRC=$IPv4_1
+readonly IPv4_DST=$IPv4_4
+
+readonly IPv6_1="fb01::1"
+readonly IPv6_2="fb02::1"
+readonly IPv6_3="fb03::1"
+readonly IPv6_4="fb04::1"
+readonly IPv6_5="fb05::1"
+readonly IPv6_6="fb06::1"
+readonly IPv6_7="fb07::1"
+readonly IPv6_8="fb08::1"
+readonly IPv6_GRE="fb10::1"
+
+readonly IPv6_SRC=$IPv6_1
+readonly IPv6_DST=$IPv6_4
+
+TEST_STATUS=0
+TESTS_SUCCEEDED=0
+TESTS_FAILED=0
+
+process_test_results()
+{
+       if [[ "${TEST_STATUS}" -eq 0 ]] ; then
+               echo "PASS"
+               TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
+       else
+               echo "FAIL"
+               TESTS_FAILED=$((TESTS_FAILED+1))
+       fi
+}
+
+print_test_summary_and_exit()
+{
+       echo "passed tests: ${TESTS_SUCCEEDED}"
+       echo "failed tests: ${TESTS_FAILED}"
+       if [ "${TESTS_FAILED}" -eq "0" ] ; then
+               exit 0
+       else
+               exit 1
+       fi
+}
+
+setup()
+{
+       set -e  # exit on error
+       TEST_STATUS=0
+
+       # create devices and namespaces
+       ip netns add "${NS1}"
+       ip netns add "${NS2}"
+       ip netns add "${NS3}"
+
+       ip link add veth1 type veth peer name veth2
+       ip link add veth3 type veth peer name veth4
+       ip link add veth5 type veth peer name veth6
+       ip link add veth7 type veth peer name veth8
+
+       ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
+       ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
+
+       ip link set veth1 netns ${NS1}
+       ip link set veth2 netns ${NS2}
+       ip link set veth3 netns ${NS2}
+       ip link set veth4 netns ${NS3}
+       ip link set veth5 netns ${NS1}
+       ip link set veth6 netns ${NS2}
+       ip link set veth7 netns ${NS2}
+       ip link set veth8 netns ${NS3}
+
+       # configure addresses: the top route (1-2-3-4)
+       ip -netns ${NS1}    addr add ${IPv4_1}/24  dev veth1
+       ip -netns ${NS2}    addr add ${IPv4_2}/24  dev veth2
+       ip -netns ${NS2}    addr add ${IPv4_3}/24  dev veth3
+       ip -netns ${NS3}    addr add ${IPv4_4}/24  dev veth4
+       ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
+       ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
+       ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
+       ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
+
+       # configure addresses: the bottom route (5-6-7-8)
+       ip -netns ${NS1}    addr add ${IPv4_5}/24  dev veth5
+       ip -netns ${NS2}    addr add ${IPv4_6}/24  dev veth6
+       ip -netns ${NS2}    addr add ${IPv4_7}/24  dev veth7
+       ip -netns ${NS3}    addr add ${IPv4_8}/24  dev veth8
+       ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
+       ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
+       ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
+       ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
+
+
+       ip -netns ${NS1} link set dev veth1 up
+       ip -netns ${NS2} link set dev veth2 up
+       ip -netns ${NS2} link set dev veth3 up
+       ip -netns ${NS3} link set dev veth4 up
+       ip -netns ${NS1} link set dev veth5 up
+       ip -netns ${NS2} link set dev veth6 up
+       ip -netns ${NS2} link set dev veth7 up
+       ip -netns ${NS3} link set dev veth8 up
+
+       # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
+       # the bottom route to specific bottom addresses
+
+       # NS1
+       # top route
+       ip -netns ${NS1}    route add ${IPv4_2}/32  dev veth1
+       ip -netns ${NS1}    route add default dev veth1 via ${IPv4_2}  # go top by default
+       ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1
+       ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2}  # go top by default
+       # bottom route
+       ip -netns ${NS1}    route add ${IPv4_6}/32  dev veth5
+       ip -netns ${NS1}    route add ${IPv4_7}/32  dev veth5 via ${IPv4_6}
+       ip -netns ${NS1}    route add ${IPv4_8}/32  dev veth5 via ${IPv4_6}
+       ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5
+       ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6}
+       ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6}
+
+       # NS2
+       # top route
+       ip -netns ${NS2}    route add ${IPv4_1}/32  dev veth2
+       ip -netns ${NS2}    route add ${IPv4_4}/32  dev veth3
+       ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2
+       ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3
+       # bottom route
+       ip -netns ${NS2}    route add ${IPv4_5}/32  dev veth6
+       ip -netns ${NS2}    route add ${IPv4_8}/32  dev veth7
+       ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6
+       ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7
+
+       # NS3
+       # top route
+       ip -netns ${NS3}    route add ${IPv4_3}/32  dev veth4
+       ip -netns ${NS3}    route add ${IPv4_1}/32  dev veth4 via ${IPv4_3}
+       ip -netns ${NS3}    route add ${IPv4_2}/32  dev veth4 via ${IPv4_3}
+       ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
+       ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
+       ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
+       # bottom route
+       ip -netns ${NS3}    route add ${IPv4_7}/32  dev veth8
+       ip -netns ${NS3}    route add ${IPv4_5}/32  dev veth8 via ${IPv4_7}
+       ip -netns ${NS3}    route add ${IPv4_6}/32  dev veth8 via ${IPv4_7}
+       ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
+       ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
+       ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
+
+       # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
+       ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
+       ip -netns ${NS3} link set gre_dev up
+       ip -netns ${NS3} addr add ${IPv4_GRE} nodad dev gre_dev
+       ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
+       ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
+
+
+       # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
+       ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
+       ip -netns ${NS3} link set gre6_dev up
+       ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
+       ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6}
+       ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8}
+
+       # rp_filter gets confused by what these tests are doing, so disable it
+       ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+       ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+       ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+
+       sleep 1  # reduce flakiness
+       set +e
+}
+
+cleanup()
+{
+       ip netns del ${NS1} 2> /dev/null
+       ip netns del ${NS2} 2> /dev/null
+       ip netns del ${NS3} 2> /dev/null
+}
+
+trap cleanup EXIT
+
+remove_routes_to_gredev()
+{
+       ip -netns ${NS1} route del ${IPv4_GRE} dev veth5
+       ip -netns ${NS2} route del ${IPv4_GRE} dev veth7
+       ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5
+       ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7
+}
+
+add_unreachable_routes_to_gredev()
+{
+       ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32
+       ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32
+       ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128
+       ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128
+}
+
+test_ping()
+{
+       local -r PROTO=$1
+       local -r EXPECTED=$2
+       local RET=0
+
+       if [ "${PROTO}" == "IPv4" ] ; then
+               ip netns exec ${NS1} ping  -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} > /dev/null 2>&1
+               RET=$?
+       elif [ "${PROTO}" == "IPv6" ] ; then
+               ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} > /dev/null 2>&1
+               RET=$?
+       else
+               echo "    test_ping: unknown PROTO: ${PROTO}"
+               TEST_STATUS=1
+       fi
+
+       if [ "0" != "${RET}" ]; then
+               RET=1
+       fi
+
+       if [ "${EXPECTED}" != "${RET}" ] ; then
+               echo "    test_ping failed: expected: ${EXPECTED}; got ${RET}"
+               TEST_STATUS=1
+       fi
+}
+
+test_egress()
+{
+       local -r ENCAP=$1
+       echo "starting egress ${ENCAP} encap test"
+       setup
+
+       # by default, pings work
+       test_ping IPv4 0
+       test_ping IPv6 0
+
+       # remove NS2->DST routes, ping fails
+       ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3
+       ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
+       test_ping IPv4 1
+       test_ping IPv6 1
+
+       # install replacement routes (LWT/eBPF), pings succeed
+       if [ "${ENCAP}" == "IPv4" ] ; then
+               ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
+               ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
+       elif [ "${ENCAP}" == "IPv6" ] ; then
+               ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
+               ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
+       else
+               echo "    unknown encap ${ENCAP}"
+               TEST_STATUS=1
+       fi
+       test_ping IPv4 0
+       test_ping IPv6 0
+
+       # a negative test: remove routes to GRE devices: ping fails
+       remove_routes_to_gredev
+       test_ping IPv4 1
+       test_ping IPv6 1
+
+       # another negative test
+       add_unreachable_routes_to_gredev
+       test_ping IPv4 1
+       test_ping IPv6 1
+
+       cleanup
+       process_test_results
+}
+
+test_ingress()
+{
+       local -r ENCAP=$1
+       echo "starting ingress ${ENCAP} encap test"
+       setup
+
+       # need to wait a bit for IPv6 to autoconf, otherwise
+       # ping6 sometimes fails with "unable to bind to address"
+
+       # by default, pings work
+       test_ping IPv4 0
+       test_ping IPv6 0
+
+       # remove NS2->DST routes, pings fail
+       ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3
+       ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
+       test_ping IPv4 1
+       test_ping IPv6 1
+
+       # install replacement routes (LWT/eBPF), pings succeed
+       if [ "${ENCAP}" == "IPv4" ] ; then
+               ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
+               ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
+       elif [ "${ENCAP}" == "IPv6" ] ; then
+               ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
+               ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
+       else
+               echo "FAIL: unknown encap ${ENCAP}"
+       fi
+       test_ping IPv4 0
+       test_ping IPv6 0
+
+       # a negative test: remove routes to GRE devices: ping fails
+       remove_routes_to_gredev
+       test_ping IPv4 1
+       test_ping IPv6 1
+
+       # another negative test
+       add_unreachable_routes_to_gredev
+       test_ping IPv4 1
+       test_ping IPv6 1
+
+       cleanup
+       process_test_results
+}
+
+test_egress IPv4
+test_egress IPv6
+test_ingress IPv4
+test_ingress IPv6
+
+print_test_summary_and_exit
index 561ffb6d643349f794ff8dc9b00ee570c3dd369b..fb679ac3d4b07a4a68822369f9476ab0d06f7fd2 100644 (file)
@@ -20,6 +20,7 @@
 #define MAX_INSNS      512
 
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static bool verbose = false;
 
 struct sock_test {
        const char *descr;
@@ -325,6 +326,7 @@ static int load_sock_prog(const struct bpf_insn *prog,
                          enum bpf_attach_type attach_type)
 {
        struct bpf_load_program_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(struct bpf_load_program_attr));
        attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
@@ -332,8 +334,13 @@ static int load_sock_prog(const struct bpf_insn *prog,
        attr.insns = prog;
        attr.insns_cnt = probe_prog_length(attr.insns);
        attr.license = "GPL";
+       attr.log_level = 2;
 
-       return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+       ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+       if (verbose && ret < 0)
+               fprintf(stderr, "%s\n", bpf_log_buf);
+
+       return ret;
 }
 
 static int attach_sock_prog(int cgfd, int progfd,
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
new file mode 100644 (file)
index 0000000..9bb5836
--- /dev/null
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <sys/socket.h>
+#include <sys/epoll.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+
+enum bpf_array_idx {
+       SRV_IDX,
+       CLI_IDX,
+       __NR_BPF_ARRAY_IDX,
+};
+
+#define CHECK(condition, tag, format...) ({                            \
+       int __ret = !!(condition);                                      \
+       if (__ret) {                                                    \
+               printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);     \
+               printf(format);                                         \
+               printf("\n");                                           \
+               exit(-1);                                               \
+       }                                                               \
+})
+
+#define TEST_CGROUP "/test-bpf-sock-fields"
+#define DATA "Hello BPF!"
+#define DATA_LEN sizeof(DATA)
+
+static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int linum_map_fd;
+static int addr_map_fd;
+static int tp_map_fd;
+static int sk_map_fd;
+static __u32 srv_idx = SRV_IDX;
+static __u32 cli_idx = CLI_IDX;
+
+static void init_loopback6(struct sockaddr_in6 *sa6)
+{
+       memset(sa6, 0, sizeof(*sa6));
+       sa6->sin6_family = AF_INET6;
+       sa6->sin6_addr = in6addr_loopback;
+}
+
+static void print_sk(const struct bpf_sock *sk)
+{
+       char src_ip4[24], dst_ip4[24];
+       char src_ip6[64], dst_ip6[64];
+
+       inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
+       inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
+       inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
+       inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
+
+       printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
+              "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
+              "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
+              sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
+              sk->mark, sk->priority,
+              sk->src_ip4, src_ip4,
+              sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
+              src_ip6, sk->src_port,
+              sk->dst_ip4, dst_ip4,
+              sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
+              dst_ip6, ntohs(sk->dst_port));
+}
+
+static void print_tp(const struct bpf_tcp_sock *tp)
+{
+       printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
+              "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
+              "rate_delivered:%u rate_interval_us:%u packets_out:%u "
+              "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
+              "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
+              "bytes_received:%llu bytes_acked:%llu\n",
+              tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
+              tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
+              tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
+              tp->packets_out, tp->retrans_out, tp->total_retrans,
+              tp->segs_in, tp->data_segs_in, tp->segs_out,
+              tp->data_segs_out, tp->lost_out, tp->sacked_out,
+              tp->bytes_received, tp->bytes_acked);
+}
+
+static void check_result(void)
+{
+       struct bpf_tcp_sock srv_tp, cli_tp;
+       struct bpf_sock srv_sk, cli_sk;
+       __u32 linum, idx0 = 0;
+       int err;
+
+       err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum);
+       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+             "err:%d errno:%d", err, errno);
+
+       err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk);
+       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)",
+             "err:%d errno:%d", err, errno);
+       err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp);
+       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)",
+             "err:%d errno:%d", err, errno);
+
+       err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk);
+       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)",
+             "err:%d errno:%d", err, errno);
+       err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp);
+       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)",
+             "err:%d errno:%d", err, errno);
+
+       printf("srv_sk: ");
+       print_sk(&srv_sk);
+       printf("\n");
+
+       printf("cli_sk: ");
+       print_sk(&cli_sk);
+       printf("\n");
+
+       printf("srv_tp: ");
+       print_tp(&srv_tp);
+       printf("\n");
+
+       printf("cli_tp: ");
+       print_tp(&cli_tp);
+       printf("\n");
+
+       CHECK(srv_sk.state == 10 || /* 10 == TCP_LISTEN */
+             !srv_sk.state ||
+             srv_sk.family != AF_INET6 ||
+             srv_sk.protocol != IPPROTO_TCP ||
+             memcmp(srv_sk.src_ip6, &in6addr_loopback,
+                    sizeof(srv_sk.src_ip6)) ||
+             memcmp(srv_sk.dst_ip6, &in6addr_loopback,
+                    sizeof(srv_sk.dst_ip6)) ||
+             srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+             srv_sk.dst_port != cli_sa6.sin6_port,
+             "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum);
+
+       CHECK(cli_sk.state == 10 ||
+             !cli_sk.state ||
+             cli_sk.family != AF_INET6 ||
+             cli_sk.protocol != IPPROTO_TCP ||
+             memcmp(cli_sk.src_ip6, &in6addr_loopback,
+                    sizeof(cli_sk.src_ip6)) ||
+             memcmp(cli_sk.dst_ip6, &in6addr_loopback,
+                    sizeof(cli_sk.dst_ip6)) ||
+             cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
+             cli_sk.dst_port != srv_sa6.sin6_port,
+             "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum);
+
+       CHECK(srv_tp.data_segs_out != 1 ||
+             srv_tp.data_segs_in ||
+             srv_tp.snd_cwnd != 10 ||
+             srv_tp.total_retrans ||
+             srv_tp.bytes_acked != DATA_LEN,
+             "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum);
+
+       CHECK(cli_tp.data_segs_out ||
+             cli_tp.data_segs_in != 1 ||
+             cli_tp.snd_cwnd != 10 ||
+             cli_tp.total_retrans ||
+             cli_tp.bytes_received != DATA_LEN,
+             "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum);
+}
+
+static void test(void)
+{
+       int listen_fd, cli_fd, accept_fd, epfd, err;
+       struct epoll_event ev;
+       socklen_t addrlen;
+
+       addrlen = sizeof(struct sockaddr_in6);
+       ev.events = EPOLLIN;
+
+       epfd = epoll_create(1);
+       CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
+
+       /* Prepare listen_fd */
+       listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+       CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
+             listen_fd, errno);
+
+       init_loopback6(&srv_sa6);
+       err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
+       CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
+
+       err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+       CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
+
+       err = listen(listen_fd, 1);
+       CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
+
+       /* Prepare cli_fd */
+       cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+       CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
+
+       init_loopback6(&cli_sa6);
+       err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
+       CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
+
+       err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
+       CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
+             err, errno);
+
+       /* Update addr_map with srv_sa6 and cli_sa6 */
+       err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0);
+       CHECK(err, "map_update", "err:%d errno:%d", err, errno);
+
+       err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0);
+       CHECK(err, "map_update", "err:%d errno:%d", err, errno);
+
+       /* Connect from cli_sa6 to srv_sa6 */
+       err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
+       printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
+              ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
+       CHECK(err && errno != EINPROGRESS,
+             "connect(cli_fd)", "err:%d errno:%d", err, errno);
+
+       ev.data.fd = listen_fd;
+       err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
+       CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
+             err, errno);
+
+       /* Accept the connection */
+       /* Have some timeout in accept(listen_fd). Just in case. */
+       err = epoll_wait(epfd, &ev, 1, 1000);
+       CHECK(err != 1 || ev.data.fd != listen_fd,
+             "epoll_wait(listen_fd)",
+             "err:%d errno:%d ev.data.fd:%d listen_fd:%d",
+             err, errno, ev.data.fd, listen_fd);
+
+       accept_fd = accept(listen_fd, NULL, NULL);
+       CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
+             accept_fd, errno);
+       close(listen_fd);
+
+       /* Send some data from accept_fd to cli_fd */
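+       /* This egress traffic is what the attached BPF prog inspects to
+        * fill in the result maps.
+        */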
+       err = send(accept_fd, DATA, DATA_LEN, 0);
+       CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
+             err, errno);
+
+       /* Use epoll with a timeout so recv(cli_fd) cannot block forever */
+       ev.data.fd = cli_fd;
+       err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
+       CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
+             err, errno);
+
+       err = epoll_wait(epfd, &ev, 1, 1000);
+       CHECK(err != 1 || ev.data.fd != cli_fd,
+             "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
+             err, errno, ev.data.fd, cli_fd);
+
+       err = recv(cli_fd, NULL, 0, MSG_TRUNC);
+       CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
+
+       close(epfd);
+       close(accept_fd);
+       close(cli_fd);
+
+       check_result();
+}
+
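+/* Set up a cgroup, load and attach the egress prog, resolve the maps it
+ * shares with us, then run the TCP loopback test.
+ */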
+int main(int argc, char **argv)
+{
+       struct bpf_prog_load_attr attr = {
+               .file = "test_sock_fields_kern.o",
+               .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+               .expected_attach_type = BPF_CGROUP_INET_EGRESS,
+       };
+       int cgroup_fd, prog_fd, err;
+       struct bpf_object *obj;
+       struct bpf_map *map;
+
+       err = setup_cgroup_environment();
+       CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
+             err, errno);
+
+       atexit(cleanup_cgroup_environment);
+
+       /* Create a cgroup, get fd, and join it */
+       cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+       CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
+             "cgroup_fd:%d errno:%d", cgroup_fd, errno);
+
+       err = join_cgroup(TEST_CGROUP);
+       CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
+
+       err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+       CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
+
+       err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
+       CHECK(err == -1, "bpf_prog_attach(BPF_CGROUP_INET_EGRESS)",
+             "err:%d errno:%d", err, errno);
+       close(cgroup_fd);
+
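+       /* Grab the fds of the maps shared with the BPF prog */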
+       map = bpf_object__find_map_by_name(obj, "addr_map");
+       CHECK(!map, "cannot find addr_map", "(null)");
+       addr_map_fd = bpf_map__fd(map);
+
+       map = bpf_object__find_map_by_name(obj, "sock_result_map");
+       CHECK(!map, "cannot find sock_result_map", "(null)");
+       sk_map_fd = bpf_map__fd(map);
+
+       map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
+       CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
+       tp_map_fd = bpf_map__fd(map);
+
+       map = bpf_object__find_map_by_name(obj, "linum_map");
+       CHECK(!map, "cannot find linum_map", "(null)");
+       linum_map_fd = bpf_map__fd(map);
+
+       test();
+
+       bpf_object__close(obj);
+       cleanup_cgroup_environment();
+
+       printf("PASS\n");
+
+       return 0;
+}
index dc2cc823df2b925526cfe0758fc5040a33042431..3ed3593bd8b61f4301b03fc9f06b97af4e8be17f 100644 (file)
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-       .errstr = "cannot write into socket",
+       .errstr = "cannot write into sock",
        .result = REJECT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-       .errstr = "invalid bpf_sock access off=0 size=8",
+       .errstr = "invalid sock access off=0 size=8",
        .result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
new file mode 100644 (file)
index 0000000..0ddfdf7
--- /dev/null
@@ -0,0 +1,384 @@
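+/* Verifier tests for skb->sk and the bpf_sk_fullsock()/bpf_tcp_sock()
+ * helpers: socket pointers must be NULL-checked before use, and
+ * fullsock-only fields must not be read through a plain sock_common.
+ */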
+{
+       "skb->sk: no NULL check",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid mem access 'sock_common_or_null'",
+},
+{
+       "skb->sk: sk->family [non fullsock field]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "skb->sk: sk->type [fullsock field]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid sock_common access",
+},
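+/* bpf_sk_fullsock() takes a NULL-checked sock_common and returns
+ * sock_or_null, which must itself be NULL-checked before field access.
+ */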
+{
+       "bpf_sk_fullsock(skb->sk): no !skb->sk check",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "type=sock_common_or_null expected=sock_common",
+},
+{
+       "sk_fullsock(skb->sk): no NULL check on ret",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid mem access 'sock_or_null'",
+},
+{
+       "sk_fullsock(skb->sk): sk->type [fullsock field]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): sk->family [non fullsock field]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): sk->state [narrow load]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): sk->dst_port [narrow load]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid sock access",
+},
+{
+       "sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): sk->type [narrow load]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): sk->protocol [narrow load]",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "sk_fullsock(skb->sk): beyond last field",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid sock access",
+},
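+/* bpf_tcp_sock() follows the same NULL-check discipline, with loads
+ * bounded by the last field of struct bpf_tcp_sock.
+ */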
+{
+       "bpf_tcp_sock(skb->sk): no !skb->sk check",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "type=sock_common_or_null expected=sock_common",
+},
+{
+       "bpf_tcp_sock(skb->sk): no NULL check on ret",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid mem access 'tcp_sock_or_null'",
+},
+{
+       "bpf_tcp_sock(skb->sk): tp->snd_cwnd",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "bpf_tcp_sock(skb->sk): tp->bytes_acked",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
+{
+       "bpf_tcp_sock(skb->sk): beyond last field",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = REJECT,
+       .errstr = "invalid tcp_sock access",
+},
+{
+       "bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       .result = ACCEPT,
+},
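+/* bpf_sk_release() is only valid on reference-acquired sockets (e.g. from
+ * bpf_sk_lookup_tcp()); skb->sk and the casting helpers take no reference,
+ * so all three cases below are rejected.
+ */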
+{
+       "bpf_sk_release(skb->sk)",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "type=sock_common expected=sock",
+},
+{
+       "bpf_sk_release(bpf_sk_fullsock(skb->sk))",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "reference has not been acquired before",
+},
+{
+       "bpf_sk_release(bpf_tcp_sock(skb->sk))",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "type=tcp_sock expected=sock",
+},
index 3e046695fad7b96e75db459813065843e55cc2d6..dbaf5be947b2be8ea3007cae023ef94be600a553 100644 (file)
        },
        .result = REJECT,
        //.errstr = "same insn cannot be used with different pointers",
-       .errstr = "cannot write into socket",
+       .errstr = "cannot write into sock",
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
 {