static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
- return dev_forward_skb(dev, skb);
+ return dev_forward_skb_nomtu(dev, skb);
}
static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
struct sk_buff *skb)
{
- int ret = ____dev_forward_skb(dev, skb);
+ int ret = ____dev_forward_skb(dev, skb, false);
if (likely(!ret)) {
skb->dev = dev;
goto out_drop;
dev = ops->ndo_get_peer_dev(dev);
if (unlikely(!dev ||
- !is_skb_forwardable(dev, skb) ||
+ !(dev->flags & IFF_UP) ||
net_eq(net, dev_net(dev))))
goto out_drop;
skb->dev = dev;
static u64 __bpf_get_netns_cookie(struct sock *sk)
{
-#ifdef CONFIG_NET_NS
- return __net_gen_cookie(sk ? sk->sk_net.net : &init_net);
-#else
- return 0;
-#endif
+ const struct net *net = sk ? sock_net(sk) : &init_net;
+
+ return net->net_cookie;
}
BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
.arg4_type = ARG_ANYTHING,
};
+static struct net_device *__dev_via_ifindex(struct net_device *dev_curr,
+ u32 ifindex)
+{
+ struct net *netns = dev_net(dev_curr);
+
+ /* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */
+ if (ifindex == 0)
+ return dev_curr;
+
+ return dev_get_by_index_rcu(netns, ifindex);
+}
+
+BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
+ u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
+{
+ int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+ struct net_device *dev = skb->dev;
+ int skb_len, dev_len;
+ int mtu;
+
+ if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
+ return -EINVAL;
+
+ if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff))
+ return -EINVAL;
+
+ dev = __dev_via_ifindex(dev, ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ mtu = READ_ONCE(dev->mtu);
+
+ dev_len = mtu + dev->hard_header_len;
+ skb_len = skb->len + len_diff; /* minus result pass check */
+ if (skb_len <= dev_len) {
+ ret = BPF_MTU_CHK_RET_SUCCESS;
+ goto out;
+ }
+ /* At this point, skb->len exceed MTU, but as it include length of all
+ * segments, it can still be below MTU. The SKB can possibly get
+ * re-segmented in transmit path (see validate_xmit_skb). Thus, user
+ * must choose if segs are to be MTU checked.
+ */
+ if (skb_is_gso(skb)) {
+ ret = BPF_MTU_CHK_RET_SUCCESS;
+
+ if (flags & BPF_MTU_CHK_SEGS &&
+ !skb_gso_validate_network_len(skb, mtu))
+ ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
+ }
+out:
+ /* BPF verifier guarantees valid pointer */
+ *mtu_len = mtu;
+
+ return ret;
+}
+
+BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
+ u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
+{
+ struct net_device *dev = xdp->rxq->dev;
+ int xdp_len = xdp->data_end - xdp->data;
+ int ret = BPF_MTU_CHK_RET_SUCCESS;
+ int mtu, dev_len;
+
+ /* XDP variant doesn't support multi-buffer segment check (yet) */
+ if (unlikely(flags))
+ return -EINVAL;
+
+ dev = __dev_via_ifindex(dev, ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ mtu = READ_ONCE(dev->mtu);
+
+ /* Add L2-header as dev MTU is L3 size */
+ dev_len = mtu + dev->hard_header_len;
+
+ xdp_len += len_diff; /* minus result pass check */
+ if (xdp_len > dev_len)
+ ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+
+ /* BPF verifier guarantees valid pointer */
+ *mtu_len = mtu;
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
+ .func = bpf_skb_check_mtu,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_INT,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
+ .func = bpf_xdp_check_mtu,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_INT,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
{
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
+ case BPF_FUNC_check_mtu:
+ return &bpf_skb_check_mtu_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_xdp_adjust_tail_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
+ case BPF_FUNC_check_mtu:
+ return &bpf_xdp_check_mtu_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_udp:
return &bpf_xdp_sk_lookup_udp_proto;
target_size));
break;
case offsetof(struct bpf_sock, rx_queue_mapping):
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
si->dst_reg, si->src_reg,