Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

[sfrench/cifs-2.6.git] / net / core / filter.c
diff --git a/net/core/filter.c b/net/core/filter.c

index e7a9b1667dd64a5846a82c395189f1a4968cf2cb..adfdad234674dc1031d24b8ae635174fcfbd0dce 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2083,13 +2083,13 @@ static const struct bpf_func_proto bpf_csum_level_proto = {
  
  static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
  {
-       return dev_forward_skb(dev, skb);
+       return dev_forward_skb_nomtu(dev, skb); /* NOTE(review): "_nomtu" presumably means no MTU check here - confirm */
  }
  
  static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
                                       struct sk_buff *skb)
  {
-       int ret = ____dev_forward_skb(dev, skb);
+       int ret = ____dev_forward_skb(dev, skb, false);
  
         if (likely(!ret)) {
                 skb->dev = dev;
@@ -2480,7 +2480,7 @@ int skb_do_redirect(struct sk_buff *skb)
                         goto out_drop;
                 dev = ops->ndo_get_peer_dev(dev);
                 if (unlikely(!dev ||
-                            !is_skb_forwardable(dev, skb) ||
+                            !(dev->flags & IFF_UP) ||
                              net_eq(net, dev_net(dev))))
                         goto out_drop;
                 skb->dev = dev;
@@ -4653,11 +4653,9 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
  
  static u64 __bpf_get_netns_cookie(struct sock *sk)
  {
-#ifdef CONFIG_NET_NS
-       return __net_gen_cookie(sk ? sk->sk_net.net : &init_net);
-#else
-       return 0;
-#endif
+       const struct net *net = sk ? sock_net(sk) : &init_net; /* NULL sk: fall back to init_net */
+
+       return net->net_cookie;
  }
  
  BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
@@ -5637,6 +5635,116 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
         .arg4_type      = ARG_ANYTHING,
  };
  
+static struct net_device *__dev_via_ifindex(struct net_device *dev_curr,
+                                           u32 ifindex)
+{
+       struct net *netns = dev_net(dev_curr);
+
+       /* ifindex 0 means "use dev_curr": non-redirect callers skip the lookup */
+       if (ifindex == 0)
+               return dev_curr;
+
+       return dev_get_by_index_rcu(netns, ifindex); /* looked up in dev_curr's netns */
+}
+
+BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
+          u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
+{
+       int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; /* result when too long and not GSO */
+       struct net_device *dev = skb->dev;
+       int skb_len, dev_len;
+       int mtu;
+
+       if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
+               return -EINVAL; /* unknown flag bits */
+
+       if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff))
+               return -EINVAL; /* segment check cannot be combined with len_diff */
+
+       dev = __dev_via_ifindex(dev, ifindex);
+       if (unlikely(!dev))
+               return -ENODEV; /* no device found for ifindex */
+
+       mtu = READ_ONCE(dev->mtu);
+
+       dev_len = mtu + dev->hard_header_len; /* device limit: MTU plus hard_header_len */
+       skb_len = skb->len + len_diff; /* a negative result passes the check */
+       if (skb_len <= dev_len) {
+               ret = BPF_MTU_CHK_RET_SUCCESS;
+               goto out;
+       }
+       /* Here skb->len exceeds the MTU, but as it includes the length of all
+        * segments, the individual segments can still be below the MTU.  The
+        * SKB may get re-segmented in the transmit path (see validate_xmit_skb),
+        * so the user must choose whether the segments are to be MTU checked.
+        */
+       if (skb_is_gso(skb)) {
+               ret = BPF_MTU_CHK_RET_SUCCESS;
+
+               if (flags & BPF_MTU_CHK_SEGS &&
+                   !skb_gso_validate_network_len(skb, mtu))
+                       ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
+       }
+out:
+       /* BPF verifier guarantees mtu_len is a valid pointer; report the MTU used */
+       *mtu_len = mtu;
+
+       return ret;
+}
+
+BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
+          u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
+{
+       struct net_device *dev = xdp->rxq->dev;
+       int xdp_len = xdp->data_end - xdp->data;
+       int ret = BPF_MTU_CHK_RET_SUCCESS;
+       int mtu, dev_len;
+
+       /* XDP variant doesn't support multi-buffer segment check (yet): reject any flag */
+       if (unlikely(flags))
+               return -EINVAL;
+
+       dev = __dev_via_ifindex(dev, ifindex);
+       if (unlikely(!dev))
+               return -ENODEV; /* no device found for ifindex */
+
+       mtu = READ_ONCE(dev->mtu);
+
+       /* Add the L2 header length, as the device MTU is the L3 size */
+       dev_len = mtu + dev->hard_header_len;
+
+       xdp_len += len_diff; /* a negative result passes the check */
+       if (xdp_len > dev_len)
+               ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+
+       /* BPF verifier guarantees mtu_len is a valid pointer; report the MTU used */
+       *mtu_len = mtu;
+
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
+       .func           = bpf_skb_check_mtu,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,       /* skb */
+       .arg2_type      = ARG_ANYTHING,         /* ifindex */
+       .arg3_type      = ARG_PTR_TO_INT,       /* mtu_len */
+       .arg4_type      = ARG_ANYTHING,         /* len_diff */
+       .arg5_type      = ARG_ANYTHING,         /* flags */
+};
+
+static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
+       .func           = bpf_xdp_check_mtu,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,       /* xdp */
+       .arg2_type      = ARG_ANYTHING,         /* ifindex */
+       .arg3_type      = ARG_PTR_TO_INT,       /* mtu_len */
+       .arg4_type      = ARG_ANYTHING,         /* len_diff */
+       .arg5_type      = ARG_ANYTHING,         /* flags */
+};
+
  #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
  static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
  {
@@ -7222,6 +7330,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_get_socket_uid_proto;
         case BPF_FUNC_fib_lookup:
                 return &bpf_skb_fib_lookup_proto;
+       case BPF_FUNC_check_mtu:
+               return &bpf_skb_check_mtu_proto;
         case BPF_FUNC_sk_fullsock:
                 return &bpf_sk_fullsock_proto;
         case BPF_FUNC_sk_storage_get:
@@ -7291,6 +7401,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_xdp_adjust_tail_proto;
         case BPF_FUNC_fib_lookup:
                 return &bpf_xdp_fib_lookup_proto;
+       case BPF_FUNC_check_mtu:
+               return &bpf_xdp_check_mtu_proto;
  #ifdef CONFIG_INET
         case BPF_FUNC_sk_lookup_udp:
                 return &bpf_xdp_sk_lookup_udp_proto;
@@ -8855,7 +8967,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
                                        target_size));
                 break;
         case offsetof(struct bpf_sock, rx_queue_mapping):
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
                 *insn++ = BPF_LDX_MEM(
                         BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
                         si->dst_reg, si->src_reg,