bpf: tcp: Allow bpf prog to write and parse TCP header option
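
On the tcp_input.c side this needs two pieces of plumbing.

First, a new bpf_skops_parse_hdr() runs the sockops program with
BPF_SOCK_OPS_PARSE_HDR_OPT_CB once tcp_validate_incoming() has
accepted a segment.  It is a no-op unless the program opted in:
either BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG is set, or
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG is set and the kernel's
option parser saw an option it did not recognize
(rx_opt.saw_unknown).  TCP_SYN_SENT, TCP_SYN_RECV and TCP_LISTEN
sockets return early because their skbs reach the program through
bpf_skops_established() or bpf_skops_write_hdr_opt() instead.

Second, tcp_init_transfer() and af_ops->send_synack() now take the
skb that triggered them, so the {ACTIVE,PASSIVE}_ESTABLISHED
callbacks (via the new bpf_skops_established(), which replaces the
bare tcp_call_bpf() call) and the SYNACK path can see the incoming
SYN's header options.  The skb may be NULL: a socket under
TCP_REPAIR reaches tcp_finish_connect() without one.

A usage sketch follows the diff.
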
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9072d9160df9ed2cae258ab05a02d157a64b20ca..319cc7fd5117c44d507dad352cb3ac30519a76c0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -138,6 +138,69 @@ void clean_acked_data_flush(void)
 EXPORT_SYMBOL_GPL(clean_acked_data_flush);
 #endif
 
+#ifdef CONFIG_CGROUP_BPF
+static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
+{
+       bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
+               BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+                                      BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
+       bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+                                                   BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+       struct bpf_sock_ops_kern sock_ops;
+
+       if (likely(!unknown_opt && !parse_all_opt))
+               return;
+
+       /* The skb will be handled in the
+        * bpf_skops_established() or
+        * bpf_skops_write_hdr_opt().
+        */
+       switch (sk->sk_state) {
+       case TCP_SYN_RECV:
+       case TCP_SYN_SENT:
+       case TCP_LISTEN:
+               return;
+       }
+
+       sock_owned_by_me(sk);
+
+       memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+       sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
+       sock_ops.is_fullsock = 1;
+       sock_ops.sk = sk;
+       bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
+
+       BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+}
+
+static void bpf_skops_established(struct sock *sk, int bpf_op,
+                                 struct sk_buff *skb)
+{
+       struct bpf_sock_ops_kern sock_ops;
+
+       sock_owned_by_me(sk);
+
+       memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+       sock_ops.op = bpf_op;
+       sock_ops.is_fullsock = 1;
+       sock_ops.sk = sk;
+       /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
+       if (skb)
+               bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
+
+       BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+}
+#else
+static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
+{
+}
+
+static void bpf_skops_established(struct sock *sk, int bpf_op,
+                                 struct sk_buff *skb)
+{
+}
+#endif
+
 static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
                             unsigned int len)
 {
@@ -5600,6 +5663,8 @@ syn_challenge:
                goto discard;
        }
 
+       bpf_skops_parse_hdr(sk, skb);
+
        return true;
 
 discard:
@@ -5808,7 +5873,7 @@ discard:
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
-void tcp_init_transfer(struct sock *sk, int bpf_op)
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
@@ -5829,7 +5894,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
                tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
        tp->snd_cwnd_stamp = tcp_jiffies32;
 
-       tcp_call_bpf(sk, bpf_op, 0, NULL);
+       bpf_skops_established(sk, bpf_op, skb);
        tcp_init_congestion_control(sk);
        tcp_init_buffer_space(sk);
 }
@@ -5848,7 +5913,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
                sk_mark_napi_id(sk, skb);
        }
 
-       tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
+       tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
 
        /* Prevent spurious tcp_cwnd_restart() on first data
         * packet.
@@ -6320,7 +6385,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                } else {
                        tcp_try_undo_spurious_syn(sk);
                        tp->retrans_stamp = 0;
-                       tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
+                       tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
+                                         skb);
                        WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
                }
                smp_mb();
@@ -6764,7 +6830,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        }
        if (fastopen_sk) {
                af_ops->send_synack(fastopen_sk, dst, &fl, req,
-                                   &foc, TCP_SYNACK_FASTOPEN);
+                                   &foc, TCP_SYNACK_FASTOPEN, skb);
                /* Add the child socket directly into the accept queue */
                if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
                        reqsk_fastopen_remove(fastopen_sk, req, false);
@@ -6782,7 +6848,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                                tcp_timeout_init((struct sock *)req));
                af_ops->send_synack(sk, dst, &fl, req, &foc,
                                    !want_cookie ? TCP_SYNACK_NORMAL :
-                                                  TCP_SYNACK_COOKIE);
+                                                  TCP_SYNACK_COOKIE,
+                                   skb);
                if (want_cookie) {
                        reqsk_free(req);
                        return 0;
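
For context, here is a minimal sockops sketch of how a program might
consume the new callback.  This is illustrative, not part of the
patch: the experimental option kind 254 with magic 0xeB9F and the
bpf_load_hdr_opt() helper come from elsewhere in this series, and
opt_watcher is a made-up name.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int opt_watcher(struct bpf_sock_ops *skops)
{
	/* Search key for bpf_load_hdr_opt(): option kind, length
	 * (0 = any), then the 2-byte magic carried by the
	 * experimental option kind 254.
	 */
	__u8 opt[4] = { 254, 0, 0xeb, 0x9f };

	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* Opt in; without this flag bpf_skops_parse_hdr()
		 * returns before running the program.
		 */
		bpf_sock_ops_cb_flags_set(skops,
					  skops->bpf_sock_ops_cb_flags |
					  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		/* skops->skb_data points at the TCP header, set up by
		 * bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb))
		 * above.  On success opt[] holds kind, len and payload.
		 */
		if (bpf_load_hdr_opt(skops, opt, sizeof(opt), 0) > 0)
			skops->reply = opt[1];	/* e.g. note its length */
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";

Setting BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG instead would deliver
every accepted segment's header to the program, known options
included, rather than only those where the parser saw an unknown
option.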