Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author Jakub Kicinski <kuba@kernel.org>
Sat, 31 Jul 2021 18:23:25 +0000 (11:23 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Sat, 31 Jul 2021 18:23:26 +0000 (11:23 -0700)
Andrii Nakryiko says:

====================
bpf-next 2021-07-30

We've added 64 non-merge commits during the last 15 day(s) which contain
a total of 83 files changed, 5027 insertions(+), 1808 deletions(-).

The main changes are:

1) BTF-guided binary data dumping libbpf API, from Alan.

2) Internal factoring out of libbpf CO-RE relocation logic, from Alexei.

3) Ambient BPF run context and cgroup storage cleanup, from Andrii.

4) A few small API additions for the libbpf 1.0 effort, from Evgeniy and Hengqi.

5) bpf_program__attach_kprobe_opts() fixes in libbpf, from Jiri.

6) bpf_{get,set}sockopt() support in BPF iterators, from Martin (a short sketch follows this list).

7) BPF map pinning improvements in libbpf, from Martynas.

8) Improved module BTF support in libbpf and bpftool, from Quentin.

9) Bpftool cleanups and documentation improvements, from Quentin.

10) Libbpf improvements for supporting CO-RE on old kernels, from Shuyi.

11) Increased maximum cgroup storage size, from Stanislav.

12) Small fixes and improvements to BPF tests and samples, from various folks.
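
As an aside on item 6: the tcp iterator gains get/setsockopt support through
bpf_iter_tcp_get_func_proto() in the net/ipv4/tcp_ipv4.c diff below. A minimal
sketch of an iter/tcp program using the helpers is shown here; it assumes a
vmlinux.h generated from a kernel carrying this series, and the program name
and "dctcp" policy are purely illustrative.

/* Sketch only: iter/tcp program calling bpf_{get,set}sockopt().
 * SOL_TCP/TCP_CONGESTION are defined locally since vmlinux.h
 * carries types, not UAPI macros.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define SOL_TCP			6
#define TCP_CONGESTION		13
#define TCP_CA_NAME_MAX		16

char _license[] SEC("license") = "GPL";

SEC("iter/tcp")
int set_cc(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;
	char cur[TCP_CA_NAME_MAX] = {};
	char want[TCP_CA_NAME_MAX] = "dctcp";
	struct tcp_sock *tp;

	if (!skc)
		return 0;

	/* Only full TCP sockets can be switched. */
	tp = bpf_skc_to_tcp_sock(skc);
	if (!tp)
		return 0;

	if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION, cur, sizeof(cur)))
		return 0;

	/* Illustrative policy: move every matching socket to "dctcp". */
	bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, want, sizeof(want));
	return 0;
}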

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (64 commits)
  tools: bpftool: Complete metrics list in "bpftool prog profile" doc
  tools: bpftool: Document and add bash completion for -L, -B options
  selftests/bpf: Update bpftool's consistency script for checking options
  tools: bpftool: Update and synchronise option list in doc and help msg
  tools: bpftool: Complete and synchronise attach or map types
  selftests/bpf: Check consistency between bpftool source, doc, completion
  tools: bpftool: Slightly ease bash completion updates
  unix_bpf: Fix a potential deadlock in unix_dgram_bpf_recvmsg()
  libbpf: Add btf__load_vmlinux_btf/btf__load_module_btf
  tools: bpftool: Support dumping split BTF by id
  libbpf: Add split BTF support for btf__load_from_kernel_by_id()
  tools: Replace btf__get_from_id() with btf__load_from_kernel_by_id()
  tools: Free BTF objects at various locations
  libbpf: Rename btf__get_from_id() as btf__load_from_kernel_by_id()
  libbpf: Rename btf__load() as btf__load_into_kernel()
  libbpf: Return non-null error on failures in libbpf_find_prog_btf_id()
  bpf: Emit better log message if bpf_iter ctx arg btf_id == 0
  tools/resolve_btfids: Emit warnings and patch zero id for missing symbols
  bpf: Increase supported cgroup storage value size
  libbpf: Fix race when pinning maps in parallel
  ...
====================
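
For context on the btf__load_* commits listed above (btf__load_vmlinux_btf /
btf__load_module_btf, plus the btf__get_from_id() -> btf__load_from_kernel_by_id()
and btf__load() -> btf__load_into_kernel() renames), here is a minimal userspace
sketch of the resulting API surface. It assumes a libbpf new enough to export
these functions; the module name and type lookup are examples only.

/* Sketch: fetching vmlinux and module BTF with the renamed libbpf APIs.
 * btf__load_from_kernel_by_id(id) would similarly fetch the BTF of a
 * specific kernel BTF object id.
 */
#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

int find_task_struct_id(void)
{
	struct btf *vmlinux_btf, *mod_btf;
	int id;

	vmlinux_btf = btf__load_vmlinux_btf();
	if (libbpf_get_error(vmlinux_btf))
		return -1;

	/* Module BTF is split BTF layered on top of vmlinux BTF;
	 * "nf_conntrack" is just an example and may not be loaded.
	 */
	mod_btf = btf__load_module_btf("nf_conntrack", vmlinux_btf);
	if (libbpf_get_error(mod_btf))
		mod_btf = NULL;

	id = btf__find_by_name_kind(vmlinux_btf, "task_struct", BTF_KIND_STRUCT);
	printf("task_struct BTF id: %d\n", id);

	btf__free(mod_btf);
	btf__free(vmlinux_btf);
	return id < 0 ? -1 : 0;
}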

Link: https://lore.kernel.org/r/20210730225606.1897330-1-andrii@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
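
Related to item 7 and the "Fix race when pinning maps in parallel" commit, a
minimal sketch of a BTF-defined map that opts into libbpf's pin-by-name
handling; the map name, types, and sizes are illustrative only.

/* Sketch: with LIBBPF_PIN_BY_NAME, bpf_object__load() pins the map
 * under the pin root path (default /sys/fs/bpf) and reuses an already
 * pinned map of the same name; the parallel-pinning fix hardens that
 * reuse path against races.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, __u64);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
} shared_counts SEC(".maps");

char _license[] SEC("license") = "GPL";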
include/net/tcp.h
net/bpf/test_run.c
net/ipv4/tcp_ipv4.c
tools/bpf/bpftool/common.c

diff --combined include/net/tcp.h
index 784d5c3ef1c5be0b54194711ff7f306d271d95c3,ba3034123e1db294211d9231f32f727e5588a790..3166dc15d7d634676ad30fdc1e57b7517087bb68
@@@ -1709,6 -1709,7 +1709,6 @@@ struct tcp_fastopen_context 
        struct rcu_head rcu;
  };
  
 -extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
  void tcp_fastopen_active_disable(struct sock *sk);
  bool tcp_fastopen_active_should_disable(struct sock *sk);
  void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
@@@ -1958,7 -1959,6 +1958,6 @@@ struct tcp_iter_state 
        struct seq_net_private  p;
        enum tcp_seq_states     state;
        struct sock             *syn_wait_sk;
-       struct tcp_seq_afinfo   *bpf_seq_afinfo;
        int                     bucket, offset, sbucket, num;
        loff_t                  last_pos;
  };
diff --combined net/bpf/test_run.c
index b488e2779718ad1c2c7138e72cc5d099807ae622,8d46e2962786ac3ba261ce6d8e3f760947f65a92..695449088e42e75530e88798055b1c14d445fe2b
@@@ -88,17 -88,19 +88,19 @@@ reset
  static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                        u32 *retval, u32 *time, bool xdp)
  {
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+       struct bpf_prog_array_item item = {.prog = prog};
+       struct bpf_run_ctx *old_ctx;
+       struct bpf_cg_run_ctx run_ctx;
        struct bpf_test_timer t = { NO_MIGRATE };
        enum bpf_cgroup_storage_type stype;
        int ret;
  
        for_each_cgroup_storage_type(stype) {
-               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
-               if (IS_ERR(storage[stype])) {
-                       storage[stype] = NULL;
+               item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+               if (IS_ERR(item.cgroup_storage[stype])) {
+                       item.cgroup_storage[stype] = NULL;
                        for_each_cgroup_storage_type(stype)
-                               bpf_cgroup_storage_free(storage[stype]);
+                               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
                        return -ENOMEM;
                }
        }
                repeat = 1;
  
        bpf_test_timer_enter(&t);
+       old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        do {
-               ret = bpf_cgroup_storage_set(storage);
-               if (ret)
-                       break;
+               run_ctx.prog_item = &item;
                if (xdp)
                        *retval = bpf_prog_run_xdp(prog, ctx);
                else
                        *retval = BPF_PROG_RUN(prog, ctx);
-               bpf_cgroup_storage_unset();
        } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+       bpf_reset_run_ctx(old_ctx);
        bpf_test_timer_leave(&t);
  
        for_each_cgroup_storage_type(stype)
-               bpf_cgroup_storage_free(storage[stype]);
+               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
  
        return ret;
  }
@@@ -761,11 -760,6 +760,11 @@@ int bpf_prog_test_run_xdp(struct bpf_pr
        void *data;
        int ret = -EINVAL;
  
 +      if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
 +          prog->expected_attach_type == BPF_XDP_CPUMAP)
 +              return -EINVAL;
 +      if (kattr->test.ctx_in || kattr->test.ctx_out)
 +              return -EINVAL;
        ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
diff --combined net/ipv4/tcp_ipv4.c
index 84db1c9ee92a8ea290a401f1933c93cf0e23aa7a,f9c6e47141fdd932bd532a65c504b840e70c2832..2e62e0d6373a6ee5d98756fb1967bff9743ffa02
@@@ -2277,51 -2277,72 +2277,72 @@@ EXPORT_SYMBOL(tcp_v4_destroy_sock)
  #ifdef CONFIG_PROC_FS
  /* Proc filesystem TCP sock list dumping. */
  
- /*
-  * Get next listener socket follow cur.  If cur is NULL, get first socket
-  * starting from bucket given in st->bucket; when st->bucket is zero the
-  * very first socket in the hash table is returned.
+ static unsigned short seq_file_family(const struct seq_file *seq);
+ static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+ {
+       unsigned short family = seq_file_family(seq);
+       /* AF_UNSPEC is used as a match all */
+       return ((family == AF_UNSPEC || family == sk->sk_family) &&
+               net_eq(sock_net(sk), seq_file_net(seq)));
+ }
+ /* Find a non empty bucket (starting from st->bucket)
+  * and return the first sk from it.
   */
- static void *listening_get_next(struct seq_file *seq, void *cur)
+ static void *listening_get_first(struct seq_file *seq)
  {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct inet_listen_hashbucket *ilb;
-       struct hlist_nulls_node *node;
-       struct sock *sk = cur;
  
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
+       st->offset = 0;
+       for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+               struct inet_listen_hashbucket *ilb2;
+               struct inet_connection_sock *icsk;
+               struct sock *sk;
  
-       if (!sk) {
- get_head:
-               ilb = &tcp_hashinfo.listening_hash[st->bucket];
-               spin_lock(&ilb->lock);
-               sk = sk_nulls_head(&ilb->nulls_head);
-               st->offset = 0;
-               goto get_sk;
+               ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+               if (hlist_empty(&ilb2->head))
+                       continue;
+               spin_lock(&ilb2->lock);
+               inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+                       sk = (struct sock *)icsk;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
+               }
+               spin_unlock(&ilb2->lock);
        }
-       ilb = &tcp_hashinfo.listening_hash[st->bucket];
+       return NULL;
+ }
+ /* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+  * If "cur" is the last one in the st->bucket,
+  * call listening_get_first() to return the first sk of the next
+  * non empty bucket.
+  */
+ static void *listening_get_next(struct seq_file *seq, void *cur)
+ {
+       struct tcp_iter_state *st = seq->private;
+       struct inet_listen_hashbucket *ilb2;
+       struct inet_connection_sock *icsk;
+       struct sock *sk = cur;
        ++st->num;
        ++st->offset;
  
-       sk = sk_nulls_next(sk);
- get_sk:
-       sk_nulls_for_each_from(sk, node) {
-               if (!net_eq(sock_net(sk), net))
-                       continue;
-               if (afinfo->family == AF_UNSPEC ||
-                   sk->sk_family == afinfo->family)
+       icsk = inet_csk(sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
-       spin_unlock(&ilb->lock);
-       st->offset = 0;
-       if (++st->bucket < INET_LHTABLE_SIZE)
-               goto get_head;
-       return NULL;
+       ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+       spin_unlock(&ilb2->lock);
+       ++st->bucket;
+       return listening_get_first(seq);
  }
  
  static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
  
        st->bucket = 0;
        st->offset = 0;
-       rc = listening_get_next(seq, NULL);
+       rc = listening_get_first(seq);
  
        while (rc && *pos) {
                rc = listening_get_next(seq, rc);
@@@ -2351,15 -2372,7 +2372,7 @@@ static inline bool empty_bucket(const s
   */
  static void *established_get_first(struct seq_file *seq)
  {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       void *rc = NULL;
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
  
        st->offset = 0;
        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
  
                spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-                       if ((afinfo->family != AF_UNSPEC &&
-                            sk->sk_family != afinfo->family) ||
-                           !net_eq(sock_net(sk), net)) {
-                               continue;
-                       }
-                       rc = sk;
-                       goto out;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
                }
                spin_unlock_bh(lock);
        }
- out:
-       return rc;
+       return NULL;
  }
  
  static void *established_get_next(struct seq_file *seq, void *cur)
  {
-       struct tcp_seq_afinfo *afinfo;
        struct sock *sk = cur;
        struct hlist_nulls_node *node;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
  
        ++st->num;
        ++st->offset;
        sk = sk_nulls_next(sk);
  
        sk_nulls_for_each_from(sk, node) {
-               if ((afinfo->family == AF_UNSPEC ||
-                    sk->sk_family == afinfo->family) &&
-                   net_eq(sock_net(sk), net))
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
  
@@@ -2451,17 -2450,18 +2450,18 @@@ static void *tcp_get_idx(struct seq_fil
  static void *tcp_seek_last_pos(struct seq_file *seq)
  {
        struct tcp_iter_state *st = seq->private;
+       int bucket = st->bucket;
        int offset = st->offset;
        int orig_num = st->num;
        void *rc = NULL;
  
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
-               if (st->bucket >= INET_LHTABLE_SIZE)
+               if (st->bucket > tcp_hashinfo.lhash2_mask)
                        break;
                st->state = TCP_SEQ_STATE_LISTENING;
-               rc = listening_get_next(seq, NULL);
-               while (offset-- && rc)
+               rc = listening_get_first(seq);
+               while (offset-- && rc && bucket == st->bucket)
                        rc = listening_get_next(seq, rc);
                if (rc)
                        break;
                if (st->bucket > tcp_hashinfo.ehash_mask)
                        break;
                rc = established_get_first(seq);
-               while (offset-- && rc)
+               while (offset-- && rc && bucket == st->bucket)
                        rc = established_get_next(seq, rc);
        }
  
@@@ -2542,7 -2542,7 +2542,7 @@@ void tcp_seq_stop(struct seq_file *seq
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
-                       spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+                       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
                break;
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
  }
  
  #ifdef CONFIG_BPF_SYSCALL
+ struct bpf_tcp_iter_state {
+       struct tcp_iter_state state;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+ };
  struct bpf_iter__tcp {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct sock_common *, sk_common);
@@@ -2705,16 -2714,204 +2714,204 @@@ static int tcp_prog_seq_show(struct bpf
        return bpf_iter_run_prog(prog, &ctx);
  }
  
+ static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+ {
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+ }
+ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+                                     unsigned int new_batch_sz)
+ {
+       struct sock **new_batch;
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+       bpf_iter_tcp_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+       return 0;
+ }
+ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+                                                struct sock *start_sk)
+ {
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct inet_connection_sock *icsk;
+       unsigned int expected = 1;
+       struct sock *sk;
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+       icsk = inet_csk(start_sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+       return expected;
+ }
+ static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+                                                  struct sock *start_sk)
+ {
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct hlist_nulls_node *node;
+       unsigned int expected = 1;
+       struct sock *sk;
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+       sk = sk_nulls_next(start_sk);
+       sk_nulls_for_each_from(sk, node) {
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+       return expected;
+ }
+ static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+ {
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+       /* The st->bucket is done.  Directly advance to the next
+        * bucket instead of having the tcp_seek_last_pos() to skip
+        * one by one in the current bucket and eventually find out
+        * it has to advance to the next bucket.
+        */
+       if (iter->st_bucket_done) {
+               st->offset = 0;
+               st->bucket++;
+               if (st->state == TCP_SEQ_STATE_LISTENING &&
+                   st->bucket > tcp_hashinfo.lhash2_mask) {
+                       st->state = TCP_SEQ_STATE_ESTABLISHED;
+                       st->bucket = 0;
+               }
+       }
+ again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+       iter->st_bucket_done = false;
+       sk = tcp_seek_last_pos(seq);
+       if (!sk)
+               return NULL; /* Done */
+       if (st->state == TCP_SEQ_STATE_LISTENING)
+               expected = bpf_iter_tcp_listening_batch(seq, sk);
+       else
+               expected = bpf_iter_tcp_established_batch(seq, sk);
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+       if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+       return sk;
+ }
+ static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+       /* bpf iter does not support lseek, so it always
+        * continue from where it was stop()-ped.
+        */
+       if (*pos)
+               return bpf_iter_tcp_batch(seq);
+       return SEQ_START_TOKEN;
+ }
+ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct sock *sk;
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk) {
+               /* Keeping st->num consistent in tcp_iter_state.
+                * bpf_iter_tcp does not use st->num.
+                * meta.seq_num is used instead.
+                */
+               st->num++;
+               /* Move st->offset to the next sk in the bucket such that
+                * the future start() will resume at st->offset in
+                * st->bucket.  See tcp_seek_last_pos().
+                */
+               st->offset++;
+               sock_put(iter->batch[iter->cur_sk++]);
+       }
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_tcp_batch(seq);
+       ++*pos;
+       /* Keeping st->last_pos consistent in tcp_iter_state.
+        * bpf iter does not do lseek, so st->last_pos always equals to *pos.
+        */
+       st->last_pos = *pos;
+       return sk;
+ }
  static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
  {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
+       bool slow;
        uid_t uid;
+       int ret;
  
        if (v == SEQ_START_TOKEN)
                return 0;
  
+       if (sk_fullsock(sk))
+               slow = lock_sock_fast(sk);
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
        if (sk->sk_state == TCP_TIME_WAIT) {
                uid = 0;
        } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
  
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return tcp_prog_seq_show(prog, &meta, v, uid);
+       ret = tcp_prog_seq_show(prog, &meta, v, uid);
+ unlock:
+       if (sk_fullsock(sk))
+               unlock_sock_fast(sk, slow);
+       return ret;
  }
  
  static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
  {
+       struct bpf_tcp_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
  
                        (void)tcp_prog_seq_show(prog, &meta, v, 0);
        }
  
-       tcp_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk) {
+               bpf_iter_tcp_put_batch(iter);
+               iter->st_bucket_done = false;
+       }
  }
  
  static const struct seq_operations bpf_iter_tcp_seq_ops = {
        .show           = bpf_iter_tcp_seq_show,
-       .start          = tcp_seq_start,
-       .next           = tcp_seq_next,
+       .start          = bpf_iter_tcp_seq_start,
+       .next           = bpf_iter_tcp_seq_next,
        .stop           = bpf_iter_tcp_seq_stop,
  };
+ #endif
+ static unsigned short seq_file_family(const struct seq_file *seq)
+ {
+       const struct tcp_seq_afinfo *afinfo;
+ #ifdef CONFIG_BPF_SYSCALL
+       /* Iterated from bpf_iter.  Let the bpf prog to filter instead. */
+       if (seq->op == &bpf_iter_tcp_seq_ops)
+               return AF_UNSPEC;
  #endif
  
+       /* Iterated from proc fs */
+       afinfo = PDE_DATA(file_inode(seq->file));
+       return afinfo->family;
+ }
  static const struct seq_operations tcp4_seq_ops = {
        .show           = tcp4_seq_show,
        .start          = tcp_seq_start,
@@@ -2964,7 -3185,8 +3185,7 @@@ static int __net_init tcp_sk_init(struc
        net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
        net->ipv4.sysctl_tcp_comp_sack_nr = 44;
        net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 -      spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
 -      net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
 +      net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
        atomic_set(&net->ipv4.tfo_active_disable_times, 0);
  
        /* Reno is always built in */
@@@ -3002,39 -3224,55 +3223,55 @@@ static struct pernet_operations __net_i
  DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
                     struct sock_common *sk_common, uid_t uid)
  
+ #define INIT_BATCH_SZ 16
  static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
  {
-       struct tcp_iter_state *st = priv_data;
-       struct tcp_seq_afinfo *afinfo;
-       int ret;
+       struct bpf_tcp_iter_state *iter = priv_data;
+       int err;
  
-       afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
-       if (!afinfo)
-               return -ENOMEM;
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
  
-       afinfo->family = AF_UNSPEC;
-       st->bpf_seq_afinfo = afinfo;
-       ret = bpf_iter_init_seq_net(priv_data, aux);
-       if (ret)
-               kfree(afinfo);
-       return ret;
+       err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+       return 0;
  }
  
  static void bpf_iter_fini_tcp(void *priv_data)
  {
-       struct tcp_iter_state *st = priv_data;
+       struct bpf_tcp_iter_state *iter = priv_data;
  
-       kfree(st->bpf_seq_afinfo);
        bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
  }
  
  static const struct bpf_iter_seq_info tcp_seq_info = {
        .seq_ops                = &bpf_iter_tcp_seq_ops,
        .init_seq_private       = bpf_iter_init_tcp,
        .fini_seq_private       = bpf_iter_fini_tcp,
-       .seq_priv_size          = sizeof(struct tcp_iter_state),
+       .seq_priv_size          = sizeof(struct bpf_tcp_iter_state),
  };
  
+ static const struct bpf_func_proto *
+ bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+                           const struct bpf_prog *prog)
+ {
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+ }
  static struct bpf_iter_reg tcp_reg_info = {
        .target                 = "tcp",
        .ctx_arg_info_size      = 1,
                { offsetof(struct bpf_iter__tcp, sk_common),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_tcp_get_func_proto,
        .seq_info               = &tcp_seq_info,
  };
  
diff --combined tools/bpf/bpftool/common.c
index dc6daa193557a9c6647a18ed363fafcfb7026b8a,c5e57cce887a5c641a579522eca9f1ee7fa16ede..d42d930a3ec4d4633289d617fd3e25926ed78305
@@@ -67,6 -67,12 +67,12 @@@ const char * const attach_type_name[__M
        [BPF_MODIFY_RETURN]             = "mod_ret",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
+       [BPF_TRACE_ITER]                = "trace_iter",
+       [BPF_XDP_DEVMAP]                = "xdp_devmap",
+       [BPF_XDP_CPUMAP]                = "xdp_cpumap",
+       [BPF_XDP]                       = "xdp",
+       [BPF_SK_REUSEPORT_SELECT]       = "sk_skb_reuseport_select",
+       [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_skb_reuseport_select_or_migrate",
  };
  
  void p_err(const char *fmt, ...)
@@@ -222,11 -228,6 +228,11 @@@ int mount_bpffs_for_pin(const char *nam
        int err = 0;
  
        file = malloc(strlen(name) + 1);
 +      if (!file) {
 +              p_err("mem alloc failed");
 +              return -1;
 +      }
 +
        strcpy(file, name);
        dir = dirname(file);