Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

author David S. Miller <davem@davemloft.net>

Fri, 6 Sep 2019 14:49:17 +0000 (16:49 +0200)

committer David S. Miller <davem@davemloft.net>

Fri, 6 Sep 2019 14:49:17 +0000 (16:49 +0200)
author David S. Miller <davem@davemloft.net>
Fri, 6 Sep 2019 14:49:17 +0000 (16:49 +0200)
committer David S. Miller <davem@davemloft.net>
Fri, 6 Sep 2019 14:49:17 +0000 (16:49 +0200)
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst

index eeedc2e826aa066c89b2073d8fd26100a66fdf8d..83f7ae5fc045e22ad7e35dfdb65dd3e7e342e0b3 100644 (file)
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -153,10 +153,12 @@ an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has
  
  Frames passed to the kernel are used for the ingress path (RX rings).
  
-The user application produces UMEM addrs to this ring. Note that the
-kernel will mask the incoming addr. E.g. for a chunk size of 2k, the
-log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050
-and 3000 refers to the same chunk.
+The user application produces UMEM addrs to this ring. Note that, if
+running the application with aligned chunk mode, the kernel will mask
+the incoming addr.  E.g. for a chunk size of 2k, the log2(2048) LSB of
+the addr will be masked off, meaning that 2048, 2050 and 3000 refer
+to the same chunk. If the user application is run in the unaligned
+chunks mode, then the incoming addr will be left untouched.
  
  
  UMEM Completion Ring
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h

index cb7ab50b76579067d29fbb575396fa47b0a36127..eb73f9f72c467a1bbacecb7ea270b75066b8d2ab 100644 (file)
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -171,6 +171,9 @@
  /* Rd = Ra + Rn * Rm */
  #define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
         A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
+/* Rd = Ra - Rn * Rm */
+#define A64_MSUB(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
+       A64_VARIANT(sf), AARCH64_INSN_DATA3_MSUB)
  /* Rd = Rn * Rm */
  #define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)
  
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c

index f5b437f8a22b4042f29d4e8c55b181efd3350210..cdc79de0c794af4e46176c6924b14fd127426f54 100644 (file)
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -409,8 +409,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                         break;
                 case BPF_MOD:
                         emit(A64_UDIV(is64, tmp, dst, src), ctx);
-                       emit(A64_MUL(is64, tmp, tmp, src), ctx);
-                       emit(A64_SUB(is64, dst, dst, tmp), ctx);
+                       emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
                         break;
                 }
                 break;
@@ -516,8 +515,7 @@ emit_bswap_uxt:
         case BPF_ALU64 | BPF_MOD | BPF_K:
                 emit_a64_mov_i(is64, tmp2, imm, ctx);
                 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
-               emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
-               emit(A64_SUB(is64, dst, dst, tmp), ctx);
+               emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
                 break;
         case BPF_ALU | BPF_LSH | BPF_K:
         case BPF_ALU64 | BPF_LSH | BPF_K:
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c

index 955eb355c2fdea049412ecbcf81b84427e4cf406..ce88211b9c6cdda55164f82ef00327a98ba11b6d 100644 (file)
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -502,7 +502,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
   * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
   * stack space for the large switch statement.
   */
-static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+                                int i, bool extra_pass)
  {
         struct bpf_insn *insn = &fp->insnsi[i];
         int jmp_off, last, insn_count = 1;
@@ -1011,10 +1012,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
          */
         case BPF_JMP | BPF_CALL:
         {
-               /*
-                * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
-                */
-               const u64 func = (u64)__bpf_call_base + imm;
+               u64 func;
+               bool func_addr_fixed;
+               int ret;
+
+               ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
+                                           &func, &func_addr_fixed);
+               if (ret < 0)
+                       return -1;
  
                 REG_SET_SEEN(BPF_REG_5);
                 jit->seen |= SEEN_FUNC;
@@ -1283,7 +1288,8 @@ branch_oc:
  /*
   * Compile eBPF program into s390x code
   */
-static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
+                       bool extra_pass)
  {
         int i, insn_count;
  
@@ -1292,7 +1298,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
  
         bpf_jit_prologue(jit, fp->aux->stack_depth);
         for (i = 0; i < fp->len; i += insn_count) {
-               insn_count = bpf_jit_insn(jit, fp, i);
+               insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
                 if (insn_count < 0)
                         return -1;
                 /* Next instruction address */
@@ -1311,6 +1317,12 @@ bool bpf_jit_needs_zext(void)
         return true;
  }
  
+struct s390_jit_data {
+       struct bpf_binary_header *header;
+       struct bpf_jit ctx;
+       int pass;
+};
+
  /*
   * Compile eBPF program "fp"
   */
@@ -1318,7 +1330,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
  {
         struct bpf_prog *tmp, *orig_fp = fp;
         struct bpf_binary_header *header;
+       struct s390_jit_data *jit_data;
         bool tmp_blinded = false;
+       bool extra_pass = false;
         struct bpf_jit jit;
         int pass;
  
@@ -1337,6 +1351,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
                 fp = tmp;
         }
  
+       jit_data = fp->aux->jit_data;
+       if (!jit_data) {
+               jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+               if (!jit_data) {
+                       fp = orig_fp;
+                       goto out;
+               }
+               fp->aux->jit_data = jit_data;
+       }
+       if (jit_data->ctx.addrs) {
+               jit = jit_data->ctx;
+               header = jit_data->header;
+               extra_pass = true;
+               pass = jit_data->pass + 1;
+               goto skip_init_ctx;
+       }
+
         memset(&jit, 0, sizeof(jit));
         jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
         if (jit.addrs == NULL) {
@@ -1349,7 +1380,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
          *   - 3:   Calculate program size and addrs arrray
          */
         for (pass = 1; pass <= 3; pass++) {
-               if (bpf_jit_prog(&jit, fp)) {
+               if (bpf_jit_prog(&jit, fp, extra_pass)) {
                         fp = orig_fp;
                         goto free_addrs;
                 }
@@ -1361,12 +1392,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
                 fp = orig_fp;
                 goto free_addrs;
         }
+
         header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
         if (!header) {
                 fp = orig_fp;
                 goto free_addrs;
         }
-       if (bpf_jit_prog(&jit, fp)) {
+skip_init_ctx:
+       if (bpf_jit_prog(&jit, fp, extra_pass)) {
                 bpf_jit_binary_free(header);
                 fp = orig_fp;
                 goto free_addrs;
@@ -1375,12 +1408,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
                 bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
                 print_fn_code(jit.prg_buf, jit.size_prg);
         }
-       bpf_jit_binary_lock_ro(header);
+       if (!fp->is_func || extra_pass) {
+               bpf_jit_binary_lock_ro(header);
+       } else {
+               jit_data->header = header;
+               jit_data->ctx = jit;
+               jit_data->pass = pass;
+       }
         fp->bpf_func = (void *) jit.prg_buf;
         fp->jited = 1;
         fp->jited_len = jit.size;
+
+       if (!fp->is_func || extra_pass) {
+               bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
  free_addrs:
-       kfree(jit.addrs);
+               kfree(jit.addrs);
+               kfree(jit_data);
+               fp->aux->jit_data = NULL;
+       }
  out:
         if (tmp_blinded)
                 bpf_jit_prog_release_other(fp, fp == orig_fp ?
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c

index fdf43d87e983fe5b6ca478bb4baeece34c3e37d9..3c8a2f55c43af77d9b260bee9604918f0e5692b1 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12530,7 +12530,8 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
         if (need_reset && prog)
                 for (i = 0; i < vsi->num_queue_pairs; i++)
                         if (vsi->xdp_rings[i]->xsk_umem)
-                               (void)i40e_xsk_async_xmit(vsi->netdev, i);
+                               (void)i40e_xsk_wakeup(vsi->netdev, i,
+                                                     XDP_WAKEUP_RX);
  
         return 0;
  }
@@ -12852,7 +12853,7 @@ static const struct net_device_ops i40e_netdev_ops = {
         .ndo_bridge_setlink     = i40e_ndo_bridge_setlink,
         .ndo_bpf                = i40e_xdp,
         .ndo_xdp_xmit           = i40e_xdp_xmit,
-       .ndo_xsk_async_xmit     = i40e_xsk_async_xmit,
+       .ndo_xsk_wakeup         = i40e_xsk_wakeup,
         .ndo_dfwd_add_station   = i40e_fwd_add,
         .ndo_dfwd_del_station   = i40e_fwd_del,
  };
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c

index 32bad014d76cbaadbc90d117b703898e7f3e067f..0373bc6c7e61ce00b0abcaa3f9189e809edaeb7e 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -116,7 +116,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
                         return err;
  
                 /* Kick start the NAPI context so that receiving will start */
-               err = i40e_xsk_async_xmit(vsi->netdev, qid);
+               err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
                 if (err)
                         return err;
         }
@@ -190,7 +190,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
   **/
  static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
  {
+       struct xdp_umem *umem = rx_ring->xsk_umem;
         int err, result = I40E_XDP_PASS;
+       u64 offset = umem->headroom;
         struct i40e_ring *xdp_ring;
         struct bpf_prog *xdp_prog;
         u32 act;
@@ -201,7 +203,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
          */
         xdp_prog = READ_ONCE(rx_ring->xdp_prog);
         act = bpf_prog_run_xdp(xdp_prog, xdp);
-       xdp->handle += xdp->data - xdp->data_hard_start;
+       offset += xdp->data - xdp->data_hard_start;
+
+       xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
+
         switch (act) {
         case XDP_PASS:
                 break;
@@ -262,7 +267,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
         bi->addr = xdp_umem_get_data(umem, handle);
         bi->addr += hr;
  
-       bi->handle = handle + umem->headroom;
+       bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
  
         xsk_umem_discard_addr(umem);
         return true;
@@ -299,7 +304,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
         bi->addr = xdp_umem_get_data(umem, handle);
         bi->addr += hr;
  
-       bi->handle = handle + umem->headroom;
+       bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
  
         xsk_umem_discard_addr_rq(umem);
         return true;
@@ -420,8 +425,6 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
                                     struct i40e_rx_buffer *old_bi)
  {
         struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
-       unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
-       u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
         u16 nta = rx_ring->next_to_alloc;
  
         /* update, and store next to alloc */
@@ -429,14 +432,9 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
         rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
  
         /* transfer page from old buffer to new buffer */
-       new_bi->dma = old_bi->dma & mask;
-       new_bi->dma += hr;
-
-       new_bi->addr = (void *)((unsigned long)old_bi->addr & mask);
-       new_bi->addr += hr;
-
-       new_bi->handle = old_bi->handle & mask;
-       new_bi->handle += rx_ring->xsk_umem->headroom;
+       new_bi->dma = old_bi->dma;
+       new_bi->addr = old_bi->addr;
+       new_bi->handle = old_bi->handle;
  
         old_bi->addr = NULL;
  }
@@ -471,7 +469,8 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
         bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
         bi->addr += hr;
  
-       bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+       bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
+                                           rx_ring->xsk_umem->headroom);
  }
  
  /**
@@ -626,6 +625,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
  
         i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
         i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
+
+       if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+               if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+                       xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+               else
+                       xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+
+               return (int)total_rx_packets;
+       }
         return failure ? budget : (int)total_rx_packets;
  }
  
@@ -681,6 +689,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
                 i40e_xdp_ring_update_tail(xdp_ring);
  
                 xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+               if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
+                       xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
         }
  
         return !!budget && work_done;
@@ -759,19 +769,27 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
         i40e_update_tx_stats(tx_ring, completed_frames, total_bytes);
  
  out_xmit:
+       if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
+               if (tx_ring->next_to_clean == tx_ring->next_to_use)
+                       xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+               else
+                       xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
+       }
+
         xmit_done = i40e_xmit_zc(tx_ring, budget);
  
         return work_done && xmit_done;
  }
  
  /**
- * i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit
+ * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
   * @dev: the netdevice
   * @queue_id: queue id to wake up
+ * @flags: ignored in our case since we have Rx and Tx in the same NAPI.
   *
   * Returns <0 for errors, 0 otherwise.
   **/
-int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
  {
         struct i40e_netdev_priv *np = netdev_priv(dev);
         struct i40e_vsi *vsi = np->vsi;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h

index 8cc0a2e7d9a2fa3253ceaec272099ace792d9e44..9ed59c14eb55f931c05bca8f01a772bff390fa71 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -18,6 +18,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
  
  bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
                            struct i40e_ring *tx_ring, int napi_budget);
-int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
  
  #endif /* _I40E_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

index 17b7ae9f46ecdc704cde918339536b08c1ba0357..9bcae44e98835926789cf366183d3d3eb5c649f8 100644 (file)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10260,7 +10260,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
         if (need_reset && prog)
                 for (i = 0; i < adapter->num_rx_queues; i++)
                         if (adapter->xdp_ring[i]->xsk_umem)
-                               (void)ixgbe_xsk_async_xmit(adapter->netdev, i);
+                               (void)ixgbe_xsk_wakeup(adapter->netdev, i,
+                                                      XDP_WAKEUP_RX);
  
         return 0;
  }
@@ -10379,7 +10380,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
         .ndo_features_check     = ixgbe_features_check,
         .ndo_bpf                = ixgbe_xdp,
         .ndo_xdp_xmit           = ixgbe_xdp_xmit,
-       .ndo_xsk_async_xmit     = ixgbe_xsk_async_xmit,
+       .ndo_xsk_wakeup         = ixgbe_xsk_wakeup,
  };
  
  static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h

index d93a690aff74f404549d067947e1faf0a4d08a02..6d01700b46bc3d8b42886f98931117456a65a783 100644 (file)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -42,7 +42,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
  void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
  bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
                             struct ixgbe_ring *tx_ring, int napi_budget);
-int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
  void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
  
  #endif /* #define _IXGBE_TXRX_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c

index 6b609553329fa98647b78f4e1e5d3e9223a8aa7a..ad802a8909e0d4423785592ba3e87cc92191b466 100644 (file)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -100,7 +100,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
                 ixgbe_txrx_ring_enable(adapter, qid);
  
                 /* Kick start the NAPI context so that receiving will start */
-               err = ixgbe_xsk_async_xmit(adapter->netdev, qid);
+               err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
                 if (err)
                         return err;
         }
@@ -143,7 +143,9 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
                             struct ixgbe_ring *rx_ring,
                             struct xdp_buff *xdp)
  {
+       struct xdp_umem *umem = rx_ring->xsk_umem;
         int err, result = IXGBE_XDP_PASS;
+       u64 offset = umem->headroom;
         struct bpf_prog *xdp_prog;
         struct xdp_frame *xdpf;
         u32 act;
@@ -151,7 +153,10 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
         rcu_read_lock();
         xdp_prog = READ_ONCE(rx_ring->xdp_prog);
         act = bpf_prog_run_xdp(xdp_prog, xdp);
-       xdp->handle += xdp->data - xdp->data_hard_start;
+       offset += xdp->data - xdp->data_hard_start;
+
+       xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
+
         switch (act) {
         case XDP_PASS:
                 break;
@@ -201,8 +206,6 @@ ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
  static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
                                      struct ixgbe_rx_buffer *obi)
  {
-       unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
-       u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
         u16 nta = rx_ring->next_to_alloc;
         struct ixgbe_rx_buffer *nbi;
  
@@ -212,14 +215,9 @@ static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
         rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
  
         /* transfer page from old buffer to new buffer */
-       nbi->dma = obi->dma & mask;
-       nbi->dma += hr;
-
-       nbi->addr = (void *)((unsigned long)obi->addr & mask);
-       nbi->addr += hr;
-
-       nbi->handle = obi->handle & mask;
-       nbi->handle += rx_ring->xsk_umem->headroom;
+       nbi->dma = obi->dma;
+       nbi->addr = obi->addr;
+       nbi->handle = obi->handle;
  
         obi->addr = NULL;
         obi->skb = NULL;
@@ -250,7 +248,8 @@ void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
         bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
         bi->addr += hr;
  
-       bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+       bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
+                                           rx_ring->xsk_umem->headroom);
  }
  
  static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
@@ -276,7 +275,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
         bi->addr = xdp_umem_get_data(umem, handle);
         bi->addr += hr;
  
-       bi->handle = handle + umem->headroom;
+       bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
  
         xsk_umem_discard_addr(umem);
         return true;
@@ -303,7 +302,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
         bi->addr = xdp_umem_get_data(umem, handle);
         bi->addr += hr;
  
-       bi->handle = handle + umem->headroom;
+       bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
  
         xsk_umem_discard_addr_rq(umem);
         return true;
@@ -547,6 +546,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
         q_vector->rx.total_packets += total_rx_packets;
         q_vector->rx.total_bytes += total_rx_bytes;
  
+       if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+               if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+                       xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+               else
+                       xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+
+               return (int)total_rx_packets;
+       }
         return failure ? budget : (int)total_rx_packets;
  }
  
@@ -615,6 +622,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
         if (tx_desc) {
                 ixgbe_xdp_ring_update_tail(xdp_ring);
                 xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+               if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
+                       xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
         }
  
         return !!budget && work_done;
@@ -688,11 +697,19 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
         if (xsk_frames)
                 xsk_umem_complete_tx(umem, xsk_frames);
  
+       if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
+               if (tx_ring->next_to_clean == tx_ring->next_to_use)
+                       xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+               else
+                       xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
+       }
+
         xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
+
         return budget > 0 && xmit_done;
  }
  
-int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
  {
         struct ixgbe_adapter *adapter = netdev_priv(dev);
         struct ixgbe_ring *ring;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c

index 79301d11666762bdbc27cf052e149acb93d6a8eb..eb2e1f2138e458eab9edb29b88932b2b84e9dad5 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -25,18 +25,33 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
         return headroom;
  }
  
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
-                               struct mlx5e_xsk_param *xsk)
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+                            struct mlx5e_xsk_param *xsk)
  {
         u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
         u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
-       u32 frag_sz = linear_rq_headroom + hw_mtu;
+
+       return linear_rq_headroom + hw_mtu;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+                               struct mlx5e_xsk_param *xsk)
+{
+       u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
  
         /* AF_XDP doesn't build SKBs in place. */
         if (!xsk)
                 frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
  
-       /* XDP in mlx5e doesn't support multiple packets per page. */
+       /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
+        * special case. It can run with frames smaller than a page, as it
+        * doesn't allocate pages dynamically. However, here we pretend that
+        * fragments are page-sized: this allows treating XSK frames like pages
+        * by redirecting alloc and free operations to XSK rings and by using
+        * the fact there are no multiple packets per "page" (which is a frame).
+        * The latter is important, because frames may come in a random order,
+        * and we will have trouble assembling a real page of multiple frames.
+        */
         if (mlx5e_rx_is_xdp(params, xsk))
                 frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h

index 3a615d663d84ec51c2142514bdbfde897cb18f2d..989d8f4294388c1d21facd0a21b1df3e42d86020 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -76,6 +76,8 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
  
  u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
                                  struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+                            struct mlx5e_xsk_param *xsk);
  u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
                                 struct mlx5e_xsk_param *xsk);
  u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

index 1ed5c33e022f5d8fbdeff14ba598fe8a1cef9c05..f049e0ac308a00c587a3260aa110e48f4accb445 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
                       void *va, u16 *rx_headroom, u32 *len, bool xsk)
  {
         struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+       struct xdp_umem *umem = rq->umem;
         struct xdp_buff xdp;
         u32 act;
         int err;
@@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
         xdp.rxq = &rq->xdp_rxq;
  
         act = bpf_prog_run_xdp(prog, &xdp);
-       if (xsk)
-               xdp.handle += xdp.data - xdp.data_hard_start;
+       if (xsk) {
+               u64 off = xdp.data - xdp.data_hard_start;
+
+               xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
+       }
         switch (act) {
         case XDP_PASS:
                 *rx_headroom = xdp.data - xdp.data_hard_start;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c

index 6a55573ec8f2964c226e766c7adba007d7c0d717..475b6bd5d29be7683cc6bbe058eb04c48d4ea357 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -24,7 +24,8 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
         if (!xsk_umem_peek_addr_rq(umem, &handle))
                 return -ENOMEM;
  
-       dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+       dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
+                                                     rq->buff.umem_headroom);
         dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
  
         /* No need to add headroom to the DMA address. In striding RQ case, we
@@ -104,7 +105,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
  
         /* head_offset is not used in this function, because di->xsk.data and
          * di->addr point directly to the necessary place. Furthermore, in the
-        * current implementation, one page = one packet = one frame, so
+        * current implementation, UMR pages are mapped to XSK frames, so
          * head_offset should always be 0.
          */
         WARN_ON_ONCE(head_offset);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h

index 307b923a136139facb90fc44fb433fcb2fa1077a..cab0e93497ae6d2cf53197e86f5ec3ee3df8607d 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,6 +5,7 @@
  #define __MLX5_EN_XSK_RX_H__
  
  #include "en.h"
+#include <net/xdp_sock.h>
  
  /* RX data path */
  
@@ -24,4 +25,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
                                               struct mlx5e_wqe_frag_info *wi,
                                               u32 cqe_bcnt);
  
+static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
+{
+       if (!xsk_umem_uses_need_wakeup(rq->umem))
+               return alloc_err;
+
+       if (unlikely(alloc_err))
+               xsk_set_rx_need_wakeup(rq->umem);
+       else
+               xsk_clear_rx_need_wakeup(rq->umem);
+
+       return false;
+}
+
  #endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c

index d360750b25b7922f7308413257ec4b3d1408f942..631af8dee5171d67447e5eefac4809a115bb4e3e 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -4,18 +4,23 @@
  #include "setup.h"
  #include "en/params.h"
  
+/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
+ * change unexpectedly, and mlx5e has a minimum valid stride size for striding
+ * RQ, keep this check in the driver.
+ */
+#define MLX5E_MIN_XSK_CHUNK_SIZE 2048
+
  bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
                               struct mlx5e_xsk_param *xsk,
                               struct mlx5_core_dev *mdev)
  {
-       /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current
-        * mlx5e XDP implementation doesn't support multiple packets per page.
-        */
-       if (xsk->chunk_size != PAGE_SIZE)
+       /* AF_XDP doesn't support frames larger than PAGE_SIZE. */
+       if (xsk->chunk_size > PAGE_SIZE ||
+                       xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
                 return false;
  
         /* Current MTU and XSK headroom don't allow packets to fit the frames. */
-       if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size)
+       if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size)
                 return false;
  
         /* frag_sz is different for regular and XSK RQs, so ensure that linear
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c

index fd2c75b4b519e71fda8a2af052f1aa0afd7639ce..87827477d38c48dc71d795a1a142634a1c3a5933 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -7,7 +7,7 @@
  #include "en/params.h"
  #include <net/xdp_sock.h>
  
-int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
  {
         struct mlx5e_priv *priv = netdev_priv(dev);
         struct mlx5e_params *params = &priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h

index 7add18bf78d8f0688769945e5075f7f0a5f352a6..79b487d897570d1c7194d3b4e9b02de29078d246 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,11 +5,23 @@
  #define __MLX5_EN_XSK_TX_H__
  
  #include "en.h"
+#include <net/xdp_sock.h>
  
  /* TX data path */
  
-int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid);
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
  
  bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
  
+static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
+{
+       if (!xsk_umem_uses_need_wakeup(sq->umem))
+               return;
+
+       if (sq->pc != sq->cc)
+               xsk_clear_tx_need_wakeup(sq->umem);
+       else
+               xsk_set_tx_need_wakeup(sq->umem);
+}
+
  #endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

index dadadf22108723a3c8deece3fd488d6b197a4344..1cacda1bc1b410f1d470eb452b1839d92bbf93b4 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4580,7 +4580,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
         .ndo_tx_timeout          = mlx5e_tx_timeout,
         .ndo_bpf                 = mlx5e_xdp,
         .ndo_xdp_xmit            = mlx5e_xdp_xmit,
-       .ndo_xsk_async_xmit      = mlx5e_xsk_async_xmit,
+       .ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
  #ifdef CONFIG_MLX5_EN_ARFS
         .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
  #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index 2fd2760d0bb7c0b1000b8687f4b0af1c0c056ee7..d6a547238de03fd9778d7f786346c00647963c68 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -695,8 +695,11 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
         rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
         rq->mpwqe.actual_wq_head   = head;
  
-       /* If XSK Fill Ring doesn't have enough frames, busy poll by
-        * rescheduling the NAPI poll.
+       /* If XSK Fill Ring doesn't have enough frames, report the error, so
+        * that one of the actions can be performed:
+        * 1. If need_wakeup is used, signal that the application has to kick
+        * the driver when it refills the Fill Ring.
+        * 2. Otherwise, busy poll by rescheduling the NAPI poll.
          */
         if (unlikely(alloc_err == -ENOMEM && rq->umem))
                 return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c

index 49b06b256c92955d8b0054ade614b2d33ee41c1e..257a7c9f7a14d4cef416ec5992ed4e4c21832f43 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -33,6 +33,7 @@
  #include <linux/irq.h>
  #include "en.h"
  #include "en/xdp.h"
+#include "en/xsk/rx.h"
  #include "en/xsk/tx.h"
  
  static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
@@ -81,6 +82,29 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
         mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
  }
  
+static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
+{
+       bool busy_xsk = false, xsk_rx_alloc_err;
+
+       /* Handle the race between the application querying need_wakeup and the
+        * driver setting it:
+        * 1. Update need_wakeup both before and after the TX. If it goes to
+        * "yes", it can only happen with the first update.
+        * 2. If the application queried need_wakeup before we set it, the
+        * packets will be transmitted anyway, even w/o a wakeup.
+        * 3. Give a chance to clear need_wakeup after new packets were queued
+        * for TX.
+        */
+       mlx5e_xsk_update_tx_wakeup(xsksq);
+       busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
+       mlx5e_xsk_update_tx_wakeup(xsksq);
+
+       xsk_rx_alloc_err = xskrq->post_wqes(xskrq);
+       busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err);
+
+       return busy_xsk;
+}
+
  int mlx5e_napi_poll(struct napi_struct *napi, int budget)
  {
         struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -122,8 +146,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
         if (xsk_open) {
                 mlx5e_poll_ico_cq(&c->xskicosq.cq);
                 busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
-               busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
-               busy_xsk |= xskrq->post_wqes(xskrq);
+               busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
         }
  
         busy |= busy_xsk;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c

index bc9850e4ec5e646e7e21511a995c5315b1b26062..0e2db6ea79e96f7e9daca0fdcd84ce7b4483b0ed 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -6,6 +6,7 @@
  #include <linux/bug.h>
  #include <linux/jiffies.h>
  #include <linux/skbuff.h>
+#include <linux/timekeeping.h>
  
  #include "../ccm.h"
  #include "../nfp_app.h"
@@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
         return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
  }
  
+static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
+{
+       return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
+              op == NFP_CCM_TYPE_BPF_MAP_DELETE;
+}
+
+static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
+{
+       return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
+              op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
+{
+       return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
+              op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static unsigned int
+nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+                         const u8 *key, u8 *out_key, u8 *out_value,
+                         u32 *cache_gen)
+{
+       struct bpf_map *map = &nfp_map->offmap->map;
+       struct nfp_app_bpf *bpf = nfp_map->bpf;
+       unsigned int i, count, n_entries;
+       struct cmsg_reply_map_op *reply;
+
+       n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
+
+       spin_lock(&nfp_map->cache_lock);
+       *cache_gen = nfp_map->cache_gen;
+       if (nfp_map->cache_blockers)
+               n_entries = 1;
+
+       if (nfp_bpf_ctrl_op_cache_invalidate(op))
+               goto exit_block;
+       if (!nfp_bpf_ctrl_op_cache_capable(op))
+               goto exit_unlock;
+
+       if (!nfp_map->cache)
+               goto exit_unlock;
+       if (nfp_map->cache_to < ktime_get_ns())
+               goto exit_invalidate;
+
+       reply = (void *)nfp_map->cache->data;
+       count = be32_to_cpu(reply->count);
+
+       for (i = 0; i < count; i++) {
+               void *cached_key;
+
+               cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
+               if (memcmp(cached_key, key, map->key_size))
+                       continue;
+
+               if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
+                       memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
+                              map->value_size);
+               if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
+                       if (i + 1 == count)
+                               break;
+
+                       memcpy(out_key,
+                              nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
+                              map->key_size);
+               }
+
+               n_entries = 0;
+               goto exit_unlock;
+       }
+       goto exit_unlock;
+
+exit_block:
+       nfp_map->cache_blockers++;
+exit_invalidate:
+       dev_consume_skb_any(nfp_map->cache);
+       nfp_map->cache = NULL;
+exit_unlock:
+       spin_unlock(&nfp_map->cache_lock);
+       return n_entries;
+}
+
+static void
+nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+                         struct sk_buff *skb, u32 cache_gen)
+{
+       bool blocker, filler;
+
+       blocker = nfp_bpf_ctrl_op_cache_invalidate(op);
+       filler = nfp_bpf_ctrl_op_cache_fill(op);
+       if (blocker || filler) {
+               u64 to = 0;
+
+               if (filler)
+                       to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS;
+
+               spin_lock(&nfp_map->cache_lock);
+               if (blocker) {
+                       nfp_map->cache_blockers--;
+                       nfp_map->cache_gen++;
+               }
+               if (filler && !nfp_map->cache_blockers &&
+                   nfp_map->cache_gen == cache_gen) {
+                       nfp_map->cache_to = to;
+                       swap(nfp_map->cache, skb);
+               }
+               spin_unlock(&nfp_map->cache_lock);
+       }
+
+       dev_consume_skb_any(skb);
+}
+
  static int
  nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
                       u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
  {
         struct nfp_bpf_map *nfp_map = offmap->dev_priv;
+       unsigned int n_entries, reply_entries, count;
         struct nfp_app_bpf *bpf = nfp_map->bpf;
         struct bpf_map *map = &offmap->map;
         struct cmsg_reply_map_op *reply;
         struct cmsg_req_map_op *req;
         struct sk_buff *skb;
+       u32 cache_gen;
         int err;
  
         /* FW messages have no space for more than 32 bits of flags */
         if (flags >> 32)
                 return -EOPNOTSUPP;
  
+       /* Handle op cache */
+       n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
+                                             out_value, &cache_gen);
+       if (!n_entries)
+               return 0;
+
         skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
-       if (!skb)
-               return -ENOMEM;
+       if (!skb) {
+               err = -ENOMEM;
+               goto err_cache_put;
+       }
  
         req = (void *)skb->data;
         req->tid = cpu_to_be32(nfp_map->tid);
-       req->count = cpu_to_be32(1);
+       req->count = cpu_to_be32(n_entries);
         req->flags = cpu_to_be32(flags);
  
         /* Copy inputs */
@@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
                 memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
                        map->value_size);
  
-       skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
-                                 nfp_bpf_cmsg_map_reply_size(bpf, 1));
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
+       skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
+       if (IS_ERR(skb)) {
+               err = PTR_ERR(skb);
+               goto err_cache_put;
+       }
+
+       if (skb->len < sizeof(*reply)) {
+               cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
+                         op, skb->len);
+               err = -EIO;
+               goto err_free;
+       }
  
         reply = (void *)skb->data;
+       count = be32_to_cpu(reply->count);
         err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+       /* FW responds with message sized to hold the good entries,
+        * plus one extra entry if there was an error.
+        */
+       reply_entries = count + !!err;
+       if (n_entries > 1 && count)
+               err = 0;
         if (err)
                 goto err_free;
  
+       if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
+               cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
+                         op, skb->len, reply_entries);
+               err = -EIO;
+               goto err_free;
+       }
+
         /* Copy outputs */
         if (out_key)
                 memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
@@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
                 memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
                        map->value_size);
  
-       dev_consume_skb_any(skb);
+       nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
  
         return 0;
  err_free:
         dev_kfree_skb_any(skb);
+err_cache_put:
+       nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
         return err;
  }
  
@@ -267,11 +414,29 @@ int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
                                      key, NULL, 0, next_key, NULL);
  }
  
+unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
+{
+       return max(nfp_bpf_cmsg_map_req_size(bpf, 1),
+                  nfp_bpf_cmsg_map_reply_size(bpf, 1));
+}
+
  unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
  {
-       return max3((unsigned int)NFP_NET_DEFAULT_MTU,
-                   nfp_bpf_cmsg_map_req_size(bpf, 1),
-                   nfp_bpf_cmsg_map_reply_size(bpf, 1));
+       return max3(NFP_NET_DEFAULT_MTU,
+                   nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
+                   nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
+}
+
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
+{
+       unsigned int mtu, req_max, reply_max, entry_sz;
+
+       mtu = bpf->app->ctrl->dp.mtu;
+       entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
+       req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
+       reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
+
+       return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
  }
  
  void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h

index 06c4286bd79e0a0b3578da221783a20d54d8a7f9..a83a0ad5e27de0c61f9748299e78f040b3ca7289 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -24,6 +24,7 @@ enum bpf_cap_tlv_type {
         NFP_BPF_CAP_TYPE_QUEUE_SELECT   = 5,
         NFP_BPF_CAP_TYPE_ADJUST_TAIL    = 6,
         NFP_BPF_CAP_TYPE_ABI_VERSION    = 7,
+       NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8,
  };
  
  struct nfp_bpf_cap_tlv_func {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c

index 1c9fb11470df7b3d4af3f8ee749c6cc2a10c2d94..8f732771d3fad8965318dca81a5e292a5310d8e6 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -299,6 +299,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
         return 0;
  }
  
+static int
+nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
+                                u32 length)
+{
+       bpf->cmsg_multi_ent = true;
+       return 0;
+}
+
  static int
  nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
                               u32 length)
@@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
                                                           length))
                                 goto err_release_free;
                         break;
+               case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
+                       if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
+                                                            length))
+                               goto err_release_free;
+                       break;
                 default:
                         nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
                         break;
@@ -415,6 +428,25 @@ static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
         bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev);
  }
  
+static int nfp_bpf_start(struct nfp_app *app)
+{
+       struct nfp_app_bpf *bpf = app->priv;
+
+       if (app->ctrl->dp.mtu < nfp_bpf_ctrl_cmsg_min_mtu(bpf)) {
+               nfp_err(bpf->app->cpp,
+                       "ctrl channel MTU below min required %u < %u\n",
+                       app->ctrl->dp.mtu, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
+               return -EINVAL;
+       }
+
+       if (bpf->cmsg_multi_ent)
+               bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf);
+       else
+               bpf->cmsg_cache_cnt = 1;
+
+       return 0;
+}
+
  static int nfp_bpf_init(struct nfp_app *app)
  {
         struct nfp_app_bpf *bpf;
@@ -488,6 +520,7 @@ const struct nfp_app_type app_bpf = {
  
         .init           = nfp_bpf_init,
         .clean          = nfp_bpf_clean,
+       .start          = nfp_bpf_start,
  
         .check_mtu      = nfp_bpf_check_mtu,
  
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h

index 57d6ff51e980c00236886d457c65245c2006bfe3..fac9c6f9e197b44882309b08522187ee0f5eba74 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -99,6 +99,7 @@ enum pkt_vec {
   * @maps_neutral:      hash table of offload-neutral maps (on pointer)
   *
   * @abi_version:       global BPF ABI version
+ * @cmsg_cache_cnt:    number of entries to read for caching
   *
   * @adjust_head:       adjust head capability
   * @adjust_head.flags:         extra flags for adjust head
@@ -124,6 +125,7 @@ enum pkt_vec {
   * @pseudo_random:     FW initialized the pseudo-random machinery (CSRs)
   * @queue_select:      BPF can set the RX queue ID in packet vector
   * @adjust_tail:       BPF can simply trunc packet size for adjust tail
+ * @cmsg_multi_ent:    FW can pack multiple map entries in a single cmsg
   */
  struct nfp_app_bpf {
         struct nfp_app *app;
@@ -134,6 +136,8 @@ struct nfp_app_bpf {
         unsigned int cmsg_key_sz;
         unsigned int cmsg_val_sz;
  
+       unsigned int cmsg_cache_cnt;
+
         struct list_head map_list;
         unsigned int maps_in_use;
         unsigned int map_elems_in_use;
@@ -169,6 +173,7 @@ struct nfp_app_bpf {
         bool pseudo_random;
         bool queue_select;
         bool adjust_tail;
+       bool cmsg_multi_ent;
  };
  
  enum nfp_bpf_map_use {
@@ -183,11 +188,21 @@ struct nfp_bpf_map_word {
         unsigned char non_zero_update   :1;
  };
  
+#define NFP_BPF_MAP_CACHE_CNT          4U
+#define NFP_BPF_MAP_CACHE_TIME_NS      (250 * 1000)
+
  /**
   * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
   * @offmap:    pointer to the offloaded BPF map
   * @bpf:       back pointer to bpf app private structure
   * @tid:       table id identifying map on datapath
+ *
+ * @cache_lock:        protects @cache_blockers, @cache_gen, @cache_to, @cache
+ * @cache_blockers:    number of ops in flight which block caching
+ * @cache_gen: counter incremented by every blocker on exit
+ * @cache_to:  time when cache will no longer be valid (ns)
+ * @cache:     skb with cached response
+ *
   * @l:         link on the nfp_app_bpf->map_list list
   * @use_map:   map of how the value is used (in 4B chunks)
   */
@@ -195,6 +210,13 @@ struct nfp_bpf_map {
         struct bpf_offloaded_map *offmap;
         struct nfp_app_bpf *bpf;
         u32 tid;
+
+       spinlock_t cache_lock;
+       u32 cache_blockers;
+       u32 cache_gen;
+       u64 cache_to;
+       struct sk_buff *cache;
+
         struct list_head l;
         struct nfp_bpf_map_word use_map[];
  };
@@ -564,7 +586,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
  
  void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
  
+unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf);
  unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf);
  long long int
  nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
  void
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c

index 39c9fec222b45823eca3772a77b239938188fcd2..88fab6a82acff88716dc70b20ca3b295b257e6c9 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -385,6 +385,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
         offmap->dev_priv = nfp_map;
         nfp_map->offmap = offmap;
         nfp_map->bpf = bpf;
+       spin_lock_init(&nfp_map->cache_lock);
  
         res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
         if (res < 0) {
@@ -407,6 +408,8 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
         struct nfp_bpf_map *nfp_map = offmap->dev_priv;
  
         nfp_bpf_ctrl_free_map(bpf, nfp_map);
+       dev_consume_skb_any(nfp_map->cache);
+       WARN_ON_ONCE(nfp_map->cache_blockers);
         list_del_init(&nfp_map->l);
         bpf->map_elems_in_use -= offmap->map.max_entries;
         bpf->maps_in_use--;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h

index 5d6c3738b4946f5046ebb0b7762f69ea1aaf93a9..250f510b1d212f65c4e8c2d7f9c99f32891c09e3 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -66,7 +66,7 @@
  #define NFP_NET_MAX_DMA_BITS   40
  
  /* Default size for MTU and freelist buffer sizes */
-#define NFP_NET_DEFAULT_MTU            1500
+#define NFP_NET_DEFAULT_MTU            1500U
  
  /* Maximum number of bytes prepended to a packet */
  #define NFP_NET_MAX_PREPEND            64
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c

index 6f97b554f7da7ab49782a8f5cd4b77b7aa48d56f..61aabffc8888d50c4f0cc8dcd14a40594fce61d8 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -4116,14 +4116,7 @@ int nfp_net_init(struct nfp_net *nn)
  
         /* Set default MTU and Freelist buffer size */
         if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
-               if (nn->app->ctrl_mtu <= nn->max_mtu) {
-                       nn->dp.mtu = nn->app->ctrl_mtu;
-               } else {
-                       if (nn->app->ctrl_mtu != NFP_APP_CTRL_MTU_MAX)
-                               nn_warn(nn, "app requested MTU above max supported %u > %u\n",
-                                       nn->app->ctrl_mtu, nn->max_mtu);
-                       nn->dp.mtu = nn->max_mtu;
-               }
+               nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu);
         } else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
                 nn->dp.mtu = nn->max_mtu;
         } else {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index f9a506147c8a4410b122bb6db3525e1672262af3..5b9d2233860654512da6ff5ec8bf56f2343ed722 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -24,6 +24,9 @@ struct seq_file;
  struct btf;
  struct btf_type;
  
+extern struct idr btf_idr;
+extern spinlock_t btf_idr_lock;
+
  /* map is generic key/value storage optionally accesible by eBPF programs */
  struct bpf_map_ops {
         /* funcs callable from userspace (via syscall) */
@@ -647,6 +650,8 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
  struct bpf_map *bpf_map_get_with_uref(u32 ufd);
  struct bpf_map *__bpf_map_get(struct fd f);
  struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map,
+                                                  bool uref);
  void bpf_map_put_with_uref(struct bpf_map *map);
  void bpf_map_put(struct bpf_map *map);
  int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h

index 5fe99f322b1c5e7d6333ba651a58e1d07995e9a1..26a6d58ca78ccb2a60b91e03471e8dd1eb96df9c 100644 (file)
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -355,6 +355,7 @@ struct bpf_verifier_env {
         struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
         int stack_size;                 /* number of states to be processed */
         bool strict_alignment;          /* perform strict pointer alignment checks */
+       bool test_state_freq;           /* test verifier with different pruning frequency */
         struct bpf_verifier_state *cur_state; /* current verifier state */
         struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
         struct bpf_verifier_state_list *free_list;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index b5d28dadf9645d0a694be2c6543712bdeaca52a2..d7d5626002e970e34101436de038a9c6ad4024d5 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -901,6 +901,10 @@ struct netdev_bpf {
         };
  };
  
+/* Flags for ndo_xsk_wakeup. */
+#define XDP_WAKEUP_RX (1 << 0)
+#define XDP_WAKEUP_TX (1 << 1)
+
  #ifdef CONFIG_XFRM_OFFLOAD
  struct xfrmdev_ops {
         int     (*xdo_dev_state_add) (struct xfrm_state *x);
@@ -1227,6 +1231,12 @@ struct tlsdev_ops;
   *     that got dropped are freed/returned via xdp_return_frame().
   *     Returns negative number, means general error invoking ndo, meaning
   *     no frames were xmit'ed and core-caller will free all frames.
+ * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
+ *      This function is used to wake up the softirq, ksoftirqd or kthread
+ *     responsible for sending and/or receiving packets on a specific
+ *     queue id bound to an AF_XDP socket. The flags field specifies if
+ *     only RX, only TX, or both should be woken up using the flags
+ *     XDP_WAKEUP_RX and XDP_WAKEUP_TX.
   * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev);
   *     Get devlink port instance associated with a given netdev.
   *     Called with a reference on the netdevice and devlink locks only,
@@ -1426,8 +1436,8 @@ struct net_device_ops {
         int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
                                                 struct xdp_frame **xdp,
                                                 u32 flags);
-       int                     (*ndo_xsk_async_xmit)(struct net_device *dev,
-                                                     u32 queue_id);
+       int                     (*ndo_xsk_wakeup)(struct net_device *dev,
+                                                 u32 queue_id, u32 flags);
         struct devlink_port *   (*ndo_get_devlink_port)(struct net_device *dev);
  };
  
diff --git a/include/linux/tnum.h b/include/linux/tnum.h

index c7dc2b5902c057ee0475f786f576fd921b3a8014..c17af77f3fae7f98814b521b127b2e10945d1e5e 100644 (file)
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -5,6 +5,10 @@
   * propagate the unknown bits such that the tnum result represents all the
   * possible results for possible values of the operands.
   */
+
+#ifndef _LINUX_TNUM_H
+#define _LINUX_TNUM_H
+
  #include <linux/types.h>
  
  struct tnum {
@@ -81,3 +85,5 @@ bool tnum_in(struct tnum a, struct tnum b);
  int tnum_strn(char *str, size_t size, struct tnum a);
  /* Format a tnum as tristate binary expansion */
  int tnum_sbin(char *str, size_t size, struct tnum a);
+
+#endif /* _LINUX_TNUM_H */
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h

index b9dcb02e756b29d080ba9343f7a9e24178cfad67..8e4f831d2e52e59a12aa3acab43c0de85b39d6c7 100644 (file)
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
  extern const struct bpf_func_proto bpf_sk_storage_get_proto;
  extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
  
+#ifdef CONFIG_BPF_SYSCALL
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline int bpf_sk_storage_clone(const struct sock *sk,
+                                      struct sock *newsk)
+{
+       return 0;
+}
+#endif
+
  #endif /* _BPF_SK_STORAGE_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h

index 69796d264f0638457526345f5cc423bf2c27bf17..c9398ce7960f9e909db8f231a8da08aa38d42025 100644 (file)
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -16,6 +16,13 @@
  struct net_device;
  struct xsk_queue;
  
+/* Masks for xdp_umem_page flags.
+ * The low 12 bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
+
  struct xdp_umem_page {
         void *addr;
         dma_addr_t dma;
@@ -27,6 +34,13 @@ struct xdp_umem_fq_reuse {
         u64 handles[];
  };
  
+/* Flags for the umem flags field.
+ *
+ * The NEED_WAKEUP flag is bit 1 due to the reuse of the flags field for public
+ * flags. See include/uapi/linux/if_xdp.h.
+ */
+#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
+
  struct xdp_umem {
         struct xsk_queue *fq;
         struct xsk_queue *cq;
@@ -41,15 +55,27 @@ struct xdp_umem {
         struct work_struct work;
         struct page **pgs;
         u32 npgs;
+       u16 queue_id;
+       u8 need_wakeup;
+       u8 flags;
         int id;
         struct net_device *dev;
         struct xdp_umem_fq_reuse *fq_reuse;
-       u16 queue_id;
         bool zc;
         spinlock_t xsk_list_lock;
         struct list_head xsk_list;
  };
  
+/* Nodes are linked in the struct xdp_sock map_list field, and used to
+ * track which maps a certain socket resides in.
+ */
+struct xsk_map;
+struct xsk_map_node {
+       struct list_head node;
+       struct xsk_map *map;
+       struct xdp_sock **map_entry;
+};
+
  struct xdp_sock {
         /* struct sock must be the first member of struct xdp_sock */
         struct sock sk;
@@ -75,6 +101,9 @@ struct xdp_sock {
         /* Protects generic receive. */
         spinlock_t rx_lock;
         u64 rx_dropped;
+       struct list_head map_list;
+       /* Protects map_list */
+       spinlock_t map_list_lock;
  };
  
  struct xdp_buff;
@@ -95,15 +124,47 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
                                           struct xdp_umem_fq_reuse *newq);
  void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
  struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
+
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+                            struct xdp_sock **map_entry);
+int xsk_map_inc(struct xsk_map *map);
+void xsk_map_put(struct xsk_map *map);
+
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+       return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+       return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+       return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
+}
  
  static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
  {
-       return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
+       unsigned long page_addr;
+
+       addr = xsk_umem_add_offset_to_addr(addr);
+       page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
+
+       return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
  }
  
  static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
  {
-       return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+       addr = xsk_umem_add_offset_to_addr(addr);
+
+       return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
  }
  
  /* Reuse-queue aware version of FILL queue helpers */
@@ -144,6 +205,19 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
  
         rq->handles[rq->length++] = addr;
  }
+
+/* Handle the offset appropriately depending on aligned or unaligned mode.
+ * For unaligned mode, we store the offset in the upper 16-bits of the address.
+ * For aligned mode, we simply add the offset to the address.
+ */
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
+                                        u64 offset)
+{
+       if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+               return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+       else
+               return address + offset;
+}
  #else
  static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
  {
@@ -213,6 +287,21 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
         return NULL;
  }
  
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+       return 0;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+       return 0;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+       return 0;
+}
+
  static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
  {
         return NULL;
@@ -241,6 +330,33 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
  {
  }
  
+static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+       return false;
+}
+
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
+                                        u64 offset)
+{
+       return 0;
+}
+
  #endif /* CONFIG_XDP_SOCKETS */
  
  #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 0e66371bea13fdb93411c87aeb08a88615b5f8bb..77c6be96d676222e446d41d2668b40cafb0ef1fe 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -106,6 +106,7 @@ enum bpf_cmd {
         BPF_TASK_FD_QUERY,
         BPF_MAP_LOOKUP_AND_DELETE_ELEM,
         BPF_MAP_FREEZE,
+       BPF_BTF_GET_NEXT_ID,
  };
  
  enum bpf_map_type {
@@ -284,6 +285,9 @@ enum bpf_attach_type {
   */
  #define BPF_F_TEST_RND_HI32    (1U << 2)
  
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ  (1U << 3)
+
  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
   * two extensions:
   *
@@ -337,6 +341,9 @@ enum bpf_attach_type {
  #define BPF_F_RDONLY_PROG      (1U << 7)
  #define BPF_F_WRONLY_PROG      (1U << 8)
  
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE            (1U << 9)
+
  /* flags for BPF_PROG_QUERY */
  #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
  
@@ -576,6 +583,8 @@ union bpf_attr {
   *             limited to five).
   *
   *             Each time the helper is called, it appends a line to the trace.
+ *             Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ *             open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
   *             The format of the trace is customizable, and the exact output
   *             one will get depends on the options set in
   *             *\/sys/kernel/debug/tracing/trace_options* (see also the
@@ -1014,7 +1023,7 @@ union bpf_attr {
   *             The realm of the route for the packet associated to *skb*, or 0
   *             if none was found.
   *
- * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
   *     Description
   *             Write raw *data* blob into a special BPF perf event held by
   *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1076,7 +1085,7 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
   *     Description
   *             Walk a user or a kernel stack and return its id. To achieve
   *             this, the helper needs *ctx*, which is a pointer to the context
@@ -1725,7 +1734,7 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
   *     Description
   *             Used for error injection, this helper uses kprobes to override
   *             the return value of the probed function, and to set it to *rc*.
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h

index faaa5ca2a11767a3cfd967661f21645799b95f9c..be328c59389d56861f95aeb488860ed81ef19e0c 100644 (file)
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -16,6 +16,18 @@
  #define XDP_SHARED_UMEM        (1 << 0)
  #define XDP_COPY       (1 << 1) /* Force copy-mode */
  #define XDP_ZEROCOPY   (1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go to sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application needs to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
+
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
  
  struct sockaddr_xdp {
         __u16 sxdp_family;
@@ -25,10 +37,14 @@ struct sockaddr_xdp {
         __u32 sxdp_shared_umem_fd;
  };
  
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
  struct xdp_ring_offset {
         __u64 producer;
         __u64 consumer;
         __u64 desc;
+       __u64 flags;
  };
  
  struct xdp_mmap_offsets {
@@ -53,6 +69,7 @@ struct xdp_umem_reg {
         __u64 len; /* Length of packet data area */
         __u32 chunk_size;
         __u32 headroom;
+       __u32 flags;
  };
  
  struct xdp_statistics {
@@ -74,6 +91,11 @@ struct xdp_options {
  #define XDP_UMEM_PGOFF_FILL_RING       0x100000000ULL
  #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
  
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+       ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
  /* Rx/Tx descriptor */
  struct xdp_desc {
         __u64 addr;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c

index 5fcc7a17eb5a4d538ffef61335481a262bc23720..adb3adcebe3c5ac0707ed38c8a184504b67aa2f7 100644 (file)
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -195,8 +195,8 @@
              i < btf_type_vlen(struct_type);                                    \
              i++, member++)
  
-static DEFINE_IDR(btf_idr);
-static DEFINE_SPINLOCK(btf_idr_lock);
+DEFINE_IDR(btf_idr);
+DEFINE_SPINLOCK(btf_idr_lock);
  
  struct btf {
         void *data;
@@ -3376,6 +3376,15 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
         btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m);
  }
  
+#ifdef CONFIG_PROC_FS
+static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+       const struct btf *btf = filp->private_data;
+
+       seq_printf(m, "btf_id:\t%u\n", btf->id);
+}
+#endif
+
  static int btf_release(struct inode *inode, struct file *filp)
  {
         btf_put(filp->private_data);
@@ -3383,6 +3392,9 @@ static int btf_release(struct inode *inode, struct file *filp)
  }
  
  const struct file_operations btf_fops = {
+#ifdef CONFIG_PROC_FS
+       .show_fdinfo    = bpf_btf_show_fdinfo,
+#endif
         .release        = btf_release,
  };
  
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 272071e9112f3bc7ccfb111fc734238f9561ffde..82eabd4e38adda6e95f8113d9a7f9bb61b45ab00 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -683,8 +683,8 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
  }
  
  /* map_idr_lock should have been held */
-static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
-                                           bool uref)
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map,
+                                             bool uref)
  {
         int refold;
  
@@ -704,6 +704,16 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
         return map;
  }
  
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+{
+       spin_lock_bh(&map_idr_lock);
+       map = __bpf_map_inc_not_zero(map, uref);
+       spin_unlock_bh(&map_idr_lock);
+
+       return map;
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
+
  int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
  {
         return -ENOTSUPP;
@@ -1619,6 +1629,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
  
         if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
                                  BPF_F_ANY_ALIGNMENT |
+                                BPF_F_TEST_STATE_FREQ |
                                  BPF_F_TEST_RND_HI32))
                 return -EINVAL;
  
@@ -2183,7 +2194,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
         spin_lock_bh(&map_idr_lock);
         map = idr_find(&map_idr, id);
         if (map)
-               map = bpf_map_inc_not_zero(map, true);
+               map = __bpf_map_inc_not_zero(map, true);
         else
                 map = ERR_PTR(-ENOENT);
         spin_unlock_bh(&map_idr_lock);
@@ -2880,6 +2891,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
                 err = bpf_obj_get_next_id(&attr, uattr,
                                           &map_idr, &map_idr_lock);
                 break;
+       case BPF_BTF_GET_NEXT_ID:
+               err = bpf_obj_get_next_id(&attr, uattr,
+                                         &btf_idr, &btf_idr_lock);
+               break;
         case BPF_PROG_GET_FD_BY_ID:
                 err = bpf_prog_get_fd_by_id(&attr);
                 break;
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c

index 4659349fc7953c5481701e4740f1bf46321b9342..7ae5dddd1fe6a56cd335d659c9a74b6dacef9d1c 100644 (file)
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -30,17 +30,12 @@ static struct kobject *btf_kobj;
  
  static int __init btf_vmlinux_init(void)
  {
-       int err;
-
         if (!_binary__btf_vmlinux_bin_start)
                 return 0;
  
         btf_kobj = kobject_create_and_add("btf", kernel_kobj);
-       if (IS_ERR(btf_kobj)) {
-               err = PTR_ERR(btf_kobj);
-               btf_kobj = NULL;
-               return err;
-       }
+       if (!btf_kobj)
+               return -ENOMEM;
  
         bin_attr_btf_vmlinux.size = _binary__btf_vmlinux_bin_end -
                                     _binary__btf_vmlinux_bin_start;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 16d66bd7af09fb71e63f8520489d05680b4e2772..3fb50757e8124fc7d45b5368223197e3fdf9d0bf 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7223,7 +7223,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
         struct bpf_verifier_state_list *sl, **pprev;
         struct bpf_verifier_state *cur = env->cur_state, *new;
         int i, j, err, states_cnt = 0;
-       bool add_new_state = false;
+       bool add_new_state = env->test_state_freq ? true : false;
  
         cur->last_insn_idx = env->prev_insn_idx;
         if (!env->insn_aux_data[insn_idx].prune_point)
@@ -9263,6 +9263,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
  
         env->allow_ptr_leaks = is_priv;
  
+       if (is_priv)
+               env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
+
         ret = replace_map_fd_with_map_ptr(env);
         if (ret < 0)
                 goto skip_full_check;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c

index 9bb96ace9fa126762af4761cb556937cefe880bd..942c662e2eed77c4b78bd66c34057114e946fcde 100644 (file)
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -13,8 +13,71 @@ struct xsk_map {
         struct bpf_map map;
         struct xdp_sock **xsk_map;
         struct list_head __percpu *flush_list;
+       spinlock_t lock; /* Synchronize map updates */
  };
  
+int xsk_map_inc(struct xsk_map *map)
+{
+       struct bpf_map *m = &map->map;
+
+       m = bpf_map_inc(m, false);
+       return PTR_ERR_OR_ZERO(m);
+}
+
+void xsk_map_put(struct xsk_map *map)
+{
+       bpf_map_put(&map->map);
+}
+
+/* Allocate a list node recording that @map_entry, a slot of @map, holds a
+ * socket. The node takes a reference on @map through xsk_map_inc(); that
+ * reference is dropped again by xsk_map_node_free().
+ *
+ * Returns the new node on success or an ERR_PTR() on failure. It must never
+ * return NULL: the caller checks the result with IS_ERR() only, so a NULL
+ * return would be treated as a valid node and dereferenced.
+ */
+static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
+                                              struct xdp_sock **map_entry)
+{
+       struct xsk_map_node *node;
+       int err;
+
+       node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+       if (!node)
+               return ERR_PTR(-ENOMEM);
+
+       err = xsk_map_inc(map);
+       if (err) {
+               /* Could not pin the map; undo the allocation. */
+               kfree(node);
+               return ERR_PTR(err);
+       }
+
+       node->map = map;
+       node->map_entry = map_entry;
+       return node;
+}
+
+static void xsk_map_node_free(struct xsk_map_node *node)
+{
+       xsk_map_put(node->map);
+       kfree(node);
+}
+
+static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
+{
+       spin_lock_bh(&xs->map_list_lock);
+       list_add_tail(&node->node, &xs->map_list);
+       spin_unlock_bh(&xs->map_list_lock);
+}
+
+static void xsk_map_sock_delete(struct xdp_sock *xs,
+                               struct xdp_sock **map_entry)
+{
+       struct xsk_map_node *n, *tmp;
+
+       spin_lock_bh(&xs->map_list_lock);
+       list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
+               if (map_entry == n->map_entry) {
+                       list_del(&n->node);
+                       xsk_map_node_free(n);
+               }
+       }
+       spin_unlock_bh(&xs->map_list_lock);
+}
+
  static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
  {
         struct xsk_map *m;
@@ -34,6 +97,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
                 return ERR_PTR(-ENOMEM);
  
         bpf_map_init_from_attr(&m->map, attr);
+       spin_lock_init(&m->lock);
  
         cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
         cost += sizeof(struct list_head) * num_possible_cpus();
@@ -71,21 +135,9 @@ free_m:
  static void xsk_map_free(struct bpf_map *map)
  {
         struct xsk_map *m = container_of(map, struct xsk_map, map);
-       int i;
  
         bpf_clear_redirect_map(map);
         synchronize_net();
-
-       for (i = 0; i < map->max_entries; i++) {
-               struct xdp_sock *xs;
-
-               xs = m->xsk_map[i];
-               if (!xs)
-                       continue;
-
-               sock_put((struct sock *)xs);
-       }
-
         free_percpu(m->flush_list);
         bpf_map_area_free(m->xsk_map);
         kfree(m);
@@ -164,8 +216,9 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
                                u64 map_flags)
  {
         struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct xdp_sock *xs, *old_xs, **map_entry;
         u32 i = *(u32 *)key, fd = *(u32 *)value;
-       struct xdp_sock *xs, *old_xs;
+       struct xsk_map_node *node;
         struct socket *sock;
         int err;
  
@@ -173,8 +226,6 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
                 return -EINVAL;
         if (unlikely(i >= m->map.max_entries))
                 return -E2BIG;
-       if (unlikely(map_flags == BPF_NOEXIST))
-               return -EEXIST;
  
         sock = sockfd_lookup(fd, &err);
         if (!sock)
@@ -192,32 +243,70 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
                 return -EOPNOTSUPP;
         }
  
-       sock_hold(sock->sk);
+       map_entry = &m->xsk_map[i];
+       node = xsk_map_node_alloc(m, map_entry);
+       if (IS_ERR(node)) {
+               sockfd_put(sock);
+               return PTR_ERR(node);
+       }
  
-       old_xs = xchg(&m->xsk_map[i], xs);
+       spin_lock_bh(&m->lock);
+       old_xs = READ_ONCE(*map_entry);
+       if (old_xs == xs) {
+               err = 0;
+               goto out;
+       } else if (old_xs && map_flags == BPF_NOEXIST) {
+               err = -EEXIST;
+               goto out;
+       } else if (!old_xs && map_flags == BPF_EXIST) {
+               err = -ENOENT;
+               goto out;
+       }
+       xsk_map_sock_add(xs, node);
+       WRITE_ONCE(*map_entry, xs);
         if (old_xs)
-               sock_put((struct sock *)old_xs);
-
+               xsk_map_sock_delete(old_xs, map_entry);
+       spin_unlock_bh(&m->lock);
         sockfd_put(sock);
         return 0;
+
+out:
+       spin_unlock_bh(&m->lock);
+       sockfd_put(sock);
+       xsk_map_node_free(node);
+       return err;
  }
  
  static int xsk_map_delete_elem(struct bpf_map *map, void *key)
  {
         struct xsk_map *m = container_of(map, struct xsk_map, map);
-       struct xdp_sock *old_xs;
+       struct xdp_sock *old_xs, **map_entry;
         int k = *(u32 *)key;
  
         if (k >= map->max_entries)
                 return -EINVAL;
  
-       old_xs = xchg(&m->xsk_map[k], NULL);
+       spin_lock_bh(&m->lock);
+       map_entry = &m->xsk_map[k];
+       old_xs = xchg(map_entry, NULL);
         if (old_xs)
-               sock_put((struct sock *)old_xs);
+               xsk_map_sock_delete(old_xs, map_entry);
+       spin_unlock_bh(&m->lock);
  
         return 0;
  }
  
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+                            struct xdp_sock **map_entry)
+{
+       spin_lock_bh(&map->lock);
+       if (READ_ONCE(*map_entry) == xs) {
+               WRITE_ONCE(*map_entry, NULL);
+               xsk_map_sock_delete(xs, map_entry);
+       }
+       spin_unlock_bh(&map->lock);
+}
+
  const struct bpf_map_ops xsk_map_ops = {
         .map_alloc = xsk_map_alloc,
         .map_free = xsk_map_free,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig

index 98da8998c25ce406a2b0c9620f7295231120ae27..b09d7b1ffffdbb518f255f8b4db8dbb0eb7f172d 100644 (file)
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -520,7 +520,8 @@ config BPF_EVENTS
         bool
         default y
         help
-         This allows the user to attach BPF programs to kprobe events.
+         This allows the user to attach BPF programs to kprobe, uprobe, and
+         tracepoint events.
  
  config DYNAMIC_EVENTS
         def_bool n
diff --git a/lib/test_bpf.c b/lib/test_bpf.c

index c41705835cbabbbb3e12dd9fb56f83ae20f7c789..5ef3eccee27cbd9e3c8c17b9d92cc3bbc4b10f92 100644 (file)
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -867,7 +867,7 @@ static struct bpf_test tests[] = {
                 },
                 CLASSIC,
                 { },
-               { { 4, 10 ^ 300 }, { 20, 10 ^ 300 } },
+               { { 4, 0xA ^ 300 }, { 20, 0xA ^ 300 } },
         },
         {
                 "SPILL_FILL",
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c

index 94c7f77ecb6b66b388dcc23c151d487e614196d1..da5639a5bd3b950ce0c65a8f0d43bf465b8819a2 100644 (file)
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
  
  static atomic_t cache_idx;
  
+#define SK_STORAGE_CREATE_FLAG_MASK                                    \
+       (BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
  struct bucket {
         struct hlist_head list;
         raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
                 kfree_rcu(sk_storage, rcu);
  }
  
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
  static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
                             struct bpf_sk_storage_elem *selem)
  {
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
         return 0;
  }
  
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
  void bpf_sk_storage_free(struct sock *sk)
  {
         struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
  
         smap = (struct bpf_sk_storage_map *)map;
  
+       /* Note that this map might be concurrently cloned from
+        * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+        * RCU read section to finish before proceeding. New RCU
+        * read sections should be prevented via bpf_map_inc_not_zero.
+        */
         synchronize_rcu();
  
         /* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
  
  static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
  {
-       if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+       if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+           !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+           attr->max_entries ||
             attr->key_size != sizeof(int) || !attr->value_size ||
             /* Enforce BTF for userspace sk dumping */
             !attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
         return err;
  }
  
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+                         struct bpf_sk_storage_map *smap,
+                         struct bpf_sk_storage_elem *selem)
+{
+       struct bpf_sk_storage_elem *copy_selem;
+
+       copy_selem = selem_alloc(smap, newsk, NULL, true);
+       if (!copy_selem)
+               return NULL;
+
+       if (map_value_has_spin_lock(&smap->map))
+               copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+                                     SDATA(selem)->data, true);
+       else
+               copy_map_value(&smap->map, SDATA(copy_selem)->data,
+                              SDATA(selem)->data);
+
+       return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+       struct bpf_sk_storage *new_sk_storage = NULL;
+       struct bpf_sk_storage *sk_storage;
+       struct bpf_sk_storage_elem *selem;
+       int ret = 0;
+
+       RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+       rcu_read_lock();
+       sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+       if (!sk_storage || hlist_empty(&sk_storage->list))
+               goto out;
+
+       hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+               struct bpf_sk_storage_elem *copy_selem;
+               struct bpf_sk_storage_map *smap;
+               struct bpf_map *map;
+
+               smap = rcu_dereference(SDATA(selem)->smap);
+               if (!(smap->map.map_flags & BPF_F_CLONE))
+                       continue;
+
+               /* Note that for lockless listeners adding a new element
+                * here can race with cleanup in bpf_sk_storage_map_free.
+                * Try to grab map refcnt to make sure that it's still
+                * alive and prevent concurrent removal.
+                */
+               map = bpf_map_inc_not_zero(&smap->map, false);
+               if (IS_ERR(map))
+                       continue;
+
+               copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+               if (!copy_selem) {
+                       ret = -ENOMEM;
+                       bpf_map_put(map);
+                       goto out;
+               }
+
+               if (new_sk_storage) {
+                       selem_link_map(smap, copy_selem);
+                       __selem_link_sk(new_sk_storage, copy_selem);
+               } else {
+                       ret = sk_storage_alloc(newsk, smap, copy_selem);
+                       if (ret) {
+                               kfree(copy_selem);
+                               atomic_sub(smap->elem_size,
+                                          &newsk->sk_omem_alloc);
+                               bpf_map_put(map);
+                               goto out;
+                       }
+
+                       new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+               }
+               bpf_map_put(map);
+       }
+
+out:
+       rcu_read_unlock();
+
+       /* In case of an error, don't free anything explicitly here, the
+        * caller is responsible to call bpf_sk_storage_free.
+        */
+
+       return ret;
+}
+
  BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
            void *, value, u64, flags)
  {
diff --git a/net/core/dev.c b/net/core/dev.c

index 49589ed2018df1d8552afeaa0c18b53f81a0be29..b1afafee3e2acb57f732032b087d4689c090e105 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8126,12 +8126,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                 bpf_chk = generic_xdp_install;
  
         if (fd >= 0) {
+               u32 prog_id;
+
                 if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
                         NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
                         return -EEXIST;
                 }
-               if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-                   __dev_xdp_query(dev, bpf_op, query)) {
+
+               prog_id = __dev_xdp_query(dev, bpf_op, query);
+               if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
                         NL_SET_ERR_MSG(extack, "XDP program already attached");
                         return -EBUSY;
                 }
@@ -8146,6 +8149,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                         bpf_prog_put(prog);
                         return -EINVAL;
                 }
+
+               if (prog->aux->id == prog_id) {
+                       bpf_prog_put(prog);
+                       return 0;
+               }
+       } else {
+               if (!__dev_xdp_query(dev, bpf_op, query))
+                       return 0;
         }
  
         err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
diff --git a/net/core/filter.c b/net/core/filter.c

index b91988f8b94e580d6ed9bd5b61d180a475e785cb..ed6563622ce31dcced4e6ba622770e26f1f7756a 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5903,7 +5903,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
         default:
                 return -EPROTONOSUPPORT;
         }
-       if (mss <= 0)
+       if (mss == 0)
                 return -ENOENT;
  
         return cookie | ((u64)mss << 32);
diff --git a/net/core/sock.c b/net/core/sock.c

index 545fac19a711f261fc6cdbdb54a3f08cb9705987..07863edbe6fc4842e47ebebf00bc21bc406d9264 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                         goto out;
                 }
                 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
-               RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+               if (bpf_sk_storage_clone(sk, newsk)) {
+                       sk_free_unlock_clone(newsk);
+                       newsk = NULL;
+                       goto out;
+               }
  
                 newsk->sk_err      = 0;
                 newsk->sk_err_soft = 0;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c

index 0e0062127124762e3975ea72fee4d3ed8ed37720..947b8ff0227e64ad190116178a2e6c9c4a154102 100644 (file)
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -14,7 +14,7 @@
  #include <linux/netdevice.h>
  #include <linux/rtnetlink.h>
  #include <linux/idr.h>
-#include <linux/highmem.h>
+#include <linux/vmalloc.h>
  
  #include "xdp_umem.h"
  #include "xsk_queue.h"
@@ -106,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
         umem->dev = dev;
         umem->queue_id = queue_id;
  
+       if (flags & XDP_USE_NEED_WAKEUP) {
+               umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
+               /* Tx needs to be explicitly woken up the first time.
+                * Also for supporting drivers that do not implement this
+                * feature. They will always have to call sendto().
+                */
+               xsk_set_tx_need_wakeup(umem);
+       }
+
         dev_hold(dev);
  
         if (force_copy)
                 /* For copy-mode, we are done. */
                 return 0;
  
-       if (!dev->netdev_ops->ndo_bpf ||
-           !dev->netdev_ops->ndo_xsk_async_xmit) {
+       if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
                 err = -EOPNOTSUPP;
                 goto err_unreg_umem;
         }
@@ -170,7 +178,30 @@ static void xdp_umem_unmap_pages(struct xdp_umem *umem)
         unsigned int i;
  
         for (i = 0; i < umem->npgs; i++)
-               kunmap(umem->pgs[i]);
+               if (PageHighMem(umem->pgs[i]))
+                       vunmap(umem->pages[i].addr);
+}
+
+static int xdp_umem_map_pages(struct xdp_umem *umem)
+{
+       unsigned int i;
+       void *addr;
+
+       for (i = 0; i < umem->npgs; i++) {
+               if (PageHighMem(umem->pgs[i]))
+                       addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
+               else
+                       addr = page_address(umem->pgs[i]);
+
+               if (!addr) {
+                       xdp_umem_unmap_pages(umem);
+                       return -ENOMEM;
+               }
+
+               umem->pages[i].addr = addr;
+       }
+
+       return 0;
  }
  
  static void xdp_umem_unpin_pages(struct xdp_umem *umem)
@@ -309,10 +340,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
  
  static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
  {
+       bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
         u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
         unsigned int chunks, chunks_per_page;
         u64 addr = mr->addr, size = mr->len;
-       int size_chk, err, i;
+       int size_chk, err;
  
         if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
                 /* Strictly speaking we could support this, if:
@@ -324,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
                 return -EINVAL;
         }
  
-       if (!is_power_of_2(chunk_size))
+       if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
+                       XDP_UMEM_USES_NEED_WAKEUP))
+               return -EINVAL;
+
+       if (!unaligned_chunks && !is_power_of_2(chunk_size))
                 return -EINVAL;
  
         if (!PAGE_ALIGNED(addr)) {
@@ -341,9 +377,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
         if (chunks == 0)
                 return -EINVAL;
  
-       chunks_per_page = PAGE_SIZE / chunk_size;
-       if (chunks < chunks_per_page || chunks % chunks_per_page)
-               return -EINVAL;
+       if (!unaligned_chunks) {
+               chunks_per_page = PAGE_SIZE / chunk_size;
+               if (chunks < chunks_per_page || chunks % chunks_per_page)
+                       return -EINVAL;
+       }
  
         headroom = ALIGN(headroom, 64);
  
@@ -352,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
                 return -EINVAL;
  
         umem->address = (unsigned long)addr;
-       umem->chunk_mask = ~((u64)chunk_size - 1);
+       umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+                                           : ~((u64)chunk_size - 1);
         umem->size = size;
         umem->headroom = headroom;
         umem->chunk_size_nohr = chunk_size - headroom;
         umem->npgs = size / PAGE_SIZE;
         umem->pgs = NULL;
         umem->user = NULL;
+       umem->flags = mr->flags;
         INIT_LIST_HEAD(&umem->xsk_list);
         spin_lock_init(&umem->xsk_list_lock);
  
@@ -378,10 +418,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
                 goto out_pin;
         }
  
-       for (i = 0; i < umem->npgs; i++)
-               umem->pages[i].addr = kmap(umem->pgs[i]);
+       err = xdp_umem_map_pages(umem);
+       if (!err)
+               return 0;
  
-       return 0;
+       kfree(umem->pages);
  
  out_pin:
         xdp_umem_unpin_pages(umem);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c

index 59b57d7086970b53b140bc500c881ab9cf1fb98b..c2f1af3b6a7c4ec2aed2beab304e0692fb462535 100644 (file)
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);
  
  u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
  {
-       return xskq_peek_addr(umem->fq, addr);
+       return xskq_peek_addr(umem->fq, addr, umem);
  }
  EXPORT_SYMBOL(xsk_umem_peek_addr);
  
@@ -55,21 +55,103 @@ void xsk_umem_discard_addr(struct xdp_umem *umem)
  }
  EXPORT_SYMBOL(xsk_umem_discard_addr);
  
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+       if (umem->need_wakeup & XDP_WAKEUP_RX)
+               return;
+
+       umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+       umem->need_wakeup |= XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
+
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+       struct xdp_sock *xs;
+
+       if (umem->need_wakeup & XDP_WAKEUP_TX)
+               return;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+               xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
+       }
+       rcu_read_unlock();
+
+       umem->need_wakeup |= XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
+
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+       if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+               return;
+
+       umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+       umem->need_wakeup &= ~XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
+
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+       struct xdp_sock *xs;
+
+       if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+               return;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+               xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+       }
+       rcu_read_unlock();
+
+       umem->need_wakeup &= ~XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
+
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+       return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+}
+EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
+
+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
+ * each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+                            u32 len, u32 metalen)
+{
+       void *to_buf = xdp_umem_get_data(umem, addr);
+
+       addr = xsk_umem_add_offset_to_addr(addr);
+       if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+               void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+               u64 page_start = addr & ~(PAGE_SIZE - 1);
+               u64 first_len = PAGE_SIZE - (addr - page_start);
+
+               memcpy(to_buf, from_buf, first_len + metalen);
+               memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+               return;
+       }
+
+       memcpy(to_buf, from_buf, len + metalen);
+}
+
  static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
  {
-       void *to_buf, *from_buf;
+       u64 offset = xs->umem->headroom;
+       u64 addr, memcpy_addr;
+       void *from_buf;
         u32 metalen;
-       u64 addr;
         int err;
  
-       if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+       if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
             len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
                 xs->rx_dropped++;
                 return -ENOSPC;
         }
  
-       addr += xs->umem->headroom;
-
         if (unlikely(xdp_data_meta_unsupported(xdp))) {
                 from_buf = xdp->data;
                 metalen = 0;
@@ -78,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
                 metalen = xdp->data - xdp->data_meta;
         }
  
-       to_buf = xdp_umem_get_data(xs->umem, addr);
-       memcpy(to_buf, from_buf, len + metalen);
-       addr += metalen;
+       memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
+       __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+
+       offset += metalen;
+       addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
         err = xskq_produce_batch_desc(xs->rx, addr, len);
         if (!err) {
                 xskq_discard_addr(xs->umem->fq);
@@ -102,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
         return err;
  }
  
+static bool xsk_is_bound(struct xdp_sock *xs)
+{
+       if (READ_ONCE(xs->state) == XSK_BOUND) {
+               /* Matches smp_wmb() in bind(). */
+               smp_rmb();
+               return true;
+       }
+       return false;
+}
+
  int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
  {
         u32 len;
  
+       if (!xsk_is_bound(xs))
+               return -EINVAL;
+
         if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
                 return -EINVAL;
  
@@ -125,6 +222,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
  {
         u32 metalen = xdp->data - xdp->data_meta;
         u32 len = xdp->data_end - xdp->data;
+       u64 offset = xs->umem->headroom;
         void *buffer;
         u64 addr;
         int err;
@@ -136,17 +234,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
                 goto out_unlock;
         }
  
-       if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+       if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
             len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
                 err = -ENOSPC;
                 goto out_drop;
         }
  
-       addr += xs->umem->headroom;
-
+       addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
         buffer = xdp_umem_get_data(xs->umem, addr);
         memcpy(buffer, xdp->data_meta, len + metalen);
-       addr += metalen;
+
+       addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
         err = xskq_produce_batch_desc(xs->rx, addr, len);
         if (err)
                 goto out_drop;
@@ -190,7 +288,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
  
         rcu_read_lock();
         list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-               if (!xskq_peek_desc(xs->tx, desc))
+               if (!xskq_peek_desc(xs->tx, desc, umem))
                         continue;
  
                 if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -212,7 +310,8 @@ static int xsk_zc_xmit(struct sock *sk)
         struct xdp_sock *xs = xdp_sk(sk);
         struct net_device *dev = xs->dev;
  
-       return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+       return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+                                              XDP_WAKEUP_TX);
  }
  
  static void xsk_destruct_skb(struct sk_buff *skb)
@@ -243,7 +342,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
         if (xs->queue_id >= xs->dev->real_num_tx_queues)
                 goto out;
  
-       while (xskq_peek_desc(xs->tx, &desc)) {
+       while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
                 char *buffer;
                 u64 addr;
                 u32 len;
@@ -272,7 +371,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
                 skb->dev = xs->dev;
                 skb->priority = sk->sk_priority;
                 skb->mark = sk->sk_mark;
-               skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+               skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
                 skb->destructor = xsk_destruct_skb;
  
                 err = dev_direct_xmit(skb, xs->queue_id);
@@ -301,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
         struct sock *sk = sock->sk;
         struct xdp_sock *xs = xdp_sk(sk);
  
-       if (unlikely(!xs->dev))
+       if (unlikely(!xsk_is_bound(xs)))
                 return -ENXIO;
         if (unlikely(!(xs->dev->flags & IFF_UP)))
                 return -ENETDOWN;
@@ -317,8 +416,19 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
                              struct poll_table_struct *wait)
  {
         unsigned int mask = datagram_poll(file, sock, wait);
-       struct sock *sk = sock->sk;
-       struct xdp_sock *xs = xdp_sk(sk);
+       struct xdp_sock *xs = xdp_sk(sock->sk);
+       struct net_device *dev;
+       struct xdp_umem *umem;
+
+       if (unlikely(!xsk_is_bound(xs)))
+               return mask;
+
+       dev = xs->dev;
+       umem = xs->umem;
+
+       if (umem->need_wakeup)
+               dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+                                               umem->need_wakeup);
  
         if (xs->rx && !xskq_empty_desc(xs->rx))
                 mask |= POLLIN | POLLRDNORM;
@@ -342,7 +452,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
  
         /* Make sure queue is ready before it can be seen by others */
         smp_wmb();
-       *queue = q;
+       WRITE_ONCE(*queue, q);
         return 0;
  }
  
@@ -350,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
  {
         struct net_device *dev = xs->dev;
  
-       if (!dev || xs->state != XSK_BOUND)
+       if (xs->state != XSK_BOUND)
                 return;
-
-       xs->state = XSK_UNBOUND;
+       WRITE_ONCE(xs->state, XSK_UNBOUND);
  
         /* Wait for driver to stop using the xdp socket. */
         xdp_del_sk_umem(xs->umem, xs);
@@ -362,6 +471,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
         dev_put(dev);
  }
  
+static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
+                                             struct xdp_sock ***map_entry)
+{
+       struct xsk_map *map = NULL;
+       struct xsk_map_node *node;
+
+       *map_entry = NULL;
+
+       spin_lock_bh(&xs->map_list_lock);
+       node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
+                                       node);
+       if (node) {
+               WARN_ON(xsk_map_inc(node->map));
+               map = node->map;
+               *map_entry = node->map_entry;
+       }
+       spin_unlock_bh(&xs->map_list_lock);
+       return map;
+}
+
+static void xsk_delete_from_maps(struct xdp_sock *xs)
+{
+       /* This function removes the current XDP socket from all the
+        * maps it resides in. We need to take extra care here, due to
+        * the two locks involved. Each map has a lock synchronizing
+        * updates to the entries, and each socket has a lock that
+        * synchronizes access to the list of maps (map_list). For
+        * deadlock avoidance the locks need to be taken in the order
+        * "map lock"->"socket map list lock". We start off by
+        * accessing the socket map list, and take a reference to the
+        * map to guarantee existence between the
+        * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
+        * calls. Then we ask the map to remove the socket, which
+        * tries to remove the socket from the map. Note that there
+        * might be updates to the map between
+        * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
+        */
+       struct xdp_sock **map_entry = NULL;
+       struct xsk_map *map;
+
+       while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
+               xsk_map_try_sock_delete(map, xs, map_entry);
+               xsk_map_put(map);
+       }
+}
+
  static int xsk_release(struct socket *sock)
  {
         struct sock *sk = sock->sk;
@@ -381,7 +536,10 @@ static int xsk_release(struct socket *sock)
         sock_prot_inuse_add(net, sk->sk_prot, -1);
         local_bh_enable();
  
+       xsk_delete_from_maps(xs);
+       mutex_lock(&xs->mutex);
         xsk_unbind_dev(xs);
+       mutex_unlock(&xs->mutex);
  
         xskq_destroy(xs->rx);
         xskq_destroy(xs->tx);
@@ -412,6 +570,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
         return sock;
  }
  
+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check
+ * For all other modes we use addr (kernel virtual address)
+ * Store the result in the low bits of addr.
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+       struct xdp_umem_page *pgs = umem->pages;
+       int i, is_contig;
+
+       for (i = 0; i < umem->npgs - 1; i++) {
+               is_contig = (flags & XDP_ZEROCOPY) ?
+                       (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
+                       (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
+               pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
+       }
+}
+
  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
  {
         struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -427,7 +603,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                 return -EINVAL;
  
         flags = sxdp->sxdp_flags;
-       if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+       if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
+                     XDP_USE_NEED_WAKEUP))
                 return -EINVAL;
  
         rtnl_lock();
@@ -454,7 +631,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                 struct xdp_sock *umem_xs;
                 struct socket *sock;
  
-               if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+               if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+                   (flags & XDP_USE_NEED_WAKEUP)) {
                         /* Cannot specify flags for shared sockets. */
                         err = -EINVAL;
                         goto out_unlock;
@@ -473,19 +651,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                 }
  
                 umem_xs = xdp_sk(sock->sk);
-               if (!umem_xs->umem) {
-                       /* No umem to inherit. */
+               if (!xsk_is_bound(umem_xs)) {
                         err = -EBADF;
                         sockfd_put(sock);
                         goto out_unlock;
-               } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
+               }
+               if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
                         err = -EINVAL;
                         sockfd_put(sock);
                         goto out_unlock;
                 }
  
                 xdp_get_umem(umem_xs->umem);
-               xs->umem = umem_xs->umem;
+               WRITE_ONCE(xs->umem, umem_xs->umem);
                 sockfd_put(sock);
         } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
                 err = -EINVAL;
@@ -500,6 +678,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                 err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
                 if (err)
                         goto out_unlock;
+
+               xsk_check_page_contiguity(xs->umem, flags);
         }
  
         xs->dev = dev;
@@ -510,16 +690,28 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
         xdp_add_sk_umem(xs->umem, xs);
  
  out_unlock:
-       if (err)
+       if (err) {
                 dev_put(dev);
-       else
-               xs->state = XSK_BOUND;
+       } else {
+               /* Matches smp_rmb() in bind() for shared umem
+                * sockets, and xsk_is_bound().
+                */
+               smp_wmb();
+               WRITE_ONCE(xs->state, XSK_BOUND);
+       }
  out_release:
         mutex_unlock(&xs->mutex);
         rtnl_unlock();
         return err;
  }
  
+struct xdp_umem_reg_v1 {
+       __u64 addr; /* Start of packet data area */
+       __u64 len; /* Length of packet data area */
+       __u32 chunk_size;
+       __u32 headroom;
+};
+
  static int xsk_setsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, unsigned int optlen)
  {
@@ -549,15 +741,24 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
                 }
                 q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
                 err = xsk_init_queue(entries, q, false);
+               if (!err && optname == XDP_TX_RING)
+                       /* Tx needs to be explicitly woken up the first time */
+                       xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
                 mutex_unlock(&xs->mutex);
                 return err;
         }
         case XDP_UMEM_REG:
         {
-               struct xdp_umem_reg mr;
+               size_t mr_size = sizeof(struct xdp_umem_reg);
+               struct xdp_umem_reg mr = {};
                 struct xdp_umem *umem;
  
-               if (copy_from_user(&mr, optval, sizeof(mr)))
+               if (optlen < sizeof(struct xdp_umem_reg_v1))
+                       return -EINVAL;
+               else if (optlen < sizeof(mr))
+                       mr_size = sizeof(struct xdp_umem_reg_v1);
+
+               if (copy_from_user(&mr, optval, mr_size))
                         return -EFAULT;
  
                 mutex_lock(&xs->mutex);
@@ -574,7 +775,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
  
                 /* Make sure umem is ready before it can be seen by others */
                 smp_wmb();
-               xs->umem = umem;
+               WRITE_ONCE(xs->umem, umem);
                 mutex_unlock(&xs->mutex);
                 return 0;
         }
@@ -610,6 +811,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
         return -ENOPROTOOPT;
  }
  
+static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
+{
+       ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+       ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+       ring->desc = offsetof(struct xdp_rxtx_ring, desc);
+}
+
+static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
+{
+       ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+       ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+       ring->desc = offsetof(struct xdp_umem_ring, desc);
+}
+
  static int xsk_getsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int __user *optlen)
  {
@@ -649,26 +864,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
         case XDP_MMAP_OFFSETS:
         {
                 struct xdp_mmap_offsets off;
+               struct xdp_mmap_offsets_v1 off_v1;
+               bool flags_supported = true;
+               void *to_copy;
  
-               if (len < sizeof(off))
+               if (len < sizeof(off_v1))
                         return -EINVAL;
+               else if (len < sizeof(off))
+                       flags_supported = false;
+
+               if (flags_supported) {
+                       /* xdp_ring_offset is identical to xdp_ring_offset_v1
+                        * except for the flags field added to the end.
+                        */
+                       xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+                                              &off.rx);
+                       xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+                                              &off.tx);
+                       xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+                                              &off.fr);
+                       xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+                                              &off.cr);
+                       off.rx.flags = offsetof(struct xdp_rxtx_ring,
+                                               ptrs.flags);
+                       off.tx.flags = offsetof(struct xdp_rxtx_ring,
+                                               ptrs.flags);
+                       off.fr.flags = offsetof(struct xdp_umem_ring,
+                                               ptrs.flags);
+                       off.cr.flags = offsetof(struct xdp_umem_ring,
+                                               ptrs.flags);
+
+                       len = sizeof(off);
+                       to_copy = &off;
+               } else {
+                       xsk_enter_rxtx_offsets(&off_v1.rx);
+                       xsk_enter_rxtx_offsets(&off_v1.tx);
+                       xsk_enter_umem_offsets(&off_v1.fr);
+                       xsk_enter_umem_offsets(&off_v1.cr);
+
+                       len = sizeof(off_v1);
+                       to_copy = &off_v1;
+               }
  
-               off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
-               off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
-               off.rx.desc     = offsetof(struct xdp_rxtx_ring, desc);
-               off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
-               off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
-               off.tx.desc     = offsetof(struct xdp_rxtx_ring, desc);
-
-               off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
-               off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
-               off.fr.desc     = offsetof(struct xdp_umem_ring, desc);
-               off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
-               off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
-               off.cr.desc     = offsetof(struct xdp_umem_ring, desc);
-
-               len = sizeof(off);
-               if (copy_to_user(optval, &off, len))
+               if (copy_to_user(optval, to_copy, len))
                         return -EFAULT;
                 if (put_user(len, optlen))
                         return -EFAULT;
@@ -713,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
         unsigned long pfn;
         struct page *qpg;
  
-       if (xs->state != XSK_READY)
+       if (READ_ONCE(xs->state) != XSK_READY)
                 return -EBUSY;
  
         if (offset == XDP_PGOFF_RX_RING) {
@@ -855,6 +1093,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
         spin_lock_init(&xs->rx_lock);
         spin_lock_init(&xs->tx_completion_lock);
  
+       INIT_LIST_HEAD(&xs->map_list);
+       spin_lock_init(&xs->map_list_lock);
+
         mutex_lock(&net->xdp.lock);
         sk_add_node_rcu(sk, &net->xdp.list);
         mutex_unlock(&net->xdp.lock);
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h

index ba81206104266c4f662db880a757f30c8ff97fb7..4cfd106bdb5335ce878f936dd61b26f9914ff0b7 100644 (file)
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -4,6 +4,19 @@
  #ifndef XSK_H_
  #define XSK_H_
  
+struct xdp_ring_offset_v1 {
+       __u64 producer;
+       __u64 consumer;
+       __u64 desc;
+};
+
+struct xdp_mmap_offsets_v1 {
+       struct xdp_ring_offset_v1 rx;
+       struct xdp_ring_offset_v1 tx;
+       struct xdp_ring_offset_v1 fr;
+       struct xdp_ring_offset_v1 cr;
+};
+
  static inline struct xdp_sock *xdp_sk(struct sock *sk)
  {
         return (struct xdp_sock *)sk;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c

index d5e06c8e0cbf9f30fdcdfb6a6ce65f7356c9d89e..f59791ba43a04c08e6d486edf418c63e267a0220 100644 (file)
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
         du.id = umem->id;
         du.size = umem->size;
         du.num_pages = umem->npgs;
-       du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+       du.chunk_size = umem->chunk_size_nohr + umem->headroom;
         du.headroom = umem->headroom;
         du.ifindex = umem->dev ? umem->dev->ifindex : 0;
         du.queue_id = umem->queue_id;
@@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
         msg->xdiag_ino = sk_ino;
         sock_diag_save_cookie(sk, msg->xdiag_cookie);
  
+       mutex_lock(&xs->mutex);
         if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
                 goto out_nlmsg_trim;
  
@@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
             sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
                 goto out_nlmsg_trim;
  
+       mutex_unlock(&xs->mutex);
         nlmsg_end(nlskb, nlh);
         return 0;
  
  out_nlmsg_trim:
+       mutex_unlock(&xs->mutex);
         nlmsg_cancel(nlskb, nlh);
         return -EMSGSIZE;
  }
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h

index 909c5168ed0f87c3d0e1d14359efb67fe0bebab1..eddae4688862906413940960469fba3baf6bfe0c 100644 (file)
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -16,6 +16,7 @@
  struct xdp_ring {
         u32 producer ____cacheline_aligned_in_smp;
         u32 consumer ____cacheline_aligned_in_smp;
+       u32 flags;
  };
  
  /* Used for the RX and TX queues for packets */
@@ -133,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
  
  /* UMEM queue */
  
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+                                             u64 length)
+{
+       bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+       bool next_pg_contig =
+               (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
+                       XSK_NEXT_PG_CONTIG_MASK;
+
+       return cross_pg && !next_pg_contig;
+}
+
  static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
  {
         if (addr >= q->size) {
@@ -143,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
         return true;
  }
  
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+                                               u64 length,
+                                               struct xdp_umem *umem)
+{
+       u64 base_addr = xsk_umem_extract_addr(addr);
+
+       addr = xsk_umem_add_offset_to_addr(addr);
+       if (base_addr >= q->size || addr >= q->size ||
+           xskq_crosses_non_contig_pg(umem, addr, length)) {
+               q->invalid_descs++;
+               return false;
+       }
+
+       return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+                                     struct xdp_umem *umem)
  {
         while (q->cons_tail != q->cons_head) {
                 struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
                 unsigned int idx = q->cons_tail & q->ring_mask;
  
                 *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+               if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+                       if (xskq_is_valid_addr_unaligned(q, *addr,
+                                                        umem->chunk_size_nohr,
+                                                        umem))
+                               return addr;
+                       goto out;
+               }
+
                 if (xskq_is_valid_addr(q, *addr))
                         return addr;
  
+out:
                 q->cons_tail++;
         }
  
         return NULL;
  }
  
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+                                 struct xdp_umem *umem)
  {
         if (q->cons_tail == q->cons_head) {
                 smp_mb(); /* D, matches A */
@@ -170,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
                 smp_rmb();
         }
  
-       return xskq_validate_addr(q, addr);
+       return xskq_validate_addr(q, addr, umem);
  }
  
  static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -229,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
  
  /* Rx/Tx queue */
  
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+                                     struct xdp_umem *umem)
  {
+       if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+               if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+                       return false;
+
+               if (d->len > umem->chunk_size_nohr || d->options) {
+                       q->invalid_descs++;
+                       return false;
+               }
+
+               return true;
+       }
+
         if (!xskq_is_valid_addr(q, d->addr))
                 return false;
  
@@ -244,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
  }
  
  static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
-                                                 struct xdp_desc *desc)
+                                                 struct xdp_desc *desc,
+                                                 struct xdp_umem *umem)
  {
         while (q->cons_tail != q->cons_head) {
                 struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
                 unsigned int idx = q->cons_tail & q->ring_mask;
  
                 *desc = READ_ONCE(ring->desc[idx]);
-               if (xskq_is_valid_desc(q, desc))
+               if (xskq_is_valid_desc(q, desc, umem))
                         return desc;
  
                 q->cons_tail++;
@@ -261,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
  }
  
  static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
-                                             struct xdp_desc *desc)
+                                             struct xdp_desc *desc,
+                                             struct xdp_umem *umem)
  {
         if (q->cons_tail == q->cons_head) {
                 smp_mb(); /* D, matches A */
@@ -272,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
                 smp_rmb(); /* C, matches B */
         }
  
-       return xskq_validate_desc(q, desc);
+       return xskq_validate_desc(q, desc, umem);
  }
  
  static inline void xskq_discard_desc(struct xsk_queue *q)
diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c

index 516e255cbe8fe2f5ac67b34317bbbadfd69d1535..88f9400524509d85122967d694e142257abab315 100644 (file)
--- a/samples/bpf/syscall_nrs.c
+++ b/samples/bpf/syscall_nrs.c
@@ -9,5 +9,11 @@ void syscall_defines(void)
         COMMENT("Linux system call numbers.");
         SYSNR(__NR_write);
         SYSNR(__NR_read);
+#ifdef __NR_mmap2
+       SYSNR(__NR_mmap2);
+#endif
+#ifdef __NR_mmap
         SYSNR(__NR_mmap);
+#endif
+
  }
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c

index f57f4e1ea1ec3c97d4c9f405349e6c514b6bfaee..35cb0eed3be591c810235e1f6f623827f70097d1 100644 (file)
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -68,12 +68,25 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
         return 0;
  }
  
+#ifdef __NR_mmap2
+PROG(SYS__NR_mmap2)(struct pt_regs *ctx)
+{
+       char fmt[] = "mmap2\n";
+
+       bpf_trace_printk(fmt, sizeof(fmt));
+       return 0;
+}
+#endif
+
+#ifdef __NR_mmap
  PROG(SYS__NR_mmap)(struct pt_regs *ctx)
  {
         char fmt[] = "mmap\n";
+
         bpf_trace_printk(fmt, sizeof(fmt));
         return 0;
  }
+#endif
  
  char _license[] SEC("license") = "GPL";
  u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c

index 93eaaf7239b293d366fedb57ac1e6e7df643385d..102eace229568e38e7ff1a64adebdb9152194a8e 100644 (file)
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -67,8 +67,14 @@ static int opt_ifindex;
  static int opt_queue;
  static int opt_poll;
  static int opt_interval = 1;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u32 opt_umem_flags;
+static int opt_unaligned_chunks;
+static int opt_mmap_flags;
  static u32 opt_xdp_bind_flags;
  static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+static int opt_timeout = 1000;
+static bool opt_need_wakeup = true;
  static __u32 prog_id;
  
  struct xsk_umem_info {
@@ -282,7 +288,9 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
                 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
                 .frame_size = opt_xsk_frame_size,
                 .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+               .flags = opt_umem_flags
         };
+
         int ret;
  
         umem = calloc(1, sizeof(*umem));
@@ -291,6 +299,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
  
         ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
                                &cfg);
+
         if (ret)
                 exit_with_error(-ret);
  
@@ -352,6 +361,8 @@ static struct option long_options[] = {
         {"zero-copy", no_argument, 0, 'z'},
         {"copy", no_argument, 0, 'c'},
         {"frame-size", required_argument, 0, 'f'},
+       {"no-need-wakeup", no_argument, 0, 'm'},
+       {"unaligned", no_argument, 0, 'u'},
         {0, 0, 0, 0}
  };
  
@@ -372,6 +383,9 @@ static void usage(const char *prog)
                 "  -z, --zero-copy      Force zero-copy mode.\n"
                 "  -c, --copy           Force copy mode.\n"
                 "  -f, --frame-size=n   Set the frame size (must be a power of two, default is %d).\n"
+               "  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
+               "  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
+               "  -u, --unaligned      Enable unaligned chunk placement\n"
                 "\n";
         fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
         exit(EXIT_FAILURE);
@@ -384,8 +398,8 @@ static void parse_command_line(int argc, char **argv)
         opterr = 0;
  
         for (;;) {
-               c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
-                               &option_index);
+               c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu",
+                               long_options, &option_index);
                 if (c == -1)
                         break;
  
@@ -424,12 +438,21 @@ static void parse_command_line(int argc, char **argv)
                 case 'c':
                         opt_xdp_bind_flags |= XDP_COPY;
                         break;
+               case 'u':
+                       opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+                       opt_unaligned_chunks = 1;
+                       opt_mmap_flags = MAP_HUGETLB;
+                       break;
                 case 'F':
                         opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
                         break;
                 case 'f':
                         opt_xsk_frame_size = atoi(optarg);
+               case 'm':
+                       opt_need_wakeup = false;
+                       opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
                         break;
+
                 default:
                         usage(basename(argv[0]));
                 }
@@ -442,7 +465,8 @@ static void parse_command_line(int argc, char **argv)
                 usage(basename(argv[0]));
         }
  
-       if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+       if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
+           !opt_unaligned_chunks) {
                 fprintf(stderr, "--frame-size=%d is not a power of two\n",
                         opt_xsk_frame_size);
                 usage(basename(argv[0]));
@@ -459,8 +483,10 @@ static void kick_tx(struct xsk_socket_info *xsk)
         exit_with_error(errno);
  }
  
-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
+static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
+                                    struct pollfd *fds)
  {
+       struct xsk_umem_info *umem = xsk->umem;
         u32 idx_cq = 0, idx_fq = 0;
         unsigned int rcvd;
         size_t ndescs;
@@ -468,27 +494,30 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
         if (!xsk->outstanding_tx)
                 return;
  
-       kick_tx(xsk);
+       if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+               kick_tx(xsk);
+
         ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
                 xsk->outstanding_tx;
  
         /* re-add completed Tx buffers */
-       rcvd = xsk_ring_cons__peek(&xsk->umem->cq, ndescs, &idx_cq);
+       rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
         if (rcvd > 0) {
                 unsigned int i;
                 int ret;
  
-               ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+               ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
                 while (ret != rcvd) {
                         if (ret < 0)
                                 exit_with_error(-ret);
-                       ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd,
-                                                    &idx_fq);
+                       if (xsk_ring_prod__needs_wakeup(&umem->fq))
+                               ret = poll(fds, num_socks, opt_timeout);
+                       ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
                 }
+
                 for (i = 0; i < rcvd; i++)
-                       *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) =
-                               *xsk_ring_cons__comp_addr(&xsk->umem->cq,
-                                                         idx_cq++);
+                       *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
+                               *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
  
                 xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
                 xsk_ring_cons__release(&xsk->umem->cq, rcvd);
@@ -505,7 +534,8 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
         if (!xsk->outstanding_tx)
                 return;
  
-       kick_tx(xsk);
+       if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+               kick_tx(xsk);
  
         rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
         if (rcvd > 0) {
@@ -515,30 +545,38 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
         }
  }
  
-static void rx_drop(struct xsk_socket_info *xsk)
+static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
  {
         unsigned int rcvd, i;
         u32 idx_rx = 0, idx_fq = 0;
         int ret;
  
         rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
-       if (!rcvd)
+       if (!rcvd) {
+               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+                       ret = poll(fds, num_socks, opt_timeout);
                 return;
+       }
  
         ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
         while (ret != rcvd) {
                 if (ret < 0)
                         exit_with_error(-ret);
+               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+                       ret = poll(fds, num_socks, opt_timeout);
                 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
         }
  
         for (i = 0; i < rcvd; i++) {
                 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
                 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+               u64 orig = xsk_umem__extract_addr(addr);
+
+               addr = xsk_umem__add_offset_to_addr(addr);
                 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
  
                 hex_dump(pkt, len, addr);
-               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = addr;
+               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
         }
  
         xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
@@ -549,42 +587,65 @@ static void rx_drop(struct xsk_socket_info *xsk)
  static void rx_drop_all(void)
  {
         struct pollfd fds[MAX_SOCKS + 1];
-       int i, ret, timeout, nfds = 1;
+       int i, ret;
  
         memset(fds, 0, sizeof(fds));
  
         for (i = 0; i < num_socks; i++) {
                 fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
                 fds[i].events = POLLIN;
-               timeout = 1000; /* 1sn */
         }
  
         for (;;) {
                 if (opt_poll) {
-                       ret = poll(fds, nfds, timeout);
+                       ret = poll(fds, num_socks, opt_timeout);
                         if (ret <= 0)
                                 continue;
                 }
  
                 for (i = 0; i < num_socks; i++)
-                       rx_drop(xsks[i]);
+                       rx_drop(xsks[i], fds);
+       }
+}
+
+static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
+{
+       u32 idx;
+
+       if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
+               unsigned int i;
+
+               for (i = 0; i < BATCH_SIZE; i++) {
+                       xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
+                               (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+                       xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
+                               sizeof(pkt_data) - 1;
+               }
+
+               xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
+               xsk->outstanding_tx += BATCH_SIZE;
+               frame_nb += BATCH_SIZE;
+               frame_nb %= NUM_FRAMES;
         }
+
+       complete_tx_only(xsk);
  }
  
-static void tx_only(struct xsk_socket_info *xsk)
+static void tx_only_all(void)
  {
-       int timeout, ret, nfds = 1;
-       struct pollfd fds[nfds + 1];
-       u32 idx, frame_nb = 0;
+       struct pollfd fds[MAX_SOCKS];
+       u32 frame_nb[MAX_SOCKS] = {};
+       int i, ret;
  
         memset(fds, 0, sizeof(fds));
-       fds[0].fd = xsk_socket__fd(xsk->xsk);
-       fds[0].events = POLLOUT;
-       timeout = 1000; /* 1sn */
+       for (i = 0; i < num_socks; i++) {
+               fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
+               fds[0].events = POLLOUT;
+       }
  
         for (;;) {
                 if (opt_poll) {
-                       ret = poll(fds, nfds, timeout);
+                       ret = poll(fds, num_socks, opt_timeout);
                         if (ret <= 0)
                                 continue;
  
@@ -592,69 +653,78 @@ static void tx_only(struct xsk_socket_info *xsk)
                                 continue;
                 }
  
-               if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
-                   BATCH_SIZE) {
-                       unsigned int i;
-
-                       for (i = 0; i < BATCH_SIZE; i++) {
-                               xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
-                                       = (frame_nb + i) * opt_xsk_frame_size;
-                               xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
-                                       sizeof(pkt_data) - 1;
-                       }
-
-                       xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
-                       xsk->outstanding_tx += BATCH_SIZE;
-                       frame_nb += BATCH_SIZE;
-                       frame_nb %= NUM_FRAMES;
-               }
-
-               complete_tx_only(xsk);
+               for (i = 0; i < num_socks; i++)
+                       tx_only(xsks[i], frame_nb[i]);
         }
  }
  
-static void l2fwd(struct xsk_socket_info *xsk)
+static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
  {
-       for (;;) {
-               unsigned int rcvd, i;
-               u32 idx_rx = 0, idx_tx = 0;
-               int ret;
+       unsigned int rcvd, i;
+       u32 idx_rx = 0, idx_tx = 0;
+       int ret;
  
-               for (;;) {
-                       complete_tx_l2fwd(xsk);
+       complete_tx_l2fwd(xsk, fds);
  
-                       rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
-                                                  &idx_rx);
-                       if (rcvd > 0)
-                               break;
-               }
+       rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+       if (!rcvd) {
+               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+                       ret = poll(fds, num_socks, opt_timeout);
+               return;
+       }
  
+       ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
+       while (ret != rcvd) {
+               if (ret < 0)
+                       exit_with_error(-ret);
+               if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+                       kick_tx(xsk);
                 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-               while (ret != rcvd) {
-                       if (ret < 0)
-                               exit_with_error(-ret);
-                       ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-               }
+       }
  
-               for (i = 0; i < rcvd; i++) {
-                       u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
-                                                         idx_rx)->addr;
-                       u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
-                                                        idx_rx++)->len;
-                       char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+       for (i = 0; i < rcvd; i++) {
+               u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+               u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+               u64 orig = xsk_umem__extract_addr(addr);
  
-                       swap_mac_addresses(pkt);
+               addr = xsk_umem__add_offset_to_addr(addr);
+               char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
  
-                       hex_dump(pkt, len, addr);
-                       xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
-                       xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
-               }
+               swap_mac_addresses(pkt);
+
+               hex_dump(pkt, len, addr);
+               xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
+               xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
+       }
+
+       xsk_ring_prod__submit(&xsk->tx, rcvd);
+       xsk_ring_cons__release(&xsk->rx, rcvd);
+
+       xsk->rx_npkts += rcvd;
+       xsk->outstanding_tx += rcvd;
+}
  
-               xsk_ring_prod__submit(&xsk->tx, rcvd);
-               xsk_ring_cons__release(&xsk->rx, rcvd);
+static void l2fwd_all(void)
+{
+       struct pollfd fds[MAX_SOCKS];
+       int i, ret;
+
+       memset(fds, 0, sizeof(fds));
  
-               xsk->rx_npkts += rcvd;
-               xsk->outstanding_tx += rcvd;
+       for (i = 0; i < num_socks; i++) {
+               fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
+               fds[i].events = POLLOUT | POLLIN;
+       }
+
+       for (;;) {
+               if (opt_poll) {
+                       ret = poll(fds, num_socks, opt_timeout);
+                       if (ret <= 0)
+                               continue;
+               }
+
+               for (i = 0; i < num_socks; i++)
+                       l2fwd(xsks[i], fds);
         }
  }
  
@@ -674,11 +744,14 @@ int main(int argc, char **argv)
                 exit(EXIT_FAILURE);
         }
  
-       ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
-                            NUM_FRAMES * opt_xsk_frame_size);
-       if (ret)
-               exit_with_error(ret);
-
+       /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
+       bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
+       if (bufs == MAP_FAILED) {
+               printf("ERROR: mmap failed\n");
+               exit(EXIT_FAILURE);
+       }
         /* Create sockets... */
         umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
         xsks[num_socks++] = xsk_configure_socket(umem);
@@ -705,9 +778,9 @@ int main(int argc, char **argv)
         if (opt_bench == BENCH_RXDROP)
                 rx_drop_all();
         else if (opt_bench == BENCH_TXONLY)
-               tx_only(xsks[0]);
+               tx_only_all();
         else
-               l2fwd(xsks[0]);
+               l2fwd_all();
  
         return 0;
  }
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh

index c311933401081fd48b5451605ff3a4a11f4893da..0d8f41db8cd6e04a3fce6c6cfe9fcb300e6a4500 100755 (executable)
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -115,10 +115,12 @@ gen_btf()
         LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
  
         # dump .BTF section into raw binary file to link with final vmlinux
-       bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \
+       bin_arch=$(LANG=C ${OBJDUMP} -f ${1} | grep architecture | \
                 cut -d, -f1 | cut -d' ' -f2)
+       bin_format=$(LANG=C ${OBJDUMP} -f ${1} | grep 'file format' | \
+               awk '{print $4}')
         ${OBJCOPY} --dump-section .BTF=.btf.vmlinux.bin ${1} 2>/dev/null
-       ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \
+       ${OBJCOPY} -I binary -O ${bin_format} -B ${bin_arch} \
                 --rename-section .data=.BTF .btf.vmlinux.bin ${2}
  }
  
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore

index dfe2bd5a4b9538a87572d45a36c6ec8e4dcdfaf0..59024197e71dd185b393544d8a3b0714a41c7b91 100644 (file)
--- a/tools/bpf/.gitignore
+++ b/tools/bpf/.gitignore
@@ -1,4 +1,5 @@
  FEATURE-DUMP.bpf
+feature
  bpf_asm
  bpf_dbg
  bpf_exp.yacc.*
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile

index 53b60ad452f5d3ebd5d2edad0b80ef12558f2215..fbf5e4a0cb9c9cdb7c3c13c28e2d4cf3b5b90f3b 100644 (file)
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -81,10 +81,11 @@ $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
  
  clean: bpftool_clean
         $(call QUIET_CLEAN, bpf-progs)
-       $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+       $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
                $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
         $(call QUIET_CLEAN, core-gen)
-       $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+       $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf
+       $(Q)$(RM) -r -- $(OUTPUT)feature
  
  install: $(PROGS) bpftool_install
         $(call QUIET_INSTALL, bpf_jit_disasm)
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore

index 8248b8dd89d4b9cd4b2d9622623d43f2140a42f2..b13926432b84c6fa7da8afd938bb06be8ffcb981 100644 (file)
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,3 +3,5 @@
  bpftool*.8
  bpf-helpers.*
  FEATURE-DUMP.bpftool
+feature
+libbpf
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst

index 6694a0fc8f99d505a4d64d1a7e604d9c646896ef..39615f8e145b255fbbb13b0d5edc8adf35a7cdbb 100644 (file)
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -19,6 +19,7 @@ SYNOPSIS
  BTF COMMANDS
  =============
  
+|      **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*]
  |      **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
  |      **bpftool** **btf help**
  |
@@ -29,6 +30,12 @@ BTF COMMANDS
  
  DESCRIPTION
  ===========
+       **bpftool btf { show | list }** [**id** *BTF_ID*]
+                 Show information about loaded BTF objects. If a BTF ID is
+                 specified, show information only about given BTF object,
+                 otherwise list all BTF objects currently loaded on the
+                 system.
+
         **bpftool btf dump** *BTF_SRC*
                   Dump BTF entries from a given *BTF_SRC*.
  
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst

index 61d1d270eb5eb577a28e178710ea59cd5fec60c8..1c0f7146aab0a8fc749f6ec87aae642fd3552783 100644 (file)
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -36,6 +36,7 @@ MAP COMMANDS
  |      **bpftool** **map pop**        *MAP*
  |      **bpftool** **map enqueue**    *MAP* **value** *VALUE*
  |      **bpftool** **map dequeue**    *MAP*
+|      **bpftool** **map freeze**     *MAP*
  |      **bpftool** **map help**
  |
  |      *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -127,6 +128,14 @@ DESCRIPTION
         **bpftool map dequeue**  *MAP*
                   Dequeue and print **value** from the queue.
  
+       **bpftool map freeze**  *MAP*
+                 Freeze the map as read-only from user space. Entries from a
+                 frozen map can no longer be updated or deleted with the
+                 **bpf\ ()** system call. This operation is not reversible,
+                 and the map remains immutable from user space until its
+                 destruction. However, read and write permissions for BPF
+                 programs to the map remain unchanged.
+
         **bpftool map help**
                   Print short help message.
  
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst

index d8e5237a2085966faa2b1ed312ab5b857e920bef..8651b00b81ea05ff6d9c7275167be87fce83d27c 100644 (file)
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -15,17 +15,22 @@ SYNOPSIS
         *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
  
         *COMMANDS* :=
-       { **show** | **list** } [ **dev** name ] | **help**
+       { **show** | **list** | **attach** | **detach** | **help** }
  
  NET COMMANDS
  ============
  
-|      **bpftool** **net { show | list } [ dev name ]**
+|      **bpftool** **net { show | list }** [ **dev** *NAME* ]
+|      **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
+|      **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
  |      **bpftool** **net help**
+|
+|      *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+|      *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** }
  
  DESCRIPTION
  ===========
-       **bpftool net { show | list } [ dev name ]**
+       **bpftool net { show | list }** [ **dev** *NAME* ]
                    List bpf program attachments in the kernel networking subsystem.
  
                    Currently, only device driver xdp attachments and tc filter
@@ -47,6 +52,24 @@ DESCRIPTION
                    all bpf programs attached to non clsact qdiscs, and finally all
                    bpf programs attached to root and clsact qdisc.
  
+       **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
+                  Attach bpf program *PROG* to network interface *NAME* with
+                  type specified by *ATTACH_TYPE*. A previously attached bpf program
+                  can be replaced by using the command with the **overwrite** option.
+                  Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
+
+                  *ATTACH_TYPE* can be of:
+                  **xdp** - try native XDP and fall back to generic XDP if the NIC driver does not support it;
+                  **xdpgeneric** - Generic XDP. Runs at the generic XDP hook, when the packet has already entered the receive path as an skb;
+                  **xdpdrv** - Native XDP. Runs at the earliest point in the driver's receive path;
+                  **xdpoffload** - Offload XDP. Runs directly on the NIC on each packet reception;
+
+       **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
+                  Detach bpf program attached to network interface *NAME* with
+                  type specified by *ATTACH_TYPE*. To detach a bpf program, the same
+                  *ATTACH_TYPE* previously used for attach must be specified.
+                  Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
+
         **bpftool net help**
                   Print short help message.
  
@@ -137,6 +160,34 @@ EXAMPLES
          }
      ]
  
+|
+| **# bpftool net attach xdpdrv id 16 dev enp6s0np0**
+| **# bpftool net**
+
+::
+
+      xdp:
+      enp6s0np0(4) driver id 16
+
+|
+| **# bpftool net attach xdpdrv id 16 dev enp6s0np0**
+| **# bpftool net attach xdpdrv id 20 dev enp6s0np0 overwrite**
+| **# bpftool net**
+
+::
+
+      xdp:
+      enp6s0np0(4) driver id 20
+
+|
+| **# bpftool net attach xdpdrv id 16 dev enp6s0np0**
+| **# bpftool net detach xdpdrv dev enp6s0np0**
+| **# bpftool net**
+
+::
+
+      xdp:
+
  
  SEE ALSO
  ========
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile

index 4c9d1ffc3fc72194117df93b5a37a5dcd21ab9c5..39bc6f0f4f0bb839ade8b6962bff515729f318ea 100644 (file)
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -17,27 +17,30 @@ endif
  BPF_DIR = $(srctree)/tools/lib/bpf/
  
  ifneq ($(OUTPUT),)
-  BPF_PATH = $(OUTPUT)
+  LIBBPF_OUTPUT = $(OUTPUT)/libbpf/
+  LIBBPF_PATH = $(LIBBPF_OUTPUT)
  else
-  BPF_PATH = $(BPF_DIR)
+  LIBBPF_PATH = $(BPF_DIR)
  endif
  
-LIBBPF = $(BPF_PATH)libbpf.a
+LIBBPF = $(LIBBPF_PATH)libbpf.a
  
-BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
  
  $(LIBBPF): FORCE
-       $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
+       $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
+       $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
  
  $(LIBBPF)-clean:
         $(call QUIET_CLEAN, libbpf)
-       $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
+       $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
  
  prefix ?= /usr/local
  bash_compdir ?= /usr/share/bash-completion/completions
  
  CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
+CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
  CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
         -I$(srctree)/kernel/bpf/ \
         -I$(srctree)/tools/include \
@@ -52,7 +55,7 @@ ifneq ($(EXTRA_LDFLAGS),)
  LDFLAGS += $(EXTRA_LDFLAGS)
  endif
  
-LIBS = -lelf -lz $(LIBBPF)
+LIBS = $(LIBBPF) -lelf -lz
  
  INSTALL ?= install
  RM ?= rm -f
@@ -114,16 +117,18 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
  $(OUTPUT)feature.o: | zdep
  
  $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
  
  $(OUTPUT)%.o: %.c
         $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
  
  clean: $(LIBBPF)-clean
         $(call QUIET_CLEAN, bpftool)
-       $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+       $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+       $(Q)$(RM) -r -- $(OUTPUT)libbpf/
         $(call QUIET_CLEAN, core-gen)
-       $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
+       $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
+       $(Q)$(RM) -r -- $(OUTPUT)feature/
  
  install: $(OUTPUT)bpftool
         $(call QUIET_INSTALL, bpftool)
@@ -134,8 +139,8 @@ install: $(OUTPUT)bpftool
  
  uninstall:
         $(call QUIET_UNINST, bpftool)
-       $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool
-       $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool
+       $(Q)$(RM) -- $(DESTDIR)$(prefix)/sbin/bpftool
+       $(Q)$(RM) -- $(DESTDIR)$(bash_compdir)/bpftool
  
  doc:
         $(call descend,Documentation)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool

index df16c54154442e87830963755c557ff89f7410eb..70493a6da206728112149cbb7aa31f3b8448197a 100644 (file)
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -73,8 +73,8 @@ _bpftool_get_prog_tags()
  
  _bpftool_get_btf_ids()
  {
-    COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
-        command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) )
+    COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \
+        command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
  }
  
  _bpftool_get_obj_map_names()
@@ -201,6 +201,10 @@ _bpftool()
              _bpftool_get_prog_tags
              return 0
              ;;
+        dev)
+            _sysfs_get_netdevs
+            return 0
+            ;;
          file|pinned)
              _filedir
              return 0
@@ -399,10 +403,6 @@ _bpftool()
                              _filedir
                              return 0
                              ;;
-                        dev)
-                            _sysfs_get_netdevs
-                            return 0
-                            ;;
                          *)
                              COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
                              _bpftool_once_attr 'type'
@@ -449,7 +449,7 @@ _bpftool()
          map)
              local MAP_TYPE='id pinned'
              case $command in
-                show|list|dump|peek|pop|dequeue)
+                show|list|dump|peek|pop|dequeue|freeze)
                      case $prev in
                          $command)
                              COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
@@ -498,10 +498,6 @@ _bpftool()
                          key|value|flags|name|entries)
                              return 0
                              ;;
-                        dev)
-                            _sysfs_get_netdevs
-                            return 0
-                            ;;
                          *)
                              _bpftool_once_attr 'type'
                              _bpftool_once_attr 'key'
@@ -642,7 +638,7 @@ _bpftool()
                      [[ $prev == $object ]] && \
                          COMPREPLY=( $( compgen -W 'delete dump getnext help \
                              lookup pin event_pipe show list update create \
-                            peek push enqueue pop dequeue' -- \
+                            peek push enqueue pop dequeue freeze' -- \
                              "$cur" ) )
                      ;;
              esac
@@ -674,7 +670,7 @@ _bpftool()
                                  map)
                                      _bpftool_get_map_ids
                                      ;;
-                                dump)
+                                $command)
                                      _bpftool_get_btf_ids
                                      ;;
                              esac
@@ -702,9 +698,21 @@ _bpftool()
                              ;;
                      esac
                      ;;
+                show|list)
+                    case $prev in
+                        $command)
+                            COMPREPLY+=( $( compgen -W "id" -- "$cur" ) )
+                            ;;
+                        id)
+                            _bpftool_get_btf_ids
+                            ;;
+                    esac
+                    return 0
+                    ;;
                  *)
                      [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'dump help show list' \
+                            -- "$cur" ) )
                      ;;
              esac
              ;;
@@ -778,18 +786,67 @@ _bpftool()
              esac
              ;;
          net)
+            local PROG_TYPE='id pinned tag'
+            local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload'
              case $command in
+                show|list)
+                    [[ $prev != "$command" ]] && return 0
+                    COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
+                    return 0
+                    ;;
+                attach)
+                    case $cword in
+                        3)
+                            COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- "$cur" ) )
+                            return 0
+                            ;;
+                        4)
+                            COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        5)
+                            case $prev in
+                                id)
+                                    _bpftool_get_prog_ids
+                                    ;;
+                                pinned)
+                                    _filedir
+                                    ;;
+                            esac
+                            return 0
+                            ;;
+                        6)
+                            COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
+                            return 0
+                            ;;
+                        8)
+                            _bpftool_once_attr 'overwrite'
+                            return 0
+                            ;;
+                    esac
+                    ;;
+                detach)
+                    case $cword in
+                        3)
+                            COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- "$cur" ) )
+                            return 0
+                            ;;
+                        4)
+                            COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
+                            return 0
+                            ;;
+                    esac
+                    ;;
                  *)
                      [[ $prev == $object ]] && \
                          COMPREPLY=( $( compgen -W 'help \
-                            show list' -- "$cur" ) )
+                            show list attach detach' -- "$cur" ) )
                      ;;
              esac
              ;;
          feature)
              case $command in
                  probe)
-                    [[ $prev == "dev" ]] && _sysfs_get_netdevs && return 0
                      [[ $prev == "prefix" ]] && return 0
                      if _bpftool_search_list 'macros'; then
                          COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) )
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c

index 1b8ec91899e6aad9a069e658f4e08c9d761084f2..9a9376d1d3df2aab84609cc0a6e42ce5beb687a7 100644 (file)
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -11,6 +11,7 @@
  #include <bpf.h>
  #include <libbpf.h>
  #include <linux/btf.h>
+#include <linux/hashtable.h>
  
  #include "btf.h"
  #include "json_writer.h"
@@ -35,6 +36,16 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
         [BTF_KIND_DATASEC]      = "DATASEC",
  };
  
+struct btf_attach_table {
+       DECLARE_HASHTABLE(table, 16);
+};
+
+struct btf_attach_point {
+       __u32 obj_id;
+       __u32 btf_id;
+       struct hlist_node hash;
+};
+
  static const char *btf_int_enc_str(__u8 encoding)
  {
         switch (encoding) {
@@ -449,7 +460,7 @@ static int do_dump(int argc, char **argv)
  
                 btf_id = strtoul(*argv, &endptr, 0);
                 if (*endptr) {
-                       p_err("can't parse %s as ID", **argv);
+                       p_err("can't parse %s as ID", *argv);
                         return -1;
                 }
                 NEXT_ARG();
@@ -522,6 +533,330 @@ done:
         return err;
  }
  
+static int btf_parse_fd(int *argc, char ***argv)
+{
+       unsigned int id;
+       char *endptr;
+       int fd;
+
+       if (!is_prefix(*argv[0], "id")) {
+               p_err("expected 'id', got: '%s'?", **argv);
+               return -1;
+       }
+       NEXT_ARGP();
+
+       id = strtoul(**argv, &endptr, 0);
+       if (*endptr) {
+               p_err("can't parse %s as ID", **argv);
+               return -1;
+       }
+       NEXT_ARGP();
+
+       fd = bpf_btf_get_fd_by_id(id);
+       if (fd < 0)
+               p_err("can't get BTF object by id (%u): %s",
+                     id, strerror(errno));
+
+       return fd;
+}
+
+static void delete_btf_table(struct btf_attach_table *tab)
+{
+       struct btf_attach_point *obj;
+       struct hlist_node *tmp;
+
+       unsigned int bkt;
+
+       hash_for_each_safe(tab->table, bkt, tmp, obj, hash) {
+               hash_del(&obj->hash);
+               free(obj);
+       }
+}
+
+static int
+build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type,
+                    void *info, __u32 *len)
+{
+       static const char * const names[] = {
+               [BPF_OBJ_UNKNOWN]       = "unknown",
+               [BPF_OBJ_PROG]          = "prog",
+               [BPF_OBJ_MAP]           = "map",
+       };
+       struct btf_attach_point *obj_node;
+       __u32 btf_id, id = 0;
+       int err;
+       int fd;
+
+       while (true) {
+               switch (type) {
+               case BPF_OBJ_PROG:
+                       err = bpf_prog_get_next_id(id, &id);
+                       break;
+               case BPF_OBJ_MAP:
+                       err = bpf_map_get_next_id(id, &id);
+                       break;
+               default:
+                       err = -1;
+                       p_err("unexpected object type: %d", type);
+                       goto err_free;
+               }
+               if (err) {
+                       if (errno == ENOENT) {
+                               err = 0;
+                               break;
+                       }
+                       p_err("can't get next %s: %s%s", names[type],
+                             strerror(errno),
+                             errno == EINVAL ? " -- kernel too old?" : "");
+                       goto err_free;
+               }
+
+               switch (type) {
+               case BPF_OBJ_PROG:
+                       fd = bpf_prog_get_fd_by_id(id);
+                       break;
+               case BPF_OBJ_MAP:
+                       fd = bpf_map_get_fd_by_id(id);
+                       break;
+               default:
+                       err = -1;
+                       p_err("unexpected object type: %d", type);
+                       goto err_free;
+               }
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       p_err("can't get %s by id (%u): %s", names[type], id,
+                             strerror(errno));
+                       err = -1;
+                       goto err_free;
+               }
+
+               memset(info, 0, *len);
+               err = bpf_obj_get_info_by_fd(fd, info, len);
+               close(fd);
+               if (err) {
+                       p_err("can't get %s info: %s", names[type],
+                             strerror(errno));
+                       goto err_free;
+               }
+
+               switch (type) {
+               case BPF_OBJ_PROG:
+                       btf_id = ((struct bpf_prog_info *)info)->btf_id;
+                       break;
+               case BPF_OBJ_MAP:
+                       btf_id = ((struct bpf_map_info *)info)->btf_id;
+                       break;
+               default:
+                       err = -1;
+                       p_err("unexpected object type: %d", type);
+                       goto err_free;
+               }
+               if (!btf_id)
+                       continue;
+
+               obj_node = calloc(1, sizeof(*obj_node));
+               if (!obj_node) {
+                       p_err("failed to allocate memory: %s", strerror(errno));
+                       goto err_free;
+               }
+
+               obj_node->obj_id = id;
+               obj_node->btf_id = btf_id;
+               hash_add(tab->table, &obj_node->hash, obj_node->btf_id);
+       }
+
+       return 0;
+
+err_free:
+       delete_btf_table(tab);
+       return err;
+}
+
+static int
+build_btf_tables(struct btf_attach_table *btf_prog_table,
+                struct btf_attach_table *btf_map_table)
+{
+       struct bpf_prog_info prog_info;
+       __u32 prog_len = sizeof(prog_info);
+       struct bpf_map_info map_info;
+       __u32 map_len = sizeof(map_info);
+       int err = 0;
+
+       err = build_btf_type_table(btf_prog_table, BPF_OBJ_PROG, &prog_info,
+                                  &prog_len);
+       if (err)
+               return err;
+
+       err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info,
+                                  &map_len);
+       if (err) {
+               delete_btf_table(btf_prog_table);
+               return err;
+       }
+
+       return 0;
+}
+
+static void
+show_btf_plain(struct bpf_btf_info *info, int fd,
+              struct btf_attach_table *btf_prog_table,
+              struct btf_attach_table *btf_map_table)
+{
+       struct btf_attach_point *obj;
+       int n;
+
+       printf("%u: ", info->id);
+       printf("size %uB", info->btf_size);
+
+       n = 0;
+       hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) {
+               if (obj->btf_id == info->id)
+                       printf("%s%u", n++ == 0 ? "  prog_ids " : ",",
+                              obj->obj_id);
+       }
+
+       n = 0;
+       hash_for_each_possible(btf_map_table->table, obj, hash, info->id) {
+               if (obj->btf_id == info->id)
+                       printf("%s%u", n++ == 0 ? "  map_ids " : ",",
+                              obj->obj_id);
+       }
+
+       printf("\n");
+}
+
+static void
+show_btf_json(struct bpf_btf_info *info, int fd,
+             struct btf_attach_table *btf_prog_table,
+             struct btf_attach_table *btf_map_table)
+{
+       struct btf_attach_point *obj;
+
+       jsonw_start_object(json_wtr);   /* btf object */
+       jsonw_uint_field(json_wtr, "id", info->id);
+       jsonw_uint_field(json_wtr, "size", info->btf_size);
+
+       jsonw_name(json_wtr, "prog_ids");
+       jsonw_start_array(json_wtr);    /* prog_ids */
+       hash_for_each_possible(btf_prog_table->table, obj, hash,
+                              info->id) {
+               if (obj->btf_id == info->id)
+                       jsonw_uint(json_wtr, obj->obj_id);
+       }
+       jsonw_end_array(json_wtr);      /* prog_ids */
+
+       jsonw_name(json_wtr, "map_ids");
+       jsonw_start_array(json_wtr);    /* map_ids */
+       hash_for_each_possible(btf_map_table->table, obj, hash,
+                              info->id) {
+               if (obj->btf_id == info->id)
+                       jsonw_uint(json_wtr, obj->obj_id);
+       }
+       jsonw_end_array(json_wtr);      /* map_ids */
+       jsonw_end_object(json_wtr);     /* btf object */
+}
+
+static int
+show_btf(int fd, struct btf_attach_table *btf_prog_table,
+        struct btf_attach_table *btf_map_table)
+{
+       struct bpf_btf_info info = {};
+       __u32 len = sizeof(info);
+       int err;
+
+       err = bpf_obj_get_info_by_fd(fd, &info, &len);
+       if (err) {
+               p_err("can't get BTF object info: %s", strerror(errno));
+               return -1;
+       }
+
+       if (json_output)
+               show_btf_json(&info, fd, btf_prog_table, btf_map_table);
+       else
+               show_btf_plain(&info, fd, btf_prog_table, btf_map_table);
+
+       return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+       struct btf_attach_table btf_prog_table;
+       struct btf_attach_table btf_map_table;
+       int err, fd = -1;
+       __u32 id = 0;
+
+       if (argc == 2) {
+               fd = btf_parse_fd(&argc, &argv);
+               if (fd < 0)
+                       return -1;
+       }
+
+       if (argc) {
+               if (fd >= 0)
+                       close(fd);
+               return BAD_ARG();
+       }
+
+       hash_init(btf_prog_table.table);
+       hash_init(btf_map_table.table);
+       err = build_btf_tables(&btf_prog_table, &btf_map_table);
+       if (err) {
+               if (fd >= 0)
+                       close(fd);
+               return err;
+       }
+
+       if (fd >= 0) {
+               err = show_btf(fd, &btf_prog_table, &btf_map_table);
+               close(fd);
+               goto exit_free;
+       }
+
+       if (json_output)
+               jsonw_start_array(json_wtr);    /* root array */
+
+       while (true) {
+               err = bpf_btf_get_next_id(id, &id);
+               if (err) {
+                       if (errno == ENOENT) {
+                               err = 0;
+                               break;
+                       }
+                       p_err("can't get next BTF object: %s%s",
+                             strerror(errno),
+                             errno == EINVAL ? " -- kernel too old?" : "");
+                       err = -1;
+                       break;
+               }
+
+               fd = bpf_btf_get_fd_by_id(id);
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       p_err("can't get BTF object by id (%u): %s",
+                             id, strerror(errno));
+                       err = -1;
+                       break;
+               }
+
+               err = show_btf(fd, &btf_prog_table, &btf_map_table);
+               close(fd);
+               if (err)
+                       break;
+       }
+
+       if (json_output)
+               jsonw_end_array(json_wtr);      /* root array */
+
+exit_free:
+       delete_btf_table(&btf_prog_table);
+       delete_btf_table(&btf_map_table);
+
+       return err;
+}
+
  static int do_help(int argc, char **argv)
  {
         if (json_output) {
@@ -530,7 +865,8 @@ static int do_help(int argc, char **argv)
         }
  
         fprintf(stderr,
-               "Usage: %s btf dump BTF_SRC [format FORMAT]\n"
+               "Usage: %s btf { show | list } [id BTF_ID]\n"
+               "       %s btf dump BTF_SRC [format FORMAT]\n"
                 "       %s btf help\n"
                 "\n"
                 "       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
@@ -539,12 +875,14 @@ static int do_help(int argc, char **argv)
                 "       " HELP_SPEC_PROGRAM "\n"
                 "       " HELP_SPEC_OPTIONS "\n"
                 "",
-               bin_name, bin_name);
+               bin_name, bin_name, bin_name);
  
         return 0;
  }
  
  static const struct cmd cmds[] = {
+       { "show",       do_show },
+       { "list",       do_show },
         { "help",       do_help },
         { "dump",       do_dump },
         { 0 }
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c

index 8cafb9b314672fe238cb3d4e40b9dafcba1f2301..d66131f696892065240e225e2c86d44278ea4e6e 100644 (file)
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -26,9 +26,9 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw,
                            bool is_plain_text)
  {
         if (is_plain_text)
-               jsonw_printf(jw, "%p", *(unsigned long *)data);
+               jsonw_printf(jw, "%p", data);
         else
-               jsonw_printf(jw, "%u", *(unsigned long *)data);
+               jsonw_printf(jw, "%lu", *(unsigned long *)data);
  }
  
  static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
@@ -216,7 +216,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
         switch (BTF_INT_ENCODING(*int_type)) {
         case 0:
                 if (BTF_INT_BITS(*int_type) == 64)
-                       jsonw_printf(jw, "%lu", *(__u64 *)data);
+                       jsonw_printf(jw, "%llu", *(__u64 *)data);
                 else if (BTF_INT_BITS(*int_type) == 32)
                         jsonw_printf(jw, "%u", *(__u32 *)data);
                 else if (BTF_INT_BITS(*int_type) == 16)
@@ -229,7 +229,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
                 break;
         case BTF_INT_SIGNED:
                 if (BTF_INT_BITS(*int_type) == 64)
-                       jsonw_printf(jw, "%ld", *(long long *)data);
+                       jsonw_printf(jw, "%lld", *(long long *)data);
                 else if (BTF_INT_BITS(*int_type) == 32)
                         jsonw_printf(jw, "%d", *(int *)data);
                 else if (BTF_INT_BITS(*int_type) == 16)
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c

index 44352b5aca8507467e42710c13c19c1083dfb08c..1ef45e55039e191da13e6138be3e62c7e24dcebf 100644 (file)
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -120,8 +120,8 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
  static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
                                    int level)
  {
+       const char *attach_flags_str;
         __u32 prog_ids[1024] = {0};
-       char *attach_flags_str;
         __u32 prog_cnt, iter;
         __u32 attach_flags;
         char buf[32];
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c

index 6a71324be6283eccb0d816a72dd61f66b4527de6..88264abaa738ad1dbab60b407ba584dac617a0b6 100644 (file)
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -29,7 +29,7 @@
  #define BPF_FS_MAGIC           0xcafe4a11
  #endif
  
-void __printf(1, 2) p_err(const char *fmt, ...)
+void p_err(const char *fmt, ...)
  {
         va_list ap;
  
@@ -47,7 +47,7 @@ void __printf(1, 2) p_err(const char *fmt, ...)
         va_end(ap);
  }
  
-void __printf(1, 2) p_info(const char *fmt, ...)
+void p_info(const char *fmt, ...)
  {
         va_list ap;
  
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c

index 6046dcab51cc538b679428def22ff377a6fada7c..86501cd3c763e7fbaff97f1944da70f8b7d67183 100644 (file)
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -15,7 +15,6 @@
  #include <malloc.h>
  #include <inttypes.h>
  #include <stdint.h>
-#include <linux/compiler.h>
  
  #include "json_writer.h"
  
@@ -153,8 +152,7 @@ void jsonw_name(json_writer_t *self, const char *name)
                 putc(' ', self->out);
  }
  
-void __printf(2, 0)
-jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
  {
         jsonw_eor(self);
         putc('"', self->out);
@@ -162,7 +160,7 @@ jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
         putc('"', self->out);
  }
  
-void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...)
+void jsonw_printf(json_writer_t *self, const char *fmt, ...)
  {
         va_list ap;
  
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h

index cb9a1993681c6e3997e07343b3d5c9a4a68b9e6e..35cf1f00f96cadb463ad65c725df2d314bfd866c 100644 (file)
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -14,6 +14,7 @@
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdarg.h>
+#include <linux/compiler.h>
  
  /* Opaque class structure */
  typedef struct json_writer json_writer_t;
@@ -30,8 +31,9 @@ void jsonw_pretty(json_writer_t *self, bool on);
  void jsonw_name(json_writer_t *self, const char *name);
  
  /* Add value  */
-void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap);
-void jsonw_printf(json_writer_t *self, const char *fmt, ...);
+void __printf(2, 0) jsonw_vprintf_enquote(json_writer_t *self, const char *fmt,
+                                         va_list ap);
+void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...);
  void jsonw_string(json_writer_t *self, const char *value);
  void jsonw_bool(json_writer_t *self, bool value);
  void jsonw_float(json_writer_t *self, double number);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c

index e916ff25697f1917e87d04814599c0b0bc01ece9..93d008687020cdac64f931ca22f911e6019d38ea 100644 (file)
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -139,7 +139,7 @@ int detect_common_prefix(const char *arg, ...)
         strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
  
         if (count >= 2) {
-               p_err(msg);
+               p_err("%s", msg);
                 return -1;
         }
  
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h

index 7031a4bf87a020716df5eb20fa0c641c423de9d7..af9ad56c303a1dff6359c6af2530fcdbf60e6fb4 100644 (file)
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -98,8 +98,8 @@ extern int bpf_flags;
  extern struct pinned_obj_table prog_table;
  extern struct pinned_obj_table map_table;
  
-void p_err(const char *fmt, ...);
-void p_info(const char *fmt, ...);
+void __printf(1, 2) p_err(const char *fmt, ...);
+void __printf(1, 2) p_info(const char *fmt, ...);
  
  bool is_prefix(const char *pfx, const char *str);
  int detect_common_prefix(const char *arg, ...);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c

index bfbbc6b4cb83c7c7db9029cebbd5f49118fb66a3..de61d73b9030b27b5d993040afb6738f416a6c72 100644 (file)
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -481,9 +481,11 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
  
  static int show_map_close_json(int fd, struct bpf_map_info *info)
  {
-       char *memlock;
+       char *memlock, *frozen_str;
+       int frozen = 0;
  
         memlock = get_fdinfo(fd, "memlock");
+       frozen_str = get_fdinfo(fd, "frozen");
  
         jsonw_start_object(json_wtr);
  
@@ -533,6 +535,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
         }
         close(fd);
  
+       if (frozen_str) {
+               frozen = atoi(frozen_str);
+               free(frozen_str);
+       }
+       jsonw_int_field(json_wtr, "frozen", frozen);
+
         if (info->btf_id)
                 jsonw_int_field(json_wtr, "btf_id", info->btf_id);
  
@@ -555,9 +563,11 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
  
  static int show_map_close_plain(int fd, struct bpf_map_info *info)
  {
-       char *memlock;
+       char *memlock, *frozen_str;
+       int frozen = 0;
  
         memlock = get_fdinfo(fd, "memlock");
+       frozen_str = get_fdinfo(fd, "frozen");
  
         printf("%u: ", info->id);
         if (info->type < ARRAY_SIZE(map_type_name))
@@ -610,9 +620,23 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
                                 printf("\n\tpinned %s", obj->path);
                 }
         }
+       printf("\n");
+
+       if (frozen_str) {
+               frozen = atoi(frozen_str);
+               free(frozen_str);
+       }
+
+       if (!info->btf_id && !frozen)
+               return 0;
+
+       printf("\t");
  
         if (info->btf_id)
-               printf("\n\tbtf_id %d", info->btf_id);
+               printf("btf_id %d", info->btf_id);
+
+       if (frozen)
+               printf("%sfrozen", info->btf_id ? "  " : "");
  
         printf("\n");
         return 0;
@@ -1238,6 +1262,35 @@ exit_free:
         return err;
  }
  
+static int do_freeze(int argc, char **argv)
+{
+       int err, fd;
+
+       if (!REQ_ARGS(2))
+               return -1;
+
+       fd = map_parse_fd(&argc, &argv);
+       if (fd < 0)
+               return -1;
+
+       if (argc) {
+               close(fd);
+               return BAD_ARG();
+       }
+
+       err = bpf_map_freeze(fd);
+       close(fd);
+       if (err) {
+               p_err("failed to freeze map: %s", strerror(errno));
+               return err;
+       }
+
+       if (json_output)
+               jsonw_null(json_wtr);
+
+       return 0;
+}
+
  static int do_help(int argc, char **argv)
  {
         if (json_output) {
@@ -1262,6 +1315,7 @@ static int do_help(int argc, char **argv)
                 "       %s %s pop        MAP\n"
                 "       %s %s enqueue    MAP value VALUE\n"
                 "       %s %s dequeue    MAP\n"
+               "       %s %s freeze     MAP\n"
                 "       %s %s help\n"
                 "\n"
                 "       " HELP_SPEC_MAP "\n"
@@ -1280,7 +1334,8 @@ static int do_help(int argc, char **argv)
                 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
                 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
                 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-               bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+               bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+               bin_name, argv[-2]);
  
         return 0;
  }
@@ -1302,6 +1357,7 @@ static const struct cmd cmds[] = {
         { "enqueue",    do_update },
         { "pop",        do_pop_dequeue },
         { "dequeue",    do_pop_dequeue },
+       { "freeze",     do_freeze },
         { 0 }
  };
  
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c

index 3f108ab177973761a03601e8d69da0719e3bd052..4c5531d1a45002e79f2e0b2de48317bb408d9988 100644 (file)
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -157,7 +157,7 @@ int do_event_pipe(int argc, char **argv)
                         NEXT_ARG();
                         ctx.cpu = strtoul(*argv, &endptr, 0);
                         if (*endptr) {
-                               p_err("can't parse %s as CPU ID", **argv);
+                               p_err("can't parse %s as CPU ID", *argv);
                                 goto err_close_map;
                         }
  
@@ -168,7 +168,7 @@ int do_event_pipe(int argc, char **argv)
                         NEXT_ARG();
                         ctx.idx = strtoul(*argv, &endptr, 0);
                         if (*endptr) {
-                               p_err("can't parse %s as index", **argv);
+                               p_err("can't parse %s as index", *argv);
                                 goto err_close_map;
                         }
  
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c

index 67e99c56bc88c0e4684755f97a562020c0731bf0..4f52d31516166f0625af6baaaf8847c916ff0a93 100644 (file)
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -55,6 +55,35 @@ struct bpf_attach_info {
         __u32 flow_dissector_id;
  };
  
+enum net_attach_type {
+       NET_ATTACH_TYPE_XDP,
+       NET_ATTACH_TYPE_XDP_GENERIC,
+       NET_ATTACH_TYPE_XDP_DRIVER,
+       NET_ATTACH_TYPE_XDP_OFFLOAD,
+};
+
+static const char * const attach_type_strings[] = {
+       [NET_ATTACH_TYPE_XDP]           = "xdp",
+       [NET_ATTACH_TYPE_XDP_GENERIC]   = "xdpgeneric",
+       [NET_ATTACH_TYPE_XDP_DRIVER]    = "xdpdrv",
+       [NET_ATTACH_TYPE_XDP_OFFLOAD]   = "xdpoffload",
+};
+
+const size_t net_attach_type_size = ARRAY_SIZE(attach_type_strings);
+
+static enum net_attach_type parse_attach_type(const char *str)
+{
+       enum net_attach_type type;
+
+       for (type = 0; type < net_attach_type_size; type++) {
+               if (attach_type_strings[type] &&
+                   is_prefix(str, attach_type_strings[type]))
+                       return type;
+       }
+
+       return net_attach_type_size;
+}
+
  static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
  {
         struct bpf_netdev_t *netinfo = cookie;
@@ -197,7 +226,7 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info)
  
         fd = open("/proc/self/ns/net", O_RDONLY);
         if (fd < 0) {
-               p_err("can't open /proc/self/ns/net: %d",
+               p_err("can't open /proc/self/ns/net: %s",
                       strerror(errno));
                 return -1;
         }
@@ -223,6 +252,134 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info)
         return 0;
  }
  
+/* Consume "dev <devname>" from the argument vector.
+ *
+ * Returns the value of if_nametoindex() for <devname>; that is 0 (after
+ * printing an error) when the lookup fails. Returns -1 without consuming
+ * anything when the current argument is not accepted as "dev". Callers
+ * treat any result below 1 as failure.
+ */
+static int net_parse_dev(int *argc, char ***argv)
+{
+       int idx;
+
+       if (!is_prefix(**argv, "dev")) {
+               p_err("expected 'dev', got: '%s'?", **argv);
+               return -1;
+       }
+       NEXT_ARGP();
+
+       idx = if_nametoindex(**argv);
+       if (!idx)
+               p_err("invalid devname %s", **argv);
+
+       /* the device name is consumed even when the lookup failed */
+       NEXT_ARGP();
+
+       return idx;
+}
+
+/* Build the XDP flags for attach_type and hand progfd to
+ * bpf_set_link_xdp_fd() for the interface ifindex.
+ *
+ * XDP_FLAGS_UPDATE_IF_NOEXIST is set unless overwrite is true; the
+ * generic/driver/offload attach types each add their mode flag. The
+ * return value of bpf_set_link_xdp_fd() is passed back unchanged.
+ */
+static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
+                               int ifindex, bool overwrite)
+{
+       __u32 flags = overwrite ? 0 : XDP_FLAGS_UPDATE_IF_NOEXIST;
+
+       switch (attach_type) {
+       case NET_ATTACH_TYPE_XDP_GENERIC:
+               flags |= XDP_FLAGS_SKB_MODE;
+               break;
+       case NET_ATTACH_TYPE_XDP_DRIVER:
+               flags |= XDP_FLAGS_DRV_MODE;
+               break;
+       case NET_ATTACH_TYPE_XDP_OFFLOAD:
+               flags |= XDP_FLAGS_HW_MODE;
+               break;
+       default:
+               /* plain "xdp": no mode flag */
+               break;
+       }
+
+       return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+}
+
+/* Handle "net attach ATTACH_TYPE PROG dev <devname> [ overwrite ]".
+ *
+ * Parses the attach type, the program reference and the device from argv,
+ * then attaches through do_attach_detach_xdp(). Once prog_parse_fd() has
+ * returned a descriptor it is closed on every exit path, including the
+ * attach-failure and success paths.
+ *
+ * Returns 0 on success, -EINVAL on a parse error, or the negative value
+ * returned by the attach call.
+ */
+static int do_attach(int argc, char **argv)
+{
+       enum net_attach_type attach_type;
+       int progfd, ifindex, err = 0;
+       bool overwrite = false;
+
+       /* parse attach args */
+       if (!REQ_ARGS(5))
+               return -EINVAL;
+
+       attach_type = parse_attach_type(*argv);
+       if (attach_type == net_attach_type_size) {
+               p_err("invalid net attach/detach type: %s", *argv);
+               return -EINVAL;
+       }
+       NEXT_ARG();
+
+       progfd = prog_parse_fd(&argc, &argv);
+       if (progfd < 0)
+               return -EINVAL;
+
+       ifindex = net_parse_dev(&argc, &argv);
+       if (ifindex < 1) {
+               err = -EINVAL;
+               goto cleanup;
+       }
+
+       if (argc) {
+               if (is_prefix(*argv, "overwrite")) {
+                       overwrite = true;
+               } else {
+                       p_err("expected 'overwrite', got: '%s'?", *argv);
+                       err = -EINVAL;
+                       goto cleanup;
+               }
+       }
+
+       /* attach xdp prog */
+       if (is_prefix("xdp", attach_type_strings[attach_type]))
+               err = do_attach_detach_xdp(progfd, attach_type, ifindex,
+                                          overwrite);
+
+       if (err < 0) {
+               p_err("interface %s attach failed: %s",
+                     attach_type_strings[attach_type], strerror(-err));
+               goto cleanup;
+       }
+
+       /* any non-negative result is reported as plain success */
+       err = 0;
+
+       if (json_output)
+               jsonw_null(json_wtr);
+
+cleanup:
+       /* progfd is not referenced past this point, so release it */
+       close(progfd);
+       return err;
+}
+
+/* Handle "net detach ATTACH_TYPE dev <devname>".
+ *
+ * Parses the attach type and the device from argv, then calls
+ * do_attach_detach_xdp() with a program fd of -1 and overwrite == false.
+ *
+ * Returns 0 on success, -EINVAL on a parse error, or the negative value
+ * returned by the detach call.
+ */
+static int do_detach(int argc, char **argv)
+{
+       enum net_attach_type attach_type;
+       int progfd, ifindex, err = 0;
+
+       /* parse detach args */
+       if (!REQ_ARGS(3))
+               return -EINVAL;
+
+       attach_type = parse_attach_type(*argv);
+       if (attach_type == net_attach_type_size) {
+               p_err("invalid net attach/detach type: %s", *argv);
+               return -EINVAL;
+       }
+       NEXT_ARG();
+
+       ifindex = net_parse_dev(&argc, &argv);
+       if (ifindex < 1)
+               return -EINVAL;
+
+       /* detach xdp prog */
+       progfd = -1;
+       if (is_prefix("xdp", attach_type_strings[attach_type]))
+               /* overwrite is a bool: pass false rather than NULL */
+               err = do_attach_detach_xdp(progfd, attach_type, ifindex,
+                                          false);
+
+       if (err < 0) {
+               p_err("interface %s detach failed: %s",
+                     attach_type_strings[attach_type], strerror(-err));
+               return err;
+       }
+
+       if (json_output)
+               jsonw_null(json_wtr);
+
+       return 0;
+}
+
  static int do_show(int argc, char **argv)
  {
         struct bpf_attach_info attach_info = {};
@@ -232,13 +389,9 @@ static int do_show(int argc, char **argv)
         char err_buf[256];
  
         if (argc == 2) {
-               if (strcmp(argv[0], "dev") != 0)
-                       usage();
-               filter_idx = if_nametoindex(argv[1]);
-               if (filter_idx == 0) {
-                       fprintf(stderr, "invalid dev name %s\n", argv[1]);
+               filter_idx = net_parse_dev(&argc, &argv);
+               if (filter_idx < 1)
                         return -1;
-               }
         } else if (argc != 0) {
                 usage();
         }
@@ -305,13 +458,20 @@ static int do_help(int argc, char **argv)
  
         fprintf(stderr,
                 "Usage: %s %s { show | list } [dev <devname>]\n"
+               "       %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+               "       %s %s detach ATTACH_TYPE dev <devname>\n"
                 "       %s %s help\n"
+               "\n"
+               "       " HELP_SPEC_PROGRAM "\n"
+               "       ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+               "\n"
                 "Note: Only xdp and tc attachments are supported now.\n"
                 "      For progs attached to cgroups, use \"bpftool cgroup\"\n"
                 "      to dump program attachments. For program types\n"
                 "      sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
                 "      consult iproute2.\n",
-               bin_name, argv[-2], bin_name, argv[-2]);
+               bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+               bin_name, argv[-2]);
  
         return 0;
  }
@@ -319,6 +479,8 @@ static int do_help(int argc, char **argv)
  static const struct cmd cmds[] = {
         { "show",       do_show },
         { "list",       do_show },
+       { "attach",     do_attach },
+       { "detach",     do_detach },
         { "help",       do_help },
         { 0 }
  };
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c

index f2a545e667c4e35a44eeca32c54faeaae17a6fbf..b2046f33e23f1b719a617f7a66f96d5bb13cb934 100644 (file)
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -104,6 +104,8 @@ static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
                 jsonw_string_field(json_wtr, "filename", buf);
                 jsonw_lluint_field(json_wtr, "offset", probe_offset);
                 break;
+       default:
+               break;
         }
         jsonw_end_object(json_wtr);
  }
@@ -140,6 +142,8 @@ static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
                 printf("uretprobe  filename %s  offset %llu\n", buf,
                        probe_offset);
                 break;
+       default:
+               break;
         }
  }
  
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h

index 0d35f18006a136b4578f2298243e3728a067c272..95c072b70d0e832f70a4ac07470bcd0d1ed46deb 100644 (file)
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -6,9 +6,11 @@
  /*
   * Common definitions for all gcc versions go here.
   */
+#ifndef GCC_VERSION
  #define GCC_VERSION (__GNUC__ * 10000          \
                      + __GNUC_MINOR__ * 100     \
                      + __GNUC_PATCHLEVEL__)
+#endif
  
  #if GCC_VERSION >= 70000 && !defined(__CHECKER__)
  # define __fallthrough __attribute__ ((fallthrough))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 0e66371bea13fdb93411c87aeb08a88615b5f8bb..77c6be96d676222e446d41d2668b40cafb0ef1fe 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -106,6 +106,7 @@ enum bpf_cmd {
         BPF_TASK_FD_QUERY,
         BPF_MAP_LOOKUP_AND_DELETE_ELEM,
         BPF_MAP_FREEZE,
+       BPF_BTF_GET_NEXT_ID,
  };
  
  enum bpf_map_type {
@@ -284,6 +285,9 @@ enum bpf_attach_type {
   */
  #define BPF_F_TEST_RND_HI32    (1U << 2)
  
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ  (1U << 3)
+
  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
   * two extensions:
   *
@@ -337,6 +341,9 @@ enum bpf_attach_type {
  #define BPF_F_RDONLY_PROG      (1U << 7)
  #define BPF_F_WRONLY_PROG      (1U << 8)
  
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE            (1U << 9)
+
  /* flags for BPF_PROG_QUERY */
  #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
  
@@ -576,6 +583,8 @@ union bpf_attr {
   *             limited to five).
   *
   *             Each time the helper is called, it appends a line to the trace.
+ *             Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ *             open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
   *             The format of the trace is customizable, and the exact output
   *             one will get depends on the options set in
   *             *\/sys/kernel/debug/tracing/trace_options* (see also the
@@ -1014,7 +1023,7 @@ union bpf_attr {
   *             The realm of the route for the packet associated to *skb*, or 0
   *             if none was found.
   *
- * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
   *     Description
   *             Write raw *data* blob into a special BPF perf event held by
   *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1076,7 +1085,7 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
   *     Description
   *             Walk a user or a kernel stack and return its id. To achieve
   *             this, the helper needs *ctx*, which is a pointer to the context
@@ -1725,7 +1734,7 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
   *     Description
   *             Used for error injection, this helper uses kprobes to override
   *             the return value of the probed function, and to set it to *rc*.
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h

index faaa5ca2a11767a3cfd967661f21645799b95f9c..be328c59389d56861f95aeb488860ed81ef19e0c 100644 (file)
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -16,6 +16,18 @@
  #define XDP_SHARED_UMEM        (1 << 0)
  #define XDP_COPY       (1 << 1) /* Force copy-mode */
  #define XDP_ZEROCOPY   (1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go to sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application needs to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
+
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
  
  struct sockaddr_xdp {
         __u16 sxdp_family;
@@ -25,10 +37,14 @@ struct sockaddr_xdp {
         __u32 sxdp_shared_umem_fd;
  };
  
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
  struct xdp_ring_offset {
         __u64 producer;
         __u64 consumer;
         __u64 desc;
+       __u64 flags;
  };
  
  struct xdp_mmap_offsets {
@@ -53,6 +69,7 @@ struct xdp_umem_reg {
         __u64 len; /* Length of packet data area */
         __u32 chunk_size;
         __u32 headroom;
+       __u32 flags;
  };
  
  struct xdp_statistics {
@@ -74,6 +91,11 @@ struct xdp_options {
  #define XDP_UMEM_PGOFF_FILL_RING       0x100000000ULL
  #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
  
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+       ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
  /* Rx/Tx descriptor */
  struct xdp_desc {
         __u64 addr;
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile

index 9312066a1ae380b0ce9b9abb6c5923d86d047bd8..c6f94cffe06e106549b835e9970b6aeeb577c9a8 100644 (file)
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,9 +1,10 @@
  # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
  # Most of this file is copied from tools/lib/traceevent/Makefile
  
-BPF_VERSION = 0
-BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 4
+LIBBPF_VERSION := $(shell \
+       grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+       sort -rV | head -n1 | cut -d'_' -f2)
+LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
  
  MAKEFLAGS += --no-print-directory
  
@@ -79,15 +80,9 @@ export prefix libdir src obj
  libdir_SQ = $(subst ','\'',$(libdir))
  libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
  
-VERSION                = $(BPF_VERSION)
-PATCHLEVEL     = $(BPF_PATCHLEVEL)
-EXTRAVERSION   = $(BPF_EXTRAVERSION)
-
  OBJ            = $@
  N              =
  
-LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
-
  LIB_TARGET     = libbpf.a libbpf.so.$(LIBBPF_VERSION)
  LIB_FILE       = libbpf.a libbpf.so*
  PC_FILE                = libbpf.pc
@@ -113,6 +108,7 @@ override CFLAGS += -Werror -Wall
  override CFLAGS += -fPIC
  override CFLAGS += $(INCLUDES)
  override CFLAGS += -fvisibility=hidden
+override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
  
  ifeq ($(VERBOSE),1)
    Q =
@@ -138,7 +134,9 @@ LIB_FILE    := $(addprefix $(OUTPUT),$(LIB_FILE))
  PC_FILE                := $(addprefix $(OUTPUT),$(PC_FILE))
  
  GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
-                          awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
+                          cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+                          awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \
+                          sort -u | wc -l)
  VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
                               grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
  
@@ -178,10 +176,10 @@ $(BPF_IN): force elfdep bpfdep
  $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
  
  $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
-       $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \
+       $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
                                     -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
         @ln -sf $(@F) $(OUTPUT)libbpf.so
-       @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION)
+       @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
  
  $(OUTPUT)libbpf.a: $(BPF_IN)
         $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
@@ -205,6 +203,7 @@ check_abi: $(OUTPUT)libbpf.so
                      "Please make sure all LIBBPF_API symbols are"       \
                      "versioned in $(VERSION_SCRIPT)." >&2;              \
                 readelf -s --wide $(OUTPUT)libbpf-in.o |                 \
+                   cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' |   \
                     awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'|   \
                     sort -u > $(OUTPUT)libbpf_global_syms.tmp;           \
                 readelf -s --wide $(OUTPUT)libbpf.so |                   \
@@ -257,7 +256,8 @@ config-clean:
  
  clean:
         $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
-               *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS
+               *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
+               *.pc LIBBPF-CFLAGS
         $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
  
  
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c

index c7d7993c44bb0e1b6f7cdef1dc747050fdd0dbe9..cbb933532981f0923b2d37ac3acacf1d5e428b46 100644 (file)
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -568,7 +568,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
         return ret;
  }
  
-int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
  {
         union bpf_attr attr;
         int err;
@@ -576,26 +576,26 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
         memset(&attr, 0, sizeof(attr));
         attr.start_id = start_id;
  
-       err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
+       err = sys_bpf(cmd, &attr, sizeof(attr));
         if (!err)
                 *next_id = attr.next_id;
  
         return err;
  }
  
-int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
  {
-       union bpf_attr attr;
-       int err;
-
-       memset(&attr, 0, sizeof(attr));
-       attr.start_id = start_id;
+       return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
+}
  
-       err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
-       if (!err)
-               *next_id = attr.next_id;
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+       return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
+}
  
-       return err;
+int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
+{
+       return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
  }
  
  int bpf_prog_get_fd_by_id(__u32 id)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h

index ff42ca043dc8fc1e0e94eaf1334367ec4585b543..0db01334740f8d1961a54bb28d8cb7f11d8ea9f6 100644 (file)
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -156,6 +156,7 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
                                  __u32 *retval, __u32 *duration);
  LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
  LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
  LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
  LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
  LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map

index f9d316e873d8d2d7ff37d138fdef0cbbbd425bcd..d04c7cb623ed01494304f2cc9425f5a2561644ab 100644 (file)
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -183,4 +183,10 @@ LIBBPF_0.0.4 {
                 perf_buffer__new;
                 perf_buffer__new_raw;
                 perf_buffer__poll;
+               xsk_umem__create;
  } LIBBPF_0.0.3;
+
+LIBBPF_0.0.5 {
+       global:
+               bpf_btf_get_next_id;
+} LIBBPF_0.0.4;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c

index 680e63066cf39c7f3bd06cdf645b05065060728e..842c4fd558592183a6ce0a007acfa68ebbcf7237 100644 (file)
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -74,23 +74,6 @@ struct xsk_nl_info {
         int fd;
  };
  
-/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit.
- * Unfortunately, it is not part of glibc.
- */
-static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags,
-                            int fd, __u64 offset)
-{
-#ifdef __NR_mmap2
-       unsigned int page_shift = __builtin_ffs(getpagesize()) - 1;
-       long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd,
-                          (off_t)(offset >> page_shift));
-
-       return (void *)ret;
-#else
-       return mmap(addr, length, prot, flags, fd, offset);
-#endif
-}
-
  int xsk_umem__fd(const struct xsk_umem *umem)
  {
         return umem ? umem->fd : -EINVAL;
@@ -116,6 +99,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
                 cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
                 cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
                 cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+               cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
                 return;
         }
  
@@ -123,6 +107,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
         cfg->comp_size = usr_cfg->comp_size;
         cfg->frame_size = usr_cfg->frame_size;
         cfg->frame_headroom = usr_cfg->frame_headroom;
+       cfg->flags = usr_cfg->flags;
  }
  
  static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
@@ -149,9 +134,10 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
         return 0;
  }
  
-int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
-                    struct xsk_ring_prod *fill, struct xsk_ring_cons *comp,
-                    const struct xsk_umem_config *usr_config)
+int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
+                           __u64 size, struct xsk_ring_prod *fill,
+                           struct xsk_ring_cons *comp,
+                           const struct xsk_umem_config *usr_config)
  {
         struct xdp_mmap_offsets off;
         struct xdp_umem_reg mr;
@@ -182,6 +168,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
         mr.len = size;
         mr.chunk_size = umem->config.frame_size;
         mr.headroom = umem->config.frame_headroom;
+       mr.flags = umem->config.flags;
  
         err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
         if (err) {
@@ -210,10 +197,9 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
                 goto out_socket;
         }
  
-       map = xsk_mmap(NULL, off.fr.desc +
-                      umem->config.fill_size * sizeof(__u64),
-                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
-                      umem->fd, XDP_UMEM_PGOFF_FILL_RING);
+       map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+                  XDP_UMEM_PGOFF_FILL_RING);
         if (map == MAP_FAILED) {
                 err = -errno;
                 goto out_socket;
@@ -224,13 +210,13 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
         fill->size = umem->config.fill_size;
         fill->producer = map + off.fr.producer;
         fill->consumer = map + off.fr.consumer;
+       fill->flags = map + off.fr.flags;
         fill->ring = map + off.fr.desc;
         fill->cached_cons = umem->config.fill_size;
  
-       map = xsk_mmap(NULL,
-                      off.cr.desc + umem->config.comp_size * sizeof(__u64),
-                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
-                      umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING);
+       map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+                  XDP_UMEM_PGOFF_COMPLETION_RING);
         if (map == MAP_FAILED) {
                 err = -errno;
                 goto out_mmap;
@@ -241,6 +227,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
         comp->size = umem->config.comp_size;
         comp->producer = map + off.cr.producer;
         comp->consumer = map + off.cr.consumer;
+       comp->flags = map + off.cr.flags;
         comp->ring = map + off.cr.desc;
  
         *umem_ptr = umem;
@@ -255,6 +242,29 @@ out_umem_alloc:
         return err;
  }
  
+struct xsk_umem_config_v1 {    /* the four xsk_umem_config members that precede flags; sizes the memcpy() in xsk_umem__create_v0_0_2() */
+       __u32 fill_size;
+       __u32 comp_size;
+       __u32 frame_size;
+       __u32 frame_headroom;
+};
+
+/* Compatibility entry point for callers built against the config layout
+ * that has no flags member (struct xsk_umem_config_v1).
+ *
+ * Copies only the v1-sized prefix of *usr_config into a current-layout
+ * config, forces flags to 0 and forwards to xsk_umem__create_v0_0_4().
+ * A NULL usr_config is forwarded as NULL instead of being dereferenced.
+ *
+ * Returns whatever xsk_umem__create_v0_0_4() returns.
+ */
+int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
+                           __u64 size, struct xsk_ring_prod *fill,
+                           struct xsk_ring_cons *comp,
+                           const struct xsk_umem_config *usr_config)
+{
+       struct xsk_umem_config config;
+
+       /* NOTE(review): xsk_set_umem_config() appears to fall back to the
+        * defaults when no user config is given - confirm. Either way,
+        * memcpy() from a NULL source must be avoided here.
+        */
+       if (!usr_config)
+               return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size,
+                                              fill, comp, NULL);
+
+       /* the caller's struct may end before flags: copy the v1 part only */
+       memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
+       config.flags = 0;
+
+       return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
+                                       &config);
+}
+asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2");
+asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4");
+
  static int xsk_load_xdp_prog(struct xsk_socket *xsk)
  {
         static const int log_buf_size = 16 * 1024;
@@ -550,11 +560,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
         }
  
         if (rx) {
-               rx_map = xsk_mmap(NULL, off.rx.desc +
-                                 xsk->config.rx_size * sizeof(struct xdp_desc),
-                                 PROT_READ | PROT_WRITE,
-                                 MAP_SHARED | MAP_POPULATE,
-                                 xsk->fd, XDP_PGOFF_RX_RING);
+               rx_map = mmap(NULL, off.rx.desc +
+                             xsk->config.rx_size * sizeof(struct xdp_desc),
+                             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                             xsk->fd, XDP_PGOFF_RX_RING);
                 if (rx_map == MAP_FAILED) {
                         err = -errno;
                         goto out_socket;
@@ -564,16 +573,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
                 rx->size = xsk->config.rx_size;
                 rx->producer = rx_map + off.rx.producer;
                 rx->consumer = rx_map + off.rx.consumer;
+               rx->flags = rx_map + off.rx.flags;
                 rx->ring = rx_map + off.rx.desc;
         }
         xsk->rx = rx;
  
         if (tx) {
-               tx_map = xsk_mmap(NULL, off.tx.desc +
-                                 xsk->config.tx_size * sizeof(struct xdp_desc),
-                                 PROT_READ | PROT_WRITE,
-                                 MAP_SHARED | MAP_POPULATE,
-                                 xsk->fd, XDP_PGOFF_TX_RING);
+               tx_map = mmap(NULL, off.tx.desc +
+                             xsk->config.tx_size * sizeof(struct xdp_desc),
+                             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+                             xsk->fd, XDP_PGOFF_TX_RING);
                 if (tx_map == MAP_FAILED) {
                         err = -errno;
                         goto out_mmap_rx;
@@ -583,6 +592,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
                 tx->size = xsk->config.tx_size;
                 tx->producer = tx_map + off.tx.producer;
                 tx->consumer = tx_map + off.tx.consumer;
+               tx->flags = tx_map + off.tx.flags;
                 tx->ring = tx_map + off.tx.desc;
                 tx->cached_cons = xsk->config.tx_size;
         }
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h

index 833a6e60d065fc3128e052eccc693d454aba1a04..584f6820a6397aa6ce9da58751ff7952bedcb2f9 100644 (file)
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -32,6 +32,7 @@ struct name { \
         __u32 *producer; \
         __u32 *consumer; \
         void *ring; \
+       __u32 *flags; \
  }
  
  DEFINE_XSK_RING(xsk_ring_prod);
@@ -76,6 +77,11 @@ xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
         return &descs[idx & rx->mask];
  }
  
+/* Non-zero when XDP_RING_NEED_WAKEUP is set in the word r->flags points to. */
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+       const __u32 ring_flags = *r->flags;
+
+       return ring_flags & XDP_RING_NEED_WAKEUP;
+}
+
  static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
  {
         __u32 free_entries = r->cached_cons - r->cached_prod;
@@ -162,6 +168,21 @@ static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
         return &((char *)umem_area)[addr];
  }
  
+/* Return the bits of addr selected by XSK_UNALIGNED_BUF_ADDR_MASK. */
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+       const __u64 base = addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+
+       return base;
+}
+
+/* Return the bits of addr above XSK_UNALIGNED_BUF_OFFSET_SHIFT, shifted down. */
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+       const __u64 offset = addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+
+       return offset;
+}
+
+/* Split addr into its base and offset parts and return their sum. */
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+       const __u64 base = xsk_umem__extract_addr(addr);
+       const __u64 offset = xsk_umem__extract_offset(addr);
+
+       return base + offset;
+}
+
  LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
  LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
  
@@ -170,12 +191,14 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
  #define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
  #define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
  #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
  
  struct xsk_umem_config {
         __u32 fill_size;
         __u32 comp_size;
         __u32 frame_size;
         __u32 frame_headroom;
+       __u32 flags;
  };
  
  /* Flags for the libbpf_flags field. */
@@ -195,6 +218,16 @@ LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
                                 struct xsk_ring_prod *fill,
                                 struct xsk_ring_cons *comp,
                                 const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
+                                      void *umem_area, __u64 size,
+                                      struct xsk_ring_prod *fill,
+                                      struct xsk_ring_cons *comp,
+                                      const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
+                                      void *umem_area, __u64 size,
+                                      struct xsk_ring_prod *fill,
+                                      struct xsk_ring_cons *comp,
+                                      const struct xsk_umem_config *config);
  LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
                                   const char *ifname, __u32 queue_id,
                                   struct xsk_umem *umem,
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore

index 90f70d2c7c22aa00b0874104fcc52c887bdbab64..60c9338cd9b411e589e3b5d1d13839bd4a1584b4 100644 (file)
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -42,4 +42,5 @@ xdping
  test_sockopt
  test_sockopt_sk
  test_sockopt_multi
+test_sockopt_inherit
  test_tcp_rtt
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile

index d69c541e20390ce6b6678ab4b130ed62bc3a82d8..9eef5edf17be5ec8e30ffddb6ab7d1c4fb62831c 100644 (file)
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -29,7 +29,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
         test_cgroup_storage test_select_reuseport test_section_names \
         test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
         test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
-       test_sockopt_multi test_tcp_rtt
+       test_sockopt_multi test_sockopt_inherit test_tcp_rtt
  
  BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
  TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -66,7 +66,8 @@ TEST_PROGS := test_kmod.sh \
         test_tcp_check_syncookie.sh \
         test_tc_tunnel.sh \
         test_tc_edt.sh \
-       test_xdping.sh
+       test_xdping.sh \
+       test_bpftool_build.sh
  
  TEST_PROGS_EXTENDED := with_addr.sh \
         with_tunnels.sh \
@@ -115,6 +116,7 @@ $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
  $(OUTPUT)/test_sockopt: cgroup_helpers.c
  $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
  $(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_inherit: cgroup_helpers.c
  $(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
  
  .PHONY: force
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h

index 05f036df8a4c51b7c32fd72f65b9766e13ac7689..fbe28008450fdffb37e8d6de0be94bd303f3efaf 100644 (file)
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
  #ifndef __BPF_ENDIAN__
  #define __BPF_ENDIAN__
  
@@ -29,6 +29,10 @@
  # define __bpf_htonl(x)                        __builtin_bswap32(x)
  # define __bpf_constant_ntohl(x)       ___constant_swab32(x)
  # define __bpf_constant_htonl(x)       ___constant_swab32(x)
+# define __bpf_be64_to_cpu(x)          __builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x)          __builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
  #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  # define __bpf_ntohs(x)                        (x)
  # define __bpf_htons(x)                        (x)
@@ -38,6 +42,10 @@
  # define __bpf_htonl(x)                        (x)
  # define __bpf_constant_ntohl(x)       (x)
  # define __bpf_constant_htonl(x)       (x)
+# define __bpf_be64_to_cpu(x)          (x)
+# define __bpf_cpu_to_be64(x)          (x)
+# define __bpf_constant_be64_to_cpu(x)  (x)
+# define __bpf_constant_cpu_to_be64(x)  (x)
  #else
  # error "Fix your compiler's __BYTE_ORDER__?!"
  #endif
@@ -54,5 +62,11 @@
  #define bpf_ntohl(x)                           \
         (__builtin_constant_p(x) ?              \
          __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x)                     \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x)                     \
+       (__builtin_constant_p(x) ?              \
+        __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
  
  #endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h

index 8b503ea142f07aea2a395b636f4a3d32d4f40178..6c4930bc6e2ec9fc3263e916865e1fa5e8069eec 100644 (file)
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
  #ifndef __BPF_HELPERS_H
  #define __BPF_HELPERS_H
  
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c

index fb5840a6254887eae6258ef6061c162c7df75a01..f10029821e1672f7038bc8f86ae43c393835b8f4 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -48,16 +48,17 @@ void test_bpf_obj_id(void)
                 /* test_obj_id.o is a dumb prog. It should never fail
                  * to load.
                  */
-               if (err)
-                       error_cnt++;
-               assert(!err);
+               if (CHECK_FAIL(err))
+                       continue;
  
                 /* Insert a magic value to the map */
                 map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
-               assert(map_fds[i] >= 0);
+               if (CHECK_FAIL(map_fds[i] < 0))
+                       goto done;
                 err = bpf_map_update_elem(map_fds[i], &array_key,
                                           &array_magic_value, 0);
-               assert(!err);
+               if (CHECK_FAIL(err))
+                       goto done;
  
                 /* Check getting map info */
                 info_len = sizeof(struct bpf_map_info) * 2;
@@ -96,9 +97,11 @@ void test_bpf_obj_id(void)
                 prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
                 prog_infos[i].nr_map_ids = 2;
                 err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
-               assert(!err);
+               if (CHECK_FAIL(err))
+                       goto done;
                 err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
-               assert(!err);
+               if (CHECK_FAIL(err))
+                       goto done;
                 err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
                                              &info_len);
                 load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
@@ -224,7 +227,8 @@ void test_bpf_obj_id(void)
                 nr_id_found++;
  
                 err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
-               assert(!err);
+               if (CHECK_FAIL(err))
+                       goto done;
  
                 err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
                 CHECK(err || info_len != sizeof(struct bpf_map_info) ||
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c

index 1a1eae356f81b3f25f81d5cad76fd178c907025a..1c01ee2600a97ce02a0b622dc51f21c30b5c3e07 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -28,8 +28,6 @@ static int check_load(const char *file, enum bpf_prog_type type)
         attr.prog_flags = BPF_F_TEST_RND_HI32;
         err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
         bpf_object__close(obj);
-       if (err)
-               error_cnt++;
         return err;
  }
  
@@ -105,12 +103,7 @@ void test_bpf_verif_scale(void)
                         continue;
  
                 err = check_load(test->file, test->attach_type);
-               if (test->fails) { /* expected to fail */
-                       if (err)
-                               error_cnt--;
-                       else
-                               error_cnt++;
-               }
+               CHECK_FAIL(err && !test->fails);
         }
  
         if (env.verifier_stats)
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c

index 6892b88ae0652404ad9d3fafce6a95f05251d239..92563898867cb690e8d8193756603f4a8683ee81 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -344,7 +344,6 @@ struct test tests[] = {
                         .tcp.dest = 8080,
                 },
                 .keys = {
-                       .nhoff = 0,
                         .nhoff = ETH_HLEN,
                         .thoff = ETH_HLEN + sizeof(struct iphdr) +
                                 sizeof(struct iphdr),
@@ -452,10 +451,8 @@ void test_flow_dissector(void)
  
         err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
                             "jmp_table", "last_dissection", &prog_fd, &keys_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         for (i = 0; i < ARRAY_SIZE(tests); i++) {
                 struct bpf_flow_keys flow_keys;
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c

index 3d59b3c841fee8def11939484b771dd5ad03b9e2..eba9a970703b6a0481eb749ed4e6dfa24f1f9d3e 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -135,10 +135,7 @@ void test_get_stack_raw_tp(void)
                 exp_cnt -= err;
         }
  
-       goto close_prog_noerr;
  close_prog:
-       error_cnt++;
-close_prog_noerr:
         if (!IS_ERR_OR_NULL(link))
                 bpf_link__destroy(link);
         if (!IS_ERR_OR_NULL(pb))
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c

index d011079fb0bfe6dbd6c963c342787fc25e97b819..c680926fce7384914aa34eb368b4c5ebc83c3c65 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -7,10 +7,8 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
         uint64_t num;
  
         map_fd = bpf_find_map(__func__, obj, "result_number");
-       if (map_fd < 0) {
-               error_cnt++;
+       if (CHECK_FAIL(map_fd < 0))
                 return;
-       }
  
         struct {
                 char *name;
@@ -44,10 +42,8 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration)
         char str[32];
  
         map_fd = bpf_find_map(__func__, obj, "result_string");
-       if (map_fd < 0) {
-               error_cnt++;
+       if (CHECK_FAIL(map_fd < 0))
                 return;
-       }
  
         struct {
                 char *name;
@@ -81,10 +77,8 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
         struct foo val;
  
         map_fd = bpf_find_map(__func__, obj, "result_struct");
-       if (map_fd < 0) {
-               error_cnt++;
+       if (CHECK_FAIL(map_fd < 0))
                 return;
-       }
  
         struct {
                 char *name;
@@ -112,16 +106,12 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
         __u8 *buff;
  
         map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
-       if (!map || !bpf_map__is_internal(map)) {
-               error_cnt++;
+       if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
                 return;
-       }
  
         map_fd = bpf_map__fd(map);
-       if (map_fd < 0) {
-               error_cnt++;
+       if (CHECK_FAIL(map_fd < 0))
                 return;
-       }
  
         buff = malloc(bpf_map__def(map)->value_size);
         if (buff)
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c

index 20ddca830e6838284321523d087a0e83fb347962..eaf64595be8810aaaaf0a6fff5665ebf9a3f018e 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -30,10 +30,8 @@ static void test_l4lb(const char *file)
         u32 *magic = (u32 *)buf;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         map_fd = bpf_find_map(__func__, obj, "vip_map");
         if (map_fd < 0)
@@ -72,10 +70,9 @@ static void test_l4lb(const char *file)
                 bytes += stats[i].bytes;
                 pkts += stats[i].pkts;
         }
-       if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
-               error_cnt++;
+       if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
+                      pkts != NUM_ITER * 2))
                 printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
-       }
  out:
         bpf_object__close(obj);
  }
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c

index ee99368c595ca0b0768ad7938212bc80977bacf1..8f91f1881d114b1e13cc239efb22180c28fcad88 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -8,14 +8,12 @@ static void *parallel_map_access(void *arg)
  
         for (i = 0; i < 10000; i++) {
                 err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK);
-               if (err) {
+               if (CHECK_FAIL(err)) {
                         printf("lookup failed\n");
-                       error_cnt++;
                         goto out;
                 }
-               if (vars[0] != 0) {
+               if (CHECK_FAIL(vars[0] != 0)) {
                         printf("lookup #%d var[0]=%d\n", i, vars[0]);
-                       error_cnt++;
                         goto out;
                 }
                 rnd = vars[1];
@@ -24,7 +22,7 @@ static void *parallel_map_access(void *arg)
                                 continue;
                         printf("lookup #%d var[1]=%d var[%d]=%d\n",
                                i, rnd, j, vars[j]);
-                       error_cnt++;
+                       CHECK_FAIL(vars[j] != rnd);
                         goto out;
                 }
         }
@@ -42,34 +40,36 @@ void test_map_lock(void)
         void *ret;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
-       if (err) {
+       if (CHECK_FAIL(err)) {
                 printf("test_map_lock:bpf_prog_load errno %d\n", errno);
                 goto close_prog;
         }
         map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
-       if (map_fd[0] < 0)
+       if (CHECK_FAIL(map_fd[0] < 0))
                 goto close_prog;
         map_fd[1] = bpf_find_map(__func__, obj, "array_map");
-       if (map_fd[1] < 0)
+       if (CHECK_FAIL(map_fd[1] < 0))
                 goto close_prog;
  
         bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK);
  
         for (i = 0; i < 4; i++)
-               assert(pthread_create(&thread_id[i], NULL,
-                                     &spin_lock_thread, &prog_fd) == 0);
+               if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+                                             &spin_lock_thread, &prog_fd)))
+                       goto close_prog;
         for (i = 4; i < 6; i++)
-               assert(pthread_create(&thread_id[i], NULL,
-                                     &parallel_map_access, &map_fd[i - 4]) == 0);
+               if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+                                             &parallel_map_access,
+                                             &map_fd[i - 4])))
+                       goto close_prog;
         for (i = 0; i < 4; i++)
-               assert(pthread_join(thread_id[i], &ret) == 0 &&
-                      ret == (void *)&prog_fd);
+               if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+                              ret != (void *)&prog_fd))
+                       goto close_prog;
         for (i = 4; i < 6; i++)
-               assert(pthread_join(thread_id[i], &ret) == 0 &&
-                      ret == (void *)&map_fd[i - 4]);
-       goto close_prog_noerr;
+               if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+                              ret != (void *)&map_fd[i - 4]))
+                       goto close_prog;
  close_prog:
-       error_cnt++;
-close_prog_noerr:
         bpf_object__close(obj);
  }
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c

index 4ecfd721a044bb6b53b616732219ffa21eb146b4..a2537dfa899c6707e6ac46d074de9621bc6d96f6 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -9,10 +9,8 @@ void test_pkt_access(void)
         int err, prog_fd;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
                                 NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c

index ac0d434358061a57eccd854fa1d1ad0c752d451c..5f7aea6050199bf8b2c0dffce6d0cbd424a005ab 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -9,10 +9,8 @@ void test_pkt_md_access(void)
         int err, prog_fd;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
                                 NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c

index e60cd5ff1f559543d05a2963967f4624f708d094..faccc66f4e3968af3c4efd0a03c33719f9d151dc 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -27,10 +27,8 @@ static void test_queue_stack_map_by_type(int type)
                 return;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         map_in_fd = bpf_find_map(__func__, obj, "map_in");
         if (map_in_fd < 0)
@@ -43,10 +41,8 @@ static void test_queue_stack_map_by_type(int type)
         /* Push 32 elements to the input map */
         for (i = 0; i < MAP_SIZE; i++) {
                 err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
-               if (err) {
-                       error_cnt++;
+               if (CHECK_FAIL(err))
                         goto out;
-               }
         }
  
         /* The eBPF program pushes iph.saddr in the output map,
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c

index 4a4f428d1a78e1732c1e71384017b9b1379624df..5c78e2b5a917421d0f8c2d21a91584b3514379a7 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -10,10 +10,8 @@ void test_reference_tracking(void)
         int err = 0;
  
         obj = bpf_object__open(file);
-       if (IS_ERR(obj)) {
-               error_cnt++;
+       if (CHECK_FAIL(IS_ERR(obj)))
                 return;
-       }
  
         bpf_object__for_each_program(prog, obj) {
                 const char *title;
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c

index 1575f0a1f58659752e848848fe23e475daa16c9d..b607112c64e7ac90781b38b16c90921132f61dc6 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -8,7 +8,7 @@ static void sigusr1_handler(int signum)
         sigusr1_received++;
  }
  
-static int test_send_signal_common(struct perf_event_attr *attr,
+static void test_send_signal_common(struct perf_event_attr *attr,
                                     int prog_type,
                                     const char *test_name)
  {
@@ -23,13 +23,13 @@ static int test_send_signal_common(struct perf_event_attr *attr,
  
         if (CHECK(pipe(pipe_c2p), test_name,
                   "pipe pipe_c2p error: %s\n", strerror(errno)))
-               goto no_fork_done;
+               return;
  
         if (CHECK(pipe(pipe_p2c), test_name,
                   "pipe pipe_p2c error: %s\n", strerror(errno))) {
                 close(pipe_c2p[0]);
                 close(pipe_c2p[1]);
-               goto no_fork_done;
+               return;
         }
  
         pid = fork();
@@ -38,7 +38,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
                 close(pipe_c2p[1]);
                 close(pipe_p2c[0]);
                 close(pipe_p2c[1]);
-               goto no_fork_done;
+               return;
         }
  
         if (pid == 0) {
@@ -125,7 +125,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
                 goto disable_pmu;
         }
  
-       err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
+       CHECK(buf[0] != '2', test_name, "incorrect result\n");
  
         /* notify child safe to exit */
         write(pipe_p2c[1], buf, 1);
@@ -138,11 +138,9 @@ prog_load_failure:
         close(pipe_c2p[0]);
         close(pipe_p2c[1]);
         wait(NULL);
-no_fork_done:
-       return err;
  }
  
-static int test_send_signal_tracepoint(void)
+static void test_send_signal_tracepoint(void)
  {
         const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
         struct perf_event_attr attr = {
@@ -159,21 +157,21 @@ static int test_send_signal_tracepoint(void)
         if (CHECK(efd < 0, "tracepoint",
                   "open syscalls/sys_enter_nanosleep/id failure: %s\n",
                   strerror(errno)))
-               return -1;
+               return;
  
         bytes = read(efd, buf, sizeof(buf));
         close(efd);
         if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
                   "read syscalls/sys_enter_nanosleep/id failure: %s\n",
                   strerror(errno)))
-               return -1;
+               return;
  
         attr.config = strtol(buf, NULL, 0);
  
-       return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+       test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
  }
  
-static int test_send_signal_perf(void)
+static void test_send_signal_perf(void)
  {
         struct perf_event_attr attr = {
                 .sample_period = 1,
@@ -181,11 +179,11 @@ static int test_send_signal_perf(void)
                 .config = PERF_COUNT_SW_CPU_CLOCK,
         };
  
-       return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
-                                      "perf_sw_event");
+       test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
+                               "perf_sw_event");
  }
  
-static int test_send_signal_nmi(void)
+static void test_send_signal_nmi(void)
  {
         struct perf_event_attr attr = {
                 .sample_freq = 50,
@@ -204,25 +202,24 @@ static int test_send_signal_nmi(void)
                 if (errno == ENOENT) {
                         printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
                                __func__);
-                       return 0;
+                       test__skip();
+                       return;
                 }
                 /* Let the test fail with a more informative message */
         } else {
                 close(pmu_fd);
         }
  
-       return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
-                                      "perf_hw_event");
+       test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
+                               "perf_hw_event");
  }
  
  void test_send_signal(void)
  {
-       int ret = 0;
-
         if (test__start_subtest("send_signal_tracepoint"))
-               ret |= test_send_signal_tracepoint();
+               test_send_signal_tracepoint();
         if (test__start_subtest("send_signal_perf"))
-               ret |= test_send_signal_perf();
+               test_send_signal_perf();
         if (test__start_subtest("send_signal_nmi"))
-               ret |= test_send_signal_nmi();
+               test_send_signal_nmi();
  }
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c

index 114ebe6a438e562d864971a5a5d174b1e0936f8a..1ae00cd3174ef8723c98c88350b85ccad7466ac2 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -11,19 +11,19 @@ void test_spinlock(void)
         void *ret;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
-       if (err) {
+       if (CHECK_FAIL(err)) {
                 printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
                 goto close_prog;
         }
         for (i = 0; i < 4; i++)
-               assert(pthread_create(&thread_id[i], NULL,
-                                     &spin_lock_thread, &prog_fd) == 0);
+               if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+                                             &spin_lock_thread, &prog_fd)))
+                       goto close_prog;
+
         for (i = 0; i < 4; i++)
-               assert(pthread_join(thread_id[i], &ret) == 0 &&
-                      ret == (void *)&prog_fd);
-       goto close_prog_noerr;
+               if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+                              ret != (void *)&prog_fd))
+                       goto close_prog;
  close_prog:
-       error_cnt++;
-close_prog_noerr:
         bpf_object__close(obj);
  }
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c

index ac44fda84833b4ca1f9a2b409cc8f7295de6ead8..d841dced971ff65458a4296573b2d56594a4291c 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -51,9 +51,10 @@ retry:
                   "err %d errno %d\n", err, errno))
                 goto disable_pmu;
  
-       assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
-              == 0);
-       assert(system("./urandom_read") == 0);
+       if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+               goto disable_pmu;
+       if (CHECK_FAIL(system("./urandom_read")))
+               goto disable_pmu;
         /* disable stack trace collection */
         key = 0;
         val = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c

index 9557b7dfb78270c46fdf38fb53980edfbc76d49b..f62aa0eb959bb0fc51bee1c2c3f66007818e311b 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -82,9 +82,10 @@ retry:
                   "err %d errno %d\n", err, errno))
                 goto disable_pmu;
  
-       assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
-              == 0);
-       assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+       if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+               goto disable_pmu;
+       if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000")))
+               goto disable_pmu;
         /* disable stack trace collection */
         key = 0;
         val = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c

index fc539335c5b3ecc14c953ffddcf16d92d56e2791..37269d23df93e6f5b26a20e190a0108db90e1633 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -26,19 +26,19 @@ void test_stacktrace_map(void)
  
         /* find map fds */
         control_map_fd = bpf_find_map(__func__, obj, "control_map");
-       if (control_map_fd < 0)
+       if (CHECK_FAIL(control_map_fd < 0))
                 goto disable_pmu;
  
         stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
-       if (stackid_hmap_fd < 0)
+       if (CHECK_FAIL(stackid_hmap_fd < 0))
                 goto disable_pmu;
  
         stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
-       if (stackmap_fd < 0)
+       if (CHECK_FAIL(stackmap_fd < 0))
                 goto disable_pmu;
  
         stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
-       if (stack_amap_fd < 0)
+       if (CHECK_FAIL(stack_amap_fd < 0))
                 goto disable_pmu;
  
         /* give some time for bpf program run */
@@ -55,23 +55,20 @@ void test_stacktrace_map(void)
         err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
         if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
                   "err %d errno %d\n", err, errno))
-               goto disable_pmu_noerr;
+               goto disable_pmu;
  
         err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
         if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
                   "err %d errno %d\n", err, errno))
-               goto disable_pmu_noerr;
+               goto disable_pmu;
  
         stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
         err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
         if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
                   "err %d errno %d\n", err, errno))
-               goto disable_pmu_noerr;
+               goto disable_pmu;
  
-       goto disable_pmu_noerr;
  disable_pmu:
-       error_cnt++;
-disable_pmu_noerr:
         bpf_link__destroy(link);
  close_prog:
         bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c

index fbfa8e76cf631b36afea5d322fcf91e13c9cebe9..404a5498e1a35705ab3c47aa6cefe65d12b5bb83 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -26,15 +26,15 @@ void test_stacktrace_map_raw_tp(void)
  
         /* find map fds */
         control_map_fd = bpf_find_map(__func__, obj, "control_map");
-       if (control_map_fd < 0)
+       if (CHECK_FAIL(control_map_fd < 0))
                 goto close_prog;
  
         stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
-       if (stackid_hmap_fd < 0)
+       if (CHECK_FAIL(stackid_hmap_fd < 0))
                 goto close_prog;
  
         stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
-       if (stackmap_fd < 0)
+       if (CHECK_FAIL(stackmap_fd < 0))
                 goto close_prog;
  
         /* give some time for bpf program run */
@@ -58,10 +58,7 @@ void test_stacktrace_map_raw_tp(void)
                   "err %d errno %d\n", err, errno))
                 goto close_prog;
  
-       goto close_prog_noerr;
  close_prog:
-       error_cnt++;
-close_prog_noerr:
         if (!IS_ERR_OR_NULL(link))
                 bpf_link__destroy(link);
         bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c

index 958a3d88de9958f771f26b4f1b9bdb30234eb5b8..1bdc1d86a50c85906f771042dae01e1c3621f1c3 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -70,9 +70,6 @@ void test_task_fd_query_rawtp(void)
         if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
                 goto close_prog;
  
-       goto close_prog_noerr;
  close_prog:
-       error_cnt++;
-close_prog_noerr:
         bpf_object__close(obj);
  }
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c

index f9b70e81682b7e8d57b82b0755c3c90f480300c8..3f131b8fe328a7e4e2240ec5f0164ef518b30801 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -62,14 +62,9 @@ static void test_task_fd_query_tp_core(const char *probe_name,
                   fd_type, buf))
                 goto close_pmu;
  
-       close(pmu_fd);
-       goto close_prog_noerr;
-
  close_pmu:
         close(pmu_fd);
  close_prog:
-       error_cnt++;
-close_prog_noerr:
         bpf_object__close(obj);
  }
  
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c

index bb8759d69099c7a948877110bf2420f3858a5454..594307dffd13bc42b66cd3bae370a2ac70806ad6 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -10,10 +10,8 @@ void test_tcp_estats(void)
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
         CHECK(err, "", "err %d errno %d\n", err, errno);
-       if (err) {
-               error_cnt++;
+       if (err)
                 return;
-       }
  
         bpf_object__close(obj);
  }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c

index a74167289545b85c418b3e454335fb06e8065427..dcb5ecac778e8a2cf07d6103f25a5252fe6fd1cd 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -16,10 +16,8 @@ void test_xdp(void)
         int err, prog_fd, map_fd;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         map_fd = bpf_find_map(__func__, obj, "vip2tnl");
         if (map_fd < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c

index 922aa0a1976425993719ac46e8909debd0143c46..3744196d7cba923d249bfe541679210ab09a78eb 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -10,10 +10,8 @@ void test_xdp_adjust_tail(void)
         int err, prog_fd;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                 buf, &size, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c

index 15f7c272edb0ab419075cc134927fd1189a38f2e..c9404e6b226ee7f0c652a55b3eda3e1e3f65cf7f 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -31,10 +31,8 @@ void test_xdp_noinline(void)
         u32 *magic = (u32 *)buf;
  
         err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (err) {
-               error_cnt++;
+       if (CHECK_FAIL(err))
                 return;
-       }
  
         map_fd = bpf_find_map(__func__, obj, "vip_map");
         if (map_fd < 0)
@@ -73,8 +71,8 @@ void test_xdp_noinline(void)
                 bytes += stats[i].bytes;
                 pkts += stats[i].pkts;
         }
-       if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
-               error_cnt++;
+       if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
+                      pkts != NUM_ITER * 2)) {
                 printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
                        bytes, pkts);
         }
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c

new file mode 100644 (file)

index 0000000..dede0fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM                     0xdeadbeef /* only this socket level is handled; others pass through */
+#define CUSTOM_INHERIT1                        0 /* optname backed by cloned1_map */
+#define CUSTOM_INHERIT2                        1 /* optname backed by cloned2_map */
+#define CUSTOM_LISTENER                        2 /* not referenced below: any other optname uses listener_only_map */
+
+struct sockopt_inherit { /* per-socket value kept in sk storage */
+       __u8 val; /* single byte copied to/from optval[0] */
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); /* NOTE(review): BPF_F_CLONE presumably copies this storage to sockets cloned from a listener - confirm */
+       __type(key, int);
+       __type(value, struct sockopt_inherit);
+} cloned1_map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); /* same flags as cloned1_map */
+       __type(key, int);
+       __type(value, struct sockopt_inherit);
+} cloned2_map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC); /* BPF_F_CLONE is deliberately absent here */
+       __type(key, int);
+       __type(value, struct sockopt_inherit);
+} listener_only_map SEC(".maps");
+
+/*
+ * Return the sk storage entry that backs ctx->optname for ctx->sk, creating
+ * it if needed: CUSTOM_INHERIT1 and CUSTOM_INHERIT2 each have their own map,
+ * every other optname lands in listener_only_map.
+ *
+ * The result is whatever bpf_sk_storage_get() returns and is NULL-checked by
+ * both callers.
+ */
+static __inline struct sockopt_inherit *get_storage(struct bpf_sockopt *ctx)
+{
+       switch (ctx->optname) {
+       case CUSTOM_INHERIT1:
+               return bpf_sk_storage_get(&cloned1_map, ctx->sk, 0,
+                                         BPF_SK_STORAGE_GET_F_CREATE);
+       case CUSTOM_INHERIT2:
+               return bpf_sk_storage_get(&cloned2_map, ctx->sk, 0,
+                                         BPF_SK_STORAGE_GET_F_CREATE);
+       default:
+               return bpf_sk_storage_get(&listener_only_map, ctx->sk, 0,
+                                         BPF_SK_STORAGE_GET_F_CREATE);
+       }
+}
+
+/*
+ * getsockopt hook: for SOL_CUSTOM, answer with the byte stored in the
+ * socket's sk storage. Returns 1 to let the call proceed and 0 to reject it
+ * (reported as EPERM).
+ */
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+       __u8 *data_end = ctx->optval_end;
+       __u8 *data = ctx->optval;
+       struct sockopt_inherit *entry;
+
+       /* Levels other than SOL_CUSTOM are let through untouched. */
+       if (ctx->level != SOL_CUSTOM)
+               return 1;
+
+       /* One byte is written below, so at least one must be available. */
+       if (data + 1 > data_end)
+               return 0; /* EPERM, bounds check */
+
+       entry = get_storage(ctx);
+       if (!entry)
+               return 0; /* EPERM, couldn't get sk storage */
+
+       /* Reset system call return value to zero */
+       ctx->retval = 0;
+
+       data[0] = entry->val;
+       ctx->optlen = 1;
+
+       return 1;
+}
+
+/*
+ * setsockopt hook: for SOL_CUSTOM, remember the first byte of the option
+ * value in the socket's sk storage. Returns 1 to let the call proceed and 0
+ * to reject it (reported as EPERM).
+ */
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+       __u8 *data_end = ctx->optval_end;
+       __u8 *data = ctx->optval;
+       struct sockopt_inherit *entry;
+
+       /* Levels other than SOL_CUSTOM are let through untouched. */
+       if (ctx->level != SOL_CUSTOM)
+               return 1;
+
+       /* One byte is read below, so at least one must be present. */
+       if (data + 1 > data_end)
+               return 0; /* EPERM, bounds check */
+
+       entry = get_storage(ctx);
+       if (!entry)
+               return 0; /* EPERM, couldn't get sk storage */
+
+       entry->val = data[0];
+       /*
+        * NOTE(review): -1 presumably tells the kernel to skip its own
+        * setsockopt handling -- confirm against the sockopt hook docs.
+        */
+       ctx->optlen = -1;
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c

index a334a0e882e46006eaad43ba09a3e75bf73be525..41a3ebcd593dc766cd81d81108a4b9888a3cf34b 100644 (file)
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -12,10 +12,6 @@
  
  #define SR6_FLAG_ALERT (1 << 4)
  
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
-                               0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
-                               0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
  #define BPF_PACKET_HEADER __attribute__((packed))
  
  struct ip6_t {
@@ -276,8 +272,8 @@ int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
                         return 0;
  
                 // check if egress TLV value is correct
-               if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
-                               ntohll(egr_addr.lo) == 0x4)
+               if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 &&
+                   bpf_be64_to_cpu(egr_addr.lo) == 0x4)
                         return 1;
         }
  
@@ -308,8 +304,8 @@ int __encap_srh(struct __sk_buff *skb)
  
         #pragma clang loop unroll(full)
         for (unsigned long long lo = 0; lo < 4; lo++) {
-               seg->lo = htonll(4 - lo);
-               seg->hi = htonll(hi);
+               seg->lo = bpf_cpu_to_be64(4 - lo);
+               seg->hi = bpf_cpu_to_be64(hi);
                 seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
         }
  
@@ -349,8 +345,8 @@ int __add_egr_x(struct __sk_buff *skb)
         if (err)
                 return BPF_DROP;
  
-       addr.lo = htonll(lo);
-       addr.hi = htonll(hi);
+       addr.lo = bpf_cpu_to_be64(lo);
+       addr.hi = bpf_cpu_to_be64(hi);
         err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
                                   (void *)&addr, sizeof(addr));
         if (err)
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c

index 1dbe1d4d467e7cfc00506948baabfbf14d3cad5b..c4d104428643ea3b9b960c0ba1be98badc77f306 100644 (file)
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -12,10 +12,6 @@
  
  #define SR6_FLAG_ALERT (1 << 4)
  
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
-                               0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
-                               0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
  #define BPF_PACKET_HEADER __attribute__((packed))
  
  struct ip6_t {
@@ -251,8 +247,8 @@ int __add_egr_x(struct __sk_buff *skb)
         if (err)
                 return BPF_DROP;
  
-       addr.lo = htonll(lo);
-       addr.hi = htonll(hi);
+       addr.lo = bpf_cpu_to_be64(lo);
+       addr.hi = bpf_cpu_to_be64(hi);
         err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
                                   (void *)&addr, sizeof(addr));
         if (err)
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh

new file mode 100755 (executable)

index 0000000..4ba5a34
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+ERROR=0
+TMPDIR=
+
+# EXIT handler: remove the temporary build directory if one is still around,
+# then exit with $ERROR, so that a single failed build makes the whole script
+# return non-0 while the remaining builds still get a chance to run.
+return_value() {
+       if [ -d "$TMPDIR" ] ; then
+               # Quoted so that a path with spaces or globs is removed as-is.
+               rm -rf -- "$TMPDIR"
+       fi
+       exit $ERROR
+}
+trap return_value EXIT
+
+# Print the usage text and leave early when help is requested.
+if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then
+       echo -e "$0 [-j <n>]"
+       echo -e "\tTest the different ways of building bpftool."
+       echo -e ""
+       echo -e "\tOptions:"
+       echo -e "\t\t-j <n>:\tPass -j flag to 'make'."
+       exit
+fi
+
+J=$*
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+# Expansions are quoted so that a checkout path containing spaces still works.
+SCRIPT_REL_PATH=$(realpath --relative-to="$PWD" "$0")
+SCRIPT_REL_DIR=$(dirname "$SCRIPT_REL_PATH")
+KDIR_ROOT_DIR=$(realpath "$PWD/$SCRIPT_REL_DIR/../../../../")
+# Never run the builds from an unexpected directory if the move fails.
+cd "$KDIR_ROOT_DIR" || { ERROR=1; exit 1; }
+
+# Look for an executable file named "bpftool" under directory $1 and print its
+# path. Sets ERROR=1 and prints a failure message when none is found.
+check() {
+       local dir
+
+       dir=$(realpath "$1")
+
+       echo -n "binary:  "
+       # "find" exits with a non-zero status if the file is found (because
+       # "false" is then run on it), and with zero if nothing matched.
+       find "$dir" -type f -executable -name bpftool -print -exec false {} + && \
+               ERROR=1 && printf "FAILURE: Did not find bpftool\n"
+}
+
+# Run "make" with the given arguments (plus the global $J flags), check that a
+# bpftool binary showed up under the last argument (or under "." when called
+# without arguments), then run "make clean" from that same location.
+make_and_clean() {
+       local where=.
+
+       # ${@: -1} is only meaningful when there is at least one argument.
+       if [ $# -ge 1 ] ; then
+               where=${@: -1}
+       fi
+
+       echo -e "\$PWD:    $PWD"
+       echo -e "command: make -s $* >/dev/null"
+       if ! make $J -s $* >/dev/null ; then
+               ERROR=1
+       fi
+       check $where
+       (
+               # Subshell: the directory change must not leak to the caller.
+               if [ $# -ge 1 ] ; then
+                       cd $where
+               fi
+               make -s clean
+       )
+       echo
+}
+
+# Run "make" with an out-of-tree output directory. The last argument is the
+# name of the make variable to set (e.g. O or OUTPUT), the arguments before
+# it are passed to "make" unchanged. A fresh temporary directory is used as
+# the output location, checked for a bpftool binary, and removed afterwards.
+make_with_tmpdir() {
+       local ARGS
+
+       TMPDIR=$(mktemp -d)
+       if [ ! -d "$TMPDIR" ] ; then
+               # Without this guard an empty $TMPDIR would turn the output
+               # directory passed to make into "/".
+               ERROR=1
+               printf "FAILURE: Could not create temporary directory\n"
+               echo
+               return
+       fi
+       if [ $# -ge 2 ] ; then
+               ARGS=${@:1:(($# - 1))}
+       fi
+       echo -e "\$PWD:    $PWD"
+       echo -e "command: make -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null"
+       # $J and $ARGS are left unquoted on purpose: each may hold several
+       # words that must reach make as separate arguments.
+       if ! make $J -s $ARGS "${@: -1}=$TMPDIR/" >/dev/null ; then
+               ERROR=1
+       fi
+       check "$TMPDIR"
+       rm -rf -- "$TMPDIR"
+       echo
+}
+
+echo "Trying to build bpftool"
+echo -e "... through kbuild\n"
+
+if [ -f ".config" ] ; then
+       make_and_clean tools/bpf
+
+       ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
+       ## down from toplevel Makefile to bpftool's Makefile.
+
+       # make_with_tmpdir tools/bpf OUTPUT
+       echo -e "skip:    make tools/bpf OUTPUT=<dir> (not supported)\n"
+
+       make_with_tmpdir tools/bpf O
+else
+       echo -e "skip:    make tools/bpf (no .config found)\n"
+       echo -e "skip:    make tools/bpf OUTPUT=<dir> (not supported)\n"
+       echo -e "skip:    make tools/bpf O=<dir> (no .config found)\n"
+fi
+
+echo -e "... from kernel source tree\n"
+
+make_and_clean -C tools/bpf/bpftool
+
+make_with_tmpdir -C tools/bpf/bpftool OUTPUT
+
+make_with_tmpdir -C tools/bpf/bpftool O
+
+echo -e "... from tools/\n"
+cd tools/
+
+make_and_clean bpf
+
+## In tools/bpf/Makefile, function "descend" is called and passes $(O) and
+## $(OUTPUT). We would like $(OUTPUT) to have "bpf/bpftool/" appended before
+## calling bpftool's Makefile, but this is not the case as the "descend"
+## function focuses on $(O)/$(subdir). However, in the present case, updating
+## $(O) to have $(OUTPUT) recomputed from it in bpftool's Makefile does not
+## work, because $(O) is not defined from command line and $(OUTPUT) is not
+## updated in tools/scripts/Makefile.include.
+##
+## Workarounds would require either a) editing "descend" or using another way
+## to call bpftool's Makefile, b) modifying the conditions to update $(OUTPUT)
+## and other variables in tools/scripts/Makefile.include (at the risk of
+## breaking the build of other tools), or c) manually appending the
+## "bpf/bpftool" suffix to $(OUTPUT) in bpf's Makefile, which may break if
+## targets for other directories use "descend" in the future.
+
+# make_with_tmpdir bpf OUTPUT
+echo -e "skip:    make bpf OUTPUT=<dir> (not supported)\n"
+
+make_with_tmpdir bpf O
+
+echo -e "... from bpftool's dir\n"
+cd bpf/bpftool
+
+make_and_clean
+
+make_with_tmpdir OUTPUT
+
+make_with_tmpdir O
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py

index 425f9ed27c3b1b9de7e733358c3035b595dbef21..15a666329a34df435dad7021845c0cbb377fb615 100755 (executable)
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1353,7 +1353,7 @@ try:
      bpftool_prog_list_wait(expected=1)
  
      ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
-    fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+    fail(ifnameB != simB1['ifname'], "program not bound to original device")
      simB1.remove()
      bpftool_prog_list_wait(expected=1)
  
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c

index 12895d03d58b0d5f7e0f0e40d95e85577dab2840..e8616e778cb5070cd6a1685f459499e09677b28f 100644 (file)
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -8,22 +8,20 @@
  
  /* defined in test_progs.h */
  struct test_env env;
-int error_cnt, pass_cnt;
  
  struct prog_test_def {
         const char *test_name;
         int test_num;
         void (*run_test)(void);
         bool force_log;
-       int pass_cnt;
         int error_cnt;
+       int skip_cnt;
         bool tested;
  
         const char *subtest_name;
         int subtest_num;
  
         /* store counts before subtest started */
-       int old_pass_cnt;
         int old_error_cnt;
  };
  
@@ -47,6 +45,7 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
  
         if (env.verbose || test->force_log || failed) {
                 if (env.log_cnt) {
+                       env.log_buf[env.log_cnt] = '\0';
                         fprintf(env.stdout, "%s", env.log_buf);
                         if (env.log_buf[env.log_cnt - 1] != '\n')
                                 fprintf(env.stdout, "\n");
@@ -56,15 +55,24 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
         fseeko(stdout, 0, SEEK_SET); /* rewind */
  }
  
+static void skip_account(void)
+{
+       if (env.test->skip_cnt) {
+               env.skip_cnt++;
+               env.test->skip_cnt = 0;
+       }
+}
+
  void test__end_subtest()
  {
         struct prog_test_def *test = env.test;
-       int sub_error_cnt = error_cnt - test->old_error_cnt;
+       int sub_error_cnt = test->error_cnt - test->old_error_cnt;
  
         if (sub_error_cnt)
                 env.fail_cnt++;
         else
                 env.sub_succ_cnt++;
+       skip_account();
  
         dump_test_log(test, sub_error_cnt);
  
@@ -95,8 +103,7 @@ bool test__start_subtest(const char *name)
                 return false;
  
         test->subtest_name = name;
-       env.test->old_pass_cnt = pass_cnt;
-       env.test->old_error_cnt = error_cnt;
+       env.test->old_error_cnt = env.test->error_cnt;
  
         return true;
  }
@@ -105,6 +112,16 @@ void test__force_log() {
         env.test->force_log = true;
  }
  
+void test__skip(void)
+{
+       env.test->skip_cnt++;
+}
+
+void test__fail(void)
+{
+       env.test->error_cnt++;
+}
+
  struct ipv4_packet pkt_v4 = {
         .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
         .iph.ihl = 5,
@@ -129,7 +146,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
         map = bpf_object__find_map_by_name(obj, name);
         if (!map) {
                 printf("%s:FAIL:map '%s' not found\n", test, name);
-               error_cnt++;
+               test__fail();
                 return -1;
         }
         return bpf_map__fd(map);
@@ -488,8 +505,6 @@ int main(int argc, char **argv)
         stdio_hijack();
         for (i = 0; i < prog_test_cnt; i++) {
                 struct prog_test_def *test = &prog_test_defs[i];
-               int old_pass_cnt = pass_cnt;
-               int old_error_cnt = error_cnt;
  
                 env.test = test;
                 test->test_num = i + 1;
@@ -504,12 +519,11 @@ int main(int argc, char **argv)
                         test__end_subtest();
  
                 test->tested = true;
-               test->pass_cnt = pass_cnt - old_pass_cnt;
-               test->error_cnt = error_cnt - old_error_cnt;
                 if (test->error_cnt)
                         env.fail_cnt++;
                 else
                         env.succ_cnt++;
+               skip_account();
  
                 dump_test_log(test, test->error_cnt);
  
@@ -518,11 +532,11 @@ int main(int argc, char **argv)
                         test->error_cnt ? "FAIL" : "OK");
         }
         stdio_restore();
-       printf("Summary: %d/%d PASSED, %d FAILED\n",
-              env.succ_cnt, env.sub_succ_cnt, env.fail_cnt);
+       printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+              env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
  
         free(env.test_selector.num_set);
         free(env.subtest_selector.num_set);
  
-       return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+       return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
  }
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h

index 37d427f5a1e5c68a62e8d71f95b79905e96acace..c8edb9464ba637c03412ef2ceadc4bab55eab544 100644 (file)
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -38,8 +38,6 @@ typedef __u16 __sum16;
  #include "trace_helpers.h"
  #include "flow_dissector_load.h"
  
-struct prog_test_def;
-
  struct test_selector {
         const char *name;
         bool *num_set;
@@ -64,14 +62,15 @@ struct test_env {
         int succ_cnt; /* successful tests */
         int sub_succ_cnt; /* successful sub-tests */
         int fail_cnt; /* total failed tests + sub-tests */
+       int skip_cnt; /* skipped tests */
  };
  
-extern int error_cnt;
-extern int pass_cnt;
  extern struct test_env env;
  
  extern void test__force_log();
  extern bool test__start_subtest(const char *name);
+extern void test__skip(void);
+extern void test__fail(void);
  
  #define MAGIC_BYTES 123
  
@@ -94,17 +93,25 @@ extern struct ipv6_packet pkt_v6;
  #define _CHECK(condition, tag, duration, format...) ({                 \
         int __ret = !!(condition);                                      \
         if (__ret) {                                                    \
-               error_cnt++;                                            \
+               test__fail();                                           \
                 printf("%s:FAIL:%s ", __func__, tag);                   \
                 printf(format);                                         \
         } else {                                                        \
-               pass_cnt++;                                             \
                 printf("%s:PASS:%s %d nsec\n",                          \
                        __func__, tag, duration);                        \
         }                                                               \
         __ret;                                                          \
  })
  
+#define CHECK_FAIL(condition) ({                                       \
+       int __ret = !!(condition);                                      \
+       if (__ret) {                                                    \
+               test__fail();                                           \
+               printf("%s:FAIL:%d\n", __func__, __LINE__);             \
+       }                                                               \
+       __ret;                                                          \
+})
+
  #define CHECK(condition, tag, format...) \
         _CHECK(condition, tag, duration, format)
  #define CHECK_ATTR(condition, tag, format...) \
diff --git a/tools/testing/selftests/bpf/test_sockopt_inherit.c b/tools/testing/selftests/bpf/test_sockopt_inherit.c

new file mode 100644 (file)

index 0000000..1bf6998
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_inherit.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pthread.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH                                "/sockopt_inherit"
+#define SOL_CUSTOM                     0xdeadbeef
+#define CUSTOM_INHERIT1                        0
+#define CUSTOM_INHERIT2                        1
+#define CUSTOM_LISTENER                        2
+
+/*
+ * Open a TCP/IPv4 client socket and connect it to the address server_fd is
+ * bound to, as reported by getsockname().
+ *
+ * Returns the connected socket on success, -1 on failure (the client socket
+ * is closed again in that case).
+ */
+static int connect_to_server(int server_fd)
+{
+       struct sockaddr_storage server_addr;
+       socklen_t addr_len = sizeof(server_addr);
+       int client_fd;
+
+       client_fd = socket(AF_INET, SOCK_STREAM, 0);
+       if (client_fd < 0) {
+               log_err("Failed to create client socket");
+               return -1;
+       }
+
+       if (getsockname(server_fd, (struct sockaddr *)&server_addr,
+                       &addr_len) != 0) {
+               log_err("Failed to get server addr");
+               close(client_fd);
+               return -1;
+       }
+
+       if (connect(client_fd, (const struct sockaddr *)&server_addr,
+                   addr_len) < 0) {
+               log_err("Fail to connect to server");
+               close(client_fd);
+               return -1;
+       }
+
+       return client_fd;
+}
+
+/*
+ * Read one byte for (SOL_CUSTOM, optname) from fd and compare it with
+ * expected. msg only prefixes the log output.
+ *
+ * Returns 0 on a match, 1 if getsockopt() failed or the value differs.
+ */
+static int verify_sockopt(int fd, int optname, const char *msg, char expected)
+{
+       socklen_t len = 1;
+       char value = 0;
+
+       if (getsockopt(fd, SOL_CUSTOM, optname, &value, &len)) {
+               log_err("%s: failed to call getsockopt", msg);
+               return 1;
+       }
+
+       printf("%s %d: got=0x%x ? expected=0x%x\n", msg, optname, value, expected);
+
+       if (value == expected)
+               return 0;
+
+       log_err("%s: unexpected getsockopt value %d != %d", msg,
+               value, expected);
+       return 1;
+}
+
+/*
+ * Server side of the test, meant to run in its own thread. arg points to the
+ * int holding the bound server socket.
+ *
+ * Puts the socket into listening state, checks that all three custom options
+ * read back as 1 on it, accepts one client and checks that the accepted
+ * socket reports 1 for CUSTOM_INHERIT1 and CUSTOM_INHERIT2 but 0 for
+ * CUSTOM_LISTENER.
+ *
+ * Returns the number of failed checks, cast to a pointer. A listen() or
+ * accept() failure ends the whole process through error().
+ */
+static void *server_thread(void *arg)
+{
+       struct sockaddr_storage addr;
+       socklen_t len = sizeof(addr);
+       int fd = *(int *)arg;
+       int client_fd;
+       int err = 0;
+
+       if (listen(fd, 1) < 0)
+               error(1, errno, "Failed to listen on socket");
+
+       err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
+       err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
+       err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
+
+       client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+       if (client_fd < 0)
+               error(1, errno, "Failed to accept client");
+
+       /* Only the two "inherit" options are expected on the new socket. */
+       err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
+       err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
+       err += verify_sockopt(client_fd, CUSTOM_LISTENER, "accept", 0);
+
+       close(client_fd);
+
+       return (void *)(long)err;
+}
+
+/*
+ * Create a TCP/IPv4 socket, set every custom option from CUSTOM_INHERIT1 to
+ * CUSTOM_LISTENER to 0x01 on it and bind it to the loopback address.
+ * sin_port is left at zero; the client looks the bound address up with
+ * getsockname().
+ *
+ * Returns the bound (not yet listening) socket, or -1 on failure.
+ */
+static int start_server(void)
+{
+       struct sockaddr_in addr = {
+               .sin_family = AF_INET,
+               .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+       };
+       char one;
+       int opt;
+       int fd;
+
+       fd = socket(AF_INET, SOCK_STREAM, 0);
+       if (fd < 0) {
+               log_err("Failed to create server socket");
+               return -1;
+       }
+
+       for (opt = CUSTOM_INHERIT1; opt <= CUSTOM_LISTENER; opt++) {
+               one = 0x01;
+               if (setsockopt(fd, SOL_CUSTOM, opt, &one, 1)) {
+                       log_err("Failed to call setsockopt(%d)", opt);
+                       goto close_fd;
+               }
+       }
+
+       if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+               log_err("Failed to bind socket");
+               goto close_fd;
+       }
+
+       return fd;
+
+close_fd:
+       close(fd);
+       return -1;
+}
+
+/*
+ * Attach the program found in section "title" of obj to cgroup_fd. The
+ * attach type is derived from the section title.
+ *
+ * Returns 0 on success, -1 on failure (the reason is logged).
+ */
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+       enum bpf_attach_type attach_type;
+       enum bpf_prog_type prog_type;
+       struct bpf_program *prog;
+       int err;
+
+       /* prog_type is filled in by the call but only attach_type is used. */
+       err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+       if (err) {
+               log_err("Failed to deduce types for %s BPF program", title);
+               return -1;
+       }
+
+       prog = bpf_object__find_program_by_title(obj, title);
+       if (!prog) {
+               log_err("Failed to find %s BPF program", title);
+               return -1;
+       }
+
+       err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+                             attach_type, 0);
+       if (err) {
+               log_err("Failed to attach %s BPF program", title);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Load ./sockopt_inherit.o, attach its getsockopt and setsockopt programs to
+ * cgroup_fd, then check the custom options on the listening socket and the
+ * accepted socket (both in the server thread) and on the connecting client
+ * socket (here).
+ *
+ * Returns 0 when everything passed, non-zero otherwise.
+ */
+static int run_test(int cgroup_fd)
+{
+       struct bpf_prog_load_attr attr = {
+               .file = "./sockopt_inherit.o",
+       };
+       int server_fd = -1, client_fd;
+       struct bpf_object *obj;
+       void *server_err;
+       pthread_t tid;
+       int ignored;
+       int err;
+
+       err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+       if (err) {
+               log_err("Failed to load BPF object");
+               return -1;
+       }
+
+       err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+       if (err)
+               goto close_bpf_object;
+
+       err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+       if (err)
+               goto close_bpf_object;
+
+       server_fd = start_server();
+       if (server_fd < 0) {
+               err = -1;
+               goto close_bpf_object;
+       }
+
+       /*
+        * pthread_create() reports failure through its return value, not
+        * through errno. Bail out here: joining a thread that was never
+        * created would use an uninitialised tid.
+        */
+       err = pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+       if (err) {
+               errno = err;
+               log_err("Failed to create server thread");
+               err = -1;
+               goto close_server_fd;
+       }
+
+       client_fd = connect_to_server(server_fd);
+       if (client_fd < 0) {
+               /*
+                * NOTE(review): the server thread is not joined on this
+                * path and may still be blocked in accept().
+                */
+               err = -1;
+               goto close_server_fd;
+       }
+
+       /* A socket created by socket()+connect() must not see any value. */
+       err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0);
+       err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0);
+       err += verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0);
+
+       pthread_join(tid, &server_err);
+
+       /* The thread returns its number of failed checks as a pointer. */
+       err += (int)(long)server_err;
+
+       close(client_fd);
+
+close_server_fd:
+       close(server_fd);
+close_bpf_object:
+       bpf_object__close(obj);
+       return err;
+}
+
+/*
+ * Set up the cgroup environment, create and join CG_PATH, run the test in
+ * it and clean up again.
+ *
+ * Returns EXIT_SUCCESS only if the setup succeeded and run_test() reported
+ * no error; every other outcome yields EXIT_FAILURE.
+ */
+int main(int args, char **argv)
+{
+       int cgroup_fd;
+       /* Pessimistic default: any setup failure below must fail the run. */
+       int err = EXIT_FAILURE;
+
+       if (setup_cgroup_environment())
+               return err;
+
+       cgroup_fd = create_and_get_cgroup(CG_PATH);
+       if (cgroup_fd < 0)
+               goto cleanup_cgroup_env;
+
+       if (join_cgroup(CG_PATH))
+               goto cleanup_cgroup;
+
+       if (!run_test(cgroup_fd))
+               err = EXIT_SUCCESS;
+
+       printf("test_sockopt_inherit: %s\n",
+              err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+       close(cgroup_fd);
+cleanup_cgroup_env:
+       cleanup_cgroup_environment();
+       return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c

index a3bebd7c68ddc6262c5eaf6021bd8c8304b6a006..fc33ae36b760c5deec76176d66953c24d95852fe 100644 (file)
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -13,6 +13,7 @@
  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>
  
+#include "bpf_endian.h"
  #include "bpf_rlimit.h"
  #include "bpf_util.h"
  #include "cgroup_helpers.h"
@@ -100,7 +101,7 @@ static struct sysctl_test tests[] = {
                 .descr = "ctx:write sysctl:write read ok",
                 .insns = {
                         /* If (write) */
-                       BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+                       BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
                                     offsetof(struct bpf_sysctl, write)),
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
  
@@ -214,7 +215,8 @@ static struct sysctl_test tests[] = {
                         /* if (ret == expected && */
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
                         /*     buf == "tcp_mem\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x7463705f6d656d00ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -255,7 +257,8 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
  
                         /*     buf[0:7] == "tcp_me\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x7463705f6d650000ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -298,12 +301,14 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),
  
                         /*     buf[0:8] == "net/ipv4" && */
-                       BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x6e65742f69707634ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
  
                         /*     buf[8:16] == "/tcp_mem" && */
-                       BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x2f7463705f6d656dULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
  
@@ -350,12 +355,14 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),
  
                         /*     buf[0:8] == "net/ipv4" && */
-                       BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x6e65742f69707634ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
  
                         /*     buf[8:16] == "/tcp_me\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x2f7463705f6d6500ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -396,7 +403,8 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
  
                         /*     buf[0:8] == "net/ip\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x6e65742f69700000ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -431,7 +439,8 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
  
                         /*     buf[0:6] == "Linux\n\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x4c696e75780a0000ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -469,7 +478,8 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
  
                         /*     buf[0:6] == "Linux\n\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x4c696e75780a0000ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -507,7 +517,8 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
  
                         /*     buf[0:6] == "Linux\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x4c696e7578000000ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -650,7 +661,8 @@ static struct sysctl_test tests[] = {
  
                         /*     buf[0:4] == "606\0") */
                         BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
-                       BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_9,
+                                   bpf_ntohl(0x36303600), 2),
  
                         /* return DENY; */
                         BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -685,17 +697,20 @@ static struct sysctl_test tests[] = {
                         BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),
  
                         /*     buf[0:8] == "3000000 " && */
-                       BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x3330303030303020ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
  
                         /*     buf[8:16] == "4000000 " && */
-                       BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x3430303030303020ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
  
                         /*     buf[16:24] == "6000000\0") */
-                       BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
+                       BPF_LD_IMM64(BPF_REG_8,
+                                    bpf_be64_to_cpu(0x3630303030303000ULL)),
                         BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
                         BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
  
@@ -735,7 +750,8 @@ static struct sysctl_test tests[] = {
  
                         /*     buf[0:3] == "60\0") */
                         BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
-                       BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_9,
+                                   bpf_ntohl(0x36300000), 2),
  
                         /* return DENY; */
                         BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -757,7 +773,8 @@ static struct sysctl_test tests[] = {
                         /* sysctl_set_new_value arg2 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x36303000)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
@@ -791,7 +808,7 @@ static struct sysctl_test tests[] = {
                         /* sysctl_set_new_value arg2 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
+                       BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
@@ -825,8 +842,9 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
-                       BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x36303000)),
+                       BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
  
@@ -869,7 +887,8 @@ static struct sysctl_test tests[] = {
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
                         /* "600 602\0" */
-                       BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3630302036303200ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
  
@@ -937,7 +956,8 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x36303000)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -969,8 +989,9 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00373730),
-                       BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x30373700)),
+                       BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
  
@@ -1012,7 +1033,8 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x36303000)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1052,7 +1074,8 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d),
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x0d0c0a09)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1092,7 +1115,9 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
+                       /* " -6\0" */
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x0a2d3600)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1132,8 +1157,10 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
-                       BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+                       /* " -6\0" */
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x0a2d3600)),
+                       BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
  
@@ -1175,8 +1202,10 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
-                       BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */
-                       BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+                       /* "0xfe" */
+                       BPF_MOV64_IMM(BPF_REG_0,
+                                     bpf_ntohl(0x30786665)),
+                       BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
  
@@ -1218,11 +1247,14 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) 9223372036854775807 */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
-                       BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3932323333373230ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-                       BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3336383534373735ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
-                       BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3830370000000000ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1266,11 +1298,14 @@ static struct sysctl_test tests[] = {
                         /* arg1 (buf) 9223372036854775808 */
                         BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
                         BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
-                       BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3932323333373230ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-                       BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3336383534373735ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
-                       BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL),
+                       BPF_LD_IMM64(BPF_REG_0,
+                                    bpf_be64_to_cpu(0x3830380000000000ULL)),
                         BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
  
                         BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1344,20 +1379,24 @@ static size_t probe_prog_length(const struct bpf_insn *fp)
  static int fixup_sysctl_value(const char *buf, size_t buf_len,
                               struct bpf_insn *prog, size_t insn_num)
  {
-       uint32_t value_num = 0;
+       union {
+               uint8_t raw[sizeof(uint64_t)];
+               uint64_t num;
+       } value = {};
         uint8_t c, i;
  
-       if (buf_len > sizeof(value_num)) {
+       if (buf_len > sizeof(value)) {
                 log_err("Value is too big (%zd) to use in fixup", buf_len);
                 return -1;
         }
-
-       for (i = 0; i < buf_len; ++i) {
-               c = buf[i];
-               value_num |= (c << i * 8);
+       if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) {
+               log_err("Can fixup only BPF_LD_IMM64 insns");
+               return -1;
         }
  
-       prog[insn_num].imm = value_num;
+       memcpy(value.raw, buf, buf_len);
+       prog[insn_num].imm = (uint32_t)value.num;
+       prog[insn_num + 1].imm = (uint32_t)(value.num >> 32);
  
         return 0;
  }
@@ -1499,6 +1538,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
                         goto err;
         }
  
+       errno = 0;
         if (access_sysctl(sysctl_path, test) == -1) {
                 if (test->result == OP_EPERM && errno == EPERM)
                         goto out;
@@ -1507,7 +1547,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
         }
  
         if (test->result != SUCCESS) {
-               log_err("Unexpected failure");
+               log_err("Unexpected success");
                 goto err;
         }
  
diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c

index 90c3862f74a856cc10685130a3313dedcdb1dd34..93916a69823e51d5e0df0ce79c647d9fd03f168e 100644 (file)
--- a/tools/testing/selftests/bpf/test_tcp_rtt.c
+++ b/tools/testing/selftests/bpf/test_tcp_rtt.c
@@ -6,6 +6,7 @@
  #include <sys/types.h>
  #include <sys/socket.h>
  #include <netinet/in.h>
+#include <netinet/tcp.h>
  #include <pthread.h>
  
  #include <linux/filter.h>
@@ -34,6 +35,30 @@ static void send_byte(int fd)
                 error(1, errno, "Failed to send single byte");
  }
  
+static int wait_for_ack(int fd, int retries)
+{
+       struct tcp_info info;
+       socklen_t optlen;
+       int i, err;
+
+       for (i = 0; i < retries; i++) {
+               optlen = sizeof(info);
+               err = getsockopt(fd, SOL_TCP, TCP_INFO, &info, &optlen);
+               if (err < 0) {
+                       log_err("Failed to lookup TCP stats");
+                       return err;
+               }
+
+               if (info.tcpi_unacked == 0)
+                       return 0;
+
+               usleep(10);
+       }
+
+       log_err("Did not receive ACK");
+       return -1;
+}
+
  static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
                      __u32 dsack_dups, __u32 delivered, __u32 delivered_ce,
                      __u32 icsk_retransmits)
@@ -149,6 +174,11 @@ static int run_test(int cgroup_fd, int server_fd)
                          /*icsk_retransmits=*/0);
  
         send_byte(client_fd);
+       if (wait_for_ack(client_fd, 100) < 0) {
+               err = -1;
+               goto close_client_fd;
+       }
+
  
         err += verify_sk(map_fd, client_fd, "first payload byte",
                          /*invoked=*/2,
@@ -157,6 +187,7 @@ static int run_test(int cgroup_fd, int server_fd)
                          /*delivered_ce=*/0,
                          /*icsk_retransmits=*/0);
  
+close_client_fd:
         close(client_fd);
  
  close_bpf_object:
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c

index 44e2d640b088ca10668d28e3a1c3b6acae8ec194..d27fd929abb9003ec4562e970ee321c17e730532 100644 (file)
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -61,6 +61,7 @@
  #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
  static bool unpriv_disabled = false;
  static int skips;
+static bool verbose = false;
  
  struct bpf_test {
         const char *descr;
@@ -92,7 +93,8 @@ struct bpf_test {
         enum {
                 UNDEF,
                 ACCEPT,
-               REJECT
+               REJECT,
+               VERBOSE_ACCEPT,
         } result, result_unpriv;
         enum bpf_prog_type prog_type;
         uint8_t flags;
@@ -859,6 +861,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
         return 0;
  }
  
+static bool cmp_str_seq(const char *log, const char *exp)
+{
+       char needle[80];
+       const char *p, *q;
+       int len;
+
+       do {
+               p = strchr(exp, '\t');
+               if (!p)
+                       p = exp + strlen(exp);
+
+               len = p - exp;
+               if (len >= sizeof(needle) || !len) {
+                       printf("FAIL\nTestcase bug\n");
+                       return false;
+               }
+               strncpy(needle, exp, len);
+               needle[len] = 0;
+               q = strstr(log, needle);
+               if (!q) {
+                       printf("FAIL\nUnexpected verifier log in successful load!\n"
+                              "EXP: %s\nRES:\n", needle);
+                       return false;
+               }
+               log = q + len;
+               exp = p + 1;
+       } while (*p);
+       return true;
+}
+
  static void do_test_single(struct bpf_test *test, bool unpriv,
                            int *passes, int *errors)
  {
@@ -897,14 +929,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                 pflags |= BPF_F_STRICT_ALIGNMENT;
         if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
                 pflags |= BPF_F_ANY_ALIGNMENT;
+       if (test->flags & ~3)
+               pflags |= test->flags;
  
+       expected_ret = unpriv && test->result_unpriv != UNDEF ?
+                      test->result_unpriv : test->result;
+       expected_err = unpriv && test->errstr_unpriv ?
+                      test->errstr_unpriv : test->errstr;
         memset(&attr, 0, sizeof(attr));
         attr.prog_type = prog_type;
         attr.expected_attach_type = test->expected_attach_type;
         attr.insns = prog;
         attr.insns_cnt = prog_len;
         attr.license = "GPL";
-       attr.log_level = 4;
+       attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
         attr.prog_flags = pflags;
  
         fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
@@ -914,14 +952,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                 goto close_fds;
         }
  
-       expected_ret = unpriv && test->result_unpriv != UNDEF ?
-                      test->result_unpriv : test->result;
-       expected_err = unpriv && test->errstr_unpriv ?
-                      test->errstr_unpriv : test->errstr;
-
         alignment_prevented_execution = 0;
  
-       if (expected_ret == ACCEPT) {
+       if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
                 if (fd_prog < 0) {
                         printf("FAIL\nFailed to load prog '%s'!\n",
                                strerror(errno));
@@ -932,6 +965,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                     (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS))
                         alignment_prevented_execution = 1;
  #endif
+               if (expected_ret == VERBOSE_ACCEPT && !cmp_str_seq(bpf_vlog, expected_err)) {
+                       goto fail_log;
+               }
         } else {
                 if (fd_prog >= 0) {
                         printf("FAIL\nUnexpected success to load!\n");
@@ -957,6 +993,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                 }
         }
  
+       if (verbose)
+               printf(", verifier log:\n%s", bpf_vlog);
+
         run_errs = 0;
         run_successes = 0;
         if (!alignment_prevented_execution && fd_prog >= 0) {
@@ -1097,17 +1136,24 @@ int main(int argc, char **argv)
  {
         unsigned int from = 0, to = ARRAY_SIZE(tests);
         bool unpriv = !is_admin();
+       int arg = 1;
+
+       if (argc > 1 && strcmp(argv[1], "-v") == 0) {
+               arg++;
+               verbose = true;
+               argc--;
+       }
  
         if (argc == 3) {
-               unsigned int l = atoi(argv[argc - 2]);
-               unsigned int u = atoi(argv[argc - 1]);
+               unsigned int l = atoi(argv[arg]);
+               unsigned int u = atoi(argv[arg + 1]);
  
                 if (l < to && u < to) {
                         from = l;
                         to   = u + 1;
                 }
         } else if (argc == 2) {
-               unsigned int t = atoi(argv[argc - 1]);
+               unsigned int t = atoi(argv[arg]);
  
                 if (t < to) {
                         from = t;
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c

new file mode 100644 (file)

index 0000000..02151f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -0,0 +1,194 @@
+{
+       "precise: test 1",
+       .insns = {
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_LD_MAP_FD(BPF_REG_6, 0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
+       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+
+       BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+
+       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+
+       BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+       BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
+       BPF_EXIT_INSN(),
+
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_MOV64_IMM(BPF_REG_3, 0),
+       BPF_EMIT_CALL(BPF_FUNC_probe_read),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .fixup_map_array_48b = { 1 },
+       .result = VERBOSE_ACCEPT,
+       .errstr =
+       "26: (85) call bpf_probe_read#4\
+       last_idx 26 first_idx 20\
+       regs=4 stack=0 before 25\
+       regs=4 stack=0 before 24\
+       regs=4 stack=0 before 23\
+       regs=4 stack=0 before 22\
+       regs=4 stack=0 before 20\
+       parent didn't have regs=4 stack=0 marks\
+       last_idx 19 first_idx 10\
+       regs=4 stack=0 before 19\
+       regs=200 stack=0 before 18\
+       regs=300 stack=0 before 17\
+       regs=201 stack=0 before 15\
+       regs=201 stack=0 before 14\
+       regs=200 stack=0 before 13\
+       regs=200 stack=0 before 12\
+       regs=200 stack=0 before 11\
+       regs=200 stack=0 before 10\
+       parent already had regs=0 stack=0 marks",
+},
+{
+       "precise: test 2",
+       .insns = {
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_LD_MAP_FD(BPF_REG_6, 0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
+       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+
+       BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+
+       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+
+       BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+       BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
+       BPF_EXIT_INSN(),
+
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_MOV64_IMM(BPF_REG_3, 0),
+       BPF_EMIT_CALL(BPF_FUNC_probe_read),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+       .fixup_map_array_48b = { 1 },
+       .result = VERBOSE_ACCEPT,
+       .flags = BPF_F_TEST_STATE_FREQ,
+       .errstr =
+       "26: (85) call bpf_probe_read#4\
+       last_idx 26 first_idx 22\
+       regs=4 stack=0 before 25\
+       regs=4 stack=0 before 24\
+       regs=4 stack=0 before 23\
+       regs=4 stack=0 before 22\
+       parent didn't have regs=4 stack=0 marks\
+       last_idx 20 first_idx 20\
+       regs=4 stack=0 before 20\
+       parent didn't have regs=4 stack=0 marks\
+       last_idx 19 first_idx 17\
+       regs=4 stack=0 before 19\
+       regs=200 stack=0 before 18\
+       regs=300 stack=0 before 17\
+       parent already had regs=0 stack=0 marks",
+},
+{
+       "precise: cross frame pruning",
+       .insns = {
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+       BPF_MOV64_IMM(BPF_REG_8, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_8, 1),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+       BPF_MOV64_IMM(BPF_REG_9, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_9, 1),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1),
+       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_XDP,
+       .flags = BPF_F_TEST_STATE_FREQ,
+       .errstr = "!read_ok",
+       .result = REJECT,
+},
+{
+       "precise: ST insn causing spi > allocated_stack",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+       BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
+       BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+       BPF_MOV64_IMM(BPF_REG_0, -1),
+       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_XDP,
+       .flags = BPF_F_TEST_STATE_FREQ,
+       .errstr = "5: (2d) if r4 > r0 goto pc+0\
+       last_idx 5 first_idx 5\
+       parent didn't have regs=10 stack=0 marks\
+       last_idx 4 first_idx 2\
+       regs=10 stack=0 before 4\
+       regs=10 stack=0 before 3\
+       regs=0 stack=1 before 2\
+       last_idx 5 first_idx 5\
+       parent didn't have regs=1 stack=0 marks",
+       .result = VERBOSE_ACCEPT,
+       .retval = -1,
+},
+{
+       "precise: STX insn causing spi > allocated_stack",
+       .insns = {
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+       BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+       BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
+       BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+       BPF_MOV64_IMM(BPF_REG_0, -1),
+       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_XDP,
+       .flags = BPF_F_TEST_STATE_FREQ,
+       .errstr = "last_idx 6 first_idx 6\
+       parent didn't have regs=10 stack=0 marks\
+       last_idx 5 first_idx 3\
+       regs=10 stack=0 before 5\
+       regs=10 stack=0 before 4\
+       regs=0 stack=1 before 3\
+       last_idx 6 first_idx 6\
+       parent didn't have regs=1 stack=0 marks\
+       last_idx 5 first_idx 3\
+       regs=1 stack=0 before 5",
+       .result = VERBOSE_ACCEPT,
+       .retval = -1,
+},
author	David S. Miller <davem@davemloft.net>
	Fri, 6 Sep 2019 14:49:17 +0000 (16:49 +0200)
committer	David S. Miller <davem@davemloft.net>
	Fri, 6 Sep 2019 14:49:17 +0000 (16:49 +0200)
Documentation/networking/af_xdp.rst		patch \| blob \| history
arch/arm64/net/bpf_jit.h		patch \| blob \| history
arch/arm64/net/bpf_jit_comp.c		patch \| blob \| history
arch/s390/net/bpf_jit_comp.c		patch \| blob \| history
drivers/net/ethernet/intel/i40e/i40e_main.c		patch \| blob \| history
drivers/net/ethernet/intel/i40e/i40e_xsk.c		patch \| blob \| history
drivers/net/ethernet/intel/i40e/i40e_xsk.h		patch \| blob \| history
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c		patch \| blob \| history
drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h		patch \| blob \| history
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/params.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/params.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en_main.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/bpf/cmsg.c		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/bpf/fw.h		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/bpf/main.c		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/bpf/main.h		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/bpf/offload.c		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/nfp_net.h		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/nfp_net_common.c		patch \| blob \| history
include/linux/bpf.h		patch \| blob \| history
include/linux/bpf_verifier.h		patch \| blob \| history
include/linux/netdevice.h		patch \| blob \| history
include/linux/tnum.h		patch \| blob \| history
include/net/bpf_sk_storage.h		patch \| blob \| history
include/net/xdp_sock.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
include/uapi/linux/if_xdp.h		patch \| blob \| history
kernel/bpf/btf.c		patch \| blob \| history
kernel/bpf/syscall.c		patch \| blob \| history
kernel/bpf/sysfs_btf.c		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
kernel/bpf/xskmap.c		patch \| blob \| history
kernel/trace/Kconfig		patch \| blob \| history
lib/test_bpf.c		patch \| blob \| history
net/core/bpf_sk_storage.c		patch \| blob \| history
net/core/dev.c		patch \| blob \| history
net/core/filter.c		patch \| blob \| history
net/core/sock.c		patch \| blob \| history
net/xdp/xdp_umem.c		patch \| blob \| history
net/xdp/xsk.c		patch \| blob \| history
net/xdp/xsk.h		patch \| blob \| history
net/xdp/xsk_diag.c		patch \| blob \| history
net/xdp/xsk_queue.h		patch \| blob \| history
samples/bpf/syscall_nrs.c		patch \| blob \| history
samples/bpf/tracex5_kern.c		patch \| blob \| history
samples/bpf/xdpsock_user.c		patch \| blob \| history
scripts/link-vmlinux.sh		patch \| blob \| history
tools/bpf/.gitignore		patch \| blob \| history
tools/bpf/Makefile		patch \| blob \| history
tools/bpf/bpftool/.gitignore		patch \| blob \| history
tools/bpf/bpftool/Documentation/bpftool-btf.rst		patch \| blob \| history
tools/bpf/bpftool/Documentation/bpftool-map.rst		patch \| blob \| history
tools/bpf/bpftool/Documentation/bpftool-net.rst		patch \| blob \| history
tools/bpf/bpftool/Makefile		patch \| blob \| history
tools/bpf/bpftool/bash-completion/bpftool		patch \| blob \| history
tools/bpf/bpftool/btf.c		patch \| blob \| history
tools/bpf/bpftool/btf_dumper.c		patch \| blob \| history
tools/bpf/bpftool/cgroup.c		patch \| blob \| history
tools/bpf/bpftool/common.c		patch \| blob \| history
tools/bpf/bpftool/json_writer.c		patch \| blob \| history
tools/bpf/bpftool/json_writer.h		patch \| blob \| history
tools/bpf/bpftool/main.c		patch \| blob \| history
tools/bpf/bpftool/main.h		patch \| blob \| history
tools/bpf/bpftool/map.c		patch \| blob \| history
tools/bpf/bpftool/map_perf_ring.c		patch \| blob \| history
tools/bpf/bpftool/net.c		patch \| blob \| history
tools/bpf/bpftool/perf.c		patch \| blob \| history
tools/include/linux/compiler-gcc.h		patch \| blob \| history
tools/include/uapi/linux/bpf.h		patch \| blob \| history
tools/include/uapi/linux/if_xdp.h		patch \| blob \| history
tools/lib/bpf/Makefile		patch \| blob \| history
tools/lib/bpf/bpf.c		patch \| blob \| history
tools/lib/bpf/bpf.h		patch \| blob \| history
tools/lib/bpf/libbpf.map		patch \| blob \| history
tools/lib/bpf/xsk.c		patch \| blob \| history
tools/lib/bpf/xsk.h		patch \| blob \| history
tools/testing/selftests/bpf/.gitignore		patch \| blob \| history
tools/testing/selftests/bpf/Makefile		patch \| blob \| history
tools/testing/selftests/bpf/bpf_endian.h		patch \| blob \| history
tools/testing/selftests/bpf/bpf_helpers.h		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/flow_dissector.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/global_data.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/l4lb_all.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/map_lock.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/pkt_access.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/pkt_md_access.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/queue_stack_map.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/reference_tracking.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/send_signal.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/spinlock.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/stacktrace_map.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/tcp_estats.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/xdp.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/xdp_noinline.c		patch \| blob \| history
tools/testing/selftests/bpf/progs/sockopt_inherit.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/progs/test_lwt_seg6local.c		patch \| blob \| history
tools/testing/selftests/bpf/progs/test_seg6_loop.c		patch \| blob \| history
tools/testing/selftests/bpf/test_bpftool_build.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/bpf/test_offload.py		patch \| blob \| history
tools/testing/selftests/bpf/test_progs.c		patch \| blob \| history
tools/testing/selftests/bpf/test_progs.h		patch \| blob \| history
tools/testing/selftests/bpf/test_sockopt_inherit.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/test_sysctl.c		patch \| blob \| history
tools/testing/selftests/bpf/test_tcp_rtt.c		patch \| blob \| history
tools/testing/selftests/bpf/test_verifier.c		patch \| blob \| history
tools/testing/selftests/bpf/verifier/precise.c	[new file with mode: 0644]	patch \| blob