Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index d840a59aec88a3d67a166ef9cc9db2bb3752baf8..9c94807097cb84463ef7a5b6dfc8b3af416f880a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
                                         struct mlx5e_xsk_param *xsk,
                                         bool mpwqe)
 {
+       u32 sz;
+
        /* XSK frames are mapped as individual pages, because frames may come in
         * an arbitrary order from random locations in the UMEM.
         */
        if (xsk)
                return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
 
-       /* XDP in mlx5e doesn't support multiple packets per page. */
-       if (params->xdp_prog)
-               return PAGE_SIZE;
+       sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
 
-       return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
+       /* XDP in mlx5e doesn't support multiple packets per page.
+        * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
+        */
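+       /* E.g. if the rounded-up linear size is 2048 on a 4K-page system,
+        * XDP bumps it to PAGE_SIZE; a size already above PAGE_SIZE
+        * (large-MTU multi-buffer XDP) is returned unchanged.
+        */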
+       return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz;
 }
 
 static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
@@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
        return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
 }
 
+bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+                                         struct mlx5e_params *params,
+                                         struct mlx5e_xsk_param *xsk)
+{
+       u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+       u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+       enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+       u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+       return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+                                            log_wqe_num_of_strides,
+                                            page_shift, umr_mode);
+}
+
 bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
                                  struct mlx5e_params *params,
                                  struct mlx5e_xsk_param *xsk)
@@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
        if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
                return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
 
+       /* XDP in mlx5e doesn't support multiple packets per page. */
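+       /* A log stride of PAGE_SHIFT gives page-sized strides, so no page
+        * carries data from more than one packet.
+        */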
+       if (params->xdp_prog)
+               return PAGE_SHIFT;
+
        return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
 }
 
@@ -553,7 +574,7 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
        u32 link_speed = 0;
        u32 pci_bw = 0;
 
-       mlx5e_port_max_linkspeed(mdev, &link_speed);
+       mlx5_port_max_linkspeed(mdev, &link_speed);
        pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
        mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
                           link_speed, pci_bw);
@@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
        if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
                return -EOPNOTSUPP;
 
-       if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
-               return -EINVAL;
-
        return 0;
 }
 
@@ -667,6 +685,48 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
        return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
 }
 
+static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
+                                            struct mlx5e_rq_frags_info *info)
+{
+       u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4;
+       u32 bulk_bound_rq_size_in_bytes;
+       u32 sum_frag_strides = 0;
+       u32 wqe_bulk_in_bytes;
+       u16 split_factor;
+       u32 wqe_bulk;
+       int i;
+
+       for (i = 0; i < info->num_frags; i++)
+               sum_frag_strides += info->arr[i].frag_stride;
+
+       /* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE so that
+        * sum_frag_strides reflects the number of pages consumed per WQE,
+        * expressed in bytes.
+        */
+       if (sum_frag_strides > PAGE_SIZE)
+               sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE);
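+       /* E.g. a sum_frag_strides of 13312 bytes is rounded up to 16384,
+        * i.e. 4 full pages on a 4K-page system.
+        */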
+
+       bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides;
+
+#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024)
+
+       /* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP
+        * keep bulk size smaller to avoid filling the page_pool cache on
+        * every bulk refill.
+        */
+       wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog),
+                                 bulk_bound_rq_size_in_bytes);
+       wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides);
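+       /* E.g. with 2048-byte strides, one frag per WQE and a 2^13-entry RQ:
+        * bulk_bound_rq_size = 2048 WQEs = 4 MB of buffers, so the 512 KB
+        * cap (no XDP) wins and wqe_bulk = 512 KB / 2 KB = 256 WQEs.
+        */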
+
+       /* Make sure that allocations don't start when the page is still used
+        * by older WQEs.
+        */
+       info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk);
+
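+       /* Break the bulk into smaller refill units so that each unit stays
+        * roughly within the page_pool cache refill size. E.g. with
+        * PP_ALLOC_CACHE_REFILL == 64 and 4K pages, a non-XDP bulk is split
+        * into 512 KB / 256 KB = 2 units.
+        */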
+       split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog),
+                                   PP_ALLOC_CACHE_REFILL * PAGE_SIZE);
+       info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor);
+}
+
 #define DEFAULT_FRAG_SIZE (2048)
 
 static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
@@ -774,11 +834,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
        }
 
 out:
-       /* Bulking optimization to skip allocation until at least 8 WQEs can be
-        * allocated in a row. At the same time, never start allocation when
-        * the page is still used by older WQEs.
+       /* Bulking optimization to skip allocation until a large enough number
+        * of WQEs can be allocated in a row. Bulking also influences how well
+        * deferred page release works.
         */
-       info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+       mlx5e_rx_compute_wqe_bulk_params(params, info);
+
+       mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n",
+                     __func__, info->wqe_bulk, info->refill_unit);
 
        info->log_num_frags = order_base_2(info->num_frags);