// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000
struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp, used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;
	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)					\
	do {								\
		if (features & BIT_ULL(_feature))			\
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);	\
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)					\
	do {								\
		if (status & (_status))					\
			mlx5_vdpa_info(mvdev, "%s\n", #_status);	\
	} while (0)
/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}
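
/* Virtqueue index layout, as encoded by the two helpers above: data VQs
 * occupy indices 0..max_vqs - 1 and the control VQ comes last, at index
 * max_vqs, when VIRTIO_NET_F_MQ is negotiated. Without MQ the device
 * exposes a single RX/TX pair, so the control VQ lives at index 2.
 * is_index_valid() bounds-checks indices from the vdpa core against this
 * same layout.
 */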
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}
static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}
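
/* CQE ownership follows the usual mlx5 convention: the owner bit in
 * op_own flips on every wrap of the CQ, so a CQE belongs to software only
 * when its owner bit matches the wrap parity of the consumer index
 * (n & cq->cqe); otherwise get_sw_cqe() yields NULL and polling stops.
 */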
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver for the firmware's
		 * use, so we can skip part of the params as they will be
		 * chosen by firmware.
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
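
/* The two halves of qp_prepare() implement the notification channel: the
 * firmware QP gets a zero-length RQ and no SQ since firmware only sends,
 * while the driver QP carries a real RQ sized to the virtqueue so that
 * every firmware send completes on the driver CQ and can wake the vdpa
 * core.
 */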
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}
static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose the least
	 * used vector.
	 */
	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
static int read_umem_params(struct mlx5_vdpa_net *ndev)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int out_size;
	void *caps;
	void *out;
	int err;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev,
			       "Failed reading vdpa umem capabilities with err %d\n", err);
		goto out;
	}

	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);

	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);

	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);

out:
	kfree(out);
	return err;
}
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	u32 p_a;
	u32 p_b;

	switch (num) {
	case 1:
		p_a = ndev->umem_1_buffer_param_a;
		p_b = ndev->umem_1_buffer_param_b;
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = ndev->umem_2_buffer_param_a;
		p_b = ndev->umem_2_buffer_param_b;
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = ndev->umem_3_buffer_param_a;
		p_b = ndev->umem_3_buffer_param_b;
		*umemp = &mvq->umem3;
		break;
	}

	(*umemp)->size = p_a * mvq->num_ent + p_b;
}
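
/* The device reports, per umem, a pair of buffer parameters (a, b) via
 * QUERY_HCA_CAP (see read_umem_params() above), and the required size
 * grows linearly with the queue depth: size = a * num_ent + b. As a
 * purely illustrative example, with a = 128 and b = 4096 a 256-entry
 * virtqueue would need 128 * 256 + 4096 = 36864 bytes for that umem.
 */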
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;
	__be64 *pas;
	int inlen;
	void *um;
	void *in;
	int err;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	default:
		return;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}

	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}
static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}
enum {
	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
	MLX5_VIRTIO_NET_F_CSUM = 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};

static u16 get_features(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}
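
/* get_features() repacks the negotiated virtio-net feature bits into the
 * 13-bit device layout consumed below by create_virtqueue(), which splits
 * the mask across queue_feature_bit_mask_12_3 and queue_feature_bit_mask_2_0.
 */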
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
	       (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
	       pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
}
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *vq_desc_mr;
	void *obj_context;
	u16 mlx_features;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	mlx_features = get_features(ndev->mvdev.actual_features);
	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 mlx_features >> 3);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
		 mlx_features & 7);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	if (mvq->map.virq) {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
	} else {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	}

	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
	if (vq_mr)
		MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);

	vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
		MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);

	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}
/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one
 * used by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}
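
/* Standard RC connection establishment: both ends are walked through
 * RST -> INIT -> RTR, but only the firmware QP is advanced to RTS. The
 * firmware side is the sender of the notification channel; the driver QP
 * only ever receives, so RTR is sufficient for it.
 */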
struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *out;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}
static bool is_valid_state_change(int oldstate, int newstate)
{
	switch (oldstate) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
	default:
		return false;
	}
}
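
/* The firmware virtqueue object follows a one-way state machine:
 * INIT -> RDY -> SUSPEND. SUSPEND and ERR are terminal as far as
 * modify_virtqueue() is concerned; every other transition is rejected
 * with -EINVAL.
 */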
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
		return 0;

	if (!is_valid_state_change(mvq->fw_state, state))
		return -EINVAL;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}
static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
	struct vdpa_callback *cb = priv;

	if (cb->callback)
		return cb->callback(cb->private);

	return IRQ_HANDLED;
}
static void alloc_vector(struct mlx5_vdpa_net *ndev,
			 struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	struct mlx5_vdpa_irq_pool_entry *ent;
	int err;
	int i;

	for (i = 0; i < irqp->num_ent; i++) {
		ent = &irqp->entries[i];
		if (!ent->used) {
			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
			ent->dev_id = &ndev->event_cbs[mvq->index];
			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
					  ent->name, ent->dev_id);
			if (err)
				return;

			ent->used = true;
			mvq->map = ent->map;
			return;
		}
	}
}

static void dealloc_vector(struct mlx5_vdpa_net *ndev,
			   struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	int i;

	for (i = 0; i < irqp->num_ent; i++)
		if (mvq->map.virq == irqp->entries[i].map.virq) {
			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
			irqp->entries[i].used = false;
			return;
		}
}
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (!mvq->num_ent)
		return 0;

	if (mvq->initialized)
		return 0;

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = counter_set_alloc(ndev, mvq);
	if (err)
		goto err_connect;

	alloc_vector(ndev, mvq);
	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_vq;

	if (mvq->ready) {
		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
				       idx, err);
			goto err_modify;
		}
	}

	mvq->initialized = true;
	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_vq:
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}
static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}
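
/* RX virtqueues sit at the even indices (0, 2, 4, ...), which is why the
 * RQT list above walks the vqs array with a stride of two. Both the
 * maximum and the actual table size must be powers of two, hence the
 * roundup_pow_of_two() on rqt_size and on the active queue count.
 */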
#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	int act_sz = roundup_pow_of_two(num / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	if (err)
		return err;

	mlx5_vdpa_add_tirn(ndev);
	return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_remove_tirn(ndev);
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	#define NUM_DESTS 2
#else
	#define NUM_DESTS 1
#endif

static int add_steering_counters(struct mlx5_vdpa_net *ndev,
				 struct macvlan_node *node,
				 struct mlx5_flow_act *flow_act,
				 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	int err;

	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->ucast_counter.counter))
		return PTR_ERR(node->ucast_counter.counter);

	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->mcast_counter.counter)) {
		err = PTR_ERR(node->mcast_counter.counter);
		goto err_mcast_counter;
	}

	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	return 0;

err_mcast_counter:
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
	return err;
#else
	return 0;
#endif
}

static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
				     struct macvlan_node *node)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
#endif
}
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
					struct macvlan_node *node)
{
	struct mlx5_flow_destination dests[NUM_DESTS] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	void *headers_c;
	void *headers_v;
	u8 *dmac_c;
	u8 *dmac_v;
	int err;
	u16 vid;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	vid = key2vid(node->macvlan);
	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
	eth_broadcast_addr(dmac_c);
	ether_addr_copy(dmac_v, mac);
	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
	}
	if (node->tagged) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
	}
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dests[0].tir_num = ndev->res.tirn;
	err = add_steering_counters(ndev, node, &flow_act, dests);
	if (err)
		goto out_free;

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
#endif
	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->ucast_rule)) {
		err = PTR_ERR(node->ucast_rule);
		goto err_ucast;
	}

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
#endif

	memset(dmac_c, 0, ETH_ALEN);
	memset(dmac_v, 0, ETH_ALEN);
	dmac_c[0] = 1;
	dmac_v[0] = 1;
	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->mcast_rule)) {
		err = PTR_ERR(node->mcast_rule);
		goto err_mcast;
	}
	kvfree(spec);
	mlx5_vdpa_add_rx_counters(ndev, node);
	return 0;

err_mcast:
	mlx5_del_flow_rules(node->ucast_rule);
err_ucast:
	remove_steering_counters(ndev, node);
out_free:
	kvfree(spec);
	return err;
}

static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct macvlan_node *node)
{
	mlx5_vdpa_remove_rx_counters(ndev, node);
	mlx5_del_flow_rules(node->ucast_rule);
	mlx5_del_flow_rules(node->mcast_rule);
}
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
	u64 val;

	if (!tagged)
		vlan = MLX5V_UNTAGGED;

	val = (u64)vlan << 48 |
	      (u64)mac[0] << 40 | (u64)mac[1] << 32 |
	      (u64)mac[2] << 24 | (u64)mac[3] << 16 |
	      (u64)mac[4] << 8 | (u64)mac[5];

	return val;
}

static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
	struct macvlan_node *pos;
	u32 idx;

	idx = hash_64(value, 8); /* 8 hash bits -> MLX5V_MACVLAN_SIZE buckets */
	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
		if (pos->macvlan == value)
			return pos;
	}
	return NULL;
}
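
/* The hash key packs the VLAN ID into bits 48..63 above the 48-bit MAC,
 * with MLX5V_UNTAGGED (0x1000, outside the valid 12-bit VLAN range)
 * serving as the sentinel for untagged entries. A single 64-bit value
 * therefore uniquely identifies a (MAC, VLAN, tagged) tuple in
 * macvlan_hash.
 */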
static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
	struct macvlan_node *ptr;
	u64 val;
	u32 idx;
	int err;

	val = search_val(mac, vid, tagged);
	if (mac_vlan_lookup(ndev, val))
		return -EEXIST;

	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
	if (!ptr)
		return -ENOMEM;

	ptr->tagged = tagged;
	ptr->macvlan = val;
	ptr->ndev = ndev;
	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
	if (err)
		goto err_add;

	idx = hash_64(val, 8);
	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
	return 0;

err_add:
	kfree(ptr);
	return err;
}

static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
{
	struct macvlan_node *ptr;

	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
	if (!ptr)
		return;

	hlist_del(&ptr->hlist);
	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
	remove_steering_counters(ndev, ptr);
	kfree(ptr);
}

static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
{
	struct macvlan_node *pos;
	struct hlist_node *n;
	int i;

	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
			hlist_del(&pos->hlist);
			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
			remove_steering_counters(ndev, pos);
			kfree(pos);
		}
	}
}
static int setup_steering(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err;

	ft_attr.max_fte = MAX_STEERING_ENT;
	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}
	mlx5_vdpa_add_rx_flow_table(ndev);

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}

static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_vdpa_remove_rx_flow_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* Back up the original MAC address so that we can restore it
		 * if adding the forward rules fails.
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* The flow table entry must be recreated so that traffic to
		 * the new MAC address is forwarded again.
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* This path is hardly ever taken, but double-check anyway. */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: original MAC is zero\n");
				break;
			}

			/* Try to restore the original MAC address in the MPFS
			 * table, and try to restore the forward rule entry.
			 */
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac))
				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
					       ndev->config.mac);

			if (mlx5_mpfs_add_mac(pfmdev, mac_back))
				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
					       mac_back);

			memcpy(ndev->config.mac, mac_back, ETH_ALEN);

			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");

			break;
		}

		status = VIRTIO_NET_OK;
		break;

	default:
		break;
	}

	return status;
}
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int cur_qps = ndev->cur_num_vqs / 2;
	int err;
	int i;

	if (cur_qps > newqps) {
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			return err;

		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
			teardown_vq(ndev, &ndev->vqs[i]);

		ndev->cur_num_vqs = 2 * newqps;
	} else {
		ndev->cur_num_vqs = 2 * newqps;
		for (i = cur_qps * 2; i < 2 * newqps; i++) {
			err = setup_vq(ndev, &ndev->vqs[i]);
			if (err)
				goto clean_added;
		}
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			goto clean_added;
	}
	return 0;

clean_added:
	for (--i; i >= 2 * cur_qps; --i)
		teardown_vq(ndev, &ndev->vqs[i]);

	ndev->cur_num_vqs = 2 * cur_qps;

	return err;
}
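
/* Scaling down shrinks the RQT first and only then tears down the extra
 * VQs; scaling up creates the new VQs first and grows the RQT last. If a
 * ramp-up step fails midway, clean_added unwinds whatever was created and
 * restores the previous queue count.
 */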
static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	struct virtio_net_ctrl_mq mq;
	size_t read;
	u16 newqps;

	switch (cmd) {
	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* This mq feature check aligns with the pre-existing userspace
		 * implementation.
		 *
		 * Without it, an untrusted driver could fake a multiqueue config
		 * request down to a non-mq device that may cause the kernel to
		 * panic due to uninitialized resources for extra vqs. Even with
		 * a well-behaving guest driver, it is not expected to allow
		 * changing the number of vqs on a non-mq device.
		 */
		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
		if (read != sizeof(mq))
			break;

		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    newqps > ndev->rqt_size)
			break;

		if (ndev->cur_num_vqs == 2 * newqps) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (!change_num_qps(mvdev, newqps))
			status = VIRTIO_NET_OK;

		break;
	default:
		break;
	}

	return status;
}

static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	__virtio16 vlan;
	size_t read;
	u16 id;

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
		return status;

	switch (cmd) {
	case VIRTIO_NET_CTRL_VLAN_ADD:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
		if (read != sizeof(vlan))
			break;

		id = mlx5vdpa16_to_cpu(mvdev, vlan);
		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
			break;

		status = VIRTIO_NET_OK;
		break;
	case VIRTIO_NET_CTRL_VLAN_DEL:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
		if (read != sizeof(vlan))
			break;

		id = mlx5vdpa16_to_cpu(mvdev, vlan);
		mac_vlan_del(ndev, ndev->config.mac, id, true);
		status = VIRTIO_NET_OK;
		break;
	default:
		break;
	}

	return status;
}
static void mlx5_cvq_kick_handler(struct work_struct *work)
{
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct virtio_net_ctrl_hdr ctrl;
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_control_vq *cvq;
	struct mlx5_vdpa_net *ndev;
	size_t read, write;
	int err;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	cvq = &mvdev->cvq;

	down_write(&ndev->reslock);

	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		goto out;

	if (!cvq->ready)
		goto out;

	while (true) {
		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
					   GFP_ATOMIC);
		if (err <= 0)
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
		if (read != sizeof(ctrl))
			break;

		cvq->received_desc++;
		switch (ctrl.class) {
		case VIRTIO_NET_CTRL_MAC:
			status = handle_ctrl_mac(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_MQ:
			status = handle_ctrl_mq(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_VLAN:
			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
			break;
		default:
			break;
		}

		/* Make sure data is written before advancing index */
		smp_wmb();

		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
		vringh_kiov_cleanup(&cvq->riov);
		vringh_kiov_cleanup(&cvq->wiov);

		if (vringh_need_notify_iotlb(&cvq->vring))
			vringh_notify(&cvq->vring);

		cvq->completed_desc++;
		queue_work(mvdev->wq, &wqent->work);
		break;
	}

out:
	up_write(&ndev->reslock);
}
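
/* One control command is handled per work invocation; after completing a
 * descriptor the handler re-queues itself (the queue_work() call above)
 * and breaks out, so a burst of commands is processed incrementally
 * rather than monopolizing the workqueue under reslock.
 */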
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return;

	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
		if (!mvdev->wq || !mvdev->cvq.ready)
			return;

		queue_work(mvdev->wq, &ndev->cvq_ent.work);
		return;
	}

	mvq = &ndev->vqs[idx];
	if (unlikely(!mvq->ready))
		return;

	iowrite16(idx, ndev->mvdev.res.kick_addr);
}

static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
				    u64 driver_area, u64 device_area)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		mvdev->cvq.desc_addr = desc_area;
		mvdev->cvq.device_addr = device_area;
		mvdev->cvq.driver_addr = driver_area;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	mvq->desc_addr = desc_area;
	mvq->device_addr = device_area;
	mvq->driver_addr = driver_area;
	return 0;
}
static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
		return;

	mvq = &ndev->vqs[idx];
	mvq->num_ent = num;
}

static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	ndev->event_cbs[idx] = *cb;
	if (is_ctrl_vq_idx(mvdev, idx))
		mvdev->cvq.event_cb = *cb;
}

static void mlx5_cvq_notify(struct vringh *vring)
{
	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);

	if (!cvq->event_cb.callback)
		return;

	cvq->event_cb.callback(cvq->event_cb.private);
}

static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
{
	struct mlx5_control_vq *cvq = &mvdev->cvq;

	cvq->ready = ready;
	if (!ready)
		return;

	cvq->vring.notify = mlx5_cvq_notify;
}
static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	int err;

	if (!mvdev->actual_features)
		return;

	if (!is_index_valid(mvdev, idx))
		return;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		set_cvq_ready(mvdev, ready);
		return;
	}

	mvq = &ndev->vqs[idx];
	if (!ready) {
		suspend_vq(ndev, mvq);
	} else {
		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
			ready = false;
		}
	}

	mvq->ready = ready;
}

static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	if (!is_index_valid(mvdev, idx))
		return false;

	if (is_ctrl_vq_idx(mvdev, idx))
		return mvdev->cvq.ready;

	return ndev->vqs[idx].ready;
}

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
				  const struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
		return -EINVAL;
	}

	mvq->used_idx = state->split.avail_index;
	mvq->avail_idx = state->split.avail_index;
	return 0;
}
static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_virtq_attr attr;
	int err;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after the vq is stopped.
	 */
	if (!mvq->initialized) {
		/* Firmware returns a wrong value for the available index.
		 * Since both values should be identical, we take the value of
		 * used_idx which is reported correctly.
		 */
		state->split.avail_index = mvq->used_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	state->split.avail_index = attr.used_index;
	return 0;
}

static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}

static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	if (is_ctrl_vq_idx(mvdev, idx))
		return MLX5_VDPA_CVQ_GROUP;

	return MLX5_VDPA_DATAVQ_GROUP;
}

static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	if (is_ctrl_vq_idx(mvdev, idx))
		return MLX5_VDPA_CVQ_GROUP;

	return MLX5_VDPA_DATAVQ_DESC_GROUP;
}

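/* Translate the device feature bits reported by the VDPA emulation
 * capability (MLX5_VIRTIO_NET_F_*) into the corresponding VIRTIO_NET_F_*
 * bit positions defined by the virtio spec.
 */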
static u64 mlx_to_virtio_features(u16 dev_features)
{
	u64 result = 0;

	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);

	return result;
}

static u64 get_supported_features(struct mlx5_core_dev *mdev)
{
	u64 mlx_vdpa_features = 0;
	u16 dev_features;

	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);

	return mlx_vdpa_features;
}

static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}

static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{
	/* Minimum features to expect */
	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return -EOPNOTSUPP;

	/* Double check the feature combination sent down by the driver.
	 * Fail invalid features due to absence of the depended-on feature.
	 *
	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
	 * By failing the invalid features sent down by untrusted drivers,
	 * we're assured the assumptions made upon is_index_valid() and
	 * is_ctrl_vq_idx() will not be compromised.
	 */
	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
	    BIT_ULL(VIRTIO_NET_F_MQ))
		return -EINVAL;

	return 0;
}

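/* Create the hardware virtqueue objects for all queues. On failure, unwind
 * in reverse order so no partially created queues are left behind.
 */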
static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;
	int i;

	for (i = 0; i < mvdev->max_vqs; i++) {
		err = setup_vq(ndev, &ndev->vqs[i]);
		if (err)
			goto err_vq;
	}

	return 0;

err_vq:
	for (--i; i >= 0; i--)
		teardown_vq(ndev, &ndev->vqs[i]);

	return err;
}

static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
		mvq = &ndev->vqs[i];
		if (!mvq->initialized)
			continue;

		teardown_vq(ndev, mvq);
	}
}

static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
{
	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
			/* MQ supported. CVQ index is right above the last data virtqueue's */
			mvdev->max_idx = mvdev->max_vqs;
		} else {
			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
			 * CVQ gets index 2.
			 */
			mvdev->max_idx = 2;
		}
	} else {
		/* Two data virtqueues only: one for rx and one for tx */
		mvdev->max_idx = 1;
	}
}

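/* Query the vport state with the QUERY_VPORT_STATE command. Note that a
 * command failure maps to state 0, which get_link_state() treats as link
 * down.
 */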
static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(query_vport_state_in, in, other_vport, 1);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return 0;

	return MLX5_GET(query_vport_state_out, out, state);
}

static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
{
	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
	    VPORT_STATE_UP)
		return true;

	return false;
}

static void update_carrier(struct work_struct *work)
{
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	if (ndev->config_cb.callback)
		ndev->config_cb.callback(ndev->config_cb.private);

	kfree(wqent);
}

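/* Link change events arrive over the mlx5 notifier chain, which may run in
 * atomic (EQ interrupt) context, hence the GFP_ATOMIC allocation here. The
 * config-space update and driver callback run later in workqueue context.
 */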
static int queue_link_work(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_wq_ent *wqent;

	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
	if (!wqent)
		return -ENOMEM;

	wqent->mvdev = &ndev->mvdev;
	INIT_WORK(&wqent->work, update_carrier);
	queue_work(ndev->mvdev.wq, &wqent->work);
	return 0;
}

static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
	struct mlx5_eqe *eqe = param;
	int ret = NOTIFY_DONE;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			if (queue_link_work(ndev))
				return NOTIFY_DONE;

			ret = NOTIFY_OK;
			break;
		default:
			break;
		}
		return ret;
	}
	return ret;
}

static void register_link_notifier(struct mlx5_vdpa_net *ndev)
{
	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
		return;

	ndev->nb.notifier_call = event_handler;
	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
	ndev->nb_registered = true;
	queue_link_work(ndev);
}

static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->nb_registered)
		return;

	ndev->nb_registered = false;
	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
	if (ndev->mvdev.wq)
		flush_workqueue(ndev->mvdev.wq);
}

static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
{
	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
}

static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_features(mvdev, features, true);

	err = verify_driver_features(mvdev, features);
	if (err)
		return err;

	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
	else
		ndev->rqt_size = 1;

	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
	 * 5.1.6.5.5 "Device operation in multiqueue mode":
	 *
	 * Multiqueue is disabled by default.
	 * The driver enables multiqueue by sending a command using class
	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
	 * operation, as follows: ...
	 */
	ndev->cur_num_vqs = 2;

	update_cvq_info(mvdev);
	return err;
}

static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	ndev->config_cb = *cb;
}

#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}

static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}

static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}

static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}

static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vq_restore_info *ri = &mvq->ri;
	struct mlx5_virtq_attr attr = {};
	int err;

	if (mvq->initialized) {
		err = query_virtqueue(ndev, mvq, &attr);
		if (err)
			return err;
	}

	ri->avail_index = attr.available_index;
	ri->used_index = attr.used_index;
	ri->ready = mvq->ready;
	ri->num_ent = mvq->num_ent;
	ri->desc_addr = mvq->desc_addr;
	ri->device_addr = mvq->device_addr;
	ri->driver_addr = mvq->driver_addr;
	ri->restore = true;
	return 0;
}

static int save_channels_info(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
		save_channel_info(ndev, &ndev->vqs[i]);
	}
	return 0;
}

static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}

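/* Rebuild the virtqueue software state from the restore info captured by
 * save_channels_info() before the virtqueue objects were destroyed.
 */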
static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vq_restore_info *ri;
	int i;

	mlx5_clear_vqs(ndev);
	init_mvqs(ndev);
	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		ri = &mvq->ri;
		if (!ri->restore)
			continue;

		mvq->avail_idx = ri->avail_index;
		mvq->used_idx = ri->used_index;
		mvq->ready = ri->ready;
		mvq->num_ent = ri->num_ent;
		mvq->desc_addr = ri->desc_addr;
		mvq->device_addr = ri->device_addr;
		mvq->driver_addr = ri->driver_addr;
	}
}

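/* Changing the memory map means the hardware objects referencing the old
 * memory key must be torn down, the new MR swapped in, and, if the device
 * was running, the virtqueues recreated from the saved channel info.
 */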
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *new_mr,
				unsigned int asid)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	suspend_vqs(ndev);
	err = save_channels_info(ndev);
	if (err)
		return err;

	teardown_driver(ndev);

	mlx5_vdpa_update_mr(mvdev, new_mr, asid);

	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
		return 0;

	restore_channels_info(ndev);
	err = setup_driver(mvdev);
	if (err)
		return err;

	return 0;
}

/* reslock must be held for this function */
static int setup_driver(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	WARN_ON(!rwsem_is_locked(&ndev->reslock));

	if (ndev->setup) {
		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	mlx5_vdpa_add_debugfs(ndev);

	err = read_umem_params(ndev);
	if (err)
		goto err_setup;

	err = setup_virtqueues(mvdev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
		goto err_setup;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_tir\n");
		goto err_tir;
	}

	err = setup_steering(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_steering\n");
		goto err_fwd;
	}
	ndev->setup = true;

	return 0;

err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
err_setup:
	mlx5_vdpa_remove_debugfs(ndev);
out:
	return err;
}

/* reslock must be held for this function */
static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
	WARN_ON(!rwsem_is_locked(&ndev->reslock));

	if (!ndev->setup)
		return;

	mlx5_vdpa_remove_debugfs(ndev);
	teardown_steering(ndev);
	destroy_tir(ndev);
	destroy_rqt(ndev);
	teardown_virtqueues(ndev);
	ndev->setup = false;
}

static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		ndev->vqs[i].ready = false;

	ndev->mvdev.cvq.ready = false;
}

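/* Initialize the vringh instance backing the emulated control VQ with the
 * addresses programmed through set_vq_address(), preserving the
 * last_avail/last_used indices across re-initialization.
 */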
static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	int err = 0;

	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
		u16 idx = cvq->vring.last_avail_idx;

		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
					MLX5_CVQ_MAX_ENT, false,
					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
					(struct vring_used *)(uintptr_t)cvq->device_addr);

		if (!err)
			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
	}
	return err;
}

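/* Setting DRIVER_OK triggers creation of the whole datapath: control VQ
 * vring, link notifier and hardware objects. Clearing DRIVER_OK without a
 * reset is unexpected and marks the device FAILED.
 */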
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);

	down_write(&ndev->reslock);

	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_cvq_vring(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
				goto err_setup;
			}
			register_link_notifier(ndev);
			err = setup_driver(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_driver;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			goto err_clear;
		}
	}

	ndev->mvdev.status = status;
	up_write(&ndev->reslock);
	return;

err_driver:
	unregister_link_notifier(ndev);
err_setup:
	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
err_clear:
	up_write(&ndev->reslock);
}

static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
{
	int i;

	/* default mapping: all groups are mapped to asid 0 */
	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
		mvdev->group2asid[i] = 0;
}

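/* Reset the device. With VDPA_RESET_F_CLEAN_MAP the memory mappings are
 * destroyed as well and a fresh DMA MR is created; without the flag, the
 * mappings previously set up by userspace survive the reset.
 */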
static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, 0, true);
	mlx5_vdpa_info(mvdev, "performing device reset\n");

	down_write(&ndev->reslock);
	unregister_link_notifier(ndev);
	teardown_driver(ndev);
	clear_vqs_ready(ndev);
	if (flags & VDPA_RESET_F_CLEAN_MAP)
		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
	ndev->mvdev.status = 0;
	ndev->mvdev.suspended = false;
	ndev->cur_num_vqs = 0;
	ndev->mvdev.cvq.received_desc = 0;
	ndev->mvdev.cvq.completed_desc = 0;
	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
	ndev->mvdev.actual_features = 0;
	init_group_to_asid_map(mvdev);
	++mvdev->generation;

	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_warn(mvdev, "create MR failed\n");
	}
	up_write(&ndev->reslock);

	return 0;
}

static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{
	return mlx5_vdpa_compat_reset(vdev, 0);
}

static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
	return sizeof(struct virtio_net_config);
}

static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
				 unsigned int len)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	if (offset + len <= sizeof(struct virtio_net_config))
		memcpy(buf, (u8 *)&ndev->config + offset, len);
}

static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}

static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->generation;
}

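/* A non-empty iotlb is translated into a new memory region, while an empty
 * one clears the previous mapping. If no MR exists yet for this address
 * space the new one is installed directly; otherwise the heavier
 * mlx5_vdpa_change_map() path is taken.
 */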
static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			unsigned int asid)
{
	struct mlx5_vdpa_mr *new_mr;
	int err;

	if (asid >= MLX5_VDPA_NUM_AS)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
		if (IS_ERR(new_mr)) {
			err = PTR_ERR(new_mr);
			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
			return err;
		}
	} else {
		/* Empty iotlbs don't have an mr but will clear the previous mr. */
		new_mr = NULL;
	}

	if (!mvdev->mr[asid]) {
		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
	} else {
		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
		if (err) {
			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
			goto out_err;
		}
	}

	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);

out_err:
	mlx5_vdpa_destroy_mr(mvdev, new_mr);
	return err;
}

static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
			     struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	down_write(&ndev->reslock);
	err = set_map_data(mvdev, iotlb, asid);
	up_write(&ndev->reslock);
	return err;
}

static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	down_write(&ndev->reslock);
	err = mlx5_vdpa_reset_mr(mvdev, asid);
	up_write(&ndev->reslock);
	return err;
}

static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	if (is_ctrl_vq_idx(mvdev, idx))
		return &vdev->dev;

	return mvdev->vdev.dma_dev;
}

static void free_irqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_irq_pool_entry *ent;
	int i;

	if (!msix_mode_supported(&ndev->mvdev))
		return;

	if (!ndev->irqp.entries)
		return;

	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
		ent = ndev->irqp.entries + i;
		if (ent->map.virq)
			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
	}
	kfree(ndev->irqp.entries);
}

static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	free_resources(ndev);
	mlx5_vdpa_destroy_mr_resources(mvdev);
	if (!is_zero_ether_addr(ndev->config.mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
	}
	mlx5_vdpa_free_resources(&ndev->mvdev);
	free_irqs(ndev);
	kfree(ndev->event_cbs);
	kfree(ndev->vqs);
}

static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct vdpa_notification_area ret = {};
	struct mlx5_vdpa_net *ndev;
	phys_addr_t addr;

	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
		return ret;

	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification to avoid the risk of mapping pages that contain the BAR
	 * of more than one SF.
	 */
	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
		return ret;

	ndev = to_mlx5_vdpa_ndev(mvdev);
	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
	ret.addr = addr;
	ret.size = PAGE_SIZE;
	return ret;
}

static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx))
		return -EOPNOTSUPP;

	mvq = &ndev->vqs[idx];
	if (!mvq->map.virq)
		return -EOPNOTSUPP;

	return mvq->map.virq;
}

static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->actual_features;
}

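/* Read the hardware queue counters (received/completed descriptors) with a
 * QUERY_GENERAL_OBJECT command on the virtio_q counters object attached to
 * the virtqueue.
 */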
static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			     u64 *received_desc, u64 *completed_desc)
{
	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	void *ctx;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return -EOPNOTSUPP;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return -EAGAIN;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
	return 0;
}

static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
					 struct sk_buff *msg,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_control_vq *cvq;
	u64 received_desc;
	u64 completed_desc;
	int err = 0;

	down_read(&ndev->reslock);
	if (!is_index_valid(mvdev, idx)) {
		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
		err = -EINVAL;
		goto out_err;
	}

	if (idx == ctrl_vq_idx(mvdev)) {
		cvq = &mvdev->cvq;
		received_desc = cvq->received_desc;
		completed_desc = cvq->completed_desc;
		goto out;
	}

	mvq = &ndev->vqs[idx];
	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
		goto out_err;
	}

out:
	err = -EMSGSIZE;
	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
		goto out_err;

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
			      VDPA_ATTR_PAD))
		goto out_err;

	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
		goto out_err;

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
			      VDPA_ATTR_PAD))
		goto out_err;

	err = 0;
out_err:
	up_read(&ndev->reslock);
	return err;
}

static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_control_vq *cvq;

	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return;

	cvq = &mvdev->cvq;
	cvq->ready = false;
}

static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	mlx5_vdpa_info(mvdev, "suspending device\n");

	down_write(&ndev->reslock);
	unregister_link_notifier(ndev);
	for (i = 0; i < ndev->cur_num_vqs; i++) {
		mvq = &ndev->vqs[i];
		suspend_vq(ndev, mvq);
	}
	mlx5_vdpa_cvq_suspend(mvdev);
	mvdev->suspended = true;
	up_write(&ndev->reslock);
	return 0;
}

static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
			       unsigned int asid)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	int err = 0;

	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
		return -EINVAL;

	mvdev->group2asid[group] = asid;

	mutex_lock(&mvdev->mr_mtx);
	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
	mutex_unlock(&mvdev->mr_mtx);

	return err;
}

static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_vq_group = mlx5_vdpa_get_vq_group,
	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
	.get_device_features = mlx5_vdpa_get_device_features,
	.get_backend_features = mlx5_vdpa_get_backend_features,
	.set_driver_features = mlx5_vdpa_set_driver_features,
	.get_driver_features = mlx5_vdpa_get_driver_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.reset = mlx5_vdpa_reset,
	.compat_reset = mlx5_vdpa_compat_reset,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.reset_map = mlx5_vdpa_reset_map,
	.set_group_asid = mlx5_set_group_asid,
	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
	.free = mlx5_vdpa_free,
	.suspend = mlx5_vdpa_suspend,
};

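/* The hardware MTU presumably includes the ethernet hard header
 * (MLX5V_ETH_HARD_MTU); subtract it to get the MTU seen by virtio-net.
 */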
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
	u16 hw_mtu;
	int err;

	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
	if (err)
		return err;

	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
	return 0;
}

static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}

static void free_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;

	if (!res->valid)
		return;

	destroy_tis(ndev);
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	res->valid = false;
}

static void init_mvqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
		mvq->fwqp.fw = true;
		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	}
	for (; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
	}
}

struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;
	struct mlx5_adev *madev;
	struct mlx5_vdpa_net *ndev;
	struct vdpa_config_ops vdpa_ops;
};

static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
	void *in;
	int err;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
		 mtu + MLX5V_ETH_HARD_MTU);
	MLX5_SET(modify_nic_vport_context_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);

	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);

	kvfree(in);
	return err;
}

static void allocate_irqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_irq_pool_entry *ent;
	int i;

	if (!msix_mode_supported(&ndev->mvdev))
		return;

	if (!ndev->mvdev.mdev->pdev)
		return;

	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
	if (!ndev->irqp.entries)
		return;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		ent = ndev->irqp.entries + i;
		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
			 dev_name(&ndev->mvdev.vdev.dev), i);
		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
		if (!ent->map.virq)
			return;

		ndev->irqp.num_ent++;
	}
}

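/* Management device callback instantiating a single vdpa net device:
 * validate the provisioned features and queue counts against device
 * capabilities, fill the config space, reserve the MAC in the physical
 * function's MPFS table, then allocate core resources and register with
 * the vdpa bus.
 */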
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
			     const struct vdpa_dev_set_config *add_config)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u64 device_features;
	u32 max_vqs;
	u16 mtu;
	int err;

	if (mgtdev->ndev)
		return -ENOSPC;

	mdev = mgtdev->madev->mdev;
	device_features = mgtdev->mgtdev.supported_features;
	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
		if (add_config->device_features & ~device_features) {
			dev_warn(mdev->device,
				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
				 add_config->device_features, device_features);
			return -EINVAL;
		}
		device_features &= add_config->device_features;
	} else {
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	}
	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
		dev_warn(mdev->device,
			 "Must provision minimum features 0x%llx for this device",
			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
		return -EOPNOTSUPP;
	}

	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
	      MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
		dev_warn(mdev->device, "missing support for split virtqueues\n");
		return -EOPNOTSUPP;
	}

	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
	if (max_vqs < 2) {
		dev_warn(mdev->device,
			 "%d virtqueues are supported. At least 2 are required\n",
			 max_vqs);
		return -EAGAIN;
	}

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
		if (add_config->net.max_vq_pairs > max_vqs / 2)
			return -EINVAL;
		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
	} else {
		max_vqs = 2;
	}

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;

	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
	if (!ndev->vqs || !ndev->event_cbs) {
		err = -ENOMEM;
		goto err_alloc;
	}

	init_mvqs(ndev);
	allocate_irqs(ndev);
	init_rwsem(&ndev->reslock);
	config = &ndev->config;

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
		err = config_func_mtu(mdev, add_config->net.mtu);
		if (err)
			goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
		err = query_mtu(mdev, &mtu);
		if (err)
			goto err_alloc;

		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
		if (get_link_state(mvdev))
			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
		else
			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
	}

	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	/* Don't bother setting a mac address in config if _F_MAC won't be provisioned */
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
		if (err)
			goto err_alloc;
	}

	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_alloc;
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
		/*
		 * We used to clear the _F_MAC feature bit on seeing a zero mac
		 * address when device features were not specifically
		 * provisioned. Keep the behaviour so old scripts do not break.
		 */
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		/* Don't provision a zero mac address for _F_MAC */
		mlx5_vdpa_warn(&ndev->mvdev,
			       "No mac address provisioned?\n");
		err = -EINVAL;
		goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);

	ndev->mvdev.mlx_features = device_features;
	mvdev->vdev.dma_dev = &mdev->pdev->dev;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mpfs;

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		err = mlx5_vdpa_create_dma_mr(mvdev);
		if (err)
			goto err_res;
	}

	err = alloc_resources(ndev);
	if (err)
		goto err_mr;

	ndev->cvq_ent.mvdev = mvdev;
	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
	if (!mvdev->wq) {
		err = -ENOMEM;
		goto err_res2;
	}

	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;
	return 0;

err_reg:
	destroy_workqueue(mvdev->wq);
err_res2:
	free_resources(ndev);
err_mr:
	mlx5_vdpa_destroy_mr_resources(mvdev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
	if (!is_zero_ether_addr(config->mac))
		mlx5_mpfs_del_mac(pfmdev, config->mac);
err_alloc:
	put_device(&mvdev->vdev.dev);
	return err;
}

static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct workqueue_struct *wq;

	unregister_link_notifier(ndev);
	_vdpa_unregister_device(dev);
	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
	mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
	mgtdev->mgtdev.max_supported_vqs =
		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
	mgtdev->madev = madev;
	mgtdev->vdpa_ops = mlx5_vdpa_ops;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
		mgtdev->vdpa_ops.get_vq_desc_group = NULL;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	auxiliary_set_drvdata(adev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = auxiliary_get_drvdata(adev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);