1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <linux/virtio_config.h>
10 #include <linux/auxiliary_bus.h>
11 #include <linux/mlx5/cq.h>
12 #include <linux/mlx5/qp.h>
13 #include <linux/mlx5/device.h>
14 #include <linux/mlx5/driver.h>
15 #include <linux/mlx5/vport.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/mlx5_ifc_vdpa.h>
18 #include <linux/mlx5/mpfs.h>
19 #include "mlx5_vdpa.h"
21 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
22 MODULE_DESCRIPTION("Mellanox VDPA driver");
23 MODULE_LICENSE("Dual BSD/GPL");
25 #define to_mlx5_vdpa_ndev(__mvdev) \
26 container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
27 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
29 #define VALID_FEATURES_MASK \
30 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
31 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
32 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
33 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
34 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
35 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
36 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
37 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
38 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
39 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
40 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
41 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
42 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
44 #define VALID_STATUS_MASK \
45 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
46 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
48 struct mlx5_vdpa_net_resources {
56 struct mlx5_vdpa_cq_buf {
57 struct mlx5_frag_buf_ctrl fbc;
58 struct mlx5_frag_buf frag_buf;
64 struct mlx5_core_cq mcq;
65 struct mlx5_vdpa_cq_buf buf;
70 struct mlx5_vdpa_umem {
71 struct mlx5_frag_buf_ctrl fbc;
72 struct mlx5_frag_buf frag_buf;
78 struct mlx5_core_qp mqp;
79 struct mlx5_frag_buf frag_buf;
85 struct mlx5_vq_restore_info {
93 struct vdpa_callback cb;
97 struct mlx5_vdpa_virtqueue {
103 struct vdpa_callback event_cb;
105 /* Resources for implementing the notification channel from the device
106 * to the driver. fwqp is the firmware end of an RC connection; the
107 * other end is vqqp used by the driver. cq is where completions are
108 * reported by the device.
109 */
110 struct mlx5_vdpa_cq cq;
111 struct mlx5_vdpa_qp fwqp;
112 struct mlx5_vdpa_qp vqqp;
114 /* umem resources are required for the virtqueue operation. Their use
115 * is internal and they must be provided by the driver.
116 */
117 struct mlx5_vdpa_umem umem1;
118 struct mlx5_vdpa_umem umem2;
119 struct mlx5_vdpa_umem umem3;
124 struct mlx5_vdpa_net *ndev;
129 /* keep last in the struct */
130 struct mlx5_vq_restore_info ri;
133 /* We will remove this limitation once mlx5_vdpa_alloc_resources()
134 * provides for driver space allocation.
135 */
136 #define MLX5_MAX_SUPPORTED_VQS 16
138 struct mlx5_vdpa_net {
139 struct mlx5_vdpa_dev mvdev;
140 struct mlx5_vdpa_net_resources res;
141 struct virtio_net_config config;
142 struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
144 /* Serialize vq resources creation and destruction. This is required
145 * since memory map might change and we need to destroy and create
146 * resources while the driver is operational.
147 */
148 struct mutex reslock;
149 struct mlx5_flow_table *rxft;
150 struct mlx5_fc *rx_counter;
151 struct mlx5_flow_handle *rx_rule;
156 static void free_resources(struct mlx5_vdpa_net *ndev);
157 static void init_mvqs(struct mlx5_vdpa_net *ndev);
158 static int setup_driver(struct mlx5_vdpa_net *ndev);
159 static void teardown_driver(struct mlx5_vdpa_net *ndev);
161 static bool mlx5_vdpa_debug;
163 #define MLX5_LOG_VIO_FLAG(_feature) \
165 if (features & BIT_ULL(_feature)) \
166 mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
169 #define MLX5_LOG_VIO_STAT(_status) \
171 if (status & (_status)) \
172 mlx5_vdpa_info(mvdev, "%s\n", #_status); \
175 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
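176 {
177 return max_vqs / 2;
178 }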
180 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
182 if (status & ~VALID_STATUS_MASK)
183 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
184 status & ~VALID_STATUS_MASK);
186 if (!mlx5_vdpa_debug)
189 mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
190 if (set && !status) {
191 mlx5_vdpa_info(mvdev, "driver resets the device\n");
195 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
196 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
197 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
198 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
199 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
200 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
203 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
205 if (features & ~VALID_FEATURES_MASK)
206 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
207 features & ~VALID_FEATURES_MASK);
209 if (!mlx5_vdpa_debug)
212 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
214 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
216 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
217 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
218 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
219 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
220 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
221 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
222 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
223 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
224 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
225 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
226 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
243 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
244 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
245 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
246 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
247 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
248 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
249 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
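/* The TIS object ties the device's TX datapath to the transport domain
 * allocated in alloc_resources(); its number is later programmed into each
 * TX virtqueue's tisn_or_qpn field by create_virtqueue().
 */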
252 static int create_tis(struct mlx5_vdpa_net *ndev)
254 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
255 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
259 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
260 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
261 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
263 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
268 static void destroy_tis(struct mlx5_vdpa_net *ndev)
270 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
273 #define MLX5_VDPA_CQE_SIZE 64
274 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
276 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
278 struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
279 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
280 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
283 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
284 ndev->mvdev.mdev->priv.numa_node);
288 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
290 buf->cqe_size = MLX5_VDPA_CQE_SIZE;
296 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
298 struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
300 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
301 ndev->mvdev.mdev->priv.numa_node);
304 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
306 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
309 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
311 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
314 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
316 struct mlx5_cqe64 *cqe64;
320 for (i = 0; i < buf->nent; i++) {
321 cqe = get_cqe(vcq, i);
323 cqe64->op_own = MLX5_CQE_INVALID << 4;
327 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
329 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
331 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
332 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
338 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
341 vqp->db.db[0] = cpu_to_be32(vqp->head);
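/* Fill the create_qp command. The firmware-owned QP leaves most parameters
 * for the firmware to choose; the driver-owned QP is a receive-only RC QP
 * (no SQ) whose RQ buffer, doorbell record and CQ belong to the driver.
 */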
344 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
345 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
347 struct mlx5_vdpa_qp *vqp;
351 vqp = fw ? &mvq->fwqp : &mvq->vqqp;
352 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
353 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
355 /* Firmware QP is allocated by the driver for the firmware's
356 * use so we can skip part of the params as they will be chosen by firmware
357 */
358 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
359 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
360 MLX5_SET(qpc, qpc, no_sq, 1);
364 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
365 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
366 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
367 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
368 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
369 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
370 MLX5_SET(qpc, qpc, no_sq, 1);
371 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
372 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
373 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
374 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
375 mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
378 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
380 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
381 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
382 ndev->mvdev.mdev->priv.numa_node);
385 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
387 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
390 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
391 struct mlx5_vdpa_qp *vqp)
393 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
394 int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
395 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
402 err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
406 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
409 inlen += vqp->frag_buf.npages * sizeof(__be64);
412 in = kzalloc(inlen, GFP_KERNEL);
418 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
419 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
420 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
421 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
422 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
423 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
425 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
426 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
427 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
432 vqp->mqp.uid = ndev->mvdev.res.uid;
433 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
436 rx_post(vqp, mvq->num_ent);
442 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
445 rq_buf_free(ndev, vqp);
450 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
452 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
454 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
455 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
456 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
457 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
458 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
460 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
461 rq_buf_free(ndev, vqp);
465 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
467 return get_sw_cqe(cq, cq->mcq.cons_index);
470 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
472 struct mlx5_cqe64 *cqe64;
474 cqe64 = next_cqe_sw(vcq);
478 vcq->mcq.cons_index++;
482 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
484 mlx5_cq_set_ci(&mvq->cq.mcq);
486 /* make sure CQ consumer update is visible to the hardware before updating
487 * RX doorbell record.
488 */
489 dma_wmb();
490 rx_post(&mvq->vqqp, num);
491 if (mvq->event_cb.callback)
492 mvq->event_cb.callback(mvq->event_cb.private);
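/* CQ event handler: drain all ready CQEs, flushing doorbells mid-burst when
 * more than half the ring has been consumed, then notify the vdpa callback
 * and re-arm the CQ for the next event.
 */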
495 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
497 struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
498 struct mlx5_vdpa_net *ndev = mvq->ndev;
499 void __iomem *uar_page = ndev->mvdev.res.uar->map;
502 while (!mlx5_vdpa_poll_one(&mvq->cq)) {
504 if (num > mvq->num_ent / 2) {
505 /* If completions keep coming while we poll, we want to
506 * let the hardware know that we consumed them by
507 * updating the doorbell record. We also let vdpa core
508 * know about this so it passes it on to the virtio driver
509 * on the guest.
510 */
511 mlx5_vdpa_handle_completions(mvq, num);
517 mlx5_vdpa_handle_completions(mvq, num);
519 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
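/* Create the completion queue backing a virtqueue's notification channel
 * and arm it so the first completion generates an event.
 */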
522 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
524 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
525 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
526 void __iomem *uar_page = ndev->mvdev.res.uar->map;
527 u32 out[MLX5_ST_SZ_DW(create_cq_out)];
528 struct mlx5_vdpa_cq *vcq = &mvq->cq;
537 err = mlx5_db_alloc(mdev, &vcq->db);
541 vcq->mcq.set_ci_db = vcq->db.db;
542 vcq->mcq.arm_db = vcq->db.db + 1;
543 vcq->mcq.cqe_sz = 64;
545 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
549 cq_frag_buf_init(vcq, &vcq->buf);
551 inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
552 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
553 in = kzalloc(inlen, GFP_KERNEL);
559 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
560 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
561 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
563 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
564 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
566 /* Use vector 0 by default. Consider adding code to choose least used
567 * vector.
568 */
569 err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
573 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
574 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
575 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
576 MLX5_SET(cqc, cqc, c_eqn, eqn);
577 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
579 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
583 vcq->mcq.comp = mlx5_vdpa_cq_comp;
585 vcq->mcq.set_ci_db = vcq->db.db;
586 vcq->mcq.arm_db = vcq->db.db + 1;
587 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
594 cq_frag_buf_free(ndev, &vcq->buf);
596 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
600 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
602 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
603 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
604 struct mlx5_vdpa_cq *vcq = &mvq->cq;
606 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
607 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
610 cq_frag_buf_free(ndev, &vcq->buf);
611 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
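/* The size of each of the three umems a virtqueue needs is advertised by
 * device capabilities as a linear function of the queue size:
 * size = umem_<num>_buffer_param_a * num_ent + umem_<num>_buffer_param_b.
 */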
614 static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
615 struct mlx5_vdpa_umem **umemp)
617 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
623 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
624 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
625 *umemp = &mvq->umem1;
628 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
629 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
630 *umemp = &mvq->umem2;
633 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
634 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
635 *umemp = &mvq->umem3;
638 return p_a * mvq->num_ent + p_b;
641 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
643 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
646 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
649 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
655 struct mlx5_vdpa_umem *umem;
657 size = umem_size(ndev, mvq, num, &umem);
662 err = umem_frag_buf_alloc(ndev, umem, size);
666 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
668 in = kzalloc(inlen, GFP_KERNEL);
674 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
675 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
676 um = MLX5_ADDR_OF(create_umem_in, in, umem);
677 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
678 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
680 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
681 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
683 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
685 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
690 umem->id = MLX5_GET(create_umem_out, out, umem_id);
697 umem_frag_buf_free(ndev, umem);
701 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
703 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
704 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
705 struct mlx5_vdpa_umem *umem;
719 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
720 MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
721 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
724 umem_frag_buf_free(ndev, umem);
727 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
732 for (num = 1; num <= 3; num++) {
733 err = create_umem(ndev, mvq, num);
740 for (num--; num > 0; num--)
741 umem_destroy(ndev, mvq, num);
746 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
750 for (num = 3; num > 0; num--)
751 umem_destroy(ndev, mvq, num);
754 static int get_queue_type(struct mlx5_vdpa_net *ndev)
758 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
760 /* prefer split queue */
761 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
762 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
764 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
766 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
769 static bool vq_is_tx(u16 idx)
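770 {
771 return idx % 2;
772 }

/* Pack the checksum and TSO feature bits into the layout the device expects
 * in the queue_feature_bit_mask_12_3 field of the virtqueue object.
 */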
774 static u16 get_features_12_3(u64 features)
776 return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
777 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
778 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
779 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
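/* Create the VIRTIO_NET_Q general object implementing the virtqueue: ring
 * addresses and size, the event QP used for notifications, the mkey covering
 * guest memory and the three backing umems are all programmed here.
 */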
782 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
784 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
785 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
792 err = umems_create(ndev, mvq);
796 in = kzalloc(inlen, GFP_KERNEL);
802 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
804 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
805 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
806 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
808 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
809 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
810 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
811 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
812 get_features_12_3(ndev->mvdev.actual_features));
813 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
814 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
816 if (vq_is_tx(mvq->index))
817 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
819 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
820 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
821 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
822 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
823 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
824 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
825 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
826 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
827 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
828 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
829 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
830 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
831 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
832 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
833 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
834 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
835 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
836 if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
837 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
839 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
844 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
851 umems_destroy(ndev, mvq);
855 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
857 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
858 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
860 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
861 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
862 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
863 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
864 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
865 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
866 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
867 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
870 umems_destroy(ndev, mvq);
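/* Return the local/remote QP number for either end of the RC connection,
 * depending on whether the firmware QP is the one being modified.
 */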
873 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
875 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
878 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
880 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
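/* Allocate and fill the command input/output buffers for one QP state
 * transition; the mandatory fields differ per transition opcode.
 */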
883 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
884 int *outlen, u32 qpn, u32 rqpn)
890 case MLX5_CMD_OP_2RST_QP:
891 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
892 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
893 *in = kzalloc(*inlen, GFP_KERNEL);
894 *out = kzalloc(*outlen, GFP_KERNEL);
898 MLX5_SET(qp_2rst_in, *in, opcode, cmd);
899 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
900 MLX5_SET(qp_2rst_in, *in, qpn, qpn);
902 case MLX5_CMD_OP_RST2INIT_QP:
903 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
904 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
905 *in = kzalloc(*inlen, GFP_KERNEL);
906 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
910 MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
911 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
912 MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
913 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
914 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
915 MLX5_SET(qpc, qpc, rwe, 1);
916 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
917 MLX5_SET(ads, pp, vhca_port_num, 1);
919 case MLX5_CMD_OP_INIT2RTR_QP:
920 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
921 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
922 *in = kzalloc(*inlen, GFP_KERNEL);
923 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
927 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
928 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
929 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
930 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
931 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
932 MLX5_SET(qpc, qpc, log_msg_max, 30);
933 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
934 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
935 MLX5_SET(ads, pp, fl, 1);
937 case MLX5_CMD_OP_RTR2RTS_QP:
938 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
939 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
940 *in = kzalloc(*inlen, GFP_KERNEL);
941 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
945 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
946 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
947 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
948 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
949 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
950 MLX5_SET(ads, pp, ack_timeout, 14);
951 MLX5_SET(qpc, qpc, retry_count, 7);
952 MLX5_SET(qpc, qpc, rnr_retry, 7);
968 static void free_inout(void *in, void *out)
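969 {
970 kfree(in);
971 kfree(out);
972 }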
974 /* Two QPs are used by each virtqueue. One is used by the driver and one by
975 * firmware. The fw argument indicates whether the subjected QP is the one used
976 * by the firmware.
977 */
978 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
986 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
990 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
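/* Connect the two QPs by walking both ends through the RC state machine:
 * RESET -> INIT -> RTR for the driver QP, and RESET -> INIT -> RTR -> RTS
 * for the firmware QP, each step naming the peer's QP number.
 */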
995 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
999 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1003 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1007 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1011 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1015 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1019 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1023 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1026 struct mlx5_virtq_attr {
1028 u16 available_index;
1032 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1033 struct mlx5_virtq_attr *attr)
1035 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1036 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1042 out = kzalloc(outlen, GFP_KERNEL);
1046 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1048 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1049 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1050 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1051 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1052 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1056 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1057 memset(attr, 0, sizeof(*attr));
1058 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1059 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1060 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
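/* Move a virtqueue object between firmware states (e.g. RDY and SUSPEND)
 * using MODIFY_GENERAL_OBJECT with only the state field selected.
 */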
1069 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1071 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1072 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1078 in = kzalloc(inlen, GFP_KERNEL);
1082 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1084 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1085 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1086 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1087 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1089 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1090 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1091 MLX5_VIRTQ_MODIFY_MASK_STATE);
1092 MLX5_SET(virtio_net_q_object, obj_context, state, state);
1093 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1096 mvq->fw_state = state;
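/* Bring one virtqueue fully up: create its CQ, the firmware and driver QPs,
 * connect them, create the virtqueue object and move it to the RDY state.
 */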
1101 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1103 u16 idx = mvq->index;
1109 if (mvq->initialized) {
1110 mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
1114 err = cq_create(ndev, idx, mvq->num_ent);
1118 err = qp_create(ndev, mvq, &mvq->fwqp);
1122 err = qp_create(ndev, mvq, &mvq->vqqp);
1126 err = connect_qps(ndev, mvq);
1130 err = create_virtqueue(ndev, mvq);
1135 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1137 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1143 mvq->initialized = true;
1147 qp_destroy(ndev, &mvq->vqqp);
1149 qp_destroy(ndev, &mvq->fwqp);
1151 cq_destroy(ndev, idx);
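/* Suspend a ready virtqueue and snapshot its available/used indices so the
 * ring state can be restored if the queue is re-created later.
 */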
1155 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1157 struct mlx5_virtq_attr attr;
1159 if (!mvq->initialized)
1162 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1165 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1166 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1168 if (query_virtqueue(ndev, mvq, &attr)) {
1169 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1172 mvq->avail_idx = attr.available_index;
1173 mvq->used_idx = attr.used_index;
1176 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1180 for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1181 suspend_vq(ndev, &ndev->vqs[i]);
1184 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1186 if (!mvq->initialized)
1189 suspend_vq(ndev, mvq);
1190 destroy_virtqueue(ndev, mvq);
1191 qp_destroy(ndev, &mvq->vqqp);
1192 qp_destroy(ndev, &mvq->fwqp);
1193 cq_destroy(ndev, mvq->index);
1194 mvq->initialized = false;
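/* The RQT is the receive-side indirection table; it lists the virtqueue IDs
 * of all initialized RX (even-indexed) queues for the TIR to spread over.
 */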
1197 static int create_rqt(struct mlx5_vdpa_net *ndev)
1207 log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1208 if (log_max_rqt < 1)
1211 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1212 in = kzalloc(inlen, GFP_KERNEL);
1216 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1217 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1219 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1220 MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1221 MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1222 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1223 for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1224 if (!ndev->vqs[j].initialized)
1227 if (!vq_is_tx(ndev->vqs[j].index)) {
1228 list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1233 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1241 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1243 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1246 static int create_tir(struct mlx5_vdpa_net *ndev)
1248 #define HASH_IP_L4PORTS \
1249 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
1250 MLX5_HASH_FIELD_SEL_L4_DPORT)
1251 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1252 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1253 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1254 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1255 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1262 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1266 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1267 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1268 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1270 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1271 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1272 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1273 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1275 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1276 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1277 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1278 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1280 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1281 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1283 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1288 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1290 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
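/* For now RX steering is a single match-all rule: forward every received
 * packet to the TIR created above and count it.
 */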
1293 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1295 struct mlx5_flow_destination dest[2] = {};
1296 struct mlx5_flow_table_attr ft_attr = {};
1297 struct mlx5_flow_act flow_act = {};
1298 struct mlx5_flow_namespace *ns;
1301 /* for now, one entry, match all, forward to tir */
1302 ft_attr.max_fte = 1;
1303 ft_attr.autogroup.max_num_groups = 1;
1305 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1307 mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1311 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1312 if (IS_ERR(ndev->rxft))
1313 return PTR_ERR(ndev->rxft);
1315 ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1316 if (IS_ERR(ndev->rx_counter)) {
1317 err = PTR_ERR(ndev->rx_counter);
1321 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1322 dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1323 dest[0].tir_num = ndev->res.tirn;
1324 dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1325 dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1326 ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1327 if (IS_ERR(ndev->rx_rule)) {
1328 err = PTR_ERR(ndev->rx_rule);
1329 ndev->rx_rule = NULL;
1336 mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1338 mlx5_destroy_flow_table(ndev->rxft);
1342 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1347 mlx5_del_flow_rules(ndev->rx_rule);
1348 mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1349 mlx5_destroy_flow_table(ndev->rxft);
1351 ndev->rx_rule = NULL;
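/* A kick from the virtio driver is translated into a 16-bit write of the
 * queue index to the device's kick doorbell address.
 */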
1354 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1356 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1357 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1358 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1360 if (unlikely(!mvq->ready))
1363 iowrite16(idx, ndev->mvdev.res.kick_addr);
1366 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1367 u64 driver_area, u64 device_area)
1369 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1370 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1371 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1373 mvq->desc_addr = desc_area;
1374 mvq->device_addr = device_area;
1375 mvq->driver_addr = driver_area;
1379 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1381 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1382 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1383 struct mlx5_vdpa_virtqueue *mvq;
1385 mvq = &ndev->vqs[idx];
1389 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1391 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1392 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1393 struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1398 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1400 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1401 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1402 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1405 suspend_vq(ndev, mvq);
1410 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1412 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1413 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1414 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1419 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1420 const struct vdpa_vq_state *state)
1422 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1423 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1424 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1426 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1427 mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1431 mvq->used_idx = state->avail_index;
1432 mvq->avail_idx = state->avail_index;
1436 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1438 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1439 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1440 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1441 struct mlx5_virtq_attr attr;
1444 /* If the virtq object was destroyed, use the value saved at
1445 * the last minute of suspend_vq. This caters for userspace
1446 * that cares about emulating the index after vq is stopped.
1447 */
1448 if (!mvq->initialized) {
1449 /* Firmware returns a wrong value for the available index.
1450 * Since both values should be identical, we take the value of
1451 * used_idx which is reported correctly.
1452 */
1453 state->avail_index = mvq->used_idx;
1457 err = query_virtqueue(ndev, mvq, &attr);
1459 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1462 state->avail_index = attr.used_index;
1466 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1471 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
1472 MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
1473 MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
1474 MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
1477 static u64 mlx_to_virtio_features(u16 dev_features)
1481 if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1482 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1483 if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1484 result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1485 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1486 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1487 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1488 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
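/* Advertised features are the firmware capability bits translated to virtio
 * feature bits, plus VIRTIO_F_VERSION_1 when supported and
 * VIRTIO_F_ACCESS_PLATFORM, which this driver always requires (see
 * verify_min_features()).
 */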
1493 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1495 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1496 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1499 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1500 ndev->mvdev.mlx_features = mlx_to_virtio_features(dev_features);
1501 if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1502 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1503 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1504 print_features(mvdev, ndev->mvdev.mlx_features, false);
1505 return ndev->mvdev.mlx_features;
1508 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1510 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1516 static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1521 for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1522 err = setup_vq(ndev, &ndev->vqs[i]);
1530 for (--i; i >= 0; i--)
1531 teardown_vq(ndev, &ndev->vqs[i]);
1536 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1538 struct mlx5_vdpa_virtqueue *mvq;
1541 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1542 mvq = &ndev->vqs[i];
1543 if (!mvq->initialized)
1546 teardown_vq(ndev, mvq);
1550 /* TODO: cross-endian support */
1551 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
1553 return virtio_legacy_is_little_endian() ||
1554 (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
1557 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
1559 return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
1562 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1564 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1565 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1568 print_features(mvdev, features, true);
1570 err = verify_min_features(mvdev, features);
1574 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1575 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1576 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1580 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1582 /* not implemented */
1583 mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
1586 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
1587 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1589 return MLX5_VDPA_MAX_VQ_ENTRIES;
1592 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1594 return VIRTIO_ID_NET;
1597 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1599 return PCI_VENDOR_ID_MELLANOX;
1602 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1604 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1605 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1607 print_status(mvdev, ndev->mvdev.status, false);
1608 return ndev->mvdev.status;
1611 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1613 struct mlx5_vq_restore_info *ri = &mvq->ri;
1614 struct mlx5_virtq_attr attr;
1617 if (!mvq->initialized)
1620 err = query_virtqueue(ndev, mvq, &attr);
1624 ri->avail_index = attr.available_index;
1625 ri->used_index = attr.used_index;
1626 ri->ready = mvq->ready;
1627 ri->num_ent = mvq->num_ent;
1628 ri->desc_addr = mvq->desc_addr;
1629 ri->device_addr = mvq->device_addr;
1630 ri->driver_addr = mvq->driver_addr;
1631 ri->cb = mvq->event_cb;
1636 static int save_channels_info(struct mlx5_vdpa_net *ndev)
1640 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1641 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1642 save_channel_info(ndev, &ndev->vqs[i]);
1647 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
1651 for (i = 0; i < ndev->mvdev.max_vqs; i++)
1652 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1655 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1657 struct mlx5_vdpa_virtqueue *mvq;
1658 struct mlx5_vq_restore_info *ri;
1661 mlx5_clear_vqs(ndev);
1663 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1664 mvq = &ndev->vqs[i];
1669 mvq->avail_idx = ri->avail_index;
1670 mvq->used_idx = ri->used_index;
1671 mvq->ready = ri->ready;
1672 mvq->num_ent = ri->num_ent;
1673 mvq->desc_addr = ri->desc_addr;
1674 mvq->device_addr = ri->device_addr;
1675 mvq->driver_addr = ri->driver_addr;
1676 mvq->event_cb = ri->cb;
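/* A memory map change invalidates everything referencing the old mkey:
 * save per-queue ring state, tear the driver resources down, rebuild the
 * MR over the new iotlb and, if the device was live (DRIVER_OK), re-create
 * the queues from the saved state.
 */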
1680 static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
1685 err = save_channels_info(ndev);
1689 teardown_driver(ndev);
1690 mlx5_vdpa_destroy_mr(&ndev->mvdev);
1691 err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
1695 if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
1698 restore_channels_info(ndev);
1699 err = setup_driver(ndev);
1706 mlx5_vdpa_destroy_mr(&ndev->mvdev);
1711 static int setup_driver(struct mlx5_vdpa_net *ndev)
1715 mutex_lock(&ndev->reslock);
1717 mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
1721 err = setup_virtqueues(ndev);
1723 mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
1727 err = create_rqt(ndev);
1729 mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
1733 err = create_tir(ndev);
1735 mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
1739 err = add_fwd_to_tir(ndev);
1741 mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
1745 mutex_unlock(&ndev->reslock);
1754 teardown_virtqueues(ndev);
1756 mutex_unlock(&ndev->reslock);
1760 static void teardown_driver(struct mlx5_vdpa_net *ndev)
1762 mutex_lock(&ndev->reslock);
1766 remove_fwd_to_tir(ndev);
1769 teardown_virtqueues(ndev);
1770 ndev->setup = false;
1772 mutex_unlock(&ndev->reslock);
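/* Status writes drive the virtio lifecycle: writing 0 resets the device,
 * and the transition to DRIVER_OK triggers setup_driver(); clearing
 * DRIVER_OK without a full reset is not supported.
 */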
1775 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
1777 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1778 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1781 print_status(mvdev, status, true);
1783 mlx5_vdpa_info(mvdev, "performing device reset\n");
1784 teardown_driver(ndev);
1785 mlx5_vdpa_destroy_mr(&ndev->mvdev);
1786 ndev->mvdev.status = 0;
1787 ndev->mvdev.mlx_features = 0;
1788 ++mvdev->generation;
1792 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
1793 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
1794 err = setup_driver(ndev);
1796 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
1800 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1805 ndev->mvdev.status = status;
1809 mlx5_vdpa_destroy_mr(&ndev->mvdev);
1810 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1813 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
1815 return sizeof(struct virtio_net_config);
1818 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1821 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1822 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1824 if (offset + len <= sizeof(struct virtio_net_config))
1825 memcpy(buf, (u8 *)&ndev->config + offset, len);
1828 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
1834 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1836 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1838 return mvdev->generation;
1841 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1843 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1844 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1848 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1850 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
1855 return mlx5_vdpa_change_map(ndev, iotlb);
1860 static void mlx5_vdpa_free(struct vdpa_device *vdev)
1862 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1863 struct mlx5_core_dev *pfmdev;
1864 struct mlx5_vdpa_net *ndev;
1866 ndev = to_mlx5_vdpa_ndev(mvdev);
1868 free_resources(ndev);
1869 if (!is_zero_ether_addr(ndev->config.mac)) {
1870 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1871 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
1873 mlx5_vdpa_free_resources(&ndev->mvdev);
1874 mutex_destroy(&ndev->reslock);
1877 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
1879 struct vdpa_notification_area ret = {};
1884 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
1889 static const struct vdpa_config_ops mlx5_vdpa_ops = {
1890 .set_vq_address = mlx5_vdpa_set_vq_address,
1891 .set_vq_num = mlx5_vdpa_set_vq_num,
1892 .kick_vq = mlx5_vdpa_kick_vq,
1893 .set_vq_cb = mlx5_vdpa_set_vq_cb,
1894 .set_vq_ready = mlx5_vdpa_set_vq_ready,
1895 .get_vq_ready = mlx5_vdpa_get_vq_ready,
1896 .set_vq_state = mlx5_vdpa_set_vq_state,
1897 .get_vq_state = mlx5_vdpa_get_vq_state,
1898 .get_vq_notification = mlx5_get_vq_notification,
1899 .get_vq_irq = mlx5_get_vq_irq,
1900 .get_vq_align = mlx5_vdpa_get_vq_align,
1901 .get_features = mlx5_vdpa_get_features,
1902 .set_features = mlx5_vdpa_set_features,
1903 .set_config_cb = mlx5_vdpa_set_config_cb,
1904 .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
1905 .get_device_id = mlx5_vdpa_get_device_id,
1906 .get_vendor_id = mlx5_vdpa_get_vendor_id,
1907 .get_status = mlx5_vdpa_get_status,
1908 .set_status = mlx5_vdpa_set_status,
1909 .get_config_size = mlx5_vdpa_get_config_size,
1910 .get_config = mlx5_vdpa_get_config,
1911 .set_config = mlx5_vdpa_set_config,
1912 .get_generation = mlx5_vdpa_get_generation,
1913 .set_map = mlx5_vdpa_set_map,
1914 .free = mlx5_vdpa_free,
1917 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
1922 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
1926 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
1930 static int alloc_resources(struct mlx5_vdpa_net *ndev)
1932 struct mlx5_vdpa_net_resources *res = &ndev->res;
1936 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
1940 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
1944 err = create_tis(ndev);
1953 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1957 static void free_resources(struct mlx5_vdpa_net *ndev)
1959 struct mlx5_vdpa_net_resources *res = &ndev->res;
1965 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1969 static void init_mvqs(struct mlx5_vdpa_net *ndev)
1971 struct mlx5_vdpa_virtqueue *mvq;
1974 for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1975 mvq = &ndev->vqs[i];
1976 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1979 mvq->fwqp.fw = true;
1981 for (; i < ndev->mvdev.max_vqs; i++) {
1982 mvq = &ndev->vqs[i];
1983 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1989 struct mlx5_vdpa_mgmtdev {
1990 struct vdpa_mgmt_dev mgtdev;
1991 struct mlx5_adev *madev;
1992 struct mlx5_vdpa_net *ndev;
1995 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
1997 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
1998 struct virtio_net_config *config;
1999 struct mlx5_core_dev *pfmdev;
2000 struct mlx5_vdpa_dev *mvdev;
2001 struct mlx5_vdpa_net *ndev;
2002 struct mlx5_core_dev *mdev;
2009 mdev = mgtdev->madev->mdev;
2010 /* we save one virtqueue for the control virtqueue, should we require it */
2011 max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
2012 max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
2014 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2017 return PTR_ERR(ndev);
2019 ndev->mvdev.max_vqs = max_vqs;
2020 mvdev = &ndev->mvdev;
2023 mutex_init(&ndev->reslock);
2024 config = &ndev->config;
2025 err = query_mtu(mdev, &ndev->mtu);
2029 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
2033 if (!is_zero_ether_addr(config->mac)) {
2034 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
2035 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
2040 mvdev->vdev.dma_dev = mdev->device;
2041 err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
2045 err = alloc_resources(ndev);
2049 mvdev->vdev.mdev = &mgtdev->mgtdev;
2050 err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
2054 mgtdev->ndev = ndev;
2058 free_resources(ndev);
2060 mlx5_vdpa_free_resources(&ndev->mvdev);
2062 if (!is_zero_ether_addr(config->mac))
2063 mlx5_mpfs_del_mac(pfmdev, config->mac);
2065 mutex_destroy(&ndev->reslock);
2066 put_device(&mvdev->vdev.dev);
2070 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
2072 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2074 _vdpa_unregister_device(dev);
2075 mgtdev->ndev = NULL;
2078 static const struct vdpa_mgmtdev_ops mdev_ops = {
2079 .dev_add = mlx5_vdpa_dev_add,
2080 .dev_del = mlx5_vdpa_dev_del,
2083 static struct virtio_device_id id_table[] = {
2084 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2088 static int mlx5v_probe(struct auxiliary_device *adev,
2089 const struct auxiliary_device_id *id)
2092 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
2093 struct mlx5_core_dev *mdev = madev->mdev;
2094 struct mlx5_vdpa_mgmtdev *mgtdev;
2097 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
2101 mgtdev->mgtdev.ops = &mdev_ops;
2102 mgtdev->mgtdev.device = mdev->device;
2103 mgtdev->mgtdev.id_table = id_table;
2104 mgtdev->madev = madev;
2106 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
2110 dev_set_drvdata(&adev->dev, mgtdev);
2119 static void mlx5v_remove(struct auxiliary_device *adev)
2121 struct mlx5_vdpa_mgmtdev *mgtdev;
2123 mgtdev = dev_get_drvdata(&adev->dev);
2124 vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
2128 static const struct auxiliary_device_id mlx5v_id_table[] = {
2129 { .name = MLX5_ADEV_NAME ".vnet", },
2133 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2135 static struct auxiliary_driver mlx5v_driver = {
2137 .probe = mlx5v_probe,
2138 .remove = mlx5v_remove,
2139 .id_table = mlx5v_id_table,
2142 module_auxiliary_driver(mlx5v_driver);