From: Linus Torvalds
Date: Sun, 5 Nov 2023 19:02:32 +0000 (-1000)
Subject: Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
X-Git-Tag: v6.7-rc1~54
X-Git-Url: http://git.samba.org/samba.git/?a=commitdiff_plain;h=77fa2fbe87fc605c4bfa87dff87be9bfded0e9a3;hp=1cfb751165ef685b80635218beff38d6afbce4e2;p=sfrench%2Fcifs-2.6.git

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "vhost,virtio,vdpa: features, fixes, cleanups.

  vdpa/mlx5:
   - VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK
   - new maintainer

  vdpa:
   - support for vq descriptor mappings
   - decouple reset of iotlb mapping from device reset

  and fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (34 commits)
  vdpa_sim: implement .reset_map support
  vdpa/mlx5: implement .reset_map driver op
  vhost-vdpa: clean iotlb map during reset for older userspace
  vdpa: introduce .compat_reset operation callback
  vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  vhost-vdpa: reset vendor specific mapping to initial state in .release
  vdpa: introduce .reset_map operation callback
  virtio_pci: add check for common cfg size
  virtio-blk: fix implicit overflow on virtio_max_dma_size
  virtio_pci: add build offset check for the new common cfg items
  virtio: add definition of VIRTIO_F_NOTIF_CONFIG_DATA feature bit
  vduse: make vduse_class constant
  vhost-scsi: Spelling s/preceeding/preceding/g
  virtio: kdoc for struct virtio_pci_modern_device
  vdpa: Update sysfs ABI documentation
  MAINTAINERS: Add myself as mlx5_vdpa driver
  virtio-balloon: correct the comment of virtballoon_migratepage()
  mlx5_vdpa: offer VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK
  vdpa/mlx5: Update cvq iotlb mapping on ASID change
  vdpa/mlx5: Make iotlb helper functions more generic
  ...
---

diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa
index 28a6111202ba..4da53878bff6 100644
--- a/Documentation/ABI/testing/sysfs-bus-vdpa
+++ b/Documentation/ABI/testing/sysfs-bus-vdpa
@@ -1,4 +1,4 @@
-What:		/sys/bus/vdpa/driver_autoprobe
+What:		/sys/bus/vdpa/drivers_autoprobe
 Date:		March 2020
 Contact:	virtualization@lists.linux-foundation.org
 Description:
@@ -17,7 +17,7 @@ Description:
 		Writing a device name to this file will cause the kernel
 		binds devices to a compatible driver.
 
-		This can be useful when /sys/bus/vdpa/driver_autoprobe is
+		This can be useful when /sys/bus/vdpa/drivers_autoprobe is
 		disabled.
 
 What:		/sys/bus/vdpa/drivers/.../bind
diff --git a/MAINTAINERS b/MAINTAINERS
index 2cc111933d98..cdc42c5eda24 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13790,6 +13790,12 @@ F:	drivers/infiniband/hw/mlx5/
 F:	include/linux/mlx5/
 F:	include/uapi/rdma/mlx5-abi.h
 
+MELLANOX MLX5 VDPA DRIVER
+M:	Dragos Tatulea
+L:	virtualization@lists.linux-foundation.org
+S:	Supported
+F:	drivers/vdpa/mlx5/
+
 MELLANOX MLXCPLD I2C AND MUX DRIVER
 M:	Vadim Pasternak
 M:	Michael Shych
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4689ac2e0c0e..d53d6aa8ee69 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1311,6 +1311,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	u16 min_io_size;
 	u8 physical_block_exp, alignment_offset;
 	unsigned int queue_depth;
+	size_t max_dma_size;
 
 	if (!vdev->config->get) {
 		dev_err(&vdev->dev, "%s failure: config access disabled\n",
@@ -1409,7 +1410,8 @@ static int virtblk_probe(struct virtio_device *vdev)
 	/* No real sector limit.
*/ blk_queue_max_hw_sectors(q, UINT_MAX); - max_size = virtio_max_dma_size(vdev); + max_dma_size = virtio_max_dma_size(vdev); + max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size; /* Host can optionally specify maximum segment size and number of * segments. */ diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index ca56242972b3..84547d998bcf 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -31,11 +31,9 @@ struct mlx5_vdpa_mr { struct list_head head; unsigned long num_directs; unsigned long num_klms; - /* state of dvq mr */ - bool initialized; - /* serialize mkey creation and destruction */ - struct mutex mkey_mtx; + struct vhost_iotlb *iotlb; + bool user_mr; }; @@ -74,11 +72,12 @@ struct mlx5_vdpa_wq_ent { enum { MLX5_VDPA_DATAVQ_GROUP, MLX5_VDPA_CVQ_GROUP, + MLX5_VDPA_DATAVQ_DESC_GROUP, MLX5_VDPA_NUMVQ_GROUPS }; enum { - MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS + MLX5_VDPA_NUM_AS = 2 }; struct mlx5_vdpa_dev { @@ -93,7 +92,9 @@ struct mlx5_vdpa_dev { u16 max_idx; u32 generation; - struct mlx5_vdpa_mr mr; + struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; + /* serialize mr access */ + struct mutex mr_mtx; struct mlx5_control_vq cvq; struct workqueue_struct *wq; unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; @@ -114,12 +115,19 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int inlen); int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); -int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map, unsigned int asid); -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - unsigned int asid); -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); -void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid); +struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb); +void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + unsigned int asid); +int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid); +int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev); +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid); #define mlx5_vdpa_warn(__dev, format, ...) \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 5a1971fcd87b..2197c46e563a 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -301,10 +301,13 @@ static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct sg_free_table(&mr->sg_head); } -static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, +static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + u64 start, + u64 size, + u8 perm, struct vhost_iotlb *iotlb) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *n; LIST_HEAD(tmp); @@ -354,9 +357,10 @@ err_alloc: * indirect memory key that provides access to the enitre address space given * by iotlb. 
*/ -static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int create_user_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + struct vhost_iotlb *iotlb) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *n; struct vhost_iotlb_map *map; @@ -384,7 +388,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb LOG_MAX_KLM_SIZE); mr->num_klms += nnuls; } - err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb); if (err) goto err_chain; } @@ -393,7 +397,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb pperm = map->perm; } } - err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb); if (err) goto err_chain; @@ -450,20 +454,23 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) mlx5_vdpa_destroy_mkey(mvdev, mr->mkey); } -static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) +static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src) { struct vhost_iotlb_map *map; u64 start = 0, last = ULLONG_MAX; int err; + if (dst == src) + return -EINVAL; + if (!src) { - err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW); + err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW); return err; } for (map = vhost_iotlb_itree_first(src, start, last); map; map = vhost_iotlb_itree_next(map, start, last)) { - err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last, + err = vhost_iotlb_add_range(dst, map->start, map->last, map->addr, map->perm); if (err) return err; @@ -471,9 +478,9 @@ static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) return 0; } -static void prune_iotlb(struct mlx5_vdpa_dev *mvdev) +static void prune_iotlb(struct vhost_iotlb *iotlb) { - vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX); + vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX); } static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) @@ -489,133 +496,169 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr } } -static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) - return; + if (mr->user_mr) + destroy_user_mr(mvdev, mr); + else + destroy_dma_mr(mvdev, mr); - prune_iotlb(mvdev); + vhost_iotlb_free(mr->iotlb); } -static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; - - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) + if (!mr) return; - if (!mr->initialized) - return; + mutex_lock(&mvdev->mr_mtx); - if (mr->user_mr) - destroy_user_mr(mvdev, mr); - else - destroy_dma_mr(mvdev, mr); + _mlx5_vdpa_destroy_mr(mvdev, mr); + + for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) { + if (mvdev->mr[i] == mr) + mvdev->mr[i] = NULL; + } - mr->initialized = false; + mutex_unlock(&mvdev->mr_mtx); + + kfree(mr); } -void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *new_mr, + unsigned int asid) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; + 
struct mlx5_vdpa_mr *old_mr = mvdev->mr[asid]; - mutex_lock(&mr->mkey_mtx); + mutex_lock(&mvdev->mr_mtx); - _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); - _mlx5_vdpa_destroy_cvq_mr(mvdev, asid); + mvdev->mr[asid] = new_mr; + if (old_mr) { + _mlx5_vdpa_destroy_mr(mvdev, old_mr); + kfree(old_mr); + } - mutex_unlock(&mr->mkey_mtx); -} + mutex_unlock(&mvdev->mr_mtx); -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) -{ - mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]); - mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]); } -static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, - unsigned int asid) +void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) { - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) - return 0; + for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) + mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[i]); - return dup_iotlb(mvdev, iotlb); + prune_iotlb(mvdev->cvq.iotlb); } -static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, - unsigned int asid) +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + struct vhost_iotlb *iotlb) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) - return 0; - - if (mr->initialized) - return 0; - if (iotlb) - err = create_user_mr(mvdev, iotlb); + err = create_user_mr(mvdev, mr, iotlb); else err = create_dma_mr(mvdev, mr); if (err) return err; - mr->initialized = true; + mr->iotlb = vhost_iotlb_alloc(0, 0); + if (!mr->iotlb) { + err = -ENOMEM; + goto err_mr; + } + + err = dup_iotlb(mr->iotlb, iotlb); + if (err) + goto err_iotlb; return 0; + +err_iotlb: + vhost_iotlb_free(mr->iotlb); + +err_mr: + if (iotlb) + destroy_user_mr(mvdev, mr); + else + destroy_dma_mr(mvdev, mr); + + return err; } -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, unsigned int asid) +struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb) { + struct mlx5_vdpa_mr *mr; int err; - err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); - if (err) - return err; + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mutex_lock(&mvdev->mr_mtx); + err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb); + mutex_unlock(&mvdev->mr_mtx); - err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid); if (err) goto out_err; - return 0; + return mr; out_err: - _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); - - return err; + kfree(mr); + return ERR_PTR(err); } -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - unsigned int asid) +int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid) { int err; - mutex_lock(&mvdev->mr.mkey_mtx); - err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); - mutex_unlock(&mvdev->mr.mkey_mtx); + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + return 0; + + spin_lock(&mvdev->cvq.iommu_lock); + + prune_iotlb(mvdev->cvq.iotlb); + err = dup_iotlb(mvdev->cvq.iotlb, iotlb); + + spin_unlock(&mvdev->cvq.iommu_lock); + return err; } -int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map, unsigned int asid) +int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; - int err = 0; + struct mlx5_vdpa_mr *mr; + + mr = mlx5_vdpa_create_mr(mvdev, NULL); + if (IS_ERR(mr)) + return PTR_ERR(mr); - 
*change_map = false; - mutex_lock(&mr->mkey_mtx); - if (mr->initialized) { - mlx5_vdpa_info(mvdev, "memory map update\n"); - *change_map = true; + mlx5_vdpa_update_mr(mvdev, mr, 0); + + return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0); +} + +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + if (asid >= MLX5_VDPA_NUM_AS) + return -EINVAL; + + mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[asid]); + + if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { + if (mlx5_vdpa_create_dma_mr(mvdev)) + mlx5_vdpa_warn(mvdev, "create DMA MR failed\n"); + } else { + mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid); } - if (!*change_map) - err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); - mutex_unlock(&mr->mkey_mtx); - return err; + return 0; } diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index d5a59c9035fb..5c5a41b64bfc 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -256,7 +256,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) mlx5_vdpa_warn(mvdev, "resources already allocated\n"); return -EINVAL; } - mutex_init(&mvdev->mr.mkey_mtx); + mutex_init(&mvdev->mr_mtx); res->uar = mlx5_get_uars_page(mdev); if (IS_ERR(res->uar)) { err = PTR_ERR(res->uar); @@ -301,7 +301,7 @@ err_pd: err_uctx: mlx5_put_uars_page(mdev, res->uar); err_uars: - mutex_destroy(&mvdev->mr.mkey_mtx); + mutex_destroy(&mvdev->mr_mtx); return err; } @@ -318,6 +318,6 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) dealloc_pd(mvdev, res->pdn, res->uid); destroy_uctx(mvdev, res->uid); mlx5_put_uars_page(mvdev->mdev, res->uar); - mutex_destroy(&mvdev->mr.mkey_mtx); + mutex_destroy(&mvdev->mr_mtx); res->valid = false; } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 946488b8989f..12ac3397f39b 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -861,6 +862,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque { int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_vdpa_mr *vq_mr; + struct mlx5_vdpa_mr *vq_desc_mr; void *obj_context; u16 mlx_features; void *cmd_hdr; @@ -913,7 +917,14 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); - MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); + vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + if (vq_mr) + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); + + vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) + MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); + MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); @@ -2301,6 +2312,16 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) return MLX5_VDPA_DATAVQ_GROUP; } +static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + + if (is_ctrl_vq_idx(mvdev, idx)) + 
return MLX5_VDPA_CVQ_GROUP; + + return MLX5_VDPA_DATAVQ_DESC_GROUP; +} + static u64 mlx_to_vritio_features(u16 dev_features) { u64 result = 0; @@ -2539,6 +2560,11 @@ static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) flush_workqueue(ndev->mvdev.wq); } +static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa) +{ + return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); +} + static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2673,7 +2699,8 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) } static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, unsigned int asid) + struct mlx5_vdpa_mr *new_mr, + unsigned int asid) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; @@ -2681,28 +2708,21 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, suspend_vqs(ndev); err = save_channels_info(ndev); if (err) - goto err_mr; + return err; teardown_driver(ndev); - mlx5_vdpa_destroy_mr_asid(mvdev, asid); - err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); - if (err) - goto err_mr; + + mlx5_vdpa_update_mr(mvdev, new_mr, asid); if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) - goto err_mr; + return 0; restore_channels_info(ndev); err = setup_driver(mvdev); if (err) - goto err_setup; + return err; return 0; - -err_setup: - mlx5_vdpa_destroy_mr_asid(mvdev, asid); -err_mr: - return err; } /* reslock must be held for this function */ @@ -2841,7 +2861,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) err_driver: unregister_link_notifier(ndev); err_setup: - mlx5_vdpa_destroy_mr(&ndev->mvdev); + mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; err_clear: up_write(&ndev->reslock); @@ -2856,7 +2876,7 @@ static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) mvdev->group2asid[i] = 0; } -static int mlx5_vdpa_reset(struct vdpa_device *vdev) +static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); @@ -2868,7 +2888,8 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) unregister_link_notifier(ndev); teardown_driver(ndev); clear_vqs_ready(ndev); - mlx5_vdpa_destroy_mr(&ndev->mvdev); + if (flags & VDPA_RESET_F_CLEAN_MAP) + mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); ndev->mvdev.status = 0; ndev->mvdev.suspended = false; ndev->cur_num_vqs = 0; @@ -2879,8 +2900,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) init_group_to_asid_map(mvdev); ++mvdev->generation; - if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) + if ((flags & VDPA_RESET_F_CLEAN_MAP) && + MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { + if (mlx5_vdpa_create_dma_mr(mvdev)) mlx5_vdpa_warn(mvdev, "create MR failed\n"); } up_write(&ndev->reslock); @@ -2888,6 +2910,11 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) return 0; } +static int mlx5_vdpa_reset(struct vdpa_device *vdev) +{ + return mlx5_vdpa_compat_reset(vdev, 0); +} + static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) { return sizeof(struct virtio_net_config); @@ -2919,18 +2946,38 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, unsigned int asid) { - bool change_map; + struct mlx5_vdpa_mr *new_mr; int err; - err = mlx5_vdpa_handle_set_map(mvdev, 
iotlb, &change_map, asid); - if (err) { - mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); - return err; + if (asid >= MLX5_VDPA_NUM_AS) + return -EINVAL; + + if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) { + new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); + if (IS_ERR(new_mr)) { + err = PTR_ERR(new_mr); + mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err); + return err; + } + } else { + /* Empty iotlbs don't have an mr but will clear the previous mr. */ + new_mr = NULL; + } + + if (!mvdev->mr[asid]) { + mlx5_vdpa_update_mr(mvdev, new_mr, asid); + } else { + err = mlx5_vdpa_change_map(mvdev, new_mr, asid); + if (err) { + mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err); + goto out_err; + } } - if (change_map) - err = mlx5_vdpa_change_map(mvdev, iotlb, asid); + return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); +out_err: + mlx5_vdpa_destroy_mr(mvdev, new_mr); return err; } @@ -2947,6 +2994,18 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, return err; } +static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + down_write(&ndev->reslock); + err = mlx5_vdpa_reset_mr(mvdev, asid); + up_write(&ndev->reslock); + return err; +} + static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2985,7 +3044,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ndev = to_mlx5_vdpa_ndev(mvdev); free_resources(ndev); - mlx5_vdpa_destroy_mr(mvdev); + mlx5_vdpa_destroy_mr_resources(mvdev); if (!is_zero_ether_addr(ndev->config.mac)) { pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); @@ -3169,12 +3228,19 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, unsigned int asid) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + int err = 0; if (group >= MLX5_VDPA_NUMVQ_GROUPS) return -EINVAL; mvdev->group2asid[group] = asid; - return 0; + + mutex_lock(&mvdev->mr_mtx); + if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid]) + err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid); + mutex_unlock(&mvdev->mr_mtx); + + return err; } static const struct vdpa_config_ops mlx5_vdpa_ops = { @@ -3191,7 +3257,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_irq = mlx5_get_vq_irq, .get_vq_align = mlx5_vdpa_get_vq_align, .get_vq_group = mlx5_vdpa_get_vq_group, + .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. 
*/ .get_device_features = mlx5_vdpa_get_device_features, + .get_backend_features = mlx5_vdpa_get_backend_features, .set_driver_features = mlx5_vdpa_set_driver_features, .get_driver_features = mlx5_vdpa_get_driver_features, .set_config_cb = mlx5_vdpa_set_config_cb, @@ -3201,11 +3269,13 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_status = mlx5_vdpa_get_status, .set_status = mlx5_vdpa_set_status, .reset = mlx5_vdpa_reset, + .compat_reset = mlx5_vdpa_compat_reset, .get_config_size = mlx5_vdpa_get_config_size, .get_config = mlx5_vdpa_get_config, .set_config = mlx5_vdpa_set_config, .get_generation = mlx5_vdpa_get_generation, .set_map = mlx5_vdpa_set_map, + .reset_map = mlx5_vdpa_reset_map, .set_group_asid = mlx5_set_group_asid, .get_vq_dma_dev = mlx5_get_vq_dma_dev, .free = mlx5_vdpa_free, @@ -3289,6 +3359,7 @@ struct mlx5_vdpa_mgmtdev { struct vdpa_mgmt_dev mgtdev; struct mlx5_adev *madev; struct mlx5_vdpa_net *ndev; + struct vdpa_config_ops vdpa_ops; }; static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) @@ -3402,7 +3473,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, max_vqs = 2; } - ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, + ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); @@ -3485,7 +3556,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_mpfs; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - err = mlx5_vdpa_create_mr(mvdev, NULL, 0); + err = mlx5_vdpa_create_dma_mr(mvdev); if (err) goto err_res; } @@ -3515,7 +3586,7 @@ err_reg: err_res2: free_resources(ndev); err_mr: - mlx5_vdpa_destroy_mr(mvdev); + mlx5_vdpa_destroy_mr_resources(mvdev); err_res: mlx5_vdpa_free_resources(&ndev->mvdev); err_mpfs: @@ -3575,6 +3646,10 @@ static int mlx5v_probe(struct auxiliary_device *adev, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; mgtdev->mgtdev.supported_features = get_supported_features(mdev); mgtdev->madev = madev; + mgtdev->vdpa_ops = mlx5_vdpa_ops; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported)) + mgtdev->vdpa_ops.get_vq_desc_group = NULL; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); if (err) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 76d41058add9..be2925d0d283 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -139,7 +139,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim, vq->vring.notify = NULL; } -static void vdpasim_do_reset(struct vdpasim *vdpasim) +static void vdpasim_do_reset(struct vdpasim *vdpasim, u32 flags) { int i; @@ -151,11 +151,13 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) &vdpasim->iommu_lock); } - for (i = 0; i < vdpasim->dev_attr.nas; i++) { - vhost_iotlb_reset(&vdpasim->iommu[i]); - vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX, - 0, VHOST_MAP_RW); - vdpasim->iommu_pt[i] = true; + if (flags & VDPA_RESET_F_CLEAN_MAP) { + for (i = 0; i < vdpasim->dev_attr.nas; i++) { + vhost_iotlb_reset(&vdpasim->iommu[i]); + vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX, + 0, VHOST_MAP_RW); + vdpasim->iommu_pt[i] = true; + } } vdpasim->running = true; @@ -259,8 +261,12 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, if (!vdpasim->iommu_pt) goto err_iommu; - for (i = 0; i < vdpasim->dev_attr.nas; i++) + for (i = 0; i < 
vdpasim->dev_attr.nas; i++) { vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0); + vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX, 0, + VHOST_MAP_RW); + vdpasim->iommu_pt[i] = true; + } for (i = 0; i < dev_attr->nvqs; i++) vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], @@ -480,18 +486,23 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) mutex_unlock(&vdpasim->mutex); } -static int vdpasim_reset(struct vdpa_device *vdpa) +static int vdpasim_compat_reset(struct vdpa_device *vdpa, u32 flags) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); mutex_lock(&vdpasim->mutex); vdpasim->status = 0; - vdpasim_do_reset(vdpasim); + vdpasim_do_reset(vdpasim, flags); mutex_unlock(&vdpasim->mutex); return 0; } +static int vdpasim_reset(struct vdpa_device *vdpa) +{ + return vdpasim_compat_reset(vdpa, 0); +} + static int vdpasim_suspend(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -637,6 +648,25 @@ err: return ret; } +static int vdpasim_reset_map(struct vdpa_device *vdpa, unsigned int asid) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + + spin_lock(&vdpasim->iommu_lock); + if (vdpasim->iommu_pt[asid]) + goto out; + vhost_iotlb_reset(&vdpasim->iommu[asid]); + vhost_iotlb_add_range(&vdpasim->iommu[asid], 0, ULONG_MAX, + 0, VHOST_MAP_RW); + vdpasim->iommu_pt[asid] = true; +out: + spin_unlock(&vdpasim->iommu_lock); + return 0; +} + static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -749,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, .reset = vdpasim_reset, + .compat_reset = vdpasim_compat_reset, .suspend = vdpasim_suspend, .resume = vdpasim_resume, .get_config_size = vdpasim_get_config_size, @@ -759,6 +790,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .set_group_asid = vdpasim_set_group_asid, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, + .reset_map = vdpasim_reset_map, .bind_mm = vdpasim_bind_mm, .unbind_mm = vdpasim_unbind_mm, .free = vdpasim_free, @@ -787,6 +819,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, .reset = vdpasim_reset, + .compat_reset = vdpasim_compat_reset, .suspend = vdpasim_suspend, .resume = vdpasim_resume, .get_config_size = vdpasim_get_config_size, @@ -796,6 +829,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_iova_range = vdpasim_get_iova_range, .set_group_asid = vdpasim_set_group_asid, .set_map = vdpasim_set_map, + .reset_map = vdpasim_reset_map, .bind_mm = vdpasim_bind_mm, .unbind_mm = vdpasim_unbind_mm, .free = vdpasim_free, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index df7869537ef1..0ddd4b8abecb 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -134,7 +134,6 @@ static DEFINE_MUTEX(vduse_lock); static DEFINE_IDR(vduse_idr); static dev_t vduse_major; -static struct class *vduse_class; static struct cdev vduse_ctrl_cdev; static struct cdev vduse_cdev; static struct workqueue_struct *vduse_irq_wq; @@ -1528,6 +1527,16 @@ static const struct kobj_type vq_type = { .default_groups = vq_groups, }; +static char *vduse_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); +} + +static const struct class 
vduse_class = { + .name = "vduse", + .devnode = vduse_devnode, +}; + static void vduse_dev_deinit_vqs(struct vduse_dev *dev) { int i; @@ -1638,7 +1647,7 @@ static int vduse_destroy_dev(char *name) mutex_unlock(&dev->lock); vduse_dev_reset(dev); - device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); + device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); idr_remove(&vduse_idr, dev->minor); kvfree(dev->config); vduse_dev_deinit_vqs(dev); @@ -1805,7 +1814,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, dev->minor = ret; dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; - dev->dev = device_create_with_groups(vduse_class, NULL, + dev->dev = device_create_with_groups(&vduse_class, NULL, MKDEV(MAJOR(vduse_major), dev->minor), dev, vduse_dev_groups, "%s", config->name); if (IS_ERR(dev->dev)) { @@ -1821,7 +1830,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, return 0; err_vqs: - device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); + device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); err_dev: idr_remove(&vduse_idr, dev->minor); err_idr: @@ -1934,11 +1943,6 @@ static const struct file_operations vduse_ctrl_fops = { .llseek = noop_llseek, }; -static char *vduse_devnode(const struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); -} - struct vduse_mgmt_dev { struct vdpa_mgmt_dev mgmt_dev; struct device dev; @@ -2082,11 +2086,9 @@ static int vduse_init(void) int ret; struct device *dev; - vduse_class = class_create("vduse"); - if (IS_ERR(vduse_class)) - return PTR_ERR(vduse_class); - - vduse_class->devnode = vduse_devnode; + ret = class_register(&vduse_class); + if (ret) + return ret; ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse"); if (ret) @@ -2099,7 +2101,7 @@ static int vduse_init(void) if (ret) goto err_ctrl_cdev; - dev = device_create(vduse_class, NULL, vduse_major, NULL, "control"); + dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control"); if (IS_ERR(dev)) { ret = PTR_ERR(dev); goto err_device; @@ -2141,13 +2143,13 @@ err_bound_wq: err_wq: cdev_del(&vduse_cdev); err_cdev: - device_destroy(vduse_class, vduse_major); + device_destroy(&vduse_class, vduse_major); err_device: cdev_del(&vduse_ctrl_cdev); err_ctrl_cdev: unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); err_chardev_region: - class_destroy(vduse_class); + class_unregister(&vduse_class); return ret; } module_init(vduse_init); @@ -2159,10 +2161,10 @@ static void vduse_exit(void) destroy_workqueue(vduse_irq_bound_wq); destroy_workqueue(vduse_irq_wq); cdev_del(&vduse_cdev); - device_destroy(vduse_class, vduse_major); + device_destroy(&vduse_class, vduse_major); cdev_del(&vduse_ctrl_cdev); unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); - class_destroy(vduse_class); + class_unregister(&vduse_class); } module_exit(vduse_exit); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 4e3b2c25c721..282aac45c690 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1158,7 +1158,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) /* * Set prot_iter to data_iter and truncate it to * prot_bytes, and advance data_iter past any - * preceeding prot_bytes that may be present. + * preceding prot_bytes that may be present. * * Also fix up the exp_data_len to reflect only the * actual data payload length. 
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 78379ffd2336..30df5c58db73 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, return vhost_vdpa_alloc_as(v, asid); } +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->reset_map) + ops->reset_map(vdpa, asid); +} + static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) { struct vhost_vdpa_as *as = asid_to_as(v, asid); @@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) hlist_del(&as->hash_link); vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); + /* + * Devices with vendor specific IOMMU may need to restore + * iotlb to the initial or default state, which cannot be + * cleaned up in the all range unmap call above. Give them + * a chance to clean up or reset the map to the desired + * state. + */ + vhost_vdpa_reset_map(v, asid); kfree(as); return 0; @@ -210,13 +227,24 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) irq_bypass_unregister_producer(&vq->call_ctx.producer); } -static int vhost_vdpa_reset(struct vhost_vdpa *v) +static int _compat_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; + u32 flags = 0; - v->in_batch = 0; + if (v->vdev.vqs) { + flags |= !vhost_backend_has_feature(v->vdev.vqs[0], + VHOST_BACKEND_F_IOTLB_PERSIST) ? + VDPA_RESET_F_CLEAN_MAP : 0; + } - return vdpa_reset(vdpa); + return vdpa_reset(vdpa, flags); +} + +static int vhost_vdpa_reset(struct vhost_vdpa *v) +{ + v->in_batch = 0; + return _compat_vdpa_reset(v); } static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) @@ -295,7 +323,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) vhost_vdpa_unsetup_vq_irq(v, i); if (status == 0) { - ret = vdpa_reset(vdpa); + ret = _compat_vdpa_reset(v); if (ret) return ret; } else @@ -389,6 +417,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) return ops->resume; } +static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->get_vq_desc_group; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -414,6 +450,15 @@ static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) return ops->get_backend_features(vdpa); } +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return (!ops->set_map && !ops->dma_map) || ops->reset_map || + vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); +} + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -605,6 +650,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, else if (copy_to_user(argp, &s, sizeof(s))) return -EFAULT; return 0; + case VHOST_VDPA_GET_VRING_DESC_GROUP: + if (!vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_desc_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; case VHOST_VDPA_SET_GROUP_ASID: if (copy_from_user(&s, argp, sizeof(s))) return 
-EFAULT; @@ -690,6 +745,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_DESC_ASID) | + BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | BIT_ULL(VHOST_BACKEND_F_RESUME) | BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) @@ -700,6 +757,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && !vhost_vdpa_can_resume(v)) return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) + return -EINVAL; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) && + !vhost_vdpa_has_persistent_map(v)) + return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; } @@ -753,6 +819,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (vhost_vdpa_can_resume(v)) features |= BIT_ULL(VHOST_BACKEND_F_RESUME); + if (vhost_vdpa_has_desc_group(v)) + features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID); + if (vhost_vdpa_has_persistent_map(v)) + features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); features |= vhost_vdpa_get_backend_features(v); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; @@ -1285,6 +1355,7 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) vhost_vdpa_free_domain(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); + v->vdev.vqs = NULL; } static int vhost_vdpa_open(struct inode *inode, struct file *filep) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 44dcb9e7b55e..1fe93e93f5bc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -745,7 +745,7 @@ static void report_free_page_func(struct work_struct *work) * 2) update the host about the old page removed from vb->pages list; * * This function preforms the balloon page migration task. 
- * Called through balloon_mapping->a_ops->migratepage + * Called through movable_operations->migrate_page */ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index d6bb68ba84e5..ee6a386d250b 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -39,6 +39,39 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); } +static int __vp_check_common_size_one_feature(struct virtio_device *vdev, u32 fbit, + u32 offset, const char *fname) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + if (!__virtio_test_bit(vdev, fbit)) + return 0; + + if (likely(vp_dev->mdev.common_len >= offset)) + return 0; + + dev_err(&vdev->dev, + "virtio: common cfg size(%zu) does not match the feature %s\n", + vp_dev->mdev.common_len, fname); + + return -EINVAL; +} + +#define vp_check_common_size_one_feature(vdev, fbit, field) \ + __vp_check_common_size_one_feature(vdev, fbit, \ + offsetofend(struct virtio_pci_modern_common_cfg, field), #fbit) + +static int vp_check_common_size(struct virtio_device *vdev) +{ + if (vp_check_common_size_one_feature(vdev, VIRTIO_F_NOTIF_CONFIG_DATA, queue_notify_data)) + return -EINVAL; + + if (vp_check_common_size_one_feature(vdev, VIRTIO_F_RING_RESET, queue_reset)) + return -EINVAL; + + return 0; +} + /* virtio config->finalize_features() implementation */ static int vp_finalize_features(struct virtio_device *vdev) { @@ -57,6 +90,9 @@ static int vp_finalize_features(struct virtio_device *vdev) return -EINVAL; } + if (vp_check_common_size(vdev)) + return -EINVAL; + vp_modern_set_features(&vp_dev->mdev, vdev->features); return 0; diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 9cb601e16688..e2a1fe7bb66c 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -203,6 +203,10 @@ static inline void check_offsets(void) offsetof(struct virtio_pci_common_cfg, queue_used_lo)); BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI != offsetof(struct virtio_pci_common_cfg, queue_used_hi)); + BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NDATA != + offsetof(struct virtio_pci_modern_common_cfg, queue_notify_data)); + BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_RESET != + offsetof(struct virtio_pci_modern_common_cfg, queue_reset)); } /* @@ -292,7 +296,7 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) mdev->common = vp_modern_map_capability(mdev, common, sizeof(struct virtio_pci_common_cfg), 4, 0, sizeof(struct virtio_pci_modern_common_cfg), - NULL, NULL); + &mdev->common_len, NULL); if (!mdev->common) goto err_map_common; mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1, diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 06ce6d8c2e00..8d63e5923d24 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -100,7 +100,7 @@ static void virtio_vdpa_reset(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - vdpa_reset(vdpa); + vdpa_reset(vdpa, 0); } static bool virtio_vdpa_notify(struct virtqueue *vq) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4df6d1c12437..6f3631425f38 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1232,7 +1232,13 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 
max_emulated_devices[0x8]; u8 max_num_virtio_queues[0x18]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 reserved_at_c0[0x13]; + u8 desc_group_mkey_supported[0x1]; + u8 reserved_at_d4[0xc]; + + u8 reserved_at_e0[0x20]; u8 umem_1_buffer_param_a[0x20]; diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 9becdc3fa503..b86d51a855f6 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits { u8 reserved_at_320[0x8]; u8 pd[0x18]; - u8 reserved_at_340[0xc0]; + u8 reserved_at_340[0x20]; + + u8 desc_group_mkey[0x20]; + + u8 reserved_at_380[0x80]; }; struct mlx5_ifc_virtio_net_q_object_bits { @@ -141,6 +145,7 @@ enum { MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; enum { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 0e652026b776..db15ac07f8a6 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -204,6 +204,16 @@ struct vdpa_map_file { * @vdev: vdpa device * @idx: virtqueue index * Returns u32: group id for this virtqueue + * @get_vq_desc_group: Get the group id for the descriptor table of + * a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns u32: group id for the descriptor table + * portion of this virtqueue. Could be different + * than the one from @get_vq_group, in which case + * the access to the descriptor table can be + * confined to a separate asid, isolating from + * the virtqueue's buffer address access. * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the @@ -242,6 +252,17 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) + * @compat_reset: Reset device with compatibility quirks to + * accommodate older userspace. Only needed by + * parent driver which used to have bogus reset + * behaviour, and has to maintain such behaviour + * for compatibility with older userspace. + * Historically compliant driver only has to + * implement .reset, Historically non-compliant + * driver should implement both. + * @vdev: vdpa device + * @flags: compatibility quirks for reset + * Returns integer: success (0) or error (< 0) * @suspend: Suspend the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) @@ -317,6 +338,15 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @reset_map: Reset device memory mapping to the default + * state (optional) + * Needed for devices that are using device + * specific DMA translation and prefer mapping + * to be decoupled from the virtio life cycle, + * i.e. 
device .reset op does not reset mapping + * @vdev: vdpa device + * @asid: address space identifier + * Returns integer: success (0) or error (< 0) * @get_vq_dma_dev: Get the dma device for a specific * virtqueue (optional) * @vdev: vdpa device @@ -360,6 +390,7 @@ struct vdpa_config_ops { /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); + u32 (*get_vq_desc_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); u64 (*get_backend_features)(const struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); @@ -373,6 +404,8 @@ struct vdpa_config_ops { u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); + int (*compat_reset)(struct vdpa_device *vdev, u32 flags); +#define VDPA_RESET_F_CLEAN_MAP 1 int (*suspend)(struct vdpa_device *vdev); int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); @@ -394,6 +427,7 @@ struct vdpa_config_ops { u64 iova, u64 size, u64 pa, u32 perm, void *opaque); int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, u64 iova, u64 size); + int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); @@ -485,14 +519,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; } -static inline int vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) { const struct vdpa_config_ops *ops = vdev->config; int ret; down_write(&vdev->cf_lock); vdev->features_valid = false; - ret = ops->reset(vdev); + if (ops->compat_reset && flags) + ret = ops->compat_reset(vdev, flags); + else + ret = ops->reset(vdev); up_write(&vdev->cf_lock); return ret; } diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 067ac1d789bc..d0f2797420f7 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -12,37 +12,48 @@ struct virtio_pci_modern_common_cfg { __le16 queue_reset; /* read-write */ }; +/** + * struct virtio_pci_modern_device - info for modern PCI virtio + * @pci_dev: Ptr to the PCI device struct + * @common: Position of the common capability in the PCI config + * @device: Device-specific data (non-legacy mode) + * @notify_base: Base of vq notifications (non-legacy mode) + * @notify_pa: Physical base of vq notifications + * @isr: Where to read and clear interrupt + * @notify_len: So we can sanity-check accesses + * @device_len: So we can sanity-check accesses + * @notify_map_cap: Capability for when we need to map notifications per-vq + * @notify_offset_multiplier: Multiply queue_notify_off by this value + * (non-legacy mode). 
+ * @modern_bars: Bitmask of BARs + * @id: Device and vendor id + * @device_id_check: Callback defined before vp_modern_probe() to be used to + * verify the PCI device is a vendor's expected device rather + * than the standard virtio PCI device + * Returns the found device id or ERRNO + * @dma_mask: Optional mask instead of the traditional DMA_BIT_MASK(64), + * for vendor devices with DMA space address limitations + */ struct virtio_pci_modern_device { struct pci_dev *pci_dev; struct virtio_pci_common_cfg __iomem *common; - /* Device-specific data (non-legacy mode) */ void __iomem *device; - /* Base of vq notifications (non-legacy mode). */ void __iomem *notify_base; - /* Physical base of vq notifications */ resource_size_t notify_pa; - /* Where to read and clear interrupt */ u8 __iomem *isr; - /* So we can sanity-check accesses. */ size_t notify_len; size_t device_len; + size_t common_len; - /* Capability for when we need to map notifications per-vq. */ int notify_map_cap; - /* Multiply queue_notify_off by this value. (non-legacy mode). */ u32 notify_offset_multiplier; - int modern_bars; - struct virtio_device_id id; - /* optional check for vendor virtio device, returns dev_id or -ERRNO */ int (*device_id_check)(struct pci_dev *pdev); - - /* optional mask for devices with limited DMA space */ u64 dma_mask; }; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index f5c48b61ab62..649560c685f1 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -219,4 +219,12 @@ */ #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) +/* Get the group for the descriptor table including driver & device areas + * of a virtqueue: read index, write group in num. + * The virtqueue index is stored in the index field of vhost_vring_state. + * The group ID of the descriptor table for this specific virtqueue + * is returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 2d827d22cd99..d7656908f730 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -185,5 +185,12 @@ struct vhost_vdpa_iova_range { * DRIVER_OK */ #define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 +/* Device may expose the virtqueue's descriptor area, driver area and + * device area to a different group for ASID binding than where its + * buffers may reside. Requires VHOST_BACKEND_F_IOTLB_ASID. + */ +#define VHOST_BACKEND_F_DESC_ASID 0x7 +/* IOTLB don't flush memory mapping across device reset */ +#define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 #endif diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 2c712c654165..8881aea60f6f 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -105,6 +105,11 @@ */ #define VIRTIO_F_NOTIFICATION_DATA 38 +/* This feature indicates that the driver uses the data provided by the device + * as a virtqueue identifier in available buffer notifications. + */ +#define VIRTIO_F_NOTIF_CONFIG_DATA 39 + /* * This feature indicates that the driver can reset a queue individually. */
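
For readers wiring this up from userspace, the sketch below shows one way the uapi pieces added by this series (VHOST_BACKEND_F_IOTLB_PERSIST, VHOST_BACKEND_F_DESC_ASID and VHOST_VDPA_GET_VRING_DESC_GROUP) might be probed on a vhost-vdpa device. It is illustrative only and not part of the patch: the device node name /dev/vhost-vdpa-0 is an assumption, and building it requires uapi headers from a kernel that already carries these changes.

/*
 * Illustrative only -- not part of this series. Assumes a vhost-vdpa
 * device node such as /dev/vhost-vdpa-0 and uapi headers containing
 * VHOST_BACKEND_F_IOTLB_PERSIST, VHOST_BACKEND_F_DESC_ASID and
 * VHOST_VDPA_GET_VRING_DESC_GROUP.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	struct vhost_vring_state s = { .index = 0 };
	uint64_t features = 0;
	int fd;

	fd = open("/dev/vhost-vdpa-0", O_RDWR);
	if (fd < 0)
		return 1;

	/* Ask the kernel which backend features the parent driver offers. */
	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features) == 0) {
		if (features & (1ULL << VHOST_BACKEND_F_IOTLB_PERSIST))
			printf("IOTLB mappings survive device reset\n");

		/* Descriptor table may sit in its own group/ASID: query it. */
		if ((features & (1ULL << VHOST_BACKEND_F_DESC_ASID)) &&
		    ioctl(fd, VHOST_VDPA_GET_VRING_DESC_GROUP, &s) == 0)
			printf("vq %u descriptor group: %u\n", s.index, s.num);
	}

	close(fd);
	return 0;
}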