Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 3 Jul 2023 22:38:26 +0000 (15:38 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 3 Jul 2023 22:38:26 +0000 (15:38 -0700)
Pull virtio updates from Michael Tsirkin:

 - resume support in vdpa/solidrun

 - structure size optimizations in virtio_pci

 - new pds_vdpa driver

 - immediate initialization mechanism for vdpa/ifcvf

 - interrupt bypass for vdpa/mlx5

 - multiple worker support for vhost

 - virtio net in Intel F2000X-PL support for vdpa/ifcvf

 - fixes, cleanups all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (48 commits)
  vhost: Make parameter name match of vhost_get_vq_desc()
  vduse: fix NULL pointer dereference
  vhost: Allow worker switching while work is queueing
  vhost_scsi: add support for worker ioctls
  vhost: allow userspace to create workers
  vhost: replace single worker pointer with xarray
  vhost: add helper to parse userspace vring state/file
  vhost: remove vhost_work_queue
  vhost_scsi: flush IO vqs then send TMF rsp
  vhost_scsi: convert to vhost_vq_work_queue
  vhost_scsi: make SCSI cmd completion per vq
  vhost_sock: convert to vhost_vq_work_queue
  vhost: convert poll work to be vq based
  vhost: take worker or vq for flushing
  vhost: take worker or vq instead of dev for queueing
  vhost, vhost_net: add helper to check if vq has work
  vhost: add vhost_worker pointer to vhost_virtqueue
  vhost: dynamically allocate vhost_worker
  vhost: create worker at end of vhost_dev_set_owner
  virtio_bt: call scheduler when we free unused buffs
  ...

42 files changed:
Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst [new file with mode: 0644]
Documentation/networking/device_drivers/ethernet/index.rst
MAINTAINERS
drivers/bluetooth/virtio_bt.c
drivers/char/virtio_console.c
drivers/crypto/virtio/virtio_crypto_core.c
drivers/vdpa/Kconfig
drivers/vdpa/Makefile
drivers/vdpa/ifcvf/ifcvf_base.c
drivers/vdpa/ifcvf/ifcvf_base.h
drivers/vdpa/ifcvf/ifcvf_main.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/mlx5/net/mlx5_vnet.h
drivers/vdpa/pds/Makefile [new file with mode: 0644]
drivers/vdpa/pds/aux_drv.c [new file with mode: 0644]
drivers/vdpa/pds/aux_drv.h [new file with mode: 0644]
drivers/vdpa/pds/cmds.c [new file with mode: 0644]
drivers/vdpa/pds/cmds.h [new file with mode: 0644]
drivers/vdpa/pds/debugfs.c [new file with mode: 0644]
drivers/vdpa/pds/debugfs.h [new file with mode: 0644]
drivers/vdpa/pds/vdpa_dev.c [new file with mode: 0644]
drivers/vdpa/pds/vdpa_dev.h [new file with mode: 0644]
drivers/vdpa/solidrun/snet_ctrl.c
drivers/vdpa/solidrun/snet_hwmon.c
drivers/vdpa/solidrun/snet_main.c
drivers/vdpa/solidrun/snet_vdpa.h
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/vhost/net.c
drivers/vhost/scsi.c
drivers/vhost/vhost.c
drivers/vhost/vhost.h
drivers/vhost/vsock.c
drivers/virtio/virtio_pci_common.h
drivers/virtio/virtio_pci_modern_dev.c
drivers/virtio/virtio_vdpa.c
include/linux/pds/pds_adminq.h
include/linux/pds/pds_common.h
include/linux/virtio.h
include/linux/virtio_pci_modern.h
include/uapi/linux/vhost.h
include/uapi/linux/vhost_types.h
tools/virtio/Makefile

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_vdpa.rst
new file mode 100644 (file)
index 0000000..587927d
--- /dev/null
@@ -0,0 +1,85 @@
+.. SPDX-License-Identifier: GPL-2.0+
+.. note: can be edited and viewed with /usr/bin/formiko-vim
+
+==========================================================
+PCI vDPA driver for the AMD/Pensando(R) DSC adapter family
+==========================================================
+
+AMD/Pensando vDPA VF Device Driver
+
+Copyright(c) 2023 Advanced Micro Devices, Inc
+
+Overview
+========
+
+The ``pds_vdpa`` driver is an auxiliary bus driver that supplies
+a vDPA device for use by the virtio network stack.  It is used with
+the Pensando Virtual Function devices that offer vDPA and virtio queue
+services.  It depends on the ``pds_core`` driver and hardware for the PF
+and VF PCI handling as well as for device configuration services.
+
+Using the device
+================
+
+The ``pds_vdpa`` device is enabled via multiple configuration steps and
+depends on the ``pds_core`` driver to create and enable SR-IOV Virtual
+Function devices.  After the VFs are enabled, we enable the vDPA service
+in the ``pds_core`` device to create the auxiliary devices used by pds_vdpa.
+
+Example steps:
+
+.. code-block:: bash
+
+  #!/bin/bash
+
+  modprobe pds_core
+  modprobe vdpa
+  modprobe pds_vdpa
+
+  PF_BDF=`ls /sys/module/pds_core/drivers/pci\:pds_core/*/sriov_numvfs | awk -F / '{print $7}'`
+
+  # Enable vDPA VF auxiliary device(s) in the PF
+  devlink dev param set pci/$PF_BDF name enable_vnet cmode runtime value true
+
+  # Create a VF for vDPA use
+  echo 1 > /sys/bus/pci/drivers/pds_core/$PF_BDF/sriov_numvfs
+
+  # Find the vDPA services/devices available
+  PDS_VDPA_MGMT=`vdpa mgmtdev show | grep vDPA | head -1 | cut -d: -f1`
+
+  # Create a vDPA device for use in virtio network configurations
+  vdpa dev add name vdpa1 mgmtdev $PDS_VDPA_MGMT mac 00:11:22:33:44:55
+
+  # Set up an ethernet interface on the vdpa device
+  modprobe virtio_vdpa
+
+
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command::
+
+  make oldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+  -> Device Drivers
+    -> Network device support (NETDEVICES [=y])
+      -> Ethernet driver support
+        -> Pensando devices
+          -> Pensando Ethernet PDS_VDPA Support
+
+Support
+=======
+
+For general Linux networking support, please use the netdev mailing
+list, which is monitored by Pensando personnel::
+
+  netdev@vger.kernel.org
+
+For more specific support needs, please use the Pensando driver support
+email::
+
+  drivers@pensando.io
index 417ca514a4d057162667e2c1a267b0c7bd624c7f..94ecb67c0885023381c80bb8d5bb0502f8d2bf43 100644 (file)
@@ -15,6 +15,7 @@ Contents:
    amazon/ena
    altera/altera_tse
    amd/pds_core
+   amd/pds_vdpa
    aquantia/atlantic
    chelsio/cxgb
    cirrus/cs89x0
index d7d65163e54e8dbac4df521b95d9c3d2c79cdd86..1367a240cb0ea9c8f6cbe35cb567acfc4366d06b 100644 (file)
@@ -22535,6 +22535,10 @@ F:     include/linux/vringh.h
 F:     include/uapi/linux/virtio_*.h
 F:     tools/virtio/
 
+PDS DSC VIRTIO DATA PATH ACCELERATOR
+R:     Shannon Nelson <shannon.nelson@amd.com>
+F:     drivers/vdpa/pds/
+
 VIRTIO CRYPTO DRIVER
 M:     Gonglei <arei.gonglei@huawei.com>
 L:     virtualization@lists.linux-foundation.org
index c570c45d148054fe31640015e674211f95387d6b..2ac70b560c46db16bef8b7ca86196c53c23b508e 100644 (file)
@@ -79,6 +79,7 @@ static int virtbt_close_vdev(struct virtio_bluetooth *vbt)
 
                while ((skb = virtqueue_detach_unused_buf(vq)))
                        kfree_skb(skb);
+               cond_resched();
        }
 
        return 0;
index 1f8da0a71ce93f33f5fadf3fd3d1a61b347be45e..680d1ef2a21794b5b549e341c75828559a446571 100644 (file)
@@ -1936,6 +1936,7 @@ static void remove_vqs(struct ports_device *portdev)
                flush_bufs(vq, true);
                while ((buf = virtqueue_detach_unused_buf(vq)))
                        free_buf(buf, true);
+               cond_resched();
        }
        portdev->vdev->config->del_vqs(portdev->vdev);
        kfree(portdev->in_vqs);
index 1198bd3063655a87d7de886c7e9eecbd008d55ab..94849fa3bd74aaa85e7467a571b778e8e373417f 100644 (file)
@@ -480,6 +480,7 @@ static void virtcrypto_free_unused_reqs(struct virtio_crypto *vcrypto)
                        kfree(vc_req->req_data);
                        kfree(vc_req->sgs);
                }
+               cond_resched();
        }
 }
 
index cd6ad92f3f0598c2f9b07f4f659d8c922c3f7f9e..656c1cb541deb7c51c31b5304ff5f5d5b3395da7 100644 (file)
@@ -116,4 +116,14 @@ config ALIBABA_ENI_VDPA
          This driver includes a HW monitor device that
          reads health values from the DPU.
 
+config PDS_VDPA
+       tristate "vDPA driver for AMD/Pensando DSC devices"
+       select VIRTIO_PCI_LIB
+       depends on PCI_MSI
+       depends on PDS_CORE
+       help
+         vDPA network driver for AMD/Pensando's PDS Core devices.
+         With this driver, the VirtIO dataplane can be
+         offloaded to an AMD/Pensando DSC device.
+
 endif # VDPA
index 59396ff2a31828cbbd14a852b82a9922a0d52d46..8f53c6f3cca7c5e5dacc726f2d8f9d94390e04bb 100644 (file)
@@ -7,3 +7,4 @@ obj-$(CONFIG_MLX5_VDPA) += mlx5/
 obj-$(CONFIG_VP_VDPA)    += virtio_pci/
 obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/
 obj-$(CONFIG_SNET_VDPA) += solidrun/
+obj-$(CONFIG_PDS_VDPA) += pds/
index 5563b3a773c7bdc59cde05e39957cb50701765be..060f837a4f9f762018fd29d52bcb10c91a9436aa 100644 (file)
@@ -69,6 +69,37 @@ static int ifcvf_read_config_range(struct pci_dev *dev,
        return 0;
 }
 
+static u16 ifcvf_get_vq_size(struct ifcvf_hw *hw, u16 qid)
+{
+       u16 queue_size;
+
+       vp_iowrite16(qid, &hw->common_cfg->queue_select);
+       queue_size = vp_ioread16(&hw->common_cfg->queue_size);
+
+       return queue_size;
+}
+
+/* This function returns the max allowed safe size for
+ * all virtqueues. It is the minimal size that can be
+ * supported by all virtqueues.
+ */
+u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw)
+{
+       u16 queue_size, max_size, qid;
+
+       max_size = ifcvf_get_vq_size(hw, 0);
+       for (qid = 1; qid < hw->nr_vring; qid++) {
+               queue_size = ifcvf_get_vq_size(hw, qid);
+               /* 0 means the queue is unavailable */
+               if (!queue_size)
+                       continue;
+
+               max_size = min(queue_size, max_size);
+       }
+
+       return max_size;
+}
+
 int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
 {
        struct virtio_pci_cap cap;
@@ -134,6 +165,9 @@ next:
        }
 
        hw->nr_vring = vp_ioread16(&hw->common_cfg->num_queues);
+       hw->vring = kzalloc(sizeof(struct vring_info) * hw->nr_vring, GFP_KERNEL);
+       if (!hw->vring)
+               return -ENOMEM;
 
        for (i = 0; i < hw->nr_vring; i++) {
                vp_iowrite16(i, &hw->common_cfg->queue_select);
@@ -170,21 +204,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
 
 void ifcvf_reset(struct ifcvf_hw *hw)
 {
-       hw->config_cb.callback = NULL;
-       hw->config_cb.private = NULL;
-
        ifcvf_set_status(hw, 0);
-       /* flush set_status, make sure VF is stopped, reset */
-       ifcvf_get_status(hw);
-}
-
-static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
-{
-       if (status != 0)
-               status |= ifcvf_get_status(hw);
-
-       ifcvf_set_status(hw, status);
-       ifcvf_get_status(hw);
+       while (ifcvf_get_status(hw))
+               msleep(1);
 }
 
 u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
@@ -204,11 +226,29 @@ u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
        return features;
 }
 
-u64 ifcvf_get_features(struct ifcvf_hw *hw)
+/* return provisioned vDPA dev features */
+u64 ifcvf_get_dev_features(struct ifcvf_hw *hw)
 {
        return hw->dev_features;
 }
 
+u64 ifcvf_get_driver_features(struct ifcvf_hw *hw)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+       u32 features_lo, features_hi;
+       u64 features;
+
+       vp_iowrite32(0, &cfg->device_feature_select);
+       features_lo = vp_ioread32(&cfg->guest_feature);
+
+       vp_iowrite32(1, &cfg->device_feature_select);
+       features_hi = vp_ioread32(&cfg->guest_feature);
+
+       features = ((u64)features_hi << 32) | features_lo;
+
+       return features;
+}
+
 int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
 {
        if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
@@ -275,7 +315,7 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
                vp_iowrite8(*p++, hw->dev_cfg + offset + i);
 }
 
-static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
+void ifcvf_set_driver_features(struct ifcvf_hw *hw, u64 features)
 {
        struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
 
@@ -286,105 +326,104 @@ static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
        vp_iowrite32(features >> 32, &cfg->guest_feature);
 }
 
-static int ifcvf_config_features(struct ifcvf_hw *hw)
-{
-       ifcvf_set_features(hw, hw->req_features);
-       ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);
-
-       if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
-               IFCVF_ERR(hw->pdev, "Failed to set FEATURES_OK status\n");
-               return -EIO;
-       }
-
-       return 0;
-}
-
 u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
 {
-       struct ifcvf_lm_cfg __iomem *ifcvf_lm;
-       void __iomem *avail_idx_addr;
+       struct ifcvf_lm_cfg  __iomem *lm_cfg = hw->lm_cfg;
        u16 last_avail_idx;
-       u32 q_pair_id;
 
-       ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
-       q_pair_id = qid / 2;
-       avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
-       last_avail_idx = vp_ioread16(avail_idx_addr);
+       last_avail_idx = vp_ioread16(&lm_cfg->vq_state_region + qid * 2);
 
        return last_avail_idx;
 }
 
 int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num)
 {
-       struct ifcvf_lm_cfg __iomem *ifcvf_lm;
-       void __iomem *avail_idx_addr;
-       u32 q_pair_id;
+       struct ifcvf_lm_cfg  __iomem *lm_cfg = hw->lm_cfg;
 
-       ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
-       q_pair_id = qid / 2;
-       avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
-       hw->vring[qid].last_avail_idx = num;
-       vp_iowrite16(num, avail_idx_addr);
+       vp_iowrite16(num, &lm_cfg->vq_state_region + qid * 2);
 
        return 0;
 }
 
-static int ifcvf_hw_enable(struct ifcvf_hw *hw)
+void ifcvf_set_vq_num(struct ifcvf_hw *hw, u16 qid, u32 num)
 {
-       struct virtio_pci_common_cfg __iomem *cfg;
-       u32 i;
+       struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
 
-       cfg = hw->common_cfg;
-       for (i = 0; i < hw->nr_vring; i++) {
-               if (!hw->vring[i].ready)
-                       break;
+       vp_iowrite16(qid, &cfg->queue_select);
+       vp_iowrite16(num, &cfg->queue_size);
+}
 
-               vp_iowrite16(i, &cfg->queue_select);
-               vp_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
-                                    &cfg->queue_desc_hi);
-               vp_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
-                                     &cfg->queue_avail_hi);
-               vp_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
-                                    &cfg->queue_used_hi);
-               vp_iowrite16(hw->vring[i].size, &cfg->queue_size);
-               ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx);
-               vp_iowrite16(1, &cfg->queue_enable);
-       }
+int ifcvf_set_vq_address(struct ifcvf_hw *hw, u16 qid, u64 desc_area,
+                        u64 driver_area, u64 device_area)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+
+       vp_iowrite16(qid, &cfg->queue_select);
+       vp_iowrite64_twopart(desc_area, &cfg->queue_desc_lo,
+                            &cfg->queue_desc_hi);
+       vp_iowrite64_twopart(driver_area, &cfg->queue_avail_lo,
+                            &cfg->queue_avail_hi);
+       vp_iowrite64_twopart(device_area, &cfg->queue_used_lo,
+                            &cfg->queue_used_hi);
 
        return 0;
 }
 
-static void ifcvf_hw_disable(struct ifcvf_hw *hw)
+bool ifcvf_get_vq_ready(struct ifcvf_hw *hw, u16 qid)
 {
-       u32 i;
+       struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+       u16 queue_enable;
 
-       ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR);
-       for (i = 0; i < hw->nr_vring; i++) {
-               ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR);
-       }
+       vp_iowrite16(qid, &cfg->queue_select);
+       queue_enable = vp_ioread16(&cfg->queue_enable);
+
+       return (bool)queue_enable;
 }
 
-int ifcvf_start_hw(struct ifcvf_hw *hw)
+void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready)
 {
-       ifcvf_reset(hw);
-       ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE);
-       ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER);
+       struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
 
-       if (ifcvf_config_features(hw) < 0)
-               return -EINVAL;
+       vp_iowrite16(qid, &cfg->queue_select);
+       vp_iowrite16(ready, &cfg->queue_enable);
+}
 
-       if (ifcvf_hw_enable(hw) < 0)
-               return -EINVAL;
+static void ifcvf_reset_vring(struct ifcvf_hw *hw)
+{
+       u16 qid;
 
-       ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK);
+       for (qid = 0; qid < hw->nr_vring; qid++) {
+               hw->vring[qid].cb.callback = NULL;
+               hw->vring[qid].cb.private = NULL;
+               ifcvf_set_vq_vector(hw, qid, VIRTIO_MSI_NO_VECTOR);
+       }
+}
 
-       return 0;
+static void ifcvf_reset_config_handler(struct ifcvf_hw *hw)
+{
+       hw->config_cb.callback = NULL;
+       hw->config_cb.private = NULL;
+       ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR);
+}
+
+static void ifcvf_synchronize_irq(struct ifcvf_hw *hw)
+{
+       u32 nvectors = hw->num_msix_vectors;
+       struct pci_dev *pdev = hw->pdev;
+       int i, irq;
+
+       for (i = 0; i < nvectors; i++) {
+               irq = pci_irq_vector(pdev, i);
+               if (irq >= 0)
+                       synchronize_irq(irq);
+       }
 }
 
-void ifcvf_stop_hw(struct ifcvf_hw *hw)
+void ifcvf_stop(struct ifcvf_hw *hw)
 {
-       ifcvf_hw_disable(hw);
-       ifcvf_reset(hw);
+       ifcvf_synchronize_irq(hw);
+       ifcvf_reset_vring(hw);
+       ifcvf_reset_config_handler(hw);
 }
 
 void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
index c20d1c40214e06c68559a2e3d8ea77f860ec24f3..b57849c643f611fc271d894eb24075f99a0c6fed 100644 (file)
 #define N3000_DEVICE_ID                0x1041
 #define N3000_SUBSYS_DEVICE_ID 0x001A
 
-/* Max 8 data queue pairs(16 queues) and one control vq for now. */
-#define IFCVF_MAX_QUEUES       17
-
 #define IFCVF_QUEUE_ALIGNMENT  PAGE_SIZE
-#define IFCVF_QUEUE_MAX                32768
 #define IFCVF_PCI_MAX_RESOURCE 6
 
-#define IFCVF_LM_CFG_SIZE              0x40
-#define IFCVF_LM_RING_STATE_OFFSET     0x20
 #define IFCVF_LM_BAR                   4
 
 #define IFCVF_ERR(pdev, fmt, ...)      dev_err(&pdev->dev, fmt, ##__VA_ARGS__)
 #define MSIX_VECTOR_DEV_SHARED                 3
 
 struct vring_info {
-       u64 desc;
-       u64 avail;
-       u64 used;
-       u16 size;
        u16 last_avail_idx;
-       bool ready;
        void __iomem *notify_addr;
        phys_addr_t notify_pa;
        u32 irq;
@@ -60,10 +49,18 @@ struct vring_info {
        char msix_name[256];
 };
 
+struct ifcvf_lm_cfg {
+       __le64 control;
+       __le64 status;
+       __le64 lm_mem_log_start_addr;
+       __le64 lm_mem_log_end_addr;
+       __le16 vq_state_region;
+};
+
 struct ifcvf_hw {
        u8 __iomem *isr;
        /* Live migration */
-       u8 __iomem *lm_cfg;
+       struct ifcvf_lm_cfg  __iomem *lm_cfg;
        /* Notification bar number */
        u8 notify_bar;
        u8 msix_vector_status;
@@ -74,13 +71,12 @@ struct ifcvf_hw {
        phys_addr_t notify_base_pa;
        u32 notify_off_multiplier;
        u32 dev_type;
-       u64 req_features;
        u64 hw_features;
        /* provisioned device features */
        u64 dev_features;
        struct virtio_pci_common_cfg __iomem *common_cfg;
        void __iomem *dev_cfg;
-       struct vring_info vring[IFCVF_MAX_QUEUES];
+       struct vring_info *vring;
        void __iomem * const *base;
        char config_msix_name[256];
        struct vdpa_callback config_cb;
@@ -88,6 +84,7 @@ struct ifcvf_hw {
        int vqs_reused_irq;
        u16 nr_vring;
        /* VIRTIO_PCI_CAP_DEVICE_CFG size */
+       u32 num_msix_vectors;
        u32 cap_dev_config_size;
        struct pci_dev *pdev;
 };
@@ -98,16 +95,6 @@ struct ifcvf_adapter {
        struct ifcvf_hw *vf;
 };
 
-struct ifcvf_vring_lm_cfg {
-       u32 idx_addr[2];
-       u8 reserved[IFCVF_LM_CFG_SIZE - 8];
-};
-
-struct ifcvf_lm_cfg {
-       u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
-       struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUES];
-};
-
 struct ifcvf_vdpa_mgmt_dev {
        struct vdpa_mgmt_dev mdev;
        struct ifcvf_hw vf;
@@ -116,8 +103,7 @@ struct ifcvf_vdpa_mgmt_dev {
 };
 
 int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
-int ifcvf_start_hw(struct ifcvf_hw *hw);
-void ifcvf_stop_hw(struct ifcvf_hw *hw);
+void ifcvf_stop(struct ifcvf_hw *hw);
 void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
 void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset,
                           void *dst, int length);
@@ -127,7 +113,7 @@ u8 ifcvf_get_status(struct ifcvf_hw *hw);
 void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
 void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
 void ifcvf_reset(struct ifcvf_hw *hw);
-u64 ifcvf_get_features(struct ifcvf_hw *hw);
+u64 ifcvf_get_dev_features(struct ifcvf_hw *hw);
 u64 ifcvf_get_hw_features(struct ifcvf_hw *hw);
 int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features);
 u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
@@ -137,4 +123,12 @@ int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
 u32 ifcvf_get_config_size(struct ifcvf_hw *hw);
 u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector);
 u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector);
+void ifcvf_set_vq_num(struct ifcvf_hw *hw, u16 qid, u32 num);
+int ifcvf_set_vq_address(struct ifcvf_hw *hw, u16 qid, u64 desc_area,
+                        u64 driver_area, u64 device_area);
+bool ifcvf_get_vq_ready(struct ifcvf_hw *hw, u16 qid);
+void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready);
+void ifcvf_set_driver_features(struct ifcvf_hw *hw, u64 features);
+u64 ifcvf_get_driver_features(struct ifcvf_hw *hw);
+u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw);
 #endif /* _IFCVF_H_ */
index 7f78c47e40d60756cba68ec7001bf4626f09a443..e98fa8100f3cc796f2f3b543cadd7f9a694313c7 100644 (file)
@@ -125,6 +125,7 @@ static void ifcvf_free_irq(struct ifcvf_hw *vf)
        ifcvf_free_vq_irq(vf);
        ifcvf_free_config_irq(vf);
        ifcvf_free_irq_vectors(pdev);
+       vf->num_msix_vectors = 0;
 }
 
 /* ifcvf MSIX vectors allocator, this helper tries to allocate
@@ -343,56 +344,11 @@ static int ifcvf_request_irq(struct ifcvf_hw *vf)
        if (ret)
                return ret;
 
-       return 0;
-}
-
-static int ifcvf_start_datapath(struct ifcvf_adapter *adapter)
-{
-       struct ifcvf_hw *vf = adapter->vf;
-       u8 status;
-       int ret;
-
-       ret = ifcvf_start_hw(vf);
-       if (ret < 0) {
-               status = ifcvf_get_status(vf);
-               status |= VIRTIO_CONFIG_S_FAILED;
-               ifcvf_set_status(vf, status);
-       }
-
-       return ret;
-}
-
-static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter)
-{
-       struct ifcvf_hw *vf = adapter->vf;
-       int i;
-
-       for (i = 0; i < vf->nr_vring; i++)
-               vf->vring[i].cb.callback = NULL;
-
-       ifcvf_stop_hw(vf);
+       vf->num_msix_vectors = nvectors;
 
        return 0;
 }
 
-static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
-{
-       struct ifcvf_hw *vf = adapter->vf;
-       int i;
-
-       for (i = 0; i < vf->nr_vring; i++) {
-               vf->vring[i].last_avail_idx = 0;
-               vf->vring[i].desc = 0;
-               vf->vring[i].avail = 0;
-               vf->vring[i].used = 0;
-               vf->vring[i].ready = 0;
-               vf->vring[i].cb.callback = NULL;
-               vf->vring[i].cb.private = NULL;
-       }
-
-       ifcvf_reset(vf);
-}
-
 static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev)
 {
        return container_of(vdpa_dev, struct ifcvf_adapter, vdpa);
@@ -414,7 +370,7 @@ static u64 ifcvf_vdpa_get_device_features(struct vdpa_device *vdpa_dev)
        u64 features;
 
        if (type == VIRTIO_ID_NET || type == VIRTIO_ID_BLOCK)
-               features = ifcvf_get_features(vf);
+               features = ifcvf_get_dev_features(vf);
        else {
                features = 0;
                IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
@@ -432,7 +388,7 @@ static int ifcvf_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 feat
        if (ret)
                return ret;
 
-       vf->req_features = features;
+       ifcvf_set_driver_features(vf, features);
 
        return 0;
 }
@@ -440,8 +396,11 @@ static int ifcvf_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 feat
 static u64 ifcvf_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
 {
        struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+       u64 features;
+
+       features = ifcvf_get_driver_features(vf);
 
-       return vf->req_features;
+       return features;
 }
 
 static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev)
@@ -453,13 +412,11 @@ static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev)
 
 static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 {
-       struct ifcvf_adapter *adapter;
        struct ifcvf_hw *vf;
        u8 status_old;
        int ret;
 
        vf  = vdpa_to_vf(vdpa_dev);
-       adapter = vdpa_to_adapter(vdpa_dev);
        status_old = ifcvf_get_status(vf);
 
        if (status_old == status)
@@ -469,16 +426,9 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
            !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
                ret = ifcvf_request_irq(vf);
                if (ret) {
-                       status = ifcvf_get_status(vf);
-                       status |= VIRTIO_CONFIG_S_FAILED;
-                       ifcvf_set_status(vf, status);
+                       IFCVF_ERR(vf->pdev, "failed to request irq with error %d\n", ret);
                        return;
                }
-
-               if (ifcvf_start_datapath(adapter) < 0)
-                       IFCVF_ERR(adapter->pdev,
-                                 "Failed to set ifcvf vdpa  status %u\n",
-                                 status);
        }
 
        ifcvf_set_status(vf, status);
@@ -486,30 +436,24 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 
 static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
 {
-       struct ifcvf_adapter *adapter;
-       struct ifcvf_hw *vf;
-       u8 status_old;
-
-       vf  = vdpa_to_vf(vdpa_dev);
-       adapter = vdpa_to_adapter(vdpa_dev);
-       status_old = ifcvf_get_status(vf);
+       struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+       u8 status = ifcvf_get_status(vf);
 
-       if (status_old == 0)
-               return 0;
+       ifcvf_stop(vf);
 
-       if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) {
-               ifcvf_stop_datapath(adapter);
+       if (status & VIRTIO_CONFIG_S_DRIVER_OK)
                ifcvf_free_irq(vf);
-       }
 
-       ifcvf_reset_vring(adapter);
+       ifcvf_reset(vf);
 
        return 0;
 }
 
 static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
 {
-       return IFCVF_QUEUE_MAX;
+       struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+       return ifcvf_get_max_vq_size(vf);
 }
 
 static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
@@ -542,14 +486,14 @@ static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev,
 {
        struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 
-       vf->vring[qid].ready = ready;
+       ifcvf_set_vq_ready(vf, qid, ready);
 }
 
 static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid)
 {
        struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 
-       return vf->vring[qid].ready;
+       return ifcvf_get_vq_ready(vf, qid);
 }
 
 static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid,
@@ -557,7 +501,7 @@ static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid,
 {
        struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 
-       vf->vring[qid].size = num;
+       ifcvf_set_vq_num(vf, qid, num);
 }
 
 static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid,
@@ -566,11 +510,7 @@ static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid,
 {
        struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 
-       vf->vring[qid].desc = desc_area;
-       vf->vring[qid].avail = driver_area;
-       vf->vring[qid].used = device_area;
-
-       return 0;
+       return ifcvf_set_vq_address(vf, qid, desc_area, driver_area, device_area);
 }
 
 static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid)
@@ -892,6 +832,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        return 0;
 
 err:
+       kfree(ifcvf_mgmt_dev->vf.vring);
        kfree(ifcvf_mgmt_dev);
        return ret;
 }
@@ -902,6 +843,7 @@ static void ifcvf_remove(struct pci_dev *pdev)
 
        ifcvf_mgmt_dev = pci_get_drvdata(pdev);
        vdpa_mgmtdev_unregister(&ifcvf_mgmt_dev->mdev);
+       kfree(ifcvf_mgmt_dev->vf.vring);
        kfree(ifcvf_mgmt_dev);
 }
 
@@ -911,7 +853,9 @@ static struct pci_device_id ifcvf_pci_ids[] = {
                         N3000_DEVICE_ID,
                         PCI_VENDOR_ID_INTEL,
                         N3000_SUBSYS_DEVICE_ID) },
-       /* C5000X-PL network device */
+       /* C5000X-PL network device
+        * F2000X-PL network device
+        */
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
                         VIRTIO_TRANS_ID_NET,
                         PCI_VENDOR_ID_INTEL,
index 279ac6a558d29a7124204c9229fe13f7ee4eafd3..9138ef2fb2c853270ec11284a0824e84200276d3 100644 (file)
@@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
        u64 driver_addr;
        u16 avail_index;
        u16 used_index;
+       struct msi_map map;
        bool ready;
        bool restore;
 };
@@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
        u16 avail_idx;
        u16 used_idx;
        int fw_state;
+       struct msi_map map;
 
        /* keep last in the struct */
        struct mlx5_vq_restore_info ri;
@@ -808,6 +810,13 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
               BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
 }
 
+static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
+{
+       return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
+               (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
+               pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
+}
+
 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
@@ -849,9 +858,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
        if (vq_is_tx(mvq->index))
                MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
 
-       MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
+       if (mvq->map.virq) {
+               MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
+               MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
+       } else {
+               MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
+               MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
+       }
+
        MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
-       MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
        MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
        MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
                 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
@@ -1194,6 +1209,56 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
                mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
 }
 
+static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
+{
+       struct vdpa_callback *cb = priv;
+
+       if (cb->callback)
+               return cb->callback(cb->private);
+
+       return IRQ_HANDLED;
+}
+
+static void alloc_vector(struct mlx5_vdpa_net *ndev,
+                        struct mlx5_vdpa_virtqueue *mvq)
+{
+       struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
+       struct mlx5_vdpa_irq_pool_entry *ent;
+       int err;
+       int i;
+
+       for (i = 0; i < irqp->num_ent; i++) {
+               ent = &irqp->entries[i];
+               if (!ent->used) {
+                       snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
+                                dev_name(&ndev->mvdev.vdev.dev), mvq->index);
+                       ent->dev_id = &ndev->event_cbs[mvq->index];
+                       err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
+                                         ent->name, ent->dev_id);
+                       if (err)
+                               return;
+
+                       ent->used = true;
+                       mvq->map = ent->map;
+                       return;
+               }
+       }
+}
+
+static void dealloc_vector(struct mlx5_vdpa_net *ndev,
+                          struct mlx5_vdpa_virtqueue *mvq)
+{
+       struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
+       int i;
+
+       for (i = 0; i < irqp->num_ent; i++)
+               if (mvq->map.virq == irqp->entries[i].map.virq) {
+                       free_irq(mvq->map.virq, irqp->entries[i].dev_id);
+                       irqp->entries[i].used = false;
+                       return;
+               }
+}
+
 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 {
        u16 idx = mvq->index;
@@ -1223,27 +1288,31 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 
        err = counter_set_alloc(ndev, mvq);
        if (err)
-               goto err_counter;
+               goto err_connect;
 
+       alloc_vector(ndev, mvq);
        err = create_virtqueue(ndev, mvq);
        if (err)
-               goto err_connect;
+               goto err_vq;
 
        if (mvq->ready) {
                err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
                if (err) {
                        mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
                                       idx, err);
-                       goto err_connect;
+                       goto err_modify;
                }
        }
 
        mvq->initialized = true;
        return 0;
 
-err_connect:
+err_modify:
+       destroy_virtqueue(ndev, mvq);
+err_vq:
+       dealloc_vector(ndev, mvq);
        counter_set_dealloc(ndev, mvq);
-err_counter:
+err_connect:
        qp_destroy(ndev, &mvq->vqqp);
 err_vqqp:
        qp_destroy(ndev, &mvq->fwqp);
@@ -1288,6 +1357,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
 
        suspend_vq(ndev, mvq);
        destroy_virtqueue(ndev, mvq);
+       dealloc_vector(ndev, mvq);
        counter_set_dealloc(ndev, mvq);
        qp_destroy(ndev, &mvq->vqqp);
        qp_destroy(ndev, &mvq->fwqp);
@@ -2505,6 +2575,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
        ri->desc_addr = mvq->desc_addr;
        ri->device_addr = mvq->device_addr;
        ri->driver_addr = mvq->driver_addr;
+       ri->map = mvq->map;
        ri->restore = true;
        return 0;
 }
@@ -2549,6 +2620,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
                mvq->desc_addr = ri->desc_addr;
                mvq->device_addr = ri->device_addr;
                mvq->driver_addr = ri->driver_addr;
+               mvq->map = ri->map;
        }
 }
 
@@ -2833,6 +2905,25 @@ static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
        return mvdev->vdev.dma_dev;
 }
 
+static void free_irqs(struct mlx5_vdpa_net *ndev)
+{
+       struct mlx5_vdpa_irq_pool_entry *ent;
+       int i;
+
+       if (!msix_mode_supported(&ndev->mvdev))
+               return;
+
+       if (!ndev->irqp.entries)
+               return;
+
+       for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
+               ent = ndev->irqp.entries + i;
+               if (ent->map.virq)
+                       pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
+       }
+       kfree(ndev->irqp.entries);
+}
+
 static void mlx5_vdpa_free(struct vdpa_device *vdev)
 {
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2848,6 +2939,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
                mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
        }
        mlx5_vdpa_free_resources(&ndev->mvdev);
+       free_irqs(ndev);
        kfree(ndev->event_cbs);
        kfree(ndev->vqs);
 }
@@ -2876,9 +2968,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
        return ret;
 }
 
-static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
+static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
 {
-       return -EOPNOTSUPP;
+       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+       struct mlx5_vdpa_virtqueue *mvq;
+
+       if (!is_index_valid(mvdev, idx))
+               return -EINVAL;
+
+       if (is_ctrl_vq_idx(mvdev, idx))
+               return -EOPNOTSUPP;
+
+       mvq = &ndev->vqs[idx];
+       if (!mvq->map.virq)
+               return -EOPNOTSUPP;
+
+       return mvq->map.virq;
 }
 
 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
@@ -3155,6 +3261,34 @@ static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
        return err;
 }
 
+static void allocate_irqs(struct mlx5_vdpa_net *ndev)
+{
+       struct mlx5_vdpa_irq_pool_entry *ent;
+       int i;
+
+       if (!msix_mode_supported(&ndev->mvdev))
+               return;
+
+       if (!ndev->mvdev.mdev->pdev)
+               return;
+
+       ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
+       if (!ndev->irqp.entries)
+               return;
+
+
+       for (i = 0; i < ndev->mvdev.max_vqs; i++) {
+               ent = ndev->irqp.entries + i;
+               snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
+                        dev_name(&ndev->mvdev.vdev.dev), i);
+               ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
+               if (!ent->map.virq)
+                       return;
+
+               ndev->irqp.num_ent++;
+       }
+}
+
 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
                             const struct vdpa_dev_set_config *add_config)
 {
@@ -3233,6 +3367,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
        }
 
        init_mvqs(ndev);
+       allocate_irqs(ndev);
        init_rwsem(&ndev->reslock);
        config = &ndev->config;
 
@@ -3413,6 +3548,17 @@ static void mlx5v_remove(struct auxiliary_device *adev)
        kfree(mgtdev);
 }
 
+static void mlx5v_shutdown(struct auxiliary_device *auxdev)
+{
+       struct mlx5_vdpa_mgmtdev *mgtdev;
+       struct mlx5_vdpa_net *ndev;
+
+       mgtdev = auxiliary_get_drvdata(auxdev);
+       ndev = mgtdev->ndev;
+
+       free_irqs(ndev);
+}
+
 static const struct auxiliary_device_id mlx5v_id_table[] = {
        { .name = MLX5_ADEV_NAME ".vnet", },
        {},
@@ -3424,6 +3570,7 @@ static struct auxiliary_driver mlx5v_driver = {
        .name = "vnet",
        .probe = mlx5v_probe,
        .remove = mlx5v_remove,
+       .shutdown = mlx5v_shutdown,
        .id_table = mlx5v_id_table,
 };
 
index c90a89e1de4d529456e000caca51319afd228d56..36c44d9fdd166b52c83557c66793acee1dbb2ae4 100644 (file)
@@ -26,6 +26,20 @@ static inline u16 key2vid(u64 key)
        return (u16)(key >> 48) & 0xfff;
 }
 
+#define MLX5_VDPA_IRQ_NAME_LEN 32
+
+struct mlx5_vdpa_irq_pool_entry {
+       struct msi_map map;
+       bool used;
+       char name[MLX5_VDPA_IRQ_NAME_LEN];
+       void *dev_id;
+};
+
+struct mlx5_vdpa_irq_pool {
+       int num_ent;
+       struct mlx5_vdpa_irq_pool_entry *entries;
+};
+
 struct mlx5_vdpa_net {
        struct mlx5_vdpa_dev mvdev;
        struct mlx5_vdpa_net_resources res;
@@ -49,6 +63,7 @@ struct mlx5_vdpa_net {
        struct vdpa_callback config_cb;
        struct mlx5_vdpa_wq_ent cvq_ent;
        struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
+       struct mlx5_vdpa_irq_pool irqp;
        struct dentry *debugfs;
 };
 
diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile
new file mode 100644 (file)
index 0000000..2e22418
--- /dev/null
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright(c) 2023 Advanced Micro Devices, Inc
+
+obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o
+
+pds_vdpa-y := aux_drv.o \
+             cmds.o \
+             vdpa_dev.o
+
+pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/vdpa/pds/aux_drv.c b/drivers/vdpa/pds/aux_drv.c
new file mode 100644 (file)
index 0000000..186e9ee
--- /dev/null
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/auxiliary_bus.h>
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+#include <linux/virtio_pci_modern.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_auxbus.h>
+
+#include "aux_drv.h"
+#include "debugfs.h"
+#include "vdpa_dev.h"
+
+static const struct auxiliary_device_id pds_vdpa_id_table[] = {
+       { .name = PDS_VDPA_DEV_NAME, },
+       {},
+};
+
+static int pds_vdpa_device_id_check(struct pci_dev *pdev)
+{
+       if (pdev->device != PCI_DEVICE_ID_PENSANDO_VDPA_VF ||
+           pdev->vendor != PCI_VENDOR_ID_PENSANDO)
+               return -ENODEV;
+
+       return PCI_DEVICE_ID_PENSANDO_VDPA_VF;
+}
+
+static int pds_vdpa_probe(struct auxiliary_device *aux_dev,
+                         const struct auxiliary_device_id *id)
+
+{
+       struct pds_auxiliary_dev *padev =
+               container_of(aux_dev, struct pds_auxiliary_dev, aux_dev);
+       struct device *dev = &aux_dev->dev;
+       struct pds_vdpa_aux *vdpa_aux;
+       int err;
+
+       vdpa_aux = kzalloc(sizeof(*vdpa_aux), GFP_KERNEL);
+       if (!vdpa_aux)
+               return -ENOMEM;
+
+       vdpa_aux->padev = padev;
+       vdpa_aux->vf_id = pci_iov_vf_id(padev->vf_pdev);
+       auxiliary_set_drvdata(aux_dev, vdpa_aux);
+
+       /* Get device ident info and set up the vdpa_mgmt_dev */
+       err = pds_vdpa_get_mgmt_info(vdpa_aux);
+       if (err)
+               goto err_free_mem;
+
+       /* Find the virtio configuration */
+       vdpa_aux->vd_mdev.pci_dev = padev->vf_pdev;
+       vdpa_aux->vd_mdev.device_id_check = pds_vdpa_device_id_check;
+       vdpa_aux->vd_mdev.dma_mask = DMA_BIT_MASK(PDS_CORE_ADDR_LEN);
+       err = vp_modern_probe(&vdpa_aux->vd_mdev);
+       if (err) {
+               dev_err(dev, "Unable to probe for virtio configuration: %pe\n",
+                       ERR_PTR(err));
+               goto err_free_mgmt_info;
+       }
+
+       /* Let vdpa know that we can provide devices */
+       err = vdpa_mgmtdev_register(&vdpa_aux->vdpa_mdev);
+       if (err) {
+               dev_err(dev, "%s: Failed to initialize vdpa_mgmt interface: %pe\n",
+                       __func__, ERR_PTR(err));
+               goto err_free_virtio;
+       }
+
+       pds_vdpa_debugfs_add_pcidev(vdpa_aux);
+       pds_vdpa_debugfs_add_ident(vdpa_aux);
+
+       return 0;
+
+err_free_virtio:
+       vp_modern_remove(&vdpa_aux->vd_mdev);
+err_free_mgmt_info:
+       pci_free_irq_vectors(padev->vf_pdev);
+err_free_mem:
+       kfree(vdpa_aux);
+       auxiliary_set_drvdata(aux_dev, NULL);
+
+       return err;
+}
+
+static void pds_vdpa_remove(struct auxiliary_device *aux_dev)
+{
+       struct pds_vdpa_aux *vdpa_aux = auxiliary_get_drvdata(aux_dev);
+       struct device *dev = &aux_dev->dev;
+
+       vdpa_mgmtdev_unregister(&vdpa_aux->vdpa_mdev);
+       vp_modern_remove(&vdpa_aux->vd_mdev);
+       pci_free_irq_vectors(vdpa_aux->padev->vf_pdev);
+
+       pds_vdpa_debugfs_del_vdpadev(vdpa_aux);
+       kfree(vdpa_aux);
+       auxiliary_set_drvdata(aux_dev, NULL);
+
+       dev_info(dev, "Removed\n");
+}
+
+static struct auxiliary_driver pds_vdpa_driver = {
+       .name = PDS_DEV_TYPE_VDPA_STR,
+       .probe = pds_vdpa_probe,
+       .remove = pds_vdpa_remove,
+       .id_table = pds_vdpa_id_table,
+};
+
+static void __exit pds_vdpa_cleanup(void)
+{
+       auxiliary_driver_unregister(&pds_vdpa_driver);
+
+       pds_vdpa_debugfs_destroy();
+}
+module_exit(pds_vdpa_cleanup);
+
+static int __init pds_vdpa_init(void)
+{
+       int err;
+
+       pds_vdpa_debugfs_create();
+
+       err = auxiliary_driver_register(&pds_vdpa_driver);
+       if (err) {
+               pr_err("%s: aux driver register failed: %pe\n",
+                      PDS_VDPA_DRV_NAME, ERR_PTR(err));
+               pds_vdpa_debugfs_destroy();
+       }
+
+       return err;
+}
+module_init(pds_vdpa_init);
+
+MODULE_DESCRIPTION(PDS_VDPA_DRV_DESCRIPTION);
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_LICENSE("GPL");
diff --git a/drivers/vdpa/pds/aux_drv.h b/drivers/vdpa/pds/aux_drv.h
new file mode 100644 (file)
index 0000000..26b7534
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _AUX_DRV_H_
+#define _AUX_DRV_H_
+
+#include <linux/virtio_pci_modern.h>
+
+#define PDS_VDPA_DRV_DESCRIPTION    "AMD/Pensando vDPA VF Device Driver"
+#define PDS_VDPA_DRV_NAME           KBUILD_MODNAME
+
+struct pds_vdpa_aux {
+       struct pds_auxiliary_dev *padev;
+
+       struct vdpa_mgmt_dev vdpa_mdev;
+       struct pds_vdpa_device *pdsv;
+
+       struct pds_vdpa_ident ident;
+
+       int vf_id;
+       struct dentry *dentry;
+       struct virtio_pci_modern_device vd_mdev;
+
+       int nintrs;
+};
+#endif /* _AUX_DRV_H_ */
diff --git a/drivers/vdpa/pds/cmds.c b/drivers/vdpa/pds/cmds.c
new file mode 100644 (file)
index 0000000..80863a4
--- /dev/null
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/vdpa.h>
+#include <linux/virtio_pci_modern.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_auxbus.h>
+
+#include "vdpa_dev.h"
+#include "aux_drv.h"
+#include "cmds.h"
+
+int pds_vdpa_init_hw(struct pds_vdpa_device *pdsv)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_init.opcode = PDS_VDPA_CMD_INIT,
+               .vdpa_init.vdpa_index = pdsv->vdpa_index,
+               .vdpa_init.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       /* Initialize the vdpa/virtio device */
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_init),
+                                   &comp, 0);
+       if (err)
+               dev_dbg(dev, "Failed to init hw, status %d: %pe\n",
+                       comp.status, ERR_PTR(err));
+
+       return err;
+}
+
+int pds_vdpa_cmd_reset(struct pds_vdpa_device *pdsv)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa.opcode = PDS_VDPA_CMD_RESET,
+               .vdpa.vdpa_index = pdsv->vdpa_index,
+               .vdpa.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa), &comp, 0);
+       if (err)
+               dev_dbg(dev, "Failed to reset hw, status %d: %pe\n",
+                       comp.status, ERR_PTR(err));
+
+       return err;
+}
+
+int pds_vdpa_cmd_set_status(struct pds_vdpa_device *pdsv, u8 status)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_status.opcode = PDS_VDPA_CMD_STATUS_UPDATE,
+               .vdpa_status.vdpa_index = pdsv->vdpa_index,
+               .vdpa_status.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+               .vdpa_status.status = status,
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_status), &comp, 0);
+       if (err)
+               dev_dbg(dev, "Failed to set status to %#x, error status %d: %pe\n",
+                       status, comp.status, ERR_PTR(err));
+
+       return err;
+}
+
+int pds_vdpa_cmd_set_mac(struct pds_vdpa_device *pdsv, u8 *mac)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_setattr.opcode = PDS_VDPA_CMD_SET_ATTR,
+               .vdpa_setattr.vdpa_index = pdsv->vdpa_index,
+               .vdpa_setattr.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+               .vdpa_setattr.attr = PDS_VDPA_ATTR_MAC,
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       ether_addr_copy(cmd.vdpa_setattr.mac, mac);
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_setattr),
+                                   &comp, 0);
+       if (err)
+               dev_dbg(dev, "Failed to set mac address %pM, status %d: %pe\n",
+                       mac, comp.status, ERR_PTR(err));
+
+       return err;
+}
+
+int pds_vdpa_cmd_set_max_vq_pairs(struct pds_vdpa_device *pdsv, u16 max_vqp)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_setattr.opcode = PDS_VDPA_CMD_SET_ATTR,
+               .vdpa_setattr.vdpa_index = pdsv->vdpa_index,
+               .vdpa_setattr.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+               .vdpa_setattr.attr = PDS_VDPA_ATTR_MAX_VQ_PAIRS,
+               .vdpa_setattr.max_vq_pairs = cpu_to_le16(max_vqp),
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_setattr),
+                                   &comp, 0);
+       if (err)
+               dev_dbg(dev, "Failed to set max vq pairs %u, status %d: %pe\n",
+                       max_vqp, comp.status, ERR_PTR(err));
+
+       return err;
+}
+
+int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx,
+                        struct pds_vdpa_vq_info *vq_info)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_vq_init.opcode = PDS_VDPA_CMD_VQ_INIT,
+               .vdpa_vq_init.vdpa_index = pdsv->vdpa_index,
+               .vdpa_vq_init.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+               .vdpa_vq_init.qid = cpu_to_le16(qid),
+               .vdpa_vq_init.len = cpu_to_le16(ilog2(vq_info->q_len)),
+               .vdpa_vq_init.desc_addr = cpu_to_le64(vq_info->desc_addr),
+               .vdpa_vq_init.avail_addr = cpu_to_le64(vq_info->avail_addr),
+               .vdpa_vq_init.used_addr = cpu_to_le64(vq_info->used_addr),
+               .vdpa_vq_init.intr_index = cpu_to_le16(qid),
+               .vdpa_vq_init.avail_index = cpu_to_le16(vq_info->avail_idx ^ invert_idx),
+               .vdpa_vq_init.used_index = cpu_to_le16(vq_info->used_idx ^ invert_idx),
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       dev_dbg(dev, "%s: qid %d len %d desc_addr %#llx avail_addr %#llx used_addr %#llx\n",
+               __func__, qid, ilog2(vq_info->q_len),
+               vq_info->desc_addr, vq_info->avail_addr, vq_info->used_addr);
+
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_vq_init),
+                                   &comp, 0);
+       if (err)
+               dev_dbg(dev, "Failed to init vq %d, status %d: %pe\n",
+                       qid, comp.status, ERR_PTR(err));
+
+       return err;
+}
+
+int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx,
+                         struct pds_vdpa_vq_info *vq_info)
+{
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_vq_reset.opcode = PDS_VDPA_CMD_VQ_RESET,
+               .vdpa_vq_reset.vdpa_index = pdsv->vdpa_index,
+               .vdpa_vq_reset.vf_id = cpu_to_le16(pdsv->vdpa_aux->vf_id),
+               .vdpa_vq_reset.qid = cpu_to_le16(qid),
+       };
+       union pds_core_adminq_comp comp = {};
+       int err;
+
+       err = pds_client_adminq_cmd(padev, &cmd, sizeof(cmd.vdpa_vq_reset),
+                                   &comp, 0);
+       if (err) {
+               dev_dbg(dev, "Failed to reset vq %d, status %d: %pe\n",
+                       qid, comp.status, ERR_PTR(err));
+               return err;
+       }
+
+       vq_info->avail_idx = le16_to_cpu(comp.vdpa_vq_reset.avail_index) ^ invert_idx;
+       vq_info->used_idx = le16_to_cpu(comp.vdpa_vq_reset.used_index) ^ invert_idx;
+
+       return 0;
+}
diff --git a/drivers/vdpa/pds/cmds.h b/drivers/vdpa/pds/cmds.h
new file mode 100644 (file)
index 0000000..e24d85c
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _VDPA_CMDS_H_
+#define _VDPA_CMDS_H_
+
+int pds_vdpa_init_hw(struct pds_vdpa_device *pdsv);
+
+int pds_vdpa_cmd_reset(struct pds_vdpa_device *pdsv);
+int pds_vdpa_cmd_set_status(struct pds_vdpa_device *pdsv, u8 status);
+int pds_vdpa_cmd_set_mac(struct pds_vdpa_device *pdsv, u8 *mac);
+int pds_vdpa_cmd_set_max_vq_pairs(struct pds_vdpa_device *pdsv, u16 max_vqp);
+int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx,
+                        struct pds_vdpa_vq_info *vq_info);
+int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx,
+                         struct pds_vdpa_vq_info *vq_info);
+int pds_vdpa_cmd_set_features(struct pds_vdpa_device *pdsv, u64 features);
+#endif /* _VDPA_CMDS_H_ */
diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c
new file mode 100644 (file)
index 0000000..21a0dc0
--- /dev/null
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_auxbus.h>
+
+#include "aux_drv.h"
+#include "vdpa_dev.h"
+#include "debugfs.h"
+
+static struct dentry *dbfs_dir;
+
+void pds_vdpa_debugfs_create(void)
+{
+       dbfs_dir = debugfs_create_dir(PDS_VDPA_DRV_NAME, NULL);
+}
+
+void pds_vdpa_debugfs_destroy(void)
+{
+       debugfs_remove_recursive(dbfs_dir);
+       dbfs_dir = NULL;
+}
+
+#define PRINT_SBIT_NAME(__seq, __f, __name)                     \
+       do {                                                    \
+               if ((__f) & (__name))                               \
+                       seq_printf(__seq, " %s", &#__name[16]); \
+       } while (0)
+
+static void print_status_bits(struct seq_file *seq, u8 status)
+{
+       seq_puts(seq, "status:");
+       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_DRIVER);
+       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_DRIVER_OK);
+       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_FEATURES_OK);
+       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_NEEDS_RESET);
+       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_FAILED);
+       seq_puts(seq, "\n");
+}
+
+static void print_feature_bits_all(struct seq_file *seq, u64 features)
+{
+       int i;
+
+       seq_puts(seq, "features:");
+
+       for (i = 0; i < (sizeof(u64) * 8); i++) {
+               u64 mask = BIT_ULL(i);
+
+               switch (features & mask) {
+               case BIT_ULL(VIRTIO_NET_F_CSUM):
+                       seq_puts(seq, " VIRTIO_NET_F_CSUM");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_GUEST_CSUM):
+                       seq_puts(seq, " VIRTIO_NET_F_GUEST_CSUM");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS):
+                       seq_puts(seq, " VIRTIO_NET_F_CTRL_GUEST_OFFLOADS");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_MTU):
+                       seq_puts(seq, " VIRTIO_NET_F_MTU");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_MAC):
+                       seq_puts(seq, " VIRTIO_NET_F_MAC");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_GUEST_TSO4):
+                       seq_puts(seq, " VIRTIO_NET_F_GUEST_TSO4");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_GUEST_TSO6):
+                       seq_puts(seq, " VIRTIO_NET_F_GUEST_TSO6");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_GUEST_ECN):
+                       seq_puts(seq, " VIRTIO_NET_F_GUEST_ECN");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_GUEST_UFO):
+                       seq_puts(seq, " VIRTIO_NET_F_GUEST_UFO");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_HOST_TSO4):
+                       seq_puts(seq, " VIRTIO_NET_F_HOST_TSO4");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_HOST_TSO6):
+                       seq_puts(seq, " VIRTIO_NET_F_HOST_TSO6");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_HOST_ECN):
+                       seq_puts(seq, " VIRTIO_NET_F_HOST_ECN");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_HOST_UFO):
+                       seq_puts(seq, " VIRTIO_NET_F_HOST_UFO");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_MRG_RXBUF):
+                       seq_puts(seq, " VIRTIO_NET_F_MRG_RXBUF");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_STATUS):
+                       seq_puts(seq, " VIRTIO_NET_F_STATUS");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_CTRL_VQ):
+                       seq_puts(seq, " VIRTIO_NET_F_CTRL_VQ");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_CTRL_RX):
+                       seq_puts(seq, " VIRTIO_NET_F_CTRL_RX");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_CTRL_VLAN):
+                       seq_puts(seq, " VIRTIO_NET_F_CTRL_VLAN");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA):
+                       seq_puts(seq, " VIRTIO_NET_F_CTRL_RX_EXTRA");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE):
+                       seq_puts(seq, " VIRTIO_NET_F_GUEST_ANNOUNCE");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_MQ):
+                       seq_puts(seq, " VIRTIO_NET_F_MQ");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR):
+                       seq_puts(seq, " VIRTIO_NET_F_CTRL_MAC_ADDR");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_HASH_REPORT):
+                       seq_puts(seq, " VIRTIO_NET_F_HASH_REPORT");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_RSS):
+                       seq_puts(seq, " VIRTIO_NET_F_RSS");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_RSC_EXT):
+                       seq_puts(seq, " VIRTIO_NET_F_RSC_EXT");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_STANDBY):
+                       seq_puts(seq, " VIRTIO_NET_F_STANDBY");
+                       break;
+               case BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX):
+                       seq_puts(seq, " VIRTIO_NET_F_SPEED_DUPLEX");
+                       break;
+               case BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY):
+                       seq_puts(seq, " VIRTIO_F_NOTIFY_ON_EMPTY");
+                       break;
+               case BIT_ULL(VIRTIO_F_ANY_LAYOUT):
+                       seq_puts(seq, " VIRTIO_F_ANY_LAYOUT");
+                       break;
+               case BIT_ULL(VIRTIO_F_VERSION_1):
+                       seq_puts(seq, " VIRTIO_F_VERSION_1");
+                       break;
+               case BIT_ULL(VIRTIO_F_ACCESS_PLATFORM):
+                       seq_puts(seq, " VIRTIO_F_ACCESS_PLATFORM");
+                       break;
+               case BIT_ULL(VIRTIO_F_RING_PACKED):
+                       seq_puts(seq, " VIRTIO_F_RING_PACKED");
+                       break;
+               case BIT_ULL(VIRTIO_F_ORDER_PLATFORM):
+                       seq_puts(seq, " VIRTIO_F_ORDER_PLATFORM");
+                       break;
+               case BIT_ULL(VIRTIO_F_SR_IOV):
+                       seq_puts(seq, " VIRTIO_F_SR_IOV");
+                       break;
+               case 0:
+                       break;
+               default:
+                       seq_printf(seq, " bit_%d", i);
+                       break;
+               }
+       }
+
+       seq_puts(seq, "\n");
+}
+
+void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_aux *vdpa_aux)
+{
+       vdpa_aux->dentry = debugfs_create_dir(pci_name(vdpa_aux->padev->vf_pdev), dbfs_dir);
+}
+
+static int identity_show(struct seq_file *seq, void *v)
+{
+       struct pds_vdpa_aux *vdpa_aux = seq->private;
+       struct vdpa_mgmt_dev *mgmt;
+
+       seq_printf(seq, "aux_dev:            %s\n",
+                  dev_name(&vdpa_aux->padev->aux_dev.dev));
+
+       mgmt = &vdpa_aux->vdpa_mdev;
+       seq_printf(seq, "max_vqs:            %d\n", mgmt->max_supported_vqs);
+       seq_printf(seq, "config_attr_mask:   %#llx\n", mgmt->config_attr_mask);
+       seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features);
+       print_feature_bits_all(seq, mgmt->supported_features);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(identity);
+
+void pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux)
+{
+       debugfs_create_file("identity", 0400, vdpa_aux->dentry,
+                           vdpa_aux, &identity_fops);
+}
+
+static int config_show(struct seq_file *seq, void *v)
+{
+       struct pds_vdpa_device *pdsv = seq->private;
+       struct virtio_net_config vc;
+       u64 driver_features;
+       u8 status;
+
+       memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device,
+                     sizeof(struct virtio_net_config));
+
+       seq_printf(seq, "mac:                  %pM\n", vc.mac);
+       seq_printf(seq, "max_virtqueue_pairs:  %d\n",
+                  __virtio16_to_cpu(true, vc.max_virtqueue_pairs));
+       seq_printf(seq, "mtu:                  %d\n", __virtio16_to_cpu(true, vc.mtu));
+       seq_printf(seq, "speed:                %d\n", le32_to_cpu(vc.speed));
+       seq_printf(seq, "duplex:               %d\n", vc.duplex);
+       seq_printf(seq, "rss_max_key_size:     %d\n", vc.rss_max_key_size);
+       seq_printf(seq, "rss_max_indirection_table_length: %d\n",
+                  le16_to_cpu(vc.rss_max_indirection_table_length));
+       seq_printf(seq, "supported_hash_types: %#x\n",
+                  le32_to_cpu(vc.supported_hash_types));
+       seq_printf(seq, "vn_status:            %#x\n",
+                  __virtio16_to_cpu(true, vc.status));
+
+       status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
+       seq_printf(seq, "dev_status:           %#x\n", status);
+       print_status_bits(seq, status);
+
+       seq_printf(seq, "req_features:         %#llx\n", pdsv->req_features);
+       print_feature_bits_all(seq, pdsv->req_features);
+       driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
+       seq_printf(seq, "driver_features:      %#llx\n", driver_features);
+       print_feature_bits_all(seq, driver_features);
+       seq_printf(seq, "vdpa_index:           %d\n", pdsv->vdpa_index);
+       seq_printf(seq, "num_vqs:              %d\n", pdsv->num_vqs);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(config);
+
+static int vq_show(struct seq_file *seq, void *v)
+{
+       struct pds_vdpa_vq_info *vq = seq->private;
+
+       seq_printf(seq, "ready:      %d\n", vq->ready);
+       seq_printf(seq, "desc_addr:  %#llx\n", vq->desc_addr);
+       seq_printf(seq, "avail_addr: %#llx\n", vq->avail_addr);
+       seq_printf(seq, "used_addr:  %#llx\n", vq->used_addr);
+       seq_printf(seq, "q_len:      %d\n", vq->q_len);
+       seq_printf(seq, "qid:        %d\n", vq->qid);
+
+       seq_printf(seq, "doorbell:   %#llx\n", vq->doorbell);
+       seq_printf(seq, "avail_idx:  %d\n", vq->avail_idx);
+       seq_printf(seq, "used_idx:   %d\n", vq->used_idx);
+       seq_printf(seq, "irq:        %d\n", vq->irq);
+       seq_printf(seq, "irq-name:   %s\n", vq->irq_name);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(vq);
+
+void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux)
+{
+       int i;
+
+       debugfs_create_file("config", 0400, vdpa_aux->dentry, vdpa_aux->pdsv, &config_fops);
+
+       for (i = 0; i < vdpa_aux->pdsv->num_vqs; i++) {
+               char name[8];
+
+               snprintf(name, sizeof(name), "vq%02d", i);
+               debugfs_create_file(name, 0400, vdpa_aux->dentry,
+                                   &vdpa_aux->pdsv->vqs[i], &vq_fops);
+       }
+}
+
+void pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_aux *vdpa_aux)
+{
+       debugfs_remove_recursive(vdpa_aux->dentry);
+       vdpa_aux->dentry = NULL;
+}
+
+void pds_vdpa_debugfs_reset_vdpadev(struct pds_vdpa_aux *vdpa_aux)
+{
+       /* we don't keep track of the entries, so remove it all
+        * then rebuild the basics
+        */
+       pds_vdpa_debugfs_del_vdpadev(vdpa_aux);
+       pds_vdpa_debugfs_add_pcidev(vdpa_aux);
+       pds_vdpa_debugfs_add_ident(vdpa_aux);
+}
diff --git a/drivers/vdpa/pds/debugfs.h b/drivers/vdpa/pds/debugfs.h
new file mode 100644 (file)
index 0000000..c088a4e
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _PDS_VDPA_DEBUGFS_H_
+#define _PDS_VDPA_DEBUGFS_H_
+
+#include <linux/debugfs.h>
+
+/* Forward declaration so this header is self-contained and does not
+ * require aux_drv.h to be included first by every user.
+ */
+struct pds_vdpa_aux;
+
+void pds_vdpa_debugfs_create(void);
+void pds_vdpa_debugfs_destroy(void);
+void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_aux *vdpa_aux);
+void pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux);
+void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux);
+void pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_aux *vdpa_aux);
+void pds_vdpa_debugfs_reset_vdpadev(struct pds_vdpa_aux *vdpa_aux);
+
+#endif /* _PDS_VDPA_DEBUGFS_H_ */
diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c
new file mode 100644 (file)
index 0000000..5071a4d
--- /dev/null
@@ -0,0 +1,769 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+#include <uapi/linux/vdpa.h>
+#include <linux/virtio_pci_modern.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
+#include <linux/pds/pds_auxbus.h>
+
+#include "vdpa_dev.h"
+#include "aux_drv.h"
+#include "cmds.h"
+#include "debugfs.h"
+
+static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev);
+
+/* Map the embedded vdpa_device back to its containing pds_vdpa_device */
+static struct pds_vdpa_device *vdpa_to_pdsv(struct vdpa_device *vdpa_dev)
+{
+       return container_of(vdpa_dev, struct pds_vdpa_device, vdpa_dev);
+}
+
+/* pds_core notifier callback: forward reset and link-change events to the
+ * vdpa config callback, if the upper layer registered one.
+ */
+static int pds_vdpa_notify_handler(struct notifier_block *nb,
+                                  unsigned long ecode,
+                                  void *data)
+{
+       struct pds_vdpa_device *pdsv = container_of(nb, struct pds_vdpa_device, nb);
+       struct device *dev = &pdsv->vdpa_aux->padev->aux_dev.dev;
+
+       dev_dbg(dev, "%s: event code %lu\n", __func__, ecode);
+
+       if (ecode == PDS_EVENT_RESET || ecode == PDS_EVENT_LINK_CHANGE) {
+               if (pdsv->config_cb.callback)
+                       pdsv->config_cb.callback(pdsv->config_cb.private);
+       }
+
+       return 0;
+}
+
+/* Hook pdsv->nb into the pds_core notifier chain so device events can be
+ * relayed by pds_vdpa_notify_handler().  Idempotent: if the notifier is
+ * already registered (notifier_call set), this is a no-op returning 0.
+ *
+ * Returns 0 on success or the negative errno from pdsc_register_notify().
+ */
+static int pds_vdpa_register_event_handler(struct pds_vdpa_device *pdsv)
+{
+       struct device *dev = &pdsv->vdpa_aux->padev->aux_dev.dev;
+       struct notifier_block *nb = &pdsv->nb;
+       int err;
+
+       if (!nb->notifier_call) {
+               nb->notifier_call = pds_vdpa_notify_handler;
+               err = pdsc_register_notify(nb);
+               if (err) {
+                       nb->notifier_call = NULL;
+                       /* %pe (not %ps) for ERR_PTR, matching the error
+                        * reporting style used throughout this file
+                        */
+                       dev_err(dev, "failed to register pds event handler: %pe\n",
+                               ERR_PTR(err));
+                       /* propagate the real errno instead of masking it */
+                       return err;
+               }
+               dev_dbg(dev, "pds event handler registered\n");
+       }
+
+       return 0;
+}
+
+/* Undo pds_vdpa_register_event_handler(); safe to call when not registered */
+static void pds_vdpa_unregister_event_handler(struct pds_vdpa_device *pdsv)
+{
+       if (pdsv->nb.notifier_call) {
+               pdsc_unregister_notify(&pdsv->nb);
+               pdsv->nb.notifier_call = NULL;
+       }
+}
+
+/* vdpa_config_ops.set_vq_address: cache the ring addresses; they are sent
+ * to the device later, when the vq is made ready (pds_vdpa_cmd_init_vq).
+ */
+static int pds_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid,
+                                  u64 desc_addr, u64 driver_addr, u64 device_addr)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       pdsv->vqs[qid].desc_addr = desc_addr;
+       pdsv->vqs[qid].avail_addr = driver_addr;
+       pdsv->vqs[qid].used_addr = device_addr;
+
+       return 0;
+}
+
+/* vdpa_config_ops.set_vq_num: cache the requested queue length */
+static void pds_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, u32 num)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       pdsv->vqs[qid].q_len = num;
+}
+
+/* vdpa_config_ops.kick_vq: write the qid to the vq's mapped notify register */
+static void pds_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       iowrite16(qid, pdsv->vqs[qid].notify);
+}
+
+/* vdpa_config_ops.set_vq_cb: remember the per-vq interrupt callback */
+static void pds_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
+                              struct vdpa_callback *cb)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       pdsv->vqs[qid].event_cb = *cb;
+}
+
+/* Per-vq interrupt handler: invoke the callback set via set_vq_cb, if any */
+static irqreturn_t pds_vdpa_isr(int irq, void *data)
+{
+       struct pds_vdpa_vq_info *vq;
+
+       vq = data;
+       if (vq->event_cb.callback)
+               vq->event_cb.callback(vq->event_cb.private);
+
+       return IRQ_HANDLED;
+}
+
+/* Free the vq's irq if one was requested; VIRTIO_MSI_NO_VECTOR marks "none" */
+static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid)
+{
+       if (pdsv->vqs[qid].irq == VIRTIO_MSI_NO_VECTOR)
+               return;
+
+       free_irq(pdsv->vqs[qid].irq, &pdsv->vqs[qid]);
+       pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR;
+}
+
+/* vdpa_config_ops.set_vq_ready: enable or disable one virtqueue.
+ *
+ * Enabling requests the vq's MSI-X irq and then pushes the cached vq setup
+ * (addresses, indexes, irq) to the firmware in one adminq command; if the
+ * init command fails the irq is released and the vq stays not-ready.
+ * Disabling resets the vq via adminq and releases the irq.
+ */
+static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+       struct device *dev = &pdsv->vdpa_dev.dev;
+       u64 driver_features;
+       u16 invert_idx = 0;
+       int irq;
+       int err;
+
+       dev_dbg(dev, "%s: qid %d ready %d => %d\n",
+               __func__, qid, pdsv->vqs[qid].ready, ready);
+       if (ready == pdsv->vqs[qid].ready)
+               return;
+
+       /* packed rings store indexes with the wrap bit inverted; see the
+        * comment in pds_vdpa_set_vq_state()
+        */
+       driver_features = pds_vdpa_get_driver_features(vdpa_dev);
+       if (driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
+               invert_idx = PDS_VDPA_PACKED_INVERT_IDX;
+
+       if (ready) {
+               irq = pci_irq_vector(pdev, qid);
+               snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
+                        "vdpa-%s-%d", dev_name(dev), qid);
+
+               err = request_irq(irq, pds_vdpa_isr, 0,
+                                 pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]);
+               if (err) {
+                       dev_err(dev, "%s: no irq for qid %d: %pe\n",
+                               __func__, qid, ERR_PTR(err));
+                       return;
+               }
+               pdsv->vqs[qid].irq = irq;
+
+               /* Pass vq setup info to DSC using adminq to gather up and
+                * send all info at once so FW can do its full set up in
+                * one easy operation
+                */
+               err = pds_vdpa_cmd_init_vq(pdsv, qid, invert_idx, &pdsv->vqs[qid]);
+               if (err) {
+                       dev_err(dev, "Failed to init vq %d: %pe\n",
+                               qid, ERR_PTR(err));
+                       pds_vdpa_release_irq(pdsv, qid);
+                       ready = false;
+               }
+       } else {
+               err = pds_vdpa_cmd_reset_vq(pdsv, qid, invert_idx, &pdsv->vqs[qid]);
+               if (err)
+                       dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
+                               __func__, qid, ERR_PTR(err));
+               pds_vdpa_release_irq(pdsv, qid);
+       }
+
+       pdsv->vqs[qid].ready = ready;
+}
+
+/* vdpa_config_ops.get_vq_ready: report the cached ready state */
+static bool pds_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       return pdsv->vqs[qid].ready;
+}
+
+/* vdpa_config_ops.set_vq_state: cache the vq's avail/used indexes, only
+ * allowed while the vq is disabled.  For packed rings the 15-bit index and
+ * wrap counter are combined into one u16 with the wrap bit inverted.
+ */
+static int pds_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
+                                const struct vdpa_vq_state *state)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       u64 driver_features;
+       u16 avail;
+       u16 used;
+
+       if (pdsv->vqs[qid].ready) {
+               dev_err(dev, "Setting device position is denied while vq is enabled\n");
+               return -EINVAL;
+       }
+
+       driver_features = pds_vdpa_get_driver_features(vdpa_dev);
+       if (driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
+               avail = state->packed.last_avail_idx |
+                       (state->packed.last_avail_counter << 15);
+               used = state->packed.last_used_idx |
+                      (state->packed.last_used_counter << 15);
+
+               /* The avail and used index are stored with the packed wrap
+                * counter bit inverted.  This way, in case set_vq_state is
+                * not called, the initial value can be set to zero prior to
+                * feature negotiation, and it is good for both packed and
+                * split vq.
+                */
+               avail ^= PDS_VDPA_PACKED_INVERT_IDX;
+               used ^= PDS_VDPA_PACKED_INVERT_IDX;
+       } else {
+               avail = state->split.avail_index;
+               /* state->split does not provide a used_index:
+                * the vq will be set to "empty" here, and the vq will read
+                * the current used index the next time the vq is kicked.
+                */
+               used = avail;
+       }
+
+       if (used != avail) {
+               dev_dbg(dev, "Setting used equal to avail, for interoperability\n");
+               used = avail;
+       }
+
+       pdsv->vqs[qid].avail_idx = avail;
+       pdsv->vqs[qid].used_idx = used;
+
+       return 0;
+}
+
+/* vdpa_config_ops.get_vq_state: report the cached vq indexes, only allowed
+ * while the vq is disabled.  Undoes the packed-ring wrap-bit inversion
+ * applied in pds_vdpa_set_vq_state().
+ */
+static int pds_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
+                                struct vdpa_vq_state *state)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct pds_auxiliary_dev *padev = pdsv->vdpa_aux->padev;
+       struct device *dev = &padev->aux_dev.dev;
+       u64 driver_features;
+       u16 avail;
+       u16 used;
+
+       if (pdsv->vqs[qid].ready) {
+               dev_err(dev, "Getting device position is denied while vq is enabled\n");
+               return -EINVAL;
+       }
+
+       avail = pdsv->vqs[qid].avail_idx;
+       used = pdsv->vqs[qid].used_idx;
+
+       driver_features = pds_vdpa_get_driver_features(vdpa_dev);
+       if (driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
+               avail ^= PDS_VDPA_PACKED_INVERT_IDX;
+               used ^= PDS_VDPA_PACKED_INVERT_IDX;
+
+               state->packed.last_avail_idx = avail & 0x7fff;
+               state->packed.last_avail_counter = avail >> 15;
+               state->packed.last_used_idx = used & 0x7fff;
+               state->packed.last_used_counter = used >> 15;
+       } else {
+               state->split.avail_index = avail;
+               /* state->split does not provide a used_index. */
+       }
+
+       return 0;
+}
+
+/* vdpa_config_ops.get_vq_notification: return the physical address and size
+ * of the vq's doorbell region so userspace can map it directly.  A zero
+ * notify_offset_multiplier means the doorbells share one PDS_PAGE_SIZE area.
+ */
+static struct vdpa_notification_area
+pds_vdpa_get_vq_notification(struct vdpa_device *vdpa_dev, u16 qid)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct virtio_pci_modern_device *vd_mdev;
+       struct vdpa_notification_area area;
+
+       area.addr = pdsv->vqs[qid].notify_pa;
+
+       vd_mdev = &pdsv->vdpa_aux->vd_mdev;
+       if (!vd_mdev->notify_offset_multiplier)
+               area.size = PDS_PAGE_SIZE;
+       else
+               area.size = vd_mdev->notify_offset_multiplier;
+
+       return area;
+}
+
+/* vdpa_config_ops.get_vq_irq: report the vq's irq (VIRTIO_MSI_NO_VECTOR if none) */
+static int pds_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, u16 qid)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       return pdsv->vqs[qid].irq;
+}
+
+/* vdpa_config_ops.get_vq_align: rings must be PDS page aligned */
+static u32 pds_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
+{
+       return PDS_PAGE_SIZE;
+}
+
+/* vdpa_config_ops.get_vq_group: single address-space group for all vqs */
+static u32 pds_vdpa_get_vq_group(struct vdpa_device *vdpa_dev, u16 idx)
+{
+       return 0;
+}
+
+/* vdpa_config_ops.get_device_features: features offered to the driver */
+static u64 pds_vdpa_get_device_features(struct vdpa_device *vdpa_dev)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       return pdsv->supported_features;
+}
+
+/* vdpa_config_ops.set_driver_features: validate the driver's requested
+ * feature bits against the hw-supported set and, if they changed, write
+ * the negotiated features to the device.  VIRTIO_F_ACCESS_PLATFORM is
+ * mandatory whenever any feature is requested.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if unsupported bits were requested.
+ */
+static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 features)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct device *dev = &pdsv->vdpa_dev.dev;
+       u64 driver_features;
+       u64 nego_features;
+       u64 missing;
+
+       if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
+               dev_err(dev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
+               return -EOPNOTSUPP;
+       }
+
+       pdsv->req_features = features;
+
+       /* Check for valid feature bits */
+       nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
+       missing = pdsv->req_features & ~nego_features;
+       if (missing) {
+               dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n",
+                       pdsv->req_features, missing);
+               return -EOPNOTSUPP;
+       }
+
+       driver_features = pds_vdpa_get_driver_features(vdpa_dev);
+       dev_dbg(dev, "%s: %#llx => %#llx\n",
+               __func__, driver_features, nego_features);
+
+       /* skip the device write if nothing changed */
+       if (driver_features == nego_features)
+               return 0;
+
+       vp_modern_set_features(&pdsv->vdpa_aux->vd_mdev, nego_features);
+
+       return 0;
+}
+
+/* vdpa_config_ops.get_driver_features: read negotiated features from the device */
+static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
+}
+
+/* vdpa_config_ops.set_config_cb: save the config-change callback used by
+ * pds_vdpa_notify_handler() for reset/link-change events.
+ */
+static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
+                                  struct vdpa_callback *cb)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       pdsv->config_cb.callback = cb->callback;
+       pdsv->config_cb.private = cb->private;
+}
+
+/* vdpa_config_ops.get_vq_num_max: device queue depth (2^max_qlen), capped at 1024 */
+static u16 pds_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       /* qemu has assert() that vq_num_max <= VIRTQUEUE_MAX_SIZE (1024) */
+       return min_t(u16, 1024, BIT(le16_to_cpu(pdsv->vdpa_aux->ident.max_qlen)));
+}
+
+/* vdpa_config_ops.get_device_id: this driver only presents virtio-net devices */
+static u32 pds_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
+{
+       return VIRTIO_ID_NET;
+}
+
+/* vdpa_config_ops.get_vendor_id: PCI vendor id of the underlying hardware */
+static u32 pds_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
+{
+       return PCI_VENDOR_ID_PENSANDO;
+}
+
+/* vdpa_config_ops.get_status: read the virtio status byte from the device */
+static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+
+       return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
+}
+
+/* vdpa_config_ops.set_status: push the virtio status byte to the device via
+ * adminq.  Status 0 additionally resets the device and clears the cached vq
+ * indexes; on the FEATURES_OK transition the vq notify doorbells are
+ * (re)mapped, since their layout can depend on negotiated features.
+ */
+static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct device *dev = &pdsv->vdpa_dev.dev;
+       u8 old_status;
+       int i;
+
+       old_status = pds_vdpa_get_status(vdpa_dev);
+       dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status);
+
+       pds_vdpa_cmd_set_status(pdsv, status);
+
+       /* Note: still working with FW on the need for this reset cmd */
+       if (status == 0) {
+               pds_vdpa_cmd_reset(pdsv);
+
+               for (i = 0; i < pdsv->num_vqs; i++) {
+                       pdsv->vqs[i].avail_idx = 0;
+                       pdsv->vqs[i].used_idx = 0;
+               }
+       }
+
+       /* only on a 0->1 transition of FEATURES_OK */
+       if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) {
+               for (i = 0; i < pdsv->num_vqs; i++) {
+                       pdsv->vqs[i].notify =
+                               vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
+                                                       i, &pdsv->vqs[i].notify_pa);
+               }
+       }
+}
+
+/* vdpa_config_ops.reset: if the device was running (DRIVER_OK), reset each
+ * vq, release its irq and clear its cached state, then drive the device
+ * status to 0.  A device already at status 0 is left untouched.
+ * Always returns 0; per-vq reset failures are only logged.
+ */
+static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct device *dev;
+       int err = 0;
+       u8 status;
+       int i;
+
+       dev = &pdsv->vdpa_aux->padev->aux_dev.dev;
+       status = pds_vdpa_get_status(vdpa_dev);
+
+       if (status == 0)
+               return 0;
+
+       if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+               /* Reset the vqs */
+               for (i = 0; i < pdsv->num_vqs && !err; i++) {
+                       err = pds_vdpa_cmd_reset_vq(pdsv, i, 0, &pdsv->vqs[i]);
+                       if (err)
+                               dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
+                                       __func__, i, ERR_PTR(err));
+                       pds_vdpa_release_irq(pdsv, i);
+                       memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0]));
+                       pdsv->vqs[i].ready = false;
+               }
+       }
+
+       pds_vdpa_set_status(vdpa_dev, 0);
+
+       return 0;
+}
+
+/* vdpa_config_ops.get_config_size: config space is the virtio-net layout */
+static size_t pds_vdpa_get_config_size(struct vdpa_device *vdpa_dev)
+{
+       return sizeof(struct virtio_net_config);
+}
+
+/* vdpa_config_ops.get_config: bounds-checked read from the device's mapped
+ * virtio-net config space into buf.  Out-of-range requests are WARNed and
+ * ignored (the op has no way to return an error).
+ */
+static void pds_vdpa_get_config(struct vdpa_device *vdpa_dev,
+                               unsigned int offset,
+                               void *buf, unsigned int len)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       void __iomem *device;
+
+       if (offset + len > sizeof(struct virtio_net_config)) {
+               WARN(true, "%s: bad read, offset %d len %d\n", __func__, offset, len);
+               return;
+       }
+
+       device = pdsv->vdpa_aux->vd_mdev.device;
+       memcpy_fromio(buf, device + offset, len);
+}
+
+/* vdpa_config_ops.set_config: bounds-checked write from buf into the
+ * device's mapped virtio-net config space.  Out-of-range requests are
+ * WARNed and ignored (the op has no way to return an error).
+ */
+static void pds_vdpa_set_config(struct vdpa_device *vdpa_dev,
+                               unsigned int offset, const void *buf,
+                               unsigned int len)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       void __iomem *device;
+
+       if (offset + len > sizeof(struct virtio_net_config)) {
+               /* "bad write": this is the write path (was a copy-paste
+                * of the "bad read" message from pds_vdpa_get_config)
+                */
+               WARN(true, "%s: bad write, offset %d len %d\n", __func__, offset, len);
+               return;
+       }
+
+       device = pdsv->vdpa_aux->vd_mdev.device;
+       memcpy_toio(device + offset, buf, len);
+}
+
+/* vdpa_config_ops implementation for the pds_vdpa device */
+static const struct vdpa_config_ops pds_vdpa_ops = {
+       .set_vq_address         = pds_vdpa_set_vq_address,
+       .set_vq_num             = pds_vdpa_set_vq_num,
+       .kick_vq                = pds_vdpa_kick_vq,
+       .set_vq_cb              = pds_vdpa_set_vq_cb,
+       .set_vq_ready           = pds_vdpa_set_vq_ready,
+       .get_vq_ready           = pds_vdpa_get_vq_ready,
+       .set_vq_state           = pds_vdpa_set_vq_state,
+       .get_vq_state           = pds_vdpa_get_vq_state,
+       .get_vq_notification    = pds_vdpa_get_vq_notification,
+       .get_vq_irq             = pds_vdpa_get_vq_irq,
+       .get_vq_align           = pds_vdpa_get_vq_align,
+       .get_vq_group           = pds_vdpa_get_vq_group,
+
+       .get_device_features    = pds_vdpa_get_device_features,
+       .set_driver_features    = pds_vdpa_set_driver_features,
+       .get_driver_features    = pds_vdpa_get_driver_features,
+       .set_config_cb          = pds_vdpa_set_config_cb,
+       .get_vq_num_max         = pds_vdpa_get_vq_num_max,
+       .get_device_id          = pds_vdpa_get_device_id,
+       .get_vendor_id          = pds_vdpa_get_vendor_id,
+       .get_status             = pds_vdpa_get_status,
+       .set_status             = pds_vdpa_set_status,
+       .reset                  = pds_vdpa_reset,
+       .get_config_size        = pds_vdpa_get_config_size,
+       .get_config             = pds_vdpa_get_config,
+       .set_config             = pds_vdpa_set_config,
+};
+/* virtio device types this management device can create: net only */
+static struct virtio_device_id pds_vdpa_id_table[] = {
+       {VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID},
+       {0},
+};
+
+/* vdpa_mgmtdev_ops.dev_add: create the single vdpa device for this VF.
+ *
+ * Allocates the pds_vdpa_device, validates any user-requested features,
+ * resets and initializes the hardware, sizes the vq set (2 * vq_pairs,
+ * plus a control vq when VIRTIO_NET_F_CTRL_VQ is supported), programs a
+ * mac address, maps the vq doorbells, registers for PDS events and finally
+ * registers the device on the vdpa bus.
+ */
+static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+                           const struct vdpa_dev_set_config *add_config)
+{
+       struct pds_vdpa_aux *vdpa_aux;
+       struct pds_vdpa_device *pdsv;
+       struct vdpa_mgmt_dev *mgmt;
+       u16 fw_max_vqs, vq_pairs;
+       struct device *dma_dev;
+       struct pci_dev *pdev;
+       struct device *dev;
+       u8 mac[ETH_ALEN];
+       int err;
+       int i;
+
+       vdpa_aux = container_of(mdev, struct pds_vdpa_aux, vdpa_mdev);
+       dev = &vdpa_aux->padev->aux_dev.dev;
+       mgmt = &vdpa_aux->vdpa_mdev;
+
+       if (vdpa_aux->pdsv) {
+               dev_warn(dev, "Multiple vDPA devices on a VF is not supported.\n");
+               return -EOPNOTSUPP;
+       }
+
+       pdsv = vdpa_alloc_device(struct pds_vdpa_device, vdpa_dev,
+                                dev, &pds_vdpa_ops, 1, 1, name, false);
+       if (IS_ERR(pdsv)) {
+               dev_err(dev, "Failed to allocate vDPA structure: %pe\n", pdsv);
+               return PTR_ERR(pdsv);
+       }
+
+       vdpa_aux->pdsv = pdsv;
+       pdsv->vdpa_aux = vdpa_aux;
+
+       pdev = vdpa_aux->padev->vf_pdev;
+       dma_dev = &pdev->dev;
+       pdsv->vdpa_dev.dma_dev = dma_dev;
+
+       pdsv->supported_features = mgmt->supported_features;
+
+       if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
+               u64 unsupp_features =
+                       add_config->device_features & ~mgmt->supported_features;
+
+               if (unsupp_features) {
+                       dev_err(dev, "Unsupported features: %#llx\n", unsupp_features);
+                       err = -EOPNOTSUPP;
+                       goto err_unmap;
+               }
+
+               pdsv->supported_features = add_config->device_features;
+       }
+
+       err = pds_vdpa_cmd_reset(pdsv);
+       if (err) {
+               dev_err(dev, "Failed to reset hw: %pe\n", ERR_PTR(err));
+               goto err_unmap;
+       }
+
+       err = pds_vdpa_init_hw(pdsv);
+       if (err) {
+               dev_err(dev, "Failed to init hw: %pe\n", ERR_PTR(err));
+               goto err_unmap;
+       }
+
+       fw_max_vqs = le16_to_cpu(pdsv->vdpa_aux->ident.max_vqs);
+       vq_pairs = fw_max_vqs / 2;
+
+       /* Make sure we have the queues being requested.
+        * BIT_ULL, not (1 <<): the mask is u64 and every other test in
+        * this function uses BIT_ULL.
+        */
+       if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP))
+               vq_pairs = add_config->net.max_vq_pairs;
+
+       pdsv->num_vqs = 2 * vq_pairs;
+       if (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
+               pdsv->num_vqs++;
+
+       if (pdsv->num_vqs > fw_max_vqs) {
+               dev_err(dev, "%s: queue count requested %u greater than max %u\n",
+                       __func__, pdsv->num_vqs, fw_max_vqs);
+               err = -ENOSPC;
+               goto err_unmap;
+       }
+
+       if (pdsv->num_vqs != fw_max_vqs) {
+               err = pds_vdpa_cmd_set_max_vq_pairs(pdsv, vq_pairs);
+               if (err) {
+                       dev_err(dev, "Failed to set max_vq_pairs: %pe\n",
+                               ERR_PTR(err));
+                       goto err_unmap;
+               }
+       }
+
+       /* Set a mac, either from the user config if provided
+        * or set a random mac if default is 00:..:00
+        */
+       if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
+               ether_addr_copy(mac, add_config->net.mac);
+               pds_vdpa_cmd_set_mac(pdsv, mac);
+       } else {
+               struct virtio_net_config __iomem *vc;
+
+               vc = pdsv->vdpa_aux->vd_mdev.device;
+               memcpy_fromio(mac, vc->mac, sizeof(mac));
+               if (is_zero_ether_addr(mac)) {
+                       eth_random_addr(mac);
+                       dev_info(dev, "setting random mac %pM\n", mac);
+                       pds_vdpa_cmd_set_mac(pdsv, mac);
+               }
+       }
+
+       for (i = 0; i < pdsv->num_vqs; i++) {
+               pdsv->vqs[i].qid = i;
+               pdsv->vqs[i].pdsv = pdsv;
+               pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR;
+               pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
+                                                             i, &pdsv->vqs[i].notify_pa);
+       }
+
+       pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev;
+
+       err = pds_vdpa_register_event_handler(pdsv);
+       if (err) {
+               dev_err(dev, "Failed to register for PDS events: %pe\n", ERR_PTR(err));
+               goto err_unmap;
+       }
+
+       /* We use the _vdpa_register_device() call rather than the
+        * vdpa_register_device() to avoid a deadlock because our
+        * dev_add() is called with the vdpa_dev_lock already set
+        * by vdpa_nl_cmd_dev_add_set_doit()
+        */
+       err = _vdpa_register_device(&pdsv->vdpa_dev, pdsv->num_vqs);
+       if (err) {
+               dev_err(dev, "Failed to register to vDPA bus: %pe\n", ERR_PTR(err));
+               goto err_unevent;
+       }
+
+       pds_vdpa_debugfs_add_vdpadev(vdpa_aux);
+
+       return 0;
+
+err_unevent:
+       pds_vdpa_unregister_event_handler(pdsv);
+err_unmap:
+       put_device(&pdsv->vdpa_dev.dev);
+       vdpa_aux->pdsv = NULL;
+       return err;
+}
+
+/* vdpa_mgmtdev_ops.dev_del: unregister the vdpa device, reset the hw and
+ * rebuild the baseline debugfs entries, leaving the VF ready for a new add.
+ */
+static void pds_vdpa_dev_del(struct vdpa_mgmt_dev *mdev,
+                            struct vdpa_device *vdpa_dev)
+{
+       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
+       struct pds_vdpa_aux *vdpa_aux;
+
+       pds_vdpa_unregister_event_handler(pdsv);
+
+       vdpa_aux = container_of(mdev, struct pds_vdpa_aux, vdpa_mdev);
+       _vdpa_unregister_device(vdpa_dev);
+
+       pds_vdpa_cmd_reset(vdpa_aux->pdsv);
+       pds_vdpa_debugfs_reset_vdpadev(vdpa_aux);
+
+       vdpa_aux->pdsv = NULL;
+
+       dev_info(&vdpa_aux->padev->aux_dev.dev, "Removed vdpa device\n");
+}
+
+/* Management-device ops exposed via the vdpa netlink interface */
+static const struct vdpa_mgmtdev_ops pds_vdpa_mgmt_dev_ops = {
+       .dev_add = pds_vdpa_dev_add,
+       .dev_del = pds_vdpa_dev_del
+};
+
+/* Query the firmware's vdpa identity block over the PF's adminq, then use
+ * it to populate the vdpa_mgmt_dev (supported features, max vqs, config
+ * attrs) and allocate one MSI-X vector per supported vq.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux)
+{
+       union pds_core_adminq_cmd cmd = {
+               .vdpa_ident.opcode = PDS_VDPA_CMD_IDENT,
+               .vdpa_ident.vf_id = cpu_to_le16(vdpa_aux->vf_id),
+       };
+       union pds_core_adminq_comp comp = {};
+       struct vdpa_mgmt_dev *mgmt;
+       struct pci_dev *pf_pdev;
+       struct device *pf_dev;
+       struct pci_dev *pdev;
+       dma_addr_t ident_pa;
+       struct device *dev;
+       u16 dev_intrs;
+       u16 max_vqs;
+       int err;
+
+       dev = &vdpa_aux->padev->aux_dev.dev;
+       pdev = vdpa_aux->padev->vf_pdev;
+       mgmt = &vdpa_aux->vdpa_mdev;
+
+       /* Get resource info through the PF's adminq.  It is a block of info,
+        * so we need to map some memory for PF to make available to the
+        * firmware for writing the data.
+        */
+       pf_pdev = pci_physfn(vdpa_aux->padev->vf_pdev);
+       pf_dev = &pf_pdev->dev;
+       ident_pa = dma_map_single(pf_dev, &vdpa_aux->ident,
+                                 sizeof(vdpa_aux->ident), DMA_FROM_DEVICE);
+       if (dma_mapping_error(pf_dev, ident_pa)) {
+               dev_err(dev, "Failed to map ident space\n");
+               return -ENOMEM;
+       }
+
+       cmd.vdpa_ident.ident_pa = cpu_to_le64(ident_pa);
+       cmd.vdpa_ident.len = cpu_to_le32(sizeof(vdpa_aux->ident));
+       err = pds_client_adminq_cmd(vdpa_aux->padev, &cmd,
+                                   sizeof(cmd.vdpa_ident), &comp, 0);
+       /* unmap before checking err: the mapping is no longer needed either way */
+       dma_unmap_single(pf_dev, ident_pa,
+                        sizeof(vdpa_aux->ident), DMA_FROM_DEVICE);
+       if (err) {
+               dev_err(dev, "Failed to ident hw, status %d: %pe\n",
+                       comp.status, ERR_PTR(err));
+               return err;
+       }
+
+       max_vqs = le16_to_cpu(vdpa_aux->ident.max_vqs);
+       dev_intrs = pci_msix_vec_count(pdev);
+       dev_dbg(dev, "ident.max_vqs %d dev_intrs %d\n", max_vqs, dev_intrs);
+
+       /* cap at both the interrupt budget and the driver's array size */
+       max_vqs = min_t(u16, dev_intrs, max_vqs);
+       mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs);
+       vdpa_aux->nintrs = mgmt->max_supported_vqs;
+
+       mgmt->ops = &pds_vdpa_mgmt_dev_ops;
+       mgmt->id_table = pds_vdpa_id_table;
+       mgmt->device = dev;
+       mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+       mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
+       mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
+       mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
+
+       err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs,
+                                   PCI_IRQ_MSIX);
+       if (err < 0) {
+               dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
+                       vdpa_aux->nintrs, ERR_PTR(err));
+               return err;
+       }
+       vdpa_aux->nintrs = err;
+
+       return 0;
+}
diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h
new file mode 100644 (file)
index 0000000..a1bc37d
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _VDPA_DEV_H_
+#define _VDPA_DEV_H_
+
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+
+/* Driver-side cache of one virtqueue's configuration and runtime state */
+struct pds_vdpa_vq_info {
+       bool ready;             /* vq has been enabled via set_vq_ready */
+       u64 desc_addr;          /* descriptor ring address from set_vq_address */
+       u64 avail_addr;         /* driver/avail ring address */
+       u64 used_addr;          /* device/used ring address */
+       u32 q_len;              /* queue length from set_vq_num */
+       u16 qid;                /* this vq's index */
+       int irq;                /* MSI-X irq, or VIRTIO_MSI_NO_VECTOR */
+       char irq_name[32];      /* name passed to request_irq */
+
+       void __iomem *notify;   /* mapped doorbell register */
+       dma_addr_t notify_pa;   /* doorbell physical address, for userspace map */
+
+       u64 doorbell;
+       u16 avail_idx;          /* cached index; wrap bit inverted for packed */
+       u16 used_idx;           /* cached index; wrap bit inverted for packed */
+
+       struct vdpa_callback event_cb;  /* per-vq interrupt callback */
+       struct pds_vdpa_device *pdsv;   /* back-pointer to owning device */
+};
+
+#define PDS_VDPA_MAX_QUEUES    65
+#define PDS_VDPA_MAX_QLEN      32768
+/* One vdpa device instance on a pds_core VF */
+struct pds_vdpa_device {
+       struct vdpa_device vdpa_dev;
+       struct pds_vdpa_aux *vdpa_aux;
+
+       struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES];
+       u64 supported_features;         /* specified device features */
+       u64 req_features;               /* features requested by vdpa */
+       u8 vdpa_index;                  /* rsvd for future subdevice use */
+       u8 num_vqs;                     /* num vqs in use */
+       struct vdpa_callback config_cb; /* config-change callback */
+       struct notifier_block nb;       /* pds_core event notifier */
+};
+
+/* XOR mask for the packed-ring wrap-counter bit stored in avail/used_idx */
+#define PDS_VDPA_PACKED_INVERT_IDX     0x8000
+
+int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux);
+#endif /* _VDPA_DEV_H_ */
index 3858738643b40750a0a29d7ba386ab0a2efe2e04..3cef2571d15d3dd62354df713e6321c9cb0f761f 100644 (file)
@@ -16,6 +16,7 @@ enum snet_ctrl_opcodes {
        SNET_CTRL_OP_DESTROY = 1,
        SNET_CTRL_OP_READ_VQ_STATE,
        SNET_CTRL_OP_SUSPEND,
+       SNET_CTRL_OP_RESUME,
 };
 
 #define SNET_CTRL_TIMEOUT              2000000
@@ -328,3 +329,8 @@ int snet_suspend_dev(struct snet *snet)
 {
        return snet_send_ctrl_msg(snet, SNET_CTRL_OP_SUSPEND, 0);
 }
+
+/* Ask the DPU to resume this snet device; returns the control-msg status */
+int snet_resume_dev(struct snet *snet)
+{
+       return snet_send_ctrl_msg(snet, SNET_CTRL_OP_RESUME, 0);
+}
index 42c87387a0f1a7983a6278719b89b11170fa8e09..af531a3390824f00b17fd531ecf43ec4ec7e2df0 100644 (file)
@@ -159,7 +159,7 @@ static const struct hwmon_ops snet_hwmon_ops = {
        .read_string = snet_hwmon_read_string
 };
 
-static const struct hwmon_channel_info *snet_hwmon_info[] = {
+static const struct hwmon_channel_info * const snet_hwmon_info[] = {
        HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | HWMON_T_LABEL,
                           HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL),
        HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL),
index cdcd84ce4f5a95b1037fd1b0de900c7876540766..99428a04068d2d4f1c3507455a63dd522187b745 100644 (file)
@@ -509,6 +509,20 @@ static int snet_suspend(struct vdpa_device *vdev)
        return ret;
 }
 
+/* vdpa_config_ops.resume: counterpart of snet_suspend; forwards the resume
+ * request to the device and logs the outcome.
+ */
+static int snet_resume(struct vdpa_device *vdev)
+{
+       struct snet *snet = vdpa_to_snet(vdev);
+       int ret;
+
+       ret = snet_resume_dev(snet);
+       if (ret)
+               SNET_ERR(snet->pdev, "SNET[%u] resume failed, err: %d\n", snet->sid, ret);
+       else
+               SNET_DBG(snet->pdev, "Resume SNET[%u] device\n", snet->sid);
+
+       return ret;
+}
+
 static const struct vdpa_config_ops snet_config_ops = {
        .set_vq_address         = snet_set_vq_address,
        .set_vq_num             = snet_set_vq_num,
@@ -536,6 +550,7 @@ static const struct vdpa_config_ops snet_config_ops = {
        .get_config             = snet_get_config,
        .set_config             = snet_set_config,
        .suspend                = snet_suspend,
+       .resume                 = snet_resume,
 };
 
 static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
index 3c78d4e7d4857027870568d1705e4c0e626cfea8..36ac285835ea85790735d867dcec1b774b6a01d0 100644 (file)
@@ -204,5 +204,6 @@ void snet_ctrl_clear(struct snet *snet);
 int snet_destroy_dev(struct snet *snet);
 int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state);
 int snet_suspend_dev(struct snet *snet);
+int snet_resume_dev(struct snet *snet);
 
 #endif //_SNET_VDPA_H_
index 4619b4a520efe5df27979dd0e1142ee9e3ae9330..dc38ed21319da92201b94708775507e0c7ce75be 100644 (file)
@@ -726,7 +726,11 @@ static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
 {
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
 
-       cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+       if (cpu_mask)
+               cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+       else
+               cpumask_setall(&dev->vqs[idx]->irq_affinity);
+
        return 0;
 }
 
index ae2273196b0c902c472b3f5cf45249cb6cae37bf..f2ed7167c848096ef8c8f338325bedc278df65fe 100644 (file)
@@ -546,7 +546,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
        endtime = busy_clock() + busyloop_timeout;
 
        while (vhost_can_busy_poll(endtime)) {
-               if (vhost_has_work(&net->dev)) {
+               if (vhost_vq_has_work(vq)) {
                        *busyloop_intr = true;
                        break;
                }
@@ -1347,8 +1347,10 @@ static int vhost_net_open(struct inode *inode, struct file *f)
                       VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
                       NULL);
 
-       vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
-       vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
+       vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev,
+                       vqs[VHOST_NET_VQ_TX]);
+       vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev,
+                       vqs[VHOST_NET_VQ_RX]);
 
        f->private_data = n;
        n->page_frag.page = NULL;
index bb10fa4bb4f6eca2107d66eb639cefc6f532d07f..c83f7f043470d687b32d92f6dfe864b1e93964c3 100644 (file)
@@ -167,6 +167,7 @@ MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi d
 
 struct vhost_scsi_virtqueue {
        struct vhost_virtqueue vq;
+       struct vhost_scsi *vs;
        /*
         * Reference counting for inflight reqs, used for flush operation. At
         * each time, one reference tracks new commands submitted, while we
@@ -181,6 +182,9 @@ struct vhost_scsi_virtqueue {
        struct vhost_scsi_cmd *scsi_cmds;
        struct sbitmap scsi_tags;
        int max_cmds;
+
+       struct vhost_work completion_work;
+       struct llist_head completion_list;
 };
 
 struct vhost_scsi {
@@ -190,12 +194,8 @@ struct vhost_scsi {
 
        struct vhost_dev dev;
        struct vhost_scsi_virtqueue *vqs;
-       unsigned long *compl_bitmap;
        struct vhost_scsi_inflight **old_inflight;
 
-       struct vhost_work vs_completion_work; /* cmd completion work item */
-       struct llist_head vs_completion_list; /* cmd completion queue */
-
        struct vhost_work vs_event_work; /* evt injection work item */
        struct llist_head vs_event_list; /* evt injection queue */
 
@@ -353,15 +353,17 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
        if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
                struct vhost_scsi_tmf *tmf = container_of(se_cmd,
                                        struct vhost_scsi_tmf, se_cmd);
+               struct vhost_virtqueue *vq = &tmf->svq->vq;
 
-               vhost_work_queue(&tmf->vhost->dev, &tmf->vwork);
+               vhost_vq_work_queue(vq, &tmf->vwork);
        } else {
                struct vhost_scsi_cmd *cmd = container_of(se_cmd,
                                        struct vhost_scsi_cmd, tvc_se_cmd);
-               struct vhost_scsi *vs = cmd->tvc_vhost;
+               struct vhost_scsi_virtqueue *svq = container_of(cmd->tvc_vq,
+                                       struct vhost_scsi_virtqueue, vq);
 
-               llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list);
-               vhost_work_queue(&vs->dev, &vs->vs_completion_work);
+               llist_add(&cmd->tvc_completion_list, &svq->completion_list);
+               vhost_vq_work_queue(&svq->vq, &svq->completion_work);
        }
 }
 
@@ -509,17 +511,17 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
  */
 static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 {
-       struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
-                                       vs_completion_work);
+       struct vhost_scsi_virtqueue *svq = container_of(work,
+                               struct vhost_scsi_virtqueue, completion_work);
        struct virtio_scsi_cmd_resp v_rsp;
        struct vhost_scsi_cmd *cmd, *t;
        struct llist_node *llnode;
        struct se_cmd *se_cmd;
        struct iov_iter iov_iter;
-       int ret, vq;
+       bool signal = false;
+       int ret;
 
-       bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
-       llnode = llist_del_all(&vs->vs_completion_list);
+       llnode = llist_del_all(&svq->completion_list);
        llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
                se_cmd = &cmd->tvc_se_cmd;
 
@@ -539,21 +541,17 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
                              cmd->tvc_in_iovs, sizeof(v_rsp));
                ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
                if (likely(ret == sizeof(v_rsp))) {
-                       struct vhost_scsi_virtqueue *q;
+                       signal = true;
+
                        vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
-                       q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
-                       vq = q - vs->vqs;
-                       __set_bit(vq, vs->compl_bitmap);
                } else
                        pr_err("Faulted on virtio_scsi_cmd_resp\n");
 
                vhost_scsi_release_cmd_res(se_cmd);
        }
 
-       vq = -1;
-       while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
-               < vs->dev.nvqs)
-               vhost_signal(&vs->dev, &vs->vqs[vq].vq);
+       if (signal)
+               vhost_signal(&svq->vs->dev, &svq->vq);
 }
 
 static struct vhost_scsi_cmd *
@@ -1135,12 +1133,27 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work)
 {
        struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf,
                                                  vwork);
-       int resp_code;
+       struct vhost_virtqueue *ctl_vq, *vq;
+       int resp_code, i;
+
+       if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) {
+               /*
+                * Flush IO vqs that don't share a worker with the ctl to make
+                * sure they have sent their responses before us.
+                */
+               ctl_vq = &tmf->vhost->vqs[VHOST_SCSI_VQ_CTL].vq;
+               for (i = VHOST_SCSI_VQ_IO; i < tmf->vhost->dev.nvqs; i++) {
+                       vq = &tmf->vhost->vqs[i].vq;
+
+                       if (vhost_vq_is_setup(vq) &&
+                           vq->worker != ctl_vq->worker)
+                               vhost_vq_flush(vq);
+               }
 
-       if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE)
                resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
-       else
+       } else {
                resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+       }
 
        vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs,
                                 tmf->vq_desc, &tmf->resp_iov, resp_code);
@@ -1335,11 +1348,9 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
 }
 
 static void
-vhost_scsi_send_evt(struct vhost_scsi *vs,
-                  struct vhost_scsi_tpg *tpg,
-                  struct se_lun *lun,
-                  u32 event,
-                  u32 reason)
+vhost_scsi_send_evt(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+                   struct vhost_scsi_tpg *tpg, struct se_lun *lun,
+                   u32 event, u32 reason)
 {
        struct vhost_scsi_evt *evt;
 
@@ -1361,7 +1372,7 @@ vhost_scsi_send_evt(struct vhost_scsi *vs,
        }
 
        llist_add(&evt->list, &vs->vs_event_list);
-       vhost_work_queue(&vs->dev, &vs->vs_event_work);
+       vhost_vq_work_queue(vq, &vs->vs_event_work);
 }
 
 static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
@@ -1375,7 +1386,8 @@ static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
                goto out;
 
        if (vs->vs_events_missed)
-               vhost_scsi_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+               vhost_scsi_send_evt(vs, vq, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT,
+                                   0);
 out:
        mutex_unlock(&vq->mutex);
 }
@@ -1770,6 +1782,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 
 static int vhost_scsi_open(struct inode *inode, struct file *f)
 {
+       struct vhost_scsi_virtqueue *svq;
        struct vhost_scsi *vs;
        struct vhost_virtqueue **vqs;
        int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
@@ -1788,10 +1801,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        }
        nvqs += VHOST_SCSI_VQ_IO;
 
-       vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
-       if (!vs->compl_bitmap)
-               goto err_compl_bitmap;
-
        vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
                                         GFP_KERNEL | __GFP_ZERO);
        if (!vs->old_inflight)
@@ -1806,7 +1815,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        if (!vqs)
                goto err_local_vqs;
 
-       vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
        vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
 
        vs->vs_events_nr = 0;
@@ -1817,8 +1825,14 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
        vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
        for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
-               vqs[i] = &vs->vqs[i].vq;
-               vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+               svq = &vs->vqs[i];
+
+               vqs[i] = &svq->vq;
+               svq->vs = vs;
+               init_llist_head(&svq->completion_list);
+               vhost_work_init(&svq->completion_work,
+                               vhost_scsi_complete_cmd_work);
+               svq->vq.handle_kick = vhost_scsi_handle_kick;
        }
        vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
                       VHOST_SCSI_WEIGHT, 0, true, NULL);
@@ -1833,8 +1847,6 @@ err_local_vqs:
 err_vqs:
        kfree(vs->old_inflight);
 err_inflight:
-       bitmap_free(vs->compl_bitmap);
-err_compl_bitmap:
        kvfree(vs);
 err_vs:
        return r;
@@ -1854,7 +1866,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
        kfree(vs->dev.vqs);
        kfree(vs->vqs);
        kfree(vs->old_inflight);
-       bitmap_free(vs->compl_bitmap);
        kvfree(vs);
        return 0;
 }
@@ -1916,6 +1927,14 @@ vhost_scsi_ioctl(struct file *f,
                if (copy_from_user(&features, featurep, sizeof features))
                        return -EFAULT;
                return vhost_scsi_set_features(vs, features);
+       case VHOST_NEW_WORKER:
+       case VHOST_FREE_WORKER:
+       case VHOST_ATTACH_VRING_WORKER:
+       case VHOST_GET_VRING_WORKER:
+               mutex_lock(&vs->dev.mutex);
+               r = vhost_worker_ioctl(&vs->dev, ioctl, argp);
+               mutex_unlock(&vs->dev.mutex);
+               return r;
        default:
                mutex_lock(&vs->dev.mutex);
                r = vhost_dev_ioctl(&vs->dev, ioctl, argp);
@@ -1995,7 +2014,7 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg,
                goto unlock;
 
        if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
-               vhost_scsi_send_evt(vs, tpg, lun,
+               vhost_scsi_send_evt(vs, vq, tpg, lun,
                                   VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
 unlock:
        mutex_unlock(&vq->mutex);
index 60c9ebd629dd159d0ceb8a0c14f96091be193753..c71d573f1c9497c37e2da7693becd09f10bf5989 100644 (file)
@@ -187,13 +187,15 @@ EXPORT_SYMBOL_GPL(vhost_work_init);
 
 /* Init poll structure */
 void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
-                    __poll_t mask, struct vhost_dev *dev)
+                    __poll_t mask, struct vhost_dev *dev,
+                    struct vhost_virtqueue *vq)
 {
        init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
        init_poll_funcptr(&poll->table, vhost_poll_func);
        poll->mask = mask;
        poll->dev = dev;
        poll->wqh = NULL;
+       poll->vq = vq;
 
        vhost_work_init(&poll->work, fn);
 }
@@ -231,46 +233,102 @@ void vhost_poll_stop(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_stop);
 
-void vhost_dev_flush(struct vhost_dev *dev)
+static void vhost_worker_queue(struct vhost_worker *worker,
+                              struct vhost_work *work)
+{
+       if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
+               /* We can only add the work to the list after we're
+                * sure it was not in the list.
+                * test_and_set_bit() implies a memory barrier.
+                */
+               llist_add(&work->node, &worker->work_list);
+               vhost_task_wake(worker->vtsk);
+       }
+}
+
+bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
+{
+       struct vhost_worker *worker;
+       bool queued = false;
+
+       rcu_read_lock();
+       worker = rcu_dereference(vq->worker);
+       if (worker) {
+               queued = true;
+               vhost_worker_queue(worker, work);
+       }
+       rcu_read_unlock();
+
+       return queued;
+}
+EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
+
+void vhost_vq_flush(struct vhost_virtqueue *vq)
 {
        struct vhost_flush_struct flush;
 
-       if (dev->worker.vtsk) {
-               init_completion(&flush.wait_event);
-               vhost_work_init(&flush.work, vhost_flush_work);
+       init_completion(&flush.wait_event);
+       vhost_work_init(&flush.work, vhost_flush_work);
 
-               vhost_work_queue(dev, &flush.work);
+       if (vhost_vq_work_queue(vq, &flush.work))
                wait_for_completion(&flush.wait_event);
-       }
 }
-EXPORT_SYMBOL_GPL(vhost_dev_flush);
+EXPORT_SYMBOL_GPL(vhost_vq_flush);
 
-void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
+/**
+ * vhost_worker_flush - flush a worker
+ * @worker: worker to flush
+ *
+ * This does not use RCU to protect the worker, so the device or worker
+ * mutex must be held.
+ */
+static void vhost_worker_flush(struct vhost_worker *worker)
 {
-       if (!dev->worker.vtsk)
-               return;
+       struct vhost_flush_struct flush;
 
-       if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
-               /* We can only add the work to the list after we're
-                * sure it was not in the list.
-                * test_and_set_bit() implies a memory barrier.
-                */
-               llist_add(&work->node, &dev->worker.work_list);
-               vhost_task_wake(dev->worker.vtsk);
+       init_completion(&flush.wait_event);
+       vhost_work_init(&flush.work, vhost_flush_work);
+
+       vhost_worker_queue(worker, &flush.work);
+       wait_for_completion(&flush.wait_event);
+}
+
+void vhost_dev_flush(struct vhost_dev *dev)
+{
+       struct vhost_worker *worker;
+       unsigned long i;
+
+       xa_for_each(&dev->worker_xa, i, worker) {
+               mutex_lock(&worker->mutex);
+               if (!worker->attachment_cnt) {
+                       mutex_unlock(&worker->mutex);
+                       continue;
+               }
+               vhost_worker_flush(worker);
+               mutex_unlock(&worker->mutex);
        }
 }
-EXPORT_SYMBOL_GPL(vhost_work_queue);
+EXPORT_SYMBOL_GPL(vhost_dev_flush);
 
 /* A lockless hint for busy polling code to exit the loop */
-bool vhost_has_work(struct vhost_dev *dev)
+bool vhost_vq_has_work(struct vhost_virtqueue *vq)
 {
-       return !llist_empty(&dev->worker.work_list);
+       struct vhost_worker *worker;
+       bool has_work = false;
+
+       rcu_read_lock();
+       worker = rcu_dereference(vq->worker);
+       if (worker && !llist_empty(&worker->work_list))
+               has_work = true;
+       rcu_read_unlock();
+
+       return has_work;
 }
-EXPORT_SYMBOL_GPL(vhost_has_work);
+EXPORT_SYMBOL_GPL(vhost_vq_has_work);
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-       vhost_work_queue(poll->dev, &poll->work);
+       vhost_vq_work_queue(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
@@ -329,6 +387,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        vq->busyloop_timeout = 0;
        vq->umem = NULL;
        vq->iotlb = NULL;
+       rcu_assign_pointer(vq->worker, NULL);
        vhost_vring_call_reset(&vq->call_ctx);
        __vhost_vq_meta_reset(vq);
 }
@@ -458,8 +517,6 @@ void vhost_dev_init(struct vhost_dev *dev,
        dev->umem = NULL;
        dev->iotlb = NULL;
        dev->mm = NULL;
-       memset(&dev->worker, 0, sizeof(dev->worker));
-       init_llist_head(&dev->worker.work_list);
        dev->iov_limit = iov_limit;
        dev->weight = weight;
        dev->byte_weight = byte_weight;
@@ -469,7 +526,7 @@ void vhost_dev_init(struct vhost_dev *dev,
        INIT_LIST_HEAD(&dev->read_list);
        INIT_LIST_HEAD(&dev->pending_list);
        spin_lock_init(&dev->iotlb_lock);
-
+       xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC);
 
        for (i = 0; i < dev->nvqs; ++i) {
                vq = dev->vqs[i];
@@ -481,7 +538,7 @@ void vhost_dev_init(struct vhost_dev *dev,
                vhost_vq_reset(dev, vq);
                if (vq->handle_kick)
                        vhost_poll_init(&vq->poll, vq->handle_kick,
-                                       EPOLLIN, dev);
+                                       EPOLLIN, dev, vq);
        }
 }
 EXPORT_SYMBOL_GPL(vhost_dev_init);
@@ -531,38 +588,284 @@ static void vhost_detach_mm(struct vhost_dev *dev)
        dev->mm = NULL;
 }
 
-static void vhost_worker_free(struct vhost_dev *dev)
+static void vhost_worker_destroy(struct vhost_dev *dev,
+                                struct vhost_worker *worker)
+{
+       if (!worker)
+               return;
+
+       WARN_ON(!llist_empty(&worker->work_list));
+       xa_erase(&dev->worker_xa, worker->id);
+       vhost_task_stop(worker->vtsk);
+       kfree(worker);
+}
+
+static void vhost_workers_free(struct vhost_dev *dev)
 {
-       if (!dev->worker.vtsk)
+       struct vhost_worker *worker;
+       unsigned long i;
+
+       if (!dev->use_worker)
                return;
 
-       WARN_ON(!llist_empty(&dev->worker.work_list));
-       vhost_task_stop(dev->worker.vtsk);
-       dev->worker.kcov_handle = 0;
-       dev->worker.vtsk = NULL;
+       for (i = 0; i < dev->nvqs; i++)
+               rcu_assign_pointer(dev->vqs[i]->worker, NULL);
+       /*
+        * Free the default worker we created and cleanup workers userspace
+        * created but couldn't clean up (it forgot or crashed).
+        */
+       xa_for_each(&dev->worker_xa, i, worker)
+               vhost_worker_destroy(dev, worker);
+       xa_destroy(&dev->worker_xa);
 }
 
-static int vhost_worker_create(struct vhost_dev *dev)
+static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 {
+       struct vhost_worker *worker;
        struct vhost_task *vtsk;
        char name[TASK_COMM_LEN];
+       int ret;
+       u32 id;
+
+       worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
+       if (!worker)
+               return NULL;
 
        snprintf(name, sizeof(name), "vhost-%d", current->pid);
 
-       vtsk = vhost_task_create(vhost_worker, &dev->worker, name);
+       vtsk = vhost_task_create(vhost_worker, worker, name);
        if (!vtsk)
-               return -ENOMEM;
+               goto free_worker;
+
+       mutex_init(&worker->mutex);
+       init_llist_head(&worker->work_list);
+       worker->kcov_handle = kcov_common_handle();
+       worker->vtsk = vtsk;
 
-       dev->worker.kcov_handle = kcov_common_handle();
-       dev->worker.vtsk = vtsk;
        vhost_task_start(vtsk);
+
+       ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+       if (ret < 0)
+               goto stop_worker;
+       worker->id = id;
+
+       return worker;
+
+stop_worker:
+       vhost_task_stop(vtsk);
+free_worker:
+       kfree(worker);
+       return NULL;
+}
+
+/* Caller must have device mutex */
+static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
+                                    struct vhost_worker *worker)
+{
+       struct vhost_worker *old_worker;
+
+       old_worker = rcu_dereference_check(vq->worker,
+                                          lockdep_is_held(&vq->dev->mutex));
+
+       mutex_lock(&worker->mutex);
+       worker->attachment_cnt++;
+       mutex_unlock(&worker->mutex);
+       rcu_assign_pointer(vq->worker, worker);
+
+       if (!old_worker)
+               return;
+       /*
+        * Take the worker mutex to make sure we see the work queued from
+        * device wide flushes which doesn't use RCU for execution.
+        */
+       mutex_lock(&old_worker->mutex);
+       old_worker->attachment_cnt--;
+       /*
+        * We don't want to call synchronize_rcu for every vq during setup
+        * because it will slow down VM startup. If we haven't done
+        * VHOST_SET_VRING_KICK and not done the driver specific
+        * SET_ENDPOINT/RUNNING then we can skip the sync since there will
+        * not be any works queued for scsi and net.
+        */
+       mutex_lock(&vq->mutex);
+       if (!vhost_vq_get_backend(vq) && !vq->kick) {
+               mutex_unlock(&vq->mutex);
+               mutex_unlock(&old_worker->mutex);
+               /*
+                * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
+                * Warn if it adds support for multiple workers but forgets to
+                * handle the early queueing case.
+                */
+               WARN_ON(!old_worker->attachment_cnt &&
+                       !llist_empty(&old_worker->work_list));
+               return;
+       }
+       mutex_unlock(&vq->mutex);
+
+       /* Make sure new vq queue/flush/poll calls see the new worker */
+       synchronize_rcu();
+       /* Make sure whatever was queued gets run */
+       vhost_worker_flush(old_worker);
+       mutex_unlock(&old_worker->mutex);
+}
+
+/* Caller must have device mutex */
+static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
+                                 struct vhost_vring_worker *info)
+{
+       unsigned long index = info->worker_id;
+       struct vhost_dev *dev = vq->dev;
+       struct vhost_worker *worker;
+
+       if (!dev->use_worker)
+               return -EINVAL;
+
+       worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
+       if (!worker || worker->id != info->worker_id)
+               return -ENODEV;
+
+       __vhost_vq_attach_worker(vq, worker);
+       return 0;
+}
+
+/* Caller must have device mutex */
+static int vhost_new_worker(struct vhost_dev *dev,
+                           struct vhost_worker_state *info)
+{
+       struct vhost_worker *worker;
+
+       worker = vhost_worker_create(dev);
+       if (!worker)
+               return -ENOMEM;
+
+       info->worker_id = worker->id;
+       return 0;
+}
+
+/* Caller must have device mutex */
+static int vhost_free_worker(struct vhost_dev *dev,
+                            struct vhost_worker_state *info)
+{
+       unsigned long index = info->worker_id;
+       struct vhost_worker *worker;
+
+       worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
+       if (!worker || worker->id != info->worker_id)
+               return -ENODEV;
+
+       mutex_lock(&worker->mutex);
+       if (worker->attachment_cnt) {
+               mutex_unlock(&worker->mutex);
+               return -EBUSY;
+       }
+       mutex_unlock(&worker->mutex);
+
+       vhost_worker_destroy(dev, worker);
        return 0;
 }
 
+static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp,
+                                 struct vhost_virtqueue **vq, u32 *id)
+{
+       u32 __user *idxp = argp;
+       u32 idx;
+       long r;
+
+       r = get_user(idx, idxp);
+       if (r < 0)
+               return r;
+
+       if (idx >= dev->nvqs)
+               return -ENOBUFS;
+
+       idx = array_index_nospec(idx, dev->nvqs);
+
+       *vq = dev->vqs[idx];
+       *id = idx;
+       return 0;
+}
+
+/* Caller must have device mutex */
+long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl,
+                       void __user *argp)
+{
+       struct vhost_vring_worker ring_worker;
+       struct vhost_worker_state state;
+       struct vhost_worker *worker;
+       struct vhost_virtqueue *vq;
+       long ret;
+       u32 idx;
+
+       if (!dev->use_worker)
+               return -EINVAL;
+
+       if (!vhost_dev_has_owner(dev))
+               return -EINVAL;
+
+       ret = vhost_dev_check_owner(dev);
+       if (ret)
+               return ret;
+
+       switch (ioctl) {
+       /* dev worker ioctls */
+       case VHOST_NEW_WORKER:
+               ret = vhost_new_worker(dev, &state);
+               if (!ret && copy_to_user(argp, &state, sizeof(state)))
+                       ret = -EFAULT;
+               return ret;
+       case VHOST_FREE_WORKER:
+               if (copy_from_user(&state, argp, sizeof(state)))
+                       return -EFAULT;
+               return vhost_free_worker(dev, &state);
+       /* vring worker ioctls */
+       case VHOST_ATTACH_VRING_WORKER:
+       case VHOST_GET_VRING_WORKER:
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       ret = vhost_get_vq_from_user(dev, argp, &vq, &idx);
+       if (ret)
+               return ret;
+
+       switch (ioctl) {
+       case VHOST_ATTACH_VRING_WORKER:
+               if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               ret = vhost_vq_attach_worker(vq, &ring_worker);
+               break;
+       case VHOST_GET_VRING_WORKER:
+               worker = rcu_dereference_check(vq->worker,
+                                              lockdep_is_held(&dev->mutex));
+               if (!worker) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               ring_worker.index = idx;
+               ring_worker.worker_id = worker->id;
+
+               if (copy_to_user(argp, &ring_worker, sizeof(ring_worker)))
+                       ret = -EFAULT;
+               break;
+       default:
+               ret = -ENOIOCTLCMD;
+               break;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(vhost_worker_ioctl);
+
 /* Caller should have device mutex */
 long vhost_dev_set_owner(struct vhost_dev *dev)
 {
-       int err;
+       struct vhost_worker *worker;
+       int err, i;
 
        /* Is there an owner already? */
        if (vhost_dev_has_owner(dev)) {
@@ -572,20 +875,32 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 
        vhost_attach_mm(dev);
 
-       if (dev->use_worker) {
-               err = vhost_worker_create(dev);
-               if (err)
-                       goto err_worker;
-       }
-
        err = vhost_dev_alloc_iovecs(dev);
        if (err)
                goto err_iovecs;
 
+       if (dev->use_worker) {
+               /*
+                * This should be done last, because vsock can queue work
+                * before VHOST_SET_OWNER so it simplifies the failure path
+                * below since we don't have to worry about vsock queueing
+                * while we free the worker.
+                */
+               worker = vhost_worker_create(dev);
+               if (!worker) {
+                       err = -ENOMEM;
+                       goto err_worker;
+               }
+
+               for (i = 0; i < dev->nvqs; i++)
+                       __vhost_vq_attach_worker(dev->vqs[i], worker);
+       }
+
        return 0;
-err_iovecs:
-       vhost_worker_free(dev);
+
 err_worker:
+       vhost_dev_free_iovecs(dev);
+err_iovecs:
        vhost_detach_mm(dev);
 err_mm:
        return err;
@@ -677,7 +992,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
        dev->iotlb = NULL;
        vhost_clear_msg(dev);
        wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
-       vhost_worker_free(dev);
+       vhost_workers_free(dev);
        vhost_detach_mm(dev);
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -1565,21 +1880,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
        struct file *eventfp, *filep = NULL;
        bool pollstart = false, pollstop = false;
        struct eventfd_ctx *ctx = NULL;
-       u32 __user *idxp = argp;
        struct vhost_virtqueue *vq;
        struct vhost_vring_state s;
        struct vhost_vring_file f;
        u32 idx;
        long r;
 
-       r = get_user(idx, idxp);
+       r = vhost_get_vq_from_user(d, argp, &vq, &idx);
        if (r < 0)
                return r;
-       if (idx >= d->nvqs)
-               return -ENOBUFS;
-
-       idx = array_index_nospec(idx, d->nvqs);
-       vq = d->vqs[idx];
 
        if (ioctl == VHOST_SET_VRING_NUM ||
            ioctl == VHOST_SET_VRING_ADDR) {
index fc900be504b38e48d7c74f96e78d825d4a2a909e..f60d5f7bef944e2e965e54c83bb3925d3da56c5b 100644 (file)
@@ -28,8 +28,12 @@ struct vhost_work {
 
 struct vhost_worker {
        struct vhost_task       *vtsk;
+       /* Used to serialize device wide flushing with worker swapping. */
+       struct mutex            mutex;
        struct llist_head       work_list;
        u64                     kcov_handle;
+       u32                     id;
+       int                     attachment_cnt;
 };
 
 /* Poll a file (eventfd or socket) */
@@ -41,17 +45,17 @@ struct vhost_poll {
        struct vhost_work       work;
        __poll_t                mask;
        struct vhost_dev        *dev;
+       struct vhost_virtqueue  *vq;
 };
 
-void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
-void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
-bool vhost_has_work(struct vhost_dev *dev);
-
 void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
-                    __poll_t mask, struct vhost_dev *dev);
+                    __poll_t mask, struct vhost_dev *dev,
+                    struct vhost_virtqueue *vq);
 int vhost_poll_start(struct vhost_poll *poll, struct file *file);
 void vhost_poll_stop(struct vhost_poll *poll);
 void vhost_poll_queue(struct vhost_poll *poll);
+
+void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
 void vhost_dev_flush(struct vhost_dev *dev);
 
 struct vhost_log {
@@ -74,6 +78,7 @@ struct vhost_vring_call {
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
        struct vhost_dev *dev;
+       struct vhost_worker __rcu *worker;
 
        /* The actual ring of buffers. */
        struct mutex mutex;
@@ -158,7 +163,6 @@ struct vhost_dev {
        struct vhost_virtqueue **vqs;
        int nvqs;
        struct eventfd_ctx *log_ctx;
-       struct vhost_worker worker;
        struct vhost_iotlb *umem;
        struct vhost_iotlb *iotlb;
        spinlock_t iotlb_lock;
@@ -168,6 +172,7 @@ struct vhost_dev {
        int iov_limit;
        int weight;
        int byte_weight;
+       struct xarray worker_xa;
        bool use_worker;
        int (*msg_handler)(struct vhost_dev *dev, u32 asid,
                           struct vhost_iotlb_msg *msg);
@@ -188,16 +193,21 @@ void vhost_dev_cleanup(struct vhost_dev *);
 void vhost_dev_stop(struct vhost_dev *);
 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
 long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
+long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl,
+                       void __user *argp);
 bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
 bool vhost_log_access_ok(struct vhost_dev *);
 void vhost_clear_msg(struct vhost_dev *dev);
 
 int vhost_get_vq_desc(struct vhost_virtqueue *,
-                     struct iovec iov[], unsigned int iov_count,
+                     struct iovec iov[], unsigned int iov_size,
                      unsigned int *out_num, unsigned int *in_num,
                      struct vhost_log *log, unsigned int *log_num);
 void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
+void vhost_vq_flush(struct vhost_virtqueue *vq);
+bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
+bool vhost_vq_has_work(struct vhost_virtqueue *vq);
 bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
 int vhost_vq_init_access(struct vhost_virtqueue *);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
index 6578db78f0ae27d07d91f2fdcd738d92fd3715a7..817d377a3f360f6cd87f4da1d02debaac4d34545 100644 (file)
@@ -285,7 +285,7 @@ vhost_transport_send_pkt(struct sk_buff *skb)
                atomic_inc(&vsock->queued_replies);
 
        virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
-       vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+       vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
 
        rcu_read_unlock();
        return len;
@@ -583,7 +583,7 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
        /* Some packets may have been queued before the device was started,
         * let's kick the send worker to send them.
         */
-       vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+       vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
 
        mutex_unlock(&vsock->dev.mutex);
        return 0;
index 23112d84218fbd4f283c69319a1a0039fadcc9a8..4b773bd7c58cb7e42726127cdd188a181c8c04db 100644 (file)
@@ -45,9 +45,10 @@ struct virtio_pci_vq_info {
 struct virtio_pci_device {
        struct virtio_device vdev;
        struct pci_dev *pci_dev;
-       struct virtio_pci_legacy_device ldev;
-       struct virtio_pci_modern_device mdev;
-
+       union {
+               struct virtio_pci_legacy_device ldev;
+               struct virtio_pci_modern_device mdev;
+       };
        bool is_legacy;
 
        /* Where to read and clear interrupt */
index 869cb46bef9603597b44db5f72d19c186e6c056e..aad7d9296e772063f8a15757cdf33c0ba1b2fc7d 100644 (file)
@@ -218,21 +218,29 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
        int err, common, isr, notify, device;
        u32 notify_length;
        u32 notify_offset;
+       int devid;
 
        check_offsets();
 
-       /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
-       if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
-               return -ENODEV;
-
-       if (pci_dev->device < 0x1040) {
-               /* Transitional devices: use the PCI subsystem device id as
-                * virtio device id, same as legacy driver always did.
-                */
-               mdev->id.device = pci_dev->subsystem_device;
+       if (mdev->device_id_check) {
+               devid = mdev->device_id_check(pci_dev);
+               if (devid < 0)
+                       return devid;
+               mdev->id.device = devid;
        } else {
-               /* Modern devices: simply use PCI device id, but start from 0x1040. */
-               mdev->id.device = pci_dev->device - 0x1040;
+               /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
+               if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
+                       return -ENODEV;
+
+               if (pci_dev->device < 0x1040) {
+                       /* Transitional devices: use the PCI subsystem device id as
+                        * virtio device id, same as legacy driver always did.
+                        */
+                       mdev->id.device = pci_dev->subsystem_device;
+               } else {
+                       /* Modern devices: simply use PCI device id, but start from 0x1040. */
+                       mdev->id.device = pci_dev->device - 0x1040;
+               }
        }
        mdev->id.vendor = pci_dev->subsystem_vendor;
 
@@ -260,7 +268,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
                return -EINVAL;
        }
 
-       err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
+       err = dma_set_mask_and_coherent(&pci_dev->dev,
+                                       mdev->dma_mask ? : DMA_BIT_MASK(64));
        if (err)
                err = dma_set_mask_and_coherent(&pci_dev->dev,
                                                DMA_BIT_MASK(32));
index eb6aee8c06b2c954cb197364bf03efc8708ee2d0..989e2d7184ce463aeef8b4cd6df968f9d8839015 100644 (file)
@@ -385,7 +385,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                        err = PTR_ERR(vqs[i]);
                        goto err_setup_vq;
                }
-               ops->set_vq_affinity(vdpa, i, &masks[i]);
+
+               if (ops->set_vq_affinity)
+                       ops->set_vq_affinity(vdpa, i, &masks[i]);
        }
 
        cb.callback = virtio_vdpa_config_cb;
index 98a60ce87b928e09fe223bb7245e311d57bfb462..bcba7fda3cc974fd47f764ce11aeea966285c46d 100644 (file)
@@ -222,6 +222,27 @@ enum pds_core_lif_type {
        PDS_CORE_LIF_TYPE_DEFAULT = 0,
 };
 
+#define PDS_CORE_IFNAMSIZ              16
+
+/**
+ * enum pds_core_logical_qtype - Logical Queue Types
+ * @PDS_CORE_QTYPE_ADMINQ:    Administrative Queue
+ * @PDS_CORE_QTYPE_NOTIFYQ:   Notify Queue
+ * @PDS_CORE_QTYPE_RXQ:       Receive Queue
+ * @PDS_CORE_QTYPE_TXQ:       Transmit Queue
+ * @PDS_CORE_QTYPE_EQ:        Event Queue
+ * @PDS_CORE_QTYPE_MAX:       Max queue type supported
+ */
+enum pds_core_logical_qtype {
+       PDS_CORE_QTYPE_ADMINQ  = 0,
+       PDS_CORE_QTYPE_NOTIFYQ = 1,
+       PDS_CORE_QTYPE_RXQ     = 2,
+       PDS_CORE_QTYPE_TXQ     = 3,
+       PDS_CORE_QTYPE_EQ      = 4,
+
+       PDS_CORE_QTYPE_MAX     = 16   /* don't change - used in struct size */
+};
+
 /**
  * union pds_core_lif_config - LIF configuration
  * @state:         LIF state (enum pds_core_lif_state)
@@ -584,6 +605,219 @@ struct pds_core_q_init_comp {
        u8     color;
 };
 
+/*
+ * enum pds_vdpa_cmd_opcode - vDPA Device commands
+ */
+enum pds_vdpa_cmd_opcode {
+       PDS_VDPA_CMD_INIT               = 48,
+       PDS_VDPA_CMD_IDENT              = 49,
+       PDS_VDPA_CMD_RESET              = 51,
+       PDS_VDPA_CMD_VQ_RESET           = 52,
+       PDS_VDPA_CMD_VQ_INIT            = 53,
+       PDS_VDPA_CMD_STATUS_UPDATE      = 54,
+       PDS_VDPA_CMD_SET_FEATURES       = 55,
+       PDS_VDPA_CMD_SET_ATTR           = 56,
+};
+
+/**
+ * struct pds_vdpa_cmd - generic command
+ * @opcode:    Opcode
+ * @vdpa_index:        Index for vdpa subdevice
+ * @vf_id:     VF id
+ */
+struct pds_vdpa_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+};
+
+/**
+ * struct pds_vdpa_init_cmd - INIT command
+ * @opcode:    Opcode PDS_VDPA_CMD_INIT
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id:     VF id
+ */
+struct pds_vdpa_init_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+};
+
+/**
+ * struct pds_vdpa_ident - vDPA identification data
+ * @hw_features:       vDPA features supported by device
+ * @max_vqs:           max queues available (2 queues for a single queuepair)
+ * @max_qlen:          log(2) of maximum number of descriptors
+ * @min_qlen:          log(2) of minimum number of descriptors
+ *
+ * This struct is used in a DMA block that is set up for the PDS_VDPA_CMD_IDENT
+ * transaction.  Set up the DMA block and send the address in the IDENT cmd
+ * data, the DSC will write the ident information, then we can remove the DMA
+ * block after reading the answer.  If the completion status is 0, then there
+ * is valid information, else there was an error and the data should be invalid.
+ */
+struct pds_vdpa_ident {
+       __le64 hw_features;
+       __le16 max_vqs;
+       __le16 max_qlen;
+       __le16 min_qlen;
+};
+
+/**
+ * struct pds_vdpa_ident_cmd - IDENT command
+ * @opcode:    Opcode PDS_VDPA_CMD_IDENT
+ * @rsvd:       Word boundary padding
+ * @vf_id:     VF id
+ * @len:       length of ident info DMA space
+ * @ident_pa:  address for DMA of ident info (struct pds_vdpa_ident)
+ *                     only used for this transaction, then forgotten by DSC
+ */
+struct pds_vdpa_ident_cmd {
+       u8     opcode;
+       u8     rsvd;
+       __le16 vf_id;
+       __le32 len;
+       __le64 ident_pa;
+};
+
+/**
+ * struct pds_vdpa_status_cmd - STATUS_UPDATE command
+ * @opcode:    Opcode PDS_VDPA_CMD_STATUS_UPDATE
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id:     VF id
+ * @status:    new status bits
+ */
+struct pds_vdpa_status_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+       u8     status;
+};
+
+/**
+ * enum pds_vdpa_attr - List of VDPA device attributes
+ * @PDS_VDPA_ATTR_MAC:          MAC address
+ * @PDS_VDPA_ATTR_MAX_VQ_PAIRS: Max virtqueue pairs
+ */
+enum pds_vdpa_attr {
+       PDS_VDPA_ATTR_MAC          = 1,
+       PDS_VDPA_ATTR_MAX_VQ_PAIRS = 2,
+};
+
+/**
+ * struct pds_vdpa_setattr_cmd - SET_ATTR command
+ * @opcode:            Opcode PDS_VDPA_CMD_SET_ATTR
+ * @vdpa_index:                Index for vdpa subdevice
+ * @vf_id:             VF id
+ * @attr:              attribute to be changed (enum pds_vdpa_attr)
+ * @pad:               Word boundary padding
+ * @mac:               new mac address to be assigned as vdpa device address
+ * @max_vq_pairs:      new limit of virtqueue pairs
+ */
+struct pds_vdpa_setattr_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+       u8     attr;
+       u8     pad[3];
+       union {
+               u8 mac[6];
+               __le16 max_vq_pairs;
+       } __packed;
+};
+
+/**
+ * struct pds_vdpa_vq_init_cmd - queue init command
+ * @opcode: Opcode PDS_VDPA_CMD_VQ_INIT
+ * @vdpa_index:        Index for vdpa subdevice
+ * @vf_id:     VF id
+ * @qid:       Queue id (bit0 clear = rx, bit0 set = tx, qid=N is ctrlq)
+ * @len:       log(2) of max descriptor count
+ * @desc_addr: DMA address of descriptor area
+ * @avail_addr:        DMA address of available descriptors (aka driver area)
+ * @used_addr: DMA address of used descriptors (aka device area)
+ * @intr_index:        interrupt index
+ * @avail_index:       initial device position in available ring
+ * @used_index:        initial device position in used ring
+ */
+struct pds_vdpa_vq_init_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+       __le16 qid;
+       __le16 len;
+       __le64 desc_addr;
+       __le64 avail_addr;
+       __le64 used_addr;
+       __le16 intr_index;
+       __le16 avail_index;
+       __le16 used_index;
+};
+
+/**
+ * struct pds_vdpa_vq_init_comp - queue init completion
+ * @status:    Status of the command (enum pds_core_status_code)
+ * @hw_qtype:  HW queue type, used in doorbell selection
+ * @hw_qindex: HW queue index, used in doorbell selection
+ * @rsvd:      Word boundary padding
+ * @color:     Color bit
+ */
+struct pds_vdpa_vq_init_comp {
+       u8     status;
+       u8     hw_qtype;
+       __le16 hw_qindex;
+       u8     rsvd[11];
+       u8     color;
+};
+
+/**
+ * struct pds_vdpa_vq_reset_cmd - queue reset command
+ * @opcode:    Opcode PDS_VDPA_CMD_VQ_RESET
+ * @vdpa_index:        Index for vdpa subdevice
+ * @vf_id:     VF id
+ * @qid:       Queue id
+ */
+struct pds_vdpa_vq_reset_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+       __le16 qid;
+};
+
+/**
+ * struct pds_vdpa_vq_reset_comp - queue reset completion
+ * @status:    Status of the command (enum pds_core_status_code)
+ * @rsvd0:     Word boundary padding
+ * @avail_index:       current device position in available ring
+ * @used_index:        current device position in used ring
+ * @rsvd:      Word boundary padding
+ * @color:     Color bit
+ */
+struct pds_vdpa_vq_reset_comp {
+       u8     status;
+       u8     rsvd0;
+       __le16 avail_index;
+       __le16 used_index;
+       u8     rsvd[9];
+       u8     color;
+};
+
+/**
+ * struct pds_vdpa_set_features_cmd - set hw features
+ * @opcode: Opcode PDS_VDPA_CMD_SET_FEATURES
+ * @vdpa_index:        Index for vdpa subdevice
+ * @vf_id:     VF id
+ * @rsvd:       Word boundary padding
+ * @features:  Feature bit mask
+ */
+struct pds_vdpa_set_features_cmd {
+       u8     opcode;
+       u8     vdpa_index;
+       __le16 vf_id;
+       __le32 rsvd;
+       __le64 features;
+};
+
 union pds_core_adminq_cmd {
        u8     opcode;
        u8     bytes[64];
@@ -600,6 +834,16 @@ union pds_core_adminq_cmd {
 
        struct pds_core_q_identify_cmd    q_ident;
        struct pds_core_q_init_cmd        q_init;
+
+       struct pds_vdpa_cmd               vdpa;
+       struct pds_vdpa_init_cmd          vdpa_init;
+       struct pds_vdpa_ident_cmd         vdpa_ident;
+       struct pds_vdpa_status_cmd        vdpa_status;
+       struct pds_vdpa_setattr_cmd       vdpa_setattr;
+       struct pds_vdpa_set_features_cmd  vdpa_set_features;
+       struct pds_vdpa_vq_init_cmd       vdpa_vq_init;
+       struct pds_vdpa_vq_reset_cmd      vdpa_vq_reset;
+
 };
 
 union pds_core_adminq_comp {
@@ -621,6 +865,9 @@ union pds_core_adminq_comp {
 
        struct pds_core_q_identify_comp   q_ident;
        struct pds_core_q_init_comp       q_init;
+
+       struct pds_vdpa_vq_init_comp      vdpa_vq_init;
+       struct pds_vdpa_vq_reset_comp     vdpa_vq_reset;
 };
 
 #ifndef __CHECKER__
index 060331486d50d85a8a7623dab584d7272fa14d0d..435c8e8161c2f3a596c64dc869f816d4ee95ae85 100644 (file)
@@ -39,26 +39,7 @@ enum pds_core_vif_types {
 #define PDS_DEV_TYPE_RDMA_STR  "RDMA"
 #define PDS_DEV_TYPE_LM_STR    "LM"
 
-#define PDS_CORE_IFNAMSIZ              16
-
-/**
- * enum pds_core_logical_qtype - Logical Queue Types
- * @PDS_CORE_QTYPE_ADMINQ:    Administrative Queue
- * @PDS_CORE_QTYPE_NOTIFYQ:   Notify Queue
- * @PDS_CORE_QTYPE_RXQ:       Receive Queue
- * @PDS_CORE_QTYPE_TXQ:       Transmit Queue
- * @PDS_CORE_QTYPE_EQ:        Event Queue
- * @PDS_CORE_QTYPE_MAX:       Max queue type supported
- */
-enum pds_core_logical_qtype {
-       PDS_CORE_QTYPE_ADMINQ  = 0,
-       PDS_CORE_QTYPE_NOTIFYQ = 1,
-       PDS_CORE_QTYPE_RXQ     = 2,
-       PDS_CORE_QTYPE_TXQ     = 3,
-       PDS_CORE_QTYPE_EQ      = 4,
-
-       PDS_CORE_QTYPE_MAX     = 16   /* don't change - used in struct size */
-};
+#define PDS_VDPA_DEV_NAME      PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
 
 int pdsc_register_notify(struct notifier_block *nb);
 void pdsc_unregister_notify(struct notifier_block *nb);
index b93238db94e304de1e817498006118b660eaac1c..de6041deee372e822b77db13a28d759f759f4320 100644 (file)
@@ -103,6 +103,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num,
  * @config_enabled: configuration change reporting enabled
  * @config_change_pending: configuration change reported while disabled
  * @config_lock: protects configuration change reporting
+ * @vqs_list_lock: protects @vqs.
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
@@ -117,7 +118,7 @@ struct virtio_device {
        bool config_enabled;
        bool config_change_pending;
        spinlock_t config_lock;
-       spinlock_t vqs_list_lock; /* Protects VQs list access */
+       spinlock_t vqs_list_lock;
        struct device dev;
        struct virtio_device_id id;
        const struct virtio_config_ops *config;
@@ -160,6 +161,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
+ * @validate: the function to call to validate features and config space.
+ *            Returns 0 or -errno.
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
index c4eeb79b01398eba88977e636f248096a07a8087..067ac1d789bcb64917c5cbef40791f4a04b50380 100644 (file)
@@ -38,6 +38,12 @@ struct virtio_pci_modern_device {
        int modern_bars;
 
        struct virtio_device_id id;
+
+       /* optional check for vendor virtio device, returns dev_id or -errno */
+       int (*device_id_check)(struct pci_dev *pdev);
+
+       /* optional mask for devices with limited DMA space */
+       u64 dma_mask;
 };
 
 /*
index 92e1b700b51cbdf66aed6b3d17bc32688cd4706b..f5c48b61ab62244104bbf1b2100d3db7286f8c82 100644 (file)
 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
 /* Specify an eventfd file descriptor to signal on log write. */
 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to 1 or more of its virtqueues using
+ * the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit caller's cgroups and namespaces,
+ * and will share the caller's memory space. The new thread will also be
+ * counted against the caller's RLIMIT_NPROC value.
+ *
+ * The worker's ID used in other commands will be returned in
+ * vhost_worker_state.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
+ * virtqueue. If userspace is not able to call this for workers it has created,
+ * the kernel will free all the device's workers when the device is closed.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
 
 /* Ring setup. */
 /* Set number of descriptors in ring. This parameter can not
 #define VHOST_VRING_BIG_ENDIAN 1
 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
 #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues.
+ *
+ * This will replace the virtqueue's existing worker. If the replaced worker
+ * is no longer attached to any virtqueues, it can be freed with
+ * VHOST_FREE_WORKER.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15,             \
+                                      struct vhost_vring_worker)
+/* Return the vring worker's ID */
+#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16,               \
+                                    struct vhost_vring_worker)
 
 /* The following ioctls use eventfd file descriptors to signal and poll
  * for events. */
index c5690a8992d8e5d8c15a383594c8d0e08448d821..d3aad12ad1fa7875a8597c06cc5a998b60f6e72e 100644 (file)
@@ -47,6 +47,22 @@ struct vhost_vring_addr {
        __u64 log_guest_addr;
 };
 
+struct vhost_worker_state {
+       /*
+        * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
+        * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
+        * to free.
+        */
+       unsigned int worker_id;
+};
+
+struct vhost_vring_worker {
+       /* vring index */
+       unsigned int index;
+       /* The id of the vhost_worker returned from VHOST_NEW_WORKER */
+       unsigned int worker_id;
+};
+
 /* no alignment requirement */
 struct vhost_iotlb_msg {
        __u64 iova;
index 7b7139d97d742823364c402050ae02ec9f2ea8b3..d128925980e0592c7959d60ec2fb80375646c4cb 100644 (file)
@@ -4,7 +4,18 @@ test: virtio_test vringh_test
 virtio_test: virtio_ring.o virtio_test.o
 vringh_test: vringh_test.o vringh.o virtio_ring.o
 
-CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -mfunction-return=thunk -fcf-protection=none -mindirect-branch-register
+try-run = $(shell set -e;              \
+       if ($(1)) >/dev/null 2>&1;      \
+       then echo "$(2)";               \
+       else echo "$(3)";               \
+       fi)
+
+__cc-option = $(call try-run,\
+       $(1) -Werror $(2) -c -x c /dev/null -o /dev/null,$(2),)
+cc-option = $(call __cc-option, $(CC),$(1))
+
+CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h $(call cc-option,-mfunction-return=thunk) $(call cc-option,-fcf-protection=none) $(call cc-option,-mindirect-branch-register)
+
 CFLAGS += -pthread
 LDFLAGS += -pthread
 vpath %.c ../../drivers/virtio ../../drivers/vhost