scsi: target: iblock: Add backend plug/unplug callouts
author: Mike Christie <michael.christie@oracle.com>
Sat, 27 Feb 2021 17:00:03 +0000 (11:00 -0600)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Thu, 4 Mar 2021 22:37:02 +0000 (17:37 -0500)
This patch adds plug/unplug callouts for iblock. For an initiator driver
like iSCSI which wants to pass multiple cmds to its xmit thread instead of
one cmd at a time, this increases IOPS by around 10% with vhost-scsi
(combined with the last patches we can see a total 40-50% increase). For
driver combos like tcm_loop and faster drivers like the iSER initiator, we
can still see IOPS increase by 20-30% when tcm_loop's nr_hw_queues setting
is also increased.

Link: https://lore.kernel.org/r/20210227170006.5077-23-michael.christie@oracle.com
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/target/target_core_iblock.c
drivers/target/target_core_iblock.h

index ee3d52061281202e209a9d3530e7f7866c2416b5..90e1c65ad4b6b1ce1ffaa23e306357f30a27daa1 100644 (file)
@@ -61,9 +61,18 @@ static struct se_device *iblock_alloc_device(struct se_hba *hba, const char *nam
                return NULL;
        }
 
+       ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug),
+                                  GFP_KERNEL);
+       if (!ib_dev->ibd_plug)
+               goto free_dev;
+
        pr_debug( "IBLOCK: Allocated ib_dev for %s\n", name);
 
        return &ib_dev->dev;
+
+free_dev:
+       kfree(ib_dev);
+       return NULL;
 }
 
 static int iblock_configure_device(struct se_device *dev)
@@ -171,6 +180,7 @@ static void iblock_dev_call_rcu(struct rcu_head *p)
        struct se_device *dev = container_of(p, struct se_device, rcu_head);
        struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
 
+       kfree(ib_dev->ibd_plug);
        kfree(ib_dev);
 }
 
@@ -188,6 +198,33 @@ static void iblock_destroy_device(struct se_device *dev)
        bioset_exit(&ib_dev->ibd_bio_set);
 }
 
+/*
+ * iblock_plug_device - start batching bio submissions for @se_dev.
+ *
+ * Claims the iblock_dev_plug slot indexed by the current cpu and starts a
+ * block layer plug on it. Returns the embedded se_dev_plug, which is what
+ * iblock_unplug_device() takes, or NULL if that slot is already plugged.
+ */
+static struct se_dev_plug *iblock_plug_device(struct se_device *se_dev)
+{
+       struct iblock_dev *ib_dev = IBLOCK_DEV(se_dev);
+       struct iblock_dev_plug *ib_dev_plug;
+
+       /*
+        * Each se_device has a per cpu work this can be run from. We
+        * shouldn't have multiple threads on the same cpu calling this
+        * at the same time, and the PLUGGED bit below rejects a second
+        * user of the slot anyway. The cpu id is only used to pick a
+        * slot, so use raw_smp_processor_id() to avoid the debug check
+        * smp_processor_id() performs when called from preemptible
+        * context.
+        */
+       ib_dev_plug = &ib_dev->ibd_plug[raw_smp_processor_id()];
+       if (test_and_set_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags))
+               return NULL;
+
+       blk_start_plug(&ib_dev_plug->blk_plug);
+       return &ib_dev_plug->se_plug;
+}
+
+/*
+ * Finish the block layer plug started by iblock_plug_device() and then
+ * clear the PLUGGED bit so the slot can be claimed again.
+ */
+static void iblock_unplug_device(struct se_dev_plug *se_plug)
+{
+       struct iblock_dev_plug *plug;
+
+       plug = container_of(se_plug, struct iblock_dev_plug, se_plug);
+
+       blk_finish_plug(&plug->blk_plug);
+       clear_bit(IBD_PLUGF_PLUGGED, &plug->flags);
+}
+
 static unsigned long long iblock_emulate_read_cap_with_block_size(
        struct se_device *dev,
        struct block_device *bd,
@@ -335,7 +372,10 @@ static void iblock_submit_bios(struct bio_list *list)
 {
        struct blk_plug plug;
        struct bio *bio;
-
+       /*
+        * The block layer handles nested plugs, so just plug/unplug to handle
+        * fabric drivers that didn't support batching and multi bio cmds.
+        */
        blk_start_plug(&plug);
        while ((bio = bio_list_pop(list)))
                submit_bio(bio);
@@ -867,6 +907,8 @@ static const struct target_backend_ops iblock_ops = {
        .configure_device       = iblock_configure_device,
        .destroy_device         = iblock_destroy_device,
        .free_device            = iblock_free_device,
+       .plug_device            = iblock_plug_device,
+       .unplug_device          = iblock_unplug_device,
        .parse_cdb              = iblock_parse_cdb,
        .set_configfs_dev_params = iblock_set_configfs_dev_params,
        .show_configfs_dev_params = iblock_show_configfs_dev_params,
index cefc641145b3be44f6aaa76e2627820bb9a61208..8c55375d2f754e6863595be384e79592e8cc1c69 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/atomic.h>
 #include <linux/refcount.h>
+#include <linux/blkdev.h>
 #include <target/target_core_base.h>
 
 #define IBLOCK_VERSION         "4.0"
@@ -17,6 +18,14 @@ struct iblock_req {
 
 #define IBDF_HAS_UDEV_PATH             0x01
 
+/*
+ * Passed as the bit number to test_and_set_bit()/clear_bit() on
+ * iblock_dev_plug.flags; set while the slot has an active plug.
+ */
+#define IBD_PLUGF_PLUGGED              0x01
+
+/*
+ * One plug slot. iblock_dev.ibd_plug points to an array of nr_cpu_ids of
+ * these, indexed by processor id in iblock_plug_device().
+ */
+struct iblock_dev_plug {
+       /* Handle returned by iblock_plug_device(); container_of() anchor. */
+       struct se_dev_plug se_plug;
+       /* Block layer plug started/finished around the batched submission. */
+       struct blk_plug blk_plug;
+       /* Bit flags; see IBD_PLUGF_PLUGGED. */
+       unsigned long flags;
+};
+
 struct iblock_dev {
        struct se_device dev;
        unsigned char ibd_udev_path[SE_UDEV_PATH_LEN];
@@ -24,6 +33,7 @@ struct iblock_dev {
        struct bio_set  ibd_bio_set;
        struct block_device *ibd_bd;
        bool ibd_readonly;
+       struct iblock_dev_plug *ibd_plug;
 } ____cacheline_aligned;
 
 #endif /* TARGET_CORE_IBLOCK_H */