scsi: qla2xxx: Move heartbeat handling from DPC thread to workqueue
authorManish Rangankar <mrangankar@marvell.com>
Wed, 8 Sep 2021 16:46:20 +0000 (09:46 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Wed, 15 Sep 2021 03:33:20 +0000 (23:33 -0400)
DPC thread gets restricted due to a no-op mailbox, which is a blocking call
and has a high execution frequency. To free up the DPC thread we move no-op
handling to the workqueue. Also, modified qla_do_heartbeat() to send no-op
MBC if we don’t have any active interrupts, but there are still I/Os
outstanding with firmware.

Link: https://lore.kernel.org/r/20210908164622.19240-9-njavali@marvell.com
Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check")
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_os.c

index be2eb75ee1a376d5d031affabd0bfb7fa902fc3b..8924eeb9367daec76282bf45699ee57dd56b4d25 100644 (file)
@@ -3750,6 +3750,7 @@ struct qla_qpair {
        struct qla_fw_resources fwres ____cacheline_aligned;
        u32     cmd_cnt;
        u32     cmd_completion_cnt;
+       u32     prev_completion_cnt;
 };
 
 /* Place holder for FW buffer parameters */
@@ -4607,6 +4608,7 @@ struct qla_hw_data {
        struct qla_chip_state_84xx *cs84xx;
        struct isp_operations *isp_ops;
        struct workqueue_struct *wq;
+       struct work_struct heartbeat_work;
        struct qlfc_fw fw_buf;
 
        /* FCP_CMND priority support */
@@ -4708,7 +4710,6 @@ struct qla_hw_data {
 
        struct qla_hw_data_stat stat;
        pci_error_state_t pci_error_state;
-       u64 prev_cmd_cnt;
        struct dma_pool *purex_dma_pool;
        struct btree_head32 host_map;
 
@@ -4854,7 +4855,6 @@ typedef struct scsi_qla_host {
 #define SET_ZIO_THRESHOLD_NEEDED 32
 #define ISP_ABORT_TO_ROM       33
 #define VPORT_DELETE           34
-#define HEARTBEAT_CHK          38
 
 #define PROCESS_PUREX_IOCB     63
 
index c6b3d0e7489e91b39be20ba12bc234a2c86e7745..a9a4243cb15a170e596900dd90f28fdeb0929e13 100644 (file)
@@ -7025,12 +7025,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
        ha->chip_reset++;
        ha->base_qpair->chip_reset = ha->chip_reset;
        ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0;
+       ha->base_qpair->prev_completion_cnt = 0;
        for (i = 0; i < ha->max_qpairs; i++) {
                if (ha->queue_pair_map[i]) {
                        ha->queue_pair_map[i]->chip_reset =
                                ha->base_qpair->chip_reset;
                        ha->queue_pair_map[i]->cmd_cnt =
                            ha->queue_pair_map[i]->cmd_completion_cnt = 0;
+                       ha->base_qpair->prev_completion_cnt = 0;
                }
        }
 
index a1e861ecfc01123504b54690bae81f442515fa1a..0c446438dcd3b7ad1b6b581303fab4a1b00df7f0 100644 (file)
@@ -2794,6 +2794,16 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
        return atomic_read(&vha->loop_state) == LOOP_READY;
 }
 
+static void qla_heartbeat_work_fn(struct work_struct *work)
+{
+       struct qla_hw_data *ha = container_of(work,
+               struct qla_hw_data, heartbeat_work);
+       struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+
+       if (!ha->flags.mbox_busy && base_vha->flags.init_done)
+               qla_no_op_mb(base_vha);
+}
+
 static void qla2x00_iocb_work_fn(struct work_struct *work)
 {
        struct scsi_qla_host *vha = container_of(work,
@@ -3232,6 +3242,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
            host->transportt, sht->vendor_id);
 
        INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
+       INIT_WORK(&ha->heartbeat_work, qla_heartbeat_work_fn);
 
        /* Set up the irqs */
        ret = qla2x00_request_irqs(ha, rsp);
@@ -7118,17 +7129,6 @@ intr_on_check:
                        qla2x00_lip_reset(base_vha);
                }
 
-               if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) {
-                       /*
-                        * if there is a mb in progress then that's
-                        * enough of a check to see if fw is still ticking.
-                        */
-                       if (!ha->flags.mbox_busy && base_vha->flags.init_done)
-                               qla_no_op_mb(base_vha);
-
-                       clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags);
-               }
-
                ha->dpc_active = 0;
 end_loop:
                set_current_state(TASK_INTERRUPTIBLE);
@@ -7187,57 +7187,51 @@ qla2x00_rst_aen(scsi_qla_host_t *vha)
 
 static bool qla_do_heartbeat(struct scsi_qla_host *vha)
 {
-       u64 cmd_cnt, prev_cmd_cnt;
-       bool do_hb = false;
        struct qla_hw_data *ha = vha->hw;
-       int i;
+       u32 cmpl_cnt;
+       u16 i;
+       bool do_heartbeat = false;
 
-       /* if cmds are still pending down in fw, then do hb */
-       if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) {
-               do_hb = true;
+       /*
+        * Allow do_heartbeat only if we don’t have any active interrupts,
+        * but there are still IOs outstanding with firmware.
+        */
+       cmpl_cnt = ha->base_qpair->cmd_completion_cnt;
+       if (cmpl_cnt == ha->base_qpair->prev_completion_cnt &&
+           cmpl_cnt != ha->base_qpair->cmd_cnt) {
+               do_heartbeat = true;
                goto skip;
        }
+       ha->base_qpair->prev_completion_cnt = cmpl_cnt;
 
        for (i = 0; i < ha->max_qpairs; i++) {
-               if (ha->queue_pair_map[i] &&
-                   ha->queue_pair_map[i]->cmd_cnt !=
-                   ha->queue_pair_map[i]->cmd_completion_cnt) {
-                       do_hb = true;
-                       break;
+               if (ha->queue_pair_map[i]) {
+                       cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt;
+                       if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt &&
+                           cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) {
+                               do_heartbeat = true;
+                               break;
+                       }
+                       ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt;
                }
        }
 
 skip:
-       prev_cmd_cnt = ha->prev_cmd_cnt;
-       cmd_cnt = ha->base_qpair->cmd_cnt;
-       for (i = 0; i < ha->max_qpairs; i++) {
-               if (ha->queue_pair_map[i])
-                       cmd_cnt += ha->queue_pair_map[i]->cmd_cnt;
-       }
-       ha->prev_cmd_cnt = cmd_cnt;
-
-       if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50))
-               /*
-                * IOs are completing before periodic hb check.
-                * IOs seems to be running, do hb for sanity check.
-                */
-               do_hb = true;
-
-       return do_hb;
+       return do_heartbeat;
 }
 
 static void qla_heart_beat(struct scsi_qla_host *vha)
 {
+       struct qla_hw_data *ha = vha->hw;
+
        if (vha->vp_idx)
                return;
 
        if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha))
                return;
 
-       if (qla_do_heartbeat(vha)) {
-               set_bit(HEARTBEAT_CHK, &vha->dpc_flags);
-               qla2xxx_wake_dpc(vha);
-       }
+       if (qla_do_heartbeat(vha))
+               queue_work(ha->wq, &ha->heartbeat_work);
 }
 
 /**************************************************************************