Merge branch 'x86-bootmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / drivers / scsi / libiscsi.c
index c28a712fd4db58dfa0880d4df60cf17af0719622..703eb6a88790280c788b9fb88ce331d15ff282e1 100644 (file)
@@ -1919,10 +1919,11 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
 static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
        enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
-       struct iscsi_task *task = NULL;
+       struct iscsi_task *task = NULL, *running_task;
        struct iscsi_cls_session *cls_session;
        struct iscsi_session *session;
        struct iscsi_conn *conn;
+       int i;
 
        cls_session = starget_to_session(scsi_target(sc->device));
        session = cls_session->dd_data;
@@ -1947,8 +1948,15 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
        }
 
        task = (struct iscsi_task *)sc->SCp.ptr;
-       if (!task)
+       if (!task) {
+               /*
+                * Raced with completion. Just reset the timer, and let it
+                * complete normally.
+                */
+               rc = BLK_EH_RESET_TIMER;
                goto done;
+       }
+
        /*
         * If we have sent (at least queued to the network layer) a pdu or
         * recvd one for the task since the last timeout ask for
@@ -1956,10 +1964,10 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
         * we can check if it is the task or connection when we send the
         * nop as a ping.
         */
-       if (time_after_eq(task->last_xfer, task->last_timeout)) {
+       if (time_after(task->last_xfer, task->last_timeout)) {
                ISCSI_DBG_EH(session, "Command making progress. Asking "
                             "scsi-ml for more time to complete. "
-                            "Last data recv at %lu. Last timeout was at "
+                            "Last data xfer at %lu. Last timeout was at "
                             "%lu\n.", task->last_xfer, task->last_timeout);
                task->have_checked_conn = false;
                rc = BLK_EH_RESET_TIMER;
@@ -1977,6 +1985,43 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
                goto done;
        }
 
+       for (i = 0; i < conn->session->cmds_max; i++) {
+               running_task = conn->session->cmds[i];
+               if (!running_task->sc || running_task == task ||
+                    running_task->state != ISCSI_TASK_RUNNING)
+                       continue;
+
+               /*
+                * Only check if cmds started before this one have made
+                * progress, or this could never fail
+                */
+               if (time_after(running_task->sc->jiffies_at_alloc,
+                              task->sc->jiffies_at_alloc))
+                       continue;
+
+               if (time_after(running_task->last_xfer, task->last_timeout)) {
+                       /*
+                        * This task has not made progress, but a task
+                        * started before us has transferred data since
+                        * we started/last-checked. We could be queueing
+                        * too many tasks or the LU is bad.
+                        *
+                        * If the device is bad, the cmds ahead of us on
+                        * other devs will complete, and this loop will
+                        * eventually fail, starting the scsi eh.
+                        */
+                       ISCSI_DBG_EH(session, "Command has not made progress "
+                                    "but commands ahead of it have. "
+                                    "Asking scsi-ml for more time to "
+                                    "complete. Our last xfer vs running task "
+                                    "last xfer %lu/%lu. Last check %lu.\n",
+                                    task->last_xfer, running_task->last_xfer,
+                                    task->last_timeout);
+                       rc = BLK_EH_RESET_TIMER;
+                       goto done;
+               }
+       }
+
        /* Assumes nop timeout is shorter than scsi cmd timeout */
        if (task->have_checked_conn)
                goto done;