diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9c8c8c71a13f1683524e1b74433fd897cbeb9fcb..89bd9cb9defcb7786cdeef555d2726d40243884e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,6 +33,7 @@
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
 #include "blk-mq-tag.h"
+#include "blk-pm.h"
 #include "blk-stat.h"
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
@@ -198,7 +199,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
        freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
        WARN_ON_ONCE(freeze_depth < 0);
        if (!freeze_depth) {
-               percpu_ref_reinit(&q->q_usage_counter);
+               percpu_ref_resurrect(&q->q_usage_counter);
                wake_up_all(&q->mq_freeze_wq);
        }
 }
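
The switch from percpu_ref_reinit() to percpu_ref_resurrect() allows unfreezing a queue whose q_usage_counter was killed but has not yet drained to zero. A minimal sketch of the distinction, against the percpu_ref API from <linux/percpu-refcount.h> (the demo_* names are hypothetical):

    #include <linux/percpu-refcount.h>

    static void demo_release(struct percpu_ref *ref)
    {
            /* Runs once the last holder drops its reference after a kill. */
    }

    static void demo_freeze_unfreeze(struct percpu_ref *ref)
    {
            /* Freeze: switch to atomic mode and start draining. */
            percpu_ref_kill(ref);

            /*
             * percpu_ref_reinit() may only run once the ref has reached
             * zero (demo_release() fired); percpu_ref_resurrect() may
             * also be called while the killed ref still has holders,
             * which is exactly the unfreeze-before-drain case.
             */
            percpu_ref_resurrect(ref);
    }
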
@@ -475,6 +476,7 @@ static void __blk_mq_free_request(struct request *rq)
        struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
        const int sched_tag = rq->internal_tag;
 
+       blk_pm_mark_last_busy(rq);
        if (rq->tag != -1)
                blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
        if (sched_tag != -1)
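
blk_pm_mark_last_busy() is supplied by the newly included blk-pm.h. A sketch of what the helper plausibly does, assuming it refreshes the runtime-PM idle timestamp for queues whose device uses runtime PM (an assumption, not quoted from this patch):

    #include <linux/pm_runtime.h>

    /*
     * Sketch: note the time of the last completed request so the device's
     * autosuspend timer restarts from the most recent I/O, while skipping
     * PM-internal requests themselves.
     */
    static inline void blk_pm_mark_last_busy(struct request *rq)
    {
            if (rq->q->dev && !(rq->rq_flags & RQF_PM))
                    pm_runtime_mark_last_busy(rq->q->dev);
    }
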
@@ -526,6 +528,9 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
                blk_stat_add(rq, now);
        }
 
+       if (rq->internal_tag != -1)
+               blk_mq_sched_completed_request(rq, now);
+
        blk_account_io_done(rq, now);
 
        if (rq->end_io) {
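
The elevator's completed_request hook is now driven from __blk_mq_end_request() and receives the completion timestamp; the old call site in __blk_mq_complete_request() goes away in the next hunk. A sketch of the expected wrapper, assuming the hook gained a u64 now parameter:

    /* Sketch: pass the completion time through to the scheduler, if any. */
    static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
    {
            struct elevator_queue *e = rq->q->elevator;

            if (e && e->type->ops.mq.completed_request)
                    e->type->ops.mq.completed_request(rq, now);
    }
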
@@ -560,12 +565,22 @@ static void __blk_mq_complete_request(struct request *rq)
        bool shared = false;
        int cpu;
 
-       if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
-                       MQ_RQ_IN_FLIGHT)
+       if (!blk_mq_mark_complete(rq))
                return;
 
-       if (rq->internal_tag != -1)
-               blk_mq_sched_completed_request(rq);
+       /*
+        * For most single-queue controllers there is only one irq vector
+        * for handling I/O completion, and that irq's affinity is set to
+        * all possible CPUs. On most architectures this affinity means the
+        * irq is handled on one specific CPU.
+        *
+        * So complete the I/O request in softirq context in the single queue
+        * case, so as not to degrade I/O performance through irqsoff latency.
+        */
+       if (rq->q->nr_hw_queues == 1) {
+               __blk_complete_request(rq);
+               return;
+       }
 
        if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
                rq->q->softirq_done_fn(rq);
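
blk_mq_mark_complete() encapsulates the cmpxchg() it replaces; a definition consistent with the removed lines (true only for the caller that wins the MQ_RQ_IN_FLIGHT -> MQ_RQ_COMPLETE transition, so each request is completed exactly once):

    /* Sketch: atomically claim the completion; only one caller can win. */
    static inline bool blk_mq_mark_complete(struct request *rq)
    {
            return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) ==
                            MQ_RQ_IN_FLIGHT;
    }
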
@@ -1630,7 +1645,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                BUG_ON(!rq->q);
                if (rq->mq_ctx != this_ctx) {
                        if (this_ctx) {
-                               trace_block_unplug(this_q, depth, from_schedule);
+                               trace_block_unplug(this_q, depth, !from_schedule);
                                blk_mq_sched_insert_requests(this_q, this_ctx,
                                                                &ctx_list,
                                                                from_schedule);
@@ -1650,7 +1665,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
         * on 'ctx_list'. Do those.
         */
        if (this_ctx) {
-               trace_block_unplug(this_q, depth, from_schedule);
+               trace_block_unplug(this_q, depth, !from_schedule);
                blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
                                                from_schedule);
        }
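
Both trace_block_unplug() call sites had the last argument inverted: the tracepoint's third parameter means "explicit unplug", while from_schedule is true for the implicit, scheduler-driven case, hence !from_schedule. For reference, the block_unplug prototype in include/trace/events/block.h is along these lines:

    /*
     * "explicit" is true when the task unplugged deliberately and false
     * when the unplug happened implicitly from schedule().
     */
    TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit)
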
@@ -2976,6 +2991,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 
        list_for_each_entry(q, &set->tag_list, tag_set_list)
                blk_mq_freeze_queue(q);
+       /*
+        * Sync with blk_mq_queue_tag_busy_iter.
+        */
+       synchronize_rcu();
        /*
         * Switch IO scheduler to 'none', cleaning up the data associated
         * with the previous scheduler. We will switch back once we are done
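
The synchronize_rcu() pairs with an RCU read-side critical section in blk_mq_queue_tag_busy_iter(): after the grace period, any iterator started before the freeze has finished, and later iterators observe the drained q_usage_counter and bail out before touching nr_hw_queues or queue_hw_ctx. The reader side plausibly looks like this sketch (not quoted from this patch):

    rcu_read_lock();
    if (percpu_ref_is_zero(&q->q_usage_counter)) {
            /* Queue is frozen; the hw queue layout may be changing. */
            rcu_read_unlock();
            return;
    }
    /* ... walk the hctxs and their busy tags under the read lock ... */
    rcu_read_unlock();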