Merge tag 'for-4.19/post-20180822' of git://git.kernel.dk/linux-block

[sfrench/cifs-2.6.git] / block / blk-mq.c
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 95919268564b162ed291a683dd5c27668cad0834..85a1c1a59c72716ce2e31c280d7fd43d5c6e61e9 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -34,8 +34,8 @@
  #include "blk-mq-debugfs.h"
  #include "blk-mq-tag.h"
  #include "blk-stat.h"
-#include "blk-wbt.h"
  #include "blk-mq-sched.h"
+#include "blk-rq-qos.h"
  
  static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
  static void blk_mq_poll_stats_start(struct request_queue *q);
@@ -285,7 +285,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                 rq->tag = -1;
                 rq->internal_tag = tag;
         } else {
-               if (blk_mq_tag_busy(data->hctx)) {
+               if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
                         rq_flags = RQF_MQ_INFLIGHT;
                         atomic_inc(&data->hctx->nr_active);
                 }
@@ -367,6 +367,8 @@ static struct request *blk_mq_get_request(struct request_queue *q,
                 if (!op_is_flush(op) && e->type->ops.mq.limit_depth &&
                     !(data->flags & BLK_MQ_REQ_RESERVED))
                         e->type->ops.mq.limit_depth(op, data);
+       } else {
+               blk_mq_tag_busy(data->hctx);
         }
  
         tag = blk_mq_get_tag(data);
@@ -504,7 +506,7 @@ void blk_mq_free_request(struct request *rq)
         if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
                 laptop_io_completion(q->backing_dev_info);
  
-       wbt_done(q->rq_wb, rq);
+       rq_qos_done(q, rq);
  
         if (blk_rq_rl(rq))
                 blk_put_rl(blk_rq_rl(rq));
@@ -527,7 +529,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
         blk_account_io_done(rq, now);
  
         if (rq->end_io) {
-               wbt_done(rq->q->rq_wb, rq);
+               rq_qos_done(rq->q, rq);
                 rq->end_io(rq, error);
         } else {
                 if (unlikely(blk_bidi_rq(rq)))
@@ -558,10 +560,8 @@ static void __blk_mq_complete_request(struct request *rq)
         bool shared = false;
         int cpu;
  
-       if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
-                       MQ_RQ_IN_FLIGHT)
+       if (!blk_mq_mark_complete(rq))
                 return;
-
         if (rq->internal_tag != -1)
                 blk_mq_sched_completed_request(rq);
  
@@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq)
                 rq->throtl_size = blk_rq_sectors(rq);
  #endif
                 rq->rq_flags |= RQF_STATS;
-               wbt_issue(q->rq_wb, rq);
+               rq_qos_issue(q, rq);
         }
  
         WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
@@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq)
         blk_mq_put_driver_tag(rq);
  
         trace_block_rq_requeue(q, rq);
-       wbt_requeue(q->rq_wb, rq);
+       rq_qos_requeue(q, rq);
  
         if (blk_mq_request_started(rq)) {
                 WRITE_ONCE(rq->state, MQ_RQ_IDLE);
@@ -964,16 +964,14 @@ static inline unsigned int queued_to_index(unsigned int queued)
         return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
  }
  
-bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
-                          bool wait)
+bool blk_mq_get_driver_tag(struct request *rq)
  {
         struct blk_mq_alloc_data data = {
                 .q = rq->q,
                 .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
-               .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
+               .flags = BLK_MQ_REQ_NOWAIT,
         };
-
-       might_sleep_if(wait);
+       bool shared;
  
         if (rq->tag != -1)
                 goto done;
@@ -981,9 +979,10 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
         if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
                 data.flags |= BLK_MQ_REQ_RESERVED;
  
+       shared = blk_mq_tag_busy(data.hctx);
         rq->tag = blk_mq_get_tag(&data);
         if (rq->tag >= 0) {
-               if (blk_mq_tag_busy(data.hctx)) {
+               if (shared) {
                         rq->rq_flags |= RQF_MQ_INFLIGHT;
                         atomic_inc(&data.hctx->nr_active);
                 }
@@ -991,8 +990,6 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
         }
  
  done:
-       if (hctx)
-               *hctx = data.hctx;
         return rq->tag != -1;
  }
  
@@ -1003,7 +1000,10 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
  
         hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
  
+       spin_lock(&hctx->dispatch_wait_lock);
         list_del_init(&wait->entry);
+       spin_unlock(&hctx->dispatch_wait_lock);
+
         blk_mq_run_hw_queue(hctx, true);
         return 1;
  }
@@ -1014,17 +1014,16 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
   * restart. For both cases, take care to check the condition again after
   * marking us as waiting.
   */
-static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
+static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
                                  struct request *rq)
  {
-       struct blk_mq_hw_ctx *this_hctx = *hctx;
-       struct sbq_wait_state *ws;
+       struct wait_queue_head *wq;
         wait_queue_entry_t *wait;
         bool ret;
  
-       if (!(this_hctx->flags & BLK_MQ_F_TAG_SHARED)) {
-               if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state))
-                       set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state);
+       if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) {
+               if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                       set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  
                 /*
                  * It's possible that a tag was freed in the window between the
@@ -1034,30 +1033,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
                  * Don't clear RESTART here, someone else could have set it.
                  * At most this will cost an extra queue run.
                  */
-               return blk_mq_get_driver_tag(rq, hctx, false);
+               return blk_mq_get_driver_tag(rq);
         }
  
-       wait = &this_hctx->dispatch_wait;
+       wait = &hctx->dispatch_wait;
         if (!list_empty_careful(&wait->entry))
                 return false;
  
-       spin_lock(&this_hctx->lock);
+       wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait;
+
+       spin_lock_irq(&wq->lock);
+       spin_lock(&hctx->dispatch_wait_lock);
         if (!list_empty(&wait->entry)) {
-               spin_unlock(&this_hctx->lock);
+               spin_unlock(&hctx->dispatch_wait_lock);
+               spin_unlock_irq(&wq->lock);
                 return false;
         }
  
-       ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
-       add_wait_queue(&ws->wait, wait);
+       wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+       __add_wait_queue(wq, wait);
  
         /*
          * It's possible that a tag was freed in the window between the
          * allocation failure and adding the hardware queue to the wait
          * queue.
          */
-       ret = blk_mq_get_driver_tag(rq, hctx, false);
+       ret = blk_mq_get_driver_tag(rq);
         if (!ret) {
-               spin_unlock(&this_hctx->lock);
+               spin_unlock(&hctx->dispatch_wait_lock);
+               spin_unlock_irq(&wq->lock);
                 return false;
         }
  
@@ -1065,14 +1069,42 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
          * We got a tag, remove ourselves from the wait queue to ensure
          * someone else gets the wakeup.
          */
-       spin_lock_irq(&ws->wait.lock);
         list_del_init(&wait->entry);
-       spin_unlock_irq(&ws->wait.lock);
-       spin_unlock(&this_hctx->lock);
+       spin_unlock(&hctx->dispatch_wait_lock);
+       spin_unlock_irq(&wq->lock);
  
         return true;
  }
  
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Update dispatch busy with the Exponentially Weighted Moving Average (EWMA):
+ * - EWMA is a simple way to compute a running average
+ * - weights (7/8 and 1/8) are applied so that it decays exponentially
+ * - the busy sample is scaled by 1 << 4 so the average isn't truncated to 0;
+ *   the exact factor doesn't matter because EWMA decays exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+       unsigned int ewma;
+
+       if (hctx->queue->elevator)
+               return;
+
+       ewma = hctx->dispatch_busy;
+
+       if (!ewma && !busy)
+               return;
+
+       ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+       if (busy)
+               ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+       ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+       hctx->dispatch_busy = ewma;
+}
+
  #define BLK_MQ_RESOURCE_DELAY  3               /* ms units */
  
  /*
@@ -1105,7 +1137,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                 if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
                         break;
  
-               if (!blk_mq_get_driver_tag(rq, NULL, false)) {
+               if (!blk_mq_get_driver_tag(rq)) {
                         /*
                          * The initial allocation attempt failed, so we need to
                          * rerun the hardware queue when a tag is freed. The
@@ -1113,7 +1145,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                          * before we add this entry back on the dispatch list,
                          * we'll re-run it below.
                          */
-                       if (!blk_mq_mark_tag_wait(&hctx, rq)) {
+                       if (!blk_mq_mark_tag_wait(hctx, rq)) {
                                 blk_mq_put_dispatch_budget(hctx);
                                 /*
                                  * For non-shared tags, the RESTART check
@@ -1137,7 +1169,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                         bd.last = true;
                 else {
                         nxt = list_first_entry(list, struct request, queuelist);
-                       bd.last = !blk_mq_get_driver_tag(nxt, NULL, false);
+                       bd.last = !blk_mq_get_driver_tag(nxt);
                 }
  
                 ret = q->mq_ops->queue_rq(hctx, &bd);
@@ -1209,8 +1241,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                 else if (needs_restart && (ret == BLK_STS_RESOURCE))
                         blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
  
+               blk_mq_update_dispatch_busy(hctx, true);
                 return false;
-       }
+       } else
+               blk_mq_update_dispatch_busy(hctx, false);
  
         /*
          * If the host/device is unable to accept more work, inform the
@@ -1544,19 +1578,19 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
                             struct list_head *list)
  
  {
+       struct request *rq;
+
         /*
          * preemption doesn't flush plug list, so it's possible ctx->cpu is
          * offline now
          */
-       spin_lock(&ctx->lock);
-       while (!list_empty(list)) {
-               struct request *rq;
-
-               rq = list_first_entry(list, struct request, queuelist);
+       list_for_each_entry(rq, list, queuelist) {
                 BUG_ON(rq->mq_ctx != ctx);
-               list_del_init(&rq->queuelist);
-               __blk_mq_insert_req_list(hctx, rq, false);
+               trace_block_rq_insert(hctx->queue, rq);
         }
+
+       spin_lock(&ctx->lock);
+       list_splice_tail_init(list, &ctx->rq_list);
         blk_mq_hctx_mark_pending(hctx, ctx);
         spin_unlock(&ctx->lock);
  }
@@ -1659,13 +1693,16 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
         ret = q->mq_ops->queue_rq(hctx, &bd);
         switch (ret) {
         case BLK_STS_OK:
+               blk_mq_update_dispatch_busy(hctx, false);
                 *cookie = new_cookie;
                 break;
         case BLK_STS_RESOURCE:
         case BLK_STS_DEV_RESOURCE:
+               blk_mq_update_dispatch_busy(hctx, true);
                 __blk_mq_requeue_request(rq);
                 break;
         default:
+               blk_mq_update_dispatch_busy(hctx, false);
                 *cookie = BLK_QC_T_NONE;
                 break;
         }
@@ -1700,7 +1737,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
         if (!blk_mq_get_dispatch_budget(hctx))
                 goto insert;
  
-       if (!blk_mq_get_driver_tag(rq, NULL, false)) {
+       if (!blk_mq_get_driver_tag(rq)) {
                 blk_mq_put_dispatch_budget(hctx);
                 goto insert;
         }
@@ -1748,6 +1785,27 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq)
         return ret;
  }
  
+void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+               struct list_head *list)
+{
+       while (!list_empty(list)) {
+               blk_status_t ret;
+               struct request *rq = list_first_entry(list, struct request,
+                               queuelist);
+
+               list_del_init(&rq->queuelist);
+               ret = blk_mq_request_issue_directly(rq);
+               if (ret != BLK_STS_OK) {
+                       if (ret == BLK_STS_RESOURCE ||
+                                       ret == BLK_STS_DEV_RESOURCE) {
+                               list_add(&rq->queuelist, list);
+                               break;
+                       }
+                       blk_mq_end_request(rq, ret);
+               }
+       }
+}
+
  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
  {
         const int is_sync = op_is_sync(bio->bi_opf);
@@ -1758,7 +1816,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
         struct blk_plug *plug;
         struct request *same_queue_rq = NULL;
         blk_qc_t cookie;
-       unsigned int wb_acct;
  
         blk_queue_bounce(q, &bio);
  
@@ -1774,19 +1831,19 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
         if (blk_mq_sched_bio_merge(q, bio))
                 return BLK_QC_T_NONE;
  
-       wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+       rq_qos_throttle(q, bio, NULL);
  
         trace_block_getrq(q, bio, bio->bi_opf);
  
         rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
         if (unlikely(!rq)) {
-               __wbt_done(q->rq_wb, wb_acct);
+               rq_qos_cleanup(q, bio);
                 if (bio->bi_opf & REQ_NOWAIT)
                         bio_wouldblock_error(bio);
                 return BLK_QC_T_NONE;
         }
  
-       wbt_track(rq, wb_acct);
+       rq_qos_track(q, rq, bio);
  
         cookie = request_to_qc_t(data.hctx, rq);
  
@@ -1849,7 +1906,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                         blk_mq_try_issue_directly(data.hctx, same_queue_rq,
                                         &cookie);
                 }
-       } else if (q->nr_hw_queues > 1 && is_sync) {
+       } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
+                       !data.hctx->dispatch_busy)) {
                 blk_mq_put_ctx(data.ctx);
                 blk_mq_bio_to_request(rq, bio);
                 blk_mq_try_issue_directly(data.hctx, rq, &cookie);
@@ -2087,8 +2145,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
         if (set->ops->exit_request)
                 set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
  
-       blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
-
         if (set->ops->exit_hctx)
                 set->ops->exit_hctx(hctx, hctx_idx);
  
@@ -2148,6 +2204,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
  
         hctx->nr_ctx = 0;
  
+       spin_lock_init(&hctx->dispatch_wait_lock);
         init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
         INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
  
@@ -2155,12 +2212,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
             set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
                 goto free_bitmap;
  
-       if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
-               goto exit_hctx;
-
         hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
         if (!hctx->fq)
-               goto sched_exit_hctx;
+               goto exit_hctx;
  
         if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
                 goto free_fq;
@@ -2174,8 +2228,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
  
   free_fq:
         kfree(hctx->fq);
- sched_exit_hctx:
-       blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
   exit_hctx:
         if (set->ops->exit_hctx)
                 set->ops->exit_hctx(hctx, hctx_idx);
@@ -2333,15 +2385,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
         int i;
  
         queue_for_each_hw_ctx(q, hctx, i) {
-               if (shared) {
-                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-                               atomic_inc(&q->shared_hctx_restart);
+               if (shared)
                         hctx->flags |= BLK_MQ_F_TAG_SHARED;
-               } else {
-                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-                               atomic_dec(&q->shared_hctx_restart);
+               else
                         hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-               }
         }
  }
  
@@ -2372,7 +2419,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
                 blk_mq_update_tag_set_depth(set, false);
         }
         mutex_unlock(&set->tag_list_lock);
-       synchronize_rcu();
         INIT_LIST_HEAD(&q->tag_set_list);
  }
  
@@ -2687,7 +2733,6 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
  {
         if (set->ops->map_queues) {
-               int cpu;
                 /*
                  * transport .map_queues is usually done in the following
                  * way:
@@ -2702,8 +2747,7 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
                  * killing stale mapping since one CPU may not be mapped
                  * to any hw queue.
                  */
-               for_each_possible_cpu(cpu)
-                       set->mq_map[cpu] = 0;
+               blk_mq_clear_mq_map(set);
  
                 return set->ops->map_queues(set);
         } else
@@ -2713,7 +2757,7 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
  /*
   * Alloc a tag set to be associated with one or more request queues.
   * May fail with EINVAL for various error conditions. May adjust the
- * requested depth down, if if it too large. In that case, the set
+ * requested depth down, if it's too large. In that case, the set
   * value will be stored in set->queue_depth.
   */
  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
@@ -2845,10 +2889,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
         return ret;
  }
  
+/*
+ * request_queue and elevator_type pair.
+ * It is just used by __blk_mq_update_nr_hw_queues to cache
+ * the elevator_type associated with a request_queue.
+ */
+struct blk_mq_qe_pair {
+       struct list_head node;
+       struct request_queue *q;
+       struct elevator_type *type;
+};
+
+/*
+ * Cache the elevator_type in qe pair list and switch the
+ * io scheduler to 'none'
+ */
+static bool blk_mq_elv_switch_none(struct list_head *head,
+               struct request_queue *q)
+{
+       struct blk_mq_qe_pair *qe;
+
+       if (!q->elevator)
+               return true;
+
+       qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
+       if (!qe)
+               return false;
+
+       INIT_LIST_HEAD(&qe->node);
+       qe->q = q;
+       qe->type = q->elevator->type;
+       list_add(&qe->node, head);
+
+       mutex_lock(&q->sysfs_lock);
+       /*
+        * After elevator_switch_mq, the previous elevator_queue will be
+        * released by elevator_release. The reference on the io scheduler
+        * module taken by elevator_get will also be put. So we need to take
+        * a reference on the io scheduler module here to prevent it from
+        * being removed.
+        */
+       __module_get(qe->type->elevator_owner);
+       elevator_switch_mq(q, NULL);
+       mutex_unlock(&q->sysfs_lock);
+
+       return true;
+}
+
+static void blk_mq_elv_switch_back(struct list_head *head,
+               struct request_queue *q)
+{
+       struct blk_mq_qe_pair *qe;
+       struct elevator_type *t = NULL;
+
+       list_for_each_entry(qe, head, node)
+               if (qe->q == q) {
+                       t = qe->type;
+                       break;
+               }
+
+       if (!t)
+               return;
+
+       list_del(&qe->node);
+       kfree(qe);
+
+       mutex_lock(&q->sysfs_lock);
+       elevator_switch_mq(q, t);
+       mutex_unlock(&q->sysfs_lock);
+}
+
  static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
                                                         int nr_hw_queues)
  {
         struct request_queue *q;
+       LIST_HEAD(head);
  
         lockdep_assert_held(&set->tag_list_lock);
  
@@ -2859,6 +2974,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
  
         list_for_each_entry(q, &set->tag_list, tag_set_list)
                 blk_mq_freeze_queue(q);
+       /*
+        * Sync with blk_mq_queue_tag_busy_iter.
+        */
+       synchronize_rcu();
+       /*
+        * Switch IO scheduler to 'none', cleaning up the data associated
+        * with the previous scheduler. We will switch back once we are done
+        * updating the new sw to hw queue mappings.
+        */
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               if (!blk_mq_elv_switch_none(&head, q))
+                       goto switch_back;
  
         set->nr_hw_queues = nr_hw_queues;
         blk_mq_update_queue_map(set);
@@ -2867,6 +2994,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
                 blk_mq_queue_reinit(q);
         }
  
+switch_back:
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_elv_switch_back(&head, q);
+
         list_for_each_entry(q, &set->tag_list, tag_set_list)
                 blk_mq_unfreeze_queue(q);
  }