blk-mq: free hw queue's resource in hctx's release handler
author Ming Lei <ming.lei@redhat.com>
Tue, 30 Apr 2019 01:52:25 +0000 (09:52 +0800)
committer Jens Axboe <axboe@kernel.dk>
Sat, 4 May 2019 13:24:05 +0000 (07:24 -0600)
Once blk_cleanup_queue() returns, tags shouldn't be used any more,
because blk_mq_free_tag_set() may be called. Commit 45a9c9d909b2
("blk-mq: Fix a use-after-free") fixes exactly this issue.

However, that commit introduces another issue. Before 45a9c9d909b2,
we were allowed to run the queue while it was being cleaned up, as long as
the queue's kobj refcount was held. After that commit, the queue can't be
run during queue cleanup; otherwise an oops can be triggered easily, because
some fields of hctx are freed by blk_mq_free_queue() in blk_cleanup_queue().

We have invented ways of addressing this kind of issue before, such as:

8dc765d438f1 ("SCSI: fix queue cleanup race before queue initialization is done")
c2856ae2f315 ("blk-mq: quiesce queue before freeing queue")

But these still can't cover all cases; recently James reported another
issue of this kind:

https://marc.info/?l=linux-scsi&m=155389088124782&w=2

This issue can be quite hard to address with the previous approaches, given
that scsi_run_queue() may run requeues for other LUNs.

Fix the above issue by freeing hctx's resources in its release handler. This
is safe because tags aren't needed for freeing such hctx resources.

This approach follows the typical design pattern for a kobject's release handler.

Cc: Dongli Zhang <dongli.zhang@oracle.com>
Cc: James Smart <james.smart@broadcom.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: linux-scsi@vger.kernel.org,
Cc: Martin K . Petersen <martin.petersen@oracle.com>,
Cc: Christoph Hellwig <hch@lst.de>,
Cc: James E . J . Bottomley <jejb@linux.vnet.ibm.com>,
Reported-by: James Smart <james.smart@broadcom.com>
Fixes: 45a9c9d909b2 ("blk-mq: Fix a use-after-free")
Cc: stable@vger.kernel.org
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-core.c
block/blk-mq-sysfs.c
block/blk-mq.c
block/blk-mq.h

index 2af1040b2fa61a2acc174c9ba515003d307774a5..81d209568a267975717bfd6ff1ef9a730f874693 100644 (file)
@@ -375,7 +375,7 @@ void blk_cleanup_queue(struct request_queue *q)
        blk_exit_queue(q);
 
        if (queue_is_mq(q))
-               blk_mq_free_queue(q);
+               blk_mq_exit_queue(q);
 
        percpu_ref_exit(&q->q_usage_counter);
 
index 61efc2a29e5880bdc9319620999f2e74e29bc2b4..7593c4c7897526268c3a1f11e149c320681770da 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/smp.h>
 
 #include <linux/blk-mq.h>
+#include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
@@ -34,6 +35,11 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
 {
        struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
                                                  kobj);
+
+       if (hctx->flags & BLK_MQ_F_BLOCKING)
+               cleanup_srcu_struct(hctx->srcu);
+       blk_free_flush_queue(hctx->fq);
+       sbitmap_free(&hctx->ctx_map);
        free_cpumask_var(hctx->cpumask);
        kfree(hctx->ctxs);
        kfree(hctx);
index 741cf8d55e9c7f69b21ef296a795996826dbf444..1fdb8de92a10d2ef1f5c14eacdfcd13e03120ede 100644 (file)
@@ -2268,12 +2268,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
 
-       if (hctx->flags & BLK_MQ_F_BLOCKING)
-               cleanup_srcu_struct(hctx->srcu);
-
        blk_mq_remove_cpuhp(hctx);
-       blk_free_flush_queue(hctx->fq);
-       sbitmap_free(&hctx->ctx_map);
 }
 
 static void blk_mq_exit_hw_queues(struct request_queue *q,
@@ -2908,7 +2903,8 @@ err_exit:
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
-void blk_mq_free_queue(struct request_queue *q)
+/* tags can _not_ be used after returning from blk_mq_exit_queue */
+void blk_mq_exit_queue(struct request_queue *q)
 {
        struct blk_mq_tag_set   *set = q->tag_set;
 
index 423ea88ab6fbaac08b4fe1367e6a778fdb70641a..633a5a77ee8be25872f7a21e6d1120ea5e4b67df 100644 (file)
@@ -37,7 +37,7 @@ struct blk_mq_ctx {
        struct kobject          kobj;
 } ____cacheline_aligned_in_smp;
 
-void blk_mq_free_queue(struct request_queue *q);
+void blk_mq_exit_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);