blk-mq: when polling for IO, look for any completion
author	Jens Axboe <axboe@kernel.dk>
Mon, 26 Nov 2018 15:21:49 +0000 (08:21 -0700)
committer	Jens Axboe <axboe@kernel.dk>
Mon, 26 Nov 2018 15:25:40 +0000 (08:25 -0700)
To support async IO polling, we have to allow finding completions other
than the one we are looking for. Always pass -1 to the mq_ops->poll()
helper, and have it return how many events were found in this poll loop.
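
For illustration, a driver-side ->poll() under the new contract could look
like the sketch below. This is not part of the patch; the mydrv_* helpers
are hypothetical stand-ins for a driver's own completion-queue handling,
modeled on the nvme_process_cq() change further down.

	static int mydrv_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
	{
		struct mydrv_queue *mq = hctx->driver_data;
		int found = 0;

		/* tag is now always -1U, so reap every pending completion */
		while (mydrv_cqe_pending(mq)) {
			mydrv_complete_one(mq);
			found++;
		}

		/* return how many completions were processed in this pass */
		return found;
	}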

Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-core.c
block/blk-mq.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
include/linux/blkdev.h

index 04f5be4736385f6a4bed4c13adbba01c5656f95f..03c4202b69bf0057be334445d2e6ca1226abb0a4 100644 (file)
@@ -1273,10 +1273,19 @@ blk_qc_t submit_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+/**
+ * blk_poll - poll for IO completions
+ * @q:  the queue
+ * @cookie: cookie passed back at IO submission time
+ *
+ * Description:
+ *    Poll for completions on the passed in queue. Returns number of
+ *    completed entries found.
+ */
+int blk_poll(struct request_queue *q, blk_qc_t cookie)
 {
        if (!q->poll_fn || !blk_qc_t_valid(cookie))
-               return false;
+               return 0;
 
        if (current->plug)
                blk_flush_plug_list(current->plug, false);
index b16204df65d1b693ecb3f1b053263d2e4afec6c6..ec6c79578332344190eefe307ccc6f6757230e34 100644 (file)
@@ -3285,15 +3285,12 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
                return false;
 
        /*
-        * poll_nsec can be:
+        * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
         *
-        * -1:  don't ever hybrid sleep
         *  0:  use half of prev avg
         * >0:  use this specific value
         */
-       if (q->poll_nsec == -1)
-               return false;
-       else if (q->poll_nsec > 0)
+       if (q->poll_nsec > 0)
                nsecs = q->poll_nsec;
        else
                nsecs = blk_mq_poll_nsecs(q, hctx, rq);
@@ -3330,11 +3327,41 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
        return true;
 }
 
-static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static bool blk_mq_poll_hybrid(struct request_queue *q,
+                              struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
 {
-       struct request_queue *q = hctx->queue;
+       struct request *rq;
+
+       if (q->poll_nsec == -1)
+               return false;
+
+       if (!blk_qc_t_is_internal(cookie))
+               rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+       else {
+               rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+               /*
+                * With scheduling, if the request has completed, we'll
+                * get a NULL return here, as we clear the sched tag when
+                * that happens. The request still remains valid, like always,
+                * so we should be safe with just the NULL check.
+                */
+               if (!rq)
+                       return false;
+       }
+
+       return blk_mq_poll_hybrid_sleep(q, hctx, rq);
+}
+
+static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
+{
+       struct blk_mq_hw_ctx *hctx;
        long state;
 
+       if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+               return 0;
+
+       hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+
        /*
         * If we sleep, have the caller restart the poll loop to reset
         * the state. Like for the other success return cases, the
@@ -3342,7 +3369,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
         * the IO isn't complete, we'll get called again and will go
         * straight to the busy poll loop.
         */
-       if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
+       if (blk_mq_poll_hybrid(q, hctx, cookie))
                return 1;
 
        hctx->poll_considered++;
@@ -3353,7 +3380,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 
                hctx->poll_invoked++;
 
-               ret = q->mq_ops->poll(hctx, rq->tag);
+               ret = q->mq_ops->poll(hctx, -1U);
                if (ret > 0) {
                        hctx->poll_success++;
                        __set_current_state(TASK_RUNNING);
@@ -3374,32 +3401,6 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
        return 0;
 }
 
-static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
-{
-       struct blk_mq_hw_ctx *hctx;
-       struct request *rq;
-
-       if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-               return 0;
-
-       hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
-       if (!blk_qc_t_is_internal(cookie))
-               rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
-       else {
-               rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
-               /*
-                * With scheduling, if the request has completed, we'll
-                * get a NULL return here, as we clear the sched tag when
-                * that happens. The request still remains valid, like always,
-                * so we should be safe with just the NULL check.
-                */
-               if (!rq)
-                       return 0;
-       }
-
-       return __blk_mq_poll(hctx, rq);
-}
-
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
        return rq->mq_ctx->cpu;
index 57e790391b82add712aa7f35eb6ef7e06076562c..de50d80ecc847cd442f4429797fe68d499d74de7 100644 (file)
@@ -1012,15 +1012,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
        }
 }
 
-static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
-               u16 *end, int tag)
+static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+                                 u16 *end, unsigned int tag)
 {
-       bool found = false;
+       int found = 0;
 
        *start = nvmeq->cq_head;
-       while (!found && nvme_cqe_pending(nvmeq)) {
-               if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
-                       found = true;
+       while (nvme_cqe_pending(nvmeq)) {
+               if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+                       found++;
                nvme_update_cq_head(nvmeq);
        }
        *end = nvmeq->cq_head;
@@ -1062,7 +1062,7 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
        u16 start, end;
-       bool found;
+       int found;
 
        if (!nvme_cqe_pending(nvmeq))
                return 0;
index d181cafedc584916d0b04db2e08dd9e0802cba0c..c2c3e1a5b7affe09fa77ffc87e4961be73720809 100644 (file)
@@ -1409,12 +1409,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
        WARN_ON_ONCE(ret);
 }
 
-static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
-               struct nvme_completion *cqe, struct ib_wc *wc, int tag)
+static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
+               struct nvme_completion *cqe, struct ib_wc *wc)
 {
        struct request *rq;
        struct nvme_rdma_request *req;
-       int ret = 0;
 
        rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
        if (!rq) {
@@ -1422,7 +1421,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                        "tag 0x%x on QP %#x not found\n",
                        cqe->command_id, queue->qp->qp_num);
                nvme_rdma_error_recovery(queue->ctrl);
-               return ret;
+               return;
        }
        req = blk_mq_rq_to_pdu(rq);
 
@@ -1437,6 +1436,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                        nvme_rdma_error_recovery(queue->ctrl);
                }
        } else if (req->mr) {
+               int ret;
+
                ret = nvme_rdma_inv_rkey(queue, req);
                if (unlikely(ret < 0)) {
                        dev_err(queue->ctrl->ctrl.device,
@@ -1445,19 +1446,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                        nvme_rdma_error_recovery(queue->ctrl);
                }
                /* the local invalidation completion will end the request */
-               return 0;
+               return;
        }
 
-       if (refcount_dec_and_test(&req->ref)) {
-               if (rq->tag == tag)
-                       ret = 1;
+       if (refcount_dec_and_test(&req->ref))
                nvme_end_request(rq, req->status, req->result);
-       }
-
-       return ret;
 }
 
-static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
+static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
        struct nvme_rdma_qe *qe =
                container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
@@ -1465,11 +1461,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
        struct ib_device *ibdev = queue->device->dev;
        struct nvme_completion *cqe = qe->data;
        const size_t len = sizeof(struct nvme_completion);
-       int ret = 0;
 
        if (unlikely(wc->status != IB_WC_SUCCESS)) {
                nvme_rdma_wr_error(cq, wc, "RECV");
-               return 0;
+               return;
        }
 
        ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
@@ -1484,16 +1479,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
                nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                &cqe->result);
        else
-               ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag);
+               nvme_rdma_process_nvme_rsp(queue, cqe, wc);
        ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
 
        nvme_rdma_post_recv(queue, qe);
-       return ret;
-}
-
-static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-       __nvme_rdma_recv_done(cq, wc, -1);
 }
 
 static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
@@ -1758,10 +1747,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
                struct ib_cqe *cqe = wc.wr_cqe;
 
                if (cqe) {
-                       if (cqe->done == nvme_rdma_recv_done)
-                               found |= __nvme_rdma_recv_done(cq, &wc, tag);
-                       else
+                       if (cqe->done == nvme_rdma_recv_done) {
+                               nvme_rdma_recv_done(cq, &wc);
+                               found++;
+                       } else {
                                cqe->done(cq, &wc);
+                       }
                }
        }
 
index 9b53db06ad0824c776dbd28649f5f89f956e1639..f3015e9b5ae37b7406e9d97e4ffcbbcbab391978 100644 (file)
@@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+int blk_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {