sbitmap: optimize wakeup check
Author: Jens Axboe <axboe@kernel.dk>
Fri, 30 Nov 2018 00:36:41 +0000 (17:36 -0700)
Committer: Jens Axboe <axboe@kernel.dk>
Fri, 30 Nov 2018 21:48:04 +0000 (14:48 -0700)
Even if we have no waiters on any of the sbitmap_queue wait states, we
still have to loop every entry to check. We do this for every IO, so
the cost adds up.

Shift a bit of the cost to the slow path, when we actually have waiters.
Wrap prepare_to_wait_exclusive() and finish_wait(), so we can maintain
an internal count of how many are currently active. Then we can simply
check this count in sbq_wake_ptr() and not have to loop if we don't
have any sleepers.

Convert the two users of sbitmap with waiting, blk-mq-tag and iSCSI.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq-tag.c
drivers/target/iscsi/iscsi_target_util.c
include/linux/sbitmap.h
lib/sbitmap.c

index 87bc5df72d48a3a66ed2141f5240c3083c336037..2089c6c62f44e4e1ffcccdfb542ed17e92f3a7a2 100644 (file)
@@ -110,7 +110,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct sbitmap_queue *bt;
        struct sbq_wait_state *ws;
-       DEFINE_WAIT(wait);
+       DEFINE_SBQ_WAIT(wait);
        unsigned int tag_offset;
        bool drop_ctx;
        int tag;
@@ -154,8 +154,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                if (tag != -1)
                        break;
 
-               prepare_to_wait_exclusive(&ws->wait, &wait,
-                                               TASK_UNINTERRUPTIBLE);
+               sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
 
                tag = __blk_mq_get_tag(data, bt);
                if (tag != -1)
@@ -167,6 +166,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                bt_prev = bt;
                io_schedule();
 
+               sbitmap_finish_wait(bt, ws, &wait);
+
                data->ctx = blk_mq_get_ctx(data->q);
                data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
                                                data->ctx->cpu);
@@ -176,8 +177,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                else
                        bt = &tags->bitmap_tags;
 
-               finish_wait(&ws->wait, &wait);
-
                /*
                 * If destination hw queue is changed, fake wake up on
                 * previous queue for compensating the wake up miss, so
@@ -192,7 +191,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
        if (drop_ctx && data->ctx)
                blk_mq_put_ctx(data->ctx);
 
-       finish_wait(&ws->wait, &wait);
+       sbitmap_finish_wait(bt, ws, &wait);
 
 found_tag:
        return tag + tag_offset;
index 36b742932c724c8e6f80ca29cb36d0d20208a9a7..86987da86dd6d83ef9d1b00eb65733106bf7a1e2 100644 (file)
@@ -150,24 +150,26 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
 static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
 {
        int tag = -1;
-       DEFINE_WAIT(wait);
+       DEFINE_SBQ_WAIT(wait);
        struct sbq_wait_state *ws;
+       struct sbitmap_queue *sbq;
 
        if (state == TASK_RUNNING)
                return tag;
 
-       ws = &se_sess->sess_tag_pool.ws[0];
+       sbq = &se_sess->sess_tag_pool;
+       ws = &sbq->ws[0];
        for (;;) {
-               prepare_to_wait_exclusive(&ws->wait, &wait, state);
+               sbitmap_prepare_to_wait(sbq, ws, &wait, state);
                if (signal_pending_state(state, current))
                        break;
-               tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup);
+               tag = sbitmap_queue_get(sbq, cpup);
                if (tag >= 0)
                        break;
                schedule();
        }
 
-       finish_wait(&ws->wait, &wait);
+       sbitmap_finish_wait(sbq, ws, &wait);
        return tag;
 }
 
index 81359d45751e6e23c689cbc9e465f4723988dd77..92806a2dbab730c517adb9b3d141a80166956023 100644 (file)
@@ -135,6 +135,11 @@ struct sbitmap_queue {
         */
        struct sbq_wait_state *ws;
 
+       /*
+        * @ws_active: count of currently active ws waitqueues
+        */
+       atomic_t ws_active;
+
        /**
         * @round_robin: Allocate bits in strict round-robin order.
         */
@@ -552,4 +557,33 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
  */
 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
 
+struct sbq_wait {
+       int accounted;
+       struct wait_queue_entry wait;
+};
+
+#define DEFINE_SBQ_WAIT(name)                                                  \
+       struct sbq_wait name = {                                                \
+               .accounted = 0,                                                 \
+               .wait = {                                                       \
+                       .private        = current,                              \
+                       .func           = autoremove_wake_function,             \
+                       .entry          = LIST_HEAD_INIT((name).wait.entry),    \
+               }                                                               \
+       }
+
+/*
+ * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
+ * internal state.
+ */
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+                               struct sbq_wait_state *ws,
+                               struct sbq_wait *sbq_wait, int state);
+
+/*
+ * Must be paired with sbitmap_prepare_to_wait().
+ */
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+                               struct sbq_wait *sbq_wait);
+
 #endif /* __LINUX_SCALE_BITMAP_H */
index f99382e593148fcbc118c71e91c548663d59be0a..a89fbe7cf6ca2c6443222ad4e849e5ce809f76c7 100644 (file)
@@ -394,6 +394,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
        sbq->min_shallow_depth = UINT_MAX;
        sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
        atomic_set(&sbq->wake_index, 0);
+       atomic_set(&sbq->ws_active, 0);
 
        sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
        if (!sbq->ws) {
@@ -509,6 +510,9 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
        int i, wake_index;
 
+       if (!atomic_read(&sbq->ws_active))
+               return NULL;
+
        wake_index = atomic_read(&sbq->wake_index);
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                struct sbq_wait_state *ws = &sbq->ws[wake_index];
@@ -634,6 +638,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 
        seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
        seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+       seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));
 
        seq_puts(m, "ws={\n");
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
@@ -649,3 +654,26 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
        seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
+
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+                            struct sbq_wait_state *ws,
+                            struct sbq_wait *sbq_wait, int state)
+{
+       if (!sbq_wait->accounted) {
+               atomic_inc(&sbq->ws_active);
+               sbq_wait->accounted = 1;
+       }
+       prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
+}
+EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
+
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+                        struct sbq_wait *sbq_wait)
+{
+       finish_wait(&ws->wait, &sbq_wait->wait);
+       if (sbq_wait->accounted) {
+               atomic_dec(&sbq->ws_active);
+               sbq_wait->accounted = 0;
+       }
+}
+EXPORT_SYMBOL_GPL(sbitmap_finish_wait);