[PATCH 3/3] blk-mq: optimise blk_mq_flush_plug_list()

From: Pavel Begunkov
Date: Thu Nov 28 2019 - 16:12:32 EST


Instead of using list_del_init() in a loop, that generates a lot of
unnecessary memory read/writes, iterate from the first request of a
batch and cut out a sublist with list_cut_before().

Apart from removing the list node initialisation part, this is more
register-friendly, and the assembly uses the stack less intensively.

list_empty() at the beginning is done with hope, that the compiler can
optimise out the same check in the following list_splice_init().

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
block/blk-mq.c | 57 +++++++++++++++++---------------------------------
1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f32a3cfdd34e..3c71d52b6401 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1678,14 +1678,10 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)

void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
- struct blk_mq_hw_ctx *this_hctx;
- struct blk_mq_ctx *this_ctx;
- struct request_queue *this_q;
- struct request *rq;
LIST_HEAD(list);
- LIST_HEAD(rq_list);
- unsigned int depth;

+ if (list_empty(&plug->mq_list))
+ return;
list_splice_init(&plug->mq_list, &list);

if (plug->rq_count > 2 && plug->multiple_queues)
@@ -1693,42 +1689,27 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)

plug->rq_count = 0;

- this_q = NULL;
- this_hctx = NULL;
- this_ctx = NULL;
- depth = 0;
-
- while (!list_empty(&list)) {
- rq = list_entry_rq(list.next);
- list_del_init(&rq->queuelist);
- BUG_ON(!rq->q);
- if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) {
- if (this_hctx) {
- trace_block_unplug(this_q, depth, !from_schedule);
- blk_mq_sched_insert_requests(this_hctx, this_ctx,
- &rq_list,
- from_schedule);
- }
-
- this_q = rq->q;
- this_ctx = rq->mq_ctx;
- this_hctx = rq->mq_hctx;
- depth = 0;
+ do {
+ struct list_head rq_list;
+ struct request *rq, *head_rq = list_entry_rq(list.next);
+ struct list_head *pos = &head_rq->queuelist; /* skip first */
+ struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
+ struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
+ unsigned int depth = 1;
+
+ list_for_each_continue(pos, &list) {
+ rq = list_entry_rq(pos);
+ BUG_ON(!rq->q);
+ if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
+ break;
+ depth++;
}

- depth++;
- list_add_tail(&rq->queuelist, &rq_list);
- }
-
- /*
- * If 'this_hctx' is set, we know we have entries to complete
- * on 'rq_list'. Do those.
- */
- if (this_hctx) {
- trace_block_unplug(this_q, depth, !from_schedule);
+ list_cut_before(&rq_list, &list, pos);
+ trace_block_unplug(head_rq->q, depth, !from_schedule);
blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
from_schedule);
- }
+ } while(!list_empty(&list));
}

static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
--
2.24.0