[PATCH 8/8] blk-mq: support per-dispatch_queue flush machinery
From: Ming Lei
Date: Tue Sep 09 2014 - 09:06:51 EST
This patch supports running one single flush machinery for
each blk-mq dispatch queue, so that:
- current init_request and exit_request callbacks can
cover flush request too, then the ugly and buggy way of
initializing flush request's pdu can be fixed
- flushing performance gets improved in case of multi hw-queue
In both fio write and randwrite test over virtio-blk(4 hw queues,
backed by nullblk) with sync=1, ioengine=sync, iodepth=64, numjobs=4,
it is observed that throughput gets increased by 70% in the VM
over my laptop environment.
The multi virtqueue feature isn't merged to QEMU yet, and patches for
the feature can be found in below tree:
git://kernel.ubuntu.com/ming/qemu.git v2.1.0-mq.3
And simply passing 'num_queues=4 vectors=5' should be enough to
enable multi queue feature for QEMU virtio-blk.
Suggested-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx>
---
block/blk-flush.c | 141 ++++++++++++++++++++++++++++++++++++++----------
block/blk.h | 12 ++++-
include/linux/blk-mq.h | 2 +
3 files changed, 125 insertions(+), 30 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 4a445a1..2fc79bf 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -482,57 +482,143 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
}
EXPORT_SYMBOL(blkdev_issue_flush);
-static int blk_mq_init_flush(struct request_queue *q)
+static int blk_alloc_flush_queue(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
+ struct blk_flush_queue **pfq)
{
- struct blk_mq_tag_set *set = q->tag_set;
- struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+ struct blk_flush_queue *fq;
+ int rq_sz = sizeof(struct request);
- spin_lock_init(&fq->mq_flush_lock);
+ if (hctx) {
+ int cmd_sz = q->tag_set->cmd_size;
+ int node = hctx->numa_node;
+
+ fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
+ if (!fq)
+ goto failed;
+
+ rq_sz = round_up(rq_sz + cmd_sz, cache_line_size());
+ fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
+ if (!fq->flush_rq)
+ goto rq_failed;
+
+ spin_lock_init(&fq->mq_flush_lock);
+ } else {
+ fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+ if (!fq)
+ goto failed;
+
+ fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+ if (!fq->flush_rq)
+ goto rq_failed;
+ }
+
+ INIT_LIST_HEAD(&fq->flush_queue[0]);
+ INIT_LIST_HEAD(&fq->flush_queue[1]);
+ INIT_LIST_HEAD(&fq->flush_data_in_flight);
- fq->flush_rq = kzalloc(round_up(sizeof(struct request) +
- set->cmd_size, cache_line_size()),
- GFP_KERNEL);
- if (!fq->flush_rq)
- return -ENOMEM;
+ *pfq = fq;
return 0;
+
+ rq_failed:
+ kfree(fq);
+ failed:
+ return -ENOMEM;
}
-static void blk_mq_exit_flush(struct request_queue *q)
+static void blk_free_flush_queue(struct blk_flush_queue *fq)
{
- struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+ if (!fq)
+ return;
kfree(fq->flush_rq);
kfree(fq);
}
-int blk_init_flush(struct request_queue *q)
+static void __blk_mq_exit_flush(struct request_queue *q,
+ unsigned free_end, unsigned int exit_end)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int k;
+ struct blk_flush_queue *fq;
+ struct blk_mq_tag_set *set = q->tag_set;
+ unsigned start_idx = set->queue_depth;
+
+ queue_for_each_hw_ctx(q, hctx, k) {
+ if (k >= free_end)
+ break;
+
+ fq = hctx->fq;
+ if (k < exit_end && set->ops->exit_request)
+ set->ops->exit_request(set->driver_data,
+ fq->flush_rq, k,
+ start_idx + k);
+
+ blk_free_flush_queue(fq);
+ }
+
+}
+
+static int blk_mq_init_flush(struct request_queue *q)
{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i, j = 0;
+ struct blk_flush_queue *fq;
int ret;
- struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+ struct blk_mq_tag_set *set = q->tag_set;
+ unsigned start_idx = set->queue_depth;
- if (!fq)
- return -ENOMEM;
+ queue_for_each_hw_ctx(q, hctx, i) {
+ ret = blk_alloc_flush_queue(q, hctx, &fq);
+ if (ret)
+ goto fail;
+ hctx->fq = fq;
+ }
- q->fq = fq;
- INIT_LIST_HEAD(&fq->flush_queue[0]);
- INIT_LIST_HEAD(&fq->flush_queue[1]);
- INIT_LIST_HEAD(&fq->flush_data_in_flight);
+ queue_for_each_hw_ctx(q, hctx, j) {
+ fq = hctx->fq;
+ if (set->ops->init_request) {
+ ret = set->ops->init_request(set->driver_data,
+ fq->flush_rq, j, start_idx + j,
+ hctx->numa_node);
+ if (ret)
+ goto fail;
+ }
+ }
+
+ return 0;
+
+ fail:
+ __blk_mq_exit_flush(q, i, j);
+ return ret;
+}
+
+static void blk_mq_exit_flush(struct request_queue *q)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+
+ __blk_mq_exit_flush(q, set->nr_hw_queues, set->nr_hw_queues);
+}
+
+int blk_init_flush(struct request_queue *q)
+{
+ int ret;
if (q->mq_ops) {
ret = blk_mq_init_flush(q);
if (ret)
goto failed;
} else {
- ret = -ENOMEM;
- fq->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
- if (!fq->flush_rq)
+ struct blk_flush_queue *fq;
+
+ ret = blk_alloc_flush_queue(q, NULL, &fq);
+ if (ret)
goto failed;
+ q->fq = fq;
}
return 0;
failed:
- kfree(fq);
- q->fq = NULL;
return ret;
}
@@ -540,9 +626,6 @@ void blk_exit_flush(struct request_queue *q)
{
if (q->mq_ops)
blk_mq_exit_flush(q);
- else {
- struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
- kfree(fq->flush_rq);
- kfree(fq);
- }
+ else
+ blk_free_flush_queue(q->fq);
}
diff --git a/block/blk.h b/block/blk.h
index 30f8033..9dcc11c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,6 +2,8 @@
#define BLK_INTERNAL_H
#include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
/* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME (HZ/50UL)
@@ -31,7 +33,15 @@ extern struct ida blk_queue_ida;
static inline struct blk_flush_queue *blk_get_flush_queue(
struct request_queue *q, struct blk_mq_ctx *ctx)
{
- return q->fq;
+ struct blk_mq_hw_ctx *hctx;
+
+ if (!q->mq_ops)
+ return q->fq;
+ WARN_ON(!ctx);
+
+ hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+ return hctx->fq;
}
static inline void __blk_get_queue(struct request_queue *q)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f2..1f3c523 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -4,6 +4,7 @@
#include <linux/blkdev.h>
struct blk_mq_tags;
+struct blk_flush_queue;
struct blk_mq_cpu_notifier {
struct list_head list;
@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {
struct request_queue *queue;
unsigned int queue_num;
+ struct blk_flush_queue *fq;
void *driver_data;
--
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/