On Wed, Jul 14, 2021 at 11:06:26PM +0800, John Garry wrote:
Currently a full set of static requests is allocated per hw queue per
tagset when a shared sbitmap is used.
However, only tagset->queue_depth number of requests may be active at
any given time. As such, only tagset->queue_depth number of static
requests are required.
The same goes for using an IO scheduler, which allocates a full set of
static requests per hw queue per request queue.
This series very significantly reduces memory usage in both scenarios by
allocating static rqs per tagset and per request queue, respectively,
rather than per hw queue per tagset and per hw queue per request queue.
For the megaraid_sas driver on my 128-CPU arm64 system with 1x SATA disk, we
save approx. 300MB(!) [370MB -> 60MB].
A couple of patches are marked as RFC, as there may be a better
implementation approach.
There is another candidate for addressing this issue, and it looks simpler:
block/blk-mq-sched.c | 4 ++++
block/blk-mq-tag.c | 4 ++++
block/blk-mq-tag.h | 3 +++
block/blk-mq.c | 18 ++++++++++++++++++
block/blk-mq.h | 11 +++++++++++
5 files changed, 40 insertions(+)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c838d81ac058..b9236ee0fe4e 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -538,6 +538,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
if (!hctx->sched_tags)
return -ENOMEM;
+ blk_mq_set_master_tags(hctx->sched_tags,
+ q->queue_hw_ctx[0]->sched_tags, hctx->flags,
+ hctx_idx);
+
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
if (ret)
blk_mq_sched_free_tags(set, hctx, hctx_idx);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 86f87346232a..c471a073234d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -608,6 +608,10 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
tags->nr_reserved_tags, set->flags);
if (!new)
return -ENOMEM;
+
+ blk_mq_set_master_tags(new,
+ hctx->queue->queue_hw_ctx[0]->sched_tags, set->flags,
+ hctx->queue_num);
ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
if (ret) {
blk_mq_free_rq_map(new, set->flags);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 8ed55af08427..0a3fbbc61e06 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -21,6 +21,9 @@ struct blk_mq_tags {
struct request **static_rqs;
struct list_head page_list;
+ /* only used for blk_mq_is_sbitmap_shared() */
+ struct blk_mq_tags *master;
+
/*
* used to clear request reference in rqs[] before freeing one
* request pool
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2c4ac51e54eb..ef8a6a7e5f7c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2348,6 +2348,15 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
{
struct page *page;
+ if (blk_mq_is_sbitmap_shared(set->flags)) {
+ if (tags->master)
+ tags = tags->master;
+ if (hctx_idx < set->nr_hw_queues - 1) {
+ blk_mq_clear_rq_mapping(set, tags, hctx_idx);
+ return;
+ }
+ }
+
if (tags->rqs && set->ops->exit_request) {
int i;
@@ -2444,6 +2453,12 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
size_t rq_size, left;
int node;
+ if (blk_mq_is_sbitmap_shared(set->flags) && tags->master) {
+ memcpy(tags->static_rqs, tags->master->static_rqs,
+ sizeof(tags->static_rqs[0]) * tags->nr_tags);
+ return 0;
+ }
+
node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -2860,6 +2875,9 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
if (!set->tags[hctx_idx])
return false;
+ blk_mq_set_master_tags(set->tags[hctx_idx], set->tags[0], flags,
+ hctx_idx);
+
ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
set->queue_depth);
if (!ret)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d08779f77a26..a08b89be6acc 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -354,5 +354,16 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return __blk_mq_active_requests(hctx) < depth;
}
+static inline void blk_mq_set_master_tags(struct blk_mq_tags *tags,
+ struct blk_mq_tags *master_tags, unsigned int flags,
+ unsigned int hctx_idx)
+{ /* Set or clear tags->master according to hctx_idx; shared-sbitmap only. */
+ if (blk_mq_is_sbitmap_shared(flags)) { /* otherwise tags->master is left untouched */
+ if (hctx_idx) /* any hw queue other than index 0 */
+ tags->master = master_tags; /* point at the caller-supplied master tags */
+ else /* hw queue 0 */
+ tags->master = NULL; /* index 0 gets no master */
+ }
+}
#endif