[PATCH -next] blk-mq: fix tag_get wait task can't be awakened

From: Laibin Qiu
Date: Mon Sep 13 2021 - 03:58:52 EST


When multiple hctxs share one tagset, the wake_batch is calculated
during initialization from the queue depth. However, with a shared
tagset the queue depth assigned to each user may be smaller than
wake_batch. As a result, tasks on the wait queue may never be woken up,
which leads to a hang.

Fix this by recalculating wake_batch whenever active_queues is
incremented or decremented.

Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
Signed-off-by: Laibin Qiu <qiulaibin@xxxxxxxxxx>
---
block/blk-mq-tag.c | 44 +++++++++++++++++++++++++++++++++++++++--
include/linux/sbitmap.h | 8 ++++++++
lib/sbitmap.c | 3 ++-
3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 86f87346232a..d02f5ac0004c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -16,6 +16,27 @@
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"

+static void bt_update_wake_batch(struct sbitmap_queue *bt, unsigned int users)
+{
+ unsigned int depth;
+
+ depth = max((bt->sb.depth + users - 1) / users, 4U);
+ sbitmap_queue_update_wake_batch(bt, depth);
+}
+
+/*
+ * Recalculate the wake batch when tags are shared by multiple hctxs.
+ */
+static void blk_mq_update_wake_batch(struct sbitmap_queue *bitmap_tags,
+ struct sbitmap_queue *breserved_tags, unsigned int users)
+{
+ if (!users)
+ return;
+
+ bt_update_wake_batch(bitmap_tags, users);
+ bt_update_wake_batch(breserved_tags, users);
+}
+
/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
@@ -24,17 +45,29 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_sbitmap_shared(hctx->flags)) {
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;

if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+ !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
atomic_inc(&set->active_queues_shared_sbitmap);
+
+ users = atomic_read(&set->active_queues_shared_sbitmap);
+ blk_mq_update_wake_batch(&set->__bitmap_tags,
+ &set->__breserved_tags, users);
+ }
} else {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+ !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
atomic_inc(&hctx->tags->active_queues);
+
+ users = atomic_read(&hctx->tags->active_queues);
+ blk_mq_update_wake_batch(&hctx->tags->__bitmap_tags,
+ &hctx->tags->__breserved_tags, users);
+ }
}

return true;
@@ -59,16 +92,23 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
struct blk_mq_tags *tags = hctx->tags;
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;
+ unsigned int users;

if (blk_mq_is_sbitmap_shared(hctx->flags)) {
if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
&q->queue_flags))
return;
atomic_dec(&set->active_queues_shared_sbitmap);
+ users = atomic_read(&set->active_queues_shared_sbitmap);
+ blk_mq_update_wake_batch(&set->__bitmap_tags,
+ &set->__breserved_tags, users);
} else {
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return;
atomic_dec(&tags->active_queues);
+ users = atomic_read(&tags->active_queues);
+ blk_mq_update_wake_batch(&tags->__bitmap_tags,
+ &tags->__breserved_tags, users);
}

blk_mq_tag_wakeup_all(tags, false);
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 2713e689ad66..d49e4f054bfe 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -406,6 +406,14 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
sbitmap_free(&sbq->sb);
}

+/**
+ * sbitmap_queue_update_wake_batch() - Recalculate wake batch.
+ * @sbq: Bitmap queue.
+ * @depth: New queue depth.
+ */
+void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth);
+
/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index b25db9be938a..bbe1d663763f 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,7 +457,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
unsigned int depth)
{
unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
@@ -475,6 +475,7 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
atomic_set(&sbq->ws[i].wait_cnt, 1);
}
}
+EXPORT_SYMBOL_GPL(sbitmap_queue_update_wake_batch);

void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
--
2.22.0