Re: [PATCH 3/3] blk-mq: Fix the queue freezing mechanism

From: Ming Lei
Date: Wed Sep 23 2015 - 23:23:06 EST


On Wed, 23 Sep 2015 15:14:10 -0700
Bart Van Assche <bart.vanassche@xxxxxxxxxxx> wrote:

> Ensure that blk_mq_queue_enter() waits if mq_freeze_depth is not
> zero. Ensure that the update of mq_freeze_depth by blk_mq_freeze_queue()
> is visible by all CPU cores before that function waits on
> mq_usage_counter.
>
> It is unfortunate that this patch introduces an smp_mb() in the
> hot path (blk_mq_queue_enter()) but I have not yet found a way to
> avoid this.
>
> I came across this code while analyzing a lockup triggered by
> deleting a SCSI host created by the SRP initiator immediately
> followed by a relogin.
>
> Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx>
> ---
> block/blk-mq.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 2077f0d..e3ad411 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -83,8 +83,13 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
> while (true) {
> int ret;
>
> - if (percpu_ref_tryget_live(&q->mq_usage_counter))
> - return 0;
> + if (percpu_ref_tryget_live(&q->mq_usage_counter)) {
> + /* Order mq_usage_counter and mq_freeze_depth accesses */
> + smp_mb();
> + if (!atomic_read(&q->mq_freeze_depth))
> + return 0;
> + percpu_ref_put(&q->mq_usage_counter);
> + }

IMO, mq_freeze_depth should only be accessed in the slow path, and it looks
like the race only happens during the small window between increasing
'mq_freeze_depth' and killing the percpu counter.

One solution I thought of is the following patch, which depends on
Akinobu's patch (blk-mq: fix freeze queue race
http://marc.info/?l=linux-kernel&m=143723697010781&w=2).

---
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f774f67..1c71c04 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -77,6 +77,17 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
}

+static inline int blk_mq_read_freeze_depth(struct request_queue *q)
+{
+ int depth;
+
+ mutex_lock(&q->mq_freeze_lock);
+ depth = q->mq_freeze_depth;
+ mutex_unlock(&q->mq_freeze_lock);
+
+ return depth;
+}
+
static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
{
while (true) {
@@ -89,7 +100,7 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
return -EBUSY;

ret = wait_event_interruptible(q->mq_freeze_wq,
- !atomic_read(&q->mq_freeze_depth) ||
+ !blk_mq_read_freeze_depth(q) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
@@ -113,12 +124,9 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)

void blk_mq_freeze_queue_start(struct request_queue *q)
{
- int freeze_depth;
-
mutex_lock(&q->mq_freeze_lock);

- freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
- if (freeze_depth == 1) {
+ if (!q->mq_freeze_depth++) {
percpu_ref_kill(&q->mq_usage_counter);
blk_mq_run_hw_queues(q, false);
}
@@ -149,7 +157,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)

mutex_lock(&q->mq_freeze_lock);

- freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
+ freeze_depth = --q->mq_freeze_depth;
WARN_ON_ONCE(freeze_depth < 0);
if (!freeze_depth) {
percpu_ref_reinit(&q->mq_usage_counter);
@@ -2084,7 +2092,7 @@ void blk_mq_free_queue(struct request_queue *q)
/* Basically redo blk_mq_init_queue with queue frozen */
static void blk_mq_queue_reinit(struct request_queue *q)
{
- WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
+ WARN_ON_ONCE(!ACCESS_ONCE(q->mq_freeze_depth));

blk_mq_sysfs_unregister(q);

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6cdf2b7..86fedcc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -436,7 +436,7 @@ struct request_queue {
struct mutex sysfs_lock;

int bypass_depth;
- atomic_t mq_freeze_depth;
+ int mq_freeze_depth;

#if defined(CONFIG_BLK_DEV_BSG)
bsg_job_fn *bsg_job_fn;



>
> if (!(gfp & __GFP_WAIT))
> return -EBUSY;
> @@ -136,6 +141,11 @@ static void blk_mq_freeze_queue_wait(struct request_queue *q)
> void blk_mq_freeze_queue(struct request_queue *q)
> {
> blk_mq_freeze_queue_start(q);
> + /*
> + * Ensure that the mq_freeze_depth update is visible before
> + * mq_usage_counter is read.
> + */
> + smp_mb();
> blk_mq_freeze_queue_wait(q);
> }
> EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/