Re: [PATCHSET v2] blk-mq: reimplement timeout handling
From: Jens Axboe
Date: Tue Dec 12 2017 - 15:23:12 EST
On 12/12/2017 12:01 PM, Tejun Heo wrote:
> Changes from the last version[1]
>
> - BLK_EH_RESET_TIMER handling fixed.
>
> - s/request->gstate_seqc/request->gstate_seq/
>
> - READ_ONCE() added to blk_mq_rq_update_state().
>
> - Removed left over blk_clear_rq_complete() invocation from
> blk_mq_rq_timed_out().
>
> Currently, blk-mq timeout path synchronizes against the usual
> issue/completion path using a complex scheme involving atomic
> bitflags, REQ_ATOM_*, memory barriers and subtle memory coherence
> rules. Unfortunately, it contains quite a few holes.
>
> It's pretty easy to make blk_mq_check_expired() terminate a later
> instance of a request. If we induce a 5 sec delay before the
> time_after_eq() test in blk_mq_check_expired(), shorten the timeout to
> 2s, and issue back-to-back large IOs, blk-mq starts timing out
> requests spuriously pretty quickly. Nothing actually timed out. It
> just made the call on a recycled instance of a request and then
> terminated a later instance long after the original instance finished.
> The scenario isn't theoretical either.
>
> This patchset replaces the broken synchronization mechanism with an
> RCU and generation number based one. Please read the patch description
> of the second patch for more details.
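
For context, here is a minimal sketch of the generation-number idea
described above. The struct and function names are purely illustrative
assumptions, not the actual blk-mq fields or helpers from the patchset;
it only shows why a stale timeout decision can no longer kill a
recycled request instance:

/*
 * Illustrative sketch only (hypothetical names, not blk-mq code):
 * every reuse of a request bumps its generation, and the timeout path
 * records which generation it decided to abort.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct fake_rq {
	_Atomic uint64_t gen;	/* bumped each time the tag is recycled */
	uint64_t aborted_gen;	/* generation the timeout path is aborting */
};

/* Issue path: starting a new instance moves to a new generation. */
static void fake_start(struct fake_rq *rq)
{
	atomic_fetch_add(&rq->gen, 1);
}

/* Timeout path, step 1: record which generation looked expired. */
static void fake_mark_aborted(struct fake_rq *rq)
{
	rq->aborted_gen = atomic_load(&rq->gen);
}

/* Timeout path, step 2 (after an RCU grace period in the real code):
 * only terminate the request if it is still the same generation; a
 * recycled instance carries a newer generation and is left alone. */
static bool fake_timeout_may_fire(struct fake_rq *rq)
{
	return rq->aborted_gen == atomic_load(&rq->gen);
}

/* Normal completion path: complete only if the timeout path has not
 * claimed this exact generation (this mirrors the aborted_gstate check
 * visible in the diff further down). */
static bool fake_may_complete(struct fake_rq *rq)
{
	return rq->aborted_gen != atomic_load(&rq->gen);
}
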
I like this a lot; it's a lot less fragile and more intuitive/readable
than what we have now. And apparently less error prone... I'll do
some testing with this.
BTW, since you add a few more BLK_MQ_F_BLOCKING checks, I think something
like the below would be a good cleanup on top of this.
From: Jens Axboe <axboe@xxxxxxxxx>
Subject: [PATCH] blk-mq: move hctx lock/unlock into a helper
Move the RCU vs SRCU logic into lock/unlock helpers, which makes
the actual functional bits within the locked region much easier
to read.
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 663069dca4d6..dec3d1bb0559 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -572,6 +572,22 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
return aborted_gstate;
}
+static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
+{
+ if (!(hctx->flags & BLK_MQ_F_BLOCKING))
+ rcu_read_unlock();
+ else
+ srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
+}
+
+static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
+{
+ if (!(hctx->flags & BLK_MQ_F_BLOCKING))
+ rcu_read_lock();
+ else
+ *srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
+}
+
/**
* blk_mq_complete_request - end I/O on a request
* @rq: the request being processed
@@ -594,17 +610,10 @@ void blk_mq_complete_request(struct request *rq)
* claiming @rq and we lost. This is synchronized through RCU.
* See blk_mq_timeout_work() for details.
*/
- if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
- rcu_read_lock();
- if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
- __blk_mq_complete_request(rq);
- rcu_read_unlock();
- } else {
- srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
- if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
- __blk_mq_complete_request(rq);
- srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
- }
+ hctx_lock(hctx, &srcu_idx);
+ if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
+ __blk_mq_complete_request(rq);
+ hctx_unlock(hctx, srcu_idx);
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -1243,17 +1252,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
*/
WARN_ON_ONCE(in_interrupt());
- if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
- rcu_read_lock();
- blk_mq_sched_dispatch_requests(hctx);
- rcu_read_unlock();
- } else {
- might_sleep();
+ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
- blk_mq_sched_dispatch_requests(hctx);
- srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
- }
+ hctx_lock(hctx, &srcu_idx);
+ blk_mq_sched_dispatch_requests(hctx);
+ hctx_unlock(hctx, srcu_idx);
}
/*
@@ -1624,7 +1627,7 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
- blk_qc_t *cookie, bool may_sleep)
+ blk_qc_t *cookie)
{
struct request_queue *q = rq->q;
struct blk_mq_queue_data bd = {
@@ -1674,25 +1677,20 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
}
insert:
- blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
+ blk_mq_sched_insert_request(rq, false, run_queue, false,
+ hctx->flags & BLK_MQ_F_BLOCKING);
}
static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq, blk_qc_t *cookie)
{
- if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
- rcu_read_lock();
- __blk_mq_try_issue_directly(hctx, rq, cookie, false);
- rcu_read_unlock();
- } else {
- unsigned int srcu_idx;
+ int srcu_idx;
- might_sleep();
+ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
- __blk_mq_try_issue_directly(hctx, rq, cookie, true);
- srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
- }
+ hctx_lock(hctx, &srcu_idx);
+ __blk_mq_try_issue_directly(hctx, rq, cookie);
+ hctx_unlock(hctx, srcu_idx);
}
static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
--
Jens Axboe