[PATCH stable] block/mq-deadline: fix different priority request on the same zone

From: Wu Bo
Date: Thu May 16 2024 - 05:15:30 EST


Zoned devices request sequential writing on the same zone. That means
if 2 requests on the saem zone, the lower pos request need to dispatch
to device first.
While different priority has it's own tree & list, request with high
priority will be disptch first.
So if requestA & requestB are on the same zone. RequestA is BE and pos
is X+0. ReqeustB is RT and pos is X+1. RequestB will be disptched before
requestA, which got an ERROR from zoned device.

This is found in a practice scenario when using F2FS on zoned device.
And it is very easy to reproduce:
1. Use fsstress to run 8 test processes
2. Use ionice to change 4/8 processes to RT priority

Fixes: c807ab520fc3 ("block/mq-deadline: Add I/O priority support")
Cc: <stable@xxxxxxxxxxxxxxx>
Signed-off-by: Wu Bo <bo.wu@xxxxxxxx>
---
block/mq-deadline.c | 31 +++++++++++++++++++++++++++++++
include/linux/blk-mq.h | 15 +++++++++++++++
2 files changed, 46 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 02a916ba62ee..6a05dd86e8ca 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -539,6 +539,37 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
if (started_after(dd, rq, latest_start))
return NULL;

+ if (!blk_rq_is_seq_zoned_write(rq))
+ goto skip_check;
+ /*
+ * To ensure sequential writing, check the lower priority class to see
+ * if there is a request on the same zone and need to be dispatched
+ * first
+ */
+ ioprio_class = dd_rq_ioclass(rq);
+ prio = ioprio_class_to_prio[ioprio_class];
+ prio++;
+ for (; prio <= DD_PRIO_MAX; prio++) {
+ struct request *temp_rq;
+ unsigned long flags;
+ bool can_dispatch;
+
+ if (!dd_queued(dd, prio))
+ continue;
+
+ temp_rq = deadline_from_pos(&dd->per_prio[prio], data_dir, blk_rq_pos(rq));
+ if (temp_rq && blk_req_zone_in_one(temp_rq, rq) &&
+ blk_rq_pos(temp_rq) < blk_rq_pos(rq)) {
+ spin_lock_irqsave(&dd->zone_lock, flags);
+ can_dispatch = blk_req_can_dispatch_to_zone(temp_rq);
+ spin_unlock_irqrestore(&dd->zone_lock, flags);
+ if (!can_dispatch)
+ return NULL;
+ rq = temp_rq;
+ per_prio = &dd->per_prio[prio];
+ }
+ }
+skip_check:
/*
* rq is the selected appropriate request.
*/
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d3d8fd8e229b..bca1e639e0f3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1202,6 +1202,15 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
return true;
return !blk_req_zone_is_write_locked(rq);
}
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+ struct request *rq_b)
+{
+ unsigned int zone_sectors = rq_a->q->limits.chunk_sectors;
+
+ return round_down(blk_rq_pos(rq_a), zone_sectors) ==
+ round_down(blk_rq_pos(rq_b), zone_sectors);
+}
#else /* CONFIG_BLK_DEV_ZONED */
static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
{
@@ -1229,6 +1238,12 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
return true;
}
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+ struct request *rq_b)
+{
+ return false;
+}
#endif /* CONFIG_BLK_DEV_ZONED */

#endif /* BLK_MQ_H */
--
2.35.3