[PATCH] block: add max_dispatch to sysfs

From: Dongliang Cui
Date: Wed Apr 10 2024 - 06:19:33 EST


By default, when the device is not busy, a single dispatch attempts to
pull up to 'nr_requests' requests out of the scheduler queue.

I tried to track the dispatch process:

COMM TYPE SEC_START IOPRIO INDEX
fio-17304 R 196798040 0x2005 0
fio-17306 R 197060504 0x2005 1
fio-17307 R 197346904 0x2005 2
fio-17308 R 197609400 0x2005 3
fio-17309 R 197873048 0x2005 4
fio-17310 R 198134936 0x2005 5
..
fio-17237 R 197122936 0x0 57
fio-17238 R 197384984 0x0 58
<...>-17239 R 197647128 0x0 59
fio-17240 R 197909208 0x0 60
fio-17241 R 198171320 0x0 61
fio-17242 R 198433432 0x0 62
fio-17300 R 195744088 0x2005 0
fio-17301 R 196008504 0x2005 0

The above data is calculated based on the block event trace, with each
column containing: process name, request type, sector start address,
IO priority.

The INDEX represents the order in which the requests are extracted from
the scheduler queue during a single dispatch process.

Some low-speed devices cannot process all of these requests at once, so
the leftover requests are requeued to hctx->dispatch and wait for the
next dispatch.

This causes a problem when IO priority is enabled: if "nr_requests"
requests are dispatched at once, every request is extracted from the
scheduler queue and the IO priority ordering is effectively ignored.

In this scenario, if a high priority request is inserted into the scheduler
queue, it has to wait for the low priority requests already sitting in
hctx->dispatch to be processed first.

--------------------dispatch 1st----------------------
fio-17241 R 198171320 0x0 61
fio-17242 R 198433432 0x0 62
--------------------dispatch 2nd----------------------
fio-17300 R 195744088 0x2005 0

In certain scenarios, we hope that requests can be processed in order of io
priority as much as possible.

Maybe max_dispatch should not be a fixed value, but can be adjusted
according to device conditions.

So we add an interface to control the maximum number of requests pulled
in a single dispatch, so that users can configure it according to device
characteristics.

Signed-off-by: Dongliang Cui <dongliang.cui@xxxxxxxxxx>
---
block/blk-core.c | 1 +
block/blk-mq-sched.c | 4 +++-
block/blk-mq.c | 3 +++
block/blk-sysfs.c | 32 ++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 2 ++
5 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index de771093b526..f5a917085eae 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -442,6 +442,7 @@ struct request_queue *blk_alloc_queue(int node_id)

blk_set_default_limits(&q->limits);
q->nr_requests = BLKDEV_DEFAULT_RQ;
+ q->max_dispatch = BLKDEV_DEFAULT_RQ;

return q;

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 451a2c1f1f32..019958c0a4c3 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -97,7 +97,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
if (hctx->dispatch_busy)
max_dispatch = 1;
else
- max_dispatch = hctx->queue->nr_requests;
+ max_dispatch = hctx->queue->max_dispatch;

do {
struct request *rq;
@@ -454,6 +454,8 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
BLKDEV_DEFAULT_RQ);

+ q->max_dispatch = q->nr_requests;
+
if (blk_mq_is_shared_tags(flags)) {
ret = blk_mq_init_sched_shared_tags(q);
if (ret)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2dc01551e27c..9c286001f429 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4285,6 +4285,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
spin_lock_init(&q->requeue_lock);

q->nr_requests = set->queue_depth;
+ q->max_dispatch = set->queue_depth;

blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q);
@@ -4634,6 +4635,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
}
if (!ret) {
q->nr_requests = nr;
+ if (q->max_dispatch > nr)
+ q->max_dispatch = nr;
if (blk_mq_is_shared_tags(set->flags)) {
if (q->elevator)
blk_mq_tag_update_sched_shared_tags(q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6b2429cad81a..909b5f158bd3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -100,6 +100,36 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
return ret;
}

+static ssize_t queue_max_dispatch_show(struct request_queue *q, char *page)
+{ /* Show handler: reports q->max_dispatch through queue_var_show(). */
+ unsigned long max_dispatch;
+
+ if (!q->disk) /* q->disk is NULL: fail the read with -EINVAL */
+ return -EINVAL;
+ max_dispatch = q->max_dispatch;
+ return queue_var_show(max_dispatch, page); /* presumably formats the value into page; helper not shown here */
+}
+
+static ssize_t
+queue_max_dispatch_store(struct request_queue *q, const char *page, size_t count)
+{ /* Store handler: parses a value from page, caps it at q->nr_requests, saves it in q->max_dispatch. */
+ unsigned long max_dispatch;
+ ssize_t ret;
+
+ if (!q->disk) /* q->disk is NULL: fail the write with -EINVAL */
+ return -EINVAL;
+
+ ret = queue_var_store(&max_dispatch, page, count); /* presumably parses page into max_dispatch; helper not shown here */
+ if (ret < 0) /* negative return is passed back to the caller unchanged */
+ return ret;
+
+ if (max_dispatch > q->nr_requests) /* silently clamp instead of rejecting oversized values */
+ max_dispatch = q->nr_requests;
+
+ q->max_dispatch = max_dispatch; /* NOTE(review): 0 is stored as-is; confirm the dispatch loop handles max_dispatch == 0 */
+ return ret; /* success: whatever queue_var_store() returned */
+}
+
static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
{
int max_sectors_kb = queue_max_sectors(q) >> 1;
@@ -484,6 +514,7 @@ static struct queue_sysfs_entry _prefix##_entry = { \
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
+QUEUE_RW_ENTRY(queue_max_dispatch, "max_dispatch");
QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
@@ -614,6 +645,7 @@ QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
static struct attribute *queue_attrs[] = {
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
+ &queue_max_dispatch_entry.attr,
&queue_max_sectors_entry.attr,
&queue_max_segments_entry.attr,
&queue_max_discard_segments_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99e4f5e72213..a96791b83977 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -434,6 +434,8 @@ struct request_queue {
*/
unsigned long nr_requests; /* Max # of requests */

+ unsigned long max_dispatch; /* Max # of single dispatch */
+
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct blk_crypto_profile *crypto_profile;
struct kobject *crypto_kobject;
--
2.25.1