Re: [PATCH v6 4/8] blk-mq: add a new queue sysfs attribute async_depth

From: Yu Kuai

Date: Fri Nov 21 2025 - 07:17:51 EST


Hi,

在 2025/11/21 15:14, Hannes Reinecke 写道:
> On 11/21/25 06:28, Yu Kuai wrote:
>> Add a new field async_depth to request_queue and related APIs, this is
>> currently not used, following patches will convert elevators to use
>> this instead of internal async_depth.
>>
>> Signed-off-by: Yu Kuai <yukuai@xxxxxxxxx>
>> Reviewed-by: Nilay Shroff <nilay@xxxxxxxxxxxxx>
>> ---
>>   block/blk-core.c       |  1 +
>>   block/blk-mq.c         |  6 ++++++
>>   block/blk-sysfs.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
>>   block/elevator.c       |  1 +
>>   include/linux/blkdev.h |  1 +
>>   5 files changed, 51 insertions(+)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 14ae73eebe0d..cc5c9ced8e6f 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -463,6 +463,7 @@ struct request_queue *blk_alloc_queue(struct
>> queue_limits *lim, int node_id)
>>       fs_reclaim_release(GFP_KERNEL);
>>         q->nr_requests = BLKDEV_DEFAULT_RQ;
>> +    q->async_depth = BLKDEV_DEFAULT_RQ;
>>         return q;
>>   diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 6c505ebfab65..ae6ce68f4786 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -4628,6 +4628,7 @@ int blk_mq_init_allocated_queue(struct
>> blk_mq_tag_set *set,
>>       spin_lock_init(&q->requeue_lock);
>>         q->nr_requests = set->queue_depth;
>> +    q->async_depth = set->queue_depth;
>>         blk_mq_init_cpu_queues(q, set->nr_hw_queues);
>>       blk_mq_map_swqueue(q);
>> @@ -4994,6 +4995,11 @@ struct elevator_tags
>> *blk_mq_update_nr_requests(struct request_queue *q,
>>           q->elevator->et = et;
>>       }
>>   +    /*
>> +     * Preserve relative value, both nr and async_depth are at most
>> 16 bit
>> +     * value, no need to worry about overflow.
>> +     */
>> +    q->async_depth = max(q->async_depth * nr / q->nr_requests, 1);
>>       q->nr_requests = nr;
>>       if (q->elevator && q->elevator->type->ops.depth_updated)
>>           q->elevator->type->ops.depth_updated(q);
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 8684c57498cc..5c2d29ac6570 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -127,6 +127,46 @@ queue_requests_store(struct gendisk *disk, const
>> char *page, size_t count)
>>       return ret;
>>   }
>>   +static ssize_t queue_async_depth_show(struct gendisk *disk, char
>> *page)
>> +{
>> +    guard(mutex)(&disk->queue->elevator_lock);
>> +
>> +    return queue_var_show(disk->queue->async_depth, page);
>> +}
>> +
>> +static ssize_t
>> +queue_async_depth_store(struct gendisk *disk, const char *page,
>> size_t count)
>> +{
>> +    struct request_queue *q = disk->queue;
>> +    unsigned int memflags;
>> +    unsigned long nr;
>> +    int ret;
>> +
>> +    if (!queue_is_mq(q))
>> +        return -EINVAL;
>> +
>> +    ret = queue_var_store(&nr, page, count);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    if (nr == 0)
>> +        return -EINVAL;
>> +
>> +    memflags = blk_mq_freeze_queue(q);
>> +    scoped_guard(mutex, &q->elevator_lock) {
>> +        if (q->elevator) {
>> +            q->async_depth = min(q->nr_requests, nr);
>> +            if (q->elevator->type->ops.depth_updated)
>> + q->elevator->type->ops.depth_updated(q);
>> +        } else {
>> +            ret = -EINVAL;
>> +        }
>> +    }
>> +    blk_mq_unfreeze_queue(q, memflags);
>> +
>> +    return ret;
>> +}
>> +
>>   static ssize_t queue_ra_show(struct gendisk *disk, char *page)
>>   {
>>       ssize_t ret;
>> @@ -532,6 +572,7 @@ static struct queue_sysfs_entry _prefix##_entry =
>> {    \
>>   }
>>     QUEUE_RW_ENTRY(queue_requests, "nr_requests");
>> +QUEUE_RW_ENTRY(queue_async_depth, "async_depth");
>>   QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
>>   QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
>>   QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
>> @@ -754,6 +795,7 @@ static struct attribute *blk_mq_queue_attrs[] = {
>>        */
>>       &elv_iosched_entry.attr,
>>       &queue_requests_entry.attr,
>> +    &queue_async_depth_entry.attr,
>>   #ifdef CONFIG_BLK_WBT
>>       &queue_wb_lat_entry.attr,
>>   #endif
>> diff --git a/block/elevator.c b/block/elevator.c
>> index 5b37ef44f52d..5ff21075a84a 100644
>> --- a/block/elevator.c
>> +++ b/block/elevator.c
>> @@ -589,6 +589,7 @@ static int elevator_switch(struct request_queue
>> *q, struct elv_change_ctx *ctx)
>>           blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
>>           q->elevator = NULL;
>>           q->nr_requests = q->tag_set->queue_depth;
>> +        q->async_depth = q->tag_set->queue_depth;
>>       }
>>       blk_add_trace_msg(q, "elv switch: %s", ctx->name);
>>   diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index cdc68c41fa96..edddf17f8304 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -552,6 +552,7 @@ struct request_queue {
>>        * queue settings
>>        */
>>       unsigned int        nr_requests;    /* Max # of requests */
>> +    unsigned int        async_depth;    /* Max # of async requests */
>>     #ifdef CONFIG_BLK_INLINE_ENCRYPTION
>>       struct blk_crypto_profile *crypto_profile;
>
> Hmm. Makes me wonder: async_depth is only used within the elevators, so
> maybe we should restrict visibility of that attribute when an elevator
> is selected?
> It feels kinda awkward having an attribute which does nothing if no
> elevator is loaded ...

To be honest, I can't think of how to implement this suitably, we'll have
to remove/create this entry while switching elevator, this doesn't seem
worth it to invent a new wheel.

>
> Cheers,
>
> Hannes

--
Thanks,
Kuai