Re: [PATCH v2] kyber: introduce kyber_depth_updated()

From: Omar Sandoval
Date: Mon Feb 22 2021 - 14:37:12 EST


On Fri, Feb 05, 2021 at 01:13:10AM -0800, Yang Yang wrote:
> A hang occurs when the user changes the scheduler queue depth by writing
> to the 'nr_requests' sysfs file of that device.
>
> The details of the environment in which we found the problem are as follows:
> an eMMC block device
> total driver tags: 16
> default queue_depth: 32
> kqd->async_depth initialized in kyber_init_sched() with queue_depth=32
>
> Then we change queue_depth to 256 by writing to the 'nr_requests' sysfs
> file, but kqd->async_depth is not updated after queue_depth changes. Now
> the value of async_depth is too small for queue_depth=256, which may
> cause a hang.
>
> This patch introduces kyber_depth_updated(), so that kyber can update the
> async depth when the queue depth changes.
>
> Signed-off-by: Yang Yang <yang.yang@xxxxxxxx>

I wasn't able to reproduce the hang, but this looks correct, and it
passed my tests.

Reviewed-by: Omar Sandoval <osandov@xxxxxx>
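
For anyone else who wants to sanity-check the arithmetic, here is a rough
userspace sketch (not kernel code) of how kqd->async_depth is derived. I'm
assuming KYBER_ASYNC_PERCENT is 75 and approximating the per-word shift the
way sbitmap_init_node() picks it on a 64-bit build; sb_shift() and
async_depth() below are just illustrative helpers. The point is that the
value computed once for the initial depth of 32 is far smaller than what a
depth of 256 calls for, so it has to be recomputed when nr_requests changes,
which is exactly what kyber_depth_updated() now does:

#include <stdio.h>

/* Assumption: matches KYBER_ASYNC_PERCENT in block/kyber-iosched.c. */
#define KYBER_ASYNC_PERCENT 75

/*
 * Approximation of the per-word shift sbitmap picks for a given depth on a
 * 64-bit build (see sbitmap_init_node()).
 */
static unsigned int sb_shift(unsigned int depth)
{
	unsigned int shift = 6;	/* ilog2(BITS_PER_LONG) for 64-bit */

	if (depth >= 4) {
		while ((4U << shift) > depth)
			shift--;
	}
	return shift;
}

/* Same formula kyber uses: a percentage of 2^shift. */
static unsigned int async_depth(unsigned int queue_depth)
{
	return (1U << sb_shift(queue_depth)) * KYBER_ASYNC_PERCENT / 100U;
}

int main(void)
{
	/* Value computed once at init time for the default depth of 32... */
	printf("async_depth for nr_requests=32:  %u\n", async_depth(32));
	/* ...versus what a depth of 256 would call for. */
	printf("async_depth for nr_requests=256: %u\n", async_depth(256));
	return 0;
}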

> ---
> v2:
> - Change the commit message
> - Change from sbitmap::depth to 2^sbitmap::shift
> ---
> block/kyber-iosched.c | 29 +++++++++++++----------------
> 1 file changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
> index dc89199bc8c6..17215b6bf482 100644
> --- a/block/kyber-iosched.c
> +++ b/block/kyber-iosched.c
> @@ -353,19 +353,9 @@ static void kyber_timer_fn(struct timer_list *t)
> }
> }
>
> -static unsigned int kyber_sched_tags_shift(struct request_queue *q)
> -{
> - /*
> - * All of the hardware queues have the same depth, so we can just grab
> - * the shift of the first one.
> - */
> - return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
> -}
> -
> static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
> {
> struct kyber_queue_data *kqd;
> - unsigned int shift;
> int ret = -ENOMEM;
> int i;
>
> @@ -400,9 +390,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
> kqd->latency_targets[i] = kyber_latency_targets[i];
> }
>
> - shift = kyber_sched_tags_shift(q);
> - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
> -
> return kqd;
>
> err_buckets:
> @@ -458,9 +445,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
> INIT_LIST_HEAD(&kcq->rq_list[i]);
> }
>
> -static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
> +static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
> {
> struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
> + struct blk_mq_tags *tags = hctx->sched_tags;
> + unsigned int shift = tags->bitmap_tags->sb.shift;
> +
> + kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
> +
> + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth);
> +}
> +
> +static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
> +{
> struct kyber_hctx_data *khd;
> int i;
>
> @@ -502,8 +499,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
> khd->batching = 0;
>
> hctx->sched_data = khd;
> - sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
> - kqd->async_depth);
> + kyber_depth_updated(hctx);
>
> return 0;
>
> @@ -1022,6 +1018,7 @@ static struct elevator_type kyber_sched = {
> .completed_request = kyber_completed_request,
> .dispatch_request = kyber_dispatch_request,
> .has_work = kyber_has_work,
> + .depth_updated = kyber_depth_updated,
> },
> #ifdef CONFIG_BLK_DEBUG_FS
> .queue_debugfs_attrs = kyber_queue_debugfs_attrs,
> --
> 2.17.1
>