Re: [PATCH V3 5/7] ublk_drv: consider recovery feature in aborting mechanism

From: Ziyang Zhang
Date: Mon Sep 19 2022 - 23:24:24 EST


On 2022/9/20 11:04, Ming Lei wrote:
> On Tue, Sep 20, 2022 at 09:49:33AM +0800, Ziyang Zhang wrote:
>
> Follows the delta patch against patch 5 for showing the idea:
>
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 4409a130d0b6..60c5786c4711 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -656,7 +656,8 @@ static void ublk_complete_rq(struct request *req)
> * Also aborting may not be started yet, keep in mind that one failed
> * request may be issued by block layer again.
> */
> -static void __ublk_fail_req(struct ublk_io *io, struct request *req)
> +static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
> + struct request *req)
> {
> WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
>
> @@ -667,7 +668,10 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
> req->tag,
> io->flags);
> io->flags |= UBLK_IO_FLAG_ABORTED;
> - blk_mq_end_request(req, BLK_STS_IOERR);
> + if (ublk_queue_can_use_recovery_reissue(ubq))
> + blk_mq_requeue_request(req, false);

Here is one problem:
We reset io->flags to 0 in ublk_queue_reinit() and it is called before new
ubq_daemon with FETCH_REQ is accepted. ublk_abort_queue() is not protected with
ub_mutex and it is called many times in monitor_work. So same rq may be requeued
multiple times.

With recovery disabled, there is no such problem since io->flags does not change
until ublk_dev is released.

In my patch 5 I only requeue the same rq once. So re-using ublk_abort_queue() is
hard for recovery feature.

> + else
> + blk_mq_end_request(req, BLK_STS_IOERR);
> }
> }
>
> @@ -1018,7 +1022,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
> */
> rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
> if (rq)
> - __ublk_fail_req(io, rq);
> + __ublk_fail_req(ubq, io, rq);
> }
> }
> ublk_put_device(ub);
> @@ -1026,12 +1030,10 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
>
> static bool ublk_check_inflight_rq(struct request *rq, void *data)
> {
> - struct ublk_queue *ubq = rq->mq_hctx->driver_data;
> - struct ublk_io *io = &ubq->ios[rq->tag];
> - bool *busy = data;
> + bool *idle = data;
>
> - if (io->flags & UBLK_IO_FLAG_ACTIVE) {
> - *busy = true;
> + if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) {
> + *idle = false;
> return false;
> }
> return true;
> @@ -1039,16 +1041,15 @@ static bool ublk_check_inflight_rq(struct request *rq, void *data)
>
> static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
> {
> - bool busy = false;
> + bool idle = true;
>
> WARN_ON_ONCE(!blk_queue_quiesced(ub->ub_disk->queue));
> while (true) {
> blk_mq_tagset_busy_iter(&ub->tag_set,
> - ublk_check_inflight_rq, &busy);
> - if (busy)
> - msleep(UBLK_REQUEUE_DELAY_MS);
> - else
> + ublk_check_inflight_rq, &idle);
> + if (idle)
> break;
> + msleep(UBLK_REQUEUE_DELAY_MS);
> }
> }
>
> @@ -1069,10 +1070,7 @@ static void ublk_quiesce_queue(struct ublk_device *ub,
> ublk_queue_can_use_recovery_reissue(ubq) ?
> "requeue" : "abort",
> ubq->q_id, i, io->flags);
> - if (ublk_queue_can_use_recovery_reissue(ubq))
> - blk_mq_requeue_request(rq, false);
> - else
> - __ublk_fail_req(io, rq);
> + __ublk_fail_req(ubq, io, rq);
> } else {
> pr_devel("%s: done old cmd: qid %d tag %d\n",
> __func__, ubq->q_id, i);
> @@ -1092,12 +1090,6 @@ static void ublk_quiesce_dev(struct ublk_device *ub)
> if (ub->dev_info.state != UBLK_S_DEV_LIVE)
> goto unlock;
>
> - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> - struct ublk_queue *ubq = ublk_get_queue(ub, i);
> -
> - if (!ubq_daemon_is_dying(ubq))
> - goto unlock;
> - }
> blk_mq_quiesce_queue(ub->ub_disk->queue);
> ublk_wait_tagset_rqs_idle(ub);
> pr_devel("%s: quiesce ub: dev_id %d\n",
> @@ -1129,14 +1121,13 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
> struct ublk_queue *ubq = ublk_get_queue(ub, i);
>
> if (ubq_daemon_is_dying(ubq)) {
> - if (ublk_queue_can_use_recovery(ubq)) {
> + if (ublk_queue_can_use_recovery(ubq))
> schedule_work(&ub->quiesce_work);
> - } else {
> + else
> schedule_work(&ub->stop_work);
>
> - /* abort queue is for making forward progress */
> - ublk_abort_queue(ub, ubq);
> - }
> + /* abort queue is for making forward progress */
> + ublk_abort_queue(ub, ubq);
> }
> }
>
>
>
>
>
> Thanks,
> Ming