Re: [PATCH] md/raid1: honor REQ_NOWAIT when waiting for behind writes

From: Yu Kuai

Date: Sat Jun 20 2026 - 16:18:33 EST


Hi,

在 2026/6/11 16:35, Abd-Alrhman Masalkhi 写道:
> raid1 supports REQ_NOWAIT reads by avoiding waits in the barrier path
> through wait_read_barrier(). However, a read can still block on a
> WriteMostly device when the array uses a bitmap and there are
> outstanding behind writes.
>
> In that case raid1 unconditionally calls wait_behind_writes(), which
> may sleep until all behind writes complete. As a result, a REQ_NOWAIT
> read can block despite the caller explicitly requesting non-blocking
> behavior.
>
> This ensures that raid1 consistently honors REQ_NOWAIT reads across all
> paths that may otherwise wait for behind writes.
>
> Fixes: 5aa705039c4f ("md: raid1 add nowait support")
> Signed-off-by: Abd-Alrhman Masalkhi <abd.masalkhi@xxxxxxxxx>
> ---
> drivers/md/md-bitmap.c | 9 +++++++--
> drivers/md/md-bitmap.h | 2 +-
> drivers/md/md-llbitmap.c | 13 ++++++++-----
> drivers/md/md.c | 2 +-
> drivers/md/raid1.c | 10 +++++++---
> 5 files changed, 24 insertions(+), 12 deletions(-)

Applied to md-7.2.

One note, remove nowait support is already in my to-do lists. There is a case
that can't be handled for now, for example:

2 disk raid1, issue no wait write IO, rdev1 succeed while rdev2 failed. In this
case, we don't know if rdev2 failed issue because nowait or disk really have
badblocks. So we can't either record badblocks or issue again without nowait,
for consequence, rdev1 and rdev2 will end up with different data.

I think the best solution is to remove nowait support for mdraid. Feel free
to cook a patch or if you have something else in mind.

>
> diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
> index 028b9ca8ce52..1206e31f323a 100644
> --- a/drivers/md/md-bitmap.c
> +++ b/drivers/md/md-bitmap.c
> @@ -2063,18 +2063,23 @@ static void bitmap_end_behind_write(struct mddev *mddev)
> bitmap->mddev->bitmap_info.max_write_behind);
> }
>
> -static void bitmap_wait_behind_writes(struct mddev *mddev)
> +static bool bitmap_wait_behind_writes(struct mddev *mddev, bool nowait)
> {
> struct bitmap *bitmap = mddev->bitmap;
>
> /* wait for behind writes to complete */
> if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
> + if (nowait)
> + return false;
> +
> pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
> mdname(mddev));
> /* need to kick something here to make sure I/O goes? */
> wait_event(bitmap->behind_wait,
> atomic_read(&bitmap->behind_writes) == 0);
> }
> +
> + return true;
> }
>
> static void bitmap_destroy(struct mddev *mddev)
> @@ -2084,7 +2089,7 @@ static void bitmap_destroy(struct mddev *mddev)
> if (!bitmap) /* there was no bitmap */
> return;
>
> - bitmap_wait_behind_writes(mddev);
> + bitmap_wait_behind_writes(mddev, false);
> if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
> mddev_destroy_serial_pool(mddev, NULL);
>
> diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
> index 214f623c7e79..f46674bdfeb9 100644
> --- a/drivers/md/md-bitmap.h
> +++ b/drivers/md/md-bitmap.h
> @@ -98,7 +98,7 @@ struct bitmap_operations {
>
> void (*start_behind_write)(struct mddev *mddev);
> void (*end_behind_write)(struct mddev *mddev);
> - void (*wait_behind_writes)(struct mddev *mddev);
> + bool (*wait_behind_writes)(struct mddev *mddev, bool nowait);
>
> md_bitmap_fn *start_write;
> md_bitmap_fn *end_write;
> diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
> index 1adc5b117821..5a4e2abaa757 100644
> --- a/drivers/md/md-llbitmap.c
> +++ b/drivers/md/md-llbitmap.c
> @@ -1574,16 +1574,19 @@ static void llbitmap_end_behind_write(struct mddev *mddev)
> wake_up(&llbitmap->behind_wait);
> }
>
> -static void llbitmap_wait_behind_writes(struct mddev *mddev)
> +static bool llbitmap_wait_behind_writes(struct mddev *mddev, bool nowait)
> {
> struct llbitmap *llbitmap = mddev->bitmap;
>
> - if (!llbitmap)
> - return;
> + if (llbitmap && atomic_read(&llbitmap->behind_writes) > 0) {
> + if (nowait)
> + return false;
>
> - wait_event(llbitmap->behind_wait,
> - atomic_read(&llbitmap->behind_writes) == 0);
> + wait_event(llbitmap->behind_wait,
> + atomic_read(&llbitmap->behind_writes) == 0);
> + }
>
> + return true;
> }
>
> static ssize_t bits_show(struct mddev *mddev, char *page)
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 096bb64e87bd..d1465bcd86c8 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -7050,7 +7050,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
> static void mddev_detach(struct mddev *mddev)
> {
> if (md_bitmap_enabled(mddev, false))
> - mddev->bitmap_ops->wait_behind_writes(mddev);
> + mddev->bitmap_ops->wait_behind_writes(mddev, false);
> if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
> mddev->pers->quiesce(mddev, 1);
> mddev->pers->quiesce(mddev, 0);
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index b1ed4cc6ade4..b2d7c13b64bd 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1341,6 +1341,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
> int max_sectors;
> int rdisk;
> bool r1bio_existed = !!r1_bio;
> + bool nowait = bio->bi_opf & REQ_NOWAIT;
>
> /*
> * An md cloned bio indicates we are in the error path.
> @@ -1360,8 +1361,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
> * Still need barrier for READ in case that whole
> * array is frozen.
> */
> - if (!wait_read_barrier(conf, bio->bi_iter.bi_sector,
> - bio->bi_opf & REQ_NOWAIT)) {
> + if (!wait_read_barrier(conf, bio->bi_iter.bi_sector, nowait)) {
> bio_wouldblock_error(bio);
> return;
> }
> @@ -1402,7 +1402,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
> * over-take any writes that are 'behind'
> */
> mddev_add_trace_msg(mddev, "raid1 wait behind writes");
> - mddev->bitmap_ops->wait_behind_writes(mddev);
> + if (!mddev->bitmap_ops->wait_behind_writes(mddev, nowait)) {
> + bio_wouldblock_error(bio);
> + set_bit(R1BIO_Returned, &r1_bio->state);
> + goto err_handle;
> + }
> }
>
> if (max_sectors < bio_sectors(bio)) {

--
Thanks,
Kuai