Re: [RFC PATCH 5/5] md/raid1: introduce rectify action to repair badblocks

From: Zheng Qixing

Date: Sun Jan 18 2026 - 22:06:52 EST

在 2026/1/14 11:11, Li Nan 写道:

在 2025/12/31 15:09, Zheng Qixing 写道:

From: Zheng Qixing <zhengqixing@xxxxxxxxxx>

Add support for repairing known badblocks in RAID1. When disks
have known badblocks (shown in sysfs bad_blocks), data can be
read from other healthy disks in the array and written back to
repair the badblock areas, which are then cleared from bad_blocks.

Running "echo rectify > sync_action" triggers this action.

Signed-off-by: Zheng Qixing <zhengqixing@xxxxxxxxxx>

+static void end_rectify_read(struct bio *bio)
+{
+    struct r1bio *r1_bio = get_resync_r1bio(bio);
+    struct r1conf *conf = r1_bio->mddev->private;
+    struct md_rdev *rdev;
+    struct bio *next_bio;
+    bool all_fail = true;
+    int i;
+
+    update_head_pos(r1_bio->read_disk, r1_bio);
+
+    if (!bio->bi_status) {
+        set_bit(R1BIO_Uptodate, &r1_bio->state);
+        goto out;
+    }
+
+    for (i = r1_bio->read_disk + 1; i < conf->raid_disks; i++) {
+        rdev = conf->mirrors[i].rdev;
+        if (!rdev || test_bit(Faulty, &rdev->flags))
+            continue;
+
+        next_bio = r1_bio->bios[i];
+        if (next_bio->bi_end_io == end_rectify_read) {
+            next_bio->bi_opf &= ~MD_FAILFAST;

Why set MD_FAILFAST only to clear it right away?
Besides, submit_rectify_read() will clear it again.

Indeed.

+static void rectify_request_write(struct mddev *mddev, struct r1bio *r1_bio)
+{
+    struct r1conf *conf = mddev->private;
+    struct bio *wbio = NULL;
+    int wcnt = 0;
+    int i;
+
+    if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+        submit_rectify_read(r1_bio);
+        return;
+    }
+
+    atomic_set(&r1_bio->remaining, 0);
+    for (i = 0; i < conf->raid_disks; i++) {
+        wbio = r1_bio->bios[i];
+        if (wbio->bi_end_io == end_rectify_write) {
+            atomic_inc(&r1_bio->remaining);
+            wcnt++;
+            submit_bio_noacct(wbio);
+        }
+    }
+
+    if (unlikely(!wcnt)) {
+        md_done_sync(r1_bio->mddev, r1_bio->sectors);
+        put_buf(r1_bio);
+    }

How can 'wcnt' be 0?

Oh, I forgot to check the faulty state of rdev.😳

+}
+
+static void handle_rectify_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
+{
+    struct md_rdev *rdev;
+    struct bio *bio;
+    int i;
+
+    for (i = 0; i < conf->raid_disks; i++) {
+        rdev = conf->mirrors[i].rdev;
+        bio = r1_bio->bios[i];
+        if (bio->bi_end_io == NULL)
+            continue;
+
+        if (!bio->bi_status && bio->bi_end_io == end_rectify_write &&
+            test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+            rdev_clear_badblocks(rdev, r1_bio->sector,
+                         r1_bio->sectors, 0);
+        }

Reusing handle_sync_write_finished() seems better.

Good point.

Thanks,

Qixing