Re: [PATCH v8 06/11] btrfs: implement RST version of scrub

From: Qu Wenruo
Date: Wed Sep 13 2023 - 05:52:57 EST




On 2023/9/11 22:22, Johannes Thumshirn wrote:
A filesystem that uses the RAID stripe tree for logical to physical
address translation can't use the regular scrub path, that reads all
stripes and then checks if a sector is unused afterwards.

When using the RAID stripe tree, this will result in lookup errors, as the
stripe tree doesn't know the requested logical addresses.

Instead, look up stripes that are backed by the extent bitmap.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx>
---
fs/btrfs/scrub.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f16220ce5fba..5101e0a3f83e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -23,6 +23,7 @@
#include "accessors.h"
#include "file-item.h"
#include "scrub.h"
+#include "raid-stripe-tree.h"

/*
* This is only the first step towards a full-features scrub. It reads all
@@ -1634,6 +1635,56 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe)
}
}

+static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
+ struct scrub_stripe *stripe)
+{
+ struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ struct btrfs_bio *bbio = NULL;
+ int mirror = stripe->mirror_num;
+ int i;
+
+ atomic_inc(&stripe->pending_io);
+
+ for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
+ struct page *page;
+ int pgoff;
+
+ page = scrub_stripe_get_page(stripe, i);
+ pgoff = scrub_stripe_get_page_offset(stripe, i);
+
+ /* The current sector cannot be merged, submit the bio. */
+ if (bbio &&
+ ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) ||
+ bbio->bio.bi_iter.bi_size >= BTRFS_STRIPE_LEN)) {
+ ASSERT(bbio->bio.bi_iter.bi_size);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_bio(bbio, mirror);
+ bbio = NULL;
+ }
+
+ if (!bbio) {
+ bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
+ fs_info, scrub_read_endio, stripe);
+ bbio->bio.bi_iter.bi_sector = (stripe->logical +
+ (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
+ }
+
+ __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
+ }
+
+ if (bbio) {
+ ASSERT(bbio->bio.bi_iter.bi_size);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_bio(bbio, mirror);

Since RST is looked up during btrfs_submit_bio() (to be more accurate,
set_io_stripe()), and I just checked there is no special requirement to
make btrfs to lookup using commit root.

This means we can have a problem that extent items and RST are out-of-sync.

For scrub, all the extent items are searched using commit root, but
btrfs_get_raid_extent_offset() is only using current root.
Thus you would got some problems during fsstress and scrub.


We need some way to distinguish scrub bbio from regular ones (which is a
completely new requirement).
For now only scrub doesn't initialize bbio->inode, thus it can be used
to do the distinguish (at least for now).

Thanks,
Qu
+ }
+
+ if (atomic_dec_and_test(&stripe->pending_io)) {
+ wake_up(&stripe->io_wait);
+ INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker);
+ queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work);
+ }
+}
+
static void scrub_submit_initial_read(struct scrub_ctx *sctx,
struct scrub_stripe *stripe)
{
@@ -1645,6 +1696,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
ASSERT(stripe->mirror_num > 0);
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));

+ if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) {
+ scrub_submit_extent_sector_read(sctx, stripe);
+ return;
+ }
+
bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
scrub_read_endio, stripe);