[PATCH v8 05/11] btrfs: lookup physical address from stripe extent

From: Johannes Thumshirn
Date: Mon Sep 11 2023 - 17:26:10 EST


Lookup the physical address from the raid stripe tree when a read on an
RAID volume formatted with the raid stripe tree was attempted.

If the requested logical address was not found in the stripe tree, it may
still be in the in-memory ordered stripe tree, so fallback to searching
the ordered stripe tree in this case.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx>
---
fs/btrfs/raid-stripe-tree.c | 159 ++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/raid-stripe-tree.h | 11 +++
fs/btrfs/volumes.c | 37 ++++++++---
3 files changed, 198 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 5b12f40877b5..7ed02e4b79ec 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -324,3 +324,162 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,

return ret;
}
+
+static bool btrfs_check_for_extent(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length, struct btrfs_path *path)
+{
+ struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical);
+ struct btrfs_key key;
+ int ret;
+
+ btrfs_release_path(path);
+
+ key.objectid = logical;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = length;
+
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+
+ return ret;
+}
+
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 *length, u64 map_type,
+ u32 stripe_index,
+ struct btrfs_io_stripe *stripe)
+{
+ struct btrfs_root *stripe_root = btrfs_stripe_tree_root(fs_info);
+ struct btrfs_stripe_extent *stripe_extent;
+ struct btrfs_key stripe_key;
+ struct btrfs_key found_key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ int num_stripes;
+ u8 encoding;
+ u64 offset;
+ u64 found_logical;
+ u64 found_length;
+ u64 end;
+ u64 found_end;
+ int slot;
+ int ret;
+ int i;
+
+ stripe_key.objectid = logical;
+ stripe_key.type = BTRFS_RAID_STRIPE_KEY;
+ stripe_key.offset = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
+ if (ret < 0)
+ goto free_path;
+ if (ret) {
+ if (path->slots[0] != 0)
+ path->slots[0]--;
+ }
+
+ end = logical + *length;
+
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ found_logical = found_key.objectid;
+ found_length = found_key.offset;
+ found_end = found_logical + found_length;
+
+ if (found_logical > end) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (in_range(logical, found_logical, found_length))
+ break;
+
+ ret = btrfs_next_item(stripe_root, path);
+ if (ret)
+ goto out;
+ }
+
+ offset = logical - found_logical;
+
+ /*
+ * If we have a logically contiguous, but physically noncontinuous
+ * range, we need to split the bio. Record the length after which we
+ * must split the bio.
+ */
+ if (end > found_end)
+ *length -= end - found_end;
+
+ num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
+ stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
+ encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
+
+ if (encoding != btrfs_bg_type_to_raid_encoding(map_type)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ for (i = 0; i < num_stripes; i++) {
+ struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
+ u64 devid = btrfs_raid_stride_devid(leaf, stride);
+ u64 len = btrfs_raid_stride_length(leaf, stride);
+ u64 physical = btrfs_raid_stride_physical(leaf, stride);
+
+ if (offset >= len) {
+ offset -= len;
+
+ if (offset >= BTRFS_STRIPE_LEN)
+ continue;
+ }
+
+ if (devid != stripe->dev->devid)
+ continue;
+
+ if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
+ continue;
+
+ stripe->physical = physical + offset;
+
+ ret = 0;
+ goto free_path;
+ }
+
+ /*
+ * If we're here, we haven't found the requested devid in the stripe.
+ */
+ ret = -ENOENT;
+out:
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret && ret != -EIO) {
+ /*
+ * Check if the range we're looking for is actually backed by
+ * an extent. This can happen, e.g. when scrub is running on a
+ * block-group and the extent it is trying to scrub get's
+ * deleted in the meantime. Although scrub is setting the
+ * block-group to read-only, deletion of extents are still
+ * allowed. If the extent is gone, simply return ENOENT and be
+ * good.
+ */
+ if (btrfs_check_for_extent(fs_info, logical, *length, path)) {
+ ret = -ENOENT;
+ goto free_path;
+ }
+
+ if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+ btrfs_print_tree(leaf, 1);
+ btrfs_err(fs_info,
+ "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
+ logical, logical + *length, stripe->dev->devid,
+ btrfs_bg_type_to_raid_name(map_type));
+ }
+free_path:
+ btrfs_free_path(path);
+
+ return ret;
+}
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
index 7560dc501a65..40aa553ae8aa 100644
--- a/fs/btrfs/raid-stripe-tree.h
+++ b/fs/btrfs/raid-stripe-tree.h
@@ -13,6 +13,10 @@ struct btrfs_io_stripe;

int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start,
u64 length);
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 *length, u64 map_type,
+ u32 stripe_index,
+ struct btrfs_io_stripe *stripe);
int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_extent *ordered_extent);

@@ -33,4 +37,11 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,

return false;
}
+
+static inline int btrfs_num_raid_stripes(u32 item_size)
+{
+ return (item_size - offsetof(struct btrfs_stripe_extent, strides)) /
+ sizeof(struct btrfs_raid_stride);
+}
+
#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0c0fd4eb4848..7c25f5c77788 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
#include "relocation.h"
#include "scrub.h"
#include "super.h"
+#include "raid-stripe-tree.h"

#define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID10 | \
@@ -6206,12 +6207,22 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
return U64_MAX;
}

-static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
- u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
+static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length, struct btrfs_io_stripe *dst,
+ struct map_lookup *map, u32 stripe_index,
+ u64 stripe_offset, u64 stripe_nr)
{
dst->dev = map->stripes[stripe_index].dev;
+
+ if (op == BTRFS_MAP_READ &&
+ btrfs_need_stripe_tree_update(fs_info, map->type))
+ return btrfs_get_raid_extent_offset(fs_info, logical, length,
+ map->type, stripe_index,
+ dst);
+
dst->physical = map->stripes[stripe_index].physical +
stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
+ return 0;
}

/*
@@ -6428,11 +6439,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
*/
if (smap && num_alloc_stripes == 1 &&
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
- set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
+ ret = set_io_stripe(fs_info, op, logical, length, smap, map,
+ stripe_index, stripe_offset, stripe_nr);
if (mirror_num_ret)
*mirror_num_ret = mirror_num;
*bioc_ret = NULL;
- ret = 0;
goto out;
}

@@ -6463,21 +6474,29 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
bioc->full_stripe_logical = em->start +
btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
for (i = 0; i < num_stripes; i++)
- set_io_stripe(&bioc->stripes[i], map,
- (i + stripe_nr) % num_stripes,
- stripe_offset, stripe_nr);
+ ret = set_io_stripe(fs_info, op, logical, length,
+ &bioc->stripes[i], map,
+ (i + stripe_nr) % num_stripes,
+ stripe_offset, stripe_nr);
} else {
/*
* For all other non-RAID56 profiles, just copy the target
* stripe into the bioc.
*/
for (i = 0; i < num_stripes; i++) {
- set_io_stripe(&bioc->stripes[i], map, stripe_index,
- stripe_offset, stripe_nr);
+ ret = set_io_stripe(fs_info, op, logical, length,
+ &bioc->stripes[i], map, stripe_index,
+ stripe_offset, stripe_nr);
stripe_index++;
}
}

+ if (ret) {
+ *bioc_ret = NULL;
+ btrfs_put_bioc(bioc);
+ goto out;
+ }
+
if (op != BTRFS_MAP_READ)
max_errors = btrfs_chunk_max_errors(map);


--
2.41.0