Re: [PATCH v4 1/2] btrfs: zoned: reserve relocation block-group on mount

From: Naohiro Aota
Date: Fri May 24 2024 - 04:31:30 EST


On Thu, May 23, 2024 at 05:21:58PM GMT, Johannes Thumshirn wrote:
> From: Johannes Thumshirn <johannes.thumshirn@xxxxxxx>
>
> Reserve one zone as a data relocation target on each mount. If we already
> find an empty block group, there's no need to force a chunk allocation;
> we can use this empty data block group as our relocation target instead.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx>
> ---
> fs/btrfs/block-group.c | 17 +++++++++++++
> fs/btrfs/disk-io.c     |  2 ++
> fs/btrfs/zoned.c       | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/zoned.h       |  3 +++
> 4 files changed, 89 insertions(+)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 9910bae89966..1195f6721c90 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1500,6 +1500,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>  			btrfs_put_block_group(block_group);
>  			continue;
>  		}
> +
> +		spin_lock(&fs_info->relocation_bg_lock);
> +		if (block_group->start == fs_info->data_reloc_bg) {
> +			btrfs_put_block_group(block_group);
> +			spin_unlock(&fs_info->relocation_bg_lock);
> +			continue;
> +		}
> +		spin_unlock(&fs_info->relocation_bg_lock);
> +
>  		spin_unlock(&fs_info->unused_bgs_lock);
>
>  		btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
> @@ -1835,6 +1844,14 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  				      bg_list);
>  		list_del_init(&bg->bg_list);
>
> +		spin_lock(&fs_info->relocation_bg_lock);
> +		if (bg->start == fs_info->data_reloc_bg) {
> +			btrfs_put_block_group(bg);
> +			spin_unlock(&fs_info->relocation_bg_lock);
> +			continue;
> +		}
> +		spin_unlock(&fs_info->relocation_bg_lock);
> +
>  		space_info = bg->space_info;
>  		spin_unlock(&fs_info->unused_bgs_lock);
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 78d3966232ae..16bb52bcb69e 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3547,6 +3547,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>  	}
>  	btrfs_discard_resume(fs_info);
>
> +	btrfs_reserve_relocation_bg(fs_info);
> +
>  	if (fs_info->uuid_root &&
>  	    (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
>  	     fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index c52a0063f7db..d291cf4f565e 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -17,6 +17,7 @@
>  #include "fs.h"
>  #include "accessors.h"
>  #include "bio.h"
> +#include "transaction.h"
>
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES 4096
> @@ -2637,3 +2638,69 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
>  	}
>  	spin_unlock(&fs_info->zone_active_bgs_lock);
>  }
> +
> +static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
> +{
> +	struct btrfs_block_group *bg;
> +
> +	for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
> +		list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> +			if (bg->flags != flags)
> +				continue;
> +			if (bg->used == 0)
> +				return bg->start;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_root *tree_root = fs_info->tree_root;
> +	struct btrfs_space_info *sinfo = fs_info->data_sinfo;
> +	struct btrfs_trans_handle *trans;
> +	struct btrfs_block_group *bg;
> +	u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
> +	u64 bytenr = 0;
> +
> +	lockdep_assert_not_held(&fs_info->relocation_bg_lock);
> +
> +	if (!btrfs_is_zoned(fs_info))
> +		return;
> +
> +	if (fs_info->data_reloc_bg)
> +		return;
> +
> +	bytenr = find_empty_block_group(sinfo, flags);
> +	if (!bytenr) {
> +		int ret;
> +
> +		trans = btrfs_join_transaction(tree_root);
> +		if (IS_ERR(trans))
> +			return;
> +
> +		ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
> +		btrfs_end_transaction(trans);
> +		if (ret)
> +			return;
> +
> +		bytenr = find_empty_block_group(sinfo, flags);
> +		if (!bytenr)
> +			return;
> +
> +	}
> +
> +	bg = btrfs_lookup_block_group(fs_info, bytenr);
> +	if (!bg)
> +		return;
> +
> +	if (!btrfs_zone_activate(bg))
> +		bytenr = 0;
> +
> +	btrfs_put_block_group(bg);
> +
> +	spin_lock(&fs_info->relocation_bg_lock);
> +	fs_info->data_reloc_bg = bytenr;

Since the check above and the chunk allocation happen outside of the lock,
another thread can already have set a new data_reloc_bg in the meantime. So,
doing this assignment only under "if (!fs_info->data_reloc_bg)" would be
safe.
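Something like this, as a minimal sketch of the idea (assuming the rest of
the function stays as posted):

	spin_lock(&fs_info->relocation_bg_lock);
	/*
	 * The check at the top of the function and the chunk allocation ran
	 * without relocation_bg_lock held, so another thread may have
	 * installed a relocation block group by now. Only take the slot if
	 * it is still empty.
	 */
	if (!fs_info->data_reloc_bg)
		fs_info->data_reloc_bg = bytenr;
	spin_unlock(&fs_info->relocation_bg_lock);

If the slot is already taken by then, the block group activated above would
simply remain an ordinary empty data block group.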

With that:

Reviewed-by: Naohiro Aota <naohiro.aota@xxxxxxx>

Regards,
Naohiro