Re: [PATCH v5 1/8] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb

From: IBM
Date: Sat Jul 22 2023 - 02:24:39 EST


Kemeng Shi <shikemeng@xxxxxxxxxxxxxxx> writes:

> There are several reasons to add a general function to update block
> bitmap and group descriptor on disk:
> 1. pair behavior of alloc/free bits. For example,
> ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
> in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
> 2. remove repeat code to read from disk, update and write back to disk.
> 3. reduce future unit test mocks to catch real IO to update structure
> on disk.

Thanks for the cleanup, and sorry that I am only now starting to review
this series. However, I do have some review comments to help me
understand the patch series a bit better.

>
> Signed-off-by: Kemeng Shi <shikemeng@xxxxxxxxxxxxxxx>
> Reviewed-by: Ojaswin Mujoo <ojaswin@xxxxxxxxxxxxx>
> ---
> fs/ext4/mballoc.c | 157 +++++++++++++++++++++++++---------------------
> 1 file changed, 87 insertions(+), 70 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index a2475b8c9fb5..58864a9116c0 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3948,6 +3948,86 @@ void ext4_exit_mballoc(void)
> ext4_groupinfo_destroy_slabs();
> }
>
> +struct ext4_mark_context {
> + struct super_block *sb;
> + int state;
> +};

The intention behind this structure is not totally clear from the above,
since it lacks any comments.

Can you please help me understand why we need this?
I still don't know whether we require this structure and what its
purpose is. Is it only for reducing the number of variables being passed?

Let me do more reading...

...On more reading, I was previously considering renaming it to something
like ext4_mb_mark_context, but then I realized that its naming is
similar to ext4_allocation_context. So we may keep the naming
as is.

Since this structure is, presumably, used for marking block bits for
mballoc, why don't we pass useful information that is relevant to
this operation, like -

ext4_mark_context {
ext4_group_t mc_group; /* block group */
ext4_grpblk_t mc_clblk; /* block in cluster units */
ext4_grpblk_t mc_cllen; /* len in cluster units */
ext4_grpblk_t mc_clupdates; /* number of clusters marked/unmarked */
unsigned int mc_flags; /* flags ... */
bool mc_state; /* to set or unset state */
};

Maybe we can pass super_block and handle as arguments, as those don't
define the ext4_mark_context for mballoc.

Since this structure is not prepared at the beginning of any function, we
may need a prepare function for it, e.g.

static void ext4_mb_prepare_mark_context(&mc, ...)
static int ext4_mb_mark_context(sb, handle, &mc); (instead of ext4_mb_mark_group_bb())

Does this sound better to you? Thoughts?

Otherwise I think having a common function for mb_mark_context looks
like a nice cleanup.

-ritesh

> +
> +static int
> +ext4_mb_mark_group_bb(struct ext4_mark_context *mc, ext4_group_t group,
> + ext4_grpblk_t blkoff, ext4_grpblk_t len)
> +{
> + struct super_block *sb = mc->sb;
> + struct ext4_sb_info *sbi = EXT4_SB(sb);
> + struct buffer_head *bitmap_bh = NULL;
> + struct ext4_group_desc *gdp;
> + struct buffer_head *gdp_bh;
> + int err;
> + unsigned int i, already, changed;
> +
> + bitmap_bh = ext4_read_block_bitmap(sb, group);
> + if (IS_ERR(bitmap_bh))
> + return PTR_ERR(bitmap_bh);
> +
> + err = -EIO;
> + gdp = ext4_get_group_desc(sb, group, &gdp_bh);
> + if (!gdp)
> + goto out_err;
> +
> + ext4_lock_group(sb, group);
> + if (ext4_has_group_desc_csum(sb) &&
> + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
> + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_clusters_after_init(sb, group, gdp));
> + }
> +
> + already = 0;
> + for (i = 0; i < len; i++)
> + if (mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
> + mc->state)
> + already++;
> + changed = len - already;
> +
> + if (mc->state) {
> + mb_set_bits(bitmap_bh->b_data, blkoff, len);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_group_clusters(sb, gdp) - changed);
> + } else {
> + mb_clear_bits(bitmap_bh->b_data, blkoff, len);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_group_clusters(sb, gdp) + changed);
> + }
> +
> + ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
> + ext4_group_desc_csum_set(sb, group, gdp);
> + ext4_unlock_group(sb, group);
> +
> + if (sbi->s_log_groups_per_flex) {
> + ext4_group_t flex_group = ext4_flex_group(sbi, group);
> + struct flex_groups *fg = sbi_array_rcu_deref(sbi,
> + s_flex_groups, flex_group);
> +
> + if (mc->state)
> + atomic64_sub(changed, &fg->free_clusters);
> + else
> + atomic64_add(changed, &fg->free_clusters);
> + }
> +
> + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
> + if (err)
> + goto out_err;
> + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
> + if (err)
> + goto out_err;
> +
> + sync_dirty_buffer(bitmap_bh);
> + sync_dirty_buffer(gdp_bh);
> +
> +out_err:
> + brelse(bitmap_bh);
> + return err;
> +}
>
> /*
> * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
> @@ -4074,15 +4154,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
> int len, int state)
> {
> - struct buffer_head *bitmap_bh = NULL;
> - struct ext4_group_desc *gdp;
> - struct buffer_head *gdp_bh;
> + struct ext4_mark_context mc = {
> + .sb = sb,
> + .state = state,
> + };
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> ext4_group_t group;
> ext4_grpblk_t blkoff;
> - int i, err;
> - int already;
> - unsigned int clen, clen_changed, thisgrp_len;
> + int err;
> + unsigned int clen, thisgrp_len;
>
> while (len > 0) {
> ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
> @@ -4103,80 +4183,17 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
> ext4_error(sb, "Marking blocks in system zone - "
> "Block = %llu, len = %u",
> block, thisgrp_len);
> - bitmap_bh = NULL;
> break;
> }
>
> - bitmap_bh = ext4_read_block_bitmap(sb, group);
> - if (IS_ERR(bitmap_bh)) {
> - err = PTR_ERR(bitmap_bh);
> - bitmap_bh = NULL;
> - break;
> - }
> -
> - err = -EIO;
> - gdp = ext4_get_group_desc(sb, group, &gdp_bh);
> - if (!gdp)
> - break;
> -
> - ext4_lock_group(sb, group);
> - already = 0;
> - for (i = 0; i < clen; i++)
> - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
> - !state)
> - already++;
> -
> - clen_changed = clen - already;
> - if (state)
> - mb_set_bits(bitmap_bh->b_data, blkoff, clen);
> - else
> - mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
> - if (ext4_has_group_desc_csum(sb) &&
> - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
> - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
> - ext4_free_group_clusters_set(sb, gdp,
> - ext4_free_clusters_after_init(sb, group, gdp));
> - }
> - if (state)
> - clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
> - else
> - clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
> -
> - ext4_free_group_clusters_set(sb, gdp, clen);
> - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
> - ext4_group_desc_csum_set(sb, group, gdp);
> -
> - ext4_unlock_group(sb, group);
> -
> - if (sbi->s_log_groups_per_flex) {
> - ext4_group_t flex_group = ext4_flex_group(sbi, group);
> - struct flex_groups *fg = sbi_array_rcu_deref(sbi,
> - s_flex_groups, flex_group);
> -
> - if (state)
> - atomic64_sub(clen_changed, &fg->free_clusters);
> - else
> - atomic64_add(clen_changed, &fg->free_clusters);
> -
> - }
> -
> - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
> - if (err)
> - break;
> - sync_dirty_buffer(bitmap_bh);
> - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
> - sync_dirty_buffer(gdp_bh);
> + err = ext4_mb_mark_group_bb(&mc, group, blkoff, clen);
> if (err)
> break;
>
> block += thisgrp_len;
> len -= thisgrp_len;
> - brelse(bitmap_bh);
> BUG_ON(len < 0);
> }
> -
> - if (err)
> - brelse(bitmap_bh);
> }
>
> /*
> --
> 2.30.0