Re: [PATCH v5 2/2] ocfs2: detect released suballocator BG for fh_to_[dentry|parent]

From: Heming Zhao

Date: Fri Dec 12 2025 - 02:36:15 EST


On Fri, Dec 12, 2025 at 03:12:53PM +0800, Joseph Qi wrote:
>
>
> On 2025/12/12 15:00, Heming Zhao wrote:
> > After ocfs2 gained the ability to reclaim suballocator free block
> > groups (BGs), a suballocator block group may be released. This change
> > causes the xfstest case generic/426 to fail.
> >
> > generic/426 expects a return value of -ENOENT or -ESTALE, but the current
> > code triggers -EROFS.
> >
> > Call stack before ocfs2 gained the ability to reclaim bg:
> >
> > ocfs2_fh_to_dentry //or ocfs2_fh_to_parent
> > ocfs2_get_dentry
> > + ocfs2_test_inode_bit
> > | ocfs2_test_suballoc_bit
> > | + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg,
> > | | //the bg block was always found.
> > | + *res = ocfs2_test_bit //unlink was called, and the bit is zero
> > |
> > + if (!set) //because the above *res is 0
> > status = -ESTALE //the generic/426 expected return value
> >
> > Current call stack that triggers -EROFS:
> >
> > ocfs2_get_dentry
> > ocfs2_test_inode_bit
> > ocfs2_test_suballoc_bit
> > ocfs2_read_group_descriptor
> > + if reading a released bg, validation fails and triggers -EROFS
> >
> > How to fix:
> > Since the BG being read has already been released, we must avoid triggering -EROFS.
> > With this commit, we use ocfs2_read_hint_group_descriptor() to detect
> > the released BG block. This approach quietly handles this type of error
> > and returns -EINVAL, which triggers the caller's existing conversion
> > path to -ESTALE.
> >
> > Signed-off-by: Heming Zhao <heming.zhao@xxxxxxxx>
> > Reviewed-by: Su Yue <glass.su@xxxxxxxx>
> > ---
> > fs/ocfs2/export.c | 6 ++++--
> > fs/ocfs2/suballoc.c | 28 ++++++++++++++++++----------
> > 2 files changed, 22 insertions(+), 12 deletions(-)
> >
> > diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
> > index b95724b767e1..9c2665dd24e2 100644
> > --- a/fs/ocfs2/export.c
> > +++ b/fs/ocfs2/export.c
> > @@ -74,8 +74,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
> > * nice
> > */
> > status = -ESTALE;
> > - } else
> > + } else if (status != -ESTALE) {
> > mlog(ML_ERROR, "test inode bit failed %d\n", status);
> > + }
> > goto unlock_nfs_sync;
> > }
> >
> > @@ -162,8 +163,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
> > if (status < 0) {
> > if (status == -EINVAL) {
> > status = -ESTALE;
> > - } else
> > + } else if (status != -ESTALE) {
> > mlog(ML_ERROR, "test inode bit failed %d\n", status);
> > + }
> > parent = ERR_PTR(status);
> > goto bail_unlock;
> > }
> > diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> > index 9a19f5230c8c..ddcfa6e001e8 100644
> > --- a/fs/ocfs2/suballoc.c
> > +++ b/fs/ocfs2/suballoc.c
> > @@ -3152,7 +3152,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> > struct ocfs2_group_desc *group;
> > struct buffer_head *group_bh = NULL;
> > u64 bg_blkno;
> > - int status;
> > + int status, quiet = 0, released;
> >
> > trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
> > (unsigned int)bit);
> > @@ -3168,11 +3168,15 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> >
> > bg_blkno = group_blkno ? group_blkno :
> > ocfs2_which_suballoc_group(blkno, bit);
> > - status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
> > - &group_bh);
> > - if (status < 0) {
> > + status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno,
> > + &group_bh, &released);
> > + if (released) {
> > + quiet = 1;
> > + status = -ESTALE;
> > + goto bail;
> > + } else if (status < 0) {
> > mlog(ML_ERROR, "read group %llu failed %d\n",
> > - (unsigned long long)bg_blkno, status);
> > + (unsigned long long)bg_blkno, status);

OK
>
> This can be kept untouched.
>
> > goto bail;
> > }
> >
> > @@ -3182,7 +3186,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> > bail:
> > brelse(group_bh);
> >
> > - if (status)
> > + if (status && (!quiet))
>
> '!quiet' is enough; the parentheses are unneeded.

OK. Very interesting, this is not my code style.
Thanks for your careful review.

Heming
>
> Otherwise looks good to me.
>
> Thanks,
> Joseph
>
> > mlog_errno(status);
> > return status;
> > }
> > @@ -3202,7 +3206,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> > */
> > int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> > {
> > - int status;
> > + int status, quiet = 0;
> > u64 group_blkno = 0;
> > u16 suballoc_bit = 0, suballoc_slot = 0;
> > struct inode *inode_alloc_inode;
> > @@ -3244,8 +3248,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> >
> > status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
> > group_blkno, blkno, suballoc_bit, res);
> > - if (status < 0)
> > - mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> > + if (status < 0) {
> > + if (status == -ESTALE)
> > + quiet = 1;
> > + else
> > + mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> > + }
> >
> > ocfs2_inode_unlock(inode_alloc_inode, 0);
> > inode_unlock(inode_alloc_inode);
> > @@ -3253,7 +3261,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> > iput(inode_alloc_inode);
> > brelse(alloc_bh);
> > bail:
> > - if (status)
> > + if (status && !quiet)
> > mlog_errno(status);
> > return status;
> > }
>