Re: [PATCH v4 09/13] ext4: ensure zeroed partial blocks are persisted in SYNC mode

From: Jan Kara

Date: Wed Apr 01 2026 - 13:06:26 EST


On Fri 27-03-26 18:29:35, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@xxxxxxxxxx>
>
> In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
> and zeroing a partial block, only data=journal modes guarantee that the
> zeroed data is synchronously persisted after the operation completes.
> For data=ordered/writeback mode and non-journal modes, this guarantee is
> missing.
>
> Introduce a partial_zero parameter to explicitly trigger writeback for
> all scenarios where a partial block is zeroed, ensuring the zeroed data
> is durably persisted.
>
> Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>
> ---
> fs/ext4/ext4.h | 2 +-
> fs/ext4/extents.c | 9 ++++++++-
> fs/ext4/inode.c | 19 ++++++++++++++-----
> 3 files changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 859ae05339ad..bfe86479a83c 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
> int pextents);
> extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
> extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
> - loff_t length);
> + loff_t length, bool *did_zero);
> extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
> extern qsize_t *ext4_get_reserved_space(struct inode *inode);
> extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 002b1ec8cee2..16386f499138 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> loff_t align_start, align_end, new_size = 0;
> loff_t end = offset + len;
> unsigned int blocksize = i_blocksize(inode);
> + bool partial_zeroed = false;
> int ret, flags;
>
> trace_ext4_zero_range(inode, offset, len, mode);
> @@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> return ret;
>
> /* Zero out partial block at the edges of the range */
> - ret = ext4_zero_partial_blocks(inode, offset, len);
> + ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
> if (ret)
> return ret;
> + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
> + ret = filemap_write_and_wait_range(inode->i_mapping, offset,
> + end - 1);
> + if (ret)
> + return ret;
> + }

Shouldn't we handle this somewhat below, where we do:

if (file->f_flags & O_SYNC)
ext4_handle_sync(handle);

It would be logical to keep these together... Similarly for
ext4_punch_hole() below. An yes, the check when calling ext4_handle_sync()
should be extended with an IS_SYNC() check, which is a preexisting bug.

Honza

>
> handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
> if (IS_ERR(handle)) {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 8aa4369e3150..b934ad86a96d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
> return 0;
> }
>
> -int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
> +int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
> + bool *did_zero)
> {
> struct super_block *sb = inode->i_sb;
> unsigned partial_start, partial_end;
> @@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
> /* Handle partial zero within the single block */
> if (start == end &&
> (partial_start || (partial_end != sb->s_blocksize - 1))) {
> - err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
> + err = ext4_block_zero_range(inode, lstart, length, did_zero,
> + NULL);
> return err;
> }
> /* Handle partial zero out on the start of the range */
> if (partial_start) {
> err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
> - NULL, NULL);
> + did_zero, NULL);
> if (err)
> return err;
> }
> /* Handle partial zero out on the end of the range */
> if (partial_end != sb->s_blocksize - 1)
> err = ext4_block_zero_range(inode, byte_end - partial_end,
> - partial_end + 1, NULL, NULL);
> + partial_end + 1, did_zero, NULL);
> return err;
> }
>
> @@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
> loff_t end = offset + length;
> handle_t *handle;
> unsigned int credits;
> + bool partial_zeroed = false;
> int ret;
>
> trace_ext4_punch_hole(inode, offset, length, 0);
> @@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
> if (ret)
> return ret;
>
> - ret = ext4_zero_partial_blocks(inode, offset, length);
> + ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
> if (ret)
> return ret;
> + if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
> + ret = filemap_write_and_wait_range(inode->i_mapping, offset,
> + end - 1);
> + if (ret)
> + return ret;
> + }
>
> if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
> credits = ext4_chunk_trans_extent(inode, 0);
> --
> 2.52.0
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR