Re: [PATCH 06/10] ext4: move ordered data handling out of ext4_block_do_zero_range()

From: Zhang Yi

Date: Mon Mar 23 2026 - 23:36:54 EST


On 3/24/2026 12:59 AM, Jan Kara wrote:
> On Tue 10-03-26 09:40:57, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@xxxxxxxxxx>
>>
>> Remove the handle parameter from ext4_block_do_zero_range() and move the
>> ordered data handling to ext4_block_zero_eof().
>>
>> This is necessary for truncate up and append writes across a range
>> extending beyond EOF. The ordered data must be committed before updating
>> i_disksize to prevent exposing stale on-disk data from concurrent
>> post-EOF mmap writes during previous folio writeback or in case of
>> system crash during append writes.
>>
>> This is unnecessary for partial block hole punching because the entire
>> punch operation does not provide atomicity guarantees and can already
>> expose intermediate results in case of crash.
>
> Also because hole punching can only ever expose data that was there before
> the punch but missed zeroing during append / truncate could expose data
> that was not visible in the file before the operation.

Yeah, right, will add.

Thanks,
Yi.

>
>> Since ordered data handling is no longer performed inside
>> ext4_zero_partial_blocks(), ext4_punch_hole() no longer needs to attach
>> jinode.
>>
>> This is prepared for the conversion to the iomap infrastructure, which
>> does not use ordered data mode while zeroing post-EOF partial blocks.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>
>
> The patch looks good. Feel free to add:
>
> Reviewed-by: Jan Kara <jack@xxxxxxx>
>
> Honza
>
>> ---
>> fs/ext4/inode.c | 58 ++++++++++++++++++++++++-------------------------
>> 1 file changed, 29 insertions(+), 29 deletions(-)
>>
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index 86f169df226a..8fea044b3bff 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -4076,12 +4076,12 @@ static struct buffer_head *ext4_block_get_zero_range(struct inode *inode,
>> return err ? ERR_PTR(err) : NULL;
>> }
>>
>> -static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode,
>> - loff_t from, loff_t length, bool *did_zero)
>> +static int ext4_block_do_zero_range(struct inode *inode, loff_t from,
>> + loff_t length, bool *did_zero,
>> + bool *zero_written)
>> {
>> struct buffer_head *bh;
>> struct folio *folio;
>> - int err = 0;
>>
>> bh = ext4_block_get_zero_range(inode, from, length);
>> if (IS_ERR_OR_NULL(bh))
>> @@ -4092,19 +4092,14 @@ static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode,
>> BUFFER_TRACE(bh, "zeroed end of block");
>>
>> mark_buffer_dirty(bh);
>> - /*
>> - * Only the written block requires ordered data to prevent exposing
>> - * stale data.
>> - */
>> - if (ext4_should_order_data(inode) &&
>> - !buffer_unwritten(bh) && !buffer_delay(bh))
>> - err = ext4_jbd2_inode_add_write(handle, inode, from, length);
>> - if (!err && did_zero)
>> + if (did_zero)
>> *did_zero = true;
>> + if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh))
>> + *zero_written = true;
>>
>> folio_unlock(folio);
>> folio_put(folio);
>> - return err;
>> + return 0;
>> }
>>
>> static int ext4_block_journalled_zero_range(handle_t *handle,
>> @@ -4148,7 +4143,8 @@ static int ext4_block_journalled_zero_range(handle_t *handle,
>> * that corresponds to 'from'
>> */
>> static int ext4_block_zero_range(handle_t *handle, struct inode *inode,
>> - loff_t from, loff_t length, bool *did_zero)
>> + loff_t from, loff_t length, bool *did_zero,
>> + bool *zero_written)
>> {
>> unsigned blocksize = inode->i_sb->s_blocksize;
>> unsigned int max = blocksize - (from & (blocksize - 1));
>> @@ -4167,7 +4163,8 @@ static int ext4_block_zero_range(handle_t *handle, struct inode *inode,
>> return ext4_block_journalled_zero_range(handle, inode, from,
>> length, did_zero);
>> }
>> - return ext4_block_do_zero_range(handle, inode, from, length, did_zero);
>> + return ext4_block_do_zero_range(inode, from, length, did_zero,
>> + zero_written);
>> }
>>
>> /*
>> @@ -4184,6 +4181,7 @@ int ext4_block_zero_eof(handle_t *handle, struct inode *inode,
>> unsigned int offset;
>> loff_t length = end - from;
>> bool did_zero = false;
>> + bool zero_written = false;
>> int err;
>>
>> offset = from & (blocksize - 1);
>> @@ -4196,9 +4194,22 @@ int ext4_block_zero_eof(handle_t *handle, struct inode *inode,
>> if (length > blocksize - offset)
>> length = blocksize - offset;
>>
>> - err = ext4_block_zero_range(handle, inode, from, length, &did_zero);
>> + err = ext4_block_zero_range(handle, inode, from, length,
>> + &did_zero, &zero_written);
>> if (err)
>> return err;
>> + /*
>> + * It's necessary to order zeroed data before update i_disksize when
>> + * truncating up or performing an append write, because there might be
>> + * exposing stale on-disk data which may caused by concurrent post-EOF
>> + * mmap write during folio writeback.
>> + */
>> + if (ext4_should_order_data(inode) &&
>> + did_zero && zero_written && !IS_DAX(inode)) {
>> + err = ext4_jbd2_inode_add_write(handle, inode, from, length);
>> + if (err)
>> + return err;
>> + }
>>
>> return did_zero ? length : 0;
>> }
>> @@ -4222,13 +4233,13 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
>> if (start == end &&
>> (partial_start || (partial_end != sb->s_blocksize - 1))) {
>> err = ext4_block_zero_range(handle, inode, lstart,
>> - length, NULL);
>> + length, NULL, NULL);
>> return err;
>> }
>> /* Handle partial zero out on the start of the range */
>> if (partial_start) {
>> err = ext4_block_zero_range(handle, inode, lstart,
>> - sb->s_blocksize, NULL);
>> + sb->s_blocksize, NULL, NULL);
>> if (err)
>> return err;
>> }
>> @@ -4236,7 +4247,7 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
>> if (partial_end != sb->s_blocksize - 1)
>> err = ext4_block_zero_range(handle, inode,
>> byte_end - partial_end,
>> - partial_end + 1, NULL);
>> + partial_end + 1, NULL, NULL);
>> return err;
>> }
>>
>> @@ -4411,17 +4422,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>> end = max_end;
>> length = end - offset;
>>
>> - /*
>> - * Attach jinode to inode for jbd2 if we do any zeroing of partial
>> - * block.
>> - */
>> - if (!IS_ALIGNED(offset | end, sb->s_blocksize)) {
>> - ret = ext4_inode_attach_jinode(inode);
>> - if (ret < 0)
>> - return ret;
>> - }
>> -
>> -
>> ret = ext4_update_disksize_before_punch(inode, offset, length);
>> if (ret)
>> return ret;
>> --
>> 2.52.0
>>