[PATCH 09/10] ext4: move zero partial block range functions out of active handle
From: Zhang Yi
Date: Mon Mar 09 2026 - 21:51:10 EST
From: Zhang Yi <yi.zhang@xxxxxxxxxx>
Move ext4_block_zero_eof() and ext4_zero_partial_blocks() calls out of
the active handle context, making them independent operations. This is
safe because data is still updated before metadata in both data=ordered
and data=journal modes: we still zero and order the data before
modifying the metadata.
This change is required for the iomap infrastructure conversion because
the iomap buffered I/O path does not use the same journal infrastructure
for partial block zeroing. Its lock ordering between the folio lock and
transaction start is "folio lock -> transaction start", which is the
opposite of the current path. Therefore, zeroing partial blocks cannot
be performed under the active handle.
Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>
---
fs/ext4/extents.c | 29 ++++++++++++-----------------
fs/ext4/inode.c | 36 ++++++++++++++++++------------------
2 files changed, 30 insertions(+), 35 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57a686b600d9..81b9d5b4ad71 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4585,6 +4585,10 @@ static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len,
credits = ext4_chunk_trans_blocks(inode, len_lblk);
depth = ext_depth(inode);
+ /* Zero to the end of the block containing i_size */
+ if (new_size && offset > old_size)
+ ext4_block_zero_eof(inode, old_size, LLONG_MAX);
+
retry:
while (len_lblk) {
/*
@@ -4623,10 +4627,8 @@ static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len,
if (ext4_update_inode_size(inode, epos) & 0x1)
inode_set_mtime_to_ts(inode,
inode_get_ctime(inode));
- if (epos > old_size) {
+ if (epos > old_size)
pagecache_isize_extended(inode, old_size, epos);
- ext4_block_zero_eof(inode, old_size, epos);
- }
}
ret2 = ext4_mark_inode_dirty(handle, inode);
ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4668,7 +4670,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t align_start, align_end, new_size = 0;
loff_t end = offset + len;
unsigned int blocksize = i_blocksize(inode);
- int ret, flags, credits;
+ int ret, flags;
trace_ext4_zero_range(inode, offset, len, mode);
WARN_ON_ONCE(!inode_is_locked(inode));
@@ -4722,25 +4724,18 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (IS_ALIGNED(offset | end, blocksize))
return ret;
- /*
- * In worst case we have to writeout two nonadjacent unwritten
- * blocks and update the inode
- */
- credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
- if (ext4_should_journal_data(inode))
- credits += 2;
- handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
+ /* Zero out partial block at the edges of the range */
+ ret = ext4_zero_partial_blocks(inode, offset, len);
+ if (ret)
+ return ret;
+
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret);
return ret;
}
- /* Zero out partial block at the edges of the range */
- ret = ext4_zero_partial_blocks(inode, offset, len);
- if (ret)
- goto out_handle;
-
if (new_size)
ext4_update_inode_size(inode, new_size);
ret = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d5b783a7c814..5288d36b0f09 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4443,8 +4443,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
if (ret)
return ret;
+ ret = ext4_zero_partial_blocks(inode, offset, length);
+ if (ret)
+ return ret;
+
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- credits = ext4_chunk_trans_extent(inode, 2);
+ credits = ext4_chunk_trans_extent(inode, 0);
else
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
@@ -4454,10 +4458,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
return ret;
}
- ret = ext4_zero_partial_blocks(inode, offset, length);
- if (ret)
- goto out_handle;
-
/* If there are blocks to remove, do it */
start_lblk = EXT4_B_TO_LBLK(inode, offset);
end_lblk = end >> inode->i_blkbits;
@@ -4589,6 +4589,9 @@ int ext4_truncate(struct inode *inode)
err = ext4_inode_attach_jinode(inode);
if (err)
goto out_trace;
+
+ /* Zero to the end of the block containing i_size */
+ ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX);
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4602,10 +4605,6 @@ int ext4_truncate(struct inode *inode)
goto out_trace;
}
- /* Zero to the end of the block containing i_size */
- if (inode->i_size & (inode->i_sb->s_blocksize - 1))
- ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX);
-
/*
* We add the inode to the orphan list, so that if this
* truncate spans multiple transactions, and we crash, we will
@@ -5945,15 +5944,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out_mmap_sem;
}
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- goto out_mmap_sem;
- }
- if (ext4_handle_valid(handle) && shrink) {
- error = ext4_orphan_add(handle, inode);
- orphan = 1;
- }
/*
* Update c/mtime and tail zero the EOF folio on
* truncate up. ext4_truncate() handles the shrink case
@@ -5967,6 +5957,16 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
LLONG_MAX);
}
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ goto out_mmap_sem;
+ }
+ if (ext4_handle_valid(handle) && shrink) {
+ error = ext4_orphan_add(handle, inode);
+ orphan = 1;
+ }
+
if (shrink)
ext4_fc_track_range(handle, inode,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
--
2.52.0