[PATCH v10 5/5] ext4: prevent deadlock from duplicate EA inode references on corrupted fs
From: Yun Zhou
Date: Thu Jun 25 2026 - 11:40:59 EST
On a corrupted filesystem, multiple xattr entries may reference the same
EA inode. When ext4_xattr_inode_dec_ref_all() processes such entries, it
can dec_ref the EA inode (setting nlink=0) and queue it for deferred iput.
If the deferred worker runs before the loop processes the duplicate entry,
the second iget() may block on I_FREEING while the worker's eviction waits
for the caller's transaction to commit -- classic ABBA deadlock.
Fix by tracking successfully processed EA inodes on a per-call llist
(reusing i_ea_iput_node) and skipping any ea_ino already in the list.
This covers both intra-block duplicates and cross ibody/block duplicates
in ext4_xattr_delete_inode().
The actual ext4_put_ea_inode() is deferred until after the processing
loop completes (ext4_put_ea_inode_llist), ensuring no EA inode is queued
for eviction while the loop is still iterating.
Signed-off-by: Yun Zhou <yun.zhou@xxxxxxxxxxxxx>
---
fs/ext4/xattr.c | 68 ++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 61 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7f334349bd4f..5c929043e44a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1152,11 +1152,41 @@ static int ext4_xattr_restart_fn(handle_t *handle, struct inode *inode,
return 0;
}
+/* Check if an EA inode number is already in the processed llist. */
+static bool ext4_ea_ino_in_llist(unsigned int ea_ino,
+ struct llist_head *processed)
+{
+ struct ext4_inode_info *ei;
+
+ llist_for_each_entry(ei, processed->first, i_ea_iput_node) {
+ if (ei->vfs_inode.i_ino == ea_ino)
+ return true;
+ }
+ return false;
+}
+
+/* Put all EA inodes on a processed llist via ext4_put_ea_inode. */
+static void ext4_put_ea_inode_llist(struct super_block *sb,
+ struct llist_head *processed)
+{
+ struct llist_node *node = llist_del_all(processed);
+ struct llist_node *next;
+
+ while (node) {
+ struct ext4_inode_info *ei = container_of(node,
+ struct ext4_inode_info, i_ea_iput_node);
+ next = node->next;
+ ext4_put_ea_inode(sb, &ei->vfs_inode);
+ node = next;
+ }
+}
+
static void
ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
struct buffer_head *bh,
struct ext4_xattr_entry *first, bool block_csum,
- int extra_credits, bool skip_quota)
+ int extra_credits, bool skip_quota,
+ struct llist_head *processed)
{
struct inode *ea_inode;
struct ext4_xattr_entry *entry;
@@ -1186,6 +1216,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
if (!entry->e_value_inum)
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
+
+ /* Skip if already processed (duplicate on corrupted fs) */
+ if (ext4_ea_ino_in_llist(ea_ino, processed))
+ continue;
+
err = ext4_xattr_inode_iget(parent, ea_ino,
le32_to_cpu(entry->e_hash),
&ea_inode);
@@ -1235,7 +1270,12 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
entry->e_value_inum = 0;
entry->e_value_size = 0;
- ext4_put_ea_inode(parent->i_sb, ea_inode);
+ /*
+ * Collect processed EA inodes for dedup and deferred iput.
+ * ext4_put_ea_inode_llist() handles the actual release
+ * after the loop, preventing iget deadlocks on duplicates.
+ */
+ llist_add(&EXT4_I(ea_inode)->i_ea_iput_node, processed);
dirty = true;
}
@@ -1262,7 +1302,8 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh,
- int extra_credits)
+ int extra_credits,
+ struct llist_head *processed)
{
struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
u32 hash, ref;
@@ -1304,7 +1345,8 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
BFIRST(bh),
true /* block_csum */,
extra_credits,
- true /* skip_quota */);
+ true /* skip_quota */,
+ processed);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
@@ -2171,8 +2213,12 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
/* Drop the previous xattr block. */
if (bs->bh && bs->bh != new_bh) {
+ LLIST_HEAD(processed);
+
ext4_xattr_release_block(handle, inode, bs->bh,
- 0 /* extra_credits */);
+ 0 /* extra_credits */,
+ &processed);
+ ext4_put_ea_inode_llist(inode->i_sb, &processed);
}
error = 0;
@@ -2866,6 +2912,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_entry *entry;
struct inode *ea_inode;
int error;
+ LLIST_HEAD(processed);
error = ext4_journal_ensure_credits(handle, extra_credits,
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
@@ -2897,7 +2944,8 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
IFIRST(header),
false /* block_csum */,
extra_credits,
- false /* skip_quota */);
+ false /* skip_quota */,
+ &processed);
}
if (EXT4_I(inode)->i_file_acl) {
@@ -2921,6 +2969,11 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_inum)
continue;
+ /* Skip EA inodes already dec_ref'd from ibody */
+ if (ext4_ea_ino_in_llist(
+ le32_to_cpu(entry->e_value_inum),
+ &processed))
+ continue;
error = ext4_xattr_inode_iget(inode,
le32_to_cpu(entry->e_value_inum),
le32_to_cpu(entry->e_hash),
@@ -2935,7 +2988,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
}
ext4_xattr_release_block(handle, inode, bh,
- extra_credits);
+ extra_credits, &processed);
/*
* Update i_file_acl value in the same transaction that releases
* block.
@@ -2951,6 +3004,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
}
error = 0;
cleanup:
+ ext4_put_ea_inode_llist(inode->i_sb, &processed);
brelse(iloc.bh);
brelse(bh);
return error;
--
2.43.0