[PATCH v8 3/4] ext4: introduce ext4_put_ea_inode() for safe deferred iput
From: Yun Zhou
Date: Fri Jun 19 2026 - 21:42:12 EST
Calling iput() on EA inodes while holding xattr_sem or a jbd2 handle
can trigger write_inode_now() -> ext4_writepages() -> s_writepages_rwsem,
creating a lock ordering issue during mount (!SB_ACTIVE).
Add ext4_put_ea_inode() which safely releases EA inode references:
when SB_ACTIVE is set, it calls iput() directly (write_inode_now() cannot
be triggered); during mount (!SB_ACTIVE), it queues the inode on a per-sb
lock-free llist and schedules a delayed worker (1 jiffy) to call iput()
in a clean context without holding any ext4 locks. The delay allows
multiple inodes to accumulate before the worker runs, reducing context
switches.
Convert the iput in ext4_xattr_block_set()'s "Drop the previous xattr
block" path to use ext4_xattr_inode_array_free_deferred(), which
releases EA inodes via ext4_put_ea_inode(). This path previously called
ext4_xattr_inode_array_free() (synchronous iput) while holding xattr_sem
and a jbd2 handle.
The worker is flushed in ext4_put_super() before quota shutdown to
ensure all pending EA inode cleanup completes while quota accounting
is still active, and again on the failed_mount_wq error path of
__ext4_fill_super().
Signed-off-by: Yun Zhou <yun.zhou@xxxxxxxxxxxxx>
---
fs/ext4/ext4.h | 5 ++++
fs/ext4/super.c | 6 ++++
fs/ext4/xattr.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++-
fs/ext4/xattr.h | 2 ++
4 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94283a991e5c..e31d60f82a63 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1706,6 +1706,11 @@ struct ext4_sb_info {
struct ext4_es_stats s_es_stats;
struct mb_cache *s_ea_block_cache;
struct mb_cache *s_ea_inode_cache;
+
+ /* Deferred iput for EA inodes to avoid lock ordering issues */
+ struct llist_head s_ea_inode_to_free;
+ struct delayed_work s_ea_inode_work;
+
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Journal triggers for checksum computation */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6a77db4d3124..5dd7c29a70bc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1303,6 +1303,8 @@ static void ext4_put_super(struct super_block *sb)
&sb->s_uuid);
ext4_unregister_li_request(sb);
+ /* Flush deferred EA inode iputs while quota is still active */
+ flush_delayed_work(&sbi->s_ea_inode_work);
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
destroy_workqueue(sbi->rsv_conversion_wq);
@@ -5535,6 +5537,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
needs_recovery = 0;
}
+ init_llist_head(&sbi->s_ea_inode_to_free);
+ INIT_DELAYED_WORK(&sbi->s_ea_inode_work, ext4_ea_inode_work);
+
if (!test_opt(sb, NO_MBCACHE)) {
sbi->s_ea_block_cache = ext4_xattr_create_cache();
if (!sbi->s_ea_block_cache) {
@@ -5763,6 +5768,7 @@ failed_mount8: __maybe_unused
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
+ flush_delayed_work(&sbi->s_ea_inode_work);
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
sbi->s_ea_inode_cache = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 982a1f831e22..79de182e22e6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -117,6 +117,8 @@ const struct xattr_handler * const ext4_xattr_handlers[] = {
static int
ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
struct inode *inode);
+static void ext4_xattr_inode_array_free_deferred(struct super_block *sb,
+ struct ext4_xattr_inode_array *array);
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
@@ -2187,7 +2189,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ext4_xattr_release_block(handle, inode, bs->bh,
&ea_inode_array,
0 /* extra_credits */);
- ext4_xattr_inode_array_free(ea_inode_array);
+ ext4_xattr_inode_array_free_deferred(inode->i_sb,
+ ea_inode_array);
}
error = 0;
@@ -3025,6 +3028,75 @@ void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
kfree(ea_inode_array);
}
+/* Put each EA inode in @array through ext4_put_ea_inode(), then free @array. */
+static void ext4_xattr_inode_array_free_deferred(struct super_block *sb,
+					struct ext4_xattr_inode_array *array)
+{
+	struct inode **slot;
+
+	if (!array)
+		return;
+	for (slot = array->inodes; slot < array->inodes + array->count; slot++)
+		ext4_put_ea_inode(sb, *slot);
+	kfree(array);
+}
+
+struct ext4_ea_iput_entry {
+ struct llist_node node; /* link on sbi->s_ea_inode_to_free */
+ struct inode *inode; /* EA inode reference the worker will iput() */
+};
+
+/*
+ * Deferred-iput worker for EA inodes.
+ *
+ * Detaches everything currently queued on s_ea_inode_to_free with a single
+ * llist_del_all() and drops each inode reference from workqueue context,
+ * i.e. without the queuing caller's xattr_sem or jbd2 handle.  Entries added
+ * after the detach stay on the list for a later run.
+ */
+void ext4_ea_inode_work(struct work_struct *work)
+{
+	struct ext4_sb_info *sbi = container_of(to_delayed_work(work),
+						struct ext4_sb_info,
+						s_ea_inode_work);
+	struct llist_node *batch = llist_del_all(&sbi->s_ea_inode_to_free);
+	struct ext4_ea_iput_entry *entry, *tmp;
+
+	llist_for_each_entry_safe(entry, tmp, batch, node) {
+		iput(entry->inode);
+		kfree(entry);
+	}
+}
+
+/*
+ * Drop a VFS reference on EA inode @inode (NULL is ignored).  Use this rather
+ * than iput() wherever xattr_sem or a jbd2 handle is held and a preceding
+ * ext4_xattr_inode_dec_ref() may have taken i_nlink to zero.
+ *
+ * With SB_ACTIVE set, the reference is dropped inline with iput().  Without
+ * it (e.g. during mount) the inode is queued on s_ea_inode_to_free and
+ * ext4_ea_inode_work() is scheduled one jiffy later to do the iput() outside
+ * the caller's locks.  References dropped with no preceding dec_ref (nlink
+ * stays >= 1, e.g. lookup-only paths) can keep using plain iput().
+ */
+void ext4_put_ea_inode(struct super_block *sb, struct inode *inode)
+{
+	struct ext4_ea_iput_entry *deferred;
+	struct ext4_sb_info *sbi;
+
+	if (!inode)
+		return;
+	if (!(sb->s_flags & SB_ACTIVE)) {
+		sbi = EXT4_SB(sb);
+		deferred = kmalloc(sizeof(*deferred), GFP_NOFS | __GFP_NOFAIL);
+		deferred->inode = inode;
+		llist_add(&deferred->node, &sbi->s_ea_inode_to_free);
+		schedule_delayed_work(&sbi->s_ea_inode_work, 1);
+	} else {
+		iput(inode);
+	}
+}
+
/*
* ext4_xattr_block_cache_insert()
*
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1fedf44d4fb6..52074537dce5 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -190,6 +190,8 @@ extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_inode_array **array,
int extra_credits);
extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+extern void ext4_ea_inode_work(struct work_struct *work);
+extern void ext4_put_ea_inode(struct super_block *sb, struct inode *inode);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
--
2.43.0