[PATCH v1] ext4: fix use-after-free in update_super_work when racing with umount

From: Jiayuan Chen

Date: Fri Mar 13 2026 - 02:52:42 EST


Commit b98535d09179 ("ext4: fix bug_on in start_this_handle during umount filesystem")
moved ext4_unregister_sysfs() before flushing s_sb_upd_work to prevent new
error work from being queued via /proc/fs/ext4/xx/mb_groups reads during
unmount. However, this introduced a use-after-free because
update_super_work calls ext4_notify_error_sysfs() -> sysfs_notify() which
accesses the kobject's kernfs_node after it has been freed:

update_super_work ext4_put_super
----------------- --------------
ext4_unregister_sysfs(sb)
kobject_del(&sbi->s_kobj)
__kobject_del()
sysfs_remove_dir()
kobj->sd = NULL
sysfs_put(sd)
kernfs_put() // RCU free
ext4_notify_error_sysfs(sbi)
sysfs_notify(&sbi->s_kobj)
kn = kobj->sd // stale pointer
kernfs_get(kn) // UAF on freed kernfs_node
ext4_journal_destroy()
flush_work(&sbi->s_sb_upd_work)

The original blamed commit needed procfs removal before the work
flush to prevent /proc/fs/ext4/xx/mb_groups reads from queuing new error
work. But it bundled procfs removal and kobject_del together in
ext4_unregister_sysfs(), causing the sysfs kobject to be torn down too
early.

The correct teardown ordering has three constraints:

1. procfs removal must happen before flushing s_sb_upd_work, to prevent
/proc reads from queuing new error work that would BUG_ON.
2. sysfs kobject removal must happen after flushing s_sb_upd_work, since
the work calls sysfs_notify() which accesses the kernfs_node.
3. sysfs kobject removal must happen before jbd2_journal_destroy(), since
userspace could read the journal_task sysfs attribute and dereference
j_task after the journal thread has been killed.

Fix this by:
- Adding ext4_sb_release_proc() to remove procfs entries early.
- Splitting ext4_journal_destroy() into ext4_journal_stop_work() and
ext4_journal_finish(), so that ext4_unregister_sysfs() can be placed
between them to satisfy all three ordering constraints.

Fixes: b98535d09179 ("ext4: fix bug_on in start_this_handle during umount filesystem")
Cc: Jiayuan Chen <jiayuan.chen@xxxxxxxxx>
Signed-off-by: Jiayuan Chen <jiayuan.chen@xxxxxxxxx>
---
fs/ext4/ext4.h | 1 +
fs/ext4/ext4_jbd2.h | 45 ++++++++++++++++++++++++++++-----------------
fs/ext4/super.c | 34 +++++++++++++++++++++++-----------
fs/ext4/sysfs.c | 10 ++++++++++
4 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b76966dc06c0..a693365d224c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3757,6 +3757,7 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* sysfs.c */
extern void ext4_notify_error_sysfs(struct ext4_sb_info *sbi);
extern int ext4_register_sysfs(struct super_block *sb);
+extern void ext4_sb_release_proc(struct super_block *sb);
extern void ext4_unregister_sysfs(struct super_block *sb);
extern int __init ext4_init_sysfs(void);
extern void ext4_exit_sysfs(void);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 63d17c5201b5..2b7d68b11578 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -430,32 +430,43 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
}

/*
- * Pass journal explicitly as it may not be cached in the sbi->s_journal in some
- * cases
+ * Stop new s_sb_upd_work from being queued and flush any pending work.
+ *
+ * At this point only two things can be operating on the journal:
+ * JBD2 thread performing transaction commit and s_sb_upd_work
+ * issuing sb update through the journal. Once we set
+ * EXT4_MF_JOURNAL_DESTROY, new ext4_handle_error() calls will not
+ * queue s_sb_upd_work and ext4_force_commit() makes sure any
+ * ext4_handle_error() calls from the running transaction commit are
+ * finished. Hence no new s_sb_upd_work can be queued after we
+ * flush it here.
*/
-static inline int ext4_journal_destroy(struct ext4_sb_info *sbi, journal_t *journal)
+static inline void ext4_journal_stop_work(struct ext4_sb_info *sbi)
{
- int err = 0;
-
- /*
- * At this point only two things can be operating on the journal.
- * JBD2 thread performing transaction commit and s_sb_upd_work
- * issuing sb update through the journal. Once we set
- * EXT4_JOURNAL_DESTROY, new ext4_handle_error() calls will not
- * queue s_sb_upd_work and ext4_force_commit() makes sure any
- * ext4_handle_error() calls from the running transaction commit are
- * finished. Hence no new s_sb_upd_work can be queued after we
- * flush it here.
- */
ext4_set_mount_flag(sbi->s_sb, EXT4_MF_JOURNAL_DESTROY);
-
ext4_force_commit(sbi->s_sb);
flush_work(&sbi->s_sb_upd_work);
+}
+
+/*
+ * Destroy the journal. Must be called after ext4_journal_stop_work().
+ * Pass journal explicitly as it may not be cached in the sbi->s_journal
+ * in some cases.
+ */
+static inline int ext4_journal_finish(struct ext4_sb_info *sbi,
+ journal_t *journal)
+{
+ int err;

err = jbd2_journal_destroy(journal);
sbi->s_journal = NULL;
-
return err;
}

+static inline int ext4_journal_destroy(struct ext4_sb_info *sbi, journal_t *journal)
+{
+ ext4_journal_stop_work(sbi);
+ return ext4_journal_finish(sbi, journal);
+}
+
#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 752f414aa06b..9bba783f44e1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1280,16 +1280,12 @@ static void ext4_put_super(struct super_block *sb)
int err;

/*
- * Unregister sysfs before destroying jbd2 journal.
- * Since we could still access attr_journal_task attribute via sysfs
- * path which could have sbi->s_journal->j_task as NULL
- * Unregister sysfs before flush sbi->s_sb_upd_work.
- * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
- * read metadata verify failed then will queue error work.
- * update_super_work will call start_this_handle may trigger
- * BUG_ON.
+ * Remove procfs entries before flush s_sb_upd_work. Since user may
+ * read /proc/fs/ext4/xx/mb_groups during umount, if read metadata
+ * verify failed then will queue error work. update_super_work will
+ * call start_this_handle which may trigger BUG_ON.
*/
- ext4_unregister_sysfs(sb);
+ ext4_sb_release_proc(sb);

if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
@@ -1301,14 +1297,30 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->rsv_conversion_wq);
ext4_release_orphan_info(sb);

+ /*
+ * Flush s_sb_upd_work before unregistering sysfs, since
+ * update_super_work calls ext4_notify_error_sysfs() which accesses
+ * the kobject's kernfs_node via sysfs_notify(). Unregistering sysfs
+ * before the flush could lead to a use-after-free on the
+ * kernfs_node.
+ *
+ * Also unregister sysfs before destroying jbd2 journal, since
+ * userspace could read the journal_task sysfs attribute while
+ * jbd2_journal_destroy() is killing the journal thread, leading to
+ * a NULL pointer dereference of j_task in journal_task_show().
+ */
if (sbi->s_journal) {
aborted = is_journal_aborted(sbi->s_journal);
- err = ext4_journal_destroy(sbi, sbi->s_journal);
+ ext4_journal_stop_work(sbi);
+ ext4_unregister_sysfs(sb);
+ err = ext4_journal_finish(sbi, sbi->s_journal);
if ((err < 0) && !aborted) {
ext4_abort(sb, -err, "Couldn't clean up the journal");
}
- } else
+ } else {
flush_work(&sbi->s_sb_upd_work);
+ ext4_unregister_sysfs(sb);
+ }

ext4_es_unregister_shrinker(sbi);
timer_shutdown_sync(&sbi->s_err_report);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index d2ecc1026c0c..f6947416c1e7 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -638,6 +638,16 @@ int ext4_register_sysfs(struct super_block *sb)
return 0;
}

+void ext4_sb_release_proc(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (sbi->s_proc) {
+ remove_proc_subtree(sb->s_id, ext4_proc_root);
+ sbi->s_proc = NULL;
+ }
+}
+
void ext4_unregister_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
--
2.43.0