[PATCH v7] f2fs: protect gc task pointer during teardown
From: Zhang Cen
Date: Tue Jun 30 2026 - 10:25:58 EST
This patch is based on the preceding patch "f2fs: embed
f2fs_gc_kthread in f2fs_sb_info", which gives the GC waitqueues and
thread tunables superblock lifetime. With that container lifetime fixed,
f2fs_gc_task remains the running-state marker and the task_struct pointer
that needs separate lifetime protection.
During shutdown, f2fs_stop_gc_thread() stops the GC task and wakes
GC_MERGE foreground waiters while f2fs_balance_fs() can concurrently
queue on gc_th->fggc_wq. The preceding embedded-gc_thread patch keeps
that waitqueue storage alive for the superblock lifetime; this patch
protects the remaining f2fs_gc_task pointer by publishing and detaching
it under gc_task_lock, and by making sysfs users that dereference the
task hold the same lock.
The buggy scenario involves two paths, with each column showing the
order within that path:
foreground f2fs_balance_fs() caller: shutdown path:
1. observes no checkpoint error 1. sets CP_ERROR_FLAG
2. snapshots sbi->gc_thread 2. enters f2fs_stop_gc_thread()
3. queues on gc_th->fggc_wq 3. stops gc_th->f2fs_gc_task
4. wakes gc_wait_queue_head 4. wakes gc_th->fggc_wq
5. sleeps for foreground GC 5. frees gc_th in the old layout
6. finish_wait() touches fggc_wq
GC_MERGE does not keep independent work_struct items that shutdown can
cancel. Its pending foreground GC requests are waitqueue waiters. Drain
them by withdrawing the GC task pointer, stopping the task, waking
gc_th->fggc_wq, and leaving each waiter to remove its own wait entry
with finish_wait().
Add gc_task_lock to struct f2fs_gc_kthread and use f2fs_update_gc_task()
to publish the GC task only after the new kthread has been created and
its nice value has been set. The start path uses kthread_create() so the
task is not woken until after f2fs_gc_task is visible to waiters. The
stop path updates f2fs_gc_task to NULL under the same lock before
kthread_stop(), so later readers see that no new foreground GC work
should be handed to the background thread.
f2fs_balance_fs() also rechecks f2fs_gc_task after prepare_to_wait(). If
the GC task has already been withdrawn, the caller removes its wait entry
without waking the GC thread or sleeping for new foreground GC work. Thus
shutdown drains already queued waiters and stops accepting new foreground
GC work once the task pointer is withdrawn.
Task pointer users are protected separately from the embedded container
lifetime. A sysfs critical_task_priority store now checks f2fs_gc_task
and calls set_user_nice() under gc_task_lock. Boolean running-state
checks that do not dereference the task_struct use READ_ONCE().
One observed report was:
BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
Write of size 8 at addr ffff8881150819b8 by task dd/802
The buggy address belongs to the object at ffff888115081900 which
belongs to the cache kmalloc-256 of size 256
The buggy address is located 184 bytes inside of freed 256-byte region
Call trace:
finish_wait()
f2fs_balance_fs()
f2fs_write_single_data_page()
f2fs_write_cache_pages()
__f2fs_write_data_pages()
do_writepages()
filemap_fdatawrite_wbc()
__filemap_fdatawrite_range()
file_write_and_wait_range()
f2fs_do_sync_file()
f2fs_sync_file()
do_fsync()
Freed by task stack:
kfree()
f2fs_stop_gc_thread()
f2fs_do_shutdown()
f2fs_shutdown()
fs_bdev_mark_dead()
Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
Assisted-by: Codex:gpt-5.5
Signed-off-by: Zhang Cen <rollkingzzc@xxxxxxxxx>
---
Based on:
- [PATCH] f2fs: embed f2fs_gc_kthread in f2fs_sb_info
fs/f2fs/f2fs.h | 15 +++++++++++++++
fs/f2fs/gc.c | 22 +++++++++++++---------
fs/f2fs/segment.c | 16 ++++++++++------
fs/f2fs/super.c | 5 +++--
fs/f2fs/sysfs.c | 15 +++++++++------
5 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8011bbdf2c68..1aa8f8adddaa 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/sched/mm.h>
+#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -1750,6 +1751,7 @@ struct decompress_io_ctx {
struct f2fs_gc_kthread {
struct task_struct *f2fs_gc_task;
+ spinlock_t gc_task_lock; /* protects f2fs_gc_task */
wait_queue_head_t gc_wait_queue_head;
/* for gc sleep time */
@@ -1775,6 +1777,19 @@ struct f2fs_gc_kthread {
unsigned int boost_gc_greedy;
};
+static inline struct task_struct *
+f2fs_update_gc_task(struct f2fs_gc_kthread *gc_th, struct task_struct *task)
+{
+ struct task_struct *old_task;
+
+ spin_lock(&gc_th->gc_task_lock);
+ old_task = READ_ONCE(gc_th->f2fs_gc_task);
+ WRITE_ONCE(gc_th->f2fs_gc_task, task);
+ spin_unlock(&gc_th->gc_task_lock);
+
+ return old_task;
+}
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e8244b92d8ce..7253a920153b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -194,6 +194,7 @@ static int gc_thread_func(void *data)
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = &sbi->gc_thread;
+ struct task_struct *task;
dev_t dev = sbi->sb->s_bdev->bd_dev;
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
@@ -219,26 +220,29 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
init_waitqueue_head(&gc_th->gc_wait_queue_head);
init_waitqueue_head(&gc_th->fggc_wq);
- gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
- "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
- if (IS_ERR(gc_th->f2fs_gc_task)) {
- int err = PTR_ERR(gc_th->f2fs_gc_task);
+ task = kthread_create(gc_thread_func, sbi, "f2fs_gc-%u:%u",
+ MAJOR(dev), MINOR(dev));
+ if (IS_ERR(task)) {
+ int err = PTR_ERR(task);
- gc_th->f2fs_gc_task = NULL;
return err;
}
- set_user_nice(gc_th->f2fs_gc_task,
- PRIO_TO_NICE(sbi->critical_task_priority));
+ set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
+ f2fs_update_gc_task(gc_th, task);
+ wake_up_process(task);
return 0;
}
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = &sbi->gc_thread;
+ struct task_struct *task;
- kthread_stop(gc_th->f2fs_gc_task);
- gc_th->f2fs_gc_task = NULL;
+ task = f2fs_update_gc_task(gc_th, NULL);
+ if (!task)
+ return;
+ kthread_stop(task);
wake_up_all(&gc_th->fggc_wq);
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0b706568b034..e6e67c233e27 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -425,6 +425,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ struct f2fs_gc_kthread *gc_th = &sbi->gc_thread;
+
if (f2fs_cp_error(sbi))
return;
@@ -452,14 +454,16 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
f2fs_submit_merged_write(sbi, DATA);
f2fs_submit_all_merged_ipu_writes(sbi);
- if (test_opt(sbi, GC_MERGE) && sbi->gc_thread.f2fs_gc_task) {
+ if (test_opt(sbi, GC_MERGE) && READ_ONCE(gc_th->f2fs_gc_task)) {
DEFINE_WAIT(wait);
- prepare_to_wait(&sbi->gc_thread.fggc_wq, &wait,
- TASK_UNINTERRUPTIBLE);
- wake_up(&sbi->gc_thread.gc_wait_queue_head);
- io_schedule();
- finish_wait(&sbi->gc_thread.fggc_wq, &wait);
+ prepare_to_wait(&gc_th->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (READ_ONCE(gc_th->f2fs_gc_task)) {
+ wake_up(&gc_th->gc_wait_queue_head);
+ io_schedule();
+ }
+ finish_wait(&gc_th->fggc_wq, &wait);
} else {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e9ecd67a3f3b..140fffe93ea4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2943,11 +2943,11 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
if ((flags & SB_RDONLY) ||
(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
!test_opt(sbi, GC_MERGE))) {
- if (sbi->gc_thread.f2fs_gc_task) {
+ if (READ_ONCE(sbi->gc_thread.f2fs_gc_task)) {
f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
}
- } else if (!sbi->gc_thread.f2fs_gc_task) {
+ } else if (!READ_ONCE(sbi->gc_thread.f2fs_gc_task)) {
err = f2fs_start_gc_thread(sbi);
if (err)
goto restore_opts;
@@ -5051,6 +5051,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
spin_lock_init(&sbi->inode_lock[i]);
}
mutex_init(&sbi->flush_lock);
+ spin_lock_init(&sbi->gc_thread.gc_task_lock);
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index be92c05a5420..c83cd1d3ede2 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -664,7 +664,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
sbi->gc_mode = GC_NORMAL;
} else if (t == 1) {
sbi->gc_mode = GC_URGENT_HIGH;
- if (sbi->gc_thread.f2fs_gc_task) {
+ if (READ_ONCE(sbi->gc_thread.f2fs_gc_task)) {
sbi->gc_thread.gc_wake = true;
wake_up_interruptible_all(
&sbi->gc_thread.gc_wait_queue_head);
@@ -674,7 +674,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
sbi->gc_mode = GC_URGENT_LOW;
} else if (t == 3) {
sbi->gc_mode = GC_URGENT_MID;
- if (sbi->gc_thread.f2fs_gc_task) {
+ if (READ_ONCE(sbi->gc_thread.f2fs_gc_task)) {
sbi->gc_thread.gc_wake = true;
wake_up_interruptible_all(
&sbi->gc_thread.gc_wait_queue_head);
@@ -981,17 +981,20 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
}
if (!strcmp(a->attr.name, "critical_task_priority")) {
+ int nice;
+
if (t < NICE_TO_PRIO(MIN_NICE) || t > NICE_TO_PRIO(MAX_NICE))
return -EINVAL;
if (!capable(CAP_SYS_NICE))
return -EPERM;
sbi->critical_task_priority = t;
+ nice = PRIO_TO_NICE(sbi->critical_task_priority);
if (sbi->cprc_info.f2fs_issue_ckpt)
- set_user_nice(sbi->cprc_info.f2fs_issue_ckpt,
- PRIO_TO_NICE(sbi->critical_task_priority));
+ set_user_nice(sbi->cprc_info.f2fs_issue_ckpt, nice);
+ spin_lock(&sbi->gc_thread.gc_task_lock);
if (sbi->gc_thread.f2fs_gc_task)
- set_user_nice(sbi->gc_thread.f2fs_gc_task,
- PRIO_TO_NICE(sbi->critical_task_priority));
+ set_user_nice(sbi->gc_thread.f2fs_gc_task, nice);
+ spin_unlock(&sbi->gc_thread.gc_task_lock);
return count;
}
--
2.43.0