From: Zhang Cen <rollkingzzc@gmail.com>
To: Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>
Cc: Gao Xiang <xiang@kernel.org>,
linux-f2fs-devel@lists.sourceforge.net,
linux-kernel@vger.kernel.org, zerocling0077@gmail.com,
2045gemini@gmail.com, Zhang Cen <rollkingzzc@gmail.com>
Subject: [PATCH v4] f2fs: protect published gc_thread during teardown
Date: Sat, 30 May 2026 22:33:07 +0800 [thread overview]
Message-ID: <20260530143307.3596771-1-rollkingzzc@gmail.com> (raw)
f2fs_stop_gc_thread() stops the background GC task, wakes foreground
GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
pointer. A foreground f2fs_balance_fs() caller can already have copied
that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
stop time can leave finish_wait() operating on a freed waitqueue.
Keep the allocated GC-thread state until the superblock is destroyed and
use gc_th->f2fs_gc_task as the running-state marker. The stop path now
withdraws the task pointer with xchg(), stops the task, and wakes any
foreground waiters, but leaves the waitqueue storage valid. The start
path reuses a stopped gc_thread object instead of reinitializing its
waitqueues, and remount restart decisions check the task pointer rather
than only the object pointer.
f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
f2fs_gc_task after prepare_to_wait(). If teardown wins the race after
the first check, the foreground caller removes its wait entry without
sleeping on a worker that has already been withdrawn.
Validation reproduced this kernel report:
BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
Write of size 8 at addr ffff8881150819b8 by task dd/802
The buggy address belongs to the object at ffff888115081900 which
belongs to the cache kmalloc-256 of size 256
The buggy address is located 184 bytes inside of freed 256-byte region
Call trace:
finish_wait()
f2fs_balance_fs()
f2fs_write_single_data_page()
f2fs_write_cache_pages()
__f2fs_write_data_pages()
do_writepages()
filemap_fdatawrite_wbc()
__filemap_fdatawrite_range()
file_write_and_wait_range()
f2fs_do_sync_file()
f2fs_sync_file()
do_fsync()
Freed by task stack:
kfree()
f2fs_stop_gc_thread()
f2fs_do_shutdown()
f2fs_shutdown()
fs_bdev_mark_dead()
Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
---
v4:
- Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
keeps the existing heap-allocated gc_thread object alive until
superblock teardown.
- Use f2fs_gc_task as the running-state marker and withdraw it with
xchg() before waking GC_MERGE waiters.
- Reuse a stopped gc_thread object across remount restarts so the
waitqueues are not reinitialized while old waiters can still finish.
- Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
with teardown does not sleep after the worker has been withdrawn.
v3:
- Add the Fixes tag for the GC_MERGE foreground wait path.
- Fix checkpatch style issues in the broader lifetime variant.
v2:
- Sashiko.dev pointed out that GC_MERGE foreground waiters and
GC-thread users needed lifetime-safe access after teardown.
fs/f2fs/gc.c | 48 +++++++++++++++++++++++++++++------------------
fs/f2fs/segment.c | 19 ++++++++++++-------
fs/f2fs/super.c | 7 +++++--
3 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba93010924c06..20f8394482a09 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
- struct f2fs_gc_kthread *gc_th;
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+ struct task_struct *task;
+ bool allocated = false;
dev_t dev = sbi->sb->s_bdev->bd_dev;
- gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
- if (!gc_th)
- return -ENOMEM;
+ if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
+ return 0;
+
+ if (!gc_th) {
+ gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
+ if (!gc_th)
+ return -ENOMEM;
+ init_waitqueue_head(&gc_th->gc_wait_queue_head);
+ init_waitqueue_head(&gc_th->fggc_wq);
+ sbi->gc_thread = gc_th;
+ allocated = true;
+ }
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
@@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->gc_wake = false;
- sbi->gc_thread = gc_th;
- init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
- init_waitqueue_head(&sbi->gc_thread->fggc_wq);
- sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
- "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
- if (IS_ERR(gc_th->f2fs_gc_task)) {
- int err = PTR_ERR(gc_th->f2fs_gc_task);
+ task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
+ MAJOR(dev), MINOR(dev));
+ if (IS_ERR(task)) {
+ int err = PTR_ERR(task);
- kfree(gc_th);
- sbi->gc_thread = NULL;
+ if (allocated) {
+ kfree(gc_th);
+ sbi->gc_thread = NULL;
+ }
return err;
}
- set_user_nice(gc_th->f2fs_gc_task,
- PRIO_TO_NICE(sbi->critical_task_priority));
+ WRITE_ONCE(gc_th->f2fs_gc_task, task);
+ set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
return 0;
}
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+ struct task_struct *task;
if (!gc_th)
return;
- kthread_stop(gc_th->f2fs_gc_task);
+ task = xchg(&gc_th->f2fs_gc_task, NULL);
+ if (!task)
+ return;
+ kthread_stop(task);
wake_up_all(&gc_th->fggc_wq);
- kfree(gc_th);
- sbi->gc_thread = NULL;
}
static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 788f8b0502492..84307525edd27 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ struct f2fs_gc_kthread *gc_th;
+
if (f2fs_cp_error(sbi))
return;
@@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
if (has_enough_free_secs(sbi, 0, 0))
return;
- if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
- sbi->gc_thread->f2fs_gc_task) {
+ gc_th = sbi->gc_thread;
+ if (test_opt(sbi, GC_MERGE) && gc_th &&
+ READ_ONCE(gc_th->f2fs_gc_task)) {
DEFINE_WAIT(wait);
- prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
- TASK_UNINTERRUPTIBLE);
- wake_up(&sbi->gc_thread->gc_wait_queue_head);
- io_schedule();
- finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+ prepare_to_wait(&gc_th->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (READ_ONCE(gc_th->f2fs_gc_task)) {
+ wake_up(&gc_th->gc_wait_queue_head);
+ io_schedule();
+ }
+ finish_wait(&gc_th->fggc_wq, &wait);
} else {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ccf806b676f53..d6863da05a7c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
if ((flags & SB_RDONLY) ||
(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
!test_opt(sbi, GC_MERGE))) {
- if (sbi->gc_thread) {
+ if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
}
- } else if (!sbi->gc_thread) {
+ } else if (!sbi->gc_thread ||
+ !READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
err = f2fs_start_gc_thread(sbi);
if (err)
goto restore_opts;
@@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
free_sb_buf:
kfree(raw_super);
free_sbi:
+ kfree(sbi->gc_thread);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_unregister_key(&sbi->cp_global_sem_key);
#endif
@@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
/* Release block devices last, after fscrypt_destroy_keyring(). */
if (sbi) {
destroy_device_list(sbi);
+ kfree(sbi->gc_thread);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_unregister_key(&sbi->cp_global_sem_key);
#endif
--
2.43.0
WARNING: multiple messages have this Message-ID (diff)
From: Zhang Cen <rollkingzzc@gmail.com>
To: Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>
Cc: linux-kernel@vger.kernel.org,
linux-f2fs-devel@lists.sourceforge.net, 2045gemini@gmail.com,
Gao Xiang <xiang@kernel.org>,
zerocling0077@gmail.com, Zhang Cen <rollkingzzc@gmail.com>
Subject: [f2fs-dev] [PATCH v4] f2fs: protect published gc_thread during teardown
Date: Sat, 30 May 2026 22:33:07 +0800 [thread overview]
Message-ID: <20260530143307.3596771-1-rollkingzzc@gmail.com> (raw)
f2fs_stop_gc_thread() stops the background GC task, wakes foreground
GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
pointer. A foreground f2fs_balance_fs() caller can already have copied
that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
stop time can leave finish_wait() operating on a freed waitqueue.
Keep the allocated GC-thread state until the superblock is destroyed and
use gc_th->f2fs_gc_task as the running-state marker. The stop path now
withdraws the task pointer with xchg(), stops the task, and wakes any
foreground waiters, but leaves the waitqueue storage valid. The start
path reuses a stopped gc_thread object instead of reinitializing its
waitqueues, and remount restart decisions check the task pointer rather
than only the object pointer.
f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
f2fs_gc_task after prepare_to_wait(). If teardown wins the race after
the first check, the foreground caller removes its wait entry without
sleeping on a worker that has already been withdrawn.
Validation reproduced this kernel report:
BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
Write of size 8 at addr ffff8881150819b8 by task dd/802
The buggy address belongs to the object at ffff888115081900 which
belongs to the cache kmalloc-256 of size 256
The buggy address is located 184 bytes inside of freed 256-byte region
Call trace:
finish_wait()
f2fs_balance_fs()
f2fs_write_single_data_page()
f2fs_write_cache_pages()
__f2fs_write_data_pages()
do_writepages()
filemap_fdatawrite_wbc()
__filemap_fdatawrite_range()
file_write_and_wait_range()
f2fs_do_sync_file()
f2fs_sync_file()
do_fsync()
Freed by task stack:
kfree()
f2fs_stop_gc_thread()
f2fs_do_shutdown()
f2fs_shutdown()
fs_bdev_mark_dead()
Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
---
v4:
- Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
keeps the existing heap-allocated gc_thread object alive until
superblock teardown.
- Use f2fs_gc_task as the running-state marker and withdraw it with
xchg() before waking GC_MERGE waiters.
- Reuse a stopped gc_thread object across remount restarts so the
waitqueues are not reinitialized while old waiters can still finish.
- Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
with teardown does not sleep after the worker has been withdrawn.
v3:
- Add the Fixes tag for the GC_MERGE foreground wait path.
- Fix checkpatch style issues in the broader lifetime variant.
v2:
- Sashiko.dev pointed out that GC_MERGE foreground waiters and
GC-thread users needed lifetime-safe access after teardown.
fs/f2fs/gc.c | 48 +++++++++++++++++++++++++++++------------------
fs/f2fs/segment.c | 19 ++++++++++++-------
fs/f2fs/super.c | 7 +++++--
3 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba93010924c06..20f8394482a09 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
- struct f2fs_gc_kthread *gc_th;
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+ struct task_struct *task;
+ bool allocated = false;
dev_t dev = sbi->sb->s_bdev->bd_dev;
- gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
- if (!gc_th)
- return -ENOMEM;
+ if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
+ return 0;
+
+ if (!gc_th) {
+ gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
+ if (!gc_th)
+ return -ENOMEM;
+ init_waitqueue_head(&gc_th->gc_wait_queue_head);
+ init_waitqueue_head(&gc_th->fggc_wq);
+ sbi->gc_thread = gc_th;
+ allocated = true;
+ }
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
@@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->gc_wake = false;
- sbi->gc_thread = gc_th;
- init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
- init_waitqueue_head(&sbi->gc_thread->fggc_wq);
- sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
- "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
- if (IS_ERR(gc_th->f2fs_gc_task)) {
- int err = PTR_ERR(gc_th->f2fs_gc_task);
+ task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
+ MAJOR(dev), MINOR(dev));
+ if (IS_ERR(task)) {
+ int err = PTR_ERR(task);
- kfree(gc_th);
- sbi->gc_thread = NULL;
+ if (allocated) {
+ kfree(gc_th);
+ sbi->gc_thread = NULL;
+ }
return err;
}
- set_user_nice(gc_th->f2fs_gc_task,
- PRIO_TO_NICE(sbi->critical_task_priority));
+ WRITE_ONCE(gc_th->f2fs_gc_task, task);
+ set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
return 0;
}
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+ struct task_struct *task;
if (!gc_th)
return;
- kthread_stop(gc_th->f2fs_gc_task);
+ task = xchg(&gc_th->f2fs_gc_task, NULL);
+ if (!task)
+ return;
+ kthread_stop(task);
wake_up_all(&gc_th->fggc_wq);
- kfree(gc_th);
- sbi->gc_thread = NULL;
}
static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 788f8b0502492..84307525edd27 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ struct f2fs_gc_kthread *gc_th;
+
if (f2fs_cp_error(sbi))
return;
@@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
if (has_enough_free_secs(sbi, 0, 0))
return;
- if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
- sbi->gc_thread->f2fs_gc_task) {
+ gc_th = sbi->gc_thread;
+ if (test_opt(sbi, GC_MERGE) && gc_th &&
+ READ_ONCE(gc_th->f2fs_gc_task)) {
DEFINE_WAIT(wait);
- prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
- TASK_UNINTERRUPTIBLE);
- wake_up(&sbi->gc_thread->gc_wait_queue_head);
- io_schedule();
- finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+ prepare_to_wait(&gc_th->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (READ_ONCE(gc_th->f2fs_gc_task)) {
+ wake_up(&gc_th->gc_wait_queue_head);
+ io_schedule();
+ }
+ finish_wait(&gc_th->fggc_wq, &wait);
} else {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ccf806b676f53..d6863da05a7c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
if ((flags & SB_RDONLY) ||
(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
!test_opt(sbi, GC_MERGE))) {
- if (sbi->gc_thread) {
+ if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
}
- } else if (!sbi->gc_thread) {
+ } else if (!sbi->gc_thread ||
+ !READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
err = f2fs_start_gc_thread(sbi);
if (err)
goto restore_opts;
@@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
free_sb_buf:
kfree(raw_super);
free_sbi:
+ kfree(sbi->gc_thread);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_unregister_key(&sbi->cp_global_sem_key);
#endif
@@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
/* Release block devices last, after fscrypt_destroy_keyring(). */
if (sbi) {
destroy_device_list(sbi);
+ kfree(sbi->gc_thread);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_unregister_key(&sbi->cp_global_sem_key);
#endif
--
2.43.0
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
next reply other threads:[~2026-05-30 14:33 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-30 14:33 Zhang Cen [this message]
2026-05-30 14:33 ` [f2fs-dev] [PATCH v4] f2fs: protect published gc_thread during teardown Zhang Cen
2026-06-13 11:10 ` Cen Zhang
2026-06-13 11:10 ` Cen Zhang
2026-06-15 16:16 ` Jaegeuk Kim
2026-06-15 16:16 ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260530143307.3596771-1-rollkingzzc@gmail.com \
--to=rollkingzzc@gmail.com \
--cc=2045gemini@gmail.com \
--cc=chao@kernel.org \
--cc=jaegeuk@kernel.org \
--cc=linux-f2fs-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
--cc=xiang@kernel.org \
--cc=zerocling0077@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.