All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] f2fs: protect published gc_thread during teardown
@ 2026-05-30 14:33 ` Zhang Cen
  0 siblings, 0 replies; 6+ messages in thread
From: Zhang Cen @ 2026-05-30 14:33 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu
  Cc: Gao Xiang, linux-f2fs-devel, linux-kernel, zerocling0077,
	2045gemini, Zhang Cen

f2fs_stop_gc_thread() stops the background GC task, wakes foreground
GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
pointer.  A foreground f2fs_balance_fs() caller can already have copied
that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
stop time can leave finish_wait() operating on a freed waitqueue.

Keep the allocated GC-thread state until the superblock is destroyed and
use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
withdraws the task pointer with xchg(), stops the task, and wakes any
foreground waiters, but leaves the waitqueue storage valid.  The start
path reuses a stopped gc_thread object instead of reinitializing its
waitqueues, and remount restart decisions check the task pointer rather
than only the object pointer.

f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
the first check, the foreground caller removes its wait entry without
sleeping on a worker that has already been withdrawn.

Validation reproduced this kernel report:

BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
Write of size 8 at addr ffff8881150819b8 by task dd/802
The buggy address belongs to the object at ffff888115081900 which
belongs to the cache kmalloc-256 of size 256
The buggy address is located 184 bytes inside of freed 256-byte region
Call trace:
  finish_wait()
  f2fs_balance_fs()
  f2fs_write_single_data_page()
  f2fs_write_cache_pages()
  __f2fs_write_data_pages()
  do_writepages()
  filemap_fdatawrite_wbc()
  __filemap_fdatawrite_range()
  file_write_and_wait_range()
  f2fs_do_sync_file()
  f2fs_sync_file()
  do_fsync()
Freed by task stack:
  kfree()
  f2fs_stop_gc_thread()
  f2fs_do_shutdown()
  f2fs_shutdown()
  fs_bdev_mark_dead()

Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
---
v4:
- Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
  keeps the existing heap-allocated gc_thread object alive until
  superblock teardown.
- Use f2fs_gc_task as the running-state marker and withdraw it with
  xchg() before waking GC_MERGE waiters.
- Reuse a stopped gc_thread object across remount restarts so the
  waitqueues are not reinitialized while old waiters can still finish.
- Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
  with teardown does not sleep after the worker has been withdrawn.

v3:
- Add the Fixes tag for the GC_MERGE foreground wait path.
- Fix checkpatch style issues in the broader lifetime variant.

v2:
- Sashiko.dev pointed out that GC_MERGE foreground waiters and
  GC-thread users needed lifetime-safe access after teardown.

 fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
 fs/f2fs/segment.c | 19 ++++++++++++-------
 fs/f2fs/super.c   |  7 +++++--
 3 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba93010924c06..20f8394482a09 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
 
 int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_gc_kthread *gc_th;
+	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+	struct task_struct *task;
+	bool allocated = false;
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
 
-	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-	if (!gc_th)
-		return -ENOMEM;
+	if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
+		return 0;
+
+	if (!gc_th) {
+		gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
+		if (!gc_th)
+			return -ENOMEM;
+		init_waitqueue_head(&gc_th->gc_wait_queue_head);
+		init_waitqueue_head(&gc_th->fggc_wq);
+		sbi->gc_thread = gc_th;
+		allocated = true;
+	}
 
 	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
 	gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
@@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 
 	gc_th->gc_wake = false;
 
-	sbi->gc_thread = gc_th;
-	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
-	init_waitqueue_head(&sbi->gc_thread->fggc_wq);
-	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
-			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
-	if (IS_ERR(gc_th->f2fs_gc_task)) {
-		int err = PTR_ERR(gc_th->f2fs_gc_task);
+	task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
+			   MAJOR(dev), MINOR(dev));
+	if (IS_ERR(task)) {
+		int err = PTR_ERR(task);
 
-		kfree(gc_th);
-		sbi->gc_thread = NULL;
+		if (allocated) {
+			kfree(gc_th);
+			sbi->gc_thread = NULL;
+		}
 		return err;
 	}
 
-	set_user_nice(gc_th->f2fs_gc_task,
-			PRIO_TO_NICE(sbi->critical_task_priority));
+	WRITE_ONCE(gc_th->f2fs_gc_task, task);
+	set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
 	return 0;
 }
 
 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+	struct task_struct *task;
 
 	if (!gc_th)
 		return;
-	kthread_stop(gc_th->f2fs_gc_task);
+	task = xchg(&gc_th->f2fs_gc_task, NULL);
+	if (!task)
+		return;
+	kthread_stop(task);
 	wake_up_all(&gc_th->fggc_wq);
-	kfree(gc_th);
-	sbi->gc_thread = NULL;
 }
 
 static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 788f8b0502492..84307525edd27 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
  */
 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 {
+	struct f2fs_gc_kthread *gc_th;
+
 	if (f2fs_cp_error(sbi))
 		return;
 
@@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	if (has_enough_free_secs(sbi, 0, 0))
 		return;
 
-	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
-				sbi->gc_thread->f2fs_gc_task) {
+	gc_th = sbi->gc_thread;
+	if (test_opt(sbi, GC_MERGE) && gc_th &&
+	    READ_ONCE(gc_th->f2fs_gc_task)) {
 		DEFINE_WAIT(wait);
 
-		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
-					TASK_UNINTERRUPTIBLE);
-		wake_up(&sbi->gc_thread->gc_wait_queue_head);
-		io_schedule();
-		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+		prepare_to_wait(&gc_th->fggc_wq, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (READ_ONCE(gc_th->f2fs_gc_task)) {
+			wake_up(&gc_th->gc_wait_queue_head);
+			io_schedule();
+		}
+		finish_wait(&gc_th->fggc_wq, &wait);
 	} else {
 		struct f2fs_gc_control gc_control = {
 			.victim_segno = NULL_SEGNO,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ccf806b676f53..d6863da05a7c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
 	if ((flags & SB_RDONLY) ||
 			(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
 			!test_opt(sbi, GC_MERGE))) {
-		if (sbi->gc_thread) {
+		if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
 			f2fs_stop_gc_thread(sbi);
 			need_restart_gc = true;
 		}
-	} else if (!sbi->gc_thread) {
+	} else if (!sbi->gc_thread ||
+			!READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
 		err = f2fs_start_gc_thread(sbi);
 		if (err)
 			goto restore_opts;
@@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
 free_sb_buf:
 	kfree(raw_super);
 free_sbi:
+	kfree(sbi->gc_thread);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	lockdep_unregister_key(&sbi->cp_global_sem_key);
 #endif
@@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
 	/* Release block devices last, after fscrypt_destroy_keyring(). */
 	if (sbi) {
 		destroy_device_list(sbi);
+		kfree(sbi->gc_thread);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 		lockdep_unregister_key(&sbi->cp_global_sem_key);
 #endif
-- 
2.43.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [f2fs-dev] [PATCH v4] f2fs: protect published gc_thread during teardown
@ 2026-05-30 14:33 ` Zhang Cen
  0 siblings, 0 replies; 6+ messages in thread
From: Zhang Cen @ 2026-05-30 14:33 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu
  Cc: linux-kernel, linux-f2fs-devel, 2045gemini, Gao Xiang,
	zerocling0077, Zhang Cen

f2fs_stop_gc_thread() stops the background GC task, wakes foreground
GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
pointer.  A foreground f2fs_balance_fs() caller can already have copied
that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
stop time can leave finish_wait() operating on a freed waitqueue.

Keep the allocated GC-thread state until the superblock is destroyed and
use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
withdraws the task pointer with xchg(), stops the task, and wakes any
foreground waiters, but leaves the waitqueue storage valid.  The start
path reuses a stopped gc_thread object instead of reinitializing its
waitqueues, and remount restart decisions check the task pointer rather
than only the object pointer.

f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
the first check, the foreground caller removes its wait entry without
sleeping on a worker that has already been withdrawn.

Validation reproduced this kernel report:

BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
Write of size 8 at addr ffff8881150819b8 by task dd/802
The buggy address belongs to the object at ffff888115081900 which
belongs to the cache kmalloc-256 of size 256
The buggy address is located 184 bytes inside of freed 256-byte region
Call trace:
  finish_wait()
  f2fs_balance_fs()
  f2fs_write_single_data_page()
  f2fs_write_cache_pages()
  __f2fs_write_data_pages()
  do_writepages()
  filemap_fdatawrite_wbc()
  __filemap_fdatawrite_range()
  file_write_and_wait_range()
  f2fs_do_sync_file()
  f2fs_sync_file()
  do_fsync()
Freed by task stack:
  kfree()
  f2fs_stop_gc_thread()
  f2fs_do_shutdown()
  f2fs_shutdown()
  fs_bdev_mark_dead()

Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
---
v4:
- Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
  keeps the existing heap-allocated gc_thread object alive until
  superblock teardown.
- Use f2fs_gc_task as the running-state marker and withdraw it with
  xchg() before waking GC_MERGE waiters.
- Reuse a stopped gc_thread object across remount restarts so the
  waitqueues are not reinitialized while old waiters can still finish.
- Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
  with teardown does not sleep after the worker has been withdrawn.

v3:
- Add the Fixes tag for the GC_MERGE foreground wait path.
- Fix checkpatch style issues in the broader lifetime variant.

v2:
- Sashiko.dev pointed out that GC_MERGE foreground waiters and
  GC-thread users needed lifetime-safe access after teardown.

 fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
 fs/f2fs/segment.c | 19 ++++++++++++-------
 fs/f2fs/super.c   |  7 +++++--
 3 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba93010924c06..20f8394482a09 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
 
 int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_gc_kthread *gc_th;
+	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+	struct task_struct *task;
+	bool allocated = false;
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
 
-	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-	if (!gc_th)
-		return -ENOMEM;
+	if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
+		return 0;
+
+	if (!gc_th) {
+		gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
+		if (!gc_th)
+			return -ENOMEM;
+		init_waitqueue_head(&gc_th->gc_wait_queue_head);
+		init_waitqueue_head(&gc_th->fggc_wq);
+		sbi->gc_thread = gc_th;
+		allocated = true;
+	}
 
 	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
 	gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
@@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 
 	gc_th->gc_wake = false;
 
-	sbi->gc_thread = gc_th;
-	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
-	init_waitqueue_head(&sbi->gc_thread->fggc_wq);
-	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
-			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
-	if (IS_ERR(gc_th->f2fs_gc_task)) {
-		int err = PTR_ERR(gc_th->f2fs_gc_task);
+	task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
+			   MAJOR(dev), MINOR(dev));
+	if (IS_ERR(task)) {
+		int err = PTR_ERR(task);
 
-		kfree(gc_th);
-		sbi->gc_thread = NULL;
+		if (allocated) {
+			kfree(gc_th);
+			sbi->gc_thread = NULL;
+		}
 		return err;
 	}
 
-	set_user_nice(gc_th->f2fs_gc_task,
-			PRIO_TO_NICE(sbi->critical_task_priority));
+	WRITE_ONCE(gc_th->f2fs_gc_task, task);
+	set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
 	return 0;
 }
 
 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+	struct task_struct *task;
 
 	if (!gc_th)
 		return;
-	kthread_stop(gc_th->f2fs_gc_task);
+	task = xchg(&gc_th->f2fs_gc_task, NULL);
+	if (!task)
+		return;
+	kthread_stop(task);
 	wake_up_all(&gc_th->fggc_wq);
-	kfree(gc_th);
-	sbi->gc_thread = NULL;
 }
 
 static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 788f8b0502492..84307525edd27 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
  */
 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 {
+	struct f2fs_gc_kthread *gc_th;
+
 	if (f2fs_cp_error(sbi))
 		return;
 
@@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	if (has_enough_free_secs(sbi, 0, 0))
 		return;
 
-	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
-				sbi->gc_thread->f2fs_gc_task) {
+	gc_th = sbi->gc_thread;
+	if (test_opt(sbi, GC_MERGE) && gc_th &&
+	    READ_ONCE(gc_th->f2fs_gc_task)) {
 		DEFINE_WAIT(wait);
 
-		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
-					TASK_UNINTERRUPTIBLE);
-		wake_up(&sbi->gc_thread->gc_wait_queue_head);
-		io_schedule();
-		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+		prepare_to_wait(&gc_th->fggc_wq, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (READ_ONCE(gc_th->f2fs_gc_task)) {
+			wake_up(&gc_th->gc_wait_queue_head);
+			io_schedule();
+		}
+		finish_wait(&gc_th->fggc_wq, &wait);
 	} else {
 		struct f2fs_gc_control gc_control = {
 			.victim_segno = NULL_SEGNO,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ccf806b676f53..d6863da05a7c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
 	if ((flags & SB_RDONLY) ||
 			(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
 			!test_opt(sbi, GC_MERGE))) {
-		if (sbi->gc_thread) {
+		if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
 			f2fs_stop_gc_thread(sbi);
 			need_restart_gc = true;
 		}
-	} else if (!sbi->gc_thread) {
+	} else if (!sbi->gc_thread ||
+			!READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
 		err = f2fs_start_gc_thread(sbi);
 		if (err)
 			goto restore_opts;
@@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
 free_sb_buf:
 	kfree(raw_super);
 free_sbi:
+	kfree(sbi->gc_thread);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	lockdep_unregister_key(&sbi->cp_global_sem_key);
 #endif
@@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
 	/* Release block devices last, after fscrypt_destroy_keyring(). */
 	if (sbi) {
 		destroy_device_list(sbi);
+		kfree(sbi->gc_thread);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 		lockdep_unregister_key(&sbi->cp_global_sem_key);
 #endif
-- 
2.43.0


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [f2fs-dev] [PATCH v4] f2fs: protect published gc_thread during teardown
  2026-05-30 14:33 ` [f2fs-dev] " Zhang Cen
@ 2026-06-13 11:10   ` Cen Zhang
  -1 siblings, 0 replies; 6+ messages in thread
From: Cen Zhang @ 2026-06-13 11:10 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu
  Cc: 2045gemini, Gao Xiang, zerocling0077, linux-kernel,
	linux-f2fs-devel

Hi all,

Just a gentle ping on this series.

I would appreciate any feedback or guidance on whether further changes
are needed.

Thanks for your time and review.

Best regards,
Zhang Cen

Zhang Cen <rollkingzzc@gmail.com> 于2026年5月30日周六 22:33写道:
>
> f2fs_stop_gc_thread() stops the background GC task, wakes foreground
> GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
> pointer.  A foreground f2fs_balance_fs() caller can already have copied
> that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
> stop time can leave finish_wait() operating on a freed waitqueue.
>
> Keep the allocated GC-thread state until the superblock is destroyed and
> use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
> withdraws the task pointer with xchg(), stops the task, and wakes any
> foreground waiters, but leaves the waitqueue storage valid.  The start
> path reuses a stopped gc_thread object instead of reinitializing its
> waitqueues, and remount restart decisions check the task pointer rather
> than only the object pointer.
>
> f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
> f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
> the first check, the foreground caller removes its wait entry without
> sleeping on a worker that has already been withdrawn.
>
> Validation reproduced this kernel report:
>
> BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
> Write of size 8 at addr ffff8881150819b8 by task dd/802
> The buggy address belongs to the object at ffff888115081900 which
> belongs to the cache kmalloc-256 of size 256
> The buggy address is located 184 bytes inside of freed 256-byte region
> Call trace:
>   finish_wait()
>   f2fs_balance_fs()
>   f2fs_write_single_data_page()
>   f2fs_write_cache_pages()
>   __f2fs_write_data_pages()
>   do_writepages()
>   filemap_fdatawrite_wbc()
>   __filemap_fdatawrite_range()
>   file_write_and_wait_range()
>   f2fs_do_sync_file()
>   f2fs_sync_file()
>   do_fsync()
> Freed by task stack:
>   kfree()
>   f2fs_stop_gc_thread()
>   f2fs_do_shutdown()
>   f2fs_shutdown()
>   fs_bdev_mark_dead()
>
> Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
> Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
> ---
> v4:
> - Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
>   keeps the existing heap-allocated gc_thread object alive until
>   superblock teardown.
> - Use f2fs_gc_task as the running-state marker and withdraw it with
>   xchg() before waking GC_MERGE waiters.
> - Reuse a stopped gc_thread object across remount restarts so the
>   waitqueues are not reinitialized while old waiters can still finish.
> - Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
>   with teardown does not sleep after the worker has been withdrawn.
>
> v3:
> - Add the Fixes tag for the GC_MERGE foreground wait path.
> - Fix checkpatch style issues in the broader lifetime variant.
>
> v2:
> - Sashiko.dev pointed out that GC_MERGE foreground waiters and
>   GC-thread users needed lifetime-safe access after teardown.
>
>  fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
>  fs/f2fs/segment.c | 19 ++++++++++++-------
>  fs/f2fs/super.c   |  7 +++++--
>  3 files changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index ba93010924c06..20f8394482a09 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
>
>  int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>  {
> -       struct f2fs_gc_kthread *gc_th;
> +       struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +       struct task_struct *task;
> +       bool allocated = false;
>         dev_t dev = sbi->sb->s_bdev->bd_dev;
>
> -       gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> -       if (!gc_th)
> -               return -ENOMEM;
> +       if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
> +               return 0;
> +
> +       if (!gc_th) {
> +               gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
> +               if (!gc_th)
> +                       return -ENOMEM;
> +               init_waitqueue_head(&gc_th->gc_wait_queue_head);
> +               init_waitqueue_head(&gc_th->fggc_wq);
> +               sbi->gc_thread = gc_th;
> +               allocated = true;
> +       }
>
>         gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>         gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
> @@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>
>         gc_th->gc_wake = false;
>
> -       sbi->gc_thread = gc_th;
> -       init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> -       init_waitqueue_head(&sbi->gc_thread->fggc_wq);
> -       sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> -                       "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> -       if (IS_ERR(gc_th->f2fs_gc_task)) {
> -               int err = PTR_ERR(gc_th->f2fs_gc_task);
> +       task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
> +                          MAJOR(dev), MINOR(dev));
> +       if (IS_ERR(task)) {
> +               int err = PTR_ERR(task);
>
> -               kfree(gc_th);
> -               sbi->gc_thread = NULL;
> +               if (allocated) {
> +                       kfree(gc_th);
> +                       sbi->gc_thread = NULL;
> +               }
>                 return err;
>         }
>
> -       set_user_nice(gc_th->f2fs_gc_task,
> -                       PRIO_TO_NICE(sbi->critical_task_priority));
> +       WRITE_ONCE(gc_th->f2fs_gc_task, task);
> +       set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
>         return 0;
>  }
>
>  void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
>  {
>         struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +       struct task_struct *task;
>
>         if (!gc_th)
>                 return;
> -       kthread_stop(gc_th->f2fs_gc_task);
> +       task = xchg(&gc_th->f2fs_gc_task, NULL);
> +       if (!task)
> +               return;
> +       kthread_stop(task);
>         wake_up_all(&gc_th->fggc_wq);
> -       kfree(gc_th);
> -       sbi->gc_thread = NULL;
>  }
>
>  static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 788f8b0502492..84307525edd27 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
>   */
>  void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  {
> +       struct f2fs_gc_kthread *gc_th;
> +
>         if (f2fs_cp_error(sbi))
>                 return;
>
> @@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>         if (has_enough_free_secs(sbi, 0, 0))
>                 return;
>
> -       if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
> -                               sbi->gc_thread->f2fs_gc_task) {
> +       gc_th = sbi->gc_thread;
> +       if (test_opt(sbi, GC_MERGE) && gc_th &&
> +           READ_ONCE(gc_th->f2fs_gc_task)) {
>                 DEFINE_WAIT(wait);
>
> -               prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
> -                                       TASK_UNINTERRUPTIBLE);
> -               wake_up(&sbi->gc_thread->gc_wait_queue_head);
> -               io_schedule();
> -               finish_wait(&sbi->gc_thread->fggc_wq, &wait);
> +               prepare_to_wait(&gc_th->fggc_wq, &wait,
> +                               TASK_UNINTERRUPTIBLE);
> +               if (READ_ONCE(gc_th->f2fs_gc_task)) {
> +                       wake_up(&gc_th->gc_wait_queue_head);
> +                       io_schedule();
> +               }
> +               finish_wait(&gc_th->fggc_wq, &wait);
>         } else {
>                 struct f2fs_gc_control gc_control = {
>                         .victim_segno = NULL_SEGNO,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index ccf806b676f53..d6863da05a7c2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
>         if ((flags & SB_RDONLY) ||
>                         (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
>                         !test_opt(sbi, GC_MERGE))) {
> -               if (sbi->gc_thread) {
> +               if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>                         f2fs_stop_gc_thread(sbi);
>                         need_restart_gc = true;
>                 }
> -       } else if (!sbi->gc_thread) {
> +       } else if (!sbi->gc_thread ||
> +                       !READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>                 err = f2fs_start_gc_thread(sbi);
>                 if (err)
>                         goto restore_opts;
> @@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
>  free_sb_buf:
>         kfree(raw_super);
>  free_sbi:
> +       kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>         lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> @@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
>         /* Release block devices last, after fscrypt_destroy_keyring(). */
>         if (sbi) {
>                 destroy_device_list(sbi);
> +               kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>                 lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> --
> 2.43.0


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] f2fs: protect published gc_thread during teardown
@ 2026-06-13 11:10   ` Cen Zhang
  0 siblings, 0 replies; 6+ messages in thread
From: Cen Zhang @ 2026-06-13 11:10 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu
  Cc: Gao Xiang, linux-f2fs-devel, linux-kernel, zerocling0077,
	2045gemini

Hi all,

Just a gentle ping on this series.

I would appreciate any feedback or guidance on whether further changes
are needed.

Thanks for your time and review.

Best regards,
Zhang Cen

Zhang Cen <rollkingzzc@gmail.com> 于2026年5月30日周六 22:33写道:
>
> f2fs_stop_gc_thread() stops the background GC task, wakes foreground
> GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
> pointer.  A foreground f2fs_balance_fs() caller can already have copied
> that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
> stop time can leave finish_wait() operating on a freed waitqueue.
>
> Keep the allocated GC-thread state until the superblock is destroyed and
> use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
> withdraws the task pointer with xchg(), stops the task, and wakes any
> foreground waiters, but leaves the waitqueue storage valid.  The start
> path reuses a stopped gc_thread object instead of reinitializing its
> waitqueues, and remount restart decisions check the task pointer rather
> than only the object pointer.
>
> f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
> f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
> the first check, the foreground caller removes its wait entry without
> sleeping on a worker that has already been withdrawn.
>
> Validation reproduced this kernel report:
>
> BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
> Write of size 8 at addr ffff8881150819b8 by task dd/802
> The buggy address belongs to the object at ffff888115081900 which
> belongs to the cache kmalloc-256 of size 256
> The buggy address is located 184 bytes inside of freed 256-byte region
> Call trace:
>   finish_wait()
>   f2fs_balance_fs()
>   f2fs_write_single_data_page()
>   f2fs_write_cache_pages()
>   __f2fs_write_data_pages()
>   do_writepages()
>   filemap_fdatawrite_wbc()
>   __filemap_fdatawrite_range()
>   file_write_and_wait_range()
>   f2fs_do_sync_file()
>   f2fs_sync_file()
>   do_fsync()
> Freed by task stack:
>   kfree()
>   f2fs_stop_gc_thread()
>   f2fs_do_shutdown()
>   f2fs_shutdown()
>   fs_bdev_mark_dead()
>
> Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
> Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
> ---
> v4:
> - Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
>   keeps the existing heap-allocated gc_thread object alive until
>   superblock teardown.
> - Use f2fs_gc_task as the running-state marker and withdraw it with
>   xchg() before waking GC_MERGE waiters.
> - Reuse a stopped gc_thread object across remount restarts so the
>   waitqueues are not reinitialized while old waiters can still finish.
> - Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
>   with teardown does not sleep after the worker has been withdrawn.
>
> v3:
> - Add the Fixes tag for the GC_MERGE foreground wait path.
> - Fix checkpatch style issues in the broader lifetime variant.
>
> v2:
> - Sashiko.dev pointed out that GC_MERGE foreground waiters and
>   GC-thread users needed lifetime-safe access after teardown.
>
>  fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
>  fs/f2fs/segment.c | 19 ++++++++++++-------
>  fs/f2fs/super.c   |  7 +++++--
>  3 files changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index ba93010924c06..20f8394482a09 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
>
>  int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>  {
> -       struct f2fs_gc_kthread *gc_th;
> +       struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +       struct task_struct *task;
> +       bool allocated = false;
>         dev_t dev = sbi->sb->s_bdev->bd_dev;
>
> -       gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> -       if (!gc_th)
> -               return -ENOMEM;
> +       if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
> +               return 0;
> +
> +       if (!gc_th) {
> +               gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
> +               if (!gc_th)
> +                       return -ENOMEM;
> +               init_waitqueue_head(&gc_th->gc_wait_queue_head);
> +               init_waitqueue_head(&gc_th->fggc_wq);
> +               sbi->gc_thread = gc_th;
> +               allocated = true;
> +       }
>
>         gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>         gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
> @@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>
>         gc_th->gc_wake = false;
>
> -       sbi->gc_thread = gc_th;
> -       init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> -       init_waitqueue_head(&sbi->gc_thread->fggc_wq);
> -       sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> -                       "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> -       if (IS_ERR(gc_th->f2fs_gc_task)) {
> -               int err = PTR_ERR(gc_th->f2fs_gc_task);
> +       task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
> +                          MAJOR(dev), MINOR(dev));
> +       if (IS_ERR(task)) {
> +               int err = PTR_ERR(task);
>
> -               kfree(gc_th);
> -               sbi->gc_thread = NULL;
> +               if (allocated) {
> +                       kfree(gc_th);
> +                       sbi->gc_thread = NULL;
> +               }
>                 return err;
>         }
>
> -       set_user_nice(gc_th->f2fs_gc_task,
> -                       PRIO_TO_NICE(sbi->critical_task_priority));
> +       WRITE_ONCE(gc_th->f2fs_gc_task, task);
> +       set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
>         return 0;
>  }
>
>  void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
>  {
>         struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +       struct task_struct *task;
>
>         if (!gc_th)
>                 return;
> -       kthread_stop(gc_th->f2fs_gc_task);
> +       task = xchg(&gc_th->f2fs_gc_task, NULL);
> +       if (!task)
> +               return;
> +       kthread_stop(task);
>         wake_up_all(&gc_th->fggc_wq);
> -       kfree(gc_th);
> -       sbi->gc_thread = NULL;
>  }
>
>  static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 788f8b0502492..84307525edd27 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
>   */
>  void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  {
> +       struct f2fs_gc_kthread *gc_th;
> +
>         if (f2fs_cp_error(sbi))
>                 return;
>
> @@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>         if (has_enough_free_secs(sbi, 0, 0))
>                 return;
>
> -       if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
> -                               sbi->gc_thread->f2fs_gc_task) {
> +       gc_th = sbi->gc_thread;
> +       if (test_opt(sbi, GC_MERGE) && gc_th &&
> +           READ_ONCE(gc_th->f2fs_gc_task)) {
>                 DEFINE_WAIT(wait);
>
> -               prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
> -                                       TASK_UNINTERRUPTIBLE);
> -               wake_up(&sbi->gc_thread->gc_wait_queue_head);
> -               io_schedule();
> -               finish_wait(&sbi->gc_thread->fggc_wq, &wait);
> +               prepare_to_wait(&gc_th->fggc_wq, &wait,
> +                               TASK_UNINTERRUPTIBLE);
> +               if (READ_ONCE(gc_th->f2fs_gc_task)) {
> +                       wake_up(&gc_th->gc_wait_queue_head);
> +                       io_schedule();
> +               }
> +               finish_wait(&gc_th->fggc_wq, &wait);
>         } else {
>                 struct f2fs_gc_control gc_control = {
>                         .victim_segno = NULL_SEGNO,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index ccf806b676f53..d6863da05a7c2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
>         if ((flags & SB_RDONLY) ||
>                         (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
>                         !test_opt(sbi, GC_MERGE))) {
> -               if (sbi->gc_thread) {
> +               if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>                         f2fs_stop_gc_thread(sbi);
>                         need_restart_gc = true;
>                 }
> -       } else if (!sbi->gc_thread) {
> +       } else if (!sbi->gc_thread ||
> +                       !READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>                 err = f2fs_start_gc_thread(sbi);
>                 if (err)
>                         goto restore_opts;
> @@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
>  free_sb_buf:
>         kfree(raw_super);
>  free_sbi:
> +       kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>         lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> @@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
>         /* Release block devices last, after fscrypt_destroy_keyring(). */
>         if (sbi) {
>                 destroy_device_list(sbi);
> +               kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>                 lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> --
> 2.43.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] f2fs: protect published gc_thread during teardown
  2026-05-30 14:33 ` [f2fs-dev] " Zhang Cen
@ 2026-06-15 16:16   ` Jaegeuk Kim via Linux-f2fs-devel
  -1 siblings, 0 replies; 6+ messages in thread
From: Jaegeuk Kim @ 2026-06-15 16:16 UTC (permalink / raw)
  To: Zhang Cen
  Cc: Chao Yu, Gao Xiang, linux-f2fs-devel, linux-kernel, zerocling0077,
	2045gemini

On 05/30, Zhang Cen wrote:
> f2fs_stop_gc_thread() stops the background GC task, wakes foreground
> GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
> pointer.  A foreground f2fs_balance_fs() caller can already have copied
> that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
> stop time can leave finish_wait() operating on a freed waitqueue.
> 
> Keep the allocated GC-thread state until the superblock is destroyed and
> use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
> withdraws the task pointer with xchg(), stops the task, and wakes any
> foreground waiters, but leaves the waitqueue storage valid.  The start
> path reuses a stopped gc_thread object instead of reinitializing its
> waitqueues, and remount restart decisions check the task pointer rather
> than only the object pointer.
> 
> f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
> f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
> the first check, the foreground caller removes its wait entry without
> sleeping on a worker that has already been withdrawn.
> 
> Validation reproduced this kernel report:
> 
> BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
> Write of size 8 at addr ffff8881150819b8 by task dd/802
> The buggy address belongs to the object at ffff888115081900 which
> belongs to the cache kmalloc-256 of size 256
> The buggy address is located 184 bytes inside of freed 256-byte region
> Call trace:
>   finish_wait()
>   f2fs_balance_fs()
>   f2fs_write_single_data_page()
>   f2fs_write_cache_pages()
>   __f2fs_write_data_pages()
>   do_writepages()
>   filemap_fdatawrite_wbc()
>   __filemap_fdatawrite_range()
>   file_write_and_wait_range()
>   f2fs_do_sync_file()
>   f2fs_sync_file()
>   do_fsync()
> Freed by task stack:
>   kfree()
>   f2fs_stop_gc_thread()
>   f2fs_do_shutdown()
>   f2fs_shutdown()
>   fs_bdev_mark_dead()

What about dequeuing all the pending work items and stop getting more work
when shutdown was called?

> 
> Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
> Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
> ---
> v4:
> - Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
>   keeps the existing heap-allocated gc_thread object alive until
>   superblock teardown.
> - Use f2fs_gc_task as the running-state marker and withdraw it with
>   xchg() before waking GC_MERGE waiters.
> - Reuse a stopped gc_thread object across remount restarts so the
>   waitqueues are not reinitialized while old waiters can still finish.
> - Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
>   with teardown does not sleep after the worker has been withdrawn.
> 
> v3:
> - Add the Fixes tag for the GC_MERGE foreground wait path.
> - Fix checkpatch style issues in the broader lifetime variant.
> 
> v2:
> - Sashiko.dev pointed out that GC_MERGE foreground waiters and
>   GC-thread users needed lifetime-safe access after teardown.
> 
>  fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
>  fs/f2fs/segment.c | 19 ++++++++++++-------
>  fs/f2fs/super.c   |  7 +++++--
>  3 files changed, 47 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index ba93010924c06..20f8394482a09 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
>  
>  int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>  {
> -	struct f2fs_gc_kthread *gc_th;
> +	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +	struct task_struct *task;
> +	bool allocated = false;
>  	dev_t dev = sbi->sb->s_bdev->bd_dev;
>  
> -	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> -	if (!gc_th)
> -		return -ENOMEM;
> +	if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
> +		return 0;
> +
> +	if (!gc_th) {
> +		gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
> +		if (!gc_th)
> +			return -ENOMEM;
> +		init_waitqueue_head(&gc_th->gc_wait_queue_head);
> +		init_waitqueue_head(&gc_th->fggc_wq);
> +		sbi->gc_thread = gc_th;
> +		allocated = true;
> +	}
>  
>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>  	gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
> @@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>  
>  	gc_th->gc_wake = false;
>  
> -	sbi->gc_thread = gc_th;
> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> -	init_waitqueue_head(&sbi->gc_thread->fggc_wq);
> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> -			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> -	if (IS_ERR(gc_th->f2fs_gc_task)) {
> -		int err = PTR_ERR(gc_th->f2fs_gc_task);
> +	task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
> +			   MAJOR(dev), MINOR(dev));
> +	if (IS_ERR(task)) {
> +		int err = PTR_ERR(task);
>  
> -		kfree(gc_th);
> -		sbi->gc_thread = NULL;
> +		if (allocated) {
> +			kfree(gc_th);
> +			sbi->gc_thread = NULL;
> +		}
>  		return err;
>  	}
>  
> -	set_user_nice(gc_th->f2fs_gc_task,
> -			PRIO_TO_NICE(sbi->critical_task_priority));
> +	WRITE_ONCE(gc_th->f2fs_gc_task, task);
> +	set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
>  	return 0;
>  }
>  
>  void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +	struct task_struct *task;
>  
>  	if (!gc_th)
>  		return;
> -	kthread_stop(gc_th->f2fs_gc_task);
> +	task = xchg(&gc_th->f2fs_gc_task, NULL);
> +	if (!task)
> +		return;
> +	kthread_stop(task);
>  	wake_up_all(&gc_th->fggc_wq);
> -	kfree(gc_th);
> -	sbi->gc_thread = NULL;
>  }
>  
>  static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 788f8b0502492..84307525edd27 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
>   */
>  void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  {
> +	struct f2fs_gc_kthread *gc_th;
> +
>  	if (f2fs_cp_error(sbi))
>  		return;
>  
> @@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  	if (has_enough_free_secs(sbi, 0, 0))
>  		return;
>  
> -	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
> -				sbi->gc_thread->f2fs_gc_task) {
> +	gc_th = sbi->gc_thread;
> +	if (test_opt(sbi, GC_MERGE) && gc_th &&
> +	    READ_ONCE(gc_th->f2fs_gc_task)) {
>  		DEFINE_WAIT(wait);
>  
> -		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
> -					TASK_UNINTERRUPTIBLE);
> -		wake_up(&sbi->gc_thread->gc_wait_queue_head);
> -		io_schedule();
> -		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
> +		prepare_to_wait(&gc_th->fggc_wq, &wait,
> +				TASK_UNINTERRUPTIBLE);
> +		if (READ_ONCE(gc_th->f2fs_gc_task)) {
> +			wake_up(&gc_th->gc_wait_queue_head);
> +			io_schedule();
> +		}
> +		finish_wait(&gc_th->fggc_wq, &wait);
>  	} else {
>  		struct f2fs_gc_control gc_control = {
>  			.victim_segno = NULL_SEGNO,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index ccf806b676f53..d6863da05a7c2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
>  	if ((flags & SB_RDONLY) ||
>  			(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
>  			!test_opt(sbi, GC_MERGE))) {
> -		if (sbi->gc_thread) {
> +		if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>  			f2fs_stop_gc_thread(sbi);
>  			need_restart_gc = true;
>  		}
> -	} else if (!sbi->gc_thread) {
> +	} else if (!sbi->gc_thread ||
> +			!READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>  		err = f2fs_start_gc_thread(sbi);
>  		if (err)
>  			goto restore_opts;
> @@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
>  free_sb_buf:
>  	kfree(raw_super);
>  free_sbi:
> +	kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  	lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> @@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
>  	/* Release block devices last, after fscrypt_destroy_keyring(). */
>  	if (sbi) {
>  		destroy_device_list(sbi);
> +		kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  		lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> -- 
> 2.43.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [f2fs-dev] [PATCH v4] f2fs: protect published gc_thread during teardown
@ 2026-06-15 16:16   ` Jaegeuk Kim via Linux-f2fs-devel
  0 siblings, 0 replies; 6+ messages in thread
From: Jaegeuk Kim via Linux-f2fs-devel @ 2026-06-15 16:16 UTC (permalink / raw)
  To: Zhang Cen
  Cc: linux-kernel, linux-f2fs-devel, 2045gemini, Gao Xiang,
	zerocling0077

On 05/30, Zhang Cen wrote:
> f2fs_stop_gc_thread() stops the background GC task, wakes foreground
> GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
> pointer.  A foreground f2fs_balance_fs() caller can already have copied
> that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
> stop time can leave finish_wait() operating on a freed waitqueue.
> 
> Keep the allocated GC-thread state until the superblock is destroyed and
> use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
> withdraws the task pointer with xchg(), stops the task, and wakes any
> foreground waiters, but leaves the waitqueue storage valid.  The start
> path reuses a stopped gc_thread object instead of reinitializing its
> waitqueues, and remount restart decisions check the task pointer rather
> than only the object pointer.
> 
> f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
> f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
> the first check, the foreground caller removes its wait entry without
> sleeping on a worker that has already been withdrawn.
> 
> Validation reproduced this kernel report:
> 
> BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
> Write of size 8 at addr ffff8881150819b8 by task dd/802
> The buggy address belongs to the object at ffff888115081900 which
> belongs to the cache kmalloc-256 of size 256
> The buggy address is located 184 bytes inside of freed 256-byte region
> Call trace:
>   finish_wait()
>   f2fs_balance_fs()
>   f2fs_write_single_data_page()
>   f2fs_write_cache_pages()
>   __f2fs_write_data_pages()
>   do_writepages()
>   filemap_fdatawrite_wbc()
>   __filemap_fdatawrite_range()
>   file_write_and_wait_range()
>   f2fs_do_sync_file()
>   f2fs_sync_file()
>   do_fsync()
> Freed by task stack:
>   kfree()
>   f2fs_stop_gc_thread()
>   f2fs_do_shutdown()
>   f2fs_shutdown()
>   fs_bdev_mark_dead()

What about dequeuing all the pending work items and stop getting more work
when shutdown was called?

> 
> Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
> Signed-off-by: Zhang Cen <rollkingzzc@gmail.com>
> ---
> v4:
> - Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
>   keeps the existing heap-allocated gc_thread object alive until
>   superblock teardown.
> - Use f2fs_gc_task as the running-state marker and withdraw it with
>   xchg() before waking GC_MERGE waiters.
> - Reuse a stopped gc_thread object across remount restarts so the
>   waitqueues are not reinitialized while old waiters can still finish.
> - Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
>   with teardown does not sleep after the worker has been withdrawn.
> 
> v3:
> - Add the Fixes tag for the GC_MERGE foreground wait path.
> - Fix checkpatch style issues in the broader lifetime variant.
> 
> v2:
> - Sashiko.dev pointed out that GC_MERGE foreground waiters and
>   GC-thread users needed lifetime-safe access after teardown.
> 
>  fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
>  fs/f2fs/segment.c | 19 ++++++++++++-------
>  fs/f2fs/super.c   |  7 +++++--
>  3 files changed, 47 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index ba93010924c06..20f8394482a09 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
>  
>  int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>  {
> -	struct f2fs_gc_kthread *gc_th;
> +	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +	struct task_struct *task;
> +	bool allocated = false;
>  	dev_t dev = sbi->sb->s_bdev->bd_dev;
>  
> -	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> -	if (!gc_th)
> -		return -ENOMEM;
> +	if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
> +		return 0;
> +
> +	if (!gc_th) {
> +		gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
> +		if (!gc_th)
> +			return -ENOMEM;
> +		init_waitqueue_head(&gc_th->gc_wait_queue_head);
> +		init_waitqueue_head(&gc_th->fggc_wq);
> +		sbi->gc_thread = gc_th;
> +		allocated = true;
> +	}
>  
>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>  	gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
> @@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
>  
>  	gc_th->gc_wake = false;
>  
> -	sbi->gc_thread = gc_th;
> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> -	init_waitqueue_head(&sbi->gc_thread->fggc_wq);
> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> -			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> -	if (IS_ERR(gc_th->f2fs_gc_task)) {
> -		int err = PTR_ERR(gc_th->f2fs_gc_task);
> +	task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
> +			   MAJOR(dev), MINOR(dev));
> +	if (IS_ERR(task)) {
> +		int err = PTR_ERR(task);
>  
> -		kfree(gc_th);
> -		sbi->gc_thread = NULL;
> +		if (allocated) {
> +			kfree(gc_th);
> +			sbi->gc_thread = NULL;
> +		}
>  		return err;
>  	}
>  
> -	set_user_nice(gc_th->f2fs_gc_task,
> -			PRIO_TO_NICE(sbi->critical_task_priority));
> +	WRITE_ONCE(gc_th->f2fs_gc_task, task);
> +	set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
>  	return 0;
>  }
>  
>  void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> +	struct task_struct *task;
>  
>  	if (!gc_th)
>  		return;
> -	kthread_stop(gc_th->f2fs_gc_task);
> +	task = xchg(&gc_th->f2fs_gc_task, NULL);
> +	if (!task)
> +		return;
> +	kthread_stop(task);
>  	wake_up_all(&gc_th->fggc_wq);
> -	kfree(gc_th);
> -	sbi->gc_thread = NULL;
>  }
>  
>  static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 788f8b0502492..84307525edd27 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
>   */
>  void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  {
> +	struct f2fs_gc_kthread *gc_th;
> +
>  	if (f2fs_cp_error(sbi))
>  		return;
>  
> @@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  	if (has_enough_free_secs(sbi, 0, 0))
>  		return;
>  
> -	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
> -				sbi->gc_thread->f2fs_gc_task) {
> +	gc_th = sbi->gc_thread;
> +	if (test_opt(sbi, GC_MERGE) && gc_th &&
> +	    READ_ONCE(gc_th->f2fs_gc_task)) {
>  		DEFINE_WAIT(wait);
>  
> -		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
> -					TASK_UNINTERRUPTIBLE);
> -		wake_up(&sbi->gc_thread->gc_wait_queue_head);
> -		io_schedule();
> -		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
> +		prepare_to_wait(&gc_th->fggc_wq, &wait,
> +				TASK_UNINTERRUPTIBLE);
> +		if (READ_ONCE(gc_th->f2fs_gc_task)) {
> +			wake_up(&gc_th->gc_wait_queue_head);
> +			io_schedule();
> +		}
> +		finish_wait(&gc_th->fggc_wq, &wait);
>  	} else {
>  		struct f2fs_gc_control gc_control = {
>  			.victim_segno = NULL_SEGNO,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index ccf806b676f53..d6863da05a7c2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
>  	if ((flags & SB_RDONLY) ||
>  			(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
>  			!test_opt(sbi, GC_MERGE))) {
> -		if (sbi->gc_thread) {
> +		if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>  			f2fs_stop_gc_thread(sbi);
>  			need_restart_gc = true;
>  		}
> -	} else if (!sbi->gc_thread) {
> +	} else if (!sbi->gc_thread ||
> +			!READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
>  		err = f2fs_start_gc_thread(sbi);
>  		if (err)
>  			goto restore_opts;
> @@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
>  free_sb_buf:
>  	kfree(raw_super);
>  free_sbi:
> +	kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  	lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> @@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
>  	/* Release block devices last, after fscrypt_destroy_keyring(). */
>  	if (sbi) {
>  		destroy_device_list(sbi);
> +		kfree(sbi->gc_thread);
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  		lockdep_unregister_key(&sbi->cp_global_sem_key);
>  #endif
> -- 
> 2.43.0


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-06-15 16:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-30 14:33 [PATCH v4] f2fs: protect published gc_thread during teardown Zhang Cen
2026-05-30 14:33 ` [f2fs-dev] " Zhang Cen
2026-06-13 11:10 ` Cen Zhang
2026-06-13 11:10   ` Cen Zhang
2026-06-15 16:16 ` Jaegeuk Kim
2026-06-15 16:16   ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.