public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ext4: Fix the might_sleep() warnings in kvfree()
@ 2026-03-19  9:45 Zqiang
  2026-03-20  2:47 ` Baokun Li
  0 siblings, 1 reply; 2+ messages in thread
From: Zqiang @ 2026-03-19  9:45 UTC (permalink / raw)
  To: tytso, adilger.kernel, libaokun; +Cc: linux-ext4, linux-kernel, qiang.zhang

Calling kvfree() inside an RCU read-side critical section can trigger
the following warnings, because kvfree() may fall back to vfree(),
which can sleep:

EXT4-fs (vdb): unmounting filesystem cd983e5b-3c83-4f5a-a136-17b00eb9d018.

WARNING: suspicious RCU usage

./include/linux/rcupdate.h:409 Illegal context switch in RCU read-side critical section!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1

Call Trace:
 <TASK>
 dump_stack_lvl+0xbb/0xd0
 dump_stack+0x14/0x20
 lockdep_rcu_suspicious+0x15a/0x1b0
 __might_resched+0x375/0x4d0
 ? put_object.part.0+0x2c/0x50
 __might_sleep+0x108/0x160
 vfree+0x58/0x910
 ? ext4_group_desc_free+0x27/0x270
 kvfree+0x23/0x40
 ext4_group_desc_free+0x111/0x270
 ext4_put_super+0x3c8/0xd40
 generic_shutdown_super+0x14c/0x4a0
 ? __pfx_shrinker_free+0x10/0x10
 kill_block_super+0x40/0x90
 ext4_kill_sb+0x6d/0xb0
 deactivate_locked_super+0xb4/0x180
 deactivate_super+0x7e/0xa0
 cleanup_mnt+0x296/0x3e0
 __cleanup_mnt+0x16/0x20
 task_work_run+0x157/0x250
 ? __pfx_task_work_run+0x10/0x10
 ? exit_to_user_mode_loop+0x6a/0x550
 exit_to_user_mode_loop+0x102/0x550
 do_syscall_64+0x44a/0x500
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
 </TASK>

BUG: sleeping function called from invalid context at mm/vmalloc.c:3441
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556, name: umount
preempt_count: 1, expected: 0
CPU: 3 UID: 0 PID: 556 Comm: umount
Call Trace:
 <TASK>
 dump_stack_lvl+0xbb/0xd0
 dump_stack+0x14/0x20
 __might_resched+0x275/0x4d0
 ? put_object.part.0+0x2c/0x50
 __might_sleep+0x108/0x160
 vfree+0x58/0x910
 ? ext4_group_desc_free+0x27/0x270
 kvfree+0x23/0x40
 ext4_group_desc_free+0x111/0x270
 ext4_put_super+0x3c8/0xd40
 generic_shutdown_super+0x14c/0x4a0
 ? __pfx_shrinker_free+0x10/0x10
 kill_block_super+0x40/0x90
 ext4_kill_sb+0x6d/0xb0
 deactivate_locked_super+0xb4/0x180
 deactivate_super+0x7e/0xa0
 cleanup_mnt+0x296/0x3e0
 __cleanup_mnt+0x16/0x20
 task_work_run+0x157/0x250
 ? __pfx_task_work_run+0x10/0x10
 ? exit_to_user_mode_loop+0x6a/0x550
 exit_to_user_mode_loop+0x102/0x550
 do_syscall_64+0x44a/0x500
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The above scenarios occur only in initialization failure and teardown
paths, where there are no concurrent operations on the resources
released by kvfree(). Therefore, remove the rcu_read_lock()/
rcu_read_unlock() pairs and use rcu_access_pointer() instead of
rcu_dereference() to fetch the pointers.

Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access")
Fixes: df3da4ea5a0f ("ext4: fix potential race between s_group_info online resizing and access")
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
---
 fs/ext4/mballoc.c | 10 +++-------
 fs/ext4/super.c   |  8 ++------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..e96513cc6151 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3580,9 +3580,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	rcu_read_unlock();
 	iput(sbi->s_buddy_cache);
 err_freesgi:
-	rcu_read_lock();
-	kvfree(rcu_dereference(sbi->s_group_info));
-	rcu_read_unlock();
+	kvfree(rcu_access_pointer(sbi->s_group_info));
 	return -ENOMEM;
 }
 
@@ -3897,7 +3895,8 @@ void ext4_mb_release(struct super_block *sb)
 		WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
 	}
 
-	if (sbi->s_group_info) {
+	group_info = rcu_access_pointer(sbi->s_group_info);
+	if (group_info) {
 		for (i = 0; i < ngroups; i++) {
 			cond_resched();
 			grinfo = ext4_get_group_info(sb, i);
@@ -3915,12 +3914,9 @@ void ext4_mb_release(struct super_block *sb)
 		num_meta_group_infos = (ngroups +
 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
 			EXT4_DESC_PER_BLOCK_BITS(sb);
-		rcu_read_lock();
-		group_info = rcu_dereference(sbi->s_group_info);
 		for (i = 0; i < num_meta_group_infos; i++)
 			kfree(group_info[i]);
 		kvfree(group_info);
-		rcu_read_unlock();
 	}
 	ext4_mb_avg_fragment_size_destroy(sbi);
 	ext4_mb_largest_free_orders_destroy(sbi);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43f680c750ae..0b2fa7bd787f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi)
 	struct buffer_head **group_desc;
 	int i;
 
-	rcu_read_lock();
-	group_desc = rcu_dereference(sbi->s_group_desc);
+	group_desc = rcu_access_pointer(sbi->s_group_desc);
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(group_desc[i]);
 	kvfree(group_desc);
-	rcu_read_unlock();
 }
 
 static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
@@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
 	struct flex_groups **flex_groups;
 	int i;
 
-	rcu_read_lock();
-	flex_groups = rcu_dereference(sbi->s_flex_groups);
+	flex_groups = rcu_access_pointer(sbi->s_flex_groups);
 	if (flex_groups) {
 		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
 			kvfree(flex_groups[i]);
 		kvfree(flex_groups);
 	}
-	rcu_read_unlock();
 }
 
 static void ext4_put_super(struct super_block *sb)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] ext4: Fix the might_sleep() warnings in kvfree()
  2026-03-19  9:45 [PATCH] ext4: Fix the might_sleep() warnings in kvfree() Zqiang
@ 2026-03-20  2:47 ` Baokun Li
  0 siblings, 0 replies; 2+ messages in thread
From: Baokun Li @ 2026-03-20  2:47 UTC (permalink / raw)
  To: Zqiang; +Cc: tytso, adilger.kernel, linux-ext4, linux-kernel, libaokun


On 3/19/26 5:45 PM, Zqiang wrote:
> Use the kvfree() in the RCU read critical section can trigger
> the following warnings:
>
> EXT4-fs (vdb): unmounting filesystem cd983e5b-3c83-4f5a-a136-17b00eb9d018.
>
> WARNING: suspicious RCU usage
>
> ./include/linux/rcupdate.h:409 Illegal context switch in RCU read-side critical section!
>
> other info that might help us debug this:
>
> rcu_scheduler_active = 2, debug_locks = 1
>
> Call Trace:
>  <TASK>
>  dump_stack_lvl+0xbb/0xd0
>  dump_stack+0x14/0x20
>  lockdep_rcu_suspicious+0x15a/0x1b0
>  __might_resched+0x375/0x4d0
>  ? put_object.part.0+0x2c/0x50
>  __might_sleep+0x108/0x160
>  vfree+0x58/0x910
>  ? ext4_group_desc_free+0x27/0x270
>  kvfree+0x23/0x40
>  ext4_group_desc_free+0x111/0x270
>  ext4_put_super+0x3c8/0xd40
>  generic_shutdown_super+0x14c/0x4a0
>  ? __pfx_shrinker_free+0x10/0x10
>  kill_block_super+0x40/0x90
>  ext4_kill_sb+0x6d/0xb0
>  deactivate_locked_super+0xb4/0x180
>  deactivate_super+0x7e/0xa0
>  cleanup_mnt+0x296/0x3e0
>  __cleanup_mnt+0x16/0x20
>  task_work_run+0x157/0x250
>  ? __pfx_task_work_run+0x10/0x10
>  ? exit_to_user_mode_loop+0x6a/0x550
>  exit_to_user_mode_loop+0x102/0x550
>  do_syscall_64+0x44a/0x500
>  entry_SYSCALL_64_after_hwframe+0x77/0x7f
>  </TASK>
>
> BUG: sleeping function called from invalid context at mm/vmalloc.c:3441
> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556, name: umount
> preempt_count: 1, expected: 0
> CPU: 3 UID: 0 PID: 556 Comm: umount
> Call Trace:
>  <TASK>
>  dump_stack_lvl+0xbb/0xd0
>  dump_stack+0x14/0x20
>  __might_resched+0x275/0x4d0
>  ? put_object.part.0+0x2c/0x50
>  __might_sleep+0x108/0x160
>  vfree+0x58/0x910
>  ? ext4_group_desc_free+0x27/0x270
>  kvfree+0x23/0x40
>  ext4_group_desc_free+0x111/0x270
>  ext4_put_super+0x3c8/0xd40
>  generic_shutdown_super+0x14c/0x4a0
>  ? __pfx_shrinker_free+0x10/0x10
>  kill_block_super+0x40/0x90
>  ext4_kill_sb+0x6d/0xb0
>  deactivate_locked_super+0xb4/0x180
>  deactivate_super+0x7e/0xa0
>  cleanup_mnt+0x296/0x3e0
>  __cleanup_mnt+0x16/0x20
>  task_work_run+0x157/0x250
>  ? __pfx_task_work_run+0x10/0x10
>  ? exit_to_user_mode_loop+0x6a/0x550
>  exit_to_user_mode_loop+0x102/0x550
>  do_syscall_64+0x44a/0x500
>  entry_SYSCALL_64_after_hwframe+0x77/0x7f
>
> The above scenarios occur in initialization failures and teardown
> paths, there are no parallel operations on the resources released
> by kvfree(), this commit therefore remove rcu_read_lock/unlock() and
> use rcu_access_pointer() instead of rcu_dereference() operations.
>
> Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access")
> Fixes: df3da4ea5a0f ("ext4: fix potential race between s_group_info online resizing and access")
> Signed-off-by: Zqiang <qiang.zhang@linux.dev>

Looks good, feel free to add:

Reviewed-by: Baokun Li <libaokun@linux.alibaba.com>

> ---
>  fs/ext4/mballoc.c | 10 +++-------
>  fs/ext4/super.c   |  8 ++------
>  2 files changed, 5 insertions(+), 13 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 20e9fdaf4301..e96513cc6151 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3580,9 +3580,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
>  	rcu_read_unlock();
>  	iput(sbi->s_buddy_cache);
>  err_freesgi:
> -	rcu_read_lock();
> -	kvfree(rcu_dereference(sbi->s_group_info));
> -	rcu_read_unlock();
> +	kvfree(rcu_access_pointer(sbi->s_group_info));
>  	return -ENOMEM;
>  }
>  
> @@ -3897,7 +3895,8 @@ void ext4_mb_release(struct super_block *sb)
>  		WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
>  	}
>  
> -	if (sbi->s_group_info) {
> +	group_info = rcu_access_pointer(sbi->s_group_info);
> +	if (group_info) {
>  		for (i = 0; i < ngroups; i++) {
>  			cond_resched();
>  			grinfo = ext4_get_group_info(sb, i);
> @@ -3915,12 +3914,9 @@ void ext4_mb_release(struct super_block *sb)
>  		num_meta_group_infos = (ngroups +
>  				EXT4_DESC_PER_BLOCK(sb) - 1) >>
>  			EXT4_DESC_PER_BLOCK_BITS(sb);
> -		rcu_read_lock();
> -		group_info = rcu_dereference(sbi->s_group_info);
>  		for (i = 0; i < num_meta_group_infos; i++)
>  			kfree(group_info[i]);
>  		kvfree(group_info);
> -		rcu_read_unlock();
>  	}
>  	ext4_mb_avg_fragment_size_destroy(sbi);
>  	ext4_mb_largest_free_orders_destroy(sbi);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 43f680c750ae..0b2fa7bd787f 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi)
>  	struct buffer_head **group_desc;
>  	int i;
>  
> -	rcu_read_lock();
> -	group_desc = rcu_dereference(sbi->s_group_desc);
> +	group_desc = rcu_access_pointer(sbi->s_group_desc);
>  	for (i = 0; i < sbi->s_gdb_count; i++)
>  		brelse(group_desc[i]);
>  	kvfree(group_desc);
> -	rcu_read_unlock();
>  }
>  
>  static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
> @@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
>  	struct flex_groups **flex_groups;
>  	int i;
>  
> -	rcu_read_lock();
> -	flex_groups = rcu_dereference(sbi->s_flex_groups);
> +	flex_groups = rcu_access_pointer(sbi->s_flex_groups);
>  	if (flex_groups) {
>  		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
>  			kvfree(flex_groups[i]);
>  		kvfree(flex_groups);
>  	}
> -	rcu_read_unlock();
>  }
>  
>  static void ext4_put_super(struct super_block *sb)

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-03-20  2:47 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-19  9:45 [PATCH] ext4: Fix the might_sleep() warnings in kvfree() Zqiang
2026-03-20  2:47 ` Baokun Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox