[PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks

Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks
@ 2026-06-11 12:45 Breno Leitao
  2026-06-12  1:10 ` SeongJae Park
  2026-06-12  3:16 ` Lance Yang
  0 siblings, 2 replies; 4+ messages in thread
From: Breno Leitao @ 2026-06-11 12:45 UTC (permalink / raw)
  To: Catalin Marinas, Andrew Morton
  Cc: linux-mm, linux-kernel, kernel-team, Breno Leitao

kmemleak_scan() walks every thread and scans its kernel stack under a
single rcu_read_lock() with no reschedule point. On a host with very
many threads -- amplified by KASAN/lockdep in debug builds -- this loop
can hog a CPU long enough to trip the soft lockup watchdog:

  watchdog: BUG: soft lockup - CPU#35 stuck for 22s! [kmemleak:537]
   scan_block
   kmemleak_scan
   kmemleak_scan_thread
   kthread

A cond_resched() cannot be added directly: the loop runs inside an RCU
read-side critical section.

Split the scan in two parts:

1) get the list of tasks (with RCU read lock) in an array
2) run scan_block() for the tasks (with cond_resched()).

Is it a sane approach?

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 mm/kmemleak.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 7c7ba17ce7af0..9f8a35ecbb50c 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -62,6 +62,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/stat.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/jiffies.h>
@@ -1885,17 +1886,34 @@ static void kmemleak_scan(void)
 	 * Scanning the task stacks (may introduce false negatives).
 	 */
 	if (kmemleak_stack_scan) {
-		struct task_struct *p, *g;
+		struct task_struct **tasks, *p, *g;
+		unsigned int nr = 0, max, i;
 
+		max = nr_threads + 64;
+		tasks = kvmalloc_array(max, sizeof(*tasks), GFP_KERNEL);
+
+		/* Snapshot the threads under RCU */
 		rcu_read_lock();
 		for_each_process_thread(g, p) {
-			void *stack = try_get_task_stack(p);
+			if (!tasks || nr >= max)
+				break;
+			get_task_struct(p);
+			tasks[nr++] = p;
+		}
+		rcu_read_unlock();
+
+		/* now scan_block for the tasks above with cond_resched() */
+		for (i = 0; i < nr; i++) {
+			void *stack = try_get_task_stack(tasks[i]);
+
 			if (stack) {
 				scan_block(stack, stack + THREAD_SIZE, NULL);
-				put_task_stack(p);
+				put_task_stack(tasks[i]);
 			}
+			put_task_struct(tasks[i]);
+			cond_resched();
 		}
-		rcu_read_unlock();
+		kvfree(tasks);
 	}
 
 	/*

---
base-commit: abe651837cb394f76d738a7a747322fca3bf17ba
change-id: 20260611-kmemleak-stack-resched-01ed72858a7f

Best regards,
-- 
Breno Leitao <leitao@debian.org>



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks
  2026-06-11 12:45 [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks Breno Leitao
@ 2026-06-12  1:10 ` SeongJae Park
  2026-06-12  3:16 ` Lance Yang
  1 sibling, 0 replies; 4+ messages in thread
From: SeongJae Park @ 2026-06-12  1:10 UTC (permalink / raw)
  To: Breno Leitao
  Cc: SeongJae Park, Catalin Marinas, Andrew Morton, linux-mm,
	linux-kernel, kernel-team

On Thu, 11 Jun 2026 05:45:00 -0700 Breno Leitao <leitao@debian.org> wrote:

> kmemleak_scan() walks every thread and scans its kernel stack under a
> single rcu_read_lock() with no reschedule point. On a host with very
> many threads -- amplified by KASAN/lockdep in debug builds -- this loop
> can hog a CPU long enough to trip the soft lockup watchdog:
> 
>   watchdog: BUG: soft lockup - CPU#35 stuck for 22s! [kmemleak:537]
>    scan_block
>    kmemleak_scan
>    kmemleak_scan_thread
>    kthread
> 
> A cond_resched() cannot be added directly: the loop runs inside an RCU
> read-side critical section.
> 
> Split the scan in two parts:
> 
> 1) get the list of tasks (with RCU read lock) in an array
> 2) run scan_block() for the tasks (with cond_reschd()).
> 
> Is it a sane approach?
> 
> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
>  mm/kmemleak.c | 26 ++++++++++++++++++++++----
>  1 file changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
> index 7c7ba17ce7af0..9f8a35ecbb50c 100644
> --- a/mm/kmemleak.c
> +++ b/mm/kmemleak.c
> @@ -62,6 +62,7 @@
>  #include <linux/kernel.h>
>  #include <linux/list.h>
>  #include <linux/sched/signal.h>
> +#include <linux/sched/stat.h>
>  #include <linux/sched/task.h>
>  #include <linux/sched/task_stack.h>
>  #include <linux/jiffies.h>
> @@ -1885,17 +1886,34 @@ static void kmemleak_scan(void)
>  	 * Scanning the task stacks (may introduce false negatives).
>  	 */
>  	if (kmemleak_stack_scan) {
> -		struct task_struct *p, *g;
> +		struct task_struct **tasks, *p, *g;
> +		unsigned int nr = 0, max, i;
>  
> +		max = nr_threads + 64;
> +		tasks = kvmalloc_array(max, sizeof(*tasks), GFP_KERNEL);
> +
> +		/* Snapshot the threads under RCU */
>  		rcu_read_lock();
>  		for_each_process_thread(g, p) {
> -			void *stack = try_get_task_stack(p);
> +			if (!tasks || nr >= max)
> +				break;

Why don't you check !tasks right after the allocation?

> +			get_task_struct(p);
> +			tasks[nr++] = p;
> +		}
> +		rcu_read_unlock();
> +
> +		/* now scan_block for the tasks above with cond_resched() */
> +		for (i = 0; i < nr; i++) {
> +			void *stack = try_get_task_stack(tasks[i]);
> +
>  			if (stack) {
>  				scan_block(stack, stack + THREAD_SIZE, NULL);
> -				put_task_stack(p);
> +				put_task_stack(tasks[i]);
>  			}
> +			put_task_struct(tasks[i]);
> +			cond_resched();
>  		}
> -		rcu_read_unlock();
> +		kvfree(tasks);
>  	}


Thanks,
SJ

[...]


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks
  2026-06-11 12:45 [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks Breno Leitao
  2026-06-12  1:10 ` SeongJae Park
@ 2026-06-12  3:16 ` Lance Yang
  2026-06-12  4:27   ` Lance Yang
  1 sibling, 1 reply; 4+ messages in thread
From: Lance Yang @ 2026-06-12  3:16 UTC (permalink / raw)
  To: leitao
  Cc: catalin.marinas, akpm, linux-mm, linux-kernel, kernel-team, sj,
	Lance Yang


On Thu, Jun 11, 2026 at 05:45:00AM -0700, Breno Leitao wrote:
>kmemleak_scan() walks every thread and scans its kernel stack under a
>single rcu_read_lock() with no reschedule point. On a host with very
>many threads -- amplified by KASAN/lockdep in debug builds -- this loop
>can hog a CPU long enough to trip the soft lockup watchdog:
>
>  watchdog: BUG: soft lockup - CPU#35 stuck for 22s! [kmemleak:537]
>   scan_block
>   kmemleak_scan
>   kmemleak_scan_thread
>   kthread

Neat, good catch!

>A cond_resched() cannot be added directly: the loop runs inside an RCU
>read-side critical section.
>
>Split the scan in two parts:
>
>1) get the list of tasks (with RCU read lock) in an array
>2) run scan_block() for the tasks (with cond_reschd()).
>
>Is it a sane approach?

Why not use the kernel/hung_task.c pattern here? Seems simpler, with no
extra task-array allocation ;)

>Signed-off-by: Breno Leitao <leitao@debian.org>
>---

Could break RCU only when resched is needed. Pin the current cursors,
drop RCU, cond_resched(), take RCU again, and continue only if the
cursors are still alive ;)

If either cursor died while RCU was dropped, stopping this scan round
should be fine, IMHO.

---8<---
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 7c7ba17ce7af..1062d9545054 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1695,6 +1695,26 @@ static void kmemleak_cond_resched(struct kmemleak_object *object)
 	put_object(object);
 }

+static bool kmemleak_stack_scan_break(struct task_struct *g,
+				      struct task_struct *p)
+{
+	bool can_cont;
+
+	get_task_struct(g);
+	get_task_struct(p);
+
+	rcu_read_unlock();
+	cond_resched();
+	rcu_read_lock();
+
+	can_cont = pid_alive(g) && pid_alive(p);
+
+	put_task_struct(p);
+	put_task_struct(g);
+
+	return can_cont;
+}
+
 /*
  * Print one leak inline. The hex dump is gated on OBJECT_ALLOCATED so it
  * does not touch user memory that was freed concurrently; the rest of the
@@ -1894,7 +1914,10 @@ static void kmemleak_scan(void)
 				scan_block(stack, stack + THREAD_SIZE, NULL);
 				put_task_stack(p);
 			}
+			if (need_resched() && !kmemleak_stack_scan_break(g, p))
+				goto unlock;
 		}
+unlock:
 		rcu_read_unlock();
 	}
---

Not tested, though, feel free to grab it if it looks sane :)

[...]

Cheers, Lance


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks
  2026-06-12  3:16 ` Lance Yang
@ 2026-06-12  4:27   ` Lance Yang
  0 siblings, 0 replies; 4+ messages in thread
From: Lance Yang @ 2026-06-12  4:27 UTC (permalink / raw)
  To: leitao; +Cc: catalin.marinas, akpm, linux-mm, linux-kernel, kernel-team, sj



On 2026/6/12 11:16, Lance Yang wrote:
> 
> On Thu, Jun 11, 2026 at 05:45:00AM -0700, Breno Leitao wrote:
>> kmemleak_scan() walks every thread and scans its kernel stack under a
>> single rcu_read_lock() with no reschedule point. On a host with very
>> many threads -- amplified by KASAN/lockdep in debug builds -- this loop
>> can hog a CPU long enough to trip the soft lockup watchdog:
>>
>>   watchdog: BUG: soft lockup - CPU#35 stuck for 22s! [kmemleak:537]
>>    scan_block
>>    kmemleak_scan
>>    kmemleak_scan_thread
>>    kthread
> 
> Neat, good catch!
> 
>> A cond_resched() cannot be added directly: the loop runs inside an RCU
>> read-side critical section.
>>
>> Split the scan in two parts:
>>
>> 1) get the list of tasks (with RCU read lock) in an array
>> 2) run scan_block() for the tasks (with cond_reschd()).
>>
>> Is it a sane approach?
> 
> Why not use the kernel/hung_task.c pattern here? Seems simpler, with no
> extra task-array allocation ;)
> 
>> Signed-off-by: Breno Leitao <leitao@debian.org>

Probably wants a Fixes: tag and stable Cc too, no?

>> ---
> 
> Could break RCU only when resched is needed. Pin the current cursors,
> drop RCU, cond_resched(), take RCU again, and continue only if the
> cursors are still alive ;)
> 
> If either cursor died while RCU was droped, stopping this scan round
> should be fine, IMHO.
> 
> ---8<---
> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
> index 7c7ba17ce7af..1062d9545054 100644
> --- a/mm/kmemleak.c
> +++ b/mm/kmemleak.c
> @@ -1695,6 +1695,26 @@ static void kmemleak_cond_resched(struct kmemleak_object *object)
>   	put_object(object);
>   }
> 
> +static bool kmemleak_stack_scan_break(struct task_struct *g,
> +				      struct task_struct *p)
> +{
> +	bool can_cont;
> +
> +	get_task_struct(g);
> +	get_task_struct(p);
> +
> +	rcu_read_unlock();
> +	cond_resched();
> +	rcu_read_lock();
> +
> +	can_cont = pid_alive(g) && pid_alive(p);
> +
> +	put_task_struct(p);
> +	put_task_struct(g);
> +
> +	return can_cont;
> +}
> +
>   /*
>    * Print one leak inline. The hex dump is gated on OBJECT_ALLOCATED so it
>    * does not touch user memory that was freed concurrently; the rest of the
> @@ -1894,7 +1914,10 @@ static void kmemleak_scan(void)
>   				scan_block(stack, stack + THREAD_SIZE, NULL);
>   				put_task_stack(p);
>   			}
> +			if (need_resched() && !kmemleak_stack_scan_break(g, p))
> +				goto unlock;
>   		}
> +unlock:
>   		rcu_read_unlock();
>   	}
> ---
> 
> Not tested, though, feel free to grab it if looks sane :)
> 
> [...]
> 
> Cheers, Lance


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-12  4:27 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-11 12:45 [PATCH RFC] mm/kmemleak: avoid soft lockup when scanning task stacks Breno Leitao
2026-06-12  1:10 ` SeongJae Park
2026-06-12  3:16 ` Lance Yang
2026-06-12  4:27   ` Lance Yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox