linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
@ 2025-08-14 11:16 yangshiguang1011
  2025-08-16  8:25 ` Harry Yoo
  0 siblings, 1 reply; 11+ messages in thread
From: yangshiguang1011 @ 2025-08-14 11:16 UTC (permalink / raw)
  To: harry.yoo
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

From: yangshiguang <yangshiguang@xiaomi.com>

From: yangshiguang <yangshiguang@xiaomi.com>

set_track_prepare() can incur lock recursion.
The issue is that it is called from hrtimer_start_range_ns
while holding the per_cpu(hrtimer_bases)[n].lock; when
CONFIG_DEBUG_OBJECTS_TIMERS is enabled, set_track_prepare may
wake up kswapd, which then tries to acquire the same
per_cpu(hrtimer_bases)[n].lock.

So avoid waking up kswapd. The oops looks something like:

BUG: spinlock recursion on CPU#3, swapper/3/0
 lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
Call trace:
spin_bug+0x0
_raw_spin_lock_irqsave+0x80
hrtimer_try_to_cancel+0x94
task_contending+0x10c
enqueue_dl_entity+0x2a4
dl_server_start+0x74
enqueue_task_fair+0x568
enqueue_task+0xac
do_activate_task+0x14c
ttwu_do_activate+0xcc
try_to_wake_up+0x6c8
default_wake_function+0x20
autoremove_wake_function+0x1c
__wake_up+0xac
wakeup_kswapd+0x19c
wake_all_kswapds+0x78
__alloc_pages_slowpath+0x1ac
__alloc_pages_noprof+0x298
stack_depot_save_flags+0x6b0
stack_depot_save+0x14
set_track_prepare+0x5c
___slab_alloc+0xccc
__kmalloc_cache_noprof+0x470
__set_page_owner+0x2bc
post_alloc_hook[jt]+0x1b8
prep_new_page+0x28
get_page_from_freelist+0x1edc
__alloc_pages_noprof+0x13c
alloc_slab_page+0x244
allocate_slab+0x7c
___slab_alloc+0x8e8
kmem_cache_alloc_noprof+0x450
debug_objects_fill_pool+0x22c
debug_object_activate+0x40
enqueue_hrtimer[jt]+0xdc
hrtimer_start_range_ns+0x5f8
...

Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
---
v1 -> v2:
    propagate gfp flags to set_track_prepare()

[1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com/
---
 mm/slub.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 30003763d224..dba905bf1e03 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 }
 
 #ifdef CONFIG_STACKDEPOT
-static noinline depot_stack_handle_t set_track_prepare(void)
+static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
 {
 	depot_stack_handle_t handle;
 	unsigned long entries[TRACK_ADDRS_COUNT];
 	unsigned int nr_entries;
+	gfp_flags &= GFP_NOWAIT;
 
 	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
-	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+	handle = stack_depot_save(entries, nr_entries, gfp_flags);
 
 	return handle;
 }
 #else
-static inline depot_stack_handle_t set_track_prepare(void)
+static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
 {
 	return 0;
 }
@@ -996,9 +997,9 @@ static void set_track_update(struct kmem_cache *s, void *object,
 }
 
 static __always_inline void set_track(struct kmem_cache *s, void *object,
-				      enum track_item alloc, unsigned long addr)
+				      enum track_item alloc, unsigned long addr, gfp_t gfp_flags)
 {
-	depot_stack_handle_t handle = set_track_prepare();
+	depot_stack_handle_t handle = set_track_prepare(gfp_flags);
 
 	set_track_update(s, object, alloc, addr, handle);
 }
@@ -1921,9 +1922,9 @@ static inline bool free_debug_processing(struct kmem_cache *s,
 static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
 static inline int check_object(struct kmem_cache *s, struct slab *slab,
 			void *object, u8 val) { return 1; }
-static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
+static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; }
 static inline void set_track(struct kmem_cache *s, void *object,
-			     enum track_item alloc, unsigned long addr) {}
+			     enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {}
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 					struct slab *slab) {}
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -3878,7 +3879,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			 * tracking info and return the object.
 			 */
 			if (s->flags & SLAB_STORE_USER)
-				set_track(s, freelist, TRACK_ALLOC, addr);
+				set_track(s, freelist, TRACK_ALLOC, addr, gfpflags);
 
 			return freelist;
 		}
@@ -3910,7 +3911,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			goto new_objects;
 
 		if (s->flags & SLAB_STORE_USER)
-			set_track(s, freelist, TRACK_ALLOC, addr);
+			set_track(s, freelist, TRACK_ALLOC, addr, gfpflags);
 
 		return freelist;
 	}
@@ -4422,7 +4423,7 @@ static noinline void free_to_partial_list(
 	depot_stack_handle_t handle = 0;
 
 	if (s->flags & SLAB_STORE_USER)
-		handle = set_track_prepare();
+		handle = set_track_prepare(GFP_NOWAIT);
 
 	spin_lock_irqsave(&n->list_lock, flags);
 
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-14 11:16 [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare yangshiguang1011
@ 2025-08-16  8:25 ` Harry Yoo
  2025-08-16  9:33   ` Giorgi Tchankvetadze
  2025-08-16 10:05   ` yangshiguang
  0 siblings, 2 replies; 11+ messages in thread
From: Harry Yoo @ 2025-08-16  8:25 UTC (permalink / raw)
  To: yangshiguang1011
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
> From: yangshiguang <yangshiguang@xiaomi.com>
> 
> From: yangshiguang <yangshiguang@xiaomi.com>
> 
> set_track_prepare() can incur lock recursion.
> The issue is that it is called from hrtimer_start_range_ns
> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
> and try to hold the per_cpu(hrtimer_bases)[n].lock.
> 
> So avoid waking up kswapd.The oops looks something like:

Hi yangshiguang, 

In the next revision, could you please elaborate the commit message
to reflect how this change avoids waking up kswapd?

> BUG: spinlock recursion on CPU#3, swapper/3/0
>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
> Call trace:
> spin_bug+0x0
> _raw_spin_lock_irqsave+0x80
> hrtimer_try_to_cancel+0x94
> task_contending+0x10c
> enqueue_dl_entity+0x2a4
> dl_server_start+0x74
> enqueue_task_fair+0x568
> enqueue_task+0xac
> do_activate_task+0x14c
> ttwu_do_activate+0xcc
> try_to_wake_up+0x6c8
> default_wake_function+0x20
> autoremove_wake_function+0x1c
> __wake_up+0xac
> wakeup_kswapd+0x19c
> wake_all_kswapds+0x78
> __alloc_pages_slowpath+0x1ac
> __alloc_pages_noprof+0x298
> stack_depot_save_flags+0x6b0
> stack_depot_save+0x14
> set_track_prepare+0x5c
> ___slab_alloc+0xccc
> __kmalloc_cache_noprof+0x470
> __set_page_owner+0x2bc
> post_alloc_hook[jt]+0x1b8
> prep_new_page+0x28
> get_page_from_freelist+0x1edc
> __alloc_pages_noprof+0x13c
> alloc_slab_page+0x244
> allocate_slab+0x7c
> ___slab_alloc+0x8e8
> kmem_cache_alloc_noprof+0x450
> debug_objects_fill_pool+0x22c
> debug_object_activate+0x40
> enqueue_hrtimer[jt]+0xdc
> hrtimer_start_range_ns+0x5f8
> ...
> 
> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
> ---
> v1 -> v2:
>     propagate gfp flags to set_track_prepare()
> 
> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com 
> ---
>  mm/slub.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 30003763d224..dba905bf1e03 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>  }
>  
>  #ifdef CONFIG_STACKDEPOT
> -static noinline depot_stack_handle_t set_track_prepare(void)
> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>  {
>  	depot_stack_handle_t handle;
>  	unsigned long entries[TRACK_ADDRS_COUNT];
>  	unsigned int nr_entries;
> +	gfp_flags &= GFP_NOWAIT;

Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
direct reclamation?

>  	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
> -	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
> +	handle = stack_depot_save(entries, nr_entries, gfp_flags);
>  
>  	return handle;
>  }
>  #else
> -static inline depot_stack_handle_t set_track_prepare(void)
> +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>  {
>  	return 0;
>  }
> @@ -4422,7 +4423,7 @@ static noinline void free_to_partial_list(
>  	depot_stack_handle_t handle = 0;
>  
>  	if (s->flags & SLAB_STORE_USER)
> -		handle = set_track_prepare();
> +		handle = set_track_prepare(GFP_NOWAIT);

I don't think it is safe to use GFP_NOWAIT during free?

Let's say fill_pool() -> kmem_alloc_batch() fails to allocate an object
and then free_object_list() frees allocated objects,
set_track_prepare(GFP_NOWAIT) may wake up kswapd, and the same deadlock
you reported will occur.

So I think it should be __GFP_NOWARN?

-- 
Cheers,
Harry / Hyeonggon


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-16  8:25 ` Harry Yoo
@ 2025-08-16  9:33   ` Giorgi Tchankvetadze
  2025-08-16 10:19     ` yangshiguang
  2025-08-16 10:05   ` yangshiguang
  1 sibling, 1 reply; 11+ messages in thread
From: Giorgi Tchankvetadze @ 2025-08-16  9:33 UTC (permalink / raw)
  To: Harry Yoo, yangshiguang1011
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

Rather than masking to GFP_NOWAIT—which still allows kswapd to be 
woken—let’s strip every reclaim bit (`__GFP_RECLAIM` and 
`__GFP_KSWAPD_RECLAIM`) and add `__GFP_NORETRY | __GFP_NOWARN`. That 
guarantees we never enter the slow path that calls `wakeup_kswapd()`, so 
the timer-base lock can’t be re-entered.

On 8/16/2025 12:25 PM, Harry Yoo wrote:
> On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
>> From: yangshiguang <yangshiguang@xiaomi.com>
>>
>> From: yangshiguang <yangshiguang@xiaomi.com>
>>
>> set_track_prepare() can incur lock recursion.
>> The issue is that it is called from hrtimer_start_range_ns
>> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
>> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
>> and try to hold the per_cpu(hrtimer_bases)[n].lock.
>>
>> So avoid waking up kswapd.The oops looks something like:
> 
> Hi yangshiguang,
> 
> In the next revision, could you please elaborate the commit message
> to reflect how this change avoids waking up kswapd?
> 
>> BUG: spinlock recursion on CPU#3, swapper/3/0
>>   lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
>> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
>> Call trace:
>> spin_bug+0x0
>> _raw_spin_lock_irqsave+0x80
>> hrtimer_try_to_cancel+0x94
>> task_contending+0x10c
>> enqueue_dl_entity+0x2a4
>> dl_server_start+0x74
>> enqueue_task_fair+0x568
>> enqueue_task+0xac
>> do_activate_task+0x14c
>> ttwu_do_activate+0xcc
>> try_to_wake_up+0x6c8
>> default_wake_function+0x20
>> autoremove_wake_function+0x1c
>> __wake_up+0xac
>> wakeup_kswapd+0x19c
>> wake_all_kswapds+0x78
>> __alloc_pages_slowpath+0x1ac
>> __alloc_pages_noprof+0x298
>> stack_depot_save_flags+0x6b0
>> stack_depot_save+0x14
>> set_track_prepare+0x5c
>> ___slab_alloc+0xccc
>> __kmalloc_cache_noprof+0x470
>> __set_page_owner+0x2bc
>> post_alloc_hook[jt]+0x1b8
>> prep_new_page+0x28
>> get_page_from_freelist+0x1edc
>> __alloc_pages_noprof+0x13c
>> alloc_slab_page+0x244
>> allocate_slab+0x7c
>> ___slab_alloc+0x8e8
>> kmem_cache_alloc_noprof+0x450
>> debug_objects_fill_pool+0x22c
>> debug_object_activate+0x40
>> enqueue_hrtimer[jt]+0xdc
>> hrtimer_start_range_ns+0x5f8
>> ...
>>
>> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
>> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
>> ---
>> v1 -> v2:
>>      propagate gfp flags to set_track_prepare()
>>
>> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com
>> ---
>>   mm/slub.c | 21 +++++++++++----------
>>   1 file changed, 11 insertions(+), 10 deletions(-)
>>
>> diff --git a/mm/slub.c b/mm/slub.c
>> index 30003763d224..dba905bf1e03 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>>   }
>>   
>>   #ifdef CONFIG_STACKDEPOT
>> -static noinline depot_stack_handle_t set_track_prepare(void)
>> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>>   {
>>   	depot_stack_handle_t handle;
>>   	unsigned long entries[TRACK_ADDRS_COUNT];
>>   	unsigned int nr_entries;
>> +	gfp_flags &= GFP_NOWAIT;
> 
> Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
> direct reclamation?
> 
>>   	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
>> -	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
>> +	handle = stack_depot_save(entries, nr_entries, gfp_flags);
>>   
>>   	return handle;
>>   }
>>   #else
>> -static inline depot_stack_handle_t set_track_prepare(void)
>> +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>>   {
>>   	return 0;
>>   }
>> @@ -4422,7 +4423,7 @@ static noinline void free_to_partial_list(
>>   	depot_stack_handle_t handle = 0;
>>   
>>   	if (s->flags & SLAB_STORE_USER)
>> -		handle = set_track_prepare();
>> +		handle = set_track_prepare(GFP_NOWAIT);
> 
> I don't think it is safe to use GFP_NOWAIT during free?
> 
> Let's say fill_pool() -> kmem_alloc_batch() fails to allocate an object
> and then free_object_list() frees allocated objects,
> set_track_prepare(GFP_NOWAIT) may wake up kswapd, and the same deadlock
> you reported will occur.
> 
> So I think it should be __GFP_NOWARN?
> 



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re:Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-16  8:25 ` Harry Yoo
  2025-08-16  9:33   ` Giorgi Tchankvetadze
@ 2025-08-16 10:05   ` yangshiguang
  2025-08-16 10:46     ` Harry Yoo
  1 sibling, 1 reply; 11+ messages in thread
From: yangshiguang @ 2025-08-16 10:05 UTC (permalink / raw)
  To: Harry Yoo
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang



At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
>> From: yangshiguang <yangshiguang@xiaomi.com>
>> 
>> From: yangshiguang <yangshiguang@xiaomi.com>
>> 
>> set_track_prepare() can incur lock recursion.
>> The issue is that it is called from hrtimer_start_range_ns
>> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
>> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
>> and try to hold the per_cpu(hrtimer_bases)[n].lock.
>> 
>> So avoid waking up kswapd.The oops looks something like:
>
>Hi yangshiguang, 
>
>In the next revision, could you please elaborate the commit message
>to reflect how this change avoids waking up kswapd?
>

of course. Thanks for the reminder.

>> BUG: spinlock recursion on CPU#3, swapper/3/0
>>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
>> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
>> Call trace:
>> spin_bug+0x0
>> _raw_spin_lock_irqsave+0x80
>> hrtimer_try_to_cancel+0x94
>> task_contending+0x10c
>> enqueue_dl_entity+0x2a4
>> dl_server_start+0x74
>> enqueue_task_fair+0x568
>> enqueue_task+0xac
>> do_activate_task+0x14c
>> ttwu_do_activate+0xcc
>> try_to_wake_up+0x6c8
>> default_wake_function+0x20
>> autoremove_wake_function+0x1c
>> __wake_up+0xac
>> wakeup_kswapd+0x19c
>> wake_all_kswapds+0x78
>> __alloc_pages_slowpath+0x1ac
>> __alloc_pages_noprof+0x298
>> stack_depot_save_flags+0x6b0
>> stack_depot_save+0x14
>> set_track_prepare+0x5c
>> ___slab_alloc+0xccc
>> __kmalloc_cache_noprof+0x470
>> __set_page_owner+0x2bc
>> post_alloc_hook[jt]+0x1b8
>> prep_new_page+0x28
>> get_page_from_freelist+0x1edc
>> __alloc_pages_noprof+0x13c
>> alloc_slab_page+0x244
>> allocate_slab+0x7c
>> ___slab_alloc+0x8e8
>> kmem_cache_alloc_noprof+0x450
>> debug_objects_fill_pool+0x22c
>> debug_object_activate+0x40
>> enqueue_hrtimer[jt]+0xdc
>> hrtimer_start_range_ns+0x5f8
>> ...
>> 
>> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
>> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
>> ---
>> v1 -> v2:
>>     propagate gfp flags to set_track_prepare()
>> 
>> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com 
>> ---
>>  mm/slub.c | 21 +++++++++++----------
>>  1 file changed, 11 insertions(+), 10 deletions(-)
>> 
>> diff --git a/mm/slub.c b/mm/slub.c
>> index 30003763d224..dba905bf1e03 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>>  }
>>  
>>  #ifdef CONFIG_STACKDEPOT
>> -static noinline depot_stack_handle_t set_track_prepare(void)
>> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>>  {
>>  	depot_stack_handle_t handle;
>>  	unsigned long entries[TRACK_ADDRS_COUNT];
>>  	unsigned int nr_entries;
>> +	gfp_flags &= GFP_NOWAIT;
>
>Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
>direct reclamation?
>

Hi Harry,

The original allocation is GFP_NOWAIT.
So I think it's better not to increase the allocation cost here.


>>  	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
>> -	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
>> +	handle = stack_depot_save(entries, nr_entries, gfp_flags);
>>  
>>  	return handle;
>>  }
>>  #else
>> -static inline depot_stack_handle_t set_track_prepare(void)
>> +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>>  {
>>  	return 0;
>>  }
>> @@ -4422,7 +4423,7 @@ static noinline void free_to_partial_list(
>>  	depot_stack_handle_t handle = 0;
>>  
>>  	if (s->flags & SLAB_STORE_USER)
>> -		handle = set_track_prepare();
>> +		handle = set_track_prepare(GFP_NOWAIT);
>
>I don't think it is safe to use GFP_NOWAIT during free?
>
>Let's say fill_pool() -> kmem_alloc_batch() fails to allocate an object
>and then free_object_list() frees allocated objects,
>set_track_prepare(GFP_NOWAIT) may wake up kswapd, and the same deadlock
>you reported will occur.
>
>So I think it should be __GFP_NOWARN?
>

Yes, this ensures safety.

>-- 
>Cheers,
>Harry / Hyeonggon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re:Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-16  9:33   ` Giorgi Tchankvetadze
@ 2025-08-16 10:19     ` yangshiguang
  0 siblings, 0 replies; 11+ messages in thread
From: yangshiguang @ 2025-08-16 10:19 UTC (permalink / raw)
  To: Giorgi Tchankvetadze
  Cc: Harry Yoo, vbabka, akpm, cl, rientjes, roman.gushchin, glittao,
	linux-mm, linux-kernel, stable, yangshiguang





At 2025-08-16 17:33:00, "Giorgi Tchankvetadze" <giorgitchankvetadze1997@gmail.com> wrote:
>Rather than masking to GFP_NOWAIT—which still allows kswapd to be 
>woken—let’s strip every reclaim bit (`__GFP_RECLAIM` and 
>`__GFP_KSWAPD_RECLAIM`) and add `__GFP_NORETRY | __GFP_NOWARN`. That 
>guarantees we never enter the slow path that calls `wakeup_kswapd()`, so 
>the timer-base lock can’t be re-entered.

>

Hi Giorgi,
Thanks for your advice.
But more scenarios allow kswapd to be woken up.
It might be better to allow kswapd to be woken up.

>On 8/16/2025 12:25 PM, Harry Yoo wrote:
>> On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
>>> From: yangshiguang <yangshiguang@xiaomi.com>
>>>
>>> From: yangshiguang <yangshiguang@xiaomi.com>
>>>
>>> set_track_prepare() can incur lock recursion.
>>> The issue is that it is called from hrtimer_start_range_ns
>>> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
>>> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
>>> and try to hold the per_cpu(hrtimer_bases)[n].lock.
>>>
>>> So avoid waking up kswapd.The oops looks something like:
>> 
>> Hi yangshiguang,
>> 
>> In the next revision, could you please elaborate the commit message
>> to reflect how this change avoids waking up kswapd?
>> 
>>> BUG: spinlock recursion on CPU#3, swapper/3/0
>>>   lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
>>> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
>>> Call trace:
>>> spin_bug+0x0
>>> _raw_spin_lock_irqsave+0x80
>>> hrtimer_try_to_cancel+0x94
>>> task_contending+0x10c
>>> enqueue_dl_entity+0x2a4
>>> dl_server_start+0x74
>>> enqueue_task_fair+0x568
>>> enqueue_task+0xac
>>> do_activate_task+0x14c
>>> ttwu_do_activate+0xcc
>>> try_to_wake_up+0x6c8
>>> default_wake_function+0x20
>>> autoremove_wake_function+0x1c
>>> __wake_up+0xac
>>> wakeup_kswapd+0x19c
>>> wake_all_kswapds+0x78
>>> __alloc_pages_slowpath+0x1ac
>>> __alloc_pages_noprof+0x298
>>> stack_depot_save_flags+0x6b0
>>> stack_depot_save+0x14
>>> set_track_prepare+0x5c
>>> ___slab_alloc+0xccc
>>> __kmalloc_cache_noprof+0x470
>>> __set_page_owner+0x2bc
>>> post_alloc_hook[jt]+0x1b8
>>> prep_new_page+0x28
>>> get_page_from_freelist+0x1edc
>>> __alloc_pages_noprof+0x13c
>>> alloc_slab_page+0x244
>>> allocate_slab+0x7c
>>> ___slab_alloc+0x8e8
>>> kmem_cache_alloc_noprof+0x450
>>> debug_objects_fill_pool+0x22c
>>> debug_object_activate+0x40
>>> enqueue_hrtimer[jt]+0xdc
>>> hrtimer_start_range_ns+0x5f8
>>> ...
>>>
>>> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
>>> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
>>> ---
>>> v1 -> v2:
>>>      propagate gfp flags to set_track_prepare()
>>>
>>> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com
>>> ---
>>>   mm/slub.c | 21 +++++++++++----------
>>>   1 file changed, 11 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/mm/slub.c b/mm/slub.c
>>> index 30003763d224..dba905bf1e03 100644
>>> --- a/mm/slub.c
>>> +++ b/mm/slub.c
>>> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>>>   }
>>>   
>>>   #ifdef CONFIG_STACKDEPOT
>>> -static noinline depot_stack_handle_t set_track_prepare(void)
>>> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>>>   {
>>>   	depot_stack_handle_t handle;
>>>   	unsigned long entries[TRACK_ADDRS_COUNT];
>>>   	unsigned int nr_entries;
>>> +	gfp_flags &= GFP_NOWAIT;
>> 
>> Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
>> direct reclamation?
>> 
>>>   	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
>>> -	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
>>> +	handle = stack_depot_save(entries, nr_entries, gfp_flags);
>>>   
>>>   	return handle;
>>>   }
>>>   #else
>>> -static inline depot_stack_handle_t set_track_prepare(void)
>>> +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>>>   {
>>>   	return 0;
>>>   }
>>> @@ -4422,7 +4423,7 @@ static noinline void free_to_partial_list(
>>>   	depot_stack_handle_t handle = 0;
>>>   
>>>   	if (s->flags & SLAB_STORE_USER)
>>> -		handle = set_track_prepare();
>>> +		handle = set_track_prepare(GFP_NOWAIT);
>> 
>> I don't think it is safe to use GFP_NOWAIT during free?
>> 
>> Let's say fill_pool() -> kmem_alloc_batch() fails to allocate an object
>> and then free_object_list() frees allocated objects,
>> set_track_prepare(GFP_NOWAIT) may wake up kswapd, and the same deadlock
>> you reported will occur.
>> 
>> So I think it should be __GFP_NOWARN?
>> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-16 10:05   ` yangshiguang
@ 2025-08-16 10:46     ` Harry Yoo
  2025-08-18  2:07       ` yangshiguang
  0 siblings, 1 reply; 11+ messages in thread
From: Harry Yoo @ 2025-08-16 10:46 UTC (permalink / raw)
  To: yangshiguang
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

On Sat, Aug 16, 2025 at 06:05:15PM +0800, yangshiguang wrote:
> 
> 
> At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
> >On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
> >> From: yangshiguang <yangshiguang@xiaomi.com>
> >> 
> >> From: yangshiguang <yangshiguang@xiaomi.com>
> >> 
> >> set_track_prepare() can incur lock recursion.
> >> The issue is that it is called from hrtimer_start_range_ns
> >> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
> >> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
> >> and try to hold the per_cpu(hrtimer_bases)[n].lock.
> >> 
> >> So avoid waking up kswapd.The oops looks something like:
> >
> >Hi yangshiguang, 
> >
> >In the next revision, could you please elaborate the commit message
> >to reflect how this change avoids waking up kswapd?
> >
> 
> of course. Thanks for the reminder.
> 
> >> BUG: spinlock recursion on CPU#3, swapper/3/0
> >>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
> >> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
> >> Call trace:
> >> spin_bug+0x0
> >> _raw_spin_lock_irqsave+0x80
> >> hrtimer_try_to_cancel+0x94
> >> task_contending+0x10c
> >> enqueue_dl_entity+0x2a4
> >> dl_server_start+0x74
> >> enqueue_task_fair+0x568
> >> enqueue_task+0xac
> >> do_activate_task+0x14c
> >> ttwu_do_activate+0xcc
> >> try_to_wake_up+0x6c8
> >> default_wake_function+0x20
> >> autoremove_wake_function+0x1c
> >> __wake_up+0xac
> >> wakeup_kswapd+0x19c
> >> wake_all_kswapds+0x78
> >> __alloc_pages_slowpath+0x1ac
> >> __alloc_pages_noprof+0x298
> >> stack_depot_save_flags+0x6b0
> >> stack_depot_save+0x14
> >> set_track_prepare+0x5c
> >> ___slab_alloc+0xccc
> >> __kmalloc_cache_noprof+0x470
> >> __set_page_owner+0x2bc
> >> post_alloc_hook[jt]+0x1b8
> >> prep_new_page+0x28
> >> get_page_from_freelist+0x1edc
> >> __alloc_pages_noprof+0x13c
> >> alloc_slab_page+0x244
> >> allocate_slab+0x7c
> >> ___slab_alloc+0x8e8
> >> kmem_cache_alloc_noprof+0x450
> >> debug_objects_fill_pool+0x22c
> >> debug_object_activate+0x40
> >> enqueue_hrtimer[jt]+0xdc
> >> hrtimer_start_range_ns+0x5f8
> >> ...
> >> 
> >> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
> >> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
> >> ---
> >> v1 -> v2:
> >>     propagate gfp flags to set_track_prepare()
> >> 
> >> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com 
> >> ---
> >>  mm/slub.c | 21 +++++++++++----------
> >>  1 file changed, 11 insertions(+), 10 deletions(-)
> >> 
> >> diff --git a/mm/slub.c b/mm/slub.c
> >> index 30003763d224..dba905bf1e03 100644
> >> --- a/mm/slub.c
> >> +++ b/mm/slub.c
> >> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
> >>  }
> >>  
> >>  #ifdef CONFIG_STACKDEPOT
> >> -static noinline depot_stack_handle_t set_track_prepare(void)
> >> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
> >>  {
> >>  	depot_stack_handle_t handle;
> >>  	unsigned long entries[TRACK_ADDRS_COUNT];
> >>  	unsigned int nr_entries;
> >> +	gfp_flags &= GFP_NOWAIT;
> >
> >Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
> >direct reclamation?
> >
> 
> Hi Harry,
> 
> The original allocation is GFP_NOWAIT.
> So I think it's better not to increase the allocation cost here.

I don't think the allocation cost is important here, because collecting
a stack trace for each alloc/free is quite slow anyway. And we don't really
care about performance in debug caches (it isn't designed to be
performant).

I think it was GFP_NOWAIT because it was considered safe without
regard to the GFP flags passed, rather than due to performance
considerations.

-- 
Cheers,
Harry / Hyeonggon


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re:Re: Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-16 10:46     ` Harry Yoo
@ 2025-08-18  2:07       ` yangshiguang
  2025-08-18  2:22         ` Harry Yoo
  0 siblings, 1 reply; 11+ messages in thread
From: yangshiguang @ 2025-08-18  2:07 UTC (permalink / raw)
  To: Harry Yoo
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang



At 2025-08-16 18:46:12, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>On Sat, Aug 16, 2025 at 06:05:15PM +0800, yangshiguang wrote:
>> 
>> 
>> At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>> >On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
>> >> From: yangshiguang <yangshiguang@xiaomi.com>
>> >> 
>> >> From: yangshiguang <yangshiguang@xiaomi.com>
>> >> 
>> >> set_track_prepare() can incur lock recursion.
>> >> The issue is that it is called from hrtimer_start_range_ns
>> >> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
>> >> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
>> >> and try to hold the per_cpu(hrtimer_bases)[n].lock.
>> >> 
>> >> So avoid waking up kswapd.The oops looks something like:
>> >
>> >Hi yangshiguang, 
>> >
>> >In the next revision, could you please elaborate the commit message
>> >to reflect how this change avoids waking up kswapd?
>> >
>> 
>> of course. Thanks for the reminder.
>> 
>> >> BUG: spinlock recursion on CPU#3, swapper/3/0
>> >>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
>> >> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
>> >> Call trace:
>> >> spin_bug+0x0
>> >> _raw_spin_lock_irqsave+0x80
>> >> hrtimer_try_to_cancel+0x94
>> >> task_contending+0x10c
>> >> enqueue_dl_entity+0x2a4
>> >> dl_server_start+0x74
>> >> enqueue_task_fair+0x568
>> >> enqueue_task+0xac
>> >> do_activate_task+0x14c
>> >> ttwu_do_activate+0xcc
>> >> try_to_wake_up+0x6c8
>> >> default_wake_function+0x20
>> >> autoremove_wake_function+0x1c
>> >> __wake_up+0xac
>> >> wakeup_kswapd+0x19c
>> >> wake_all_kswapds+0x78
>> >> __alloc_pages_slowpath+0x1ac
>> >> __alloc_pages_noprof+0x298
>> >> stack_depot_save_flags+0x6b0
>> >> stack_depot_save+0x14
>> >> set_track_prepare+0x5c
>> >> ___slab_alloc+0xccc
>> >> __kmalloc_cache_noprof+0x470
>> >> __set_page_owner+0x2bc
>> >> post_alloc_hook[jt]+0x1b8
>> >> prep_new_page+0x28
>> >> get_page_from_freelist+0x1edc
>> >> __alloc_pages_noprof+0x13c
>> >> alloc_slab_page+0x244
>> >> allocate_slab+0x7c
>> >> ___slab_alloc+0x8e8
>> >> kmem_cache_alloc_noprof+0x450
>> >> debug_objects_fill_pool+0x22c
>> >> debug_object_activate+0x40
>> >> enqueue_hrtimer[jt]+0xdc
>> >> hrtimer_start_range_ns+0x5f8
>> >> ...
>> >> 
>> >> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
>> >> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
>> >> ---
>> >> v1 -> v2:
>> >>     propagate gfp flags to set_track_prepare()
>> >> 
>> >> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com 
>> >> ---
>> >>  mm/slub.c | 21 +++++++++++----------
>> >>  1 file changed, 11 insertions(+), 10 deletions(-)
>> >> 
>> >> diff --git a/mm/slub.c b/mm/slub.c
>> >> index 30003763d224..dba905bf1e03 100644
>> >> --- a/mm/slub.c
>> >> +++ b/mm/slub.c
>> >> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>> >>  }
>> >>  
>> >>  #ifdef CONFIG_STACKDEPOT
>> >> -static noinline depot_stack_handle_t set_track_prepare(void)
>> >> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>> >>  {
>> >>  	depot_stack_handle_t handle;
>> >>  	unsigned long entries[TRACK_ADDRS_COUNT];
>> >>  	unsigned int nr_entries;
>> >> +	gfp_flags &= GFP_NOWAIT;
>> >
>> >Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
>> >direct reclamation?
>> >
>> 
>> Hi Harry,
>> 
>> The original allocation is GFP_NOWAIT.
>> So I think it's better not to increase the allocation cost here.
>
>I don't think the allocation cost is important here, because collecting
>a stack trace for each alloc/free is quite slow anyway. And we don't really
>care about performance in debug caches (it isn't designed to be
>performant).
>
>I think it was GFP_NOWAIT because it was considered safe without
>regard to the GFP flags passed, rather than due to performance
>considerations.
>
Hi harry,

Is that so?
gfp_flags &= (GFP_NOWAIT | __GFP_DIRECT_RECLAIM);


>-- 
>Cheers,
>Harry / Hyeonggon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Re: Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-18  2:07       ` yangshiguang
@ 2025-08-18  2:22         ` Harry Yoo
  2025-08-18  2:33           ` yangshiguang
  0 siblings, 1 reply; 11+ messages in thread
From: Harry Yoo @ 2025-08-18  2:22 UTC (permalink / raw)
  To: yangshiguang
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

On Mon, Aug 18, 2025 at 10:07:40AM +0800, yangshiguang wrote:
> 
> 
> At 2025-08-16 18:46:12, "Harry Yoo" <harry.yoo@oracle.com> wrote:
> >On Sat, Aug 16, 2025 at 06:05:15PM +0800, yangshiguang wrote:
> >> 
> >> 
> >> At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
> >> >On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
> >> >> 
> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
> >> >> 
> >> >> set_track_prepare() can incur lock recursion.
> >> >> The issue is that it is called from hrtimer_start_range_ns
> >> >> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
> >> >> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
> >> >> and try to hold the per_cpu(hrtimer_bases)[n].lock.
> >> >> 
> >> >> So avoid waking up kswapd. The oops looks something like:
> >> >
> >> >Hi yangshiguang, 
> >> >
> >> >In the next revision, could you please elaborate the commit message
> >> >to reflect how this change avoids waking up kswapd?
> >> >
> >> 
> >> of course. Thanks for the reminder.
> >> 
> >> >> BUG: spinlock recursion on CPU#3, swapper/3/0
> >> >>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
> >> >> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
> >> >> Call trace:
> >> >> spin_bug+0x0
> >> >> _raw_spin_lock_irqsave+0x80
> >> >> hrtimer_try_to_cancel+0x94
> >> >> task_contending+0x10c
> >> >> enqueue_dl_entity+0x2a4
> >> >> dl_server_start+0x74
> >> >> enqueue_task_fair+0x568
> >> >> enqueue_task+0xac
> >> >> do_activate_task+0x14c
> >> >> ttwu_do_activate+0xcc
> >> >> try_to_wake_up+0x6c8
> >> >> default_wake_function+0x20
> >> >> autoremove_wake_function+0x1c
> >> >> __wake_up+0xac
> >> >> wakeup_kswapd+0x19c
> >> >> wake_all_kswapds+0x78
> >> >> __alloc_pages_slowpath+0x1ac
> >> >> __alloc_pages_noprof+0x298
> >> >> stack_depot_save_flags+0x6b0
> >> >> stack_depot_save+0x14
> >> >> set_track_prepare+0x5c
> >> >> ___slab_alloc+0xccc
> >> >> __kmalloc_cache_noprof+0x470
> >> >> __set_page_owner+0x2bc
> >> >> post_alloc_hook[jt]+0x1b8
> >> >> prep_new_page+0x28
> >> >> get_page_from_freelist+0x1edc
> >> >> __alloc_pages_noprof+0x13c
> >> >> alloc_slab_page+0x244
> >> >> allocate_slab+0x7c
> >> >> ___slab_alloc+0x8e8
> >> >> kmem_cache_alloc_noprof+0x450
> >> >> debug_objects_fill_pool+0x22c
> >> >> debug_object_activate+0x40
> >> >> enqueue_hrtimer[jt]+0xdc
> >> >> hrtimer_start_range_ns+0x5f8
> >> >> ...
> >> >> 
> >> >> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
> >> >> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
> >> >> ---
> >> >> v1 -> v2:
> >> >>     propagate gfp flags to set_track_prepare()
> >> >> 
> >> >> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com 
> >> >> ---
> >> >>  mm/slub.c | 21 +++++++++++----------
> >> >>  1 file changed, 11 insertions(+), 10 deletions(-)
> >> >> 
> >> >> diff --git a/mm/slub.c b/mm/slub.c
> >> >> index 30003763d224..dba905bf1e03 100644
> >> >> --- a/mm/slub.c
> >> >> +++ b/mm/slub.c
> >> >> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
> >> >>  }
> >> >>  
> >> >>  #ifdef CONFIG_STACKDEPOT
> >> >> -static noinline depot_stack_handle_t set_track_prepare(void)
> >> >> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
> >> >>  {
> >> >>  	depot_stack_handle_t handle;
> >> >>  	unsigned long entries[TRACK_ADDRS_COUNT];
> >> >>  	unsigned int nr_entries;
> >> >> +	gfp_flags &= GFP_NOWAIT;
> >> >
> >> >Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
> >> >direct reclamation?
> >> >
> >> 
> >> Hi Harry,
> >> 
> >> The original allocation is GFP_NOWAIT.
> >> So I think it's better not to increase the allocation cost here.
> >
> >I don't think the allocation cost is important here, because collecting
> >a stack trace for each alloc/free is quite slow anyway. And we don't really
> >care about performance in debug caches (it isn't designed to be
> >performant).
> >
> >I think it was GFP_NOWAIT because it was considered safe without
> >regard to the GFP flags passed, rather than due to performance
> >considerations.
> >
> Hi harry,
> 
> Is that so?
> gfp_flags &= (GFP_NOWAIT | __GFP_DIRECT_RECLAIM);

This still clears gfp flags passed by the caller to the allocator.
Why not use gfp_flags directly without clearing some flags?

-- 
Cheers,
Harry / Hyeonggon


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re:Re: Re: Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-18  2:22         ` Harry Yoo
@ 2025-08-18  2:33           ` yangshiguang
  2025-08-18  3:43             ` Harry Yoo
  0 siblings, 1 reply; 11+ messages in thread
From: yangshiguang @ 2025-08-18  2:33 UTC (permalink / raw)
  To: Harry Yoo
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

[-- Attachment #1: Type: text/plain, Size: 5402 bytes --]




At 2025-08-18 10:22:36, "Harry Yoo" <harry.yoo@oracle.com> wrote:

>On Mon, Aug 18, 2025 at 10:07:40AM +0800, yangshiguang wrote:
>> 
>> 
>> At 2025-08-16 18:46:12, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>> >On Sat, Aug 16, 2025 at 06:05:15PM +0800, yangshiguang wrote:
>> >> 
>> >> 
>> >> At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>> >> >On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
>> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
>> >> >> 
>> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
>> >> >> 
>> >> >> set_track_prepare() can incur lock recursion.
>> >> >> The issue is that it is called from hrtimer_start_range_ns
>> >> >> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
>> >> >> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
>> >> >> and try to hold the per_cpu(hrtimer_bases)[n].lock.
>> >> >> 
> >> >> >> So avoid waking up kswapd. The oops looks something like:
>> >> >
>> >> >Hi yangshiguang, 
>> >> >
>> >> >In the next revision, could you please elaborate the commit message
>> >> >to reflect how this change avoids waking up kswapd?
>> >> >
>> >> 
>> >> of course. Thanks for the reminder.
>> >> 
>> >> >> BUG: spinlock recursion on CPU#3, swapper/3/0
>> >> >>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
>> >> >> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
>> >> >> Call trace:
>> >> >> spin_bug+0x0
>> >> >> _raw_spin_lock_irqsave+0x80
>> >> >> hrtimer_try_to_cancel+0x94
>> >> >> task_contending+0x10c
>> >> >> enqueue_dl_entity+0x2a4
>> >> >> dl_server_start+0x74
>> >> >> enqueue_task_fair+0x568
>> >> >> enqueue_task+0xac
>> >> >> do_activate_task+0x14c
>> >> >> ttwu_do_activate+0xcc
>> >> >> try_to_wake_up+0x6c8
>> >> >> default_wake_function+0x20
>> >> >> autoremove_wake_function+0x1c
>> >> >> __wake_up+0xac
>> >> >> wakeup_kswapd+0x19c
>> >> >> wake_all_kswapds+0x78
>> >> >> __alloc_pages_slowpath+0x1ac
>> >> >> __alloc_pages_noprof+0x298
>> >> >> stack_depot_save_flags+0x6b0
>> >> >> stack_depot_save+0x14
>> >> >> set_track_prepare+0x5c
>> >> >> ___slab_alloc+0xccc
>> >> >> __kmalloc_cache_noprof+0x470
>> >> >> __set_page_owner+0x2bc
>> >> >> post_alloc_hook[jt]+0x1b8
>> >> >> prep_new_page+0x28
>> >> >> get_page_from_freelist+0x1edc
>> >> >> __alloc_pages_noprof+0x13c
>> >> >> alloc_slab_page+0x244
>> >> >> allocate_slab+0x7c
>> >> >> ___slab_alloc+0x8e8
>> >> >> kmem_cache_alloc_noprof+0x450
>> >> >> debug_objects_fill_pool+0x22c
>> >> >> debug_object_activate+0x40
>> >> >> enqueue_hrtimer[jt]+0xdc
>> >> >> hrtimer_start_range_ns+0x5f8
>> >> >> ...
>> >> >> 
>> >> >> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
>> >> >> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
>> >> >> ---
>> >> >> v1 -> v2:
>> >> >>     propagate gfp flags to set_track_prepare()
>> >> >> 
>> >> >> [1] https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com 
>> >> >> ---
>> >> >>  mm/slub.c | 21 +++++++++++----------
>> >> >>  1 file changed, 11 insertions(+), 10 deletions(-)
>> >> >> 
>> >> >> diff --git a/mm/slub.c b/mm/slub.c
>> >> >> index 30003763d224..dba905bf1e03 100644
>> >> >> --- a/mm/slub.c
>> >> >> +++ b/mm/slub.c
>> >> >> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>> >> >>  }
>> >> >>  
>> >> >>  #ifdef CONFIG_STACKDEPOT
>> >> >> -static noinline depot_stack_handle_t set_track_prepare(void)
>> >> >> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>> >> >>  {
>> >> >>  	depot_stack_handle_t handle;
>> >> >>  	unsigned long entries[TRACK_ADDRS_COUNT];
>> >> >>  	unsigned int nr_entries;
>> >> >> +	gfp_flags &= GFP_NOWAIT;
>> >> >
>> >> >Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
>> >> >direct reclamation?
>> >> >
>> >> 
>> >> Hi Harry,
>> >> 
>> >> The original allocation is GFP_NOWAIT.
>> >> So I think it's better not to increase the allocation cost here.
>> >
>> >I don't think the allocation cost is important here, because collecting
>> >a stack trace for each alloc/free is quite slow anyway. And we don't really
>> >care about performance in debug caches (it isn't designed to be
>> >performant).
>> >
>> >I think it was GFP_NOWAIT because it was considered safe without
>> >regard to the GFP flags passed, rather than due to performance
>> >considerations.
>> >
>> Hi harry,
>> 
>> Is that so?
>> gfp_flags &= (GFP_NOWAIT | __GFP_DIRECT_RECLAIM);
>
>This still clears gfp flags passed by the caller to the allocator.
>Why not use gfp_flags directly without clearing some flags?

>
Hi Harry,


This introduces new problems.


call stack:
dump_backtrace+0xfc/0x17c
show_stack+0x18/0x28
dump_stack_lvl+0x40/0xc0
dump_stack+0x18/0x24
__might_resched+0x164/0x184
__might_sleep+0x38/0x84
prepare_alloc_pages+0xc0/0x17c
__alloc_pages_noprof+0x130/0x3f8
stack_depot_save_flags+0x5a8/0x6bc
stack_depot_save+0x14/0x24
set_track_prepare+0x64/0x90
___slab_alloc+0xc14/0xc48
__kmalloc_cache_noprof+0x398/0x568
__kthread_create_on_node+0x8c/0x1f0
kthread_create_on_node+0x4c/0x74
create_worker+0xe0/0x298
workqueue_init+0x228/0x324
kernel_init_freeable+0x124/0x1c8
kernel_init+0x20/0x1ac
ret_from_fork+0x10/0x20


Of course there are other problems.
So it is best to limit gfp flags.


>-- 
>Cheers,
>Harry / Hyeonggon

[-- Attachment #2: Type: text/html, Size: 7809 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Re: Re: Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-18  2:33           ` yangshiguang
@ 2025-08-18  3:43             ` Harry Yoo
  2025-08-18 11:25               ` yangshiguang
  0 siblings, 1 reply; 11+ messages in thread
From: Harry Yoo @ 2025-08-18  3:43 UTC (permalink / raw)
  To: yangshiguang
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang

On Mon, Aug 18, 2025 at 10:33:51AM +0800, yangshiguang wrote:
> 
> 
> 
> At 2025-08-18 10:22:36, "Harry Yoo" <harry.yoo@oracle.com> wrote:
> 
> >On Mon, Aug 18, 2025 at 10:07:40AM +0800, yangshiguang wrote:
> >> 
> >> 
> >> At 2025-08-16 18:46:12, "Harry Yoo" <harry.yoo@oracle.com> wrote:
> >> >On Sat, Aug 16, 2025 at 06:05:15PM +0800, yangshiguang wrote:
> >> >> 
> >> >> 
> >> >> At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
> >> >> >On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
> >> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
> >> >> >> 
> >> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
> >> >> >> 
> >> >> >> set_track_prepare() can incur lock recursion.
> >> >> >> The issue is that it is called from hrtimer_start_range_ns
> >> >> >> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
> >> >> >> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
> >> >> >> and try to hold the per_cpu(hrtimer_bases)[n].lock.
> >> >> >> 
> >> >> >> So avoid waking up kswapd. The oops looks something like:
> >> >> >
> >> >> >Hi yangshiguang, 
> >> >> >
> >> >> >In the next revision, could you please elaborate the commit message
> >> >> >to reflect how this change avoids waking up kswapd?
> >> >> >
> >> >> 
> >> >> of course. Thanks for the reminder.
> >> >> 
> >> >> >> BUG: spinlock recursion on CPU#3, swapper/3/0
> >> >> >>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
> >> >> >> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
> >> >> >> Call trace:
> >> >> >> spin_bug+0x0
> >> >> >> _raw_spin_lock_irqsave+0x80
> >> >> >> hrtimer_try_to_cancel+0x94
> >> >> >> task_contending+0x10c
> >> >> >> enqueue_dl_entity+0x2a4
> >> >> >> dl_server_start+0x74
> >> >> >> enqueue_task_fair+0x568
> >> >> >> enqueue_task+0xac
> >> >> >> do_activate_task+0x14c
> >> >> >> ttwu_do_activate+0xcc
> >> >> >> try_to_wake_up+0x6c8
> >> >> >> default_wake_function+0x20
> >> >> >> autoremove_wake_function+0x1c
> >> >> >> __wake_up+0xac
> >> >> >> wakeup_kswapd+0x19c
> >> >> >> wake_all_kswapds+0x78
> >> >> >> __alloc_pages_slowpath+0x1ac
> >> >> >> __alloc_pages_noprof+0x298
> >> >> >> stack_depot_save_flags+0x6b0
> >> >> >> stack_depot_save+0x14
> >> >> >> set_track_prepare+0x5c
> >> >> >> ___slab_alloc+0xccc
> >> >> >> __kmalloc_cache_noprof+0x470
> >> >> >> __set_page_owner+0x2bc
> >> >> >> post_alloc_hook[jt]+0x1b8
> >> >> >> prep_new_page+0x28
> >> >> >> get_page_from_freelist+0x1edc
> >> >> >> __alloc_pages_noprof+0x13c
> >> >> >> alloc_slab_page+0x244
> >> >> >> allocate_slab+0x7c
> >> >> >> ___slab_alloc+0x8e8
> >> >> >> kmem_cache_alloc_noprof+0x450
> >> >> >> debug_objects_fill_pool+0x22c
> >> >> >> debug_object_activate+0x40
> >> >> >> enqueue_hrtimer[jt]+0xdc
> >> >> >> hrtimer_start_range_ns+0x5f8
> >> >> >> ...
> >> >> >> 
> >> >> >> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
> >> >> >> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
> >> >> >> ---
> >> >> >> v1 -> v2:
> >> >> >>     propagate gfp flags to set_track_prepare()
> >> >> >> 
> >> >> >> [1] https://urldefense.com/v3/__https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com__;!!ACWV5N9M2RV99hQ!JMgEQrzDS3VAAKdSyj3ge_ZLG1QWaEHA7hH5uL7_Js06GM5m1sYGVOmJHkiTuOeaiE-IizWyvPNtiwzH291FRIojhPs$  
> >> >> >> ---
> >> >> >>  mm/slub.c | 21 +++++++++++----------
> >> >> >>  1 file changed, 11 insertions(+), 10 deletions(-)
> >> >> >> 
> >> >> >> diff --git a/mm/slub.c b/mm/slub.c
> >> >> >> index 30003763d224..dba905bf1e03 100644
> >> >> >> --- a/mm/slub.c
> >> >> >> +++ b/mm/slub.c
> >> >> >> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
> >> >> >>  }
> >> >> >>  
> >> >> >>  #ifdef CONFIG_STACKDEPOT
> >> >> >> -static noinline depot_stack_handle_t set_track_prepare(void)
> >> >> >> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
> >> >> >>  {
> >> >> >>  	depot_stack_handle_t handle;
> >> >> >>  	unsigned long entries[TRACK_ADDRS_COUNT];
> >> >> >>  	unsigned int nr_entries;
> >> >> >> +	gfp_flags &= GFP_NOWAIT;
> >> >> >
> >> >> >Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
> >> >> >direct reclamation?
> >> >> >
> >> >> 
> >> >> Hi Harry,
> >> >> 
> >> >> The original allocation is GFP_NOWAIT.
> >> >> So I think it's better not to increase the allocation cost here.
> >> >
> >> >I don't think the allocation cost is important here, because collecting
> >> >a stack trace for each alloc/free is quite slow anyway. And we don't really
> >> >care about performance in debug caches (it isn't designed to be
> >> >performant).
> >> >
> >> >I think it was GFP_NOWAIT because it was considered safe without
> >> >regard to the GFP flags passed, rather than due to performance
> >> >considerations.
> >> >
> >> Hi harry,
> >> 
> >> Is that so?
> >> gfp_flags &= (GFP_NOWAIT | __GFP_DIRECT_RECLAIM);
> >
> >This still clears gfp flags passed by the caller to the allocator.
> >Why not use gfp_flags directly without clearing some flags?
> 
> >
> Hi Harry,
> 
> 
> This introduces new problems.
> 
> call stack:
> dump_backtrace+0xfc/0x17c
> show_stack+0x18/0x28
> dump_stack_lvl+0x40/0xc0
> dump_stack+0x18/0x24
> __might_resched+0x164/0x184
> __might_sleep+0x38/0x84
> prepare_alloc_pages+0xc0/0x17c
> __alloc_pages_noprof+0x130/0x3f8
> stack_depot_save_flags+0x5a8/0x6bc
> stack_depot_save+0x14/0x24
> set_track_prepare+0x64/0x90
> ___slab_alloc+0xc14/0xc48
> __kmalloc_cache_noprof+0x398/0x568
> __kthread_create_on_node+0x8c/0x1f0
> kthread_create_on_node+0x4c/0x74
> create_worker+0xe0/0x298
> workqueue_init+0x228/0x324
> kernel_init_freeable+0x124/0x1c8
> kernel_init+0x20/0x1ac
> ret_from_fork+0x10/0x20

Ok, because preemption is disabled in ___slab_alloc(),
blocking allocations are not allowed even when gfp_flags allows it.
So __GFP_DIRECT_RECLAIM should be cleared.

So,

/* Preemption is disabled in ___slab_alloc() */
gfp_flags &= ~(__GFP_DIRECT_RECLAIM);

should work?

> Of course there are other problems.
>
> So it is best to limit gfp flags.

-- 
Cheers,
Harry / Hyeonggon


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re:Re: Re: Re: Re: [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare
  2025-08-18  3:43             ` Harry Yoo
@ 2025-08-18 11:25               ` yangshiguang
  0 siblings, 0 replies; 11+ messages in thread
From: yangshiguang @ 2025-08-18 11:25 UTC (permalink / raw)
  To: Harry Yoo
  Cc: vbabka, akpm, cl, rientjes, roman.gushchin, glittao, linux-mm,
	linux-kernel, stable, yangshiguang



At 2025-08-18 11:43:14, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>On Mon, Aug 18, 2025 at 10:33:51AM +0800, yangshiguang wrote:
>> 
>> 
>> 
>> At 2025-08-18 10:22:36, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>> 
>> >On Mon, Aug 18, 2025 at 10:07:40AM +0800, yangshiguang wrote:
>> >> 
>> >> 
>> >> At 2025-08-16 18:46:12, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>> >> >On Sat, Aug 16, 2025 at 06:05:15PM +0800, yangshiguang wrote:
>> >> >> 
>> >> >> 
>> >> >> At 2025-08-16 16:25:25, "Harry Yoo" <harry.yoo@oracle.com> wrote:
>> >> >> >On Thu, Aug 14, 2025 at 07:16:42PM +0800, yangshiguang1011@163.com wrote:
>> >> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
>> >> >> >> 
>> >> >> >> From: yangshiguang <yangshiguang@xiaomi.com>
>> >> >> >> 
>> >> >> >> set_track_prepare() can incur lock recursion.
>> >> >> >> The issue is that it is called from hrtimer_start_range_ns
>> >> >> >> holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
>> >> >> >> CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
>> >> >> >> and try to hold the per_cpu(hrtimer_bases)[n].lock.
>> >> >> >> 
>> >> >> >> So avoid waking up kswapd. The oops looks something like:
>> >> >> >
>> >> >> >Hi yangshiguang, 
>> >> >> >
>> >> >> >In the next revision, could you please elaborate the commit message
>> >> >> >to reflect how this change avoids waking up kswapd?
>> >> >> >
>> >> >> 
>> >> >> of course. Thanks for the reminder.
>> >> >> 
>> >> >> >> BUG: spinlock recursion on CPU#3, swapper/3/0
>> >> >> >>  lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
>> >> >> >> Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
>> >> >> >> Call trace:
>> >> >> >> spin_bug+0x0
>> >> >> >> _raw_spin_lock_irqsave+0x80
>> >> >> >> hrtimer_try_to_cancel+0x94
>> >> >> >> task_contending+0x10c
>> >> >> >> enqueue_dl_entity+0x2a4
>> >> >> >> dl_server_start+0x74
>> >> >> >> enqueue_task_fair+0x568
>> >> >> >> enqueue_task+0xac
>> >> >> >> do_activate_task+0x14c
>> >> >> >> ttwu_do_activate+0xcc
>> >> >> >> try_to_wake_up+0x6c8
>> >> >> >> default_wake_function+0x20
>> >> >> >> autoremove_wake_function+0x1c
>> >> >> >> __wake_up+0xac
>> >> >> >> wakeup_kswapd+0x19c
>> >> >> >> wake_all_kswapds+0x78
>> >> >> >> __alloc_pages_slowpath+0x1ac
>> >> >> >> __alloc_pages_noprof+0x298
>> >> >> >> stack_depot_save_flags+0x6b0
>> >> >> >> stack_depot_save+0x14
>> >> >> >> set_track_prepare+0x5c
>> >> >> >> ___slab_alloc+0xccc
>> >> >> >> __kmalloc_cache_noprof+0x470
>> >> >> >> __set_page_owner+0x2bc
>> >> >> >> post_alloc_hook[jt]+0x1b8
>> >> >> >> prep_new_page+0x28
>> >> >> >> get_page_from_freelist+0x1edc
>> >> >> >> __alloc_pages_noprof+0x13c
>> >> >> >> alloc_slab_page+0x244
>> >> >> >> allocate_slab+0x7c
>> >> >> >> ___slab_alloc+0x8e8
>> >> >> >> kmem_cache_alloc_noprof+0x450
>> >> >> >> debug_objects_fill_pool+0x22c
>> >> >> >> debug_object_activate+0x40
>> >> >> >> enqueue_hrtimer[jt]+0xdc
>> >> >> >> hrtimer_start_range_ns+0x5f8
>> >> >> >> ...
>> >> >> >> 
>> >> >> >> Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
>> >> >> >> Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
>> >> >> >> ---
>> >> >> >> v1 -> v2:
>> >> >> >>     propagate gfp flags to set_track_prepare()
>> >> >> >> 
>> >> >> >> [1] https://urldefense.com/v3/__https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com__;!!ACWV5N9M2RV99hQ!JMgEQrzDS3VAAKdSyj3ge_ZLG1QWaEHA7hH5uL7_Js06GM5m1sYGVOmJHkiTuOeaiE-IizWyvPNtiwzH291FRIojhPs$  
>> >> >> >> ---
>> >> >> >>  mm/slub.c | 21 +++++++++++----------
>> >> >> >>  1 file changed, 11 insertions(+), 10 deletions(-)
>> >> >> >> 
>> >> >> >> diff --git a/mm/slub.c b/mm/slub.c
>> >> >> >> index 30003763d224..dba905bf1e03 100644
>> >> >> >> --- a/mm/slub.c
>> >> >> >> +++ b/mm/slub.c
>> >> >> >> @@ -962,19 +962,20 @@ static struct track *get_track(struct kmem_cache *s, void *object,
>> >> >> >>  }
>> >> >> >>  
>> >> >> >>  #ifdef CONFIG_STACKDEPOT
>> >> >> >> -static noinline depot_stack_handle_t set_track_prepare(void)
>> >> >> >> +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
>> >> >> >>  {
>> >> >> >>  	depot_stack_handle_t handle;
>> >> >> >>  	unsigned long entries[TRACK_ADDRS_COUNT];
>> >> >> >>  	unsigned int nr_entries;
>> >> >> >> +	gfp_flags &= GFP_NOWAIT;
>> >> >> >
>> >> >> >Is there any reason to downgrade it to GFP_NOWAIT when the gfp flag allows
>> >> >> >direct reclamation?
>> >> >> >
>> >> >> 
>> >> >> Hi Harry,
>> >> >> 
>> >> >> The original allocation is GFP_NOWAIT.
>> >> >> So I think it's better not to increase the allocation cost here.
>> >> >
>> >> >I don't think the allocation cost is important here, because collecting
>> >> >a stack trace for each alloc/free is quite slow anyway. And we don't really
>> >> >care about performance in debug caches (it isn't designed to be
>> >> >performant).
>> >> >
>> >> >I think it was GFP_NOWAIT because it was considered safe without
>> >> >regard to the GFP flags passed, rather than due to performance
>> >> >considerations.
>> >> >
>> >> Hi harry,
>> >> 
>> >> Is that so?
>> >> gfp_flags &= (GFP_NOWAIT | __GFP_DIRECT_RECLAIM);
>> >
>> >This still clears gfp flags passed by the caller to the allocator.
>> >Why not use gfp_flags directly without clearing some flags?
>> 
>> >
>> Hi Harry,
>> 
>> 
>> This introduces new problems.
>> 
>> call stack:
>> dump_backtrace+0xfc/0x17c
>> show_stack+0x18/0x28
>> dump_stack_lvl+0x40/0xc0
>> dump_stack+0x18/0x24
>> __might_resched+0x164/0x184
>> __might_sleep+0x38/0x84
>> prepare_alloc_pages+0xc0/0x17c
>> __alloc_pages_noprof+0x130/0x3f8
>> stack_depot_save_flags+0x5a8/0x6bc
>> stack_depot_save+0x14/0x24
>> set_track_prepare+0x64/0x90
>> ___slab_alloc+0xc14/0xc48
>> __kmalloc_cache_noprof+0x398/0x568
>> __kthread_create_on_node+0x8c/0x1f0
>> kthread_create_on_node+0x4c/0x74
>> create_worker+0xe0/0x298
>> workqueue_init+0x228/0x324
>> kernel_init_freeable+0x124/0x1c8
>> kernel_init+0x20/0x1ac
>> ret_from_fork+0x10/0x20
>
>Ok, because preemption is disabled in ___slab_alloc(),
>blocking allocations are not allowed even when gfp_flags allows it.
>So __GFP_DIRECT_RECLAIM should be cleared.
>
>So,
>
>/* Preemption is disabled in ___slab_alloc() */
>gfp_flags &= ~(__GFP_DIRECT_RECLAIM);
>
>should work?

>

Feedback after testing ASAP.

>> Of course there are other problems.
>>
>> So it is best to limit gfp flags.
>
>-- 
>Cheers,
>Harry / Hyeonggon

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-08-18 11:26 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-14 11:16 [PATCH v2] mm: slub: avoid wake up kswapd in set_track_prepare yangshiguang1011
2025-08-16  8:25 ` Harry Yoo
2025-08-16  9:33   ` Giorgi Tchankvetadze
2025-08-16 10:19     ` yangshiguang
2025-08-16 10:05   ` yangshiguang
2025-08-16 10:46     ` Harry Yoo
2025-08-18  2:07       ` yangshiguang
2025-08-18  2:22         ` Harry Yoo
2025-08-18  2:33           ` yangshiguang
2025-08-18  3:43             ` Harry Yoo
2025-08-18 11:25               ` yangshiguang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).