Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Harry Yoo <harry@kernel.org>
To: "Vlastimil Babka (SUSE)" <vbabka@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Hao Li <hao.li@linux.dev>, Christoph Lameter <cl@gentwo.org>,
	David Rientjes <rientjes@google.com>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Puranjay Mohan <puranjay@kernel.org>,
	Amery Hung <ameryhung@gmail.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Clark Williams <clrkwllms@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Neeraj Upadhyay <neeraj.upadhyay@kernel.org>,
	Joel Fernandes <joelagnelf@nvidia.com>,
	Josh Triplett <josh@joshtriplett.org>,
	Boqun Feng <boqun@kernel.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Lai Jiangshan <jiangshanlai@gmail.com>,
	Zqiang <qiang.zhang@linux.dev>, Pedro Falcato <pfalcato@suse.de>,
	Suren Baghdasaryan <surenb@google.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-rt-devel@lists.linux.dev, rcu@vger.kernel.org,
	bpf@vger.kernel.org
Subject: Re: [PATCH for-next v3 3/9] mm/slab: handle the !allow_spin case in kfree_rcu_sheaf()
Date: Wed, 17 Jun 2026 14:32:52 +0900	[thread overview]
Message-ID: <e8940857-4f71-4a34-ad80-81584f7378c8@kernel.org> (raw)
In-Reply-To: <4d7ebcd5-f3b7-4c73-94f1-b7d3ac12a521@kernel.org>


[-- Attachment #1.1: Type: text/plain, Size: 7894 bytes --]



On 6/16/26 4:55 PM, Vlastimil Babka (SUSE) wrote:
> On 6/15/26 13:05, Harry Yoo (Oracle) wrote:
>> Teach kfree_rcu_sheaf() how to handle the !allow_spin case. Try to get
>> an empty sheaf from pcs->spare or the barn even when spinning is not
>> allowed. Unlike __pcs_replace_full_main(), try harder to allocate
>> an empty sheaf because the fallback path will be more expensive than
>> kfree_nolock().
> 
> You mean that it will try to allocate an empty sheaf, while
> __pcs_replace_full_main() doesn't with allow_spin == false.
> I just noticed the comment there is a bit stale... nevermind.

That function

>> When trylock fails or the kernel observes non-NULL pcs->rcu_free after
>> lock acquisition, free the sheaf instead of putting it to the barn.
>> This is rare and not worth complicating the code.
>>
>> Since call_rcu() cannot be called in an unknown context,
>> kfree_rcu_sheaf() fails when the rcu sheaf becomes full.
>>
>> Signed-off-by: Harry Yoo (Oracle) <harry@kernel.org>
>> ---
>>  mm/slab.h        |  2 +-
>>  mm/slab_common.c |  2 +-
>>  mm/slub.c        | 39 ++++++++++++++++++++++++++++++---------
>>  3 files changed, 32 insertions(+), 11 deletions(-)
>>
>> diff --git a/mm/slab.h b/mm/slab.h
>> index 509f330654b8..b1bd33a16544 100644
>> --- a/mm/slab.h
>> +++ b/mm/slab.h
>> @@ -429,7 +429,7 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s)
>>  	return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
>>  }
>>  
>> -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
>> +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin);
>>  void flush_all_rcu_sheaves(void);
>>  void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
>>  
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index b6426d7ceec9..bc1a8ec938d9 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -1605,7 +1605,7 @@ static bool kfree_rcu_sheaf(void *obj)
>>  
>>  	s = slab->slab_cache;
>>  	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
>> -		return __kfree_rcu_sheaf(s, obj);
>> +		return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true);
>>  
>>  	return false;
>>  }
>> diff --git a/mm/slub.c b/mm/slub.c
>> index 87ca154ccd80..b0d38d515386 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -2815,7 +2815,8 @@ static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s,
>>  	return __alloc_empty_sheaf(s, gfp, alloc_flags, s->sheaf_capacity);
>>  }
>>  
>> -static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
>> +static void __free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
>> +			       bool allow_spin)
>>  {
>>  	/*
>>  	 * If the sheaf was created with SLAB_ALLOC_NO_RECURSE flag then its
>> @@ -2827,11 +2828,20 @@ static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
>>  		mark_obj_codetag_empty(sheaf);
>>  
>>  	VM_WARN_ON_ONCE(sheaf->size > 0);
>> -	kfree(sheaf);
>> +
>> +	if (likely(allow_spin))
>> +		kfree(sheaf);
>> +	else
>> +		kfree_nolock(sheaf);
> 
> Hmm what if the sheaf wasn't allocated with kmalloc_nolock()?
> 
>>  	stat(s, SHEAF_FREE);
>>  }
>>  
>> +static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
>> +{
>> +	__free_empty_sheaf(s, sheaf, /* allow_spin = */ true);
>> +}
>> +
>>  static unsigned int
>>  refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
>>  	       unsigned int max);
>> @@ -3132,7 +3142,6 @@ static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn,
>>   * intended action due to a race or cpu migration. Thus they do not check the
>>   * empty or full sheaf limits for simplicity.
>>   */
>> -
>>  static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf)
>>  {
>>  	unsigned long flags;
>> @@ -6065,7 +6074,7 @@ static void rcu_free_sheaf(struct rcu_head *head)
>>   */
>>  static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);
>>  
>> -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>> +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
>>  {
>>  	struct slub_percpu_sheaves *pcs;
>>  	struct slab_sheaf *rcu_sheaf;
>> @@ -6081,9 +6090,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>>  	pcs = this_cpu_ptr(s->cpu_sheaves);
>>  
>>  	if (unlikely(!pcs->rcu_free)) {
>> -
>>  		struct slab_sheaf *empty;
>>  		struct node_barn *barn;
>> +		unsigned int alloc_flags = SLAB_ALLOC_DEFAULT;
>> +		gfp_t gfp = GFP_NOWAIT;
>>  
>>  		/* Bootstrap or debug cache, fall back */
>>  		if (unlikely(!cache_has_sheaves(s))) {
>> @@ -6103,7 +6113,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>>  			goto fail;
>>  		}
>>  
>> -		empty = barn_get_empty_sheaf(barn, true);
>> +		empty = barn_get_empty_sheaf(barn, allow_spin);
> 
> Here we might be getting a sheaf allocated previously with kmalloc().

Right.

>>  
>>  		if (empty) {
>>  			pcs->rcu_free = empty;
>> @@ -6112,20 +6122,25 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>>  
>>  		local_unlock(&s->cpu_sheaves->lock);
>>  
>> -		empty = alloc_empty_sheaf(s, GFP_NOWAIT, SLAB_ALLOC_DEFAULT);
>> +		if (unlikely(!allow_spin)) {
>> +			alloc_flags = SLAB_ALLOC_TRYLOCK;
>> +			gfp = 0;
> 
> Maybe __GFP_NOWARN?

Ouch. Good catch. Thanks! Will do.

> Here we would get a sheaf with kmalloc_nolock() so that's ok even if it's
> later freed by someone else by kfree(), right.
> 
>> +		}
>> +
>> +		empty = alloc_empty_sheaf(s, gfp, alloc_flags);
>>  
>>  		if (!empty)
>>  			goto fail;
>>  
>>  		if (!local_trylock(&s->cpu_sheaves->lock)) {
>> -			barn_put_empty_sheaf(barn, empty);
>> +			__free_empty_sheaf(s, empty, allow_spin);
> 
> Well we could still use the barn with allow_spin == true.

Initially I did

if (!__barn_put_empty_sheaf(barn, empty, allow_spin))
	__free_empty_sheaf(s, empty, allow_spin);

but I ended up just calling __free_empty_sheaf() because it's
pretty rare to hit... no strong opinion though.

> But more crucially, here we might be freeing with kfree_nolock() a sheaf
> from the barn previously allocated with kmalloc()?

Well, we don't release and reacquire the local lock when we got an empty
sheaf from the barn, so it doesn't free the sheaf from the barn?

That was indeed very subtle and I got confused :D.

When we free a sheaf in this function, it's always allocated in current
context?

> Maybe we need to track if it's the case and defer-free it or something.
> 
> Also maybe there could be a wrapper kfree_maybe_nolock() (~better name?)
> That means "I want to kfree safely in kfree_nolock() context something that
> MIGHT have been kmalloc()"
> And maybe depending on the debugging options that make kmalloc() ->
> kfree_nolock() incompatible, if those are not enabled, it wouldn't have to
> defer, but proceed normally?

But I really like the idea of supporting kmalloc() -> kfree_nolock(),
and I think it's worth exploring that.

Thanks!

-- 
Cheers,
Harry / Hyeonggon

>>  			goto fail;
>>  		}
>>  
>>  		pcs = this_cpu_ptr(s->cpu_sheaves);
>>  
>>  		if (unlikely(pcs->rcu_free))
>> -			barn_put_empty_sheaf(barn, empty);
>> +			__free_empty_sheaf(s, empty, allow_spin);
>>  		else
>>  			pcs->rcu_free = empty;
>>  	}
>> @@ -6143,6 +6158,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>>  	if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
>>  		rcu_sheaf = NULL;
>>  	} else {
>> +		if (unlikely(!allow_spin)) {
>> +			/* call_rcu() cannot be called in an unknown context */
>> +			rcu_sheaf->size--;
>> +			local_unlock(&s->cpu_sheaves->lock);
>> +			goto fail;
>> +		}
>>  		pcs->rcu_free = NULL;
>>  		rcu_sheaf->node = numa_node_id();
>>  	}

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

  parent reply	other threads:[~2026-06-17  5:33 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-15 11:05 [PATCH for-next v3 0/9] mm/slab: introduce kfree_rcu_nolock() and improve slub_kunit coverage Harry Yoo (Oracle)
2026-06-15 11:05 ` [PATCH for-next v3 1/9] slub_kunit: fall back to SW perf events when HW PMU is not available Harry Yoo (Oracle)
2026-06-15 11:05 ` [PATCH for-next v3 2/9] mm/slab, slub_kunit: register kprobe to trigger _nolock APIs Harry Yoo (Oracle)
2026-06-15 11:05 ` [PATCH for-next v3 3/9] mm/slab: handle the !allow_spin case in kfree_rcu_sheaf() Harry Yoo (Oracle)
     [not found]   ` <4d7ebcd5-f3b7-4c73-94f1-b7d3ac12a521@kernel.org>
2026-06-17  5:32     ` Harry Yoo [this message]
2026-06-17  5:58       ` Vlastimil Babka (SUSE)
2026-06-15 11:05 ` [PATCH for-next v3 4/9] mm/slab: use call_rcu() in unknown context if irqs are enabled Harry Yoo (Oracle)
2026-06-15 11:05 ` [PATCH for-next v3 5/9] mm/slab: extend deferred free mechanism to handle rcu sheaves Harry Yoo (Oracle)
2026-06-15 11:06 ` [PATCH for-next v3 6/9] mm/slab: allow kfree_rcu_sheaf() on PREEMPT_RT Harry Yoo (Oracle)
2026-06-16 17:24   ` Vlastimil Babka (SUSE)
2026-06-17  5:14     ` Harry Yoo
2026-06-17  5:38       ` Vlastimil Babka (SUSE)
2026-06-15 11:06 ` [PATCH for-next v3 7/9] mm/slab: introduce kfree_rcu_nolock() Harry Yoo (Oracle)
2026-06-16 17:28   ` Vlastimil Babka (SUSE)
2026-06-15 11:06 ` [PATCH for-next v3 8/9] mm/slab: introduce struct kfree_rcu_head and use in kfree_rcu_nolock() Harry Yoo (Oracle)
2026-06-16 17:36   ` Vlastimil Babka (SUSE)
2026-06-15 11:06 ` [PATCH for-next v3 9/9] slub_kunit: extend the test for kfree_rcu_nolock() Harry Yoo (Oracle)
2026-06-16 17:38   ` Vlastimil Babka (SUSE)
2026-06-15 11:43 ` [PATCH for-next v3 0/9] mm/slab: introduce kfree_rcu_nolock() and improve slub_kunit coverage Harry Yoo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e8940857-4f71-4a34-ad80-81584f7378c8@kernel.org \
    --to=harry@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=ameryhung@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=boqun@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=cl@gentwo.org \
    --cc=clrkwllms@kernel.org \
    --cc=frederic@kernel.org \
    --cc=hao.li@linux.dev \
    --cc=jiangshanlai@gmail.com \
    --cc=joelagnelf@nvidia.com \
    --cc=josh@joshtriplett.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-rt-devel@lists.linux.dev \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=neeraj.upadhyay@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=pfalcato@suse.de \
    --cc=puranjay@kernel.org \
    --cc=qiang.zhang@linux.dev \
    --cc=rcu@vger.kernel.org \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=rostedt@goodmis.org \
    --cc=surenb@google.com \
    --cc=urezki@gmail.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox