* [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths
@ 2026-06-23 11:04 Hao Li
2026-06-24 5:14 ` Harry Yoo
0 siblings, 1 reply; 3+ messages in thread
From: Hao Li @ 2026-06-23 11:04 UTC (permalink / raw)
To: vbabka, harry
Cc: akpm, cl, rientjes, roman.gushchin, linux-mm, linux-kernel,
Hao Li
Currently, alloc_from_pcs() and __slab_alloc_node() both calculate the
NUMA policy independently. Since they are called consecutively in paths
like __kmalloc_nolock_noprof() and slab_alloc_node(), the same policy
lookup is duplicated in the code and can run twice for one allocation.
Introduce a helper function to resolve the NUMA policy once, eliminating
the duplicated code and reducing execution overhead.
Also remove the __slab_alloc_node() function because it is almost empty.
The callers of __slab_alloc_node() now call ___slab_alloc() directly.
Additional notes:
Previously, when slab_strict_numa was enabled, alloc_from_pcs() and
__slab_alloc_node() could each resolve the task mempolicy, so
MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE could advance the
interleave state twice for a single object allocation attempt.
With this change, the strict NUMA node is resolved once and reused by
both alloc_from_pcs() and ___slab_alloc().
This is a behavior change, but it better matches the intent of
selecting one policy node for one allocation attempt.
Signed-off-by: Hao Li <hao.li@linux.dev>
---
Changes in v2:
* Use a better function name apply_strict_numa_policy() (Thanks Harry)
* Remove the almost empty function __slab_alloc_node().
* Add a local variable, strict_node, so the retry path in
__kmalloc_nolock_noprof() computes the strict NUMA node from the original
node parameter instead of a previously resolved node value.
---
mm/slub.c | 45 +++++++++++----------------------------------
1 file changed, 11 insertions(+), 34 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 62e9cd46916f..fd58bd6abd5e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4516,49 +4516,43 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
/* This could cause an endless loop. Fail instead. */
return NULL;
success:
if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
set_track(s, object, TRACK_ALLOC, ac->caller_addr, gfpflags);
return object;
}
-static void *__slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node,
- const struct slab_alloc_context *ac)
+static __always_inline int apply_strict_numa_policy(int node)
{
- void *object;
-
#ifdef CONFIG_NUMA
if (static_branch_unlikely(&strict_numa) &&
node == NUMA_NO_NODE) {
struct mempolicy *mpol = current->mempolicy;
if (mpol) {
/*
* Special BIND rule support. If the local node
* is in permitted set then do not redirect
* to a particular node.
* Otherwise we apply the memory policy to get
* the node we need to allocate on.
*/
if (mpol->mode != MPOL_BIND ||
!node_isset(numa_mem_id(), mpol->nodes))
node = mempolicy_slab_node();
}
}
#endif
-
- object = ___slab_alloc(s, gfpflags, node, ac);
-
- return object;
+ return node;
}
static __fastpath_inline
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
flags &= gfp_allowed_mask;
might_alloc(flags);
if (unlikely(should_failslab(s, flags)))
@@ -4749,42 +4743,20 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
return pcs;
}
static __fastpath_inline
void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, unsigned int alloc_flags, int node)
{
struct slub_percpu_sheaves *pcs;
bool node_requested;
void *object;
-#ifdef CONFIG_NUMA
- if (static_branch_unlikely(&strict_numa) &&
- node == NUMA_NO_NODE) {
-
- struct mempolicy *mpol = current->mempolicy;
-
- if (mpol) {
- /*
- * Special BIND rule support. If the local node
- * is in permitted set then do not redirect
- * to a particular node.
- * Otherwise we apply the memory policy to get
- * the node we need to allocate on.
- */
- if (mpol->mode != MPOL_BIND ||
- !node_isset(numa_mem_id(), mpol->nodes))
-
- node = mempolicy_slab_node();
- }
- }
-#endif
-
node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;
/*
* We assume the percpu sheaves contain only local objects although it's
* not completely guaranteed, so we verify later.
*/
if (unlikely(node_requested && node != numa_mem_id())) {
stat(s, ALLOC_NODE_MISMATCH);
return NULL;
}
@@ -4920,24 +4892,26 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s,
void *object;
s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
return NULL;
object = kfence_alloc(s, ac->orig_size, gfpflags);
if (unlikely(object))
goto out;
+ node = apply_strict_numa_policy(node);
+
object = alloc_from_pcs(s, gfpflags, ac->alloc_flags, node);
if (unlikely(!object))
- object = __slab_alloc_node(s, gfpflags, node, ac);
+ object = ___slab_alloc(s, gfpflags, node, ac);
maybe_wipe_obj_freeptr(s, object);
out:
/*
* In case this fails due to memcg_slab_post_alloc_hook(),
* object is set to NULL
*/
slab_post_alloc_hook(s, gfpflags, 1, &object, ac);
@@ -5385,20 +5359,21 @@ void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags)
PASS_TOKEN_PARAM(token), &ac);
}
EXPORT_SYMBOL(__kmalloc_noprof);
static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags,
int node, const struct slab_alloc_context *ac)
{
struct kmem_cache *s;
bool can_retry = true;
void *ret;
+ int strict_node;
VM_WARN_ON_ONCE(alloc_flags_allow_spinning(ac->alloc_flags));
VM_WARN_ON_ONCE(gfp_flags & ~(__GFP_ACCOUNT | __GFP_ZERO |
__GFP_NOWARN | __GFP_NOMEMALLOC));
gfp_flags |= __GFP_NOWARN | __GFP_NOMEMALLOC;
if (unlikely(!size))
return ZERO_SIZE_PTR;
@@ -5423,31 +5398,33 @@ static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_f
* kmalloc_nolock() is not supported on architectures that
* don't implement cmpxchg16b and thus need slab_lock()
* which could be preempted by a nmi.
* But debug caches don't use that and only rely on
* kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
* to allocate from debug caches by
* spin_trylock_irqsave(&n->list_lock, ...)
*/
return NULL;
- ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, node);
+ strict_node = apply_strict_numa_policy(node);
+
+ ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, strict_node);
if (ret)
goto success;
/*
* Do not call slab_alloc_node(), since trylock mode isn't
* compatible with slab_pre_alloc_hook/should_failslab and
- * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
+ * kfence_alloc. Hence call ___slab_alloc() (at most twice)
* and slab_post_alloc_hook() directly.
*/
- ret = __slab_alloc_node(s, gfp_flags, node, ac);
+ ret = ___slab_alloc(s, gfp_flags, strict_node, ac);
/*
* It's possible we failed due to trylock as we preempted someone with
* the sheaves locked, and the list_lock is also held by another cpu.
* But it should be rare that multiple kmalloc buckets would have
* sheaves locked, so try a larger one.
*/
if (!ret && can_retry) {
/* pick the next kmalloc bucket */
size = s->object_size + 1;
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths
2026-06-23 11:04 [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths Hao Li
@ 2026-06-24 5:14 ` Harry Yoo
2026-06-24 8:17 ` Hao Li
0 siblings, 1 reply; 3+ messages in thread
From: Harry Yoo @ 2026-06-24 5:14 UTC (permalink / raw)
To: Hao Li, vbabka; +Cc: akpm, cl, rientjes, roman.gushchin, linux-mm, linux-kernel
[-- Attachment #1.1: Type: text/plain, Size: 1843 bytes --]
On 6/23/26 8:04 PM, Hao Li wrote:
> Currently, alloc_from_pcs() and __slab_alloc_node() both calculate the
> NUMA policy independently. Since they are called consecutively in paths
> like __kmalloc_nolock_noprof() and slab_alloc_node(), this leads to
> redundant code snippets.
Right.
> Introduce a helper function to resolve the NUMA policy once, eliminating
> the duplicated code and reducing execution overhead.
Nice.
> Also remove __slab_alloc_node() function because it is almost empty.
Nice!
> The callers of __slab_alloc_node now call ___slab_alloc() directly.
>
> Additional notes:
>
> Previously, when slab_strict_numa was enabled, alloc_from_pcs() and
> __slab_alloc_node() could each resolve the task mempolicy, so
> MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE could advance the
> interleave state twice for a single object allocation attempt.
>
> With this change, the strict NUMA node is resolved once and reused by
> both alloc_from_pcs() and ___slab_alloc().
Nice catch!
> This is a behavior change, but it better matches the intent of
> selecting one policy node for one allocation attempt.
Right.
and I think backporting is unnecessary here.
> Signed-off-by: Hao Li <hao.li@linux.dev>
> ---
> Changes in v2:
> * Use a better function name apply_strict_numa_policy() (Thanks Harry)
> * Remove almost empty function __slab_alloc_node.
> * Add a local variable, strict_node, so the retry path in
> __kmalloc_nolock_noprof() computes the strict NUMA node from the original
> node parameter instead of a previously resolved node value.
What about overriding 'node' before retry label instead?
node = apply_strict_numa_policy(node);
[...]
retry:
[...]
Otherwise LGTM.
--
Cheers,
Harry / Hyeonggon
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths
2026-06-24 5:14 ` Harry Yoo
@ 2026-06-24 8:17 ` Hao Li
0 siblings, 0 replies; 3+ messages in thread
From: Hao Li @ 2026-06-24 8:17 UTC (permalink / raw)
To: Harry Yoo
Cc: vbabka, akpm, cl, rientjes, roman.gushchin, linux-mm,
linux-kernel
On Wed, Jun 24, 2026 at 02:14:55PM +0900, Harry Yoo wrote:
>
>
> On 6/23/26 8:04 PM, Hao Li wrote:
> > Currently, alloc_from_pcs() and __slab_alloc_node() both calculate the
> > NUMA policy independently. Since they are called consecutively in paths
> > like __kmalloc_nolock_noprof() and slab_alloc_node(), this leads to
> > redundant code snippets.
>
> Right.
>
> > Introduce a helper function to resolve the NUMA policy once, eliminating
> > the duplicated code and reducing execution overhead.
>
> Nice.
>
> > Also remove __slab_alloc_node() function because it is almost empty.
>
> Nice!
>
> > The callers of __slab_alloc_node now call ___slab_alloc() directly.
> >
> > Additional notes:
> >
> > Previously, when slab_strict_numa was enabled, alloc_from_pcs() and
> > __slab_alloc_node() could each resolve the task mempolicy, so
> > MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE could advance the
> > interleave state twice for a single object allocation attempt.
> >
> > With this change, the strict NUMA node is resolved once and reused by
> > both alloc_from_pcs() and ___slab_alloc().
>
> Nice catch!
>
> > This is a behavior change, but it better matches the intent of
> > selecting one policy node for one allocation attempt.
>
> Right.
>
> and I think backporting is unnecessary here.
>
> > Signed-off-by: Hao Li <hao.li@linux.dev>
> > ---
> > Changes in v2:
> > * Use a better function name apply_strict_numa_policy() (Thanks Harry)
> > * Remove almost empty function __slab_alloc_node.
> > * Add a local variable, strict_node, so the retry path in
> > __kmalloc_nolock_noprof() computes the strict NUMA node from the original
> > node parameter instead of a previously resolved node value.
>
> What about overriding 'node' before retry label instead?
>
> node = apply_strict_numa_policy(node);
> [...]
> retry:
> [...]
>
I agree! I originally used a separate strict_node variable mostly to keep the
old retry behavior intact. But looking at it now, the retry is really just an
internal fallback for the same requested object allocation. There's no strong
reason to advance the MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE state again
on each retry.
> Otherwise LGTM.
Thanks!
--
Thanks,
Hao
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-06-24 8:17 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-23 11:04 [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths Hao Li
2026-06-24 5:14 ` Harry Yoo
2026-06-24 8:17 ` Hao Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.