From: Hao Li <hao.li@linux.dev>
To: vbabka@kernel.org, harry@kernel.org
Cc: akpm@linux-foundation.org, cl@gentwo.org, rientjes@google.com,
roman.gushchin@linux.dev, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, Hao Li <hao.li@linux.dev>
Subject: [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths
Date: Tue, 23 Jun 2026 19:04:02 +0800 [thread overview]
Message-ID: <20260623110952.411041-1-hao.li@linux.dev> (raw)
Currently, alloc_from_pcs() and __slab_alloc_node() each resolve the
NUMA policy independently. Since they are called back to back in paths
like __kmalloc_nolock_noprof() and slab_alloc_node(), the same code is
duplicated and the policy can be resolved twice for one allocation.
Introduce a helper function to resolve the NUMA policy once, eliminating
the duplicated code and reducing execution overhead.
Also remove the __slab_alloc_node() function, because it is almost empty
once the policy calculation is moved out of it. The former callers of
__slab_alloc_node() now call ___slab_alloc() directly.
Additional notes:
Previously, when slab_strict_numa was enabled, alloc_from_pcs() and
__slab_alloc_node() could each resolve the task mempolicy, so
MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE could advance the
interleave state twice for a single object allocation attempt.
With this change, the strict NUMA node is resolved once and reused by
both alloc_from_pcs() and ___slab_alloc().
This is a behavior change, but it better matches the intent of
selecting one policy node for one allocation attempt.
Signed-off-by: Hao Li <hao.li@linux.dev>
---
Changes in v2:
* Use a better function name apply_strict_numa_policy() (Thanks Harry)
* Remove the almost empty function __slab_alloc_node().
* Add a local variable, strict_node, so the retry path in
__kmalloc_nolock_noprof() computes the strict NUMA node from the original
node parameter instead of a previously resolved node value.
---
mm/slub.c | 45 +++++++++++----------------------------------
1 file changed, 11 insertions(+), 34 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 62e9cd46916f..fd58bd6abd5e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4516,49 +4516,43 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
/* This could cause an endless loop. Fail instead. */
return NULL;
success:
if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
set_track(s, object, TRACK_ALLOC, ac->caller_addr, gfpflags);
return object;
}
-static void *__slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node,
- const struct slab_alloc_context *ac)
+static __always_inline int apply_strict_numa_policy(int node)
{
- void *object;
-
#ifdef CONFIG_NUMA
if (static_branch_unlikely(&strict_numa) &&
node == NUMA_NO_NODE) {
struct mempolicy *mpol = current->mempolicy;
if (mpol) {
/*
* Special BIND rule support. If the local node
* is in permitted set then do not redirect
* to a particular node.
* Otherwise we apply the memory policy to get
* the node we need to allocate on.
*/
if (mpol->mode != MPOL_BIND ||
!node_isset(numa_mem_id(), mpol->nodes))
node = mempolicy_slab_node();
}
}
#endif
-
- object = ___slab_alloc(s, gfpflags, node, ac);
-
- return object;
+ return node;
}
static __fastpath_inline
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
flags &= gfp_allowed_mask;
might_alloc(flags);
if (unlikely(should_failslab(s, flags)))
@@ -4749,42 +4743,20 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
return pcs;
}
static __fastpath_inline
void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, unsigned int alloc_flags, int node)
{
struct slub_percpu_sheaves *pcs;
bool node_requested;
void *object;
-#ifdef CONFIG_NUMA
- if (static_branch_unlikely(&strict_numa) &&
- node == NUMA_NO_NODE) {
-
- struct mempolicy *mpol = current->mempolicy;
-
- if (mpol) {
- /*
- * Special BIND rule support. If the local node
- * is in permitted set then do not redirect
- * to a particular node.
- * Otherwise we apply the memory policy to get
- * the node we need to allocate on.
- */
- if (mpol->mode != MPOL_BIND ||
- !node_isset(numa_mem_id(), mpol->nodes))
-
- node = mempolicy_slab_node();
- }
- }
-#endif
-
node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;
/*
* We assume the percpu sheaves contain only local objects although it's
* not completely guaranteed, so we verify later.
*/
if (unlikely(node_requested && node != numa_mem_id())) {
stat(s, ALLOC_NODE_MISMATCH);
return NULL;
}
@@ -4920,24 +4892,26 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s,
void *object;
s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
return NULL;
object = kfence_alloc(s, ac->orig_size, gfpflags);
if (unlikely(object))
goto out;
+ node = apply_strict_numa_policy(node);
+
object = alloc_from_pcs(s, gfpflags, ac->alloc_flags, node);
if (unlikely(!object))
- object = __slab_alloc_node(s, gfpflags, node, ac);
+ object = ___slab_alloc(s, gfpflags, node, ac);
maybe_wipe_obj_freeptr(s, object);
out:
/*
* In case this fails due to memcg_slab_post_alloc_hook(),
* object is set to NULL
*/
slab_post_alloc_hook(s, gfpflags, 1, &object, ac);
@@ -5385,20 +5359,21 @@ void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags)
PASS_TOKEN_PARAM(token), &ac);
}
EXPORT_SYMBOL(__kmalloc_noprof);
static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags,
int node, const struct slab_alloc_context *ac)
{
struct kmem_cache *s;
bool can_retry = true;
void *ret;
+ int strict_node;
VM_WARN_ON_ONCE(alloc_flags_allow_spinning(ac->alloc_flags));
VM_WARN_ON_ONCE(gfp_flags & ~(__GFP_ACCOUNT | __GFP_ZERO |
__GFP_NOWARN | __GFP_NOMEMALLOC));
gfp_flags |= __GFP_NOWARN | __GFP_NOMEMALLOC;
if (unlikely(!size))
return ZERO_SIZE_PTR;
@@ -5423,31 +5398,33 @@ static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_f
* kmalloc_nolock() is not supported on architectures that
* don't implement cmpxchg16b and thus need slab_lock()
* which could be preempted by a nmi.
* But debug caches don't use that and only rely on
* kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
* to allocate from debug caches by
* spin_trylock_irqsave(&n->list_lock, ...)
*/
return NULL;
- ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, node);
+ strict_node = apply_strict_numa_policy(node);
+
+ ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, strict_node);
if (ret)
goto success;
/*
* Do not call slab_alloc_node(), since trylock mode isn't
* compatible with slab_pre_alloc_hook/should_failslab and
- * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
+ * kfence_alloc. Hence call ___slab_alloc() (at most twice)
* and slab_post_alloc_hook() directly.
*/
- ret = __slab_alloc_node(s, gfp_flags, node, ac);
+ ret = ___slab_alloc(s, gfp_flags, strict_node, ac);
/*
* It's possible we failed due to trylock as we preempted someone with
* the sheaves locked, and the list_lock is also held by another cpu.
* But it should be rare that multiple kmalloc buckets would have
* sheaves locked, so try a larger one.
*/
if (!ret && can_retry) {
/* pick the next kmalloc bucket */
size = s->object_size + 1;
--
2.54.0
reply other threads:[~2026-06-23 11:10 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260623110952.411041-1-hao.li@linux.dev \
--to=hao.li@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=cl@gentwo.org \
--cc=harry@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=vbabka@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox