[PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths
@ 2026-06-23 11:04 Hao Li
  0 siblings, 0 replies; only message in thread
From: Hao Li @ 2026-06-23 11:04 UTC (permalink / raw)
  To: vbabka, harry
  Cc: akpm, cl, rientjes, roman.gushchin, linux-mm, linux-kernel,
	Hao Li

Currently, alloc_from_pcs() and __slab_alloc_node() both calculate the
NUMA policy independently. Since they are called consecutively in paths
like __kmalloc_nolock_noprof() and slab_alloc_node(), this leads to
duplicated code and redundant policy lookups.

Introduce a helper function to resolve the NUMA policy once, eliminating
the duplicated code and reducing execution overhead.

Also remove the __slab_alloc_node() function, which is almost empty
once the policy calculation has moved out of it. Its callers now call
___slab_alloc() directly.

Additional notes:

  Previously, when slab_strict_numa was enabled, alloc_from_pcs() and
  __slab_alloc_node() could each resolve the task mempolicy, so
  MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE could advance the
  interleave state twice for a single object allocation attempt.

  With this change, the strict NUMA node is resolved once and reused by
  both alloc_from_pcs() and ___slab_alloc().

  This is a behavior change, but it better matches the intent of
  selecting one policy node for one allocation attempt.

Signed-off-by: Hao Li <hao.li@linux.dev>
---
Changes in v2:
  * Use a better function name, apply_strict_numa_policy() (Thanks Harry)
  * Remove the almost empty function __slab_alloc_node().
  * Add a local variable, strict_node, so the retry path in
    __kmalloc_nolock_noprof() computes the strict NUMA node from the original
    node parameter instead of a previously resolved node value.
---
 mm/slub.c | 45 +++++++++++----------------------------------
 1 file changed, 11 insertions(+), 34 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 62e9cd46916f..fd58bd6abd5e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4516,49 +4516,43 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	/* This could cause an endless loop. Fail instead. */
 	return NULL;
 
 success:
 	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
 		set_track(s, object, TRACK_ALLOC, ac->caller_addr, gfpflags);
 
 	return object;
 }
 
-static void *__slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node,
-			       const struct slab_alloc_context *ac)
+static __always_inline int apply_strict_numa_policy(int node)
 {
-	void *object;
-
 #ifdef CONFIG_NUMA
 	if (static_branch_unlikely(&strict_numa) &&
 			node == NUMA_NO_NODE) {
 
 		struct mempolicy *mpol = current->mempolicy;
 
 		if (mpol) {
 			/*
 			 * Special BIND rule support. If the local node
 			 * is in permitted set then do not redirect
 			 * to a particular node.
 			 * Otherwise we apply the memory policy to get
 			 * the node we need to allocate on.
 			 */
 			if (mpol->mode != MPOL_BIND ||
 					!node_isset(numa_mem_id(), mpol->nodes))
 				node = mempolicy_slab_node();
 		}
 	}
 #endif
-
-	object = ___slab_alloc(s, gfpflags, node, ac);
-
-	return object;
+	return node;
 }
 
 static __fastpath_inline
 struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
 
 	might_alloc(flags);
 
 	if (unlikely(should_failslab(s, flags)))
@@ -4749,42 +4743,20 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 	return pcs;
 }
 
 static __fastpath_inline
 void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, unsigned int alloc_flags, int node)
 {
 	struct slub_percpu_sheaves *pcs;
 	bool node_requested;
 	void *object;
 
-#ifdef CONFIG_NUMA
-	if (static_branch_unlikely(&strict_numa) &&
-			 node == NUMA_NO_NODE) {
-
-		struct mempolicy *mpol = current->mempolicy;
-
-		if (mpol) {
-			/*
-			 * Special BIND rule support. If the local node
-			 * is in permitted set then do not redirect
-			 * to a particular node.
-			 * Otherwise we apply the memory policy to get
-			 * the node we need to allocate on.
-			 */
-			if (mpol->mode != MPOL_BIND ||
-					!node_isset(numa_mem_id(), mpol->nodes))
-
-				node = mempolicy_slab_node();
-		}
-	}
-#endif
-
 	node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;
 
 	/*
 	 * We assume the percpu sheaves contain only local objects although it's
 	 * not completely guaranteed, so we verify later.
 	 */
 	if (unlikely(node_requested && node != numa_mem_id())) {
 		stat(s, ALLOC_NODE_MISMATCH);
 		return NULL;
 	}
@@ -4920,24 +4892,26 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s,
 	void *object;
 
 	s = slab_pre_alloc_hook(s, gfpflags);
 	if (unlikely(!s))
 		return NULL;
 
 	object = kfence_alloc(s, ac->orig_size, gfpflags);
 	if (unlikely(object))
 		goto out;
 
+	node = apply_strict_numa_policy(node);
+
 	object = alloc_from_pcs(s, gfpflags, ac->alloc_flags, node);
 
 	if (unlikely(!object))
-		object = __slab_alloc_node(s, gfpflags, node, ac);
+		object = ___slab_alloc(s, gfpflags, node, ac);
 
 	maybe_wipe_obj_freeptr(s, object);
 
 out:
 	/*
 	 * In case this fails due to memcg_slab_post_alloc_hook(),
 	 * object is set to NULL
 	 */
 	slab_post_alloc_hook(s, gfpflags, 1, &object, ac);
 
@@ -5385,20 +5359,21 @@ void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags)
 				 PASS_TOKEN_PARAM(token), &ac);
 }
 EXPORT_SYMBOL(__kmalloc_noprof);
 
 static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags,
 				     int node, const struct slab_alloc_context *ac)
 {
 	struct kmem_cache *s;
 	bool can_retry = true;
 	void *ret;
+	int strict_node;
 
 	VM_WARN_ON_ONCE(alloc_flags_allow_spinning(ac->alloc_flags));
 	VM_WARN_ON_ONCE(gfp_flags & ~(__GFP_ACCOUNT | __GFP_ZERO |
 				      __GFP_NOWARN | __GFP_NOMEMALLOC));
 
 	gfp_flags |= __GFP_NOWARN | __GFP_NOMEMALLOC;
 
 	if (unlikely(!size))
 		return ZERO_SIZE_PTR;
 
@@ -5423,31 +5398,33 @@ static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_f
 		 * kmalloc_nolock() is not supported on architectures that
 		 * don't implement cmpxchg16b and thus need slab_lock()
 		 * which could be preempted by a nmi.
 		 * But debug caches don't use that and only rely on
 		 * kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
 		 * to allocate from debug caches by
 		 * spin_trylock_irqsave(&n->list_lock, ...)
 		 */
 		return NULL;
 
-	ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, node);
+	strict_node = apply_strict_numa_policy(node);
+
+	ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, strict_node);
 	if (ret)
 		goto success;
 
 	/*
 	 * Do not call slab_alloc_node(), since trylock mode isn't
 	 * compatible with slab_pre_alloc_hook/should_failslab and
-	 * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
+	 * kfence_alloc. Hence call ___slab_alloc() (at most twice)
 	 * and slab_post_alloc_hook() directly.
 	 */
-	ret = __slab_alloc_node(s, gfp_flags, node, ac);
+	ret = ___slab_alloc(s, gfp_flags, strict_node, ac);
 
 	/*
 	 * It's possible we failed due to trylock as we preempted someone with
 	 * the sheaves locked, and the list_lock is also held by another cpu.
 	 * But it should be rare that multiple kmalloc buckets would have
 	 * sheaves locked, so try a larger one.
 	 */
 	if (!ret && can_retry) {
 		/* pick the next kmalloc bucket */
 		size = s->object_size + 1;
-- 
2.54.0



^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-06-23 11:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-06-23 11:04 [PATCH v2] mm/slub: deduplicate NUMA policy calculation in allocation paths Hao Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.