The Linux Kernel Mailing List
 help / color / mirror / Atom feed
* [PATCH v3] mm/slub: deduplicate NUMA policy calculation in allocation paths
@ 2026-06-24 10:00 Hao Li
  0 siblings, 0 replies; only message in thread
From: Hao Li @ 2026-06-24 10:00 UTC (permalink / raw)
  To: vbabka, harry
  Cc: akpm, cl, rientjes, roman.gushchin, linux-mm, linux-kernel,
	Hao Li

Currently, alloc_from_pcs() and __slab_alloc_node() each calculate the
NUMA policy independently. Since they are called back to back in paths
like __kmalloc_nolock_noprof() and slab_alloc_node(), the same logic is
duplicated in the source and may be executed twice per allocation.

Introduce a helper function to resolve the NUMA policy once, eliminating
the duplicated code and reducing execution overhead.

Also remove __slab_alloc_node(), since it becomes almost empty once the
policy calculation is moved out. Its callers now call ___slab_alloc()
directly.

Additional notes:

  Previously, when slab_strict_numa was enabled, alloc_from_pcs() and
  __slab_alloc_node() could each resolve the task mempolicy, so
  MPOL_INTERLEAVE or MPOL_WEIGHTED_INTERLEAVE could advance the
  interleave state twice for a single object allocation attempt.
  Each retry in __kmalloc_nolock_noprof() would advance it again.

  With this change, the strict NUMA node is resolved once per allocation
  call and reused by both alloc_from_pcs() and ___slab_alloc(), including
  across retries.

  This is a behavior change, but it better matches the intent of
  selecting one policy node for one allocation attempt.

Signed-off-by: Hao Li <hao.li@linux.dev>
---
Changes in v3:
  * Move apply_strict_numa_policy before retry label to simplify code (Thanks
    Harry)

Changes in v2:
  * Use a better function name apply_strict_numa_policy() (Thanks Harry)
  * Remove almost empty function __slab_alloc_node.
  * Add a local variable, strict_node, so the retry path in
    __kmalloc_nolock_noprof() computes the strict NUMA node from the original
    node parameter instead of a previously resolved node value.
---
 mm/slub.c | 42 +++++++++---------------------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 62e9cd46916f..ba969ad1db8b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4516,49 +4516,43 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	/* This could cause an endless loop. Fail instead. */
 	return NULL;
 
 success:
 	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
 		set_track(s, object, TRACK_ALLOC, ac->caller_addr, gfpflags);
 
 	return object;
 }
 
-static void *__slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node,
-			       const struct slab_alloc_context *ac)
+static __always_inline int apply_strict_numa_policy(int node)
 {
-	void *object;
-
 #ifdef CONFIG_NUMA
 	if (static_branch_unlikely(&strict_numa) &&
 			node == NUMA_NO_NODE) {
 
 		struct mempolicy *mpol = current->mempolicy;
 
 		if (mpol) {
 			/*
 			 * Special BIND rule support. If the local node
 			 * is in permitted set then do not redirect
 			 * to a particular node.
 			 * Otherwise we apply the memory policy to get
 			 * the node we need to allocate on.
 			 */
 			if (mpol->mode != MPOL_BIND ||
 					!node_isset(numa_mem_id(), mpol->nodes))
 				node = mempolicy_slab_node();
 		}
 	}
 #endif
-
-	object = ___slab_alloc(s, gfpflags, node, ac);
-
-	return object;
+	return node;
 }
 
 static __fastpath_inline
 struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
 
 	might_alloc(flags);
 
 	if (unlikely(should_failslab(s, flags)))
@@ -4749,42 +4743,20 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 	return pcs;
 }
 
 static __fastpath_inline
 void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, unsigned int alloc_flags, int node)
 {
 	struct slub_percpu_sheaves *pcs;
 	bool node_requested;
 	void *object;
 
-#ifdef CONFIG_NUMA
-	if (static_branch_unlikely(&strict_numa) &&
-			 node == NUMA_NO_NODE) {
-
-		struct mempolicy *mpol = current->mempolicy;
-
-		if (mpol) {
-			/*
-			 * Special BIND rule support. If the local node
-			 * is in permitted set then do not redirect
-			 * to a particular node.
-			 * Otherwise we apply the memory policy to get
-			 * the node we need to allocate on.
-			 */
-			if (mpol->mode != MPOL_BIND ||
-					!node_isset(numa_mem_id(), mpol->nodes))
-
-				node = mempolicy_slab_node();
-		}
-	}
-#endif
-
 	node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;
 
 	/*
 	 * We assume the percpu sheaves contain only local objects although it's
 	 * not completely guaranteed, so we verify later.
 	 */
 	if (unlikely(node_requested && node != numa_mem_id())) {
 		stat(s, ALLOC_NODE_MISMATCH);
 		return NULL;
 	}
@@ -4920,24 +4892,26 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s,
 	void *object;
 
 	s = slab_pre_alloc_hook(s, gfpflags);
 	if (unlikely(!s))
 		return NULL;
 
 	object = kfence_alloc(s, ac->orig_size, gfpflags);
 	if (unlikely(object))
 		goto out;
 
+	node = apply_strict_numa_policy(node);
+
 	object = alloc_from_pcs(s, gfpflags, ac->alloc_flags, node);
 
 	if (unlikely(!object))
-		object = __slab_alloc_node(s, gfpflags, node, ac);
+		object = ___slab_alloc(s, gfpflags, node, ac);
 
 	maybe_wipe_obj_freeptr(s, object);
 
 out:
 	/*
 	 * In case this fails due to memcg_slab_post_alloc_hook(),
 	 * object is set to NULL
 	 */
 	slab_post_alloc_hook(s, gfpflags, 1, &object, ac);
 
@@ -5406,20 +5380,22 @@ static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_f
 	 * See the comment for the same check in
 	 * alloc_frozen_pages_nolock_noprof()
 	 */
 	if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
 		return NULL;
 
 	/* On UP, spin_trylock() always succeeds even when it is locked */
 	if (!IS_ENABLED(CONFIG_SMP) && in_nmi())
 		return NULL;
 
+	node = apply_strict_numa_policy(node);
+
 retry:
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
 		return NULL;
 	s = kmalloc_slab(size, NULL, gfp_flags, PASS_TOKEN_PARAM(token));
 
 	if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
 		/*
 		 * kmalloc_nolock() is not supported on architectures that
 		 * don't implement cmpxchg16b and thus need slab_lock()
 		 * which could be preempted by a nmi.
@@ -5430,24 +5406,24 @@ static void *__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_f
 		 */
 		return NULL;
 
 	ret = alloc_from_pcs(s, gfp_flags, ac->alloc_flags, node);
 	if (ret)
 		goto success;
 
 	/*
 	 * Do not call slab_alloc_node(), since trylock mode isn't
 	 * compatible with slab_pre_alloc_hook/should_failslab and
-	 * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
+	 * kfence_alloc. Hence call ___slab_alloc() (at most twice)
 	 * and slab_post_alloc_hook() directly.
 	 */
-	ret = __slab_alloc_node(s, gfp_flags, node, ac);
+	ret = ___slab_alloc(s, gfp_flags, node, ac);
 
 	/*
 	 * It's possible we failed due to trylock as we preempted someone with
 	 * the sheaves locked, and the list_lock is also held by another cpu.
 	 * But it should be rare that multiple kmalloc buckets would have
 	 * sheaves locked, so try a larger one.
 	 */
 	if (!ret && can_retry) {
 		/* pick the next kmalloc bucket */
 		size = s->object_size + 1;
-- 
2.54.0


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-06-24 10:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-24 10:00 [PATCH v3] mm/slub: deduplicate NUMA policy calculation in allocation paths Hao Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox