Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Harry Yoo <harry@kernel.org>
To: Suren Baghdasaryan <surenb@google.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>,
	"Vlastimil Babka (SUSE)" <vbabka@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Hao Li <hao.li@linux.dev>, Christoph Lameter <cl@gentwo.org>,
	David Rientjes <rientjes@google.com>,
	Usama Arif <usama.arif@linux.dev>,
	Meta kernel team <kernel-team@meta.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Danielle Costantino <dcostantino@meta.com>,
	Kees Cook <kees@kernel.org>
Subject: Re: [PATCH] mm/slub: serve slabobj_ext array from a strictly larger kmalloc cache
Date: Wed, 1 Jul 2026 16:42:13 +0900	[thread overview]
Message-ID: <68e7f6cd-cf11-46b2-84a2-d512bb22dae4@kernel.org> (raw)
In-Reply-To: <92bf5e21-690e-4a77-929e-5217e0d7cb0c@kernel.org>



On 7/1/26 1:53 PM, Harry Yoo wrote:
> 
> 
> On 7/1/26 1:30 PM, Harry Yoo wrote:
>> We can do that in pre-7.2 kernels, by teaching kmalloc_type() and
>> kmalloc_slab() to select the new KMALLOC_TYPE based on __GFP_NO_OBJ_EXT?
>>
>> e.g.) Select the new KMALLOC_TYPE when KMALLOC_NOT_NORMAL_BITS is not
>> set AND __GFP_NO_OBJ_EXT is set.
> 
> Uh, this is a bit subtle though.
> 
> In some cases KMALLOC_DMA == KMALLOC_NORMAL,
> KMALLOC_CGROUP == KMALLOC_NORMAL,
> or KMALLOC_RECLAIM == KMALLOC_NORMAL.
> 
> Just checking KMALLOC_NOT_NORMAL_BITS is misleading.

Here's a prototype for slab/for-next. Backporting it requires handling
__GFP_NO_OBJ_EXT instead of SLAB_ALLOC_NO_RECURSE, but that shouldn't be
too difficult. I'm now writing the changelog and going through testing...

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 51f03f18c9a7..91a71537a2fe 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -684,6 +684,26 @@ static inline unsigned int arch_slab_minalign(void)
 #define KMALLOC_PARTITION_CACHES_NR	0
 #endif

+/*
+ * SLUB needs a separate kmalloc type, KMALLOC_NO_RECURSE, when internal slab
+ * metadata of kmalloc objects can be allocated from the same kmalloc type.
+ */
+#if defined(CONFIG_MEM_ALLOC_PROFILING)
+/*
+ * Memory allocation profiling can allocate internal slab metadata
+ * for any slab cache.
+ */
+#define HAS_KMALLOC_NO_RECURSE
+#elif defined(CONFIG_SLUB_TINY) && defined(CONFIG_MEMCG)
+/*
+ * Accounted slab objects are usually allocated from KMALLOC_CGROUP.
+ * On SLUB_TINY, those can be allocated from KMALLOC_NORMAL because
+ * KMALLOC_RECLAIM aliases with KMALLOC_NORMAL and has higher priority than
+ * KMALLOC_CGROUP.
+ */
+#define HAS_KMALLOC_NO_RECURSE
+#endif
+
 /*
  * Whenever changing this, take care of that kmalloc_type() and
  * create_kmalloc_caches() still work as intended.
@@ -702,6 +722,9 @@ enum kmalloc_cache_type {
 #endif
 	KMALLOC_PARTITION_START = KMALLOC_NORMAL,
 	KMALLOC_PARTITION_END = KMALLOC_PARTITION_START +
KMALLOC_PARTITION_CACHES_NR,
+#ifdef HAS_KMALLOC_NO_RECURSE
+	KMALLOC_NO_RECURSE,
+#endif
 #ifdef CONFIG_SLUB_TINY
 	KMALLOC_RECLAIM = KMALLOC_NORMAL,
 #else
@@ -716,6 +739,16 @@ enum kmalloc_cache_type {
 	NR_KMALLOC_TYPES
 };

+#if !defined(HAS_KMALLOC_NO_RECURSE) && defined(CONFIG_SLAB_OBJ_EXT)
+/*
+ * kmalloc_flags() with SLAB_ALLOC_NO_RECURSE should not use KMALLOC_NORMAL
+ * if any of these alias with KMALLOC_NORMAL.
+ */
+static_assert(KMALLOC_DMA != KMALLOC_NORMAL);
+static_assert(KMALLOC_CGROUP != KMALLOC_NORMAL);
+static_assert(KMALLOC_RECLAIM != KMALLOC_NORMAL);
+#endif
+
 typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];

 extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
diff --git a/mm/slab.h b/mm/slab.h
index 281a65233795..ba0560111488 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -386,12 +386,21 @@ static inline unsigned int
size_index_elem(unsigned int bytes)
  * KMALLOC_MAX_CACHE_SIZE and the caller must check that.
  */
 static inline struct kmem_cache *
-kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, kmalloc_token_t
token)
+kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, kmalloc_token_t
token,
+	     unsigned int alloc_flags)
 {
 	unsigned int index;
+	enum kmalloc_cache_type type = kmalloc_type(flags, token);
+
+#ifdef HAS_KMALLOC_NO_RECURSE
+	if (type >= KMALLOC_PARTITION_START &&
+			type <= KMALLOC_PARTITION_END &&
+			(alloc_flags & SLAB_ALLOC_NO_RECURSE))
+		type = KMALLOC_NO_RECURSE;
+#endif

 	if (!b)
-		b = &kmalloc_caches[kmalloc_type(flags, token)];
+		b = &kmalloc_caches[type];
 	if (size <= 192)
 		index = kmalloc_size_index[size_index_elem(size)];
 	else
diff --git a/mm/slab_common.c b/mm/slab_common.c
index b6426d7ceec9..8541f4a9cfda 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -783,11 +783,15 @@ u8 kmalloc_size_index[24] __ro_after_init = {
 size_t kmalloc_size_roundup(size_t size)
 {
 	if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
+		struct kmem_cache *s;
+
 		/*
 		 * The flags don't matter since size_index is common to all.
 		 * Neither does the caller for just getting ->object_size.
 		 */
-		return kmalloc_slab(size, NULL, GFP_KERNEL,
__kmalloc_token(0))->object_size;
+		s = kmalloc_slab(size, NULL, GFP_KERNEL, __kmalloc_token(0),
+				 SLAB_ALLOC_DEFAULT);
+		return s->object_size;
 	}

 	/* Above the smaller buckets, size is a multiple of page size. */
@@ -843,6 +847,12 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
 #define KMALLOC_PARTITION_NAME(N, sz)
 #endif

+#ifdef HAS_KMALLOC_NO_RECURSE
+#define KMALLOC_NO_RECURSE_NAME(sz) .name[KMALLOC_NO_RECURSE] =
"kmalloc-no-recurse-" #sz,
+#else
+#define KMALLOC_NO_RECURSE_NAME(sz)
+#endif
+
 #define INIT_KMALLOC_INFO(__size, __short_size)			\
 {								\
 	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
@@ -850,6 +860,7 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
 	KMALLOC_CGROUP_NAME(__short_size)			\
 	KMALLOC_DMA_NAME(__short_size)				\
 	KMALLOC_PARTITION_NAME(KMALLOC_PARTITION_CACHES_NR, __short_size)	\
+	KMALLOC_NO_RECURSE_NAME(__short_size)			\
 	.size = __size,						\
 }

@@ -966,6 +977,11 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type
type)
 		flags |= SLAB_NO_MERGE;
 #endif

+#ifdef HAS_KMALLOC_NO_RECURSE
+	if (type == KMALLOC_NO_RECURSE)
+		flags |= SLAB_NO_OBJ_EXT;
+#endif
+
 	/*
 	 * If CONFIG_MEMCG is enabled, disable cache merging for
 	 * KMALLOC_NORMAL caches.
diff --git a/mm/slub.c b/mm/slub.c
index 9f754cf1c187..a5745759f0af 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2123,42 +2123,6 @@ static inline void init_slab_obj_exts(struct slab
*slab)
 	slab->obj_exts = 0;
 }

-/*
- * Calculate the allocation size for slabobj_ext array.
- *
- * When memory allocation profiling is enabled, the obj_exts array
- * could be allocated from the same slab cache it's being allocated for.
- * This would prevent the slab from ever being freed because it would
- * always contain at least one allocated object (its own obj_exts array).
- *
- * To avoid this, increase the allocation size when we detect the array
- * may come from the same cache, forcing it to use a different cache.
- */
-static inline size_t obj_exts_alloc_size(struct kmem_cache *s,
-					 struct slab *slab, gfp_t gfp)
-{
-	size_t sz = sizeof(struct slabobj_ext) * slab->objects;
-	struct kmem_cache *obj_exts_cache;
-
-	if (sz > KMALLOC_MAX_CACHE_SIZE)
-		return sz;
-
-	if (!is_kmalloc_normal(s))
-		return sz;
-
-	obj_exts_cache = kmalloc_slab(sz, NULL, gfp, __kmalloc_token(0));
-	/*
-	 * We can't simply compare s with obj_exts_cache, because partitioned kmalloc
-	 * caches have multiple caches per size, selected by caller address or type.
-	 * Since caller address or type may differ between kmalloc_slab() and actual
-	 * allocation, bump size when sizes are equal.
-	 */
-	if (s->object_size == obj_exts_cache->object_size)
-		return obj_exts_cache->object_size + 1;
-
-	return sz;
-}
-
 int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 			gfp_t gfp, unsigned int alloc_flags)
 {
@@ -2168,15 +2132,13 @@ int alloc_slab_obj_exts(struct slab *slab,
struct kmem_cache *s,
 	unsigned long new_exts;
 	unsigned long old_exts;
 	struct slabobj_ext *vec;
-	size_t sz;
+	size_t sz = sizeof(struct slabobj_ext) * slab->objects;

 	gfp &= ~OBJCGS_CLEAR_MASK;
 	/* Prevent recursive extension vector allocation */
 	alloc_flags |= SLAB_ALLOC_NO_RECURSE;
 	alloc_flags &= ~SLAB_ALLOC_NEW_SLAB;

-	sz = obj_exts_alloc_size(s, slab, gfp);
-
 	/* This will use kmalloc_nolock() if alloc_flags say so */
 	vec = kmalloc_flags(sz, gfp | __GFP_ZERO, alloc_flags, slab_nid(slab));

@@ -5330,7 +5292,7 @@ void *__do_kmalloc_node(kmem_buckets *b, gfp_t
flags, int node,
 	if (unlikely(!size))
 		return ZERO_SIZE_PTR;

-	s = kmalloc_slab(size, b, flags, token);
+	s = kmalloc_slab(size, b, flags, token, ac->alloc_flags);

 	ret = slab_alloc_node(s, flags, node, ac);
 	ret = kasan_kmalloc(s, ret, size, flags);
@@ -5395,7 +5357,9 @@ static void
*__kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_f
 retry:
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
 		return NULL;
-	s = kmalloc_slab(size, NULL, gfp_flags, PASS_TOKEN_PARAM(token));
+
+	s = kmalloc_slab(size, NULL, gfp_flags, PASS_TOKEN_PARAM(token),
+			 ac->alloc_flags);

 	if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
 		/*


-- 
Cheers,
Harry / Hyeonggon


  reply	other threads:[~2026-07-01  7:42 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-25 23:00 [PATCH] mm/slub: serve slabobj_ext array from a strictly larger kmalloc cache Shakeel Butt
2026-06-26  4:22 ` Harry Yoo
2026-06-26 16:49   ` Shakeel Butt
2026-06-26 17:11     ` Vlastimil Babka (SUSE)
2026-06-28  2:58       ` Shakeel Butt
2026-06-28  3:23         ` Shakeel Butt
2026-06-28  7:47           ` Vlastimil Babka (SUSE)
2026-06-28  9:22             ` Harry Yoo
2026-06-28 23:37               ` Suren Baghdasaryan
2026-06-29  3:57                 ` Harry Yoo
2026-06-29  4:28                   ` Suren Baghdasaryan
2026-06-29 19:52                     ` Shakeel Butt
2026-06-30  2:03                       ` Harry Yoo
2026-06-30  2:30                     ` Harry Yoo
2026-06-30  4:38                       ` Suren Baghdasaryan
2026-06-30  4:39                         ` Suren Baghdasaryan
2026-06-30  4:42                           ` Harry Yoo
2026-06-30  5:29                             ` Suren Baghdasaryan
2026-06-30  6:12                               ` Vlastimil Babka (SUSE)
2026-06-30  7:03                                 ` Harry Yoo
2026-06-30 14:35                                   ` Shakeel Butt
2026-06-30 14:52                                     ` Suren Baghdasaryan
2026-06-30 15:27                                       ` Harry Yoo
2026-06-30 23:55                                         ` Suren Baghdasaryan
2026-07-01  4:30                                           ` Harry Yoo
2026-07-01  4:53                                             ` Harry Yoo
2026-07-01  7:42                                               ` Harry Yoo [this message]
2026-07-01  8:43                                                 ` Harry Yoo
2026-07-01 10:31                                                   ` Harry Yoo
2026-07-01 11:37                                                 ` Suren Baghdasaryan
2026-06-28  8:10       ` Harry Yoo
2026-06-28  8:36         ` Harry Yoo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=68e7f6cd-cf11-46b2-84a2-d512bb22dae4@kernel.org \
    --to=harry@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=cl@gentwo.org \
    --cc=dcostantino@meta.com \
    --cc=hao.li@linux.dev \
    --cc=kees@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=usama.arif@linux.dev \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox