* [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
@ 2025-06-13 6:33 Harry Yoo
2025-06-13 7:11 ` Harry Yoo
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Harry Yoo @ 2025-06-13 6:33 UTC (permalink / raw)
To: Vlastimil Babka, David Rientjes, Christoph Lameter, Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm, Harry Yoo
The leftover space in a slab is always smaller than s->size, and
kmem caches for large objects that are not power-of-two sizes tend to have
a greater amount of leftover space per slab. In some cases, the leftover
space is larger than the size of the slabobj_ext array for the slab.
An excellent example of such a cache is ext4_inode_cache. On my system,
the object size is 1144, with a preferred order of 3, 28 objects per slab,
and 736 bytes of leftover space per slab.
Since the size of the slabobj_ext array is only 224 bytes (w/o mem
profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
fits within the leftover space.
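For reference, the arithmetic works out roughly as follows (assuming 4 KiB
pages; sizeof(struct slabobj_ext) is 8 bytes without memory profiling and
16 bytes with it):

  slab size         = PAGE_SIZE << 3      = 32768 bytes
  objects           = 28 * 1144           = 32032 bytes
  leftover          = 32768 - 32032       =   736 bytes
  slabobj_ext array = 28 * 8  (w/o prof.) =   224 bytes
                      28 * 16 (w/  prof.) =   448 bytes

so the whole array fits in the leftover space in both configurations.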
Allocate slabobj_exts array from this unused space instead of using
kcalloc(), when it is large enough.
Enjoy the memory savings!
[ MEMCG=y, MEM_ALLOC_PROFILING=y ]
Before patch (run updatedb):
Slab: 5815196 kB
SReclaimable: 5042824 kB
SUnreclaim: 772372 kB
After patch (run updatedb):
Slab: 5748664 kB
SReclaimable: 5041608 kB
SUnreclaim: 707084 kB (-63.75 MiB)
[ MEMCG=y, MEM_ALLOC_PROFILING=n ]
Before patch (run updatedb):
Slab: 5637764 kB
SReclaimable: 5042428 kB
SUnreclaim: 595284 kB
After patch (run updatedb):
Slab: 5598992 kB
SReclaimable: 5042248 kB
SUnreclaim: 560396 kB (-34.07 MiB)
This saves from hundreds of KiBs up to several tens of MiBs of memory
on my machine, depending on the config and slab memory usage.
Enjoy the memory savings!
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
KASAN folks: Should we also poison the array before freeing the slab?
If so, which API would be appropriate to use?
mm/slub.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 87 insertions(+), 8 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index cf3637324243..20f0f76f0c65 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -785,6 +785,49 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
return *(unsigned int *)p;
}
+#ifdef CONFIG_SLAB_OBJ_EXT
+static inline unsigned int obj_exts_size(struct slab *slab)
+{
+ return sizeof(struct slabobj_ext) * slab->objects;
+}
+
+static unsigned long obj_exts_offset(struct kmem_cache *s,
+ struct slab *slab)
+{
+ unsigned long objext_offset;
+
+ objext_offset = s->red_left_pad + s->size * slab->objects;
+ objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
+ return objext_offset;
+}
+
+static bool can_alloc_obj_exts_from_leftover(struct kmem_cache *s,
+ struct slab *slab)
+{
+ unsigned long objext_offset = obj_exts_offset(s, slab);
+ unsigned long objext_size = obj_exts_size(slab);
+
+ return objext_offset + objext_size <= slab_size(slab);
+}
+#else
+static inline unsigned int obj_exts_size(struct slab *slab)
+{
+ return 0;
+}
+
+static unsigned long obj_exts_offset(struct kmem_cache *s,
+ struct slab *slab)
+{
+ return 0;
+}
+
+static inline bool can_alloc_obj_exts_from_leftover(struct kmem_cache *s,
+ struct slab *slab)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SLUB_DEBUG
static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
static DEFINE_SPINLOCK(object_map_lock);
@@ -1307,7 +1350,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
start = slab_address(slab);
length = slab_size(slab);
end = start + length;
- remainder = length % s->size;
+
+ if (can_alloc_obj_exts_from_leftover(s, slab)) {
+ remainder = length;
+ remainder -= obj_exts_offset(s, slab);
+ remainder -= obj_exts_size(slab);
+ } else {
+ remainder = length % s->size;
+ }
+
if (!remainder)
return;
@@ -2049,6 +2100,21 @@ static noinline void free_slab_obj_exts(struct slab *slab)
slab->obj_exts = 0;
}
+static void try_to_alloc_obj_exts_from_leftover(struct kmem_cache *s,
+ struct slab *slab)
+{
+ if (can_alloc_obj_exts_from_leftover(s, slab)) {
+ void *addr = slab_address(slab) + obj_exts_offset(s, slab);
+
+ slab->obj_exts = (unsigned long)addr;
+ kasan_unpoison_range(addr, obj_exts_size(slab));
+ memset(addr, 0, obj_exts_size(slab));
+#ifdef CONFIG_MEMCG
+ slab->obj_exts |= MEMCG_DATA_OBJEXTS;
+#endif
+ }
+}
+
static inline bool need_slab_obj_ext(void)
{
if (mem_alloc_profiling_enabled())
@@ -2077,6 +2143,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
{
}
+static inline void try_to_alloc_obj_exts_from_leftover(struct kmem_cache *s,
+ struct slab *slab)
+{
+}
+
static inline bool need_slab_obj_ext(void)
{
return false;
@@ -2592,7 +2663,9 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
- if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+ if (memcg_kmem_online() &&
+ (s->flags & SLAB_ACCOUNT) &&
+ !slab_obj_exts(slab))
alloc_slab_obj_exts(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
@@ -2602,11 +2675,16 @@ static __always_inline void account_slab(struct slab *slab, int order,
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
- if (memcg_kmem_online() || need_slab_obj_ext())
- free_slab_obj_exts(slab);
-
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
+
+ if (can_alloc_obj_exts_from_leftover(s, slab)) {
+ slab->obj_exts = 0;
+ return;
+ }
+
+ if (memcg_kmem_online() || need_slab_obj_ext())
+ free_slab_obj_exts(slab);
}
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -2647,9 +2725,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
slab->objects = oo_objects(oo);
slab->inuse = 0;
slab->frozen = 0;
- init_slab_obj_exts(slab);
-
- account_slab(slab, oo_order(oo), s, flags);
slab->slab_cache = s;
@@ -2658,6 +2733,10 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
start = slab_address(slab);
setup_slab_debug(s, slab, start);
+ init_slab_obj_exts(slab);
+ /* Initialize the slabobj_ext array after poisoning the slab */
+ try_to_alloc_obj_exts_from_leftover(s, slab);
+ account_slab(slab, oo_order(oo), s, flags);
shuffle = shuffle_freelist(s, slab);
--
2.43.0
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 6:33 [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover Harry Yoo
@ 2025-06-13 7:11 ` Harry Yoo
2025-06-13 11:42 ` Yeoreum Yun
2025-06-13 16:04 ` Christoph Lameter (Ampere)
2 siblings, 0 replies; 11+ messages in thread
From: Harry Yoo @ 2025-06-13 7:11 UTC (permalink / raw)
To: Vlastimil Babka, David Rientjes, Christoph Lameter, Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm
On Fri, Jun 13, 2025 at 03:33:36PM +0900, Harry Yoo wrote:
> The leftover space in a slab is always smaller than s->size, and
> kmem caches for large objects that are not power-of-two sizes tend to have
> a greater amount of leftover space per slab. In some cases, the leftover
> space is larger than the size of the slabobj_ext array for the slab.
>
> An excellent example of such a cache is ext4_inode_cache. On my system,
> the object size is 1144, with a preferred order of 3, 28 objects per slab,
> and 736 bytes of leftover space per slab.
>
> Since the size of the slabobj_ext array is only 224 bytes (w/o mem
> profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
> fits within the leftover space.
>
> Allocate slabobj_exts array from this unused space instead of using
> kcalloc(), when it is large enough.
>
> Enjoy the memory savings!
Oops, I put this sentence twice in the changelog ;)
There's also a build error I missed when neither MEMCG nor
MEM_ALLOC_PROFILING is configured:
hyeyoo@hyeyoo ~/slab-misc (slab/for-next)> make -j24 mm/slub.o
DESCEND objtool
CALL scripts/checksyscalls.sh
INSTALL libsubcmd_headers
CC mm/slub.o
mm/slub.c: In function ‘unaccount_slab’:
mm/slub.c:2682:23: error: ‘struct slab’ has no member named ‘obj_exts’; did you mean ‘objects’?
2682 | slab->obj_exts = 0;
| ^~~~~~~~
| objects
make[3]: *** [scripts/Makefile.build:203: mm/slub.o] Error 1
make[2]: *** [scripts/Makefile.build:461: mm] Error 2
make[1]: *** [/home/hyeyoo/slab-misc/Makefile:2011: .] Error 2
make: *** [Makefile:248: __sub-make] Error 2
Will fix it in the next revision, but let me wait a bit for some
feedback.
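FWIW, one possible way to fix it (just a sketch, the next revision may do
it differently) is to not touch slab->obj_exts directly in unaccount_slab()
and instead reuse init_slab_obj_exts(), which already compiles away when
CONFIG_SLAB_OBJ_EXT is disabled:

	if (can_alloc_obj_exts_from_leftover(s, slab)) {
		/* the array lives in the slab itself, nothing to free */
		init_slab_obj_exts(slab);
		return;
	}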
> [ MEMCG=y, MEM_ALLOC_PROFILING=y ]
>
> Before patch (run updatedb):
> Slab: 5815196 kB
> SReclaimable: 5042824 kB
> SUnreclaim: 772372 kB
>
> After patch (run updatedb):
> Slab: 5748664 kB
> SReclaimable: 5041608 kB
> SUnreclaim: 707084 kB (-63.75 MiB)
>
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
>
> Before patch (run updatedb):
> Slab: 5637764 kB
> SReclaimable: 5042428 kB
> SUnreclaim: 595284 kB
>
> After patch (run updatedb):
> Slab: 5598992 kB
> SReclaimable: 5042248 kB
> SUnreclaim: 560396 kB (-34.07 MiB)
>
> This saves from hundreds of KiBs up to several tens of MiBs of memory
> on my machine, depending on the config and slab memory usage.
>
> Enjoy the memory savings!
>
> Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
> ---
> KASAN folks: Should we also poison the array before freeing the slab?
> If so, which API would be appropriate to use?
>
> mm/slub.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 87 insertions(+), 8 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index cf3637324243..20f0f76f0c65 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -785,6 +785,49 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
> return *(unsigned int *)p;
> }
>
> +#ifdef CONFIG_SLAB_OBJ_EXT
> +static inline unsigned int obj_exts_size(struct slab *slab)
> +{
> + return sizeof(struct slabobj_ext) * slab->objects;
> +}
> +
> +static unsigned long obj_exts_offset(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + unsigned long objext_offset;
> +
> + objext_offset = s->red_left_pad + s->size * slab->objects;
> + objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
> + return objext_offset;
> +}
> +
> +static bool can_alloc_obj_exts_from_leftover(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + unsigned long objext_offset = obj_exts_offset(s, slab);
> + unsigned long objext_size = obj_exts_size(slab);
> +
> + return objext_offset + objext_size <= slab_size(slab);
> +}
> +#else
> +static inline unsigned int obj_exts_size(struct slab *slab)
> +{
> + return 0;
> +}
> +
> +static unsigned long obj_exts_offset(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + return 0;
> +}
> +
> +static inline bool can_alloc_obj_exts_from_leftover(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + return false;
> +}
> +#endif
> +
> #ifdef CONFIG_SLUB_DEBUG
> static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
> static DEFINE_SPINLOCK(object_map_lock);
> @@ -1307,7 +1350,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
> start = slab_address(slab);
> length = slab_size(slab);
> end = start + length;
> - remainder = length % s->size;
> +
> + if (can_alloc_obj_exts_from_leftover(s, slab)) {
> + remainder = length;
> + remainder -= obj_exts_offset(s, slab);
> + remainder -= obj_exts_size(slab);
> + } else {
> + remainder = length % s->size;
> + }
> +
> if (!remainder)
> return;
>
> @@ -2049,6 +2100,21 @@ static noinline void free_slab_obj_exts(struct slab *slab)
> slab->obj_exts = 0;
> }
>
> +static void try_to_alloc_obj_exts_from_leftover(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + if (can_alloc_obj_exts_from_leftover(s, slab)) {
> + void *addr = slab_address(slab) + obj_exts_offset(s, slab);
> +
> + slab->obj_exts = (unsigned long)addr;
> + kasan_unpoison_range(addr, obj_exts_size(slab));
> + memset(addr, 0, obj_exts_size(slab));
> +#ifdef CONFIG_MEMCG
> + slab->obj_exts |= MEMCG_DATA_OBJEXTS;
> +#endif
> + }
> +}
> +
> static inline bool need_slab_obj_ext(void)
> {
> if (mem_alloc_profiling_enabled())
> @@ -2077,6 +2143,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
> {
> }
>
> +static inline void try_to_alloc_obj_exts_from_leftover(struct kmem_cache *s,
> + struct slab *slab)
> +{
> +}
> +
> static inline bool need_slab_obj_ext(void)
> {
> return false;
> @@ -2592,7 +2663,9 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
> static __always_inline void account_slab(struct slab *slab, int order,
> struct kmem_cache *s, gfp_t gfp)
> {
> - if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
> + if (memcg_kmem_online() &&
> + (s->flags & SLAB_ACCOUNT) &&
> + !slab_obj_exts(slab))
> alloc_slab_obj_exts(slab, s, gfp, true);
>
> mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
> @@ -2602,11 +2675,16 @@ static __always_inline void account_slab(struct slab *slab, int order,
> static __always_inline void unaccount_slab(struct slab *slab, int order,
> struct kmem_cache *s)
> {
> - if (memcg_kmem_online() || need_slab_obj_ext())
> - free_slab_obj_exts(slab);
> -
> mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
> -(PAGE_SIZE << order));
> +
> + if (can_alloc_obj_exts_from_leftover(s, slab)) {
> + slab->obj_exts = 0;
> + return;
> + }
> +
> + if (memcg_kmem_online() || need_slab_obj_ext())
> + free_slab_obj_exts(slab);
> }
>
> static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> @@ -2647,9 +2725,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> slab->objects = oo_objects(oo);
> slab->inuse = 0;
> slab->frozen = 0;
> - init_slab_obj_exts(slab);
> -
> - account_slab(slab, oo_order(oo), s, flags);
>
> slab->slab_cache = s;
>
> @@ -2658,6 +2733,10 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> start = slab_address(slab);
>
> setup_slab_debug(s, slab, start);
> + init_slab_obj_exts(slab);
> + /* Initialize the slabobj_ext array after poisoning the slab */
> + try_to_alloc_obj_exts_from_leftover(s, slab);
> + account_slab(slab, oo_order(oo), s, flags);
>
> shuffle = shuffle_freelist(s, slab);
>
> --
> 2.43.0
>
--
Cheers,
Harry / Hyeonggon
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 6:33 [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover Harry Yoo
2025-06-13 7:11 ` Harry Yoo
@ 2025-06-13 11:42 ` Yeoreum Yun
2025-06-13 17:58 ` Harry Yoo
2025-06-13 16:04 ` Christoph Lameter (Ampere)
2 siblings, 1 reply; 11+ messages in thread
From: Yeoreum Yun @ 2025-06-13 11:42 UTC (permalink / raw)
To: Harry Yoo
Cc: Vlastimil Babka, David Rientjes, Christoph Lameter, Andrew Morton,
Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm
Hi Harry,
[...]
> Allocate slabobj_exts array from this unused space instead of using
> kcalloc(), when it is large enough.
>
> Enjoy the memory savings!
>
> [ MEMCG=y, MEM_ALLOC_PROFILING=y ]
>
> Before patch (run updatedb):
> Slab: 5815196 kB
> SReclaimable: 5042824 kB
> SUnreclaim: 772372 kB
>
> After patch (run updatedb):
> Slab: 5748664 kB
> SReclaimable: 5041608 kB
> SUnreclaim: 707084 kB (-63.75 MiB)
>
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
>
> Before patch (run updatedb):
> Slab: 5637764 kB
> SReclaimable: 5042428 kB
> SUnreclaim: 595284 kB
>
> After patch (run updatedb):
> Slab: 5598992 kB
> SReclaimable: 5042248 kB
> SUnreclaim: 560396 kB (-34.07 MiB)
>
> This saves from hundreds of KiBs up to several tens of MiBs of memory
> on my machine, depending on the config and slab memory usage.
>
> Enjoy the memory savings!
Awesome :)
[...]
> #ifdef CONFIG_SLUB_DEBUG
> static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
> static DEFINE_SPINLOCK(object_map_lock);
> @@ -1307,7 +1350,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
> start = slab_address(slab);
> length = slab_size(slab);
> end = start + length;
> - remainder = length % s->size;
> +
> + if (can_alloc_obj_exts_from_leftover(s, slab)) {
> + remainder = length;
> + remainder -= obj_exts_offset(s, slab);
> + remainder -= obj_exts_size(slab);
> + } else {
> + remainder = length % s->size;
> + }
> +
> if (!remainder)
> return;
>
> @@ -2049,6 +2100,21 @@ static noinline void free_slab_obj_exts(struct slab *slab)
> slab->obj_exts = 0;
> }
What concerns me about this patch is the case where !memcg_kmem_online() and
MEM_ALLOC_PROFILING is not used.
With this patch, the obj_exts array can still be created even in that
situation, and as a result, if data is overwritten in the region that was
previously padded with POISON_INUSE (before the patch), slab_pad_check()
may no longer catch it.
If this is ignorable, feel free to add:
Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com>
--
Sincerely,
Yeoreum Yun
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 6:33 [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover Harry Yoo
2025-06-13 7:11 ` Harry Yoo
2025-06-13 11:42 ` Yeoreum Yun
@ 2025-06-13 16:04 ` Christoph Lameter (Ampere)
2025-06-13 17:47 ` Harry Yoo
2 siblings, 1 reply; 11+ messages in thread
From: Christoph Lameter (Ampere) @ 2025-06-13 16:04 UTC (permalink / raw)
To: Harry Yoo
Cc: Vlastimil Babka, David Rientjes, Andrew Morton, Johannes Weiner,
Michal Hocko, Roman Gushchin, Shakeel Butt, Muchun Song,
Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm
On Fri, 13 Jun 2025, Harry Yoo wrote:
> Allocate slabobj_exts array from this unused space instead of using
> kcalloc(), when it is large enough.
How does slab debug work in this case? The object layout gets a bit
complicated with other metadata there as well.
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 16:04 ` Christoph Lameter (Ampere)
@ 2025-06-13 17:47 ` Harry Yoo
2025-06-16 11:00 ` Harry Yoo
2025-06-19 7:56 ` Vlastimil Babka
0 siblings, 2 replies; 11+ messages in thread
From: Harry Yoo @ 2025-06-13 17:47 UTC (permalink / raw)
To: Christoph Lameter (Ampere)
Cc: Vlastimil Babka, David Rientjes, Andrew Morton, Johannes Weiner,
Michal Hocko, Roman Gushchin, Shakeel Butt, Muchun Song,
Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm
On Fri, Jun 13, 2025 at 09:04:34AM -0700, Christoph Lameter (Ampere) wrote:
> On Fri, 13 Jun 2025, Harry Yoo wrote:
>
> > Allocate slabobj_exts array from this unused space instead of using
> > kcalloc(), when it is large enough.
>
> How does slab debug work in this case? The object layout gets a bit
> complicated with other metadata there as well.
Oh, the 'leftover' space I mentioned in the cover letter refers to the
wasted space after the last object in a slab, not unused bytes within
objects.
There is no per-object metadata stored there and SLUB simply poisons the area.
I taught slab_pad_check() to skip checking the slabobj_exts array.
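With this patch the slab layout roughly becomes (a sketch, not to scale):

  | objects (slab->objects * s->size) | slabobj_ext array | leftover pad |

where the array starts at obj_exts_offset(), i.e. s->red_left_pad +
s->size * slab->objects rounded up to the alignment of struct slabobj_ext,
and slab->obj_exts points at it.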
--
Cheers,
Harry / Hyeonggon
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 11:42 ` Yeoreum Yun
@ 2025-06-13 17:58 ` Harry Yoo
0 siblings, 0 replies; 11+ messages in thread
From: Harry Yoo @ 2025-06-13 17:58 UTC (permalink / raw)
To: Yeoreum Yun
Cc: Vlastimil Babka, David Rientjes, Christoph Lameter, Andrew Morton,
Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm
On Fri, Jun 13, 2025 at 12:42:22PM +0100, Yeoreum Yun wrote:
> Hi Harry,
>
> [...]
Hi Yeoreum,
> > Allocate slabobj_exts array from this unused space instead of using
> > kcalloc(), when it is large enough.
> >
> > Enjoy the memory savings!
> >
> > [ MEMCG=y, MEM_ALLOC_PROFILING=y ]
> >
> > Before patch (run updatedb):
> > Slab: 5815196 kB
> > SReclaimable: 5042824 kB
> > SUnreclaim: 772372 kB
> >
> > After patch (run updatedb):
> > Slab: 5748664 kB
> > SReclaimable: 5041608 kB
> > SUnreclaim: 707084 kB (-63.75 MiB)
> >
> > [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
> >
> > Before patch (run updatedb):
> > Slab: 5637764 kB
> > SReclaimable: 5042428 kB
> > SUnreclaim: 595284 kB
> >
> > After patch (run updatedb):
> > Slab: 5598992 kB
> > SReclaimable: 5042248 kB
> > SUnreclaim: 560396 kB (-34.07 MiB)
> >
> > This saves from hundreds of KiBs up to several tens of MiBs of memory
> > on my machine, depending on the config and slab memory usage.
> >
> > Enjoy the memory savings!
>
> Awesome :)
Thanks :)
> [...]
> > #ifdef CONFIG_SLUB_DEBUG
> > static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
> > static DEFINE_SPINLOCK(object_map_lock);
> > @@ -1307,7 +1350,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
> > start = slab_address(slab);
> > length = slab_size(slab);
> > end = start + length;
> > - remainder = length % s->size;
> > +
> > + if (can_alloc_obj_exts_from_leftover(s, slab)) {
> > + remainder = length;
> > + remainder -= obj_exts_offset(s, slab);
> > + remainder -= obj_exts_size(slab);
> > + } else {
> > + remainder = length % s->size;
> > + }
> > +
> > if (!remainder)
> > return;
> >
> > @@ -2049,6 +2100,21 @@ static noinline void free_slab_obj_exts(struct slab *slab)
> > slab->obj_exts = 0;
> > }
>
> What concerns me about this patch is the case where !memcg_kmem_online() and
> MEM_ALLOC_PROFILING is not used.
> With this patch, obj_ext can still be created even in that situation,
> and as a result, if data is overwritten in the region previously padded with
> POISON_INUSE (before the patch), slab_pad_check() may no longer catch it
That's a valid point.
I think allocating the array from the leftover space can be deferred
until either MEMCG or MEM_ALLOC_PROFILING actually requests it.
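Something along these lines on top of the current patch, I think (just a
sketch, the exact condition may look different in the next version):

	/* Carve the array out of the leftover only if someone will use it */
	if (need_slab_obj_ext() ||
	    (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT)))
		try_to_alloc_obj_exts_from_leftover(s, slab);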
> If this's ignorable, feel free toadd :
>
> Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com>
That means the implementation will change a bit, so it's better to drop
the R-b tag, as the new change may invalidate the "Looks good to me" state.
I'll Cc you on the next version - please take a look and review the
updated version.
Thanks for reviewing!
--
Cheers,
Harry / Hyeonggon
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 17:47 ` Harry Yoo
@ 2025-06-16 11:00 ` Harry Yoo
2025-06-19 7:56 ` Vlastimil Babka
1 sibling, 0 replies; 11+ messages in thread
From: Harry Yoo @ 2025-06-16 11:00 UTC (permalink / raw)
To: Christoph Lameter (Ampere)
Cc: Vlastimil Babka, David Rientjes, Andrew Morton, Johannes Weiner,
Michal Hocko, Roman Gushchin, Shakeel Butt, Muchun Song,
Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, linux-mm
On Sat, Jun 14, 2025 at 02:47:18AM +0900, Harry Yoo wrote:
> On Fri, Jun 13, 2025 at 09:04:34AM -0700, Christoph Lameter (Ampere) wrote:
> > On Fri, 13 Jun 2025, Harry Yoo wrote:
> >
> > > Allocate slabobj_exts array from this unused space instead of using
> > > kcalloc(), when it is large enough.
> >
> > How does slab debug work in this case? The object layout gets a bit
> > complicated with other metadata there as well.
>
> Oh, the 'leftover' space I mentioned the cover letter refers to the
> wasted space after the last object in a slab, not unused bytes within
> objects.
>
> There is no per-object metadata stored there and SLUB simply poisons the area.
> I taught slab_pad_check() to skip checking the slabobj_exts array.
Actually, it might be worth considering placing it within the object in
case the object's size is exactly a power of two but each object still has
some wasted bytes due to alignment.
xfs_inode seems to be a good candidate that could benefit from this.
Hopefully it doesn't introduce too much complexity...
--
Cheers,
Harry / Hyeonggon
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-13 17:47 ` Harry Yoo
2025-06-16 11:00 ` Harry Yoo
@ 2025-06-19 7:56 ` Vlastimil Babka
2025-08-05 11:57 ` Harry Yoo
1 sibling, 1 reply; 11+ messages in thread
From: Vlastimil Babka @ 2025-06-19 7:56 UTC (permalink / raw)
To: Harry Yoo, Christoph Lameter (Ampere)
Cc: David Rientjes, Andrew Morton, Johannes Weiner, Michal Hocko,
Roman Gushchin, Shakeel Butt, Muchun Song, Suren Baghdasaryan,
Kent Overstreet, Andrey Ryabinin, Alexander Potapenko,
Andrey Konovalov, Dmitry Vyukov, Vincenzo Frascino, linux-mm
On 6/13/25 19:47, Harry Yoo wrote:
> On Fri, Jun 13, 2025 at 09:04:34AM -0700, Christoph Lameter (Ampere) wrote:
>> On Fri, 13 Jun 2025, Harry Yoo wrote:
>>
>> > Allocate slabobj_exts array from this unused space instead of using
>> > kcalloc(), when it is large enough.
>>
>> How does slab debug work in this case? The object layout gets a bit
>> complicated with other metadata there as well.
>
> Oh, the 'leftover' space I mentioned the cover letter refers to the
> wasted space after the last object in a slab, not unused bytes within
> objects.
>
> There is no per-object metadata stored there and SLUB simply poisons the area.
> I taught slab_pad_check() to skip checking the slabobj_exts array.
I can imagine going further with this in cases where the leftover space in
a slab isn't enough.
- indeed use per-object padding to store only single object's slabobj_ext,
if it doesn't lead to memory waste
- if not possible, but object size is small enough so there are many per
slab, maybe have one less object per slab to store the array?
- once we have struct slab decoupled from struct page, it could be part of
struct slab directly (but it would mean struct slab isn't fixed size)
Of course having multiple variants would risk slower code, so fast paths
should not be affected - we could have a pointer to the 0th slabobj_ext (we
already have one) and now also a stride (to support the "per-object padding"
case - there's still space in struct slab, right?), and then the object
alloc/free path could be oblivious to the storage method, with just a bit
more arithmetic (the stride). Slab folio alloc/free would be more
complicated, but those are not fast paths.
Also some variants would be wasteful if they need to be decided upfront (the
2nd and 3rd above) and then the array ends up unused, so they would only be
applicable with SLAB_ACCOUNT caches (if kmemcg is active) or when memalloc
profiling is active. That shouldn't be a big issue, as ad-hoc __GFP_ACCOUNT
is handled by different cache selection for kmalloc(), and I don't know if
anyone is actually doing ad-hoc __GFP_ACCOUNT on named caches.
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-06-19 7:56 ` Vlastimil Babka
@ 2025-08-05 11:57 ` Harry Yoo
2025-08-08 14:44 ` Vlastimil Babka
0 siblings, 1 reply; 11+ messages in thread
From: Harry Yoo @ 2025-08-05 11:57 UTC (permalink / raw)
To: Vlastimil Babka
Cc: Christoph Lameter (Ampere), David Rientjes, Andrew Morton,
Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov, Yeoreum Yun,
Vincenzo Frascino, linux-mm
On Thu, Jun 19, 2025 at 09:56:59AM +0200, Vlastimil Babka wrote:
> On 6/13/25 19:47, Harry Yoo wrote:
> > On Fri, Jun 13, 2025 at 09:04:34AM -0700, Christoph Lameter (Ampere) wrote:
> >> On Fri, 13 Jun 2025, Harry Yoo wrote:
> >>
> >> > Allocate slabobj_exts array from this unused space instead of using
> >> > kcalloc(), when it is large enough.
> >>
> >> How does slab debug work in this case? The object layout gets a bit
> >> complicated with other metadata there as well.
> >
> > Oh, the 'leftover' space I mentioned the cover letter refers to the
> > wasted space after the last object in a slab, not unused bytes within
> > objects.
> >
> > There is no per-object metadata stored there and SLUB simply poisons the area.
> > I taught slab_pad_check() to skip checking the slabobj_exts array.
Apologies for the late reply. I was sidetracked with multiple things :(
This is definitely worth optimizing, so let me make some progress
even if it's a bit slow.
> I can imagine going further with this in case where leftover space in slab
> isn't enough.
Right.
> - indeed use per-object padding to store only single object's slabobj_ext,
I think the most conservative approach is to not increase object_size
but use wasted area when ALIGN(size, align) is bigger than object_size.
A good candidate for that is xfs inode cache.
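As a made-up example (not the real xfs_inode numbers): with object_size =
1000 and a 64-byte alignment, the per-object stride becomes ALIGN(1000, 64)
= 1024, so each object already carries 24 unused padding bytes, which is
enough for its own 8- or 16-byte slabobj_ext without changing the slab
layout at all.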
> if it doesn't lead to memory waste
You mean increasing object size but without decreasing the number of
objects per slab?
...or (maybe) reducing the number of objects but without increasing
the size of the remainder (same as calculated in calc_slab_order())?
> - if not possible, but object size is small enough so there are many per
> slab, maybe have one less object per slab to store the array?
If object size is small the array likely does not fit in one object...
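For example (hypothetical numbers): 64-byte objects in an order-0 slab give
64 objects, so the array needs 64 * 16 = 1024 bytes with profiling enabled,
i.e. sixteen objects' worth of space rather than one.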
> - once we have struct slab decoupled from struct page, it could be part of
> struct slab directly (but it would mean struct slab isn't fixed size)
It can be tried, but variable struct slab size may or may not work.
That'll depend on the implementation details of how we allocate struct slab
in the future.
> Of course having multiple variants would risk slower code, so fast paths
> should not be affected
I agree that affecting fastpath is not great.
> we could have pointer to the 0th slabobj_ext (we
> already have) and now also stride (to support the "per-object padding case"
> - there's still space in struct slab right?)
which space are you referring to, maybe lower 16 bits of page_type?
> and then the object alloc/free
> case could be oblivious to the storage method, with just a bit more
> arithmetic (stride). Slab folio alloc/free would be more complicated but are
> not fath path.
So it would be something like (please correct if I misunderstood):
index = obj_to_index(s, slab, object)
(the struct slabobj_ext pointer for the object at given index)
== slab->obj_exts + stride * index
slab->obj_exts, stride are determined depending on the case:
- In the normal case (the array is allocated from kmalloc caches),
stride = sizeof(struct slabobj_ext)
slab->obj_exts = (the address of the buffer allocated from kmalloc)
- In "the obj_exts array is stored in the leftover space" case,
stride = sizeof(struct slabobj_ext)
slab->obj_exts = (the start address of the leftover space)
- In "per-object padding" case,
stride = s->size
slab->obj_exts = slab_address(slab) + s->red_left_pad +
(offset of slabobj_ext);
Ok, I think it will work. Great idea!
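Roughly something like this (a sketch only - obj_exts_stride is a
hypothetical new field, and the flag bits stored in the low bits of
slab->obj_exts still need masking the way slab_obj_exts() does today):

	static inline struct slabobj_ext *obj_ext_of(struct kmem_cache *s,
						     struct slab *slab,
						     void *object)
	{
		unsigned int index = obj_to_index(s, slab, object);
		/* slab_obj_exts() returns the array base with flags masked */
		unsigned long base = (unsigned long)slab_obj_exts(slab);

		return (struct slabobj_ext *)(base +
				(unsigned long)slab->obj_exts_stride * index);
	}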
> Also some variants would be wasteful if they need to be decided upfront (the
> 2nd and 3rd above) and then the array is unused
Right.
> so would be only applicable
> with SLAB_ACCOUNT caches (if kmemcg is active) or when memalloc profiling is
> active.
Right.
> Shouldn't be a big issue as ad-hoc __GFP_ACCOUNT is handled by
> different cache selection for kmalloc() and I don't know if anyone is
> actually doing ad-hoc __GFP_ACCOUNT on named caches.
A while ago I was thinking of getting rid of ad-hoc __GFP_ACCOUNT usage
for slab allocations, but at least xarray cache appears to use it in
an ad-hoc manner. (See xas_nomem()).
In that case, not all allocations from the same cache have the
__GFP_ACCOUNT flag set.
--
Cheers,
Harry / Hyeonggon
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-08-05 11:57 ` Harry Yoo
@ 2025-08-08 14:44 ` Vlastimil Babka
2025-08-27 11:40 ` Harry Yoo
0 siblings, 1 reply; 11+ messages in thread
From: Vlastimil Babka @ 2025-08-08 14:44 UTC (permalink / raw)
To: Harry Yoo
Cc: Christoph Lameter (Ampere), David Rientjes, Andrew Morton,
Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov, Yeoreum Yun,
Vincenzo Frascino, linux-mm
On 8/5/25 13:57, Harry Yoo wrote:
> On Thu, Jun 19, 2025 at 09:56:59AM +0200, Vlastimil Babka wrote:
>> On 6/13/25 19:47, Harry Yoo wrote:
>> > On Fri, Jun 13, 2025 at 09:04:34AM -0700, Christoph Lameter (Ampere) wrote:
>> >> On Fri, 13 Jun 2025, Harry Yoo wrote:
>> >>
>> >> > Allocate slabobj_exts array from this unused space instead of using
>> >> > kcalloc(), when it is large enough.
>> >>
>> >> How does slab debug work in this case? The object layout gets a bit
>> >> complicated with other metadata there as well.
>> >
>> > Oh, the 'leftover' space I mentioned the cover letter refers to the
>> > wasted space after the last object in a slab, not unused bytes within
>> > objects.
>> >
>> > There is no per-object metadata stored there and SLUB simply poisons the area.
>> > I taught slab_pad_check() to skip checking the slabobj_exts array.
>
> Apologies for the late reply. I was sidetracked with multiple things :(
>
> This is definitely worth optimizing, so let me make some progress
> even if it's a bit slow.
>
>> I can imagine going further with this in case where leftover space in slab
>> isn't enough.
>
> Right.
>
>> - indeed use per-object padding to store only single object's slabobj_ext,
>
> I think the most conservative approach is to not increase object_size
> but use wasted area when ALIGN(size, align) is bigger than object_size.
>
> A good candidate for that is xfs inode cache.
Yeah that's what I meant.
>> if it doesn't lead to memory waste
>
> You mean increasing object size but without decreasing the number of
> objects per slab?
Just the ALIGN thing. If we can increase the size without decreasing the
number of objects, it means we can also just use the leftover space for the
array, IIUC?
> ...or (maybe) reducing the number of objects but without increasing
> the size of the remainder (same as calculated in calc_slab_order())?
>
>> - if not possible, but object size is small enough so there are many per
>> slab, maybe have one less object per slab to store the array?
>
> If object size is small the array likely does not fit in one object...
Hmm perhaps.
>> - once we have struct slab decoupled from struct page, it could be part of
>> struct slab directly (but it would mean struct slab isn't fixed size)
>
> It can be tried, but variable struct slab size may or may not work.
> That'll depend on the implementation details of how we allocate struct slab
> in the future.
>
>> Of course having multiple variants would risk slower code, so fast paths
>> should not be affected
>
> I agree that affecting fastpath is not great.
So we'll have to measure it.
>> we could have pointer to the 0th slabobj_ext (we
>> already have) and now also stride (to support the "per-object padding case"
>
>> - there's still space in struct slab right?)
>
> which space are you referring to, maybe lower 16 bits of page_type?
Ugh, I thought counters used only 32 bits of the 64... so the space exists
only on 64-bit kernels? It would be fine to limit the optimization to those
only.
>> and then the object alloc/free
>> case could be oblivious to the storage method, with just a bit more
>> arithmetic (stride). Slab folio alloc/free would be more complicated but are
>> not fath path.
>
> So it would be something like (please correct if I misunderstood):
>
> index = obj_to_index(s, slab, object)
> (the struct slabobj_ext pointer for the object at given index)
> == slab->obj_exts + stride * index
Yeah.
>
> slab->obj_exts, stride are determined depending on the case:
>
> - In the normal case (the array is allocated from kmalloc caches),
> stride = sizeof(struct slabobj_ext)
> slab->obj_exts = (the address of the buffer allocated from kmalloc)
>
> - In "the obj_exts array is stored in the leftover space" case,
> stride = sizeof(struct slabobj_ext)
> slab->obj_exts = (the start address of the leftover space)
>
> - In "per-object padding" case,
> stride = s->size
> slab->obj_exts = slab_address(slab) + s->red_left_pad +
> (offset of slabobj_ext);
>
> Ok, I think it will work. Great idea!
Yeah.
>
>> Also some variants would be wasteful if they need to be decided upfront (the
>> 2nd and 3rd above) and then the array is unused
>
> Right.
>
>> so would be only applicable
>> with SLAB_ACCOUNT caches (if kmemcg is active) or when memalloc profiling is
>> active.
>
> Right.
>
>> Shouldn't be a big issue as ad-hoc __GFP_ACCOUNT is handled by
>> different cache selection for kmalloc() and I don't know if anyone is
>> actually doing ad-hoc __GFP_ACCOUNT on named caches.
>
> A while ago I was thinking of getting rid of ad-hoc __GFP_ACCOUNT usage
> for slab allocations, but at least xarray cache appears to use it in
> an ad-hoc manner. (See xas_nomem()).
>
> In that case, not all allocations to the same cache has
> __GFP_ACCOUNT flag set.
Hmm bummer.
* Re: [RFC PATCH] mm/slab: save memory by allocating slabobj_ext array from leftover
2025-08-08 14:44 ` Vlastimil Babka
@ 2025-08-27 11:40 ` Harry Yoo
0 siblings, 0 replies; 11+ messages in thread
From: Harry Yoo @ 2025-08-27 11:40 UTC (permalink / raw)
To: Vlastimil Babka
Cc: Christoph Lameter (Ampere), David Rientjes, Andrew Morton,
Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Suren Baghdasaryan, Kent Overstreet, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov, Yeoreum Yun,
Vincenzo Frascino, linux-mm
On Fri, Aug 08, 2025 at 04:44:32PM +0200, Vlastimil Babka wrote:
> On 8/5/25 13:57, Harry Yoo wrote:
> > On Thu, Jun 19, 2025 at 09:56:59AM +0200, Vlastimil Babka wrote:
> >> On 6/13/25 19:47, Harry Yoo wrote:
> >> > On Fri, Jun 13, 2025 at 09:04:34AM -0700, Christoph Lameter (Ampere) wrote:
> >> >> On Fri, 13 Jun 2025, Harry Yoo wrote:
> >> we could have pointer to the 0th slabobj_ext (we
> >> already have) and now also stride (to support the "per-object padding case"
> >
> >> - there's still space in struct slab right?)
> >
> > which space are you referring to, maybe lower 16 bits of page_type?
>
> Ugh I thought counters used only 32bit of 64... so the space exists only for
> 64bit kernels? It would be fine to limit the optimization to those only.
Oh, you're right and it seems to be better than using page_type.
RFC v2 uses page_type but I'll adjust this in V3.
--
Cheers,
Harry / Hyeonggon