Linux cgroups development
 help / color / mirror / Atom feed
* [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting
@ 2026-02-23 16:01 Johannes Weiner
  2026-02-23 16:01 ` [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter Johannes Weiner
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Johannes Weiner @ 2026-02-23 16:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Uladzislau Rezki, Joshua Hahn, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, linux-kernel

Use a vmstat counter instead of a custom, open-coded atomic. This has
the added benefit of making the data available per-node, and prepares
for cleaning up the memcg accounting as well.

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 fs/proc/meminfo.c       |  3 ++-
 include/linux/mmzone.h  |  1 +
 include/linux/vmalloc.h |  3 ---
 mm/vmalloc.c            | 19 ++++++++++---------
 mm/vmstat.c             |  1 +
 5 files changed, 14 insertions(+), 13 deletions(-)

V2:
- Fix mod_node_page_state() pgdat argument (Shakeel)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index a458f1e112fd..549793f44726 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -126,7 +126,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "Committed_AS:   ", committed);
 	seq_printf(m, "VmallocTotal:   %8lu kB\n",
 		   (unsigned long)VMALLOC_TOTAL >> 10);
-	show_val_kb(m, "VmallocUsed:    ", vmalloc_nr_pages());
+	show_val_kb(m, "VmallocUsed:    ",
+		    global_node_page_state(NR_VMALLOC));
 	show_val_kb(m, "VmallocChunk:   ", 0ul);
 	show_val_kb(m, "Percpu:         ", pcpu_nr_pages());
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc5d6c88d2f0..64df797d45c6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -220,6 +220,7 @@ enum node_stat_item {
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
 	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
 	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
+	NR_VMALLOC,
 	NR_KERNEL_STACK_KB,	/* measured in KiB */
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	NR_KERNEL_SCS_KB,	/* measured in KiB */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index e8e94f90d686..3b02c0c6b371 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -286,8 +286,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb);
 #ifdef CONFIG_MMU
 #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
 
-unsigned long vmalloc_nr_pages(void);
-
 int vm_area_map_pages(struct vm_struct *area, unsigned long start,
 		      unsigned long end, struct page **pages);
 void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
@@ -304,7 +302,6 @@ static inline void set_vm_flush_reset_perms(void *addr)
 #else  /* !CONFIG_MMU */
 #define VMALLOC_TOTAL 0UL
 
-static inline unsigned long vmalloc_nr_pages(void) { return 0; }
 static inline void set_vm_flush_reset_perms(void *addr) {}
 #endif /* CONFIG_MMU */
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e286c2d2068c..a5fc7795aafd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1063,14 +1063,8 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
 static void drain_vmap_area_work(struct work_struct *work);
 static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
 
-static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
 static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
 
-unsigned long vmalloc_nr_pages(void)
-{
-	return atomic_long_read(&nr_vmalloc_pages);
-}
-
 static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
 	struct rb_node *n = root->rb_node;
@@ -3463,11 +3457,11 @@ void vfree(const void *addr)
 		 * High-order allocs for huge vmallocs are split, so
 		 * can be freed as an array of order-0 allocations
 		 */
+		if (!(vm->flags & VM_MAP_PUT_PAGES))
+			dec_node_page_state(page, NR_VMALLOC);
 		__free_page(page);
 		cond_resched();
 	}
-	if (!(vm->flags & VM_MAP_PUT_PAGES))
-		atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
 	kvfree(vm->pages);
 	kfree(vm);
 }
@@ -3655,6 +3649,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			continue;
 		}
 
+		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << large_order);
+
 		split_page(page, large_order);
 		for (i = 0; i < (1U << large_order); i++)
 			pages[nr_allocated + i] = page + i;
@@ -3675,6 +3671,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 	if (!order) {
 		while (nr_allocated < nr_pages) {
 			unsigned int nr, nr_pages_request;
+			int i;
 
 			/*
 			 * A maximum allowed request is hard-coded and is 100
@@ -3698,6 +3695,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 							nr_pages_request,
 							pages + nr_allocated);
 
+			for (i = nr_allocated; i < nr_allocated + nr; i++)
+				inc_node_page_state(pages[i], NR_VMALLOC);
+
 			nr_allocated += nr;
 
 			/*
@@ -3722,6 +3722,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		if (unlikely(!page))
 			break;
 
+		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << order);
+
 		/*
 		 * High-order allocations must be able to be treated as
 		 * independent small pages by callers (as they can with
@@ -3864,7 +3866,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			vmalloc_gfp_adjust(gfp_mask, page_order), node,
 			page_order, nr_small_pages, area->pages);
 
-	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 	/* All pages of vm should be charged to same memcg, so use first one. */
 	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
 		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d6e814c82952..bc199c7cd07b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1270,6 +1270,7 @@ const char * const vmstat_text[] = {
 	[I(NR_KERNEL_MISC_RECLAIMABLE)]		= "nr_kernel_misc_reclaimable",
 	[I(NR_FOLL_PIN_ACQUIRED)]		= "nr_foll_pin_acquired",
 	[I(NR_FOLL_PIN_RELEASED)]		= "nr_foll_pin_released",
+	[I(NR_VMALLOC)]				= "nr_vmalloc",
 	[I(NR_KERNEL_STACK_KB)]			= "nr_kernel_stack",
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	[I(NR_KERNEL_SCS_KB)]			= "nr_shadow_call_stack",
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter
  2026-02-23 16:01 [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Johannes Weiner
@ 2026-02-23 16:01 ` Johannes Weiner
  2026-02-23 19:24   ` Roman Gushchin
  2026-02-23 19:48   ` Vishal Moola (Oracle)
  2026-02-23 19:22 ` [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Roman Gushchin
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 7+ messages in thread
From: Johannes Weiner @ 2026-02-23 16:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Uladzislau Rezki, Joshua Hahn, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, linux-kernel

Eliminates the custom memcg counter and results in a single,
consolidated accounting call in vmalloc code.

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/memcontrol.h |  1 -
 mm/memcontrol.c            |  4 ++--
 mm/vmalloc.c               | 16 ++++------------
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 67f154de10bc..c7cc4e50e59a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,6 @@ enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	MEMCG_VMALLOC,
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 129eed3ff5bb..fef5bdd887e0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -317,6 +317,7 @@ static const unsigned int memcg_node_stat_items[] = {
 	NR_SHMEM_THPS,
 	NR_FILE_THPS,
 	NR_ANON_THPS,
+	NR_VMALLOC,
 	NR_KERNEL_STACK_KB,
 	NR_PAGETABLE,
 	NR_SECONDARY_PAGETABLE,
@@ -339,7 +340,6 @@ static const unsigned int memcg_stat_items[] = {
 	MEMCG_SWAP,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	MEMCG_VMALLOC,
 	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
@@ -1359,7 +1359,7 @@ static const struct memory_stat memory_stats[] = {
 	{ "sec_pagetables",		NR_SECONDARY_PAGETABLE		},
 	{ "percpu",			MEMCG_PERCPU_B			},
 	{ "sock",			MEMCG_SOCK			},
-	{ "vmalloc",			MEMCG_VMALLOC			},
+	{ "vmalloc",			NR_VMALLOC			},
 	{ "shmem",			NR_SHMEM			},
 #ifdef CONFIG_ZSWAP
 	{ "zswap",			MEMCG_ZSWAP_B			},
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a5fc7795aafd..8773bc0c4734 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3446,9 +3446,6 @@ void vfree(const void *addr)
 
 	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
 		vm_reset_perms(vm);
-	/* All pages of vm should be charged to same memcg, so use first one. */
-	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
-		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
 	for (i = 0; i < vm->nr_pages; i++) {
 		struct page *page = vm->pages[i];
 
@@ -3458,7 +3455,7 @@ void vfree(const void *addr)
 		 * can be freed as an array of order-0 allocations
 		 */
 		if (!(vm->flags & VM_MAP_PUT_PAGES))
-			dec_node_page_state(page, NR_VMALLOC);
+			mod_lruvec_page_state(page, NR_VMALLOC, -1);
 		__free_page(page);
 		cond_resched();
 	}
@@ -3649,7 +3646,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			continue;
 		}
 
-		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << large_order);
+		mod_lruvec_page_state(page, NR_VMALLOC, 1 << large_order);
 
 		split_page(page, large_order);
 		for (i = 0; i < (1U << large_order); i++)
@@ -3696,7 +3693,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 							pages + nr_allocated);
 
 			for (i = nr_allocated; i < nr_allocated + nr; i++)
-				inc_node_page_state(pages[i], NR_VMALLOC);
+				mod_lruvec_page_state(pages[i], NR_VMALLOC, 1);
 
 			nr_allocated += nr;
 
@@ -3722,7 +3719,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		if (unlikely(!page))
 			break;
 
-		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << order);
+		mod_lruvec_page_state(page, NR_VMALLOC, 1 << order);
 
 		/*
 		 * High-order allocations must be able to be treated as
@@ -3866,11 +3863,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			vmalloc_gfp_adjust(gfp_mask, page_order), node,
 			page_order, nr_small_pages, area->pages);
 
-	/* All pages of vm should be charged to same memcg, so use first one. */
-	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
-		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
-				     area->nr_pages);
-
 	/*
 	 * If not enough pages were obtained to accomplish an
 	 * allocation request, free them via vfree() if any.
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting
  2026-02-23 16:01 [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Johannes Weiner
  2026-02-23 16:01 ` [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter Johannes Weiner
@ 2026-02-23 19:22 ` Roman Gushchin
  2026-02-23 19:48 ` Vishal Moola (Oracle)
  2026-06-19 12:53 ` [REGRESSION] " Aishwarya Rambhadran
  3 siblings, 0 replies; 7+ messages in thread
From: Roman Gushchin @ 2026-02-23 19:22 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, Uladzislau Rezki, Joshua Hahn, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, linux-kernel

Johannes Weiner <hannes@cmpxchg.org> writes:

> Use a vmstat counter instead of a custom, open-coded atomic. This has
> the added benefit of making the data available per-node, and prepares
> for cleaning up the memcg accounting as well.
>
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter
  2026-02-23 16:01 ` [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter Johannes Weiner
@ 2026-02-23 19:24   ` Roman Gushchin
  2026-02-23 19:48   ` Vishal Moola (Oracle)
  1 sibling, 0 replies; 7+ messages in thread
From: Roman Gushchin @ 2026-02-23 19:24 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, Uladzislau Rezki, Joshua Hahn, Michal Hocko,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, linux-kernel

Johannes Weiner <hannes@cmpxchg.org> writes:

> Eliminates the custom memcg counter and results in a single,
> consolidated accounting call in vmalloc code.
>
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Nice series!

Thanks

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting
  2026-02-23 16:01 [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Johannes Weiner
  2026-02-23 16:01 ` [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter Johannes Weiner
  2026-02-23 19:22 ` [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Roman Gushchin
@ 2026-02-23 19:48 ` Vishal Moola (Oracle)
  2026-06-19 12:53 ` [REGRESSION] " Aishwarya Rambhadran
  3 siblings, 0 replies; 7+ messages in thread
From: Vishal Moola (Oracle) @ 2026-02-23 19:48 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, Uladzislau Rezki, Joshua Hahn, Michal Hocko,
	Roman Gushchin, Shakeel Butt, Muchun Song, linux-mm, cgroups,
	linux-kernel

On Mon, Feb 23, 2026 at 11:01:06AM -0500, Johannes Weiner wrote:
> Use a vmstat counter instead of a custom, open-coded atomic. This has
> the added benefit of making the data available per-node, and prepares
> for cleaning up the memcg accounting as well.
> 
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter
  2026-02-23 16:01 ` [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter Johannes Weiner
  2026-02-23 19:24   ` Roman Gushchin
@ 2026-02-23 19:48   ` Vishal Moola (Oracle)
  1 sibling, 0 replies; 7+ messages in thread
From: Vishal Moola (Oracle) @ 2026-02-23 19:48 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, Uladzislau Rezki, Joshua Hahn, Michal Hocko,
	Roman Gushchin, Shakeel Butt, Muchun Song, linux-mm, cgroups,
	linux-kernel

On Mon, Feb 23, 2026 at 11:01:07AM -0500, Johannes Weiner wrote:
> Eliminates the custom memcg counter and results in a single,
> consolidated accounting call in vmalloc code.
> 
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Reviewed-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [REGRESSION] [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting
  2026-02-23 16:01 [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Johannes Weiner
                   ` (2 preceding siblings ...)
  2026-02-23 19:48 ` Vishal Moola (Oracle)
@ 2026-06-19 12:53 ` Aishwarya Rambhadran
  3 siblings, 0 replies; 7+ messages in thread
From: Aishwarya Rambhadran @ 2026-06-19 12:53 UTC (permalink / raw)
  To: Johannes Weiner, Andrew Morton
  Cc: Uladzislau Rezki, Joshua Hahn, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, linux-kernel,
	Ryan Roberts

Hi Johannes,

We have observed kernel performance regressions in vmalloc benchmarks
when comparing v7.0 mainline results against later releases in the v7.1
cycle.
The regressions were detected by Fastpath, our automated kernel
performance benchmark and regression tracking framework.
Independent bisections on multiple arm64 systems consistently
identify this patch as the root cause. The regressions are reproducible
on both AWS Graviton3 & AmpereOne systems.

Fastpath bisection details:
Benchmark - micromm/vmalloc
Test - fix_size_alloc_test: p:512, h:1, l:100000
Good Kernel - v7.0
Bad Kernel - v7.1-rc4

The measured regression for the above test is approximately 32.5%
on AWS Graviton3. Similar regressions are observed across multiple
tests within the vmalloc benchmark suite as well as on AmpereOne.

Below are the vmalloc suite benchmark results generated by the
Fastpath tool for the v7.1 kernel relative to the v7.0 base, executed
on the AWS Graviton3 SUT. The label (R) marks a statistically
significant regression, where "statistically significant" means the
95% confidence intervals do not overlap.

v7.0 (base) | v7.1
-------------------------------------------------------------------
fix_align_alloc_test: p:1, h:0, l:500000
895106.67 | (R) -10.73%

fix_size_alloc_test: p:1, h:0, l:500000
336785.00 | (R) -7.31%

fix_size_alloc_test: p:4, h:0, l:500000
529652.83 | (R) -13.11%

fix_size_alloc_test: p:16, h:0, l:500000
1043412.50 | (R) -21.92%

fix_size_alloc_test: p:16, h:1, l:500000
1015795.83 | (R) -22.02%

fix_size_alloc_test: p:64, h:0, l:100000
643074.33 | (R) -25.91%

fix_size_alloc_test: p:64, h:1, l:100000
607604.00 | (R) -27.31%

fix_size_alloc_test: p:256, h:0, l:100000
2367906.50 | (R) -27.67%

fix_size_alloc_test: p:256, h:1, l:100000
2275464.67 | (R) -28.66%

fix_size_alloc_test: p:512, h:0, l:100000
4696069.17 | (R) -28.15%

fix_size_alloc_test: p:512, h:1, l:100000
3767292.00 | (R) -32.65%

full_fit_alloc_test: p:1, h:0, l:500000
493884.17 | (R) -12.38%

kvfree_rcu_1_arg_vmalloc_test: p:1, h:0, l:500000
354542.83 | -2.31%

kvfree_rcu_2_arg_vmalloc_test: p:1, h:0, l:500000
358082.83 | -1.53%

long_busy_list_alloc_test: p:1, h:0, l:500000
5490101.33 | (R) -25.85%

pcpu_alloc_test: p:1, h:0, l:500000
193634.00 | -1.53%

random_size_align_alloc_test: p:1, h:0, l:500000
1200206.83 | (R) -11.88%

random_size_alloc_test: p:1, h:0, l:500000
2875736.33 | (R) -24.41%

vm_map_ram_test: p:1, h:0, l:500000
81204.33 | -0.28%
-------------------------------------------------------------------

The regression signal appears stable across repeated runs.
Have you seen similar effects before, or is there an expected
behavioral change associated with the conversion from the
custom atomic accounting to vmstat counters that could
explain this result?

We would be happy to provide additional performance data,
kernel configurations or any other details if useful.

Thank you.
Aishwarya Rambhadran

On 23/02/26 9:31 PM, Johannes Weiner wrote:
> Use a vmstat counter instead of a custom, open-coded atomic. This has
> the added benefit of making the data available per-node, and prepares
> for cleaning up the memcg accounting as well.
>
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>   fs/proc/meminfo.c       |  3 ++-
>   include/linux/mmzone.h  |  1 +
>   include/linux/vmalloc.h |  3 ---
>   mm/vmalloc.c            | 19 ++++++++++---------
>   mm/vmstat.c             |  1 +
>   5 files changed, 14 insertions(+), 13 deletions(-)
>
> V2:
> - Fix mod_node_page_state() pgdat argument (Shakeel)
>
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index a458f1e112fd..549793f44726 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -126,7 +126,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>   	show_val_kb(m, "Committed_AS:   ", committed);
>   	seq_printf(m, "VmallocTotal:   %8lu kB\n",
>   		   (unsigned long)VMALLOC_TOTAL >> 10);
> -	show_val_kb(m, "VmallocUsed:    ", vmalloc_nr_pages());
> +	show_val_kb(m, "VmallocUsed:    ",
> +		    global_node_page_state(NR_VMALLOC));
>   	show_val_kb(m, "VmallocChunk:   ", 0ul);
>   	show_val_kb(m, "Percpu:         ", pcpu_nr_pages());
>   
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index fc5d6c88d2f0..64df797d45c6 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -220,6 +220,7 @@ enum node_stat_item {
>   	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
>   	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
>   	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
> +	NR_VMALLOC,
>   	NR_KERNEL_STACK_KB,	/* measured in KiB */
>   #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
>   	NR_KERNEL_SCS_KB,	/* measured in KiB */
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index e8e94f90d686..3b02c0c6b371 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -286,8 +286,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb);
>   #ifdef CONFIG_MMU
>   #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
>   
> -unsigned long vmalloc_nr_pages(void);
> -
>   int vm_area_map_pages(struct vm_struct *area, unsigned long start,
>   		      unsigned long end, struct page **pages);
>   void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
> @@ -304,7 +302,6 @@ static inline void set_vm_flush_reset_perms(void *addr)
>   #else  /* !CONFIG_MMU */
>   #define VMALLOC_TOTAL 0UL
>   
> -static inline unsigned long vmalloc_nr_pages(void) { return 0; }
>   static inline void set_vm_flush_reset_perms(void *addr) {}
>   #endif /* CONFIG_MMU */
>   
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index e286c2d2068c..a5fc7795aafd 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1063,14 +1063,8 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
>   static void drain_vmap_area_work(struct work_struct *work);
>   static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
>   
> -static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
>   static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
>   
> -unsigned long vmalloc_nr_pages(void)
> -{
> -	return atomic_long_read(&nr_vmalloc_pages);
> -}
> -
>   static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
>   {
>   	struct rb_node *n = root->rb_node;
> @@ -3463,11 +3457,11 @@ void vfree(const void *addr)
>   		 * High-order allocs for huge vmallocs are split, so
>   		 * can be freed as an array of order-0 allocations
>   		 */
> +		if (!(vm->flags & VM_MAP_PUT_PAGES))
> +			dec_node_page_state(page, NR_VMALLOC);
>   		__free_page(page);
>   		cond_resched();
>   	}
> -	if (!(vm->flags & VM_MAP_PUT_PAGES))
> -		atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
>   	kvfree(vm->pages);
>   	kfree(vm);
>   }
> @@ -3655,6 +3649,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
>   			continue;
>   		}
>   
> +		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << large_order);
> +
>   		split_page(page, large_order);
>   		for (i = 0; i < (1U << large_order); i++)
>   			pages[nr_allocated + i] = page + i;
> @@ -3675,6 +3671,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
>   	if (!order) {
>   		while (nr_allocated < nr_pages) {
>   			unsigned int nr, nr_pages_request;
> +			int i;
>   
>   			/*
>   			 * A maximum allowed request is hard-coded and is 100
> @@ -3698,6 +3695,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
>   							nr_pages_request,
>   							pages + nr_allocated);
>   
> +			for (i = nr_allocated; i < nr_allocated + nr; i++)
> +				inc_node_page_state(pages[i], NR_VMALLOC);
> +
>   			nr_allocated += nr;
>   
>   			/*
> @@ -3722,6 +3722,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
>   		if (unlikely(!page))
>   			break;
>   
> +		mod_node_page_state(page_pgdat(page), NR_VMALLOC, 1 << order);
> +
>   		/*
>   		 * High-order allocations must be able to be treated as
>   		 * independent small pages by callers (as they can with
> @@ -3864,7 +3866,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>   			vmalloc_gfp_adjust(gfp_mask, page_order), node,
>   			page_order, nr_small_pages, area->pages);
>   
> -	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
>   	/* All pages of vm should be charged to same memcg, so use first one. */
>   	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
>   		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index d6e814c82952..bc199c7cd07b 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1270,6 +1270,7 @@ const char * const vmstat_text[] = {
>   	[I(NR_KERNEL_MISC_RECLAIMABLE)]		= "nr_kernel_misc_reclaimable",
>   	[I(NR_FOLL_PIN_ACQUIRED)]		= "nr_foll_pin_acquired",
>   	[I(NR_FOLL_PIN_RELEASED)]		= "nr_foll_pin_released",
> +	[I(NR_VMALLOC)]				= "nr_vmalloc",
>   	[I(NR_KERNEL_STACK_KB)]			= "nr_kernel_stack",
>   #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
>   	[I(NR_KERNEL_SCS_KB)]			= "nr_shadow_call_stack",


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-06-19 12:53 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-02-23 16:01 [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Johannes Weiner
2026-02-23 16:01 ` [PATCH v2 2/2] mm: memcontrol: switch to native NR_VMALLOC vmstat counter Johannes Weiner
2026-02-23 19:24   ` Roman Gushchin
2026-02-23 19:48   ` Vishal Moola (Oracle)
2026-02-23 19:22 ` [PATCH v2 1/2] mm: vmalloc: streamline vmalloc memory accounting Roman Gushchin
2026-02-23 19:48 ` Vishal Moola (Oracle)
2026-06-19 12:53 ` [REGRESSION] " Aishwarya Rambhadran

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.