* [PATCH] mm/percpu, memcontrol: Per-memcg-lruvec percpu accounting
@ 2026-03-27 19:19 Joshua Hahn
2026-03-30 12:03 ` Michal Hocko
0 siblings, 1 reply; 2+ messages in thread
From: Joshua Hahn @ 2026-03-27 19:19 UTC (permalink / raw)
To: Johannes Weiner, Andrew Morton
Cc: Michal Hocko, Roman Gushchin, Shakeel Butt, Muchun Song,
David Hildenbrand, Lorenzo Stoakes, Vlastimil Babka, Dennis Zhou,
Tejun Heo, Christoph Lameter, cgroups, linux-mm, linux-kernel,
kernel-team
Convert MEMCG_PERCPU_B from a memcg_stat_item to a memcg_node_stat_item
to give visibility into per-node breakdowns for percpu allocations and
turn it into NR_PERCPU_B.
Because percpu memory is accounted at a sub-PAGE_SIZE level, we must
account node level statistics (accounted in PAGE_SIZE units) and
memcg-lruvec statistics separately. Account node statistics when the pcpu
pages are allocated, and account memcg-lruvec statistics when pcpu
objects are handed out.
To account these separately, expose mod_memcg_lruvec_state to be
used outside of memcontrol.
One functional change is that we do not account the 8 byte objcg
pointer per-memcg-lruvec. Since the objcg membership is tracked
per-memcg and not percpu, there is no appropriate lruvec to charge this
memory to (see pcpu_obj_full_size). Instead of adding additional
mechanisms to detect which lruvec the 8 byte pointer belongs to, let's
just simplify and account the pcpu objects' size.
Limit-checking is still done with the additional 8 bytes.
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
include/linux/memcontrol.h | 4 +++-
include/linux/mmzone.h | 4 +++-
mm/memcontrol.c | 12 ++++++------
mm/percpu-vm.c | 14 ++++++++++++--
mm/percpu.c | 24 ++++++++++++++++++++----
mm/vmstat.c | 1 +
6 files changed, 45 insertions(+), 14 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 086158969529..96dae769c60d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -34,7 +34,6 @@ struct kmem_cache;
enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
- MEMCG_PERCPU_B,
MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
@@ -909,6 +908,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
struct mem_cgroup *oom_domain);
void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
+void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
+
/* idx can be of type enum memcg_stat_item or node_stat_item */
void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7bd0134c241c..e38d8fe8552b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -328,6 +328,7 @@ enum node_stat_item {
#endif
NR_BALLOON_PAGES,
NR_KERNEL_FILE_PAGES,
+ NR_PERCPU_B,
NR_VM_NODE_STAT_ITEMS
};
@@ -365,7 +366,8 @@ static __always_inline bool vmstat_item_in_bytes(int idx)
* byte-precise.
*/
return (idx == NR_SLAB_RECLAIMABLE_B ||
- idx == NR_SLAB_UNRECLAIMABLE_B);
+ idx == NR_SLAB_UNRECLAIMABLE_B ||
+ idx == NR_PERCPU_B);
}
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a47fb68dd65f..b320b6a42696 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -377,6 +377,7 @@ static const unsigned int memcg_node_stat_items[] = {
NR_UNEVICTABLE,
NR_SLAB_RECLAIMABLE_B,
NR_SLAB_UNRECLAIMABLE_B,
+ NR_PERCPU_B,
WORKINGSET_REFAULT_ANON,
WORKINGSET_REFAULT_FILE,
WORKINGSET_ACTIVATE_ANON,
@@ -428,7 +429,6 @@ static const unsigned int memcg_node_stat_items[] = {
static const unsigned int memcg_stat_items[] = {
MEMCG_SWAP,
MEMCG_SOCK,
- MEMCG_PERCPU_B,
MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
@@ -920,9 +920,8 @@ static void __mod_memcg_lruvec_state(struct mem_cgroup_per_node *pn,
put_cpu();
}
-static void mod_memcg_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx,
- int val)
+void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct mem_cgroup_per_node *pn;
@@ -936,6 +935,7 @@ static void mod_memcg_lruvec_state(struct lruvec *lruvec,
get_non_dying_memcg_end();
}
+EXPORT_SYMBOL(mod_memcg_lruvec_state);
/**
* mod_lruvec_state - update lruvec memory statistics
@@ -1535,7 +1535,7 @@ static const struct memory_stat memory_stats[] = {
{ "kernel_stack", NR_KERNEL_STACK_KB },
{ "pagetables", NR_PAGETABLE },
{ "sec_pagetables", NR_SECONDARY_PAGETABLE },
- { "percpu", MEMCG_PERCPU_B },
+ { "percpu", NR_PERCPU_B },
{ "sock", MEMCG_SOCK },
{ "vmalloc", NR_VMALLOC },
{ "shmem", NR_SHMEM },
@@ -1597,7 +1597,7 @@ static const struct memory_stat memory_stats[] = {
static int memcg_page_state_unit(int item)
{
switch (item) {
- case MEMCG_PERCPU_B:
+ case NR_PERCPU_B:
case MEMCG_ZSWAP_B:
case NR_SLAB_RECLAIMABLE_B:
case NR_SLAB_UNRECLAIMABLE_B:
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 4f5937090590..e36b639f521d 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -55,7 +55,8 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
struct page **pages, int page_start, int page_end)
{
unsigned int cpu;
- int i;
+ int nr_pages = page_end - page_start;
+ int i, nid;
for_each_possible_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
@@ -65,6 +66,10 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
__free_page(page);
}
}
+
+ for_each_node(nid)
+ mod_node_page_state(NODE_DATA(nid), NR_PERCPU_B,
+ -1L * nr_pages * nr_cpus_node(nid) * PAGE_SIZE);
}
/**
@@ -84,7 +89,8 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
gfp_t gfp)
{
unsigned int cpu, tcpu;
- int i;
+ int nr_pages = page_end - page_start;
+ int i, nid;
gfp |= __GFP_HIGHMEM;
@@ -97,6 +103,10 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
goto err;
}
}
+
+ for_each_node(nid)
+ mod_node_page_state(NODE_DATA(nid), NR_PERCPU_B,
+ nr_pages * nr_cpus_node(nid) * PAGE_SIZE);
return 0;
err:
diff --git a/mm/percpu.c b/mm/percpu.c
index b0676b8054ed..4ad3b9739eb9 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1632,6 +1632,24 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
return true;
}
+static void pcpu_mod_memcg_lruvec(struct obj_cgroup *objcg, int charge)
+{
+ struct mem_cgroup *memcg;
+ int nid;
+
+ memcg = obj_cgroup_memcg(objcg);
+ for_each_node(nid) {
+ struct lruvec *lruvec;
+ unsigned int nr_cpus = nr_cpus_node(nid);
+
+ if (!nr_cpus)
+ continue;
+
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+ mod_memcg_lruvec_state(lruvec, NR_PERCPU_B, nr_cpus * charge);
+ }
+}
+
static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
struct pcpu_chunk *chunk, int off,
size_t size)
@@ -1644,8 +1662,7 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg;
rcu_read_lock();
- mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- pcpu_obj_full_size(size));
+ pcpu_mod_memcg_lruvec(objcg, size);
rcu_read_unlock();
} else {
obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
@@ -1667,8 +1684,7 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
rcu_read_lock();
- mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- -pcpu_obj_full_size(size));
+ pcpu_mod_memcg_lruvec(objcg, -size);
rcu_read_unlock();
obj_cgroup_put(objcg);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b33097ab9bc8..d73c3355be71 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1296,6 +1296,7 @@ const char * const vmstat_text[] = {
#endif
[I(NR_BALLOON_PAGES)] = "nr_balloon_pages",
[I(NR_KERNEL_FILE_PAGES)] = "nr_kernel_file_pages",
+ [I(NR_PERCPU_B)] = "nr_percpu",
#undef I
/* system-wide enum vm_stat_item counters */
--
2.52.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] mm/percpu, memcontrol: Per-memcg-lruvec percpu accounting
2026-03-27 19:19 [PATCH] mm/percpu, memcontrol: Per-memcg-lruvec percpu accounting Joshua Hahn
@ 2026-03-30 12:03 ` Michal Hocko
0 siblings, 0 replies; 2+ messages in thread
From: Michal Hocko @ 2026-03-30 12:03 UTC (permalink / raw)
To: Joshua Hahn
Cc: Johannes Weiner, Andrew Morton, Roman Gushchin, Shakeel Butt,
Muchun Song, David Hildenbrand, Lorenzo Stoakes, Vlastimil Babka,
Dennis Zhou, Tejun Heo, Christoph Lameter, cgroups, linux-mm,
linux-kernel, kernel-team
On Fri 27-03-26 12:19:35, Joshua Hahn wrote:
> Convert MEMCG_PERCPU_B from a memcg_stat_item to a memcg_node_stat_item
> to give visibility into per-node breakdowns for percpu allocations and
> turn it into NR_PERCPU_B.
Why do we need/want this?
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-03-30 12:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-27 19:19 [PATCH] mm/percpu, memcontrol: Per-memcg-lruvec percpu accounting Joshua Hahn
2026-03-30 12:03 ` Michal Hocko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox