From: Alexandre Ghiti <alex@ghiti.fr>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>,
Dennis Zhou <dennis@kernel.org>, Tejun Heo <tj@kernel.org>,
Christoph Lameter <cl@gentwo.org>,
Vlastimil Babka <vbabka@kernel.org>,
Yosry Ahmed <yosry@kernel.org>, Nhat Pham <nphamcs@gmail.com>,
Sergey Senozhatsky <senozhatsky@chromium.org>,
Chengming Zhou <chengming.zhou@linux.dev>,
Suren Baghdasaryan <surenb@google.com>,
Qi Zheng <qi.zheng@linux.dev>,
David Hildenbrand <david@kernel.org>,
Lorenzo Stoakes <ljs@kernel.org>,
Minchan Kim <minchan@kernel.org>, Mike Rapoport <rppt@kernel.org>,
Axel Rasmussen <axelrasmussen@google.com>,
Barry Song <baohua@kernel.org>, Kairui Song <kasong@tencent.com>,
Wei Xu <weixugc@google.com>, Yuanchu Xie <yuanchu@google.com>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Joshua Hahn <joshua.hahnjy@gmail.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
cgroups@vger.kernel.org, Alexandre Ghiti <alex@ghiti.fr>
Subject: [PATCH 4/8] mm: memcontrol: track MEMCG_KMEM per NUMA node
Date: Mon, 11 May 2026 22:20:39 +0200 [thread overview]
Message-ID: <20260511202136.330358-5-alex@ghiti.fr> (raw)
In-Reply-To: <20260511202136.330358-1-alex@ghiti.fr>
This patch gets rid of MEMCG_KMEM and wires all the "generic" functions
by introducing per-node obj_cgroup objects.
Note that it does not yet convert the kmem users to proper per-memcg-per-node
accounting; that conversion is done in upcoming patches.
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
---
include/linux/memcontrol.h | 23 ++++++++++----
include/linux/mmzone.h | 1 +
mm/memcontrol.c | 64 ++++++++++++++++++++++++--------------
mm/vmstat.c | 1 +
4 files changed, 59 insertions(+), 30 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 568ab08f42af..17cf823160e4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,6 @@ enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
MEMCG_PERCPU_B,
- MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
MEMCG_ZSWAP_INCOMP,
@@ -126,9 +125,10 @@ struct mem_cgroup_per_node {
struct list_head objcg_list;
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
- /* slab stats for nmi context */
+ /* slab and kmem stats for nmi context */
atomic_t slab_reclaimable;
atomic_t slab_unreclaimable;
+ atomic_t kmem;
#endif
};
@@ -190,6 +190,7 @@ struct obj_cgroup {
struct rcu_head rcu;
};
bool is_root;
+ int nid;
};
/*
@@ -254,10 +255,6 @@ struct mem_cgroup {
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];
-#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
- /* MEMCG_KMEM for nmi context */
- atomic_t kmem_stat;
-#endif
/*
* Hint of reclaim pressure for socket memroy management. Note
* that this indicator should NOT be used in legacy cgroup mode
@@ -776,6 +773,20 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
percpu_ref_put(&objcg->refcnt);
}
+static inline struct obj_cgroup *obj_cgroup_get_nid(struct obj_cgroup *objcg,
+ int nid)
+{
+ struct obj_cgroup *nid_objcg;
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ nid_objcg = rcu_dereference(memcg->nodeinfo[nid]->objcg);
+ rcu_read_unlock();
+
+ return nid_objcg;
+}
+
static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
{
return !memcg || css_tryget(&memcg->css);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9adb2ad21da5..97eb168fd7f3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -326,6 +326,7 @@ enum node_stat_item {
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
+ NR_KMEM,
NR_BALLOON_PAGES,
NR_KERNEL_FILE_PAGES,
NR_GPU_ACTIVE, /* Pages assigned to GPU objects */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index aaaa6a8b9f15..979a847e542a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -136,6 +136,7 @@ bool mem_cgroup_kmem_disabled(void)
}
static void memcg_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
+static void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val);
static void obj_cgroup_release(struct percpu_ref *ref)
{
@@ -170,9 +171,11 @@ static void obj_cgroup_release(struct percpu_ref *ref)
if (nr_pages) {
struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
memcg = get_mem_cgroup_from_objcg(objcg);
- mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(objcg->nid));
+ mod_lruvec_state(lruvec, NR_KMEM, -nr_pages);
memcg1_account_kmem(memcg, -nr_pages);
if (!mem_cgroup_is_root(memcg))
memcg_uncharge(memcg, nr_pages);
@@ -423,13 +426,13 @@ static const unsigned int memcg_node_stat_items[] = {
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
+ NR_KMEM,
};
static const unsigned int memcg_stat_items[] = {
MEMCG_SWAP,
MEMCG_SOCK,
MEMCG_PERCPU_B,
- MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
MEMCG_ZSWAP_INCOMP,
@@ -1537,7 +1540,7 @@ struct memory_stat {
static const struct memory_stat memory_stats[] = {
{ "anon", NR_ANON_MAPPED },
{ "file", NR_FILE_PAGES },
- { "kernel", MEMCG_KMEM },
+ { "kernel", NR_KMEM },
{ "kernel_stack", NR_KERNEL_STACK_KB },
{ "pagetables", NR_PAGETABLE },
{ "sec_pagetables", NR_SECONDARY_PAGETABLE },
@@ -3004,20 +3007,26 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
}
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
-static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int nid, int val)
{
if (likely(!in_nmi())) {
- mod_memcg_state(memcg, MEMCG_KMEM, val);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+ mod_lruvec_state(lruvec, NR_KMEM, val);
} else {
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+
/* preemption is disabled in_nmi(). */
css_rstat_updated(&memcg->css, smp_processor_id());
- atomic_add(val, &memcg->kmem_stat);
+ atomic_add(val, &pn->kmem);
}
}
#else
-static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int nid, int val)
{
- mod_memcg_state(memcg, MEMCG_KMEM, val);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+ mod_lruvec_state(lruvec, NR_KMEM, val);
}
#endif
@@ -3033,7 +3042,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
memcg = get_mem_cgroup_from_objcg(objcg);
- account_kmem_nmi_safe(memcg, -nr_pages);
+ account_kmem_nmi_safe(memcg, objcg->nid, -nr_pages);
memcg1_account_kmem(memcg, -nr_pages);
if (!mem_cgroup_is_root(memcg))
refill_stock(memcg, nr_pages);
@@ -3061,7 +3070,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
if (ret)
goto out;
- account_kmem_nmi_safe(memcg, nr_pages);
+ account_kmem_nmi_safe(memcg, objcg->nid, nr_pages);
memcg1_account_kmem(memcg, nr_pages);
out:
css_put(&memcg->css);
@@ -3238,10 +3247,11 @@ static void drain_obj_stock(struct obj_stock_pcp *stock)
if (nr_pages) {
struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
memcg = get_mem_cgroup_from_objcg(old);
-
- mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(old->nid));
+ mod_lruvec_state(lruvec, NR_KMEM, -nr_pages);
memcg1_account_kmem(memcg, -nr_pages);
if (!mem_cgroup_is_root(memcg))
memcg_uncharge(memcg, nr_pages);
@@ -3250,7 +3260,7 @@ static void drain_obj_stock(struct obj_stock_pcp *stock)
}
/*
- * The leftover is flushed to the centralized per-memcg value.
+ * The leftover is flushed to the per-node per-memcg value.
* On the next attempt to refill obj stock it will be moved
* to a per-cpu stock (probably, on an other CPU), see
* refill_obj_stock().
@@ -3417,7 +3427,7 @@ void obj_cgroup_account_kmem(struct obj_cgroup *objcg, unsigned int nr_pages)
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
- account_kmem_nmi_safe(memcg, nr_pages);
+ account_kmem_nmi_safe(memcg, objcg->nid, nr_pages);
memcg1_account_kmem(memcg, nr_pages);
rcu_read_unlock();
}
@@ -4165,6 +4175,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (unlikely(mem_cgroup_is_root(memcg)))
objcg->is_root = true;
+ objcg->nid = nid;
objcg->memcg = memcg;
rcu_assign_pointer(memcg->nodeinfo[nid]->objcg, objcg);
obj_cgroup_get(objcg);
@@ -4369,15 +4380,6 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
{
int nid;
- if (atomic_read(&memcg->kmem_stat)) {
- int kmem = atomic_xchg(&memcg->kmem_stat, 0);
- int index = memcg_stats_index(MEMCG_KMEM);
-
- memcg->vmstats->state[index] += kmem;
- if (parent)
- parent->vmstats->state_pending[index] += kmem;
- }
-
for_each_node_state(nid, N_MEMORY) {
struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
struct lruvec_stats *lstats = pn->lruvec_stats;
@@ -4408,6 +4410,18 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
if (parent)
parent->vmstats->state_pending[index] += slab;
}
+ if (atomic_read(&pn->kmem)) {
+ int kmem = atomic_xchg(&pn->kmem, 0);
+ int index = memcg_stats_index(NR_KMEM);
+
+ mod_node_page_state(NODE_DATA(nid), NR_KMEM, kmem);
+ lstats->state[index] += kmem;
+ memcg->vmstats->state[index] += kmem;
+ if (plstats)
+ plstats->state_pending[index] += kmem;
+ if (parent)
+ parent->vmstats->state_pending[index] += kmem;
+ }
}
}
#else
@@ -5173,7 +5187,9 @@ static void uncharge_batch(const struct uncharge_gather *ug)
if (ug->nr_memory) {
memcg_uncharge(memcg, ug->nr_memory);
if (ug->nr_kmem) {
- mod_memcg_state(memcg, MEMCG_KMEM, -ug->nr_kmem);
+ struct lruvec *lruvec =
+ mem_cgroup_lruvec(memcg, NODE_DATA(ug->objcg->nid));
+ mod_lruvec_state(lruvec, NR_KMEM, -ug->nr_kmem);
memcg1_account_kmem(memcg, -ug->nr_kmem);
}
memcg1_oom_recover(memcg);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f534972f517d..d55437d1852e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1293,6 +1293,7 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_HUGETLB_PAGE
[I(NR_HUGETLB)] = "nr_hugetlb",
#endif
+ [I(NR_KMEM)] = "nr_kmem",
[I(NR_BALLOON_PAGES)] = "nr_balloon_pages",
[I(NR_KERNEL_FILE_PAGES)] = "nr_kernel_file_pages",
[I(NR_GPU_ACTIVE)] = "nr_gpu_active",
--
2.54.0
next prev parent reply other threads:[~2026-05-11 20:26 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-11 20:20 [PATCH 0/8] per-memcg-per-node kmem accounting Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 1/8] mm: memcontrol: propagate NMI slab stats to memcg vmstats Alexandre Ghiti
2026-05-11 22:49 ` Shakeel Butt
2026-05-11 20:20 ` [PATCH 2/8] mm: percpu: charge obj_exts allocation with __GFP_ACCOUNT Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 3/8] mm: percpu: Split memcg charging and kmem accounting Alexandre Ghiti
2026-05-11 20:20 ` Alexandre Ghiti [this message]
2026-05-11 20:20 ` [PATCH 5/8] mm: memcontrol: per-node kmem accounting for page charges Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 6/8] mm: slab: per-node kmem accounting for slab Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 7/8] mm: percpu: per-node kmem accounting using local credit Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 8/8] mm: zswap: per-node kmem accounting for zswap/zsmalloc Alexandre Ghiti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260511202136.330358-5-alex@ghiti.fr \
--to=alex@ghiti.fr \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=baohua@kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=chengming.zhou@linux.dev \
--cc=cl@gentwo.org \
--cc=david@kernel.org \
--cc=dennis@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=joshua.hahnjy@gmail.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=mhocko@kernel.org \
--cc=minchan@kernel.org \
--cc=muchun.song@linux.dev \
--cc=nphamcs@gmail.com \
--cc=qi.zheng@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=rppt@kernel.org \
--cc=senozhatsky@chromium.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=tj@kernel.org \
--cc=vbabka@kernel.org \
--cc=weixugc@google.com \
--cc=yosry@kernel.org \
--cc=yuanchu@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox