From: Alexandre Ghiti <alex@ghiti.fr>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>,
Dennis Zhou <dennis@kernel.org>, Tejun Heo <tj@kernel.org>,
Christoph Lameter <cl@gentwo.org>,
Vlastimil Babka <vbabka@kernel.org>,
Yosry Ahmed <yosry@kernel.org>, Nhat Pham <nphamcs@gmail.com>,
Sergey Senozhatsky <senozhatsky@chromium.org>,
Chengming Zhou <chengming.zhou@linux.dev>,
Suren Baghdasaryan <surenb@google.com>,
Qi Zheng <qi.zheng@linux.dev>,
David Hildenbrand <david@kernel.org>,
Lorenzo Stoakes <ljs@kernel.org>,
Minchan Kim <minchan@kernel.org>, Mike Rapoport <rppt@kernel.org>,
Axel Rasmussen <axelrasmussen@google.com>,
Barry Song <baohua@kernel.org>, Kairui Song <kasong@tencent.com>,
Wei Xu <weixugc@google.com>, Yuanchu Xie <yuanchu@google.com>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Joshua Hahn <joshua.hahnjy@gmail.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
cgroups@vger.kernel.org, Alexandre Ghiti <alex@ghiti.fr>
Subject: [PATCH 7/8] mm: percpu: per-node kmem accounting using local credit
Date: Mon, 11 May 2026 22:20:42 +0200 [thread overview]
Message-ID: <20260511202136.330358-8-alex@ghiti.fr> (raw)
In-Reply-To: <20260511202136.330358-1-alex@ghiti.fr>
Now that the memcg charging is decoupled from the kmem accounting, we
can't use obj_stock to handle the percpu accounting because our
precharged pages may get drained. That's a problem because
pcpu_memcg_post_alloc_hook() assumes enough charged pages are already
available, and we cannot charge more pages at that point since charging
may fail, which would defeat the purpose of the precharge.
So instead of using obj_stock, use a local per-node credit that serves
the same purpose and whose surplus eventually gets refilled into the
stock.
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
---
mm/percpu.c | 88 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 76 insertions(+), 12 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index 7c67dc2e4878..64b327fe3c26 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1614,6 +1614,16 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
}
#ifdef CONFIG_MEMCG
+static unsigned int pcpu_memcg_nr_precharge_pages(size_t size)
+{
+ size_t total = pcpu_obj_total_size(size);
+
+ if (total < PAGE_SIZE)
+ return num_possible_nodes();
+
+ return PAGE_ALIGN(total) >> PAGE_SHIFT;
+}
+
static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
struct obj_cgroup **objcgp)
{
@@ -1626,8 +1636,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
if (!objcg || obj_cgroup_is_root(objcg))
return true;
- if (obj_cgroup_precharge(objcg, gfp,
- PAGE_ALIGN(pcpu_obj_total_size(size)) >> PAGE_SHIFT))
+ if (obj_cgroup_precharge(objcg, gfp, pcpu_memcg_nr_precharge_pages(size)))
return false;
*objcgp = objcg;
@@ -1642,29 +1651,68 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
return;
if (likely(chunk && chunk->obj_exts)) {
- size_t total = pcpu_obj_total_size(size);
- size_t remainder = PAGE_ALIGN(total) - total;
+ unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned int precharge_pages = pcpu_memcg_nr_precharge_pages(size);
+ unsigned int pages_used = 0;
+ unsigned int node_credit[MAX_NUMNODES] = { 0 };
+ unsigned int cpu;
+ int nid;
obj_cgroup_get(objcg);
chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg;
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
- total);
+ pcpu_obj_total_size(size));
rcu_read_unlock();
- obj_cgroup_account_kmem(objcg, PAGE_ALIGN(total) >> PAGE_SHIFT);
- if (remainder)
- obj_cgroup_uncharge(objcg, remainder);
+ for_each_possible_cpu(cpu) {
+ unsigned int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ void *addr = (void *)pcpu_chunk_addr(chunk, cpu,
+ PFN_DOWN(off) + i);
+ size_t page_sz = i < nr_pages - 1 ?
+ PAGE_SIZE : size - (nr_pages - 1) * PAGE_SIZE;
+
+ nid = page_to_nid(pcpu_addr_to_page(addr));
+
+ if (node_credit[nid] < page_sz) {
+ struct obj_cgroup *nid_objcg;
+
+ nid_objcg = obj_cgroup_get_nid(objcg, nid);
+ obj_cgroup_account_kmem(nid_objcg, 1);
+ node_credit[nid] += PAGE_SIZE;
+ pages_used++;
+ }
+
+ node_credit[nid] -= page_sz;
+ }
+ }
+
+ /* Return unused precharged pages */
+ if (pages_used < precharge_pages)
+ obj_cgroup_unprecharge(objcg, precharge_pages - pages_used);
+
+ /* Put leftover per-node credit into stock */
+ for_each_online_node(nid) {
+ if (node_credit[nid] > 0) {
+ struct obj_cgroup *nid_objcg;
+
+ nid_objcg = obj_cgroup_get_nid(objcg, nid);
+ obj_cgroup_uncharge(nid_objcg, node_credit[nid]);
+ }
+ }
} else {
- obj_cgroup_unprecharge(objcg,
- PAGE_ALIGN(pcpu_obj_total_size(size)) >> PAGE_SHIFT);
+ obj_cgroup_unprecharge(objcg, pcpu_memcg_nr_precharge_pages(size));
}
}
static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
{
+ unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct obj_cgroup *objcg;
+ unsigned int cpu;
if (unlikely(!chunk->obj_exts))
return;
@@ -1674,13 +1722,29 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
return;
chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL;
- obj_cgroup_uncharge(objcg, pcpu_obj_total_size(size));
-
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
-pcpu_obj_total_size(size));
rcu_read_unlock();
+ for_each_possible_cpu(cpu) {
+ unsigned int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ void *addr = (void *)pcpu_chunk_addr(chunk, cpu,
+ PFN_DOWN(off) + i);
+ struct obj_cgroup *nid_objcg;
+ int nid;
+ size_t unc;
+
+ nid = page_to_nid(pcpu_addr_to_page(addr));
+ nid_objcg = obj_cgroup_get_nid(objcg, nid);
+ unc = i < nr_pages - 1 ?
+ PAGE_SIZE : size - (nr_pages - 1) * PAGE_SIZE;
+ obj_cgroup_uncharge(nid_objcg, unc);
+ }
+ }
+
obj_cgroup_put(objcg);
}
--
2.54.0
next prev parent reply other threads:[~2026-05-11 20:29 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-11 20:20 [PATCH 0/8] per-memcg-per-node kmem accounting Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 1/8] mm: memcontrol: propagate NMI slab stats to memcg vmstats Alexandre Ghiti
2026-05-11 22:49 ` Shakeel Butt
2026-05-11 20:20 ` [PATCH 2/8] mm: percpu: charge obj_exts allocation with __GFP_ACCOUNT Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 3/8] mm: percpu: Split memcg charging and kmem accounting Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 4/8] mm: memcontrol: track MEMCG_KMEM per NUMA node Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 5/8] mm: memcontrol: per-node kmem accounting for page charges Alexandre Ghiti
2026-05-11 20:20 ` [PATCH 6/8] mm: slab: per-node kmem accounting for slab Alexandre Ghiti
2026-05-11 20:20 ` Alexandre Ghiti [this message]
2026-05-11 20:20 ` [PATCH 8/8] mm: zswap: per-node kmem accounting for zswap/zsmalloc Alexandre Ghiti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260511202136.330358-8-alex@ghiti.fr \
--to=alex@ghiti.fr \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=baohua@kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=chengming.zhou@linux.dev \
--cc=cl@gentwo.org \
--cc=david@kernel.org \
--cc=dennis@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=joshua.hahnjy@gmail.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=mhocko@kernel.org \
--cc=minchan@kernel.org \
--cc=muchun.song@linux.dev \
--cc=nphamcs@gmail.com \
--cc=qi.zheng@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=rppt@kernel.org \
--cc=senozhatsky@chromium.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=tj@kernel.org \
--cc=vbabka@kernel.org \
--cc=weixugc@google.com \
--cc=yosry@kernel.org \
--cc=yuanchu@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox