From: Qi Zheng <qi.zheng@linux.dev>
To: akpm@linux-foundation.org, hannes@cmpxchg.org, mhocko@kernel.org,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
muchun.song@linux.dev, yosry@kernel.org
Cc: cgroups@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Qi Zheng <zhengqi.arch@bytedance.com>
Subject: [PATCH v3] mm: memcontrol: fix rcu unbalance in get_non_dying_memcg_end()
Date: Wed, 29 Apr 2026 15:31:05 +0800 [thread overview]
Message-ID: <20260429073105.44472-1-qi.zheng@linux.dev> (raw)
From: Qi Zheng <zhengqi.arch@bytedance.com>
Currently, get_non_dying_memcg_start() and get_non_dying_memcg_end() both
evaluate cgroup_subsys_on_dfl(memory_cgrp_subsys) independently to
determine whether to acquire or release the RCU read lock.
However, the result of cgroup_subsys_on_dfl() can change dynamically at
runtime due to cgroup hierarchy rebinding (e.g., when the memory
controller is moved between cgroup v1 and v2 hierarchies). This can cause
the following warning:
=====================================
WARNING: bad unlock balance detected!
7.0.0-next-20260420+ #83 Tainted: G W
-------------------------------------
memcg-repro/270 is trying to release lock (rcu_read_lock) at:
[<ffffffff815f57f7>] rcu_read_unlock+0x17/0x60
but there are no more locks to release!
other info that might help us debug this:
1 lock held by memcg-repro/270:
#0: ffff888102fa2088 (vm_lock){++++}-{0:0}, at: do_user_addr_fault+0x285/0x880
stack backtrace:
CPU: 0 UID: 0 PID: 270 Comm: memcg-repro Tainted: G W 7.0.0-next-20260420+ #
Tainted: [W]=WARN
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
Call Trace:
<TASK>
? rcu_read_unlock+0x17/0x60
dump_stack_lvl+0x77/0xb0
print_unlock_imbalance_bug+0xe0/0xf0
? rcu_read_unlock+0x17/0x60
lock_release+0x21d/0x2a0
rcu_read_unlock+0x1c/0x60
do_pte_missing+0x233/0xb40
__handle_mm_fault+0x80e/0xcd0
handle_mm_fault+0x146/0x310
do_user_addr_fault+0x303/0x880
exc_page_fault+0x9b/0x270
asm_exc_page_fault+0x26/0x30
RIP: 0033:0x5590e4eb41ea
Code: 61 cc 66 0f 6f e0 66 0f 61 c2 66 0f db cd 66 0f 69 e2 66 0f 6f d0 66 0f 69 d4 66 0f 61 0
RSP: 002b:00007ffcad25f030 EFLAGS: 00010202
RAX: 00005590e4eb8010 RBX: 00007ffcad260f7d RCX: 00007f73c474d44d
RDX: 00005590e4eb80a0 RSI: 00005590e4eb503c RDI: 000000000000000f
RBP: 00005590e4eb70a0 R08: 0000000000000000 R09: 00007f73c483a680
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007ffcad25f180 R14: 00005590e4eb6dd8 R15: 00007f73c4869020
</TASK>
------------[ cut here ]------------
Fix this by explicitly tracking the RCU lock state, ensuring that
rcu_read_unlock() in get_non_dying_memcg_end() is strictly paired with
the lock acquisition, regardless of any runtime rebinding events.
Fixes: 8285917d6f38 ("mm: memcontrol: prepare for reparenting non-hierarchical stats")
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
---
Changes in v3:
- fix -Werror=uninitialized (pointed by Sashiko)
- collect Reviewed-by
Changes in v2:
- remove unnessary rcu_locked setting under !CONFIG_MEMCG_V1
(pointed by Shakeel Butt)
- collect Acked-by
mm/memcontrol.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3d98ab41f1f1..c03d4787d4668 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -805,12 +805,17 @@ static long memcg_state_val_in_pages(int idx, long val)
* Used in mod_memcg_state() and mod_memcg_lruvec_state() to avoid race with
* reparenting of non-hierarchical state_locals.
*/
-static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg,
+ bool *rcu_locked)
{
- if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ /* Rebinding can cause this value to be changed at runtime */
+ if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+ *rcu_locked = false;
return memcg;
+ }
rcu_read_lock();
+ *rcu_locked = true;
while (memcg_is_dying(memcg))
memcg = parent_mem_cgroup(memcg);
@@ -818,20 +823,21 @@ static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *me
return memcg;
}
-static inline void get_non_dying_memcg_end(void)
+static inline void get_non_dying_memcg_end(bool rcu_locked)
{
- if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ if (!rcu_locked)
return;
rcu_read_unlock();
}
#else
-static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg,
+ bool *rcu_locked)
{
return memcg;
}
-static inline void get_non_dying_memcg_end(void)
+static inline void get_non_dying_memcg_end(bool rcu_locked)
{
}
#endif
@@ -865,12 +871,14 @@ static void __mod_memcg_state(struct mem_cgroup *memcg,
void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
int val)
{
+ bool rcu_locked = false;
+
if (mem_cgroup_disabled())
return;
- memcg = get_non_dying_memcg_start(memcg);
+ memcg = get_non_dying_memcg_start(memcg, &rcu_locked);
__mod_memcg_state(memcg, idx, val);
- get_non_dying_memcg_end();
+ get_non_dying_memcg_end(rcu_locked);
}
#ifdef CONFIG_MEMCG_V1
@@ -933,14 +941,15 @@ static void mod_memcg_lruvec_state(struct lruvec *lruvec,
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct mem_cgroup_per_node *pn;
struct mem_cgroup *memcg;
+ bool rcu_locked = false;
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- memcg = get_non_dying_memcg_start(pn->memcg);
+ memcg = get_non_dying_memcg_start(pn->memcg, &rcu_locked);
pn = memcg->nodeinfo[pgdat->node_id];
__mod_memcg_lruvec_state(pn, idx, val);
- get_non_dying_memcg_end();
+ get_non_dying_memcg_end(rcu_locked);
}
/**
--
2.20.1
reply other threads:[~2026-04-29 7:31 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260429073105.44472-1-qi.zheng@linux.dev \
--to=qi.zheng@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=yosry@kernel.org \
--cc=zhengqi.arch@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.