From: Qi Zheng <qi.zheng@linux.dev>
To: akpm@linux-foundation.org, david@kernel.org, kasong@tencent.com,
shakeel.butt@linux.dev, baohua@kernel.org,
axelrasmussen@google.com, yuanchu@google.com, weixugc@google.com,
hannes@cmpxchg.org, harry@kernel.org, muchun.song@linux.dev,
peiyang_he@smail.nju.edu.cn, mhocko@kernel.org,
roman.gushchin@linux.dev, ljs@kernel.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Qi Zheng <zhengqi.arch@bytedance.com>,
stable@vger.kernel.org
Subject: [PATCH v2] mm: mglru: fix stale batch updates after memcg reparenting
Date: Tue, 23 Jun 2026 10:42:37 +0800 [thread overview]
Message-ID: <20260623024237.45990-1-qi.zheng@linux.dev> (raw)
From: Qi Zheng <zhengqi.arch@bytedance.com>
The MGLRU page table walker batches per-generation size deltas in
walk->nr_pages while walking page tables without holding the lruvec lock.
reset_batch_size() later folds those deltas into walk->lruvec under the
lruvec lock.
The page table walker can run concurrently with the memcg reparenting path
as follows:
CPU0                                    CPU1
====                                    ====
walk_mm
--> walk_page_range
    --> update_batch_size
        --> walk->nr_pages += delta
                                        mem_cgroup_css_offline
                                        --> memcg_reparent_objcgs
                                            --> lock lruvec
                                                lru_gen_reparent_memcg
                                                --> reparent child folios to parent
                                                unlock lruvec
lock lruvec
reset_batch_size
--> child lrugen->nr_pages += delta
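Because the child's folios have already been moved to the parent, the late
update can leave a stale non-zero count in the child's lrugen->nr_pages,
which triggers the following warning in lru_gen_exit_memcg():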
VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
sizeof(lruvec->lrugen.nr_pages)));
To fix it, add lrugen->reparented to remember the new owner of a
reparented lruvec, and make reset_batch_size() charge pending deltas to
that owner.
Reported-by: Peiyang He <peiyang_he@smail.nju.edu.cn>
Closes: https://lore.kernel.org/all/5A9E929D82717101+12fcf643-efb8-4b9a-a53a-1e28cc894f0b@smail.nju.edu.cn
Fixes: f304652609ea ("mm: vmscan: prepare for reparenting MGLRU folios")
Cc: <stable@vger.kernel.org>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Barry Song <baohua@kernel.org>
---
include/linux/mmzone.h | 4 ++++
mm/vmscan.c | 43 +++++++++++++++++++++++++++++++++++-------
2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ca2712187147..0d572db2ef64 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -584,6 +584,10 @@ struct lru_gen_folio {
u8 gen;
/* the list segment this lru_gen_folio belongs to */
u8 seg;
+#ifdef CONFIG_MEMCG
+ /* the lruvec this lruvec has been reparented to */
+ struct lruvec *reparented;
+#endif
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_node list;
};
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 35c3bb15ae96..64362cbed814 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3262,10 +3262,37 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
walk->nr_pages[new_gen][type][zone] += delta;
}
+#ifdef CONFIG_MEMCG
+static struct lruvec *lock_batch_lruvec(struct lruvec *lruvec)
+{
+ struct lruvec *reparented;
+
+ for (;;) {
+ lruvec_lock_irq(lruvec);
+
+ reparented = lruvec->lrugen.reparented;
+ if (!reparented)
+ break;
+
+ lruvec_unlock_irq(lruvec);
+ lruvec = reparented;
+ }
+
+ return lruvec;
+}
+#else
+static struct lruvec *lock_batch_lruvec(struct lruvec *lruvec)
+{
+ lruvec_lock_irq(lruvec);
+
+ return lruvec;
+}
+#endif
+
static void reset_batch_size(struct lru_gen_mm_walk *walk)
{
int gen, type, zone;
- struct lruvec *lruvec = walk->lruvec;
+ struct lruvec *lruvec = lock_batch_lruvec(walk->lruvec);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
walk->batched = 0;
@@ -3285,6 +3312,8 @@ static void reset_batch_size(struct lru_gen_mm_walk *walk)
lru += LRU_ACTIVE;
__update_lru_size(lruvec, lru, zone, delta);
}
+
+ lruvec_unlock_irq(lruvec);
}
static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args)
@@ -3779,11 +3808,8 @@ static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
mmap_read_unlock(mm);
}
- if (walk->batched) {
- lruvec_lock_irq(lruvec);
+ if (walk->batched)
reset_batch_size(walk);
- lruvec_unlock_irq(lruvec);
- }
cond_resched();
} while (err == -EAGAIN);
@@ -4563,6 +4589,8 @@ void lru_gen_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent,
mem_cgroup_update_lru_size(parent_lruvec, lru, zid, size);
}
}
+
+ child_lruvec->lrugen.reparented = parent_lruvec;
}
#endif /* CONFIG_MEMCG */
@@ -4867,9 +4895,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
walk = current->reclaim_state->mm_walk;
if (walk && walk->batched) {
walk->lruvec = lruvec;
- lruvec_lock_irq(lruvec);
reset_batch_size(walk);
- lruvec_unlock_irq(lruvec);
}
mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(sc),
@@ -5784,6 +5810,9 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
lrugen->max_seq = MIN_NR_GENS + 1;
lrugen->enabled = lru_gen_enabled();
+#ifdef CONFIG_MEMCG
+ lrugen->reparented = NULL;
+#endif
for (i = 0; i <= MIN_NR_GENS + 1; i++)
lrugen->timestamps[i] = jiffies;
--
2.54.0
next reply other threads:[~2026-06-23 2:44 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-23 2:42 Qi Zheng [this message]
2026-06-23 2:56 ` [PATCH v2] mm: mglru: fix stale batch updates after memcg reparenting Qi Zheng
2026-06-23 4:03 ` Baolin Wang
2026-06-23 6:17 ` Harry Yoo
2026-06-23 7:16 ` Qi Zheng
2026-06-23 8:18 ` Harry Yoo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260623024237.45990-1-qi.zheng@linux.dev \
--to=qi.zheng@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=baohua@kernel.org \
--cc=david@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=harry@kernel.org \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=peiyang_he@smail.nju.edu.cn \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=stable@vger.kernel.org \
--cc=weixugc@google.com \
--cc=yuanchu@google.com \
--cc=zhengqi.arch@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.