cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: hannes@cmpxchg.org, mhocko@kernel.org, roman.gushchin@linux.dev,
	shakeel.butt@linux.dev, muchun.song@linux.dev,
	akpm@linux-foundation.org, david@fromorbit.com,
	zhengqi.arch@bytedance.com, yosry.ahmed@linux.dev,
	nphamcs@gmail.com, chengming.zhou@linux.dev
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-mm@kvack.org, hamzamahfooz@linux.microsoft.com,
	apais@linux.microsoft.com, Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH RFC 24/28] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Date: Tue, 15 Apr 2025 10:45:28 +0800	[thread overview]
Message-ID: <20250415024532.26632-25-songmuchun@bytedance.com> (raw)
In-Reply-To: <20250415024532.26632-1-songmuchun@bytedance.com>

The following diagram illustrates how to ensure the safety of the folio
lruvec lock when LRU folios undergo reparenting.

In the folio_lruvec_lock(folio) function:
```
    rcu_read_lock();
retry:
    lruvec = folio_lruvec(folio);
    /* There is a possibility of folio reparenting at this point. */
    spin_lock(&lruvec->lru_lock);
    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
        /*
         * The wrong lruvec lock was acquired, and a retry is required.
         * This is because the folio resides on the parent memcg lruvec
         * list.
         */
        spin_unlock(&lruvec->lru_lock);
        goto retry;
    }

    /* Reaching here indicates that folio_memcg() is stable. */
```

In the memcg_reparent_objcgs(memcg) function:
```
    spin_lock(&lruvec->lru_lock);
    spin_lock(&lruvec_parent->lru_lock);
    /* Transfer folios from the lruvec list to the parent's. */
    spin_unlock(&lruvec_parent->lru_lock);
    spin_unlock(&lruvec->lru_lock);
```

After acquiring the lruvec lock, it is necessary to verify whether
the folio has been reparented. If reparenting has occurred, the new
lruvec lock must be reacquired. During the LRU folio reparenting
process, the lruvec lock will also be acquired (this will be
implemented in a subsequent patch). Therefore, folio_memcg() remains
unchanged while the lruvec lock is held.

Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
after the lruvec lock is acquired, the lruvec_memcg_debug() check is
redundant. Hence, it is removed.

This patch serves as a preparation for the reparenting of LRU folios.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/memcontrol.h | 23 ++++++-----------
 mm/compaction.c            | 29 ++++++++++++++++-----
 mm/memcontrol.c            | 53 +++++++++++++++++++-------------------
 3 files changed, 58 insertions(+), 47 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 01239147eb11..27b23e464229 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -719,7 +719,11 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
  * folio_lruvec - return lruvec for isolating/putting an LRU folio
  * @folio: Pointer to the folio.
  *
- * This function relies on folio->mem_cgroup being stable.
+ * The user should hold an rcu read lock to protect lruvec associated with
+ * the folio from being released. But it does not prevent binding stability
+ * between the folio and the returned lruvec from being changed to its parent
+ * or ancestor (e.g. like folio_lruvec_lock() does that holds LRU lock to
+ * prevent the change).
  */
 static inline struct lruvec *folio_lruvec(struct folio *folio)
 {
@@ -742,15 +746,6 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
 struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 						unsigned long *flags);
 
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
-#else
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-#endif
-
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -1211,11 +1206,6 @@ static inline struct lruvec *folio_lruvec(struct folio *folio)
 	return &pgdat->__lruvec;
 }
 
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-
 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
 	return NULL;
@@ -1532,17 +1522,20 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 static inline void lruvec_unlock(struct lruvec *lruvec)
 {
 	spin_unlock(&lruvec->lru_lock);
+	rcu_read_unlock();
 }
 
 static inline void lruvec_unlock_irq(struct lruvec *lruvec)
 {
 	spin_unlock_irq(&lruvec->lru_lock);
+	rcu_read_unlock();
 }
 
 static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
 		unsigned long flags)
 {
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+	rcu_read_unlock();
 }
 
 /* Test requires a stable folio->memcg binding, see folio_memcg() */
diff --git a/mm/compaction.c b/mm/compaction.c
index ce45d633ddad..4abd1481d5de 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -551,6 +551,24 @@ static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
 	return true;
 }
 
+static struct lruvec *
+compact_folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags,
+				  struct compact_control *cc)
+{
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
+	compact_lock_irqsave(&lruvec->lru_lock, flags, cc);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+		goto retry;
+	}
+
+	return lruvec;
+}
+
 /*
  * Compaction requires the taking of some coarse locks that are potentially
  * very heavily contended. The lock should be periodically unlocked to avoid
@@ -872,7 +890,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 {
 	pg_data_t *pgdat = cc->zone->zone_pgdat;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
-	struct lruvec *lruvec;
+	struct lruvec *lruvec = NULL;
 	unsigned long flags = 0;
 	struct lruvec *locked = NULL;
 	struct folio *folio = NULL;
@@ -1189,18 +1207,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (!folio_test_clear_lru(folio))
 			goto isolate_fail_put;
 
-		lruvec = folio_lruvec(folio);
+		if (locked)
+			lruvec = folio_lruvec(folio);
 
 		/* If we already hold the lock, we can skip some rechecking */
-		if (lruvec != locked) {
+		if (lruvec != locked || !locked) {
 			if (locked)
 				lruvec_unlock_irqrestore(locked, flags);
 
-			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+			lruvec = compact_folio_lruvec_lock_irqsave(folio, &flags, cc);
 			locked = lruvec;
 
-			lruvec_memcg_debug(lruvec, folio);
-
 			/*
 			 * Try get exclusive access under lock. If marked for
 			 * skip, the scan is aborted unless the current context
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 694f19017699..1f0c6e7b69cc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1196,23 +1196,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 	}
 }
 
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-	struct mem_cgroup *memcg;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	memcg = folio_memcg(folio);
-
-	if (!memcg)
-		VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio);
-	else
-		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
-}
-#endif
-
 /**
  * folio_lruvec_lock - Lock the lruvec for a folio.
  * @folio: Pointer to the folio.
@@ -1222,14 +1205,20 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
  * - folio_test_lru false
  * - folio frozen (refcount of 0)
  *
- * Return: The lruvec this folio is on with its lock held.
+ * Return: The lruvec this folio is on with its lock held and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock(struct folio *folio)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock(&lruvec->lru_lock);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock(&lruvec->lru_lock);
+		goto retry;
+	}
 
 	return lruvec;
 }
@@ -1244,14 +1233,20 @@ struct lruvec *folio_lruvec_lock(struct folio *folio)
  * - folio frozen (refcount of 0)
  *
  * Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock_irq(&lruvec->lru_lock);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irq(&lruvec->lru_lock);
+		goto retry;
+	}
 
 	return lruvec;
 }
@@ -1267,15 +1262,21 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
  * - folio frozen (refcount of 0)
  *
  * Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 		unsigned long *flags)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+		goto retry;
+	}
 
 	return lruvec;
 }
-- 
2.20.1


  parent reply	other threads:[~2025-04-15  2:48 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-15  2:45 [PATCH RFC 00/28] Eliminate Dying Memory Cgroup Muchun Song
2025-04-15  2:45 ` [PATCH RFC 01/28] mm: memcontrol: remove dead code of checking parent memory cgroup Muchun Song
2025-04-17 14:35   ` Johannes Weiner
2025-04-15  2:45 ` [PATCH RFC 02/28] mm: memcontrol: use folio_memcg_charged() to avoid potential rcu lock holding Muchun Song
2025-04-17 14:48   ` Johannes Weiner
2025-04-18  2:38     ` Muchun Song
2025-04-15  2:45 ` [PATCH RFC 03/28] mm: workingset: use folio_lruvec() in workingset_refault() Muchun Song
2025-04-17 14:52   ` Johannes Weiner
2025-04-15  2:45 ` [PATCH RFC 04/28] mm: rename unlock_page_lruvec_irq and its variants Muchun Song
2025-04-17 14:53   ` Johannes Weiner
2025-04-15  2:45 ` [PATCH RFC 05/28] mm: thp: replace folio_memcg() with folio_memcg_charged() Muchun Song
2025-04-17 14:54   ` Johannes Weiner
2025-04-15  2:45 ` [PATCH RFC 06/28] mm: thp: introduce folio_split_queue_lock and its variants Muchun Song
2025-04-17 14:58   ` Johannes Weiner
2025-04-18 19:50   ` Johannes Weiner
2025-04-19 14:20     ` Muchun Song
2025-04-15  2:45 ` [PATCH RFC 07/28] mm: thp: use folio_batch to handle THP splitting in deferred_split_scan() Muchun Song
2025-04-30 14:37   ` Johannes Weiner
2025-05-06  6:44     ` Hugh Dickins
2025-05-06 21:44       ` Hugh Dickins
2025-05-07  3:30         ` Muchun Song
2025-04-15  2:45 ` [PATCH RFC 08/28] mm: vmscan: refactor move_folios_to_lru() Muchun Song
2025-04-30 14:49   ` Johannes Weiner
2025-04-15  2:45 ` [PATCH RFC 09/28] mm: memcontrol: allocate object cgroup for non-kmem case Muchun Song
2025-04-15  2:45 ` [PATCH RFC 10/28] mm: memcontrol: return root object cgroup for root memory cgroup Muchun Song
2025-06-28  3:09   ` Chen Ridong
2025-06-30  7:16     ` Muchun Song
2025-04-15  2:45 ` [PATCH RFC 11/28] mm: memcontrol: prevent memory cgroup release in get_mem_cgroup_from_folio() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 12/28] buffer: prevent memory cgroup release in folio_alloc_buffers() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 13/28] writeback: prevent memory cgroup release in writeback module Muchun Song
2025-04-15  2:45 ` [PATCH RFC 14/28] mm: memcontrol: prevent memory cgroup release in count_memcg_folio_events() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 15/28] mm: page_io: prevent memory cgroup release in page_io module Muchun Song
2025-04-15  2:45 ` [PATCH RFC 16/28] mm: migrate: prevent memory cgroup release in folio_migrate_mapping() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 17/28] mm: mglru: prevent memory cgroup release in mglru Muchun Song
2025-04-15  2:45 ` [PATCH RFC 18/28] mm: memcontrol: prevent memory cgroup release in mem_cgroup_swap_full() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 19/28] mm: workingset: prevent memory cgroup release in lru_gen_eviction() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 20/28] mm: workingset: prevent lruvec release in workingset_refault() Muchun Song
2025-04-15  2:45 ` [PATCH RFC 21/28] mm: zswap: prevent lruvec release in zswap_folio_swapin() Muchun Song
2025-04-17 17:39   ` Nhat Pham
2025-04-18  2:36   ` Chengming Zhou
2025-04-15  2:45 ` [PATCH RFC 22/28] mm: swap: prevent lruvec release in swap module Muchun Song
2025-04-15  2:45 ` [PATCH RFC 23/28] mm: workingset: prevent lruvec release in workingset_activation() Muchun Song
2025-04-15  2:45 ` Muchun Song [this message]
2025-04-15  2:45 ` [PATCH RFC 25/28] mm: thp: prepare for reparenting LRU pages for split queue lock Muchun Song
2025-04-15  2:45 ` [PATCH RFC 26/28] mm: memcontrol: introduce memcg_reparent_ops Muchun Song
2025-06-30 12:47   ` Harry Yoo
2025-07-01 22:12     ` Harry Yoo
2025-07-07  9:29       ` [External] " Muchun Song
2025-07-09  0:14         ` Harry Yoo
2025-04-15  2:45 ` [PATCH RFC 27/28] mm: memcontrol: eliminate the problem of dying memory cgroup for LRU folios Muchun Song
2025-05-20 11:27   ` Harry Yoo
2025-05-22  2:31     ` Muchun Song
2025-05-23  1:24       ` Harry Yoo
2025-04-15  2:45 ` [PATCH RFC 28/28] mm: lru: add VM_WARN_ON_ONCE_FOLIO to lru maintenance helpers Muchun Song
2025-04-15  2:53 ` [PATCH RFC 00/28] Eliminate Dying Memory Cgroup Muchun Song
2025-04-15  6:19 ` Kairui Song
2025-04-15  8:01   ` Muchun Song
2025-04-17 18:22     ` Kairui Song
2025-04-17 19:04       ` Johannes Weiner
2025-06-27  8:50         ` Chen Ridong
2025-04-17 21:45       ` Roman Gushchin
2025-04-28  3:43         ` Kairui Song
2025-06-27  9:02           ` Chen Ridong
2025-06-27 18:54             ` Kairui Song
2025-06-27 19:14               ` Shakeel Butt
2025-06-28  9:21                 ` Chen Ridong
2025-04-22 14:20       ` Yosry Ahmed
2025-05-23  1:23 ` Harry Yoo
2025-05-23  2:39   ` Muchun Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250415024532.26632-25-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=apais@linux.microsoft.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chengming.zhou@linux.dev \
    --cc=david@fromorbit.com \
    --cc=hamzamahfooz@linux.microsoft.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=nphamcs@gmail.com \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=yosry.ahmed@linux.dev \
    --cc=zhengqi.arch@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).