public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Vineet Agarwal <agarwal.vineet2006@gmail.com>
To: akpm@linux-foundation.org, hannes@cmpxchg.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	kasong@tencent.com, qi.zheng@linux.dev, shakeel.butt@linux.dev,
	baohua@kernel.org, axelrasmussen@google.com, yuanchu@google.com,
	weixugc@google.com, david@kernel.org, mhocko@kernel.org,
	ljs@kernel.org, linuszeng@tencent.com,
	Vineet Agarwal <agarwal.vineet2006@gmail.com>
Subject: [PATCH] mm/vmscan: fix delayed flusher wakeup in MGLRU
Date: Thu, 30 Apr 2026 00:24:41 +0530	[thread overview]
Message-ID: <20260429185441.486804-1-agarwal.vineet2006@gmail.com> (raw)

MGLRU currently decides whether to wake flusher threads in
try_to_shrink_lruvec() using cumulative reclaim counters:

sc->nr.unqueued_dirty == sc->nr.file_taken

However, these counters are accumulated across multiple evict_folios()
passes before the check is performed.

This can delay or suppress flusher wakeup when an earlier reclaim batch
isolates only dirty file folios, but a later batch isolates clean file
folios before try_to_shrink_lruvec() performs the final comparison.

For example:

batch 1: file_taken = 100, unqueued_dirty = 100
batch 2: file_taken += 60, unqueued_dirty += 0

The final check then compares unqueued_dirty (100) against file_taken
(160). Since 100 != 160, the flusher wakeup is skipped, even though
reclaim was already blocked by dirty file folios in batch 1.

Classic reclaim avoids this by using per-batch values:

stat.nr_unqueued_dirty == nr_taken

and waking flushers immediately when the condition is met.

Make MGLRU use the same per-batch flusher wakeup behavior as classic
reclaim by moving the flusher wakeup into evict_folios(), using
batch-local isolation results from scan_folios() instead of the
cumulative counters checked later in try_to_shrink_lruvec().

This avoids missed flusher wakeups and makes dirty folio reclaim
behavior consistent with classic reclaim.

Fixes: 1bc542c6a0d14 ("mm/vmscan: wake up flushers conditionally to avoid cgroup OOM")
Signed-off-by: Vineet Agarwal <agarwal.vineet2006@gmail.com>
---
 mm/vmscan.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd1b1aa12581..f9b6cc146a3d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4680,7 +4680,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
 
 static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 		       struct scan_control *sc, int type, int tier,
-		       struct list_head *list)
+		       struct list_head *list,
+		       unsigned long *file_taken)
 {
 	int i;
 	int gen;
@@ -4749,7 +4750,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 				scanned, skipped, isolated,
 				type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
 	if (type == LRU_GEN_FILE)
-		sc->nr.file_taken += isolated;
+		*file_taken += isolated;
 	/*
 	 * There might not be eligible folios due to reclaim_idx. Check the
 	 * remaining to prevent livelock if it's not making progress.
@@ -4798,7 +4799,8 @@ static int get_type_to_scan(struct lruvec *lruvec, int swappiness)
 
 static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 			  struct scan_control *sc, int swappiness,
-			  int *type_scanned, struct list_head *list)
+			  int *type_scanned, struct list_head *list,
+			  unsigned long *file_taken)
 {
 	int i;
 	int type = get_type_to_scan(lruvec, swappiness);
@@ -4809,7 +4811,8 @@ static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 		*type_scanned = type;
 
-		scanned = scan_folios(nr_to_scan, lruvec, sc, type, tier, list);
+		scanned = scan_folios(nr_to_scan, lruvec, sc, type, tier,
+				      list, file_taken);
 		if (scanned)
 			return scanned;
 
@@ -4825,6 +4828,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	int type;
 	int scanned;
 	int reclaimed;
+	unsigned long file_taken = 0;
 	LIST_HEAD(list);
 	LIST_HEAD(clean);
 	struct folio *folio;
@@ -4839,8 +4843,8 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	lruvec_lock_irq(lruvec);
 
-	scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
-
+	scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness,
+				 &type, &list, &file_taken);
 	scanned += try_to_inc_min_seq(lruvec, swappiness);
 
 	if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
@@ -4852,6 +4856,14 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 		return scanned;
 retry:
 	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false, memcg);
+
+	if (stat.nr_unqueued_dirty && stat.nr_unqueued_dirty == file_taken) {
+		wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+		if (!writeback_throttling_sane(sc))
+			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
+	}
+	sc->nr.file_taken += file_taken;
 	sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
 	sc->nr_reclaimed += reclaimed;
 	trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
@@ -5021,27 +5033,9 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	}
 
 	/*
-	 * If too many file cache in the coldest generation can't be evicted
-	 * due to being dirty, wake up the flusher.
+	 * Flusher wakeup and writeback throttling are handled in
+	 * evict_folios() based on per-batch reclaim results.
 	 */
-	if (sc->nr.unqueued_dirty && sc->nr.unqueued_dirty == sc->nr.file_taken) {
-		struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-
-		wakeup_flusher_threads(WB_REASON_VMSCAN);
-
-		/*
-		 * For cgroupv1 dirty throttling is achieved by waking up
-		 * the kernel flusher here and later waiting on folios
-		 * which are in writeback to finish (see shrink_folio_list()).
-		 *
-		 * Flusher may not be able to issue writeback quickly
-		 * enough for cgroupv1 writeback throttling to work
-		 * on a large system.
-		 */
-		if (!writeback_throttling_sane(sc))
-			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
-	}
-
 	/* whether this lruvec should be rotated */
 	return nr_to_scan < 0;
 }
-- 
2.54.0


             reply	other threads:[~2026-04-29 18:55 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-29 18:54 Vineet Agarwal [this message]
2026-04-30 14:22 ` [PATCH] mm/vmscan: fix delayed flusher wakeup in MGLRU Andrew Morton
2026-04-30 14:35 ` Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260429185441.486804-1-agarwal.vineet2006@gmail.com \
    --to=agarwal.vineet2006@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=baohua@kernel.org \
    --cc=david@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=kasong@tencent.com \
    --cc=linuszeng@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=qi.zheng@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=weixugc@google.com \
    --cc=yuanchu@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.