* [PATCH v2 0/5] mm: batch TLB flushing for dirty folios in vmscan
@ 2026-03-26 8:36 Zhang Peng
2026-03-26 8:36 ` [PATCH v2 1/5] mm/vmscan: track reclaimed pages in reclaim_stat Zhang Peng
` (4 more replies)
0 siblings, 5 replies; 8+ messages in thread
From: Zhang Peng @ 2026-03-26 8:36 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu
Cc: linux-mm, linux-kernel, Kairui Song, Zhang Peng
This series introduces batch TLB flushing optimization for dirty folios
during memory reclaim, aiming to reduce IPI overhead on multi-core systems.
Background
----------
Currently, when performing pageout in memory reclaim, try_to_unmap_flush_dirty()
is called for each dirty folio individually. On multi-core systems, this causes
frequent IPIs which can significantly impact performance.
Approach
--------
This patch series accumulates dirty folios into batches and performs a single
TLB flush for the entire batch, rather than flushing for each individual folio.
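The per-batch flush pattern above can be modeled in plain userspace C. This is a rough illustration only; the helper names are hypothetical stand-ins, not the kernel API (the real flush path is try_to_unmap_flush()):

```c
#include <assert.h>

/*
 * Userspace model of batched TLB flushing: instead of one flush
 * (and hence one round of IPIs) per dirty folio, folios are
 * accumulated and a single flush covers up to BATCH_SIZE of them.
 */
#define BATCH_SIZE 31

static unsigned long nr_flushes;

/* stand-in for try_to_unmap_flush(): one IPI round per call */
static void flush_tlb_batch(void)
{
        nr_flushes++;
}

static void reclaim_dirty_folios(unsigned long nr_dirty)
{
        unsigned long batched = 0;

        for (unsigned long i = 0; i < nr_dirty; i++) {
                if (++batched == BATCH_SIZE) {
                        flush_tlb_batch();      /* one flush per full batch */
                        batched = 0;
                }
        }
        if (batched)
                flush_tlb_batch();              /* flush the partial tail */
}
```

With this model, reclaiming N dirty folios costs ceil(N / 31) flushes instead of N, which is where the IPI reduction in the benchmark below comes from.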
Changes
-------
Patch 1: Add nr_reclaimed to reclaim_stat so shrink_folio_list() can be changed
to void, giving a consistent interface where all per-pass statistics
are reported through reclaim_stat.
Patch 2: Extract the folio activation block at activate_locked into
folio_active_bounce().
Patch 3: Extract the pageout() dispatch state machine and the folio-freeing path
into pageout_one() and folio_free() respectively.
Patch 4: Extract the TTU setup and try_to_unmap() block into folio_try_unmap().
Patch 5: Implement batch TLB flushing logic. Dirty folios are accumulated in
batches and a single TLB flush is performed for each batch before
calling pageout.
Testing
-------
The benchmark script uses stress-ng to compare TLB shootdown behavior before and
after this series. It constrains a stress-ng workload via memcg to force reclaim
through shrink_folio_list(), and reports TLB shootdowns and IPIs.
Core benchmark command: stress-ng --vm 16 --vm-bytes 2G --vm-keep --timeout 60
==========================================================================
batch_dirty_tlb_flush Benchmark Results
==========================================================================
Kernel: 7.0.0-rc1+ CPUs: 16
MemTotal: 31834M SwapTotal: 8191M
memcg limit: 512M alloc: 2G workers: 16 duration: 60s
--------------------------------------------------------------------------
Metric Before After Delta (abs / %)
--------------------------------------------------------------------------
bogo ops/s 28238.63 35833.97 +7595.34 (+26.9%)
TLB shootdowns 55428953 17621697 -37807256 (-68.2%)
Function call IPIs 34073695 14498768 -19574927 (-57.4%)
pgscan_anon (pages) 52856224 60252894 +7396670 (+14.0%)
pgsteal_anon (pages) 29004962 34054753 +5049791 (+17.4%)
--------------------------------------------------------------------------
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
Changes in v2:
- Fix incorrect comment about page_ref_freeze
- Add folio_maybe_dma_pinned() check in pageout_batch()
- Link to v1: https://lore.kernel.org/r/20260309-batch-tlb-flush-v1-0-eb8fed7d1a9e@icloud.com
---
Zhang Peng (5):
mm/vmscan: track reclaimed pages in reclaim_stat
mm/vmscan: extract folio activation into folio_active_bounce()
mm/vmscan: extract folio_free() and pageout_one()
mm/vmscan: extract folio unmap logic into folio_try_unmap()
mm/vmscan: flush TLB for every 31 folios evictions
include/linux/vmstat.h | 1 +
mm/vmscan.c | 456 +++++++++++++++++++++++++++++++------------------
2 files changed, 287 insertions(+), 170 deletions(-)
---
base-commit: 7c5507fca017a80ece36f34e36c77e2bee267517
change-id: 20260309-batch-tlb-flush-893f0e56b496
Best regards,
--
Zhang Peng <zippermonkey@icloud.com>
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 1/5] mm/vmscan: track reclaimed pages in reclaim_stat
2026-03-26 8:36 [PATCH v2 0/5] mm: batch TLB flushing for dirty folios in vmscan Zhang Peng
@ 2026-03-26 8:36 ` Zhang Peng
2026-03-26 8:36 ` [PATCH v2 2/5] mm/vmscan: extract folio activation into folio_active_bounce() Zhang Peng
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Zhang Peng @ 2026-03-26 8:36 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu
Cc: linux-mm, linux-kernel, Kairui Song, Zhang Peng
From: Zhang Peng <bruzzhang@tencent.com>
shrink_folio_list() returns nr_reclaimed while all other statistics are
reported via reclaim_stat. Add nr_reclaimed to reclaim_stat and change
the function to void for a consistent interface.
No functional change.
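The interface change can be sketched in userspace C as follows. This is a simplified model with hypothetical names, not the kernel code (the real struct lives in include/linux/vmstat.h and has more fields): the function stops returning a count and instead reports it through the same stats struct as every other counter.

```c
#include <assert.h>

/* Simplified model of struct reclaim_stat after this patch */
struct reclaim_stat {
        unsigned nr_demoted;
        unsigned nr_reclaimed;          /* the field this patch adds */
};

/*
 * Model of shrink_folio_list(): formerly returned nr_reclaimed,
 * now void -- all per-pass statistics flow through *stat.
 */
static void shrink_list_model(unsigned nr_freeable, unsigned nr_demotable,
                              struct reclaim_stat *stat)
{
        stat->nr_reclaimed += nr_freeable;
        /* demoted folios count as reclaimed too, as in the real code */
        stat->nr_demoted += nr_demotable;
        stat->nr_reclaimed += nr_demotable;
}
```

Callers then read stat.nr_reclaimed after the call, exactly as the hunks in reclaim_clean_pages_from_list() and shrink_inactive_list() below do.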
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
include/linux/vmstat.h | 1 +
mm/vmscan.c | 25 ++++++++++++++-----------
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3c9c266cf782..f088c5641d99 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -26,6 +26,7 @@ struct reclaim_stat {
unsigned nr_unmap_fail;
unsigned nr_lazyfree_fail;
unsigned nr_demoted;
+ unsigned nr_reclaimed;
};
/* Stat data for system wide items */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5ee64cf81378..f3f03a44042e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1053,9 +1053,9 @@ static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
}
/*
- * shrink_folio_list() returns the number of reclaimed pages
+ * Reclaimed folios are counted in stat->nr_reclaimed.
*/
-static unsigned int shrink_folio_list(struct list_head *folio_list,
+static void shrink_folio_list(struct list_head *folio_list,
struct pglist_data *pgdat, struct scan_control *sc,
struct reclaim_stat *stat, bool ignore_references,
struct mem_cgroup *memcg)
@@ -1063,7 +1063,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
struct folio_batch free_folios;
LIST_HEAD(ret_folios);
LIST_HEAD(demote_folios);
- unsigned int nr_reclaimed = 0, nr_demoted = 0;
+ unsigned int nr_demoted = 0;
unsigned int pgactivate = 0;
bool do_demote_pass;
struct swap_iocb *plug = NULL;
@@ -1477,7 +1477,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
* increment nr_reclaimed here (and
* leave it off the LRU).
*/
- nr_reclaimed += nr_pages;
+ stat->nr_reclaimed += nr_pages;
continue;
}
}
@@ -1507,7 +1507,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
* Folio may get swapped out as a whole, need to account
* all pages in it.
*/
- nr_reclaimed += nr_pages;
+ stat->nr_reclaimed += nr_pages;
folio_unqueue_deferred_split(folio);
if (folio_batch_add(&free_folios, folio) == 0) {
@@ -1549,7 +1549,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
/* Migrate folios selected for demotion */
nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
- nr_reclaimed += nr_demoted;
+ stat->nr_reclaimed += nr_demoted;
stat->nr_demoted += nr_demoted;
/* Folios that could not be demoted are still in @demote_folios */
if (!list_empty(&demote_folios)) {
@@ -1589,7 +1589,6 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
if (plug)
swap_write_unplug(plug);
- return nr_reclaimed;
}
unsigned int reclaim_clean_pages_from_list(struct zone *zone,
@@ -1623,8 +1622,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
* change in the future.
*/
noreclaim_flag = memalloc_noreclaim_save();
- nr_reclaimed = shrink_folio_list(&clean_folios, zone->zone_pgdat, &sc,
+ shrink_folio_list(&clean_folios, zone->zone_pgdat, &sc,
&stat, true, NULL);
+ nr_reclaimed = stat.nr_reclaimed;
memalloc_noreclaim_restore(noreclaim_flag);
list_splice(&clean_folios, folio_list);
@@ -1992,8 +1992,9 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
if (nr_taken == 0)
return 0;
- nr_reclaimed = shrink_folio_list(&folio_list, pgdat, sc, &stat, false,
+ shrink_folio_list(&folio_list, pgdat, sc, &stat, false,
lruvec_memcg(lruvec));
+ nr_reclaimed = stat.nr_reclaimed;
move_folios_to_lru(&folio_list);
@@ -2168,7 +2169,8 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
.no_demotion = 1,
};
- nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &stat, true, NULL);
+ shrink_folio_list(folio_list, pgdat, &sc, &stat, true, NULL);
+ nr_reclaimed = stat.nr_reclaimed;
while (!list_empty(folio_list)) {
folio = lru_to_folio(folio_list);
list_del(&folio->lru);
@@ -4862,7 +4864,8 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
if (list_empty(&list))
return scanned;
retry:
- reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false, memcg);
+ shrink_folio_list(&list, pgdat, sc, &stat, false, memcg);
+ reclaimed = stat.nr_reclaimed;
sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
sc->nr_reclaimed += reclaimed;
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
--
2.43.7
* [PATCH v2 2/5] mm/vmscan: extract folio activation into folio_active_bounce()
2026-03-26 8:36 [PATCH v2 0/5] mm: batch TLB flushing for dirty folios in vmscan Zhang Peng
2026-03-26 8:36 ` [PATCH v2 1/5] mm/vmscan: track reclaimed pages in reclaim_stat Zhang Peng
@ 2026-03-26 8:36 ` Zhang Peng
2026-03-26 8:36 ` [PATCH v2 3/5] mm/vmscan: extract folio_free() and pageout_one() Zhang Peng
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Zhang Peng @ 2026-03-26 8:36 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu
Cc: linux-mm, linux-kernel, Kairui Song, Zhang Peng
From: Zhang Peng <bruzzhang@tencent.com>
The activate_locked label in shrink_folio_list() handles swap cache
reclaim, marks the folio active, and updates activation statistics.
Extract this block into folio_active_bounce() to make shrink_folio_list()
easier to follow.
No functional change.
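The shape of the extracted helper can be modeled in userspace C roughly as below. The fields and names are simplified stand-ins, not the kernel structs; the real helper additionally reclaims swap space for full swap and mlocked folios and counts PGACTIVATE events:

```c
#include <assert.h>
#include <stdbool.h>

/* toy stand-ins for struct folio and struct reclaim_stat */
struct folio_model {
        bool mlocked;
        bool active;
};

struct stat_model {
        unsigned nr_activate;
};

/*
 * Model of folio_active_bounce(): the accounting that used to sit
 * under the activate_locked: label, now shared by every caller.
 * Mlocked folios are not marked active, matching the open-coded
 * check in the original label block.
 */
static void folio_active_bounce_model(struct folio_model *folio,
                                      struct stat_model *stat,
                                      unsigned nr_pages)
{
        if (!folio->mlocked) {
                folio->active = true;
                stat->nr_activate += nr_pages;
        }
}
```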
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
mm/vmscan.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f3f03a44042e..0860a48d5bf3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1052,6 +1052,24 @@ static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
return !data_race(folio_swap_flags(folio) & SWP_FS_OPS);
}
+/* Mark folio as active and prepare to bounce back to head of LRU */
+static void folio_active_bounce(struct folio *folio, struct reclaim_stat *stat,
+ unsigned int nr_pages)
+{
+ /* Not a candidate for swapping, so reclaim swap space. */
+ if (folio_test_swapcache(folio) &&
+ (mem_cgroup_swap_full(folio) || folio_test_mlocked(folio)))
+ folio_free_swap(folio);
+ VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
+ if (!folio_test_mlocked(folio)) {
+ int type = folio_is_file_lru(folio);
+
+ folio_set_active(folio);
+ stat->nr_activate[type] += nr_pages;
+ count_memcg_folio_events(folio, PGACTIVATE, nr_pages);
+ }
+}
+
/*
* Reclaimed folios are counted in stat->nr_reclaimed.
*/
@@ -1527,17 +1545,7 @@ static void shrink_folio_list(struct list_head *folio_list,
nr_pages = 1;
}
activate_locked:
- /* Not a candidate for swapping, so reclaim swap space. */
- if (folio_test_swapcache(folio) &&
- (mem_cgroup_swap_full(folio) || folio_test_mlocked(folio)))
- folio_free_swap(folio);
- VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
- if (!folio_test_mlocked(folio)) {
- int type = folio_is_file_lru(folio);
- folio_set_active(folio);
- stat->nr_activate[type] += nr_pages;
- count_memcg_folio_events(folio, PGACTIVATE, nr_pages);
- }
+ folio_active_bounce(folio, stat, nr_pages);
keep_locked:
folio_unlock(folio);
keep:
--
2.43.7
* [PATCH v2 3/5] mm/vmscan: extract folio_free() and pageout_one()
2026-03-26 8:36 [PATCH v2 0/5] mm: batch TLB flushing for dirty folios in vmscan Zhang Peng
2026-03-26 8:36 ` [PATCH v2 1/5] mm/vmscan: track reclaimed pages in reclaim_stat Zhang Peng
2026-03-26 8:36 ` [PATCH v2 2/5] mm/vmscan: extract folio activation into folio_active_bounce() Zhang Peng
@ 2026-03-26 8:36 ` Zhang Peng
2026-03-26 8:36 ` [PATCH v2 4/5] mm/vmscan: extract folio unmap logic into folio_try_unmap() Zhang Peng
2026-03-26 8:36 ` [PATCH v2 5/5] mm/vmscan: flush TLB for every 31 folios evictions Zhang Peng
4 siblings, 0 replies; 8+ messages in thread
From: Zhang Peng @ 2026-03-26 8:36 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu
Cc: linux-mm, linux-kernel, Kairui Song, Zhang Peng
From: Zhang Peng <bruzzhang@tencent.com>
shrink_folio_list() contains two large self-contained sections:
the pageout() dispatch state machine and the folio-freeing path
(buffer release, lazyfree, __remove_mapping, folio_batch). Extract
them into pageout_one() and folio_free() respectively to reduce the
size of shrink_folio_list() and make each step independently readable.
No functional change.
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
mm/vmscan.c | 270 ++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 155 insertions(+), 115 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0860a48d5bf3..c8ff742ed891 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1070,6 +1070,153 @@ static void folio_active_bounce(struct folio *folio, struct reclaim_stat *stat,
}
}
+static bool folio_free(struct folio *folio, struct folio_batch *free_folios,
+ struct scan_control *sc, struct reclaim_stat *stat)
+{
+ unsigned int nr_pages = folio_nr_pages(folio);
+ struct address_space *mapping = folio_mapping(folio);
+
+ /*
+ * If the folio has buffers, try to free the buffer
+ * mappings associated with this folio. If we succeed
+ * we try to free the folio as well.
+ *
+ * We do this even if the folio is dirty.
+ * filemap_release_folio() does not perform I/O, but it
+ * is possible for a folio to have the dirty flag set,
+ * but it is actually clean (all its buffers are clean).
+ * This happens if the buffers were written out directly,
+ * with submit_bh(). ext3 will do this, as well as
+ * the blockdev mapping. filemap_release_folio() will
+ * discover that cleanness and will drop the buffers
+ * and mark the folio clean - it can be freed.
+ *
+ * Rarely, folios can have buffers and no ->mapping.
+ * These are the folios which were not successfully
+ * invalidated in truncate_cleanup_folio(). We try to
+ * drop those buffers here and if that worked, and the
+ * folio is no longer mapped into process address space
+ * (refcount == 1) it can be freed. Otherwise, leave
+ * the folio on the LRU so it is swappable.
+ */
+ if (folio_needs_release(folio)) {
+ if (!filemap_release_folio(folio, sc->gfp_mask)) {
+ folio_active_bounce(folio, stat, nr_pages);
+ return false;
+ }
+
+ if (!mapping && folio_ref_count(folio) == 1) {
+ folio_unlock(folio);
+ if (folio_put_testzero(folio))
+ goto free_it;
+ else {
+ /*
+ * rare race with speculative reference.
+ * the speculative reference will free
+ * this folio shortly, so we may
+ * increment nr_reclaimed here (and
+ * leave it off the LRU).
+ */
+ stat->nr_reclaimed += nr_pages;
+ return true;
+ }
+ }
+ }
+
+ if (folio_test_lazyfree(folio)) {
+ /* follow __remove_mapping for reference */
+ if (!folio_ref_freeze(folio, 1))
+ return false;
+ /*
+ * The folio has only one reference left, which is
+ * from the isolation. After the caller puts the
+ * folio back on the lru and drops the reference, the
+ * folio will be freed anyway. It doesn't matter
+ * which lru it goes on. So we don't bother checking
+ * the dirty flag here.
+ */
+ count_vm_events(PGLAZYFREED, nr_pages);
+ count_memcg_folio_events(folio, PGLAZYFREED, nr_pages);
+ } else if (!mapping || !__remove_mapping(mapping, folio, true,
+ sc->target_mem_cgroup))
+ return false;
+
+ folio_unlock(folio);
+free_it:
+ /*
+ * Folio may get swapped out as a whole, need to account
+ * all pages in it.
+ */
+ stat->nr_reclaimed += nr_pages;
+
+ folio_unqueue_deferred_split(folio);
+ if (folio_batch_add(free_folios, folio) == 0) {
+ mem_cgroup_uncharge_folios(free_folios);
+ try_to_unmap_flush();
+ free_unref_folios(free_folios);
+ }
+ return true;
+}
+
+static void pageout_one(struct folio *folio, struct list_head *ret_folios,
+ struct folio_batch *free_folios,
+ struct scan_control *sc, struct reclaim_stat *stat,
+ struct swap_iocb **plug, struct list_head *folio_list)
+{
+ struct address_space *mapping = folio_mapping(folio);
+ unsigned int nr_pages = folio_nr_pages(folio);
+
+ switch (pageout(folio, mapping, plug, folio_list)) {
+ case PAGE_ACTIVATE:
+ /*
+ * If shmem folio is split when writeback to swap,
+ * the tail pages will make their own pass through
+ * this function and be accounted then.
+ */
+ if (nr_pages > 1 && !folio_test_large(folio)) {
+ sc->nr_scanned -= (nr_pages - 1);
+ nr_pages = 1;
+ }
+ folio_active_bounce(folio, stat, nr_pages);
+ fallthrough;
+ case PAGE_KEEP:
+ goto locked_keepit;
+ case PAGE_SUCCESS:
+ if (nr_pages > 1 && !folio_test_large(folio)) {
+ sc->nr_scanned -= (nr_pages - 1);
+ nr_pages = 1;
+ }
+ stat->nr_pageout += nr_pages;
+
+ if (folio_test_writeback(folio))
+ goto keepit;
+ if (folio_test_dirty(folio))
+ goto keepit;
+
+ /*
+ * A synchronous write - probably a ramdisk. Go
+ * ahead and try to reclaim the folio.
+ */
+ if (!folio_trylock(folio))
+ goto keepit;
+ if (folio_test_dirty(folio) ||
+ folio_test_writeback(folio))
+ goto locked_keepit;
+ mapping = folio_mapping(folio);
+ fallthrough;
+ case PAGE_CLEAN:
+ ; /* try to free the folio below */
+ }
+ if (folio_free(folio, free_folios, sc, stat))
+ return;
+locked_keepit:
+ folio_unlock(folio);
+keepit:
+ list_add(&folio->lru, ret_folios);
+ VM_BUG_ON_FOLIO(folio_test_lru(folio) ||
+ folio_test_unevictable(folio), folio);
+}
+
/*
* Reclaimed folios are counted in stat->nr_reclaimed.
*/
@@ -1415,125 +1562,16 @@ static void shrink_folio_list(struct list_head *folio_list,
* starts and then write it out here.
*/
try_to_unmap_flush_dirty();
- switch (pageout(folio, mapping, &plug, folio_list)) {
- case PAGE_KEEP:
- goto keep_locked;
- case PAGE_ACTIVATE:
- /*
- * If shmem folio is split when writeback to swap,
- * the tail pages will make their own pass through
- * this function and be accounted then.
- */
- if (nr_pages > 1 && !folio_test_large(folio)) {
- sc->nr_scanned -= (nr_pages - 1);
- nr_pages = 1;
- }
- goto activate_locked;
- case PAGE_SUCCESS:
- if (nr_pages > 1 && !folio_test_large(folio)) {
- sc->nr_scanned -= (nr_pages - 1);
- nr_pages = 1;
- }
- stat->nr_pageout += nr_pages;
-
- if (folio_test_writeback(folio))
- goto keep;
- if (folio_test_dirty(folio))
- goto keep;
-
- /*
- * A synchronous write - probably a ramdisk. Go
- * ahead and try to reclaim the folio.
- */
- if (!folio_trylock(folio))
- goto keep;
- if (folio_test_dirty(folio) ||
- folio_test_writeback(folio))
- goto keep_locked;
- mapping = folio_mapping(folio);
- fallthrough;
- case PAGE_CLEAN:
- ; /* try to free the folio below */
- }
- }
-
- /*
- * If the folio has buffers, try to free the buffer
- * mappings associated with this folio. If we succeed
- * we try to free the folio as well.
- *
- * We do this even if the folio is dirty.
- * filemap_release_folio() does not perform I/O, but it
- * is possible for a folio to have the dirty flag set,
- * but it is actually clean (all its buffers are clean).
- * This happens if the buffers were written out directly,
- * with submit_bh(). ext3 will do this, as well as
- * the blockdev mapping. filemap_release_folio() will
- * discover that cleanness and will drop the buffers
- * and mark the folio clean - it can be freed.
- *
- * Rarely, folios can have buffers and no ->mapping.
- * These are the folios which were not successfully
- * invalidated in truncate_cleanup_folio(). We try to
- * drop those buffers here and if that worked, and the
- * folio is no longer mapped into process address space
- * (refcount == 1) it can be freed. Otherwise, leave
- * the folio on the LRU so it is swappable.
- */
- if (folio_needs_release(folio)) {
- if (!filemap_release_folio(folio, sc->gfp_mask))
- goto activate_locked;
- if (!mapping && folio_ref_count(folio) == 1) {
- folio_unlock(folio);
- if (folio_put_testzero(folio))
- goto free_it;
- else {
- /*
- * rare race with speculative reference.
- * the speculative reference will free
- * this folio shortly, so we may
- * increment nr_reclaimed here (and
- * leave it off the LRU).
- */
- stat->nr_reclaimed += nr_pages;
- continue;
- }
- }
+ pageout_one(folio, &ret_folios, &free_folios, sc, stat,
+ &plug, folio_list);
+ goto next;
}
- if (folio_test_lazyfree(folio)) {
- /* follow __remove_mapping for reference */
- if (!folio_ref_freeze(folio, 1))
- goto keep_locked;
- /*
- * The folio has only one reference left, which is
- * from the isolation. After the caller puts the
- * folio back on the lru and drops the reference, the
- * folio will be freed anyway. It doesn't matter
- * which lru it goes on. So we don't bother checking
- * the dirty flag here.
- */
- count_vm_events(PGLAZYFREED, nr_pages);
- count_memcg_folio_events(folio, PGLAZYFREED, nr_pages);
- } else if (!mapping || !__remove_mapping(mapping, folio, true,
- sc->target_mem_cgroup))
+ if (!folio_free(folio, &free_folios, sc, stat))
goto keep_locked;
- folio_unlock(folio);
-free_it:
- /*
- * Folio may get swapped out as a whole, need to account
- * all pages in it.
- */
- stat->nr_reclaimed += nr_pages;
-
- folio_unqueue_deferred_split(folio);
- if (folio_batch_add(&free_folios, folio) == 0) {
- mem_cgroup_uncharge_folios(&free_folios);
- try_to_unmap_flush();
- free_unref_folios(&free_folios);
- }
- continue;
+ else
+ continue;
activate_locked_split:
/*
@@ -1552,6 +1590,8 @@ static void shrink_folio_list(struct list_head *folio_list,
list_add(&folio->lru, &ret_folios);
VM_BUG_ON_FOLIO(folio_test_lru(folio) ||
folio_test_unevictable(folio), folio);
+next:
+ continue;
}
/* 'folio_list' is always empty here */
--
2.43.7
* [PATCH v2 4/5] mm/vmscan: extract folio unmap logic into folio_try_unmap()
2026-03-26 8:36 [PATCH v2 0/5] mm: batch TLB flushing for dirty folios in vmscan Zhang Peng
` (2 preceding siblings ...)
2026-03-26 8:36 ` [PATCH v2 3/5] mm/vmscan: extract folio_free() and pageout_one() Zhang Peng
@ 2026-03-26 8:36 ` Zhang Peng
2026-03-26 8:36 ` [PATCH v2 5/5] mm/vmscan: flush TLB for every 31 folios evictions Zhang Peng
4 siblings, 0 replies; 8+ messages in thread
From: Zhang Peng @ 2026-03-26 8:36 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu
Cc: linux-mm, linux-kernel, Kairui Song, Zhang Peng
From: Zhang Peng <bruzzhang@tencent.com>
shrink_folio_list() contains a self-contained block that sets up
TTU flags and calls try_to_unmap(), accounting for failures via
reclaim_stat. Extract it into folio_try_unmap() to reduce the size
of shrink_folio_list() and make the unmap step independently readable.
No functional change.
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
mm/vmscan.c | 70 +++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 40 insertions(+), 30 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c8ff742ed891..63cc88c875e8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1217,6 +1217,44 @@ static void pageout_one(struct folio *folio, struct list_head *ret_folios,
folio_test_unevictable(folio), folio);
}
+static bool folio_try_unmap(struct folio *folio, struct reclaim_stat *stat,
+ unsigned int nr_pages)
+{
+ enum ttu_flags flags = TTU_BATCH_FLUSH;
+ bool was_swapbacked;
+
+ if (!folio_mapped(folio))
+ return true;
+
+ was_swapbacked = folio_test_swapbacked(folio);
+ if (folio_test_pmd_mappable(folio))
+ flags |= TTU_SPLIT_HUGE_PMD;
+ /*
+ * Without TTU_SYNC, try_to_unmap will only begin to
+ * hold PTL from the first present PTE within a large
+ * folio. Some initial PTEs might be skipped due to
+ * races with parallel PTE writes in which PTEs can be
+ * cleared temporarily before being written new present
+ * values. This will lead to a large folio is still
+ * mapped while some subpages have been partially
+ * unmapped after try_to_unmap; TTU_SYNC helps
+ * try_to_unmap acquire PTL from the first PTE,
+ * eliminating the influence of temporary PTE values.
+ */
+ if (folio_test_large(folio))
+ flags |= TTU_SYNC;
+
+ try_to_unmap(folio, flags);
+ if (folio_mapped(folio)) {
+ stat->nr_unmap_fail += nr_pages;
+ if (!was_swapbacked &&
+ folio_test_swapbacked(folio))
+ stat->nr_lazyfree_fail += nr_pages;
+ return false;
+ }
+ return true;
+}
+
/*
* Reclaimed folios are counted in stat->nr_reclaimed.
*/
@@ -1491,36 +1529,8 @@ static void shrink_folio_list(struct list_head *folio_list,
* The folio is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
- if (folio_mapped(folio)) {
- enum ttu_flags flags = TTU_BATCH_FLUSH;
- bool was_swapbacked = folio_test_swapbacked(folio);
-
- if (folio_test_pmd_mappable(folio))
- flags |= TTU_SPLIT_HUGE_PMD;
- /*
- * Without TTU_SYNC, try_to_unmap will only begin to
- * hold PTL from the first present PTE within a large
- * folio. Some initial PTEs might be skipped due to
- * races with parallel PTE writes in which PTEs can be
- * cleared temporarily before being written new present
- * values. This will lead to a large folio is still
- * mapped while some subpages have been partially
- * unmapped after try_to_unmap; TTU_SYNC helps
- * try_to_unmap acquire PTL from the first PTE,
- * eliminating the influence of temporary PTE values.
- */
- if (folio_test_large(folio))
- flags |= TTU_SYNC;
-
- try_to_unmap(folio, flags);
- if (folio_mapped(folio)) {
- stat->nr_unmap_fail += nr_pages;
- if (!was_swapbacked &&
- folio_test_swapbacked(folio))
- stat->nr_lazyfree_fail += nr_pages;
- goto activate_locked;
- }
- }
+ if (!folio_try_unmap(folio, stat, nr_pages))
+ goto activate_locked;
/*
* Folio is unmapped now so it cannot be newly pinned anymore.
--
2.43.7
* [PATCH v2 5/5] mm/vmscan: flush TLB for every 31 folios evictions
2026-03-26 8:36 [PATCH v2 0/5] mm: batch TLB flushing for dirty folios in vmscan Zhang Peng
` (3 preceding siblings ...)
2026-03-26 8:36 ` [PATCH v2 4/5] mm/vmscan: extract folio unmap logic into folio_try_unmap() Zhang Peng
@ 2026-03-26 8:36 ` Zhang Peng
2026-03-26 12:40 ` Pedro Falcato
4 siblings, 1 reply; 8+ messages in thread
From: Zhang Peng @ 2026-03-26 8:36 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu, Michal Hocko
Cc: linux-mm, linux-kernel, Kairui Song, Zhang Peng
From: Zhang Peng <bruzzhang@tencent.com>
Currently we flush the TLB for every dirty folio, which is a bottleneck on
systems with many cores because it causes heavy IPI usage.
Instead, batch the folios and flush once for every 31 folios (one
folio_batch). The folios are held in a folio_batch with their locks
released; when the folio_batch is full, do the following steps:
- For each folio: lock it and check it is still evictable (i.e. not under
  writeback, not back on the LRU, not mapped, not DMA-pinned)
- If it is no longer evictable, put it back on the LRU
- Flush TLB once for the batch
- Pageout the folios
Note we can't hold a frozen folio in folio_batch for long as it will
cause filemap/swapcache lookup to livelock. Fortunately pageout usually
won't take too long; sync IO is fast, and non-sync IO will be issued
with the folio marked writeback.
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
mm/vmscan.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 62 insertions(+), 7 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 63cc88c875e8..27de8034f582 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1217,6 +1217,47 @@ static void pageout_one(struct folio *folio, struct list_head *ret_folios,
folio_test_unevictable(folio), folio);
}
+static void pageout_batch(struct folio_batch *fbatch,
+ struct list_head *ret_folios,
+ struct folio_batch *free_folios,
+ struct scan_control *sc, struct reclaim_stat *stat,
+ struct swap_iocb **plug, struct list_head *folio_list)
+{
+ int i, count = folio_batch_count(fbatch);
+ struct folio *folio;
+
+ folio_batch_reinit(fbatch);
+ for (i = 0; i < count; ++i) {
+ folio = fbatch->folios[i];
+ if (!folio_trylock(folio)) {
+ list_add(&folio->lru, ret_folios);
+ continue;
+ }
+
+ if (folio_test_writeback(folio) || folio_test_lru(folio) ||
+ folio_mapped(folio) || folio_maybe_dma_pinned(folio)) {
+ folio_unlock(folio);
+ list_add(&folio->lru, ret_folios);
+ continue;
+ }
+
+ folio_batch_add(fbatch, folio);
+ }
+
+ i = 0;
+ count = folio_batch_count(fbatch);
+ if (!count)
+ return;
+ /* One TLB flush for the batch */
+ try_to_unmap_flush_dirty();
+ for (i = 0; i < count; ++i) {
+ folio = fbatch->folios[i];
+ pageout_one(folio, ret_folios, free_folios, sc, stat, plug,
+ folio_list);
+ }
+ folio_batch_reinit(fbatch);
+}
+
static bool folio_try_unmap(struct folio *folio, struct reclaim_stat *stat,
unsigned int nr_pages)
{
@@ -1264,6 +1305,8 @@ static void shrink_folio_list(struct list_head *folio_list,
struct mem_cgroup *memcg)
{
struct folio_batch free_folios;
+ struct folio_batch flush_folios;
+
LIST_HEAD(ret_folios);
LIST_HEAD(demote_folios);
unsigned int nr_demoted = 0;
@@ -1272,6 +1315,8 @@ static void shrink_folio_list(struct list_head *folio_list,
struct swap_iocb *plug = NULL;
folio_batch_init(&free_folios);
+ folio_batch_init(&flush_folios);
+
memset(stat, 0, sizeof(*stat));
cond_resched();
do_demote_pass = can_demote(pgdat->node_id, sc, memcg);
@@ -1565,15 +1610,21 @@ static void shrink_folio_list(struct list_head *folio_list,
goto keep_locked;
if (!sc->may_writepage)
goto keep_locked;
-
/*
- * Folio is dirty. Flush the TLB if a writable entry
- * potentially exists to avoid CPU writes after I/O
- * starts and then write it out here.
+ * For anon, we should only see swap cache (anon) and
+ * the list pinning the page. For file page, the filemap
+ * and the list pins it. The folio is unlocked while
+ * held in the batch, so pageout_batch() relocks each
+ * folio and rechecks its state. If the folio is under
+ * writeback, on LRU, mapped, or DMA-pinned, it will
+ * not be written out and is put back to LRU list.
*/
- try_to_unmap_flush_dirty();
- pageout_one(folio, &ret_folios, &free_folios, sc, stat,
- &plug, folio_list);
+ folio_unlock(folio);
+ if (!folio_batch_add(&flush_folios, folio))
+ pageout_batch(&flush_folios,
+ &ret_folios, &free_folios,
+ sc, stat, &plug,
+ folio_list);
goto next;
}
@@ -1603,6 +1654,10 @@ static void shrink_folio_list(struct list_head *folio_list,
next:
continue;
}
+ if (folio_batch_count(&flush_folios)) {
+ pageout_batch(&flush_folios, &ret_folios, &free_folios, sc,
+ stat, &plug, folio_list);
+ }
/* 'folio_list' is always empty here */
/* Migrate folios selected for demotion */
--
2.43.7
* Re: [PATCH v2 5/5] mm/vmscan: flush TLB for every 31 folios evictions
2026-03-26 8:36 ` [PATCH v2 5/5] mm/vmscan: flush TLB for every 31 folios evictions Zhang Peng
@ 2026-03-26 12:40 ` Pedro Falcato
0 siblings, 0 replies; 8+ messages in thread
From: Pedro Falcato @ 2026-03-26 12:40 UTC (permalink / raw)
To: Zhang Peng
Cc: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Johannes Weiner, Qi Zheng,
Shakeel Butt, Axel Rasmussen, Yuanchu Xie, Wei Xu, Michal Hocko,
linux-mm, linux-kernel, Kairui Song, Zhang Peng
On Thu, Mar 26, 2026 at 04:36:21PM +0800, Zhang Peng wrote:
> From: Zhang Peng <bruzzhang@tencent.com>
>
> Currently we flush TLB for every dirty folio, which is a bottleneck for
> systems with many cores as this causes heavy IPI usage.
>
> So instead, batch the folios, and flush once for every 31 folios (one
> folio_batch). These folios will be held in a folio_batch releasing their
> lock, then when folio_batch is full, do following steps:
>
> - For each folio: lock - check still evictable (writeback, lru, mapped,
> dma_pinned)
> - If no longer evictable, put back to LRU
> - Flush TLB once for the batch
> - Pageout the folios
>
> Note we can't hold a frozen folio in folio_batch for long as it will
> cause filemap/swapcache lookup to livelock. Fortunately pageout usually
> won't take too long; sync IO is fast, and non-sync IO will be issued
> with the folio marked writeback.
>
> Suggested-by: Kairui Song <kasong@tencent.com>
> Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
> ---
> mm/vmscan.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 62 insertions(+), 7 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 63cc88c875e8..27de8034f582 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1217,6 +1217,47 @@ static void pageout_one(struct folio *folio, struct list_head *ret_folios,
> folio_test_unevictable(folio), folio);
> }
>
> +static void pageout_batch(struct folio_batch *fbatch,
> + struct list_head *ret_folios,
> + struct folio_batch *free_folios,
> + struct scan_control *sc, struct reclaim_stat *stat,
> + struct swap_iocb **plug, struct list_head *folio_list)
> +{
> + int i, count = folio_batch_count(fbatch);
> + struct folio *folio;
> +
> + folio_batch_reinit(fbatch);
> + for (i = 0; i < count; ++i) {
> + folio = fbatch->folios[i];
> + if (!folio_trylock(folio)) {
> + list_add(&folio->lru, ret_folios);
> + continue;
> + }
> +
> + if (folio_test_writeback(folio) || folio_test_lru(folio) ||
If PG_lru is set here, we're in a world of trouble as we're actively using
folio->lru. I don't think it's possible for it to be set, as isolating folios
clears lru, and refcount bump means the folio cannot be reused or reinserted
back on the LRU. So perhaps:
VM_WARN_ON_FOLIO(folio_test_lru(folio), folio);
> + folio_mapped(folio) || folio_maybe_dma_pinned(folio)) {
> + folio_unlock(folio);
> + list_add(&folio->lru, ret_folios);
> + continue;
> + }
> +
> + folio_batch_add(fbatch, folio);
> + }
> +
> + i = 0;
> + count = folio_batch_count(fbatch);
> + if (!count)
> + return;
> + /* One TLB flush for the batch */
> + try_to_unmap_flush_dirty();
> + for (i = 0; i < count; ++i) {
> + folio = fbatch->folios[i];
> + pageout_one(folio, ret_folios, free_folios, sc, stat, plug,
> + folio_list);
Would be lovely if we could pass the batch down to the swap layer.
> + }
> + folio_batch_reinit(fbatch);
The way you keep reinitializing fbatch is a bit confusing.
Probably worth a comment or two (or kdocs for pageout_batch documenting
that the folio batch is reset, etc).
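A kernel-doc block along the lines suggested here might look like the following. The wording is illustrative, not part of the patch; it only restates behavior visible in the diff (the batch is reinitialized, skipped folios go to ret_folios, one flush covers the batch):

```c
/**
 * pageout_batch - write out a batch of dirty folios with a single TLB flush
 * @fbatch: candidate folios; reinitialized on entry to collect the
 *          still-evictable subset and again before returning, so the
 *          batch is always empty when this function returns
 *
 * Folios that cannot be relocked, or that are under writeback, mapped,
 * or DMA-pinned, are moved to @ret_folios instead of being written out.
 * One try_to_unmap_flush_dirty() covers the whole batch.
 */
```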
> +}
> +
> static bool folio_try_unmap(struct folio *folio, struct reclaim_stat *stat,
> unsigned int nr_pages)
> {
> @@ -1264,6 +1305,8 @@ static void shrink_folio_list(struct list_head *folio_list,
> struct mem_cgroup *memcg)
> {
> struct folio_batch free_folios;
> + struct folio_batch flush_folios;
> +
> LIST_HEAD(ret_folios);
> LIST_HEAD(demote_folios);
> unsigned int nr_demoted = 0;
> @@ -1272,6 +1315,8 @@ static void shrink_folio_list(struct list_head *folio_list,
> struct swap_iocb *plug = NULL;
>
> folio_batch_init(&free_folios);
> + folio_batch_init(&flush_folios);
> +
> memset(stat, 0, sizeof(*stat));
> cond_resched();
> do_demote_pass = can_demote(pgdat->node_id, sc, memcg);
> @@ -1565,15 +1610,21 @@ static void shrink_folio_list(struct list_head *folio_list,
> goto keep_locked;
> if (!sc->may_writepage)
> goto keep_locked;
> -
> /*
> - * Folio is dirty. Flush the TLB if a writable entry
> - * potentially exists to avoid CPU writes after I/O
> - * starts and then write it out here.
> + * For anon, we should only see swap cache (anon) and
> + * the list pinning the page. For file page, the filemap
> + * and the list pins it. The folio is unlocked while
> + * held in the batch, so pageout_batch() relocks each
> + * folio and rechecks its state. If the folio is under
> + * writeback, on LRU, mapped, or DMA-pinned, it will
> + * not be written out and is put back to LRU list.
> */
> - try_to_unmap_flush_dirty();
> - pageout_one(folio, &ret_folios, &free_folios, sc, stat,
> - &plug, folio_list);
> + folio_unlock(folio);
Why is the folio unlocked? I don't see the need to take the lock trip twice.
Is there something I'm missing?
--
Pedro