All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: Yu Zhao <yuzhao@google.com>,
	"T . J . Alumbaugh" <talumbau@google.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: [PATCH 3/3] mm/lru_gen: Don't build multi-gen LRU page table walk code on architecture not supported
Date: Tue, 13 Jun 2023 17:30:47 +0530	[thread overview]
Message-ID: <20230613120047.149573-3-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20230613120047.149573-1-aneesh.kumar@linux.ibm.com>

Not all architecture supports hardware atomic updates of access bits. On
such an arch, we don't use page table walk to classify pages into
generations. Add a kernel config option and remove adding all the page
table walk code on such architecture.

No preformance change observed with mongodb ycsb test:

Patch details        Throughput(Ops/sec)
without patch		93278
With patch		93400

Without patch:
$ size mm/vmscan.o
   text    data     bss     dec     hex filename
 112102   42721      40  154863   25cef mm/vmscan.o

With patch

$ size  mm/vmscan.o
   text    data     bss     dec     hex filename
 105430   41333      24  146787   23d63 mm/vmscan.o

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/Kconfig               |   3 +
 arch/arm64/Kconfig         |   1 +
 arch/x86/Kconfig           |   1 +
 include/linux/memcontrol.h |   2 +-
 include/linux/mm_types.h   |   8 +--
 include/linux/mmzone.h     |   8 +++
 include/linux/swap.h       |   2 +-
 kernel/fork.c              |   2 +-
 mm/memcontrol.c            |   2 +-
 mm/vmscan.c                | 128 +++++++++++++++++++++++++++++++++----
 10 files changed, 137 insertions(+), 20 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 205fd23e0cad..5cdd98731298 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1458,6 +1458,9 @@ config DYNAMIC_SIGFRAME
 config HAVE_ARCH_NODE_DEV_GROUP
 	bool
 
+config LRU_TASK_PAGE_AGING
+	bool
+
 config ARCH_HAS_NONLEAF_PMD_YOUNG
 	bool
 	help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1201d25a8a4..e0994fb3504b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -225,6 +225,7 @@ config ARM64
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
+	select LRU_TASK_PAGE_AGING		if LRU_GEN
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53bab123a8ee..bde9e6f33b22 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -276,6 +276,7 @@ config X86
 	select HAVE_GENERIC_VDSO
 	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
+	select LRU_TASK_PAGE_AGING		if LRU_GEN
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
 	select NEED_PER_CPU_PAGE_FIRST_CHUNK
 	select NEED_SG_DMA_LENGTH
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 43d4ec8445d4..ea5d1d7bfb8b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -321,7 +321,7 @@ struct mem_cgroup {
 	struct deferred_split deferred_split_queue;
 #endif
 
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	/* per-memcg mm_struct list */
 	struct lru_gen_mm_list mm_list;
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..f90a4860a792 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -786,7 +786,7 @@ struct mm_struct {
 		 */
 		unsigned long ksm_rmap_items;
 #endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 		struct {
 			/* this mm_struct is on lru_gen_mm_list */
 			struct list_head list;
@@ -801,7 +801,7 @@ struct mm_struct {
 			struct mem_cgroup *memcg;
 #endif
 		} lru_gen;
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_TASK_PAGE_AGING */
 	} __randomize_layout;
 
 	/*
@@ -830,7 +830,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return (struct cpumask *)&mm->cpu_bitmap;
 }
 
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 
 struct lru_gen_mm_list {
 	/* mm_struct list for page table walkers */
@@ -864,7 +864,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
 	WRITE_ONCE(mm->lru_gen.bitmap, -1);
 }
 
-#else /* !CONFIG_LRU_GEN */
+#else /* !CONFIG_LRU_TASK_PAGE_AGING */
 
 static inline void lru_gen_add_mm(struct mm_struct *mm)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a4889c9d4055..b35698148d3c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -427,6 +427,7 @@ struct lru_gen_folio {
 #endif
 };
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 enum {
 	MM_LEAF_TOTAL,		/* total leaf entries */
 	MM_LEAF_OLD,		/* old leaf entries */
@@ -469,6 +470,7 @@ struct lru_gen_mm_walk {
 	bool can_swap;
 	bool force_scan;
 };
+#endif
 
 void lru_gen_init_lruvec(struct lruvec *lruvec);
 void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
@@ -613,8 +615,12 @@ struct lruvec {
 #ifdef CONFIG_LRU_GEN
 	/* evictable pages divided into generations */
 	struct lru_gen_folio		lrugen;
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	/* to concurrently iterate lru_gen_mm_list */
 	struct lru_gen_mm_state		mm_state;
+#else
+	bool seq_update_progress;
+#endif
 #endif
 #ifdef CONFIG_MEMCG
 	struct pglist_data *pgdat;
@@ -1354,8 +1360,10 @@ typedef struct pglist_data {
 	unsigned long		flags;
 
 #ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	/* kswap mm walk data */
 	struct lru_gen_mm_walk mm_walk;
+#endif
 	/* lru_gen_folio list */
 	struct lru_gen_memcg memcg_lru;
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3c69cb653cb9..ce09b1e44275 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -155,7 +155,7 @@ union swap_header {
 struct reclaim_state {
 	/* pages reclaimed outside of LRU-based reclaim */
 	unsigned long reclaimed;
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	/* per-thread mm walk data */
 	struct lru_gen_mm_walk *mm_walk;
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..2c9e21e39f84 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2939,7 +2939,7 @@ pid_t kernel_clone(struct kernel_clone_args *args)
 		get_task_struct(p);
 	}
 
-	if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+	if (IS_ENABLED(CONFIG_LRU_TASK_PAGE_AGING) && !(clone_flags & CLONE_VM)) {
 		/* lock the task to synchronize with memcg migration */
 		task_lock(p);
 		lru_gen_add_mm(p->mm);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 959d6a27e23d..d8fe30d880c6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6404,7 +6404,7 @@ static void mem_cgroup_move_task(void)
 }
 #endif
 
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 static void mem_cgroup_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f277beba556c..207e62d42888 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3304,6 +3304,7 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
 	       get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
 }
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 /******************************************************************************
  *                          Bloom filters
  ******************************************************************************/
@@ -3650,6 +3651,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
 
 	return success;
 }
+#endif
 
 /******************************************************************************
  *                          PID controller
@@ -3819,6 +3821,8 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
 	return folio;
 }
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
+
 /* promote pages accessed through page tables */
 static int folio_update_gen(struct folio *folio, int gen)
 {
@@ -3882,6 +3886,16 @@ static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk
 	}
 }
 
+static void reset_current_reclaim_batch_size(struct lruvec *lruvec)
+{
+	struct lru_gen_mm_walk *walk;
+
+	walk = current->reclaim_state->mm_walk;
+	if (walk && walk->batched)
+		return reset_batch_size(lruvec, walk);
+
+}
+
 static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args)
 {
 	struct address_space *mapping;
@@ -4304,7 +4318,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
 	} while (err == -EAGAIN);
 }
 
-static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
+static void *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
 {
 	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
 
@@ -4335,6 +4349,23 @@ static void clear_mm_walk(void)
 	if (!current_is_kswapd())
 		kfree(walk);
 }
+#else
+
+static void reset_current_reclaim_batch_size(struct lruvec *lruvec)
+{
+
+}
+
+static inline void *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
+{
+	return NULL;
+}
+
+static inline void clear_mm_walk(void)
+{
+
+}
+#endif
 
 static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 {
@@ -4468,11 +4499,15 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
 	/* make sure preceding modifications appear */
 	smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
 
+#ifndef CONFIG_LRU_TASK_PAGE_AGING
+	lruvec->seq_update_progress = false;
+#endif
 	spin_unlock_irq(&lruvec->lru_lock);
 }
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
-			       struct scan_control *sc, bool can_swap, bool force_scan)
+			       bool can_swap, bool force_scan)
 {
 	bool success;
 	struct lru_gen_mm_walk *walk;
@@ -4498,7 +4533,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 		goto done;
 	}
 
-	walk = set_mm_walk(NULL, true);
+	walk = (struct lru_gen_mm_walk *)set_mm_walk(NULL, true);
 	if (!walk) {
 		success = iterate_mm_list_nowalk(lruvec, max_seq);
 		goto done;
@@ -4520,6 +4555,51 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 
 	return success;
 }
+#else
+
+/*
+ * inc_max_seq can drop the lru_lock in between. So use a waitqueue seq_update_progress
+ * to allow concurrent access.
+ */
+bool __try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+			  bool can_swap, bool force_scan)
+{
+	bool success = false;
+	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+
+	VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
+
+	/* see the comment in iterate_mm_list() */
+	if (lruvec->seq_update_progress)
+		success = false;
+	else {
+		spin_lock_irq(&lruvec->lru_lock);
+
+		if (max_seq != lrugen->max_seq)
+			goto done;
+
+		if (lruvec->seq_update_progress)
+			goto done;
+
+		success = true;
+		lruvec->seq_update_progress = true;
+done:
+		spin_unlock_irq(&lruvec->lru_lock);
+	}
+
+	if (success)
+		inc_max_seq(lruvec, can_swap, force_scan);
+
+	return success;
+}
+
+static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+			       bool can_swap, bool force_scan)
+{
+	return __try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+}
+#endif
+
 
 /******************************************************************************
  *                          working set protection
@@ -4630,6 +4710,7 @@ static void __look_around_gen_update(struct folio *folio, int new_gen)
 		folio_activate(folio);
 }
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 static inline bool current_reclaim_state_can_swap(void)
 {
 	if (current->reclaim_state)
@@ -4651,6 +4732,18 @@ static void look_around_gen_update(struct folio *folio, int new_gen)
 	}
 	return __look_around_gen_update(folio, new_gen);
 }
+#else
+
+static inline bool current_reclaim_state_can_swap(void)
+{
+	return true;
+}
+
+static inline void look_around_gen_update(struct folio *folio, int new_gen)
+{
+	return __look_around_gen_update(folio, new_gen);
+}
+#endif
 
 /*
  * This function exploits spatial locality when shrink_folio_list() walks the
@@ -4714,7 +4807,6 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 
 		folio = get_pfn_folio(pfn, memcg, pgdat,
 				      current_reclaim_state_can_swap());
-
 		if (!folio)
 			continue;
 
@@ -4734,9 +4826,11 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 	arch_leave_lazy_mmu_mode();
 	mem_cgroup_unlock_pages();
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	/* feedback from rmap walkers to page table walkers */
 	if (suitable_to_scan(i, young))
 		update_bloom_filter(lruvec, max_seq, pvmw->pmd);
+#endif
 }
 
 /******************************************************************************
@@ -5156,7 +5250,6 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 	struct folio *next;
 	enum vm_event_item item;
 	struct reclaim_stat stat;
-	struct lru_gen_mm_walk *walk;
 	bool skip_retry = false;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -5211,9 +5304,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 
 	move_folios_to_lru(lruvec, &list);
 
-	walk = current->reclaim_state->mm_walk;
-	if (walk && walk->batched)
-		reset_batch_size(lruvec, walk);
+	reset_current_reclaim_batch_size(lruvec);
 
 	item = PGSTEAL_KSWAPD + reclaimer_offset();
 	if (!cgroup_reclaim(sc))
@@ -5321,7 +5412,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
 		return nr_to_scan;
 
 	/* skip this lruvec as it's low on cold folios */
-	return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
+	return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
 }
 
 static unsigned long get_nr_to_reclaim(struct scan_control *sc)
@@ -5929,6 +6020,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
 		seq_putc(m, '\n');
 	}
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	seq_puts(m, "                      ");
 	for (i = 0; i < NR_MM_STATS; i++) {
 		const char *s = "      ";
@@ -5945,6 +6037,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
 		seq_printf(m, " %10lu%c", n, s[i]);
 	}
 	seq_putc(m, '\n');
+#endif
 }
 
 /* see Documentation/admin-guide/mm/multigen_lru.rst for details */
@@ -6026,7 +6119,7 @@ static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_contr
 	if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
 		return -ERANGE;
 
-	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
+	try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
 
 	return 0;
 }
@@ -6218,7 +6311,12 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 	for_each_gen_type_zone(gen, type, zone)
 		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	lruvec->mm_state.seq = MIN_NR_GENS;
+#else
+	lruvec->seq_update_progress = false;
+#endif
+
 }
 
 #ifdef CONFIG_MEMCG
@@ -6237,16 +6335,20 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
 
 void lru_gen_init_memcg(struct mem_cgroup *memcg)
 {
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	INIT_LIST_HEAD(&memcg->mm_list.fifo);
 	spin_lock_init(&memcg->mm_list.lock);
+
+#endif
 }
 
 void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 {
-	int i;
 	int nid;
 
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
 	VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo));
+#endif
 
 	for_each_node(nid) {
 		struct lruvec *lruvec = get_lruvec(memcg, nid);
@@ -6256,10 +6358,12 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 
 		lruvec->lrugen.list.next = LIST_POISON1;
 
-		for (i = 0; i < NR_BLOOM_FILTERS; i++) {
+#ifdef CONFIG_LRU_TASK_PAGE_AGING
+		for (int i = 0; i < NR_BLOOM_FILTERS; i++) {
 			bitmap_free(lruvec->mm_state.filters[i]);
 			lruvec->mm_state.filters[i] = NULL;
 		}
+#endif
 	}
 }
 
-- 
2.40.1



  parent reply	other threads:[~2023-06-13 12:01 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-13 12:00 [PATCH 1/3] mm/lru_gen: Move some code around so that next patch is simpler Aneesh Kumar K.V
2023-06-13 12:00 ` [PATCH 2/3] mm/lru_gen: lru_gen_look_around simplification Aneesh Kumar K.V
2023-06-13 12:00 ` Aneesh Kumar K.V [this message]
2023-06-13 12:23   ` [PATCH 3/3] mm/lru_gen: Don't build multi-gen LRU page table walk code on architecture not supported Matthew Wilcox
2023-06-13 13:28     ` Aneesh Kumar K V
2023-06-13 13:36       ` Matthew Wilcox
2023-06-13 13:47         ` Aneesh Kumar K V
2023-06-21  2:27   ` kernel test robot
2023-06-24 14:53   ` Aneesh Kumar K.V
2023-06-25 19:34     ` Yu Zhao
2023-06-26 10:52       ` Aneesh Kumar K V
2023-06-26 17:04         ` Yu Zhao
2023-06-27 11:48           ` Aneesh Kumar K V
2023-06-27 19:10             ` Yu Zhao
2023-06-27 19:10               ` Yu Zhao
  -- strict thread matches above, loose matches on Subject: below --
2023-06-21  0:40 kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230613120047.149573-3-aneesh.kumar@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-mm@kvack.org \
    --cc=talumbau@google.com \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.