* + mm-mglru-rework-aging-feedback.patch added to mm-unstable branch
@ 2024-12-05 4:44 Andrew Morton
0 siblings, 0 replies; 4+ messages in thread
From: Andrew Morton @ 2024-12-05 4:44 UTC (permalink / raw)
To: mm-commits, stevensd, kasong, kaleshsingh, bharata, yuzhao, akpm
The patch titled
Subject: mm/mglru: rework aging feedback
has been added to the -mm mm-unstable branch. Its filename is
mm-mglru-rework-aging-feedback.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-mglru-rework-aging-feedback.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yu Zhao <yuzhao@google.com>
Subject: mm/mglru: rework aging feedback
Date: Sun, 1 Dec 2024 20:28:20 -0700
The aging feedback is based on both the number of generations and the
distribution of folios in each generation. The number of generations is
currently the distance between max_seq and anon min_seq. This is because
anon min_seq is not allowed to move past file min_seq. The rationale for
that is that file is always evictable whereas anon is not. However, for
use cases where anon is a lot cheaper than file:
1. Anon in the second oldest generation can be a better choice than
file in the oldest generation.
2. A large amount of file in the oldest generation can skew the
distribution, making should_run_aging() return false negative.
Allow anon and file min_seq to move independently, and use solely the
number of generations as the feedback for aging. Specifically, when both
anon and file are evictable, anon min_seq can now be greater than file
min_seq, and therefore the number of generations becomes the distance
between max_seq and min(min_seq[0],min_seq[1]). And should_run_aging()
returns true if and only if the number of generations is less than
MAX_NR_GENS.
As the first step to the final optimization, this change by itself should
not have userspace-visiable effects beyond performance. The next twos
patch will take advantage of this change; the last patch in this series
will better distribute folios across MAX_NR_GENS.
Link: https://lkml.kernel.org/r/20241202032823.2741019-4-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: David Stevens <stevensd@chromium.org>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/mmzone.h | 6 -
mm/vmscan.c | 217 +++++++++++++++------------------------
2 files changed, 91 insertions(+), 132 deletions(-)
--- a/include/linux/mmzone.h~mm-mglru-rework-aging-feedback
+++ a/include/linux/mmzone.h
@@ -446,8 +446,8 @@ struct lru_gen_folio {
unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
/* the exponential moving average of evicted+protected */
unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
- /* the first tier doesn't need protection, hence the minus one */
- unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
+ /* can only be modified under the LRU lock */
+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* can be modified without holding the LRU lock */
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
@@ -498,7 +498,7 @@ struct lru_gen_mm_walk {
int mm_stats[NR_MM_STATS];
/* total batched items */
int batched;
- bool can_swap;
+ int swappiness;
bool force_scan;
};
--- a/mm/vmscan.c~mm-mglru-rework-aging-feedback
+++ a/mm/vmscan.c
@@ -2623,11 +2623,17 @@ static bool should_clear_pmd_young(void)
READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \
}
+#define evictable_min_seq(min_seq, swappiness) \
+ min((min_seq)[!(swappiness)], (min_seq)[(swappiness) != MAX_SWAPPINESS])
+
#define for_each_gen_type_zone(gen, type, zone) \
for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+#define for_each_evictable_type(type, swappiness) \
+ for ((type) = !(swappiness); (type) <= ((swappiness) != MAX_SWAPPINESS); (type)++)
+
#define get_memcg_gen(seq) ((seq) % MEMCG_NR_GENS)
#define get_memcg_bin(bin) ((bin) % MEMCG_NR_BINS)
@@ -2673,10 +2679,16 @@ static int get_nr_gens(struct lruvec *lr
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
{
- /* see the comment on lru_gen_folio */
- return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
- get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
- get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
+ int type;
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ int n = get_nr_gens(lruvec, type);
+
+ if (n < MIN_NR_GENS || n > MAX_NR_GENS)
+ return false;
+ }
+
+ return true;
}
/******************************************************************************
@@ -3083,9 +3095,8 @@ static void read_ctrl_pos(struct lruvec
pos->refaulted = lrugen->avg_refaulted[type][tier] +
atomic_long_read(&lrugen->refaulted[hist][type][tier]);
pos->total = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- pos->total += lrugen->protected[hist][type][tier - 1];
pos->gain = gain;
}
@@ -3112,17 +3123,15 @@ static void reset_ctrl_pos(struct lruvec
WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
sum = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- sum += lrugen->protected[hist][type][tier - 1];
WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
}
if (clear) {
atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
- if (tier)
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
+ WRITE_ONCE(lrugen->protected[hist][type][tier], 0);
}
}
}
@@ -3257,7 +3266,7 @@ static int should_skip_vma(unsigned long
return true;
if (vma_is_anonymous(vma))
- return !walk->can_swap;
+ return !walk->swappiness;
if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping))
return true;
@@ -3267,7 +3276,10 @@ static int should_skip_vma(unsigned long
return true;
if (shmem_mapping(mapping))
- return !walk->can_swap;
+ return !walk->swappiness;
+
+ if (walk->swappiness == MAX_SWAPPINESS)
+ return true;
/* to exclude special mappings like dax, etc. */
return !mapping->a_ops->read_folio;
@@ -3355,7 +3367,7 @@ static unsigned long get_pmd_pfn(pmd_t p
}
static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
- struct pglist_data *pgdat, bool can_swap)
+ struct pglist_data *pgdat)
{
struct folio *folio;
@@ -3366,10 +3378,6 @@ static struct folio *get_pfn_folio(unsig
if (folio_memcg(folio) != memcg)
return NULL;
- /* file VMAs can contain anon pages from COW */
- if (!folio_is_file_lru(folio) && !can_swap)
- return NULL;
-
return folio;
}
@@ -3425,7 +3433,7 @@ restart:
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -3510,7 +3518,7 @@ static void walk_pmd_range_locked(pud_t
if (pfn == -1)
goto next;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
goto next;
@@ -3722,22 +3730,26 @@ static void clear_mm_walk(void)
kfree(walk);
}
-static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
+static bool inc_min_seq(struct lruvec *lruvec, int type, int swappiness)
{
int zone;
int remaining = MAX_LRU_BATCH;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
- if (type == LRU_GEN_ANON && !can_swap)
+ if (type ? swappiness == MAX_SWAPPINESS : !swappiness)
goto done;
- /* prevent cold/hot inversion if force_scan is true */
+ /* prevent cold/hot inversion if the type is evictable */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
struct list_head *head = &lrugen->folios[old_gen][type][zone];
while (!list_empty(head)) {
struct folio *folio = lru_to_folio(head);
+ int refs = folio_lru_refs(folio);
+ int tier = lru_tier_from_refs(refs);
+ int delta = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
@@ -3747,6 +3759,9 @@ static bool inc_min_seq(struct lruvec *l
new_gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
+
if (!--remaining)
return false;
}
@@ -3758,51 +3773,37 @@ done:
return true;
}
-static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
+static bool try_to_inc_min_seq(struct lruvec *lruvec, int swappiness)
{
int gen, type, zone;
bool success = false;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
- DEFINE_MIN_SEQ(lruvec);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
- /* find the oldest populated generation */
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
- gen = lru_gen_from_seq(min_seq[type]);
+ for_each_evictable_type(type, swappiness) {
+ unsigned long seq;
+
+ for (seq = lrugen->min_seq[type]; seq + MIN_NR_GENS <= lrugen->max_seq; seq++) {
+ gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
if (!list_empty(&lrugen->folios[gen][type][zone]))
goto next;
}
-
- min_seq[type]++;
}
next:
- ;
- }
-
- /* see the comment on lru_gen_folio */
- if (can_swap) {
- min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
- min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
- }
-
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- if (min_seq[type] == lrugen->min_seq[type])
- continue;
-
- reset_ctrl_pos(lruvec, type, true);
- WRITE_ONCE(lrugen->min_seq[type], min_seq[type]);
- success = true;
+ if (seq != lrugen->min_seq[type]) {
+ reset_ctrl_pos(lruvec, type, true);
+ WRITE_ONCE(lrugen->min_seq[type], seq);
+ success = true;
+ }
}
return success;
}
-static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
{
bool success;
int prev, next;
@@ -3820,13 +3821,11 @@ restart:
if (!success)
goto unlock;
- for (type = ANON_AND_FILE - 1; type >= 0; type--) {
+ for (type = 0; type < ANON_AND_FILE; type++) {
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
- VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
-
- if (inc_min_seq(lruvec, type, can_swap))
+ if (inc_min_seq(lruvec, type, swappiness))
continue;
spin_unlock_irq(&lruvec->lru_lock);
@@ -3870,7 +3869,7 @@ unlock:
}
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -3881,7 +3880,7 @@ static bool try_to_inc_max_seq(struct lr
VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq));
if (!mm_state)
- return inc_max_seq(lruvec, seq, can_swap, force_scan);
+ return inc_max_seq(lruvec, seq, swappiness);
/* see the comment in iterate_mm_list() */
if (seq <= READ_ONCE(mm_state->seq))
@@ -3906,7 +3905,7 @@ static bool try_to_inc_max_seq(struct lr
walk->lruvec = lruvec;
walk->seq = seq;
- walk->can_swap = can_swap;
+ walk->swappiness = swappiness;
walk->force_scan = force_scan;
do {
@@ -3916,7 +3915,7 @@ static bool try_to_inc_max_seq(struct lr
} while (mm);
done:
if (success) {
- success = inc_max_seq(lruvec, seq, can_swap, force_scan);
+ success = inc_max_seq(lruvec, seq, swappiness);
WARN_ON_ONCE(!success);
}
@@ -3957,13 +3956,13 @@ static bool lruvec_is_sizable(struct lru
{
int gen, type, zone;
unsigned long total = 0;
- bool can_swap = get_swappiness(lruvec, sc);
+ int swappiness = get_swappiness(lruvec, sc);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
DEFINE_MIN_SEQ(lruvec);
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
@@ -3983,6 +3982,7 @@ static bool lruvec_is_reclaimable(struct
{
int gen;
unsigned long birth;
+ int swappiness = get_swappiness(lruvec, sc);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
@@ -3992,8 +3992,7 @@ static bool lruvec_is_reclaimable(struct
if (!lruvec_is_sizable(lruvec, sc))
return false;
- /* see the comment on lru_gen_folio */
- gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ gen = lru_gen_from_seq(evictable_min_seq(min_seq, swappiness));
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
return time_is_before_jiffies(birth + min_ttl);
@@ -4060,7 +4059,6 @@ bool lru_gen_look_around(struct page_vma
unsigned long addr = pvmw->address;
struct vm_area_struct *vma = pvmw->vma;
struct folio *folio = pfn_folio(pvmw->pfn);
- bool can_swap = !folio_is_file_lru(folio);
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
@@ -4113,7 +4111,7 @@ bool lru_gen_look_around(struct page_vma
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -4329,8 +4327,8 @@ static bool sort_folio(struct lruvec *lr
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
- lrugen->protected[hist][type][tier - 1] + delta);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
return true;
}
@@ -4529,7 +4527,6 @@ static int isolate_folios(struct lruvec
{
int i;
int type;
- int scanned;
int tier = -1;
DEFINE_MIN_SEQ(lruvec);
@@ -4554,21 +4551,23 @@ static int isolate_folios(struct lruvec
else
type = get_type_to_scan(lruvec, swappiness, &tier);
- for (i = !swappiness; i < ANON_AND_FILE; i++) {
+ for_each_evictable_type(i, swappiness) {
+ int scanned;
+
if (tier < 0)
tier = get_tier_idx(lruvec, type);
+ *type_scanned = type;
+
scanned = scan_folios(lruvec, sc, type, tier, list);
if (scanned)
- break;
+ return scanned;
type = !type;
tier = -1;
}
- *type_scanned = type;
-
- return scanned;
+ return 0;
}
static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
@@ -4584,6 +4583,7 @@ static int evict_folios(struct lruvec *l
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -4593,7 +4593,7 @@ static int evict_folios(struct lruvec *l
scanned += try_to_inc_min_seq(lruvec, swappiness);
- if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS)
+ if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
scanned = 0;
spin_unlock_irq(&lruvec->lru_lock);
@@ -4665,63 +4665,32 @@ retry:
}
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
- bool can_swap, unsigned long *nr_to_scan)
+ int swappiness, unsigned long *nr_to_scan)
{
int gen, type, zone;
- unsigned long old = 0;
- unsigned long young = 0;
- unsigned long total = 0;
+ unsigned long size = 0;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
DEFINE_MIN_SEQ(lruvec);
- /* whether this lruvec is completely out of cold folios */
- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
- *nr_to_scan = 0;
+ *nr_to_scan = 0;
+ /* have to run aging, since eviction is not possible anymore */
+ if (evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS > max_seq)
return true;
- }
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
- unsigned long size = 0;
-
gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++)
size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
-
- total += size;
- if (seq == max_seq)
- young += size;
- else if (seq + MIN_NR_GENS == max_seq)
- old += size;
}
}
- *nr_to_scan = total;
-
- /*
- * The aging tries to be lazy to reduce the overhead, while the eviction
- * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
- * ideal number of generations is MIN_NR_GENS+1.
- */
- if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
- return false;
-
- /*
- * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
- * of the total number of pages for each generation. A reasonable range
- * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
- * aging cares about the upper bound of hot pages, while the eviction
- * cares about the lower bound of cold pages.
- */
- if (young * MIN_NR_GENS > total)
- return true;
- if (old * (MIN_NR_GENS + 2) < total)
- return true;
-
- return false;
+ *nr_to_scan = size;
+ /* better to run aging even though eviction is still possible */
+ return evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS == max_seq;
}
/*
@@ -4729,7 +4698,7 @@ static bool should_run_aging(struct lruv
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
* reclaim.
*/
-static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
{
bool success;
unsigned long nr_to_scan;
@@ -4739,7 +4708,7 @@ static long get_nr_to_scan(struct lruvec
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
return -1;
- success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan);
+ success = should_run_aging(lruvec, max_seq, swappiness, &nr_to_scan);
/* try to scrape all its memory if this memcg was deleted */
if (nr_to_scan && !mem_cgroup_online(memcg))
@@ -4750,7 +4719,7 @@ static long get_nr_to_scan(struct lruvec
return nr_to_scan >> sc->priority;
/* stop scanning this lruvec as it's low on cold folios */
- return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, false) ? -1 : 0;
}
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
@@ -5294,8 +5263,7 @@ static void lru_gen_seq_show_full(struct
s = "rep";
n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier]);
}
for (i = 0; i < 3; i++)
@@ -5350,7 +5318,7 @@ static int lru_gen_seq_show(struct seq_f
seq_printf(m, " node %5d\n", nid);
if (!full)
- seq = min_seq[LRU_GEN_ANON];
+ seq = evictable_min_seq(min_seq, MAX_SWAPPINESS / 2);
else if (max_seq >= MAX_NR_GENS)
seq = max_seq - MAX_NR_GENS + 1;
else
@@ -5390,23 +5358,14 @@ static const struct seq_operations lru_g
};
static int run_aging(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
DEFINE_MAX_SEQ(lruvec);
- DEFINE_MIN_SEQ(lruvec);
-
- if (seq < max_seq)
- return 0;
if (seq > max_seq)
return -EINVAL;
- if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
- return -ERANGE;
-
- try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
-
- return 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, force_scan) ? 0 : -EEXIST;
}
static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
@@ -5422,7 +5381,7 @@ static int run_eviction(struct lruvec *l
while (!signal_pending(current)) {
DEFINE_MIN_SEQ(lruvec);
- if (seq < min_seq[!swappiness])
+ if (seq < evictable_min_seq(min_seq, swappiness))
return 0;
if (sc->nr_reclaimed >= nr_to_reclaim)
_
Patches currently in -mm which might be from yuzhao@google.com are
mm-mglru-clean-up-workingset.patch
mm-mglru-optimize-deactivation.patch
mm-mglru-rework-aging-feedback.patch
mm-mglru-rework-type-selection.patch
mm-mglru-rework-refault-detection.patch
mm-mglru-rework-workingset-protection.patch
^ permalink raw reply [flat|nested] 4+ messages in thread
* + mm-mglru-rework-aging-feedback.patch added to mm-unstable branch
@ 2024-12-06 0:58 Andrew Morton
0 siblings, 0 replies; 4+ messages in thread
From: Andrew Morton @ 2024-12-06 0:58 UTC (permalink / raw)
To: mm-commits, stevensd, kasong, kaleshsingh, bharata, yuzhao, akpm
The patch titled
Subject: mm/mglru: rework aging feedback
has been added to the -mm mm-unstable branch. Its filename is
mm-mglru-rework-aging-feedback.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-mglru-rework-aging-feedback.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yu Zhao <yuzhao@google.com>
Subject: mm/mglru: rework aging feedback
Date: Thu, 5 Dec 2024 17:31:23 -0700
The aging feedback is based on both the number of generations and the
distribution of folios in each generation. The number of generations is
currently the distance between max_seq and anon min_seq. This is because
anon min_seq is not allowed to move past file min_seq. The rationale for
that is that file is always evictable whereas anon is not. However, for
use cases where anon is a lot cheaper than file:
1. Anon in the second oldest generation can be a better choice than
file in the oldest generation.
2. A large amount of file in the oldest generation can skew the
distribution, making should_run_aging() return false negative.
Allow anon and file min_seq to move independently, and use solely the
number of generations as the feedback for aging. Specifically, when both
anon and file are evictable, anon min_seq can now be greater than file
min_seq, and therefore the number of generations becomes the distance
between max_seq and min(min_seq[0],min_seq[1]). And should_run_aging()
returns true if and only if the number of generations is less than
MAX_NR_GENS.
As the first step to the final optimization, this change by itself should
not have userspace-visiable effects beyond performance. The next twos
patch will take advantage of this change; the last patch in this series
will better distribute folios across MAX_NR_GENS.
Link: https://lkml.kernel.org/r/20241206003126.1338283-4-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: David Stevens <stevensd@chromium.org>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/mmzone.h | 6 -
mm/vmscan.c | 217 +++++++++++++++------------------------
2 files changed, 91 insertions(+), 132 deletions(-)
--- a/include/linux/mmzone.h~mm-mglru-rework-aging-feedback
+++ a/include/linux/mmzone.h
@@ -446,8 +446,8 @@ struct lru_gen_folio {
unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
/* the exponential moving average of evicted+protected */
unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
- /* the first tier doesn't need protection, hence the minus one */
- unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
+ /* can only be modified under the LRU lock */
+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* can be modified without holding the LRU lock */
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
@@ -498,7 +498,7 @@ struct lru_gen_mm_walk {
int mm_stats[NR_MM_STATS];
/* total batched items */
int batched;
- bool can_swap;
+ int swappiness;
bool force_scan;
};
--- a/mm/vmscan.c~mm-mglru-rework-aging-feedback
+++ a/mm/vmscan.c
@@ -2623,11 +2623,17 @@ static bool should_clear_pmd_young(void)
READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \
}
+#define evictable_min_seq(min_seq, swappiness) \
+ min((min_seq)[!(swappiness)], (min_seq)[(swappiness) != MAX_SWAPPINESS])
+
#define for_each_gen_type_zone(gen, type, zone) \
for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+#define for_each_evictable_type(type, swappiness) \
+ for ((type) = !(swappiness); (type) <= ((swappiness) != MAX_SWAPPINESS); (type)++)
+
#define get_memcg_gen(seq) ((seq) % MEMCG_NR_GENS)
#define get_memcg_bin(bin) ((bin) % MEMCG_NR_BINS)
@@ -2673,10 +2679,16 @@ static int get_nr_gens(struct lruvec *lr
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
{
- /* see the comment on lru_gen_folio */
- return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
- get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
- get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
+ int type;
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ int n = get_nr_gens(lruvec, type);
+
+ if (n < MIN_NR_GENS || n > MAX_NR_GENS)
+ return false;
+ }
+
+ return true;
}
/******************************************************************************
@@ -3083,9 +3095,8 @@ static void read_ctrl_pos(struct lruvec
pos->refaulted = lrugen->avg_refaulted[type][tier] +
atomic_long_read(&lrugen->refaulted[hist][type][tier]);
pos->total = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- pos->total += lrugen->protected[hist][type][tier - 1];
pos->gain = gain;
}
@@ -3112,17 +3123,15 @@ static void reset_ctrl_pos(struct lruvec
WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
sum = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- sum += lrugen->protected[hist][type][tier - 1];
WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
}
if (clear) {
atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
- if (tier)
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
+ WRITE_ONCE(lrugen->protected[hist][type][tier], 0);
}
}
}
@@ -3257,7 +3266,7 @@ static int should_skip_vma(unsigned long
return true;
if (vma_is_anonymous(vma))
- return !walk->can_swap;
+ return !walk->swappiness;
if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping))
return true;
@@ -3267,7 +3276,10 @@ static int should_skip_vma(unsigned long
return true;
if (shmem_mapping(mapping))
- return !walk->can_swap;
+ return !walk->swappiness;
+
+ if (walk->swappiness == MAX_SWAPPINESS)
+ return true;
/* to exclude special mappings like dax, etc. */
return !mapping->a_ops->read_folio;
@@ -3355,7 +3367,7 @@ static unsigned long get_pmd_pfn(pmd_t p
}
static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
- struct pglist_data *pgdat, bool can_swap)
+ struct pglist_data *pgdat)
{
struct folio *folio;
@@ -3366,10 +3378,6 @@ static struct folio *get_pfn_folio(unsig
if (folio_memcg(folio) != memcg)
return NULL;
- /* file VMAs can contain anon pages from COW */
- if (!folio_is_file_lru(folio) && !can_swap)
- return NULL;
-
return folio;
}
@@ -3425,7 +3433,7 @@ restart:
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -3510,7 +3518,7 @@ static void walk_pmd_range_locked(pud_t
if (pfn == -1)
goto next;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
goto next;
@@ -3722,22 +3730,26 @@ static void clear_mm_walk(void)
kfree(walk);
}
-static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
+static bool inc_min_seq(struct lruvec *lruvec, int type, int swappiness)
{
int zone;
int remaining = MAX_LRU_BATCH;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
- if (type == LRU_GEN_ANON && !can_swap)
+ if (type ? swappiness == MAX_SWAPPINESS : !swappiness)
goto done;
- /* prevent cold/hot inversion if force_scan is true */
+ /* prevent cold/hot inversion if the type is evictable */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
struct list_head *head = &lrugen->folios[old_gen][type][zone];
while (!list_empty(head)) {
struct folio *folio = lru_to_folio(head);
+ int refs = folio_lru_refs(folio);
+ int tier = lru_tier_from_refs(refs);
+ int delta = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
@@ -3747,6 +3759,9 @@ static bool inc_min_seq(struct lruvec *l
new_gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
+
if (!--remaining)
return false;
}
@@ -3758,51 +3773,37 @@ done:
return true;
}
-static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
+static bool try_to_inc_min_seq(struct lruvec *lruvec, int swappiness)
{
int gen, type, zone;
bool success = false;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
- DEFINE_MIN_SEQ(lruvec);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
- /* find the oldest populated generation */
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
- gen = lru_gen_from_seq(min_seq[type]);
+ for_each_evictable_type(type, swappiness) {
+ unsigned long seq;
+
+ for (seq = lrugen->min_seq[type]; seq + MIN_NR_GENS <= lrugen->max_seq; seq++) {
+ gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
if (!list_empty(&lrugen->folios[gen][type][zone]))
goto next;
}
-
- min_seq[type]++;
}
next:
- ;
- }
-
- /* see the comment on lru_gen_folio */
- if (can_swap) {
- min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
- min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
- }
-
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- if (min_seq[type] == lrugen->min_seq[type])
- continue;
-
- reset_ctrl_pos(lruvec, type, true);
- WRITE_ONCE(lrugen->min_seq[type], min_seq[type]);
- success = true;
+ if (seq != lrugen->min_seq[type]) {
+ reset_ctrl_pos(lruvec, type, true);
+ WRITE_ONCE(lrugen->min_seq[type], seq);
+ success = true;
+ }
}
return success;
}
-static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
{
bool success;
int prev, next;
@@ -3820,13 +3821,11 @@ restart:
if (!success)
goto unlock;
- for (type = ANON_AND_FILE - 1; type >= 0; type--) {
+ for (type = 0; type < ANON_AND_FILE; type++) {
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
- VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
-
- if (inc_min_seq(lruvec, type, can_swap))
+ if (inc_min_seq(lruvec, type, swappiness))
continue;
spin_unlock_irq(&lruvec->lru_lock);
@@ -3870,7 +3869,7 @@ unlock:
}
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -3881,7 +3880,7 @@ static bool try_to_inc_max_seq(struct lr
VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq));
if (!mm_state)
- return inc_max_seq(lruvec, seq, can_swap, force_scan);
+ return inc_max_seq(lruvec, seq, swappiness);
/* see the comment in iterate_mm_list() */
if (seq <= READ_ONCE(mm_state->seq))
@@ -3906,7 +3905,7 @@ static bool try_to_inc_max_seq(struct lr
walk->lruvec = lruvec;
walk->seq = seq;
- walk->can_swap = can_swap;
+ walk->swappiness = swappiness;
walk->force_scan = force_scan;
do {
@@ -3916,7 +3915,7 @@ static bool try_to_inc_max_seq(struct lr
} while (mm);
done:
if (success) {
- success = inc_max_seq(lruvec, seq, can_swap, force_scan);
+ success = inc_max_seq(lruvec, seq, swappiness);
WARN_ON_ONCE(!success);
}
@@ -3957,13 +3956,13 @@ static bool lruvec_is_sizable(struct lru
{
int gen, type, zone;
unsigned long total = 0;
- bool can_swap = get_swappiness(lruvec, sc);
+ int swappiness = get_swappiness(lruvec, sc);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
DEFINE_MIN_SEQ(lruvec);
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
@@ -3983,6 +3982,7 @@ static bool lruvec_is_reclaimable(struct
{
int gen;
unsigned long birth;
+ int swappiness = get_swappiness(lruvec, sc);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
@@ -3992,8 +3992,7 @@ static bool lruvec_is_reclaimable(struct
if (!lruvec_is_sizable(lruvec, sc))
return false;
- /* see the comment on lru_gen_folio */
- gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ gen = lru_gen_from_seq(evictable_min_seq(min_seq, swappiness));
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
return time_is_before_jiffies(birth + min_ttl);
@@ -4060,7 +4059,6 @@ bool lru_gen_look_around(struct page_vma
unsigned long addr = pvmw->address;
struct vm_area_struct *vma = pvmw->vma;
struct folio *folio = pfn_folio(pvmw->pfn);
- bool can_swap = !folio_is_file_lru(folio);
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
@@ -4113,7 +4111,7 @@ bool lru_gen_look_around(struct page_vma
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -4329,8 +4327,8 @@ static bool sort_folio(struct lruvec *lr
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
- lrugen->protected[hist][type][tier - 1] + delta);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
return true;
}
@@ -4529,7 +4527,6 @@ static int isolate_folios(struct lruvec
{
int i;
int type;
- int scanned;
int tier = -1;
DEFINE_MIN_SEQ(lruvec);
@@ -4554,21 +4551,23 @@ static int isolate_folios(struct lruvec
else
type = get_type_to_scan(lruvec, swappiness, &tier);
- for (i = !swappiness; i < ANON_AND_FILE; i++) {
+ for_each_evictable_type(i, swappiness) {
+ int scanned;
+
if (tier < 0)
tier = get_tier_idx(lruvec, type);
+ *type_scanned = type;
+
scanned = scan_folios(lruvec, sc, type, tier, list);
if (scanned)
- break;
+ return scanned;
type = !type;
tier = -1;
}
- *type_scanned = type;
-
- return scanned;
+ return 0;
}
static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
@@ -4584,6 +4583,7 @@ static int evict_folios(struct lruvec *l
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -4593,7 +4593,7 @@ static int evict_folios(struct lruvec *l
scanned += try_to_inc_min_seq(lruvec, swappiness);
- if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS)
+ if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
scanned = 0;
spin_unlock_irq(&lruvec->lru_lock);
@@ -4665,63 +4665,32 @@ retry:
}
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
- bool can_swap, unsigned long *nr_to_scan)
+ int swappiness, unsigned long *nr_to_scan)
{
int gen, type, zone;
- unsigned long old = 0;
- unsigned long young = 0;
- unsigned long total = 0;
+ unsigned long size = 0;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
DEFINE_MIN_SEQ(lruvec);
- /* whether this lruvec is completely out of cold folios */
- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
- *nr_to_scan = 0;
+ *nr_to_scan = 0;
+ /* have to run aging, since eviction is not possible anymore */
+ if (evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS > max_seq)
return true;
- }
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
- unsigned long size = 0;
-
gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++)
size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
-
- total += size;
- if (seq == max_seq)
- young += size;
- else if (seq + MIN_NR_GENS == max_seq)
- old += size;
}
}
- *nr_to_scan = total;
-
- /*
- * The aging tries to be lazy to reduce the overhead, while the eviction
- * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
- * ideal number of generations is MIN_NR_GENS+1.
- */
- if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
- return false;
-
- /*
- * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
- * of the total number of pages for each generation. A reasonable range
- * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
- * aging cares about the upper bound of hot pages, while the eviction
- * cares about the lower bound of cold pages.
- */
- if (young * MIN_NR_GENS > total)
- return true;
- if (old * (MIN_NR_GENS + 2) < total)
- return true;
-
- return false;
+ *nr_to_scan = size;
+ /* better to run aging even though eviction is still possible */
+ return evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS == max_seq;
}
/*
@@ -4729,7 +4698,7 @@ static bool should_run_aging(struct lruv
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
* reclaim.
*/
-static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
{
bool success;
unsigned long nr_to_scan;
@@ -4739,7 +4708,7 @@ static long get_nr_to_scan(struct lruvec
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
return -1;
- success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan);
+ success = should_run_aging(lruvec, max_seq, swappiness, &nr_to_scan);
/* try to scrape all its memory if this memcg was deleted */
if (nr_to_scan && !mem_cgroup_online(memcg))
@@ -4750,7 +4719,7 @@ static long get_nr_to_scan(struct lruvec
return nr_to_scan >> sc->priority;
/* stop scanning this lruvec as it's low on cold folios */
- return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, false) ? -1 : 0;
}
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
@@ -5294,8 +5263,7 @@ static void lru_gen_seq_show_full(struct
s = "rep";
n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier]);
}
for (i = 0; i < 3; i++)
@@ -5350,7 +5318,7 @@ static int lru_gen_seq_show(struct seq_f
seq_printf(m, " node %5d\n", nid);
if (!full)
- seq = min_seq[LRU_GEN_ANON];
+ seq = evictable_min_seq(min_seq, MAX_SWAPPINESS / 2);
else if (max_seq >= MAX_NR_GENS)
seq = max_seq - MAX_NR_GENS + 1;
else
@@ -5390,23 +5358,14 @@ static const struct seq_operations lru_g
};
static int run_aging(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
DEFINE_MAX_SEQ(lruvec);
- DEFINE_MIN_SEQ(lruvec);
-
- if (seq < max_seq)
- return 0;
if (seq > max_seq)
return -EINVAL;
- if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
- return -ERANGE;
-
- try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
-
- return 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, force_scan) ? 0 : -EEXIST;
}
static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
@@ -5422,7 +5381,7 @@ static int run_eviction(struct lruvec *l
while (!signal_pending(current)) {
DEFINE_MIN_SEQ(lruvec);
- if (seq < min_seq[!swappiness])
+ if (seq < evictable_min_seq(min_seq, swappiness))
return 0;
if (sc->nr_reclaimed >= nr_to_reclaim)
_
Patches currently in -mm which might be from yuzhao@google.com are
mm-mglru-clean-up-workingset.patch
mm-mglru-optimize-deactivation.patch
mm-mglru-rework-aging-feedback.patch
mm-mglru-rework-type-selection.patch
mm-mglru-rework-refault-detection.patch
mm-mglru-rework-workingset-protection.patch
^ permalink raw reply [flat|nested] 4+ messages in thread
* + mm-mglru-rework-aging-feedback.patch added to mm-unstable branch
@ 2024-12-10 3:23 Andrew Morton
0 siblings, 0 replies; 4+ messages in thread
From: Andrew Morton @ 2024-12-10 3:23 UTC (permalink / raw)
To: mm-commits, stevensd, kasong, kaleshsingh, bharata, yuzhao, akpm
The patch titled
Subject: mm/mglru: rework aging feedback
has been added to the -mm mm-unstable branch. Its filename is
mm-mglru-rework-aging-feedback.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-mglru-rework-aging-feedback.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yu Zhao <yuzhao@google.com>
Subject: mm/mglru: rework aging feedback
Date: Sat, 7 Dec 2024 15:15:19 -0700
The aging feedback is based on both the number of generations and the
distribution of folios in each generation. The number of generations is
currently the distance between max_seq and anon min_seq. This is because
anon min_seq is not allowed to move past file min_seq. The rationale for
that is that file is always evictable whereas anon is not. However, for
use cases where anon is a lot cheaper than file:
1. Anon in the second oldest generation can be a better choice than
file in the oldest generation.
2. A large amount of file in the oldest generation can skew the
distribution, making should_run_aging() return false negative.
Allow anon and file min_seq to move independently, and use solely the
number of generations as the feedback for aging. Specifically, when both
anon and file are evictable, anon min_seq can now be greater than file
min_seq, and therefore the number of generations becomes the distance
between max_seq and min(min_seq[0],min_seq[1]). And should_run_aging()
returns true if and only if the number of generations is less than
MAX_NR_GENS.
As the first step to the final optimization, this change by itself
should not have userspace-visiable effects beyond performance. The
next twos patch will take advantage of this change; the last patch in
this series will better distribute folios across MAX_NR_GENS.
Link: https://lkml.kernel.org/r/20241207221522.2250311-4-yuzhao@google.com
Reported-by: David Stevens <stevensd@chromium.org>
Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/mmzone.h | 6 -
mm/vmscan.c | 217 +++++++++++++++------------------------
2 files changed, 91 insertions(+), 132 deletions(-)
--- a/include/linux/mmzone.h~mm-mglru-rework-aging-feedback
+++ a/include/linux/mmzone.h
@@ -446,8 +446,8 @@ struct lru_gen_folio {
unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
/* the exponential moving average of evicted+protected */
unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
- /* the first tier doesn't need protection, hence the minus one */
- unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
+ /* can only be modified under the LRU lock */
+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* can be modified without holding the LRU lock */
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
@@ -498,7 +498,7 @@ struct lru_gen_mm_walk {
int mm_stats[NR_MM_STATS];
/* total batched items */
int batched;
- bool can_swap;
+ int swappiness;
bool force_scan;
};
--- a/mm/vmscan.c~mm-mglru-rework-aging-feedback
+++ a/mm/vmscan.c
@@ -2623,11 +2623,17 @@ static bool should_clear_pmd_young(void)
READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \
}
+#define evictable_min_seq(min_seq, swappiness) \
+ min((min_seq)[!(swappiness)], (min_seq)[(swappiness) != MAX_SWAPPINESS])
+
#define for_each_gen_type_zone(gen, type, zone) \
for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+#define for_each_evictable_type(type, swappiness) \
+ for ((type) = !(swappiness); (type) <= ((swappiness) != MAX_SWAPPINESS); (type)++)
+
#define get_memcg_gen(seq) ((seq) % MEMCG_NR_GENS)
#define get_memcg_bin(bin) ((bin) % MEMCG_NR_BINS)
@@ -2673,10 +2679,16 @@ static int get_nr_gens(struct lruvec *lr
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
{
- /* see the comment on lru_gen_folio */
- return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
- get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
- get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
+ int type;
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ int n = get_nr_gens(lruvec, type);
+
+ if (n < MIN_NR_GENS || n > MAX_NR_GENS)
+ return false;
+ }
+
+ return true;
}
/******************************************************************************
@@ -3083,9 +3095,8 @@ static void read_ctrl_pos(struct lruvec
pos->refaulted = lrugen->avg_refaulted[type][tier] +
atomic_long_read(&lrugen->refaulted[hist][type][tier]);
pos->total = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- pos->total += lrugen->protected[hist][type][tier - 1];
pos->gain = gain;
}
@@ -3112,17 +3123,15 @@ static void reset_ctrl_pos(struct lruvec
WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
sum = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- sum += lrugen->protected[hist][type][tier - 1];
WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
}
if (clear) {
atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
- if (tier)
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
+ WRITE_ONCE(lrugen->protected[hist][type][tier], 0);
}
}
}
@@ -3257,7 +3266,7 @@ static int should_skip_vma(unsigned long
return true;
if (vma_is_anonymous(vma))
- return !walk->can_swap;
+ return !walk->swappiness;
if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping))
return true;
@@ -3267,7 +3276,10 @@ static int should_skip_vma(unsigned long
return true;
if (shmem_mapping(mapping))
- return !walk->can_swap;
+ return !walk->swappiness;
+
+ if (walk->swappiness == MAX_SWAPPINESS)
+ return true;
/* to exclude special mappings like dax, etc. */
return !mapping->a_ops->read_folio;
@@ -3355,7 +3367,7 @@ static unsigned long get_pmd_pfn(pmd_t p
}
static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
- struct pglist_data *pgdat, bool can_swap)
+ struct pglist_data *pgdat)
{
struct folio *folio;
@@ -3366,10 +3378,6 @@ static struct folio *get_pfn_folio(unsig
if (folio_memcg(folio) != memcg)
return NULL;
- /* file VMAs can contain anon pages from COW */
- if (!folio_is_file_lru(folio) && !can_swap)
- return NULL;
-
return folio;
}
@@ -3425,7 +3433,7 @@ restart:
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -3510,7 +3518,7 @@ static void walk_pmd_range_locked(pud_t
if (pfn == -1)
goto next;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
goto next;
@@ -3722,22 +3730,26 @@ static void clear_mm_walk(void)
kfree(walk);
}
-static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
+static bool inc_min_seq(struct lruvec *lruvec, int type, int swappiness)
{
int zone;
int remaining = MAX_LRU_BATCH;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
- if (type == LRU_GEN_ANON && !can_swap)
+ if (type ? swappiness == MAX_SWAPPINESS : !swappiness)
goto done;
- /* prevent cold/hot inversion if force_scan is true */
+ /* prevent cold/hot inversion if the type is evictable */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
struct list_head *head = &lrugen->folios[old_gen][type][zone];
while (!list_empty(head)) {
struct folio *folio = lru_to_folio(head);
+ int refs = folio_lru_refs(folio);
+ int tier = lru_tier_from_refs(refs);
+ int delta = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
@@ -3747,6 +3759,9 @@ static bool inc_min_seq(struct lruvec *l
new_gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
+
if (!--remaining)
return false;
}
@@ -3758,51 +3773,37 @@ done:
return true;
}
-static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
+static bool try_to_inc_min_seq(struct lruvec *lruvec, int swappiness)
{
int gen, type, zone;
bool success = false;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
- DEFINE_MIN_SEQ(lruvec);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
- /* find the oldest populated generation */
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
- gen = lru_gen_from_seq(min_seq[type]);
+ for_each_evictable_type(type, swappiness) {
+ unsigned long seq;
+
+ for (seq = lrugen->min_seq[type]; seq + MIN_NR_GENS <= lrugen->max_seq; seq++) {
+ gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
if (!list_empty(&lrugen->folios[gen][type][zone]))
goto next;
}
-
- min_seq[type]++;
}
next:
- ;
- }
-
- /* see the comment on lru_gen_folio */
- if (can_swap) {
- min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
- min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
- }
-
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- if (min_seq[type] == lrugen->min_seq[type])
- continue;
-
- reset_ctrl_pos(lruvec, type, true);
- WRITE_ONCE(lrugen->min_seq[type], min_seq[type]);
- success = true;
+ if (seq != lrugen->min_seq[type]) {
+ reset_ctrl_pos(lruvec, type, true);
+ WRITE_ONCE(lrugen->min_seq[type], seq);
+ success = true;
+ }
}
return success;
}
-static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
{
bool success;
int prev, next;
@@ -3820,13 +3821,11 @@ restart:
if (!success)
goto unlock;
- for (type = ANON_AND_FILE - 1; type >= 0; type--) {
+ for (type = 0; type < ANON_AND_FILE; type++) {
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
- VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
-
- if (inc_min_seq(lruvec, type, can_swap))
+ if (inc_min_seq(lruvec, type, swappiness))
continue;
spin_unlock_irq(&lruvec->lru_lock);
@@ -3870,7 +3869,7 @@ unlock:
}
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -3881,7 +3880,7 @@ static bool try_to_inc_max_seq(struct lr
VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq));
if (!mm_state)
- return inc_max_seq(lruvec, seq, can_swap, force_scan);
+ return inc_max_seq(lruvec, seq, swappiness);
/* see the comment in iterate_mm_list() */
if (seq <= READ_ONCE(mm_state->seq))
@@ -3906,7 +3905,7 @@ static bool try_to_inc_max_seq(struct lr
walk->lruvec = lruvec;
walk->seq = seq;
- walk->can_swap = can_swap;
+ walk->swappiness = swappiness;
walk->force_scan = force_scan;
do {
@@ -3916,7 +3915,7 @@ static bool try_to_inc_max_seq(struct lr
} while (mm);
done:
if (success) {
- success = inc_max_seq(lruvec, seq, can_swap, force_scan);
+ success = inc_max_seq(lruvec, seq, swappiness);
WARN_ON_ONCE(!success);
}
@@ -3957,13 +3956,13 @@ static bool lruvec_is_sizable(struct lru
{
int gen, type, zone;
unsigned long total = 0;
- bool can_swap = get_swappiness(lruvec, sc);
+ int swappiness = get_swappiness(lruvec, sc);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
DEFINE_MIN_SEQ(lruvec);
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
@@ -3983,6 +3982,7 @@ static bool lruvec_is_reclaimable(struct
{
int gen;
unsigned long birth;
+ int swappiness = get_swappiness(lruvec, sc);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
@@ -3992,8 +3992,7 @@ static bool lruvec_is_reclaimable(struct
if (!lruvec_is_sizable(lruvec, sc))
return false;
- /* see the comment on lru_gen_folio */
- gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ gen = lru_gen_from_seq(evictable_min_seq(min_seq, swappiness));
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
return time_is_before_jiffies(birth + min_ttl);
@@ -4060,7 +4059,6 @@ bool lru_gen_look_around(struct page_vma
unsigned long addr = pvmw->address;
struct vm_area_struct *vma = pvmw->vma;
struct folio *folio = pfn_folio(pvmw->pfn);
- bool can_swap = !folio_is_file_lru(folio);
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
@@ -4113,7 +4111,7 @@ bool lru_gen_look_around(struct page_vma
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -4329,8 +4327,8 @@ static bool sort_folio(struct lruvec *lr
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
- lrugen->protected[hist][type][tier - 1] + delta);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
return true;
}
@@ -4529,7 +4527,6 @@ static int isolate_folios(struct lruvec
{
int i;
int type;
- int scanned;
int tier = -1;
DEFINE_MIN_SEQ(lruvec);
@@ -4554,21 +4551,23 @@ static int isolate_folios(struct lruvec
else
type = get_type_to_scan(lruvec, swappiness, &tier);
- for (i = !swappiness; i < ANON_AND_FILE; i++) {
+ for_each_evictable_type(i, swappiness) {
+ int scanned;
+
if (tier < 0)
tier = get_tier_idx(lruvec, type);
+ *type_scanned = type;
+
scanned = scan_folios(lruvec, sc, type, tier, list);
if (scanned)
- break;
+ return scanned;
type = !type;
tier = -1;
}
- *type_scanned = type;
-
- return scanned;
+ return 0;
}
static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
@@ -4584,6 +4583,7 @@ static int evict_folios(struct lruvec *l
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -4593,7 +4593,7 @@ static int evict_folios(struct lruvec *l
scanned += try_to_inc_min_seq(lruvec, swappiness);
- if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS)
+ if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
scanned = 0;
spin_unlock_irq(&lruvec->lru_lock);
@@ -4665,63 +4665,32 @@ retry:
}
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
- bool can_swap, unsigned long *nr_to_scan)
+ int swappiness, unsigned long *nr_to_scan)
{
int gen, type, zone;
- unsigned long old = 0;
- unsigned long young = 0;
- unsigned long total = 0;
+ unsigned long size = 0;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
DEFINE_MIN_SEQ(lruvec);
- /* whether this lruvec is completely out of cold folios */
- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
- *nr_to_scan = 0;
+ *nr_to_scan = 0;
+ /* have to run aging, since eviction is not possible anymore */
+ if (evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS > max_seq)
return true;
- }
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
- unsigned long size = 0;
-
gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++)
size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
-
- total += size;
- if (seq == max_seq)
- young += size;
- else if (seq + MIN_NR_GENS == max_seq)
- old += size;
}
}
- *nr_to_scan = total;
-
- /*
- * The aging tries to be lazy to reduce the overhead, while the eviction
- * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
- * ideal number of generations is MIN_NR_GENS+1.
- */
- if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
- return false;
-
- /*
- * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
- * of the total number of pages for each generation. A reasonable range
- * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
- * aging cares about the upper bound of hot pages, while the eviction
- * cares about the lower bound of cold pages.
- */
- if (young * MIN_NR_GENS > total)
- return true;
- if (old * (MIN_NR_GENS + 2) < total)
- return true;
-
- return false;
+ *nr_to_scan = size;
+ /* better to run aging even though eviction is still possible */
+ return evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS == max_seq;
}
/*
@@ -4729,7 +4698,7 @@ static bool should_run_aging(struct lruv
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
* reclaim.
*/
-static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
{
bool success;
unsigned long nr_to_scan;
@@ -4739,7 +4708,7 @@ static long get_nr_to_scan(struct lruvec
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
return -1;
- success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan);
+ success = should_run_aging(lruvec, max_seq, swappiness, &nr_to_scan);
/* try to scrape all its memory if this memcg was deleted */
if (nr_to_scan && !mem_cgroup_online(memcg))
@@ -4750,7 +4719,7 @@ static long get_nr_to_scan(struct lruvec
return nr_to_scan >> sc->priority;
/* stop scanning this lruvec as it's low on cold folios */
- return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, false) ? -1 : 0;
}
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
@@ -5294,8 +5263,7 @@ static void lru_gen_seq_show_full(struct
s = "rep";
n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier]);
}
for (i = 0; i < 3; i++)
@@ -5350,7 +5318,7 @@ static int lru_gen_seq_show(struct seq_f
seq_printf(m, " node %5d\n", nid);
if (!full)
- seq = min_seq[LRU_GEN_ANON];
+ seq = evictable_min_seq(min_seq, MAX_SWAPPINESS / 2);
else if (max_seq >= MAX_NR_GENS)
seq = max_seq - MAX_NR_GENS + 1;
else
@@ -5390,23 +5358,14 @@ static const struct seq_operations lru_g
};
static int run_aging(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
DEFINE_MAX_SEQ(lruvec);
- DEFINE_MIN_SEQ(lruvec);
-
- if (seq < max_seq)
- return 0;
if (seq > max_seq)
return -EINVAL;
- if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
- return -ERANGE;
-
- try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
-
- return 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, force_scan) ? 0 : -EEXIST;
}
static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
@@ -5422,7 +5381,7 @@ static int run_eviction(struct lruvec *l
while (!signal_pending(current)) {
DEFINE_MIN_SEQ(lruvec);
- if (seq < min_seq[!swappiness])
+ if (seq < evictable_min_seq(min_seq, swappiness))
return 0;
if (sc->nr_reclaimed >= nr_to_reclaim)
_
Patches currently in -mm which might be from yuzhao@google.com are
mm-mglru-clean-up-workingset.patch
mm-mglru-optimize-deactivation.patch
mm-mglru-rework-aging-feedback.patch
mm-mglru-rework-type-selection.patch
mm-mglru-rework-refault-detection.patch
mm-mglru-rework-workingset-protection.patch
^ permalink raw reply [flat|nested] 4+ messages in thread
* + mm-mglru-rework-aging-feedback.patch added to mm-unstable branch
@ 2025-01-02 23:57 Andrew Morton
0 siblings, 0 replies; 4+ messages in thread
From: Andrew Morton @ 2025-01-02 23:57 UTC (permalink / raw)
To: mm-commits, v-songbaohua, stevensd, kasong, kaleshsingh, bharata,
yuzhao, akpm
The patch titled
Subject: mm/mglru: rework aging feedback
has been added to the -mm mm-unstable branch. Its filename is
mm-mglru-rework-aging-feedback.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-mglru-rework-aging-feedback.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yu Zhao <yuzhao@google.com>
Subject: mm/mglru: rework aging feedback
Date: Mon, 30 Dec 2024 21:35:34 -0700
The aging feedback is based on both the number of generations and the
distribution of folios in each generation. The number of generations is
currently the distance between max_seq and anon min_seq. This is because
anon min_seq is not allowed to move past file min_seq. The rationale for
that is that file is always evictable whereas anon is not. However, for
use cases where anon is a lot cheaper than file:
1. Anon in the second oldest generation can be a better choice than
file in the oldest generation.
2. A large amount of file in the oldest generation can skew the
distribution, making should_run_aging() return false negative.
Allow anon and file min_seq to move independently, and use solely the
number of generations as the feedback for aging. Specifically, when both
anon and file are evictable, anon min_seq can now be greater than file
min_seq, and therefore the number of generations becomes the distance
between max_seq and min(min_seq[0],min_seq[1]). And should_run_aging()
returns true if and only if the number of generations is less than
MAX_NR_GENS.
As the first step to the final optimization, this change by itself should
not have userspace-visiable effects beyond performance. The next twos
patch will take advantage of this change; the last patch in this series
will better distribute folios across MAX_NR_GENS.
Link: https://lkml.kernel.org/r/20241231043538.4075764-4-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: David Stevens <stevensd@chromium.org>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/mmzone.h | 17 +--
mm/vmscan.c | 200 +++++++++++++++++----------------------
2 files changed, 96 insertions(+), 121 deletions(-)
--- a/include/linux/mmzone.h~mm-mglru-rework-aging-feedback
+++ a/include/linux/mmzone.h
@@ -421,12 +421,11 @@ enum {
/*
* The youngest generation number is stored in max_seq for both anon and file
* types as they are aged on an equal footing. The oldest generation numbers are
- * stored in min_seq[] separately for anon and file types as clean file pages
- * can be evicted regardless of swap constraints.
- *
- * Normally anon and file min_seq are in sync. But if swapping is constrained,
- * e.g., out of swap space, file min_seq is allowed to advance and leave anon
- * min_seq behind.
+ * stored in min_seq[] separately for anon and file types so that they can be
+ * incremented independently. Ideally min_seq[] are kept in sync when both anon
+ * and file types are evictable. However, to adapt to situations like extreme
+ * swappiness, they are allowed to be out of sync by at most
+ * MAX_NR_GENS-MIN_NR_GENS-1.
*
* The number of pages in each generation is eventually consistent and therefore
* can be transiently negative when reset_batch_size() is pending.
@@ -446,8 +445,8 @@ struct lru_gen_folio {
unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
/* the exponential moving average of evicted+protected */
unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
- /* the first tier doesn't need protection, hence the minus one */
- unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
+ /* can only be modified under the LRU lock */
+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* can be modified without holding the LRU lock */
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
@@ -498,7 +497,7 @@ struct lru_gen_mm_walk {
int mm_stats[NR_MM_STATS];
/* total batched items */
int batched;
- bool can_swap;
+ int swappiness;
bool force_scan;
};
--- a/mm/vmscan.c~mm-mglru-rework-aging-feedback
+++ a/mm/vmscan.c
@@ -2627,11 +2627,17 @@ static bool should_clear_pmd_young(void)
READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \
}
+#define evictable_min_seq(min_seq, swappiness) \
+ min((min_seq)[!(swappiness)], (min_seq)[(swappiness) != MAX_SWAPPINESS])
+
#define for_each_gen_type_zone(gen, type, zone) \
for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+#define for_each_evictable_type(type, swappiness) \
+ for ((type) = !(swappiness); (type) <= ((swappiness) != MAX_SWAPPINESS); (type)++)
+
#define get_memcg_gen(seq) ((seq) % MEMCG_NR_GENS)
#define get_memcg_bin(bin) ((bin) % MEMCG_NR_BINS)
@@ -2677,10 +2683,16 @@ static int get_nr_gens(struct lruvec *lr
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
{
- /* see the comment on lru_gen_folio */
- return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
- get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
- get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
+ int type;
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ int n = get_nr_gens(lruvec, type);
+
+ if (n < MIN_NR_GENS || n > MAX_NR_GENS)
+ return false;
+ }
+
+ return true;
}
/******************************************************************************
@@ -3087,9 +3099,8 @@ static void read_ctrl_pos(struct lruvec
pos->refaulted = lrugen->avg_refaulted[type][tier] +
atomic_long_read(&lrugen->refaulted[hist][type][tier]);
pos->total = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- pos->total += lrugen->protected[hist][type][tier - 1];
pos->gain = gain;
}
@@ -3116,17 +3127,15 @@ static void reset_ctrl_pos(struct lruvec
WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
sum = lrugen->avg_total[type][tier] +
+ lrugen->protected[hist][type][tier] +
atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- sum += lrugen->protected[hist][type][tier - 1];
WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
}
if (clear) {
atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
- if (tier)
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
+ WRITE_ONCE(lrugen->protected[hist][type][tier], 0);
}
}
}
@@ -3261,7 +3270,7 @@ static int should_skip_vma(unsigned long
return true;
if (vma_is_anonymous(vma))
- return !walk->can_swap;
+ return !walk->swappiness;
if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping))
return true;
@@ -3271,7 +3280,10 @@ static int should_skip_vma(unsigned long
return true;
if (shmem_mapping(mapping))
- return !walk->can_swap;
+ return !walk->swappiness;
+
+ if (walk->swappiness == MAX_SWAPPINESS)
+ return true;
/* to exclude special mappings like dax, etc. */
return !mapping->a_ops->read_folio;
@@ -3359,7 +3371,7 @@ static unsigned long get_pmd_pfn(pmd_t p
}
static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
- struct pglist_data *pgdat, bool can_swap)
+ struct pglist_data *pgdat)
{
struct folio *folio;
@@ -3370,10 +3382,6 @@ static struct folio *get_pfn_folio(unsig
if (folio_memcg(folio) != memcg)
return NULL;
- /* file VMAs can contain anon pages from COW */
- if (!folio_is_file_lru(folio) && !can_swap)
- return NULL;
-
return folio;
}
@@ -3429,7 +3437,7 @@ restart:
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -3514,7 +3522,7 @@ static void walk_pmd_range_locked(pud_t
if (pfn == -1)
goto next;
- folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
goto next;
@@ -3726,22 +3734,26 @@ static void clear_mm_walk(void)
kfree(walk);
}
-static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
+static bool inc_min_seq(struct lruvec *lruvec, int type, int swappiness)
{
int zone;
int remaining = MAX_LRU_BATCH;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
- if (type == LRU_GEN_ANON && !can_swap)
+ if (type ? swappiness == MAX_SWAPPINESS : !swappiness)
goto done;
- /* prevent cold/hot inversion if force_scan is true */
+ /* prevent cold/hot inversion if the type is evictable */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
struct list_head *head = &lrugen->folios[old_gen][type][zone];
while (!list_empty(head)) {
struct folio *folio = lru_to_folio(head);
+ int refs = folio_lru_refs(folio);
+ int tier = lru_tier_from_refs(refs);
+ int delta = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
@@ -3751,6 +3763,9 @@ static bool inc_min_seq(struct lruvec *l
new_gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
+
if (!--remaining)
return false;
}
@@ -3762,7 +3777,7 @@ done:
return true;
}
-static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
+static bool try_to_inc_min_seq(struct lruvec *lruvec, int swappiness)
{
int gen, type, zone;
bool success = false;
@@ -3772,7 +3787,7 @@ static bool try_to_inc_min_seq(struct lr
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
/* find the oldest populated generation */
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
gen = lru_gen_from_seq(min_seq[type]);
@@ -3788,13 +3803,17 @@ next:
}
/* see the comment on lru_gen_folio */
- if (can_swap) {
- min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
- min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
+ if (swappiness && swappiness != MAX_SWAPPINESS) {
+ unsigned long seq = lrugen->max_seq - MIN_NR_GENS;
+
+ if (min_seq[LRU_GEN_ANON] > seq && min_seq[LRU_GEN_FILE] < seq)
+ min_seq[LRU_GEN_ANON] = seq;
+ else if (min_seq[LRU_GEN_FILE] > seq && min_seq[LRU_GEN_ANON] < seq)
+ min_seq[LRU_GEN_FILE] = seq;
}
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- if (min_seq[type] == lrugen->min_seq[type])
+ for_each_evictable_type(type, swappiness) {
+ if (min_seq[type] <= lrugen->min_seq[type])
continue;
reset_ctrl_pos(lruvec, type, true);
@@ -3805,8 +3824,7 @@ next:
return success;
}
-static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
{
bool success;
int prev, next;
@@ -3824,13 +3842,11 @@ restart:
if (!success)
goto unlock;
- for (type = ANON_AND_FILE - 1; type >= 0; type--) {
+ for (type = 0; type < ANON_AND_FILE; type++) {
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
- VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
-
- if (inc_min_seq(lruvec, type, can_swap))
+ if (inc_min_seq(lruvec, type, swappiness))
continue;
spin_unlock_irq(&lruvec->lru_lock);
@@ -3874,7 +3890,7 @@ unlock:
}
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -3885,7 +3901,7 @@ static bool try_to_inc_max_seq(struct lr
VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq));
if (!mm_state)
- return inc_max_seq(lruvec, seq, can_swap, force_scan);
+ return inc_max_seq(lruvec, seq, swappiness);
/* see the comment in iterate_mm_list() */
if (seq <= READ_ONCE(mm_state->seq))
@@ -3910,7 +3926,7 @@ static bool try_to_inc_max_seq(struct lr
walk->lruvec = lruvec;
walk->seq = seq;
- walk->can_swap = can_swap;
+ walk->swappiness = swappiness;
walk->force_scan = force_scan;
do {
@@ -3920,7 +3936,7 @@ static bool try_to_inc_max_seq(struct lr
} while (mm);
done:
if (success) {
- success = inc_max_seq(lruvec, seq, can_swap, force_scan);
+ success = inc_max_seq(lruvec, seq, swappiness);
WARN_ON_ONCE(!success);
}
@@ -3961,13 +3977,13 @@ static bool lruvec_is_sizable(struct lru
{
int gen, type, zone;
unsigned long total = 0;
- bool can_swap = get_swappiness(lruvec, sc);
+ int swappiness = get_swappiness(lruvec, sc);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
DEFINE_MIN_SEQ(lruvec);
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
@@ -3987,6 +4003,7 @@ static bool lruvec_is_reclaimable(struct
{
int gen;
unsigned long birth;
+ int swappiness = get_swappiness(lruvec, sc);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
@@ -3996,8 +4013,7 @@ static bool lruvec_is_reclaimable(struct
if (!lruvec_is_sizable(lruvec, sc))
return false;
- /* see the comment on lru_gen_folio */
- gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ gen = lru_gen_from_seq(evictable_min_seq(min_seq, swappiness));
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
return time_is_before_jiffies(birth + min_ttl);
@@ -4064,7 +4080,6 @@ bool lru_gen_look_around(struct page_vma
unsigned long addr = pvmw->address;
struct vm_area_struct *vma = pvmw->vma;
struct folio *folio = pfn_folio(pvmw->pfn);
- bool can_swap = !folio_is_file_lru(folio);
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
@@ -4117,7 +4132,7 @@ bool lru_gen_look_around(struct page_vma
if (pfn == -1)
continue;
- folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
+ folio = get_pfn_folio(pfn, memcg, pgdat);
if (!folio)
continue;
@@ -4333,8 +4348,8 @@ static bool sort_folio(struct lruvec *lr
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
- lrugen->protected[hist][type][tier - 1] + delta);
+ WRITE_ONCE(lrugen->protected[hist][type][tier],
+ lrugen->protected[hist][type][tier] + delta);
return true;
}
@@ -4533,7 +4548,6 @@ static int isolate_folios(struct lruvec
{
int i;
int type;
- int scanned;
int tier = -1;
DEFINE_MIN_SEQ(lruvec);
@@ -4558,21 +4572,23 @@ static int isolate_folios(struct lruvec
else
type = get_type_to_scan(lruvec, swappiness, &tier);
- for (i = !swappiness; i < ANON_AND_FILE; i++) {
+ for_each_evictable_type(i, swappiness) {
+ int scanned;
+
if (tier < 0)
tier = get_tier_idx(lruvec, type);
+ *type_scanned = type;
+
scanned = scan_folios(lruvec, sc, type, tier, list);
if (scanned)
- break;
+ return scanned;
type = !type;
tier = -1;
}
- *type_scanned = type;
-
- return scanned;
+ return 0;
}
static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
@@ -4588,6 +4604,7 @@ static int evict_folios(struct lruvec *l
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -4597,7 +4614,7 @@ static int evict_folios(struct lruvec *l
scanned += try_to_inc_min_seq(lruvec, swappiness);
- if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS)
+ if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
scanned = 0;
spin_unlock_irq(&lruvec->lru_lock);
@@ -4669,63 +4686,32 @@ retry:
}
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
- bool can_swap, unsigned long *nr_to_scan)
+ int swappiness, unsigned long *nr_to_scan)
{
int gen, type, zone;
- unsigned long old = 0;
- unsigned long young = 0;
- unsigned long total = 0;
+ unsigned long size = 0;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
DEFINE_MIN_SEQ(lruvec);
- /* whether this lruvec is completely out of cold folios */
- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
- *nr_to_scan = 0;
+ *nr_to_scan = 0;
+ /* have to run aging, since eviction is not possible anymore */
+ if (evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS > max_seq)
return true;
- }
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ for_each_evictable_type(type, swappiness) {
unsigned long seq;
for (seq = min_seq[type]; seq <= max_seq; seq++) {
- unsigned long size = 0;
-
gen = lru_gen_from_seq(seq);
for (zone = 0; zone < MAX_NR_ZONES; zone++)
size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
-
- total += size;
- if (seq == max_seq)
- young += size;
- else if (seq + MIN_NR_GENS == max_seq)
- old += size;
}
}
- *nr_to_scan = total;
-
- /*
- * The aging tries to be lazy to reduce the overhead, while the eviction
- * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
- * ideal number of generations is MIN_NR_GENS+1.
- */
- if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
- return false;
-
- /*
- * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
- * of the total number of pages for each generation. A reasonable range
- * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
- * aging cares about the upper bound of hot pages, while the eviction
- * cares about the lower bound of cold pages.
- */
- if (young * MIN_NR_GENS > total)
- return true;
- if (old * (MIN_NR_GENS + 2) < total)
- return true;
-
- return false;
+ *nr_to_scan = size;
+ /* better to run aging even though eviction is still possible */
+ return evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS == max_seq;
}
/*
@@ -4733,7 +4719,7 @@ static bool should_run_aging(struct lruv
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
* reclaim.
*/
-static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
{
bool success;
unsigned long nr_to_scan;
@@ -4743,7 +4729,7 @@ static long get_nr_to_scan(struct lruvec
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
return -1;
- success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan);
+ success = should_run_aging(lruvec, max_seq, swappiness, &nr_to_scan);
/* try to scrape all its memory if this memcg was deleted */
if (nr_to_scan && !mem_cgroup_online(memcg))
@@ -4754,7 +4740,7 @@ static long get_nr_to_scan(struct lruvec
return nr_to_scan >> sc->priority;
/* stop scanning this lruvec as it's low on cold folios */
- return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, false) ? -1 : 0;
}
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
@@ -5298,8 +5284,7 @@ static void lru_gen_seq_show_full(struct
s = "rep";
n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier]);
}
for (i = 0; i < 3; i++)
@@ -5354,7 +5339,7 @@ static int lru_gen_seq_show(struct seq_f
seq_printf(m, " node %5d\n", nid);
if (!full)
- seq = min_seq[LRU_GEN_ANON];
+ seq = evictable_min_seq(min_seq, MAX_SWAPPINESS / 2);
else if (max_seq >= MAX_NR_GENS)
seq = max_seq - MAX_NR_GENS + 1;
else
@@ -5394,23 +5379,14 @@ static const struct seq_operations lru_g
};
static int run_aging(struct lruvec *lruvec, unsigned long seq,
- bool can_swap, bool force_scan)
+ int swappiness, bool force_scan)
{
DEFINE_MAX_SEQ(lruvec);
- DEFINE_MIN_SEQ(lruvec);
-
- if (seq < max_seq)
- return 0;
if (seq > max_seq)
return -EINVAL;
- if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
- return -ERANGE;
-
- try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
-
- return 0;
+ return try_to_inc_max_seq(lruvec, max_seq, swappiness, force_scan) ? 0 : -EEXIST;
}
static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
@@ -5426,7 +5402,7 @@ static int run_eviction(struct lruvec *l
while (!signal_pending(current)) {
DEFINE_MIN_SEQ(lruvec);
- if (seq < min_seq[!swappiness])
+ if (seq < evictable_min_seq(min_seq, swappiness))
return 0;
if (sc->nr_reclaimed >= nr_to_reclaim)
_
Patches currently in -mm which might be from yuzhao@google.com are
mm-mglru-clean-up-workingset.patch
mm-mglru-optimize-deactivation.patch
mm-mglru-rework-aging-feedback.patch
mm-mglru-rework-type-selection.patch
mm-mglru-rework-refault-detection.patch
mm-mglru-rework-workingset-protection.patch
mm-mglru-fix-pte-mapped-large-folios.patch
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-01-02 23:57 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-06 0:58 + mm-mglru-rework-aging-feedback.patch added to mm-unstable branch Andrew Morton
-- strict thread matches above, loose matches on Subject: below --
2025-01-02 23:57 Andrew Morton
2024-12-10 3:23 Andrew Morton
2024-12-05 4:44 Andrew Morton
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.