From: Andrew Morton <akpm@linux-foundation.org>
To: mm-commits@vger.kernel.org,tjmercier@google.com,stable@vger.kernel.org,yuzhao@google.com,akpm@linux-foundation.org
Subject: [merged mm-stable] mm-mglru-fix-ineffective-protection-calculation.patch removed from -mm tree
Date: Wed, 17 Jul 2024 21:09:29 -0700 [thread overview]
Message-ID: <20240718040929.CA967C4AF0A@smtp.kernel.org> (raw)
The quilt patch titled
Subject: mm/mglru: fix ineffective protection calculation
has been removed from the -mm tree. Its filename was
mm-mglru-fix-ineffective-protection-calculation.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yu Zhao <yuzhao@google.com>
Subject: mm/mglru: fix ineffective protection calculation
Date: Fri, 12 Jul 2024 17:29:56 -0600
mem_cgroup_calculate_protection() is not stateless and should only be used
as part of a top-down tree traversal. shrink_one() traverses the per-node
memcg LRU instead of the root_mem_cgroup tree, and therefore it should not
call mem_cgroup_calculate_protection().
The existing misuse in shrink_one() can cause ineffective protection of
sub-trees that are grandchildren of root_mem_cgroup. Fix it by reusing
lru_gen_age_node(), which already traverses the root_mem_cgroup tree, to
calculate the protection.
Previously lru_gen_age_node() opportunistically skips the first pass,
i.e., when scan_control->priority is DEF_PRIORITY. On the second pass,
lruvec_is_sizable() uses appropriate scan_control->priority, set by
set_initial_priority() from lru_gen_shrink_node(), to decide whether a
memcg is too small to reclaim from.
Now lru_gen_age_node() unconditionally traverses the root_mem_cgroup tree.
So it should call set_initial_priority() upfront, to make sure
lruvec_is_sizable() uses appropriate scan_control->priority on the first
pass. Otherwise, lruvec_is_reclaimable() can return false negatives and
result in premature OOM kills when min_ttl_ms is used.
Link: https://lkml.kernel.org/r/20240712232956.1427127-1-yuzhao@google.com
Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: T.J. Mercier <tjmercier@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/vmscan.c | 82 +++++++++++++++++++++++---------------------------
1 file changed, 38 insertions(+), 44 deletions(-)
--- a/mm/vmscan.c~mm-mglru-fix-ineffective-protection-calculation
+++ a/mm/vmscan.c
@@ -3915,6 +3915,32 @@ done:
* working set protection
******************************************************************************/
+static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
+{
+ int priority;
+ unsigned long reclaimable;
+
+ if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
+ return;
+ /*
+ * Determine the initial priority based on
+ * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
+ * where reclaimed_to_scanned_ratio = inactive / total.
+ */
+ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
+ if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
+ reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ /* round down reclaimable and round up sc->nr_to_reclaim */
+ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
+
+ /*
+ * The estimation is based on LRU pages only, so cap it to prevent
+ * overshoots of shrinker objects by large margins.
+ */
+ sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
+}
+
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
{
int gen, type, zone;
@@ -3948,19 +3974,17 @@ static bool lruvec_is_reclaimable(struct
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
- /* see the comment on lru_gen_folio */
- gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
- birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
-
- if (time_is_after_jiffies(birth + min_ttl))
+ if (mem_cgroup_below_min(NULL, memcg))
return false;
if (!lruvec_is_sizable(lruvec, sc))
return false;
- mem_cgroup_calculate_protection(NULL, memcg);
+ /* see the comment on lru_gen_folio */
+ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
- return !mem_cgroup_below_min(NULL, memcg);
+ return time_is_before_jiffies(birth + min_ttl);
}
/* to protect the working set of the last N jiffies */
@@ -3970,23 +3994,20 @@ static void lru_gen_age_node(struct pgli
{
struct mem_cgroup *memcg;
unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
+ bool reclaimable = !min_ttl;
VM_WARN_ON_ONCE(!current_is_kswapd());
- /* check the order to exclude compaction-induced reclaim */
- if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
- return;
+ set_initial_priority(pgdat, sc);
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
- if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
- mem_cgroup_iter_break(NULL, memcg);
- return;
- }
+ mem_cgroup_calculate_protection(NULL, memcg);
- cond_resched();
+ if (!reclaimable)
+ reclaimable = lruvec_is_reclaimable(lruvec, sc, min_ttl);
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
/*
@@ -3994,7 +4015,7 @@ static void lru_gen_age_node(struct pgli
* younger than min_ttl. However, another possibility is all memcgs are
* either too small or below min.
*/
- if (mutex_trylock(&oom_lock)) {
+ if (!reclaimable && mutex_trylock(&oom_lock)) {
struct oom_control oc = {
.gfp_mask = sc->gfp_mask,
};
@@ -4786,8 +4807,7 @@ static int shrink_one(struct lruvec *lru
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- mem_cgroup_calculate_protection(NULL, memcg);
-
+ /* lru_gen_age_node() called mem_cgroup_calculate_protection() */
if (mem_cgroup_below_min(NULL, memcg))
return MEMCG_LRU_YOUNG;
@@ -4911,32 +4931,6 @@ static void lru_gen_shrink_lruvec(struct
blk_finish_plug(&plug);
}
-static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
-{
- int priority;
- unsigned long reclaimable;
-
- if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
- return;
- /*
- * Determine the initial priority based on
- * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
- * where reclaimed_to_scanned_ratio = inactive / total.
- */
- reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
- if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
- reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
-
- /* round down reclaimable and round up sc->nr_to_reclaim */
- priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
-
- /*
- * The estimation is based on LRU pages only, so cap it to prevent
- * overshoots of shrinker objects by large margins.
- */
- sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
-}
-
static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
{
struct blk_plug plug;
_
Patches currently in -mm which might be from yuzhao@google.com are
reply other threads:[~2024-07-18 4:09 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240718040929.CA967C4AF0A@smtp.kernel.org \
--to=akpm@linux-foundation.org \
--cc=mm-commits@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=tjmercier@google.com \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.