From: "JP Kobryn (Meta)" <jp.kobryn@linux.dev>
To: linux-mm@kvack.org, akpm@linux-foundation.org, david@kernel.org,
ljs@kernel.org, Liam.Howlett@oracle.com, vbabka@kernel.org,
rppt@kernel.org, surenb@google.com, mhocko@suse.com,
kasong@tencent.com, qi.zheng@linux.dev, shakeel.butt@linux.dev,
baohua@kernel.org, axelrasmussen@google.com, yuanchu@google.com,
weixugc@google.com, hannes@cmpxchg.org, riel@surriel.com,
kuba@kernel.org, edumazet@google.com
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
kernel-team@meta.com
Subject: [PATCH] mm/vmpressure: skip socket pressure for costly order reclaim
Date: Wed, 1 Apr 2026 13:37:52 -0700 [thread overview]
Message-ID: <20260401203752.643259-1-jp.kobryn@linux.dev> (raw)
When kswapd reclaims at high order due to fragmentation, vmpressure() can
report poor reclaim efficiency even though the system has plenty of free
memory. This happens because kswapd scans many pages but finds little to
reclaim: the pages are actively in use and don't need to be freed. The
resulting scan:reclaim ratio triggers socket pressure, throttling TCP
throughput unnecessarily.
Networking allocations do not exceed order 3 (PAGE_ALLOC_COSTLY_ORDER), so
high-order reclaim difficulty should not trigger socket pressure. The
kernel already treats this order as the boundary beyond which reclaim is
no longer expected to succeed and compaction may take over.
Make vmpressure() order-aware through an additional parameter sourced from
scan_control at existing call sites. Socket pressure is now only asserted
when order <= PAGE_ALLOC_COSTLY_ORDER.
Memcg reclaim is unaffected since try_to_free_mem_cgroup_pages() always
uses order 0, which passes the filter unconditionally. Similarly,
vmpressure_prio() now passes order 0 internally when calling vmpressure(),
ensuring critical pressure from low reclaim priority is not suppressed by
the order filter.
Signed-off-by: JP Kobryn (Meta) <jp.kobryn@linux.dev>
---
include/linux/vmpressure.h | 9 +++++----
mm/vmpressure.c | 7 ++++---
mm/vmscan.c | 8 ++++----
3 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 6a2f51ebbfd35..56544e8df2937 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -30,8 +30,8 @@ struct vmpressure {
struct mem_cgroup;
#ifdef CONFIG_MEMCG
-extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
- unsigned long scanned, unsigned long reclaimed);
+extern void vmpressure(gfp_t gfp, int order, struct mem_cgroup *memcg,
+ bool tree, unsigned long scanned, unsigned long reclaimed);
extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
extern void vmpressure_init(struct vmpressure *vmpr);
@@ -44,8 +44,9 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd);
#else
-static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
- unsigned long scanned, unsigned long reclaimed) {}
+static inline void vmpressure(gfp_t gfp, int order, struct mem_cgroup *memcg,
+ bool tree, unsigned long scanned,
+ unsigned long reclaimed) {}
static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
int prio) {}
#endif /* CONFIG_MEMCG */
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 3fbb86996c4d2..7f86b9ec83b05 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -218,6 +218,7 @@ static void vmpressure_work_fn(struct work_struct *work)
/**
* vmpressure() - Account memory pressure through scanned/reclaimed ratio
* @gfp: reclaimer's gfp mask
+ * @order: allocation order being reclaimed for
* @memcg: cgroup memory controller handle
* @tree: legacy subtree mode
* @scanned: number of pages scanned
@@ -236,7 +237,7 @@ static void vmpressure_work_fn(struct work_struct *work)
*
* This function does not return any value.
*/
-void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
+void vmpressure(gfp_t gfp, int order, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed)
{
struct vmpressure *vmpr;
@@ -307,7 +308,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
level = vmpressure_calc_level(scanned, reclaimed);
- if (level > VMPRESSURE_LOW) {
+ if (level > VMPRESSURE_LOW && order <= PAGE_ALLOC_COSTLY_ORDER) {
/*
* Let the socket buffer allocator know that
* we are having trouble reclaiming LRU pages.
@@ -348,7 +349,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* to the vmpressure() basically means that we signal 'critical'
* level.
*/
- vmpressure(gfp, memcg, true, vmpressure_win, 0);
+ vmpressure(gfp, 0, memcg, true, vmpressure_win, 0);
}
#define MAX_VMPRESSURE_ARGS_LEN (strlen("critical") + strlen("hierarchy") + 2)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5a8c8fcccbfc9..1342323a0b41f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5071,8 +5071,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
if (!sc->proactive)
- vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
- sc->nr_reclaimed - reclaimed);
+ vmpressure(sc->gfp_mask, sc->order, memcg, false,
+ sc->nr_scanned - scanned, sc->nr_reclaimed - reclaimed);
flush_reclaim_state(sc);
@@ -6175,7 +6175,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
/* Record the group's reclaim efficiency */
if (!sc->proactive)
- vmpressure(sc->gfp_mask, memcg, false,
+ vmpressure(sc->gfp_mask, sc->order, memcg, false,
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
@@ -6220,7 +6220,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
/* Record the subtree's reclaim efficiency */
if (!sc->proactive)
- vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+ vmpressure(sc->gfp_mask, sc->order, sc->target_mem_cgroup, true,
sc->nr_scanned - nr_scanned, nr_node_reclaimed);
if (nr_node_reclaimed)
--
2.52.0
next reply other threads:[~2026-04-01 20:38 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-01 20:37 JP Kobryn (Meta) [this message]
2026-04-01 21:10 ` [PATCH] mm/vmpressure: skip socket pressure for costly order reclaim Johannes Weiner
2026-04-01 21:32 ` Matthew Wilcox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260401203752.643259-1-jp.kobryn@linux.dev \
--to=jp.kobryn@linux.dev \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=baohua@kernel.org \
--cc=david@kernel.org \
--cc=edumazet@google.com \
--cc=hannes@cmpxchg.org \
--cc=kasong@tencent.com \
--cc=kernel-team@meta.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=mhocko@suse.com \
--cc=netdev@vger.kernel.org \
--cc=qi.zheng@linux.dev \
--cc=riel@surriel.com \
--cc=rppt@kernel.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=vbabka@kernel.org \
--cc=weixugc@google.com \
--cc=yuanchu@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox