public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
@ 2026-03-17 10:00 Daniil Tatianin
  2026-03-17 19:17 ` Andrew Morton
  0 siblings, 1 reply; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-17 10:00 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Daniil Tatianin, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Axel Rasmussen, Yuanchu Xie, Wei Xu,
	Brendan Jackman, Zi Yan, cgroups, linux-mm, linux-kernel, yc-core

The current global sysctl compact_unevictable_allowed is too coarse.
In environments with mixed workloads, we may want to protect specific
important cgroups from compaction to ensure their stability and
responsiveness, while allowing compaction for others.

This patch introduces a per-memcg compact_unevictable_allowed attribute.
This allows granular control over whether unevictable pages in a specific
cgroup can be compacted. The global sysctl still takes precedence: if it
disallows compacting unevictable pages, the per-cgroup setting is ignored;
otherwise the new attribute lets specific cgroups opt out.

This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
isolate_migratepages_block to preserve the old behavior for the
ISOLATE_UNEVICTABLE flag unconditionally used by
isolate_migratepages_range.

Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
---
 include/linux/memcontrol.h | 19 ++++++++++++++++++
 include/linux/mmzone.h     |  5 +++++
 mm/compaction.c            | 21 +++++++++++++++++---
 mm/memcontrol.c            | 40 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 70b685a85bf4..13b7ef6cf511 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -227,6 +227,12 @@ struct mem_cgroup {
 	 */
 	bool oom_group;
 
+	/*
+	 * Is compaction allowed to take unevictable pages accounted to
+	 * this cgroup?
+	 */
+	bool compact_unevictable_allowed;
+
 	int swappiness;
 
 	/* memory.events and memory.events.local */
@@ -640,6 +646,14 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
 		page_counter_read(&memcg->memory);
 }
 
+static inline bool mem_cgroup_compact_unevictable_allowed(struct mem_cgroup *memcg)
+{
+	if (mem_cgroup_disabled() || !memcg)
+		return true;
+
+	return READ_ONCE(memcg->compact_unevictable_allowed);
+}
+
 int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
 
 /**
@@ -1092,6 +1106,11 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
+static inline bool mem_cgroup_compact_unevictable_allowed(struct mem_cgroup *memcg)
+{
+	return true;
+}
+
 static inline void memcg_memory_event(struct mem_cgroup *memcg,
 				      enum memcg_memory_event event)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..dadc9b66efa1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -701,6 +701,11 @@ struct lruvec {
 #define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x4)
 /* Isolate unevictable pages */
 #define ISOLATE_UNEVICTABLE	((__force isolate_mode_t)0x8)
+/*
+ * Isolate unevictable pages, but honor the page's cgroup settings if it
+ * explicitly disallows unevictable isolation.
+ */
+#define ISOLATE_UNEVICTABLE_CHECK_MEMCG ((__force isolate_mode_t)0x10)
 
 /* LRU Isolation modes. */
 typedef unsigned __bitwise isolate_mode_t;
diff --git a/mm/compaction.c b/mm/compaction.c
index 1e8f8eca318c..0dbb81aa5d2e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1098,8 +1098,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		is_unevictable = folio_test_unevictable(folio);
 
 		/* Compaction might skip unevictable pages but CMA takes them */
-		if (!(mode & ISOLATE_UNEVICTABLE) && is_unevictable)
-			goto isolate_fail_put;
+		if (is_unevictable) {
+			if (mode & ISOLATE_UNEVICTABLE_CHECK_MEMCG) {
+				struct mem_cgroup *memcg;
+
+				rcu_read_lock();
+				memcg = folio_memcg_check(folio);
+
+				if (!mem_cgroup_compact_unevictable_allowed(memcg)) {
+					rcu_read_unlock();
+					goto isolate_fail_put;
+				}
+
+				rcu_read_unlock();
+			} else if (!(mode & ISOLATE_UNEVICTABLE))
+				goto isolate_fail_put;
+		}
 
 		/*
 		 * To minimise LRU disruption, the caller can indicate with
@@ -2049,7 +2063,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 	unsigned long low_pfn;
 	struct page *page;
 	const isolate_mode_t isolate_mode =
-		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
+		(sysctl_compact_unevictable_allowed ?
+			ISOLATE_UNEVICTABLE_CHECK_MEMCG : 0) |
 		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
 	bool fast_find_block;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 772bac21d155..bd0230d93dd8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3839,6 +3839,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	WRITE_ONCE(memcg->zswap_writeback, true);
 #endif
 	page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
+	WRITE_ONCE(memcg->compact_unevictable_allowed,
+		mem_cgroup_compact_unevictable_allowed(parent));
 	if (parent) {
 		WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
 
@@ -4608,6 +4610,37 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+static int memory_compact_unevictable_allowed_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+	seq_printf(m, "%d\n", READ_ONCE(memcg->compact_unevictable_allowed));
+
+	return 0;
+}
+
+static ssize_t memory_compact_unevictable_allowed_write(
+	struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, allowed;
+
+	buf = strstrip(buf);
+	if (!buf)
+		return -EINVAL;
+
+	ret = kstrtoint(buf, 0, &allowed);
+	if (ret)
+		return ret;
+
+	if (allowed != 0 && allowed != 1)
+		return -EINVAL;
+
+	WRITE_ONCE(memcg->compact_unevictable_allowed, allowed);
+
+	return nbytes;
+}
+
 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
@@ -4692,6 +4725,13 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NS_DELEGATABLE,
 		.write = memory_reclaim,
 	},
+	{
+		.name = "compact_unevictable_allowed",
+		/* For root use /proc/sys/vm/compact_unevictable_allowed */
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_compact_unevictable_allowed_show,
+		.write = memory_compact_unevictable_allowed_write,
+	},
 	{ }	/* terminate */
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2026-03-19  8:35 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-03-17 10:00 [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute Daniil Tatianin
2026-03-17 19:17 ` Andrew Morton
2026-03-17 20:17   ` Daniil Tatianin
2026-03-18  8:25     ` Michal Hocko
2026-03-18  9:09       ` Daniil Tatianin
     [not found]       ` <7ca9876c-f3fa-441c-9a21-ae0ee5523318@yandex-team.ru>
2026-03-18  9:20         ` Michal Hocko
2026-03-18  9:25           ` Daniil Tatianin
2026-03-18 10:01             ` Michal Hocko
2026-03-18 10:08               ` Daniil Tatianin
2026-03-18 11:47                 ` Michal Hocko
2026-03-18 14:03                   ` Daniil Tatianin
2026-03-18 19:55                     ` Shakeel Butt
2026-03-19  8:35                       ` Michal Hocko
2026-03-19  8:24                     ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox