[PATCH] mm: add memory.compact_unevictable

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
@ 2026-03-17 10:00 Daniil Tatianin
  2026-03-17 19:17 ` Andrew Morton
  0 siblings, 1 reply; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-17 10:00 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Daniil Tatianin, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Axel Rasmussen, Yuanchu Xie, Wei Xu,
	Brendan Jackman, Zi Yan, cgroups, linux-mm, linux-kernel, yc-core

The current global sysctl compact_unevictable_allowed is too coarse.
In environments with mixed workloads, we may want to protect specific
important cgroups from compaction to ensure their stability and
responsiveness, while allowing compaction for others.

This patch introduces a per-memcg compact_unevictable_allowed attribute.
This allows granular control over whether unevictable pages in a specific
cgroup can be compacted. The global sysctl still takes precedence if set
to disallow compaction, but this new setting allows opting out specific
cgroups.

This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
isolate_migratepages_block to preserve the old behavior for the
ISOLATE_UNEVICTABLE flag unconditionally used by
isolate_migratepages_range.

Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
---
 include/linux/memcontrol.h | 19 ++++++++++++++++++
 include/linux/mmzone.h     |  5 +++++
 mm/compaction.c            | 21 +++++++++++++++++---
 mm/memcontrol.c            | 40 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 70b685a85bf4..13b7ef6cf511 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -227,6 +227,12 @@ struct mem_cgroup {
 	 */
 	bool oom_group;
 
+	/*
+	 * Is compaction allowed to take unevictable pages accounted to
+	 * this cgroup?
+	 */
+	bool compact_unevictable_allowed;
+
 	int swappiness;
 
 	/* memory.events and memory.events.local */
@@ -640,6 +646,14 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
 		page_counter_read(&memcg->memory);
 }
 
+static inline bool mem_cgroup_compact_unevictable_allowed(struct mem_cgroup *memcg)
+{
+	if (mem_cgroup_disabled() || !memcg)
+		return true;
+
+	return READ_ONCE(memcg->compact_unevictable_allowed);
+}
+
 int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
 
 /**
@@ -1092,6 +1106,11 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
+static inline bool mem_cgroup_compact_unevictable_allowed(struct mem_cgroup *memcg)
+{
+	return true;
+}
+
 static inline void memcg_memory_event(struct mem_cgroup *memcg,
 				      enum memcg_memory_event event)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..dadc9b66efa1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -701,6 +701,11 @@ struct lruvec {
 #define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x4)
 /* Isolate unevictable pages */
 #define ISOLATE_UNEVICTABLE	((__force isolate_mode_t)0x8)
+/*
+ * Isolate unevictable pages, but honor the page's cgroup settings if it
+ * explicitly disallows unevictable isolation.
+ */
+#define ISOLATE_UNEVICTABLE_CHECK_MEMCG ((__force isolate_mode_t)0x10)
 
 /* LRU Isolation modes. */
 typedef unsigned __bitwise isolate_mode_t;
diff --git a/mm/compaction.c b/mm/compaction.c
index 1e8f8eca318c..0dbb81aa5d2e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1098,8 +1098,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		is_unevictable = folio_test_unevictable(folio);
 
 		/* Compaction might skip unevictable pages but CMA takes them */
-		if (!(mode & ISOLATE_UNEVICTABLE) && is_unevictable)
-			goto isolate_fail_put;
+		if (is_unevictable) {
+			if (mode & ISOLATE_UNEVICTABLE_CHECK_MEMCG) {
+				struct mem_cgroup *memcg;
+
+				rcu_read_lock();
+				memcg = folio_memcg_check(folio);
+
+				if (!mem_cgroup_compact_unevictable_allowed(memcg)) {
+					rcu_read_unlock();
+					goto isolate_fail_put;
+				}
+
+				rcu_read_unlock();
+			} else if (!(mode & ISOLATE_UNEVICTABLE))
+				goto isolate_fail_put;
+		}
 
 		/*
 		 * To minimise LRU disruption, the caller can indicate with
@@ -2049,7 +2063,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 	unsigned long low_pfn;
 	struct page *page;
 	const isolate_mode_t isolate_mode =
-		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
+		(sysctl_compact_unevictable_allowed ?
+			ISOLATE_UNEVICTABLE_CHECK_MEMCG : 0) |
 		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
 	bool fast_find_block;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 772bac21d155..bd0230d93dd8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3839,6 +3839,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	WRITE_ONCE(memcg->zswap_writeback, true);
 #endif
 	page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
+	WRITE_ONCE(memcg->compact_unevictable_allowed,
+		mem_cgroup_compact_unevictable_allowed(parent));
 	if (parent) {
 		WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
 
@@ -4608,6 +4610,37 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+static int memory_compact_unevictable_allowed_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+	seq_printf(m, "%d\n", READ_ONCE(memcg->compact_unevictable_allowed));
+
+	return 0;
+}
+
+static ssize_t memory_compact_unevictable_allowed_write(
+	struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, allowed;
+
+	buf = strstrip(buf);
+	if (!buf)
+		return -EINVAL;
+
+	ret = kstrtoint(buf, 0, &allowed);
+	if (ret)
+		return ret;
+
+	if (allowed != 0 && allowed != 1)
+		return -EINVAL;
+
+	WRITE_ONCE(memcg->compact_unevictable_allowed, allowed);
+
+	return nbytes;
+}
+
 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
@@ -4692,6 +4725,13 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NS_DELEGATABLE,
 		.write = memory_reclaim,
 	},
+	{
+		.name = "compact_unevictable_allowed",
+		/* For root use /proc/sys/vm/compact_unevictable_allowed */
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_compact_unevictable_allowed_show,
+		.write = memory_compact_unevictable_allowed_write,
+	},
 	{ }	/* terminate */
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-17 10:00 [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute Daniil Tatianin
@ 2026-03-17 19:17 ` Andrew Morton
  2026-03-17 20:17   ` Daniil Tatianin
  0 siblings, 1 reply; 14+ messages in thread
From: Andrew Morton @ 2026-03-17 19:17 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Tue, 17 Mar 2026 13:00:58 +0300 Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:

> The current global sysctl compact_unevictable_allowed is too coarse.
> In environments with mixed workloads, we may want to protect specific
> important cgroups from compaction to ensure their stability and
> responsiveness, while allowing compaction for others.
> 
> This patch introduces a per-memcg compact_unevictable_allowed attribute.
> This allows granular control over whether unevictable pages in a specific
> cgroup can be compacted. The global sysctl still takes precedence if set
> to disallow compaction, but this new setting allows opting out specific
> cgroups.
> 
> This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
> isolate_migratepages_block to preserve the old behavior for the
> ISOLATE_UNEVICTABLE flag unconditionally used by
> isolage_migratepages_range.

AI review asked questions:
	https://sashiko.dev/#/patchset/20260317100058.2316997-1-d-tatianin@yandex-team.ru

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-17 19:17 ` Andrew Morton
@ 2026-03-17 20:17   ` Daniil Tatianin
  2026-03-18  8:25     ` Michal Hocko
  0 siblings, 1 reply; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-17 20:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core


On 3/17/26 10:17 PM, Andrew Morton wrote:
> On Tue, 17 Mar 2026 13:00:58 +0300 Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
>
>> The current global sysctl compact_unevictable_allowed is too coarse.
>> In environments with mixed workloads, we may want to protect specific
>> important cgroups from compaction to ensure their stability and
>> responsiveness, while allowing compaction for others.
>>
>> This patch introduces a per-memcg compact_unevictable_allowed attribute.
>> This allows granular control over whether unevictable pages in a specific
>> cgroup can be compacted. The global sysctl still takes precedence if set
>> to disallow compaction, but this new setting allows opting out specific
>> cgroups.
>>
>> This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
>> isolate_migratepages_block to preserve the old behavior for the
>> ISOLATE_UNEVICTABLE flag unconditionally used by
>> isolage_migratepages_range.
> AI review asked questions:
> 	https://sashiko.dev/#/patchset/20260317100058.2316997-1-d-tatianin@yandex-team.ru

> Should this dynamically walk up the ancestor chain during evaluation to
> ensure it returns false if any ancestor has disallowed compaction?

I think ultimately it's up to cgroup maintainers whether the code should 
do that, but as far as I understand the whole point of cgroups is that a 
child can override the settings of its parent. Moreover, this property 
doesn't have CFTYPE_NS_DELEGATABLE set, so a child cgroup cannot just 
toggle it at will.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-17 20:17   ` Daniil Tatianin
@ 2026-03-18  8:25     ` Michal Hocko
  2026-03-18  9:09       ` Daniil Tatianin
       [not found]       ` <7ca9876c-f3fa-441c-9a21-ae0ee5523318@yandex-team.ru>
  0 siblings, 2 replies; 14+ messages in thread
From: Michal Hocko @ 2026-03-18  8:25 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Tue 17-03-26 23:17:28, Daniil Tatianin wrote:
> 
> On 3/17/26 10:17 PM, Andrew Morton wrote:
> > On Tue, 17 Mar 2026 13:00:58 +0300 Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
> > 
> > > The current global sysctl compact_unevictable_allowed is too coarse.
> > > In environments with mixed workloads, we may want to protect specific
> > > important cgroups from compaction to ensure their stability and
> > > responsiveness, while allowing compaction for others.
> > > 
> > > This patch introduces a per-memcg compact_unevictable_allowed attribute.
> > > This allows granular control over whether unevictable pages in a specific
> > > cgroup can be compacted. The global sysctl still takes precedence if set
> > > to disallow compaction, but this new setting allows opting out specific
> > > cgroups.
> > > 
> > > This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
> > > isolate_migratepages_block to preserve the old behavior for the
> > > ISOLATE_UNEVICTABLE flag unconditionally used by
> > > isolage_migratepages_range.
> > AI review asked questions:
> > 	https://sashiko.dev/#/patchset/20260317100058.2316997-1-d-tatianin@yandex-team.ru
> 
> > Should this dynamically walk up the ancestor chain during evaluation to
> > ensure it returns false if any ancestor has disallowed compaction?
> 
> I think ultimately it's up to cgroup maintainers whether the code should do
> that, but as far as I understand the whole point of cgroups is that a child
> can override the settings of its parent. Moreover, this property doesn't
> have CFTYPE_NS_DELEGATABLE set, so a child cgroup cannot just toggle it at
> will.

In general, any attribute should have proper hierarchical semantics. I
am not sure what those should be in this case. What one child cgroup
wants can become fragmentation pressure for others.

I think it would be really important to explain those mixed-workload
use cases more thoroughly. Is the memcg even a suitable level of
abstraction for this tunable? Doesn't this belong to tasks, if anything?
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18  8:25     ` Michal Hocko
@ 2026-03-18  9:09       ` Daniil Tatianin
       [not found]       ` <7ca9876c-f3fa-441c-9a21-ae0ee5523318@yandex-team.ru>
  1 sibling, 0 replies; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-18  9:09 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On 3/18/26 11:25 AM, Michal Hocko wrote:
> On Tue 17-03-26 23:17:28, Daniil Tatianin wrote:
>> On 3/17/26 10:17 PM, Andrew Morton wrote:
>>> On Tue, 17 Mar 2026 13:00:58 +0300 Daniil Tatianin<d-tatianin@yandex-team.ru> wrote:
>>>
>>>> The current global sysctl compact_unevictable_allowed is too coarse.
>>>> In environments with mixed workloads, we may want to protect specific
>>>> important cgroups from compaction to ensure their stability and
>>>> responsiveness, while allowing compaction for others.
>>>>
>>>> This patch introduces a per-memcg compact_unevictable_allowed attribute.
>>>> This allows granular control over whether unevictable pages in a specific
>>>> cgroup can be compacted. The global sysctl still takes precedence if set
>>>> to disallow compaction, but this new setting allows opting out specific
>>>> cgroups.
>>>>
>>>> This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
>>>> isolate_migratepages_block to preserve the old behavior for the
>>>> ISOLATE_UNEVICTABLE flag unconditionally used by
>>>> isolage_migratepages_range.
>>> AI review asked questions:
>>> 	https://sashiko.dev/#/patchset/20260317100058.2316997-1-d-tatianin@yandex-team.ru
>>> Should this dynamically walk up the ancestor chain during evaluation to
>>> ensure it returns false if any ancestor has disallowed compaction?
>> I think ultimately it's up to cgroup maintainers whether the code should do
>> that, but as far as I understand the whole point of cgroups is that a child
>> can override the settings of its parent. Moreover, this property doesn't
>> have CFTYPE_NS_DELEGATABLE set, so a child cgroup cannot just toggle it at
>> will.
> In general any attributes should have proper hieararchical semantic. I
> am not sure what that should be in this case. What is a desire in a
> child cgroup can become fragmentation pressure to others.

 >
 > I think it would be really important to explain more thoroughly about
 > those usecases of mixed workloads.

I think there are many examples of a system where one process is more
important than others. For example, any sort of healthcheck or even the
ssh daemon: these may become unresponsive during heavy compaction due to
thousands of TLB invalidation IPIs or page faults on pages that are being
compacted. Another example is a VM that is responsible for routing the
traffic of all other VMs or even the entire cluster: you really want to
prioritize its responsiveness, while still allowing compaction of memory
for the rest of the system, i.e. for less important VMs, services, etc.

 > Is the memcg even a suitable level of
 > abstraction for this tunable?

In my opinion it is, since it is relatively common to put all related 
tasks into one cgroup with preset memory limits etc.

 > Doesn't this belong to tasks if anything?

I think it would be very difficult to implement properly as a per-task
attribute, since compaction works at the folio level. While folios have a
pointer to the memcg that owns them, they may be mapped by multiple
processes in the case of shared memory. We would have to find all the
address spaces mapping this folio, and then check the property on every
one of them, which may be set to different values. This may be
problematic performance-wise to do for every physical page, and it also
introduces unclear semantics if different address spaces mapping the same
page have different opinions.

(resend because of html formatting in the previous email)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
       [not found]       ` <7ca9876c-f3fa-441c-9a21-ae0ee5523318@yandex-team.ru>
@ 2026-03-18  9:20         ` Michal Hocko
  2026-03-18  9:25           ` Daniil Tatianin
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Hocko @ 2026-03-18  9:20 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Wed 18-03-26 12:04:10, Daniil Tatianin wrote:
> 
> On 3/18/26 11:25 AM, Michal Hocko wrote:
> > On Tue 17-03-26 23:17:28, Daniil Tatianin wrote:
> > > On 3/17/26 10:17 PM, Andrew Morton wrote:
> > > > On Tue, 17 Mar 2026 13:00:58 +0300 Daniil Tatianin<d-tatianin@yandex-team.ru> wrote:
> > > > 
> > > > > The current global sysctl compact_unevictable_allowed is too coarse.
> > > > > In environments with mixed workloads, we may want to protect specific
> > > > > important cgroups from compaction to ensure their stability and
> > > > > responsiveness, while allowing compaction for others.
> > > > > 
> > > > > This patch introduces a per-memcg compact_unevictable_allowed attribute.
> > > > > This allows granular control over whether unevictable pages in a specific
> > > > > cgroup can be compacted. The global sysctl still takes precedence if set
> > > > > to disallow compaction, but this new setting allows opting out specific
> > > > > cgroups.
> > > > > 
> > > > > This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
> > > > > isolate_migratepages_block to preserve the old behavior for the
> > > > > ISOLATE_UNEVICTABLE flag unconditionally used by
> > > > > isolage_migratepages_range.
> > > > AI review asked questions:
> > > > 	https://sashiko.dev/#/patchset/20260317100058.2316997-1-d-tatianin@yandex-team.ru
> > > > Should this dynamically walk up the ancestor chain during evaluation to
> > > > ensure it returns false if any ancestor has disallowed compaction?
> > > I think ultimately it's up to cgroup maintainers whether the code should do
> > > that, but as far as I understand the whole point of cgroups is that a child
> > > can override the settings of its parent. Moreover, this property doesn't
> > > have CFTYPE_NS_DELEGATABLE set, so a child cgroup cannot just toggle it at
> > > will.
> > In general any attributes should have proper hieararchical semantic. I
> > am not sure what that should be in this case. What is a desire in a
> > child cgroup can become fragmentation pressure to others.
> > 
> > I think it would be really important to explain more thoroughly about
> > those usecases of mixed workloads.
> I think there are many examples of a system where one process is more
> important than
> others. For example, any sort of healthcheck or even the ssh daemon: these
> may become
> unresponsive during heavy compaction due to thousands of TLB invalidate IPIs
> or page faulting
> on pages that are being compacted. Another example is a VM that is
> responsible for routing
> traffic of all other VMs or even the entire cluster, you really want to
> prioritize its responsiveness, while
> still allowing compaction of memory for the rest of the system, for less
> important VMs or services etc.

Shouldn't those use mlock?

> > Is the memcg even a suitable level of
> > abstraction for this tunable?
> 
> In my opinion it is, since it is relatively common to put all related tasks
> into one cgroup with preset memory limits etc.
> 
> > Doesn't this belong to tasks if anything?
> 
> I think it would be very difficult to implement as a per-task attribute
> properly since compaction works at the folio
> level. While folios have a pointer to the memcg that owns them, they may be
> mapped by multiple process in case
> of shared memory. We would have to find all the address spaces mapping this
> folio, and then check the property on
> every one of them, which may be set to different values. This may be
> problematic performance-wise to do for
> every physical page, and it also introduces unclear semantics if different
> address spaces mapping the same page
> have different opinions.

Yes, it would need to be something like an implicit mlock. I haven't
really suggested that this would be a _simpler_ solution. But as this has
obvious userspace API implications, the much more important question is
what a future-proof solution would look like. We also need an answer to
whether this is really needed, or whether it is too niche to justify an
interface that has to be maintained forever.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18  9:20         ` Michal Hocko
@ 2026-03-18  9:25           ` Daniil Tatianin
  2026-03-18 10:01             ` Michal Hocko
  0 siblings, 1 reply; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-18  9:25 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core


On 3/18/26 12:20 PM, Michal Hocko wrote:
> On Wed 18-03-26 12:04:10, Daniil Tatianin wrote:
>> On 3/18/26 11:25 AM, Michal Hocko wrote:
>>> On Tue 17-03-26 23:17:28, Daniil Tatianin wrote:
>>>> On 3/17/26 10:17 PM, Andrew Morton wrote:
>>>>> On Tue, 17 Mar 2026 13:00:58 +0300 Daniil Tatianin<d-tatianin@yandex-team.ru> wrote:
>>>>>
>>>>>> The current global sysctl compact_unevictable_allowed is too coarse.
>>>>>> In environments with mixed workloads, we may want to protect specific
>>>>>> important cgroups from compaction to ensure their stability and
>>>>>> responsiveness, while allowing compaction for others.
>>>>>>
>>>>>> This patch introduces a per-memcg compact_unevictable_allowed attribute.
>>>>>> This allows granular control over whether unevictable pages in a specific
>>>>>> cgroup can be compacted. The global sysctl still takes precedence if set
>>>>>> to disallow compaction, but this new setting allows opting out specific
>>>>>> cgroups.
>>>>>>
>>>>>> This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
>>>>>> isolate_migratepages_block to preserve the old behavior for the
>>>>>> ISOLATE_UNEVICTABLE flag unconditionally used by
>>>>>> isolage_migratepages_range.
>>>>> AI review asked questions:
>>>>> 	https://sashiko.dev/#/patchset/20260317100058.2316997-1-d-tatianin@yandex-team.ru
>>>>> Should this dynamically walk up the ancestor chain during evaluation to
>>>>> ensure it returns false if any ancestor has disallowed compaction?
>>>> I think ultimately it's up to cgroup maintainers whether the code should do
>>>> that, but as far as I understand the whole point of cgroups is that a child
>>>> can override the settings of its parent. Moreover, this property doesn't
>>>> have CFTYPE_NS_DELEGATABLE set, so a child cgroup cannot just toggle it at
>>>> will.
>>> In general any attributes should have proper hieararchical semantic. I
>>> am not sure what that should be in this case. What is a desire in a
>>> child cgroup can become fragmentation pressure to others.
>>>
>>> I think it would be really important to explain more thoroughly about
>>> those usecases of mixed workloads.
>> I think there are many examples of a system where one process is more
>> important than
>> others. For example, any sort of healthcheck or even the ssh daemon: these
>> may become
>> unresponsive during heavy compaction due to thousands of TLB invalidate IPIs
>> or page faulting
>> on pages that are being compacted. Another example is a VM that is
>> responsible for routing
>> traffic of all other VMs or even the entire cluster, you really want to
>> prioritize its responsiveness, while
>> still allowing compaction of memory for the rest of the system, for less
>> important VMs or services etc.
> Shouldn't those use mlock?

Absolutely, mlock is required to mark a folio as unevictable. Note that
unevictable folios are still perfectly eligible for compaction. This new
property lets a cgroup say whether its unevictable pages may be compacted
(same as the global compact_unevictable_allowed sysctl).

>
>>> Is the memcg even a suitable level of
>>> abstraction for this tunable?
>> In my opinion it is, since it is relatively common to put all related tasks
>> into one cgroup with preset memory limits etc.
>>
>>> Doesn't this belong to tasks if anything?
>> I think it would be very difficult to implement as a per-task attribute
>> properly since compaction works at the folio
>> level. While folios have a pointer to the memcg that owns them, they may be
>> mapped by multiple process in case
>> of shared memory. We would have to find all the address spaces mapping this
>> folio, and then check the property on
>> every one of them, which may be set to different values. This may be
>> problematic performance-wise to do for
>> every physical page, and it also introduces unclear semantics if different
>> address spaces mapping the same page
>> have different opinions.
> Yes, it would need to be something like an implicit mlock. I haven't
> really indicated that would be a _simpler_ solution. But as this has
> obvious userspace API implications the much more important question is
> what is a futureproof solution. Also we need to get an answer whether
> this is really needed or too niche to cast an interface maintained for
> ever for.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18  9:25           ` Daniil Tatianin
@ 2026-03-18 10:01             ` Michal Hocko
  2026-03-18 10:08               ` Daniil Tatianin
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Hocko @ 2026-03-18 10:01 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Wed 18-03-26 12:25:17, Daniil Tatianin wrote:
> 
> On 3/18/26 12:20 PM, Michal Hocko wrote:
[...]
> > Shouldn't those use mlock?
> 
> Absolutely, mlock is required to mark a folio as unevictable. Note that
> unevictable folios are still
> perfectly eligible for compaction. This new property makes it so a cgroup
> can say whether its
> unevictable pages should be compacted (same as the global
> compact_unevictable_allowed sysctl).

If mlock is already used, then why do we need a per-memcg control as
well? Do we have different classes of mlocked pages, some for which
compaction is acceptable and others for which it is not?

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18 10:01             ` Michal Hocko
@ 2026-03-18 10:08               ` Daniil Tatianin
  2026-03-18 11:47                 ` Michal Hocko
  0 siblings, 1 reply; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-18 10:08 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core


On 3/18/26 1:01 PM, Michal Hocko wrote:
> On Wed 18-03-26 12:25:17, Daniil Tatianin wrote:
>> On 3/18/26 12:20 PM, Michal Hocko wrote:
> [...]
>>> Shouldn't those use mlock?
>> Absolutely, mlock is required to mark a folio as unevictable. Note that
>> unevictable folios are still
>> perfectly eligible for compaction. This new property makes it so a cgroup
>> can say whether its
>> unevictable pages should be compacted (same as the global
>> compact_unevictable_allowed sysctl).
> If the mlock is already used then why do we need a per memcg control as
> well? Do we have different classes of mlocked pages some with acceptable
> compaction while others without?
>
The way it works is that mlock(2) only prevents pages from being evicted
from the page cache, by setting the unevictable | mlocked flags on the
page. Such pages, however, are still eligible for compaction by default,
unless /proc/sys/vm/compact_unevictable_allowed is set to 0. That sysctl
essentially "promotes" ALL such (unevictable) pages to a new synthetic
tier by making compaction skip them. The per-cgroup property works
similarly, but it allows the scope to be much smaller: instead of a
global setting that promotes literally ALL unevictable (mlocked) pages to
this tier, it only promotes pages belonging to a cgroup that has
memory.compact_unevictable_allowed set to 0.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18 10:08               ` Daniil Tatianin
@ 2026-03-18 11:47                 ` Michal Hocko
  2026-03-18 14:03                   ` Daniil Tatianin
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Hocko @ 2026-03-18 11:47 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Wed 18-03-26 13:08:31, Daniil Tatianin wrote:
> 
> On 3/18/26 1:01 PM, Michal Hocko wrote:
> > On Wed 18-03-26 12:25:17, Daniil Tatianin wrote:
> > > On 3/18/26 12:20 PM, Michal Hocko wrote:
> > [...]
> > > > Shouldn't those use mlock?
> > > Absolutely, mlock is required to mark a folio as unevictable. Note that
> > > unevictable folios are still
> > > perfectly eligible for compaction. This new property makes it so a cgroup
> > > can say whether its
> > > unevictable pages should be compacted (same as the global
> > > compact_unevictable_allowed sysctl).
> > If the mlock is already used then why do we need a per memcg control as
> > well? Do we have different classes of mlocked pages some with acceptable
> > compaction while others without?

OK, I have misread the intention and this is focused exactly on mlock
rather than on general protection of all memcg charged memory. Now

> The way it works is mlock(2) only prevents pages from being evicted
> from the page cache by setting unevictable | mlocked flags on the
> page. Such pages, however, are still allowed for compaction by
> default, unless /proc/sys/vm/compact_unevictable_allowed is set to 0.
> That property essentially "promotes" ALL such (unevictable) pages to a
> new synthetic tier by making compaction skip them. The per-cgroup
> property works similarly, however, it allows the scope to be much
> smaller: from a global setting that promotes literally ALL unevictable
> (mlocked) pages to this tier, to only promoting pages belonging to the
> cgroup that has memory.compact_unevictable_allowed as 0.

This is clear, but what is not really clear to me is whether this is
worth having. mlock workloads are already quite specific and the amount
of mlocked memory shouldn't really consume a huge portion of the memory,
so you still need to have a solid use case where such micromanagement
really is worth it. In other words, why is a global
compact_unevictable_allowed not sufficient?

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18 11:47                 ` Michal Hocko
@ 2026-03-18 14:03                   ` Daniil Tatianin
  2026-03-18 19:55                     ` Shakeel Butt
  2026-03-19  8:24                     ` Michal Hocko
  0 siblings, 2 replies; 14+ messages in thread
From: Daniil Tatianin @ 2026-03-18 14:03 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core


On 3/18/26 2:47 PM, Michal Hocko wrote:
> On Wed 18-03-26 13:08:31, Daniil Tatianin wrote:
>> On 3/18/26 1:01 PM, Michal Hocko wrote:
>>> On Wed 18-03-26 12:25:17, Daniil Tatianin wrote:
>>>> On 3/18/26 12:20 PM, Michal Hocko wrote:
>>> [...]
>>>>> Shouldn't those use mlock?
>>>> Absolutely, mlock is required to mark a folio as unevictable. Note that
>>>> unevictable folios are still
>>>> perfectly eligible for compaction. This new property makes it so a cgroup
>>>> can say whether its
>>>> unevictable pages should be compacted (same as the global
>>>> compact_unevictable_allowed sysctl).
>>> If the mlock is already used then why do we need a per memcg control as
>>> well? Do we have different classes of mlocked pages some with acceptable
>>> compaction while others without?
> OK, I have misread the intention and this is exactly focused at mlock
> rather than general protection of all memcg charged memory. Now
>
>> The way it works is mlock(2) only prevents pages from being evicted
>> from the page cache by setting unevictable | mlocked flags on the
>> page. Such pages, however, are still allowed for compaction by
>> default, unless /proc/sys/vm/compact_unevictable_allowed is set to 0.
>> That property essentially "promotes" ALL such (unevictable) pages to a
>> new synthetic tier by making compaction skip them. The per-cgroup
>> property works similarly, however, it allows the scope to be much
>> smaller: from a global setting that promotes literally ALL unevictable
>> (mlocked) pages to this tier, to only promoting pages belonging to the
>> cgroup that has memory.compact_unevictable_allowed as 0.
> This is clear but what is not really clear to me is whether this is
> worth having as mlock workloads are already quite specific, the amount
> of mlocked memory shouldn't really consume huge portion of the memory so
> you still need to have a solid usecase where such a micro management
> really is worth it. In other words why a global
> compact_unevictable_allowed is not sufficient.

In my opinion, both mlocked memory and non-compactible memory have the
right to co-exist on the same host without a global switch that turns
one into the other. I agree that it's not a super common thing, but I
still think it can be beneficial.

Some examples include, but are not limited to, security, where sensitive
data must never be swapped to disk yet we have no problem if it gets
compacted and the actual physical page gets replaced, and performance
for some apps, where we can e.g. mlock a large binary in memory to keep
it in the page cache and improve startup time, but again don't care much
if the actual backing pages are replaced via compaction.

On the other hand, some critically important/real-time applications do
need protection from compaction on top of the regular mlock, so that
they have predictable latency and response time, which can really
fluctuate during heavy compaction. Both of these cases can coexist on
the same physical machine.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18 14:03                   ` Daniil Tatianin
@ 2026-03-18 19:55                     ` Shakeel Butt
  2026-03-19  8:35                       ` Michal Hocko
  2026-03-19  8:24                     ` Michal Hocko
  1 sibling, 1 reply; 14+ messages in thread
From: Shakeel Butt @ 2026-03-18 19:55 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Michal Hocko, Andrew Morton, Johannes Weiner, Roman Gushchin,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

Hi Daniil,

On Wed, Mar 18, 2026 at 05:03:53PM +0300, Daniil Tatianin wrote:
> 
> On 3/18/26 2:47 PM, Michal Hocko wrote:
> > On Wed 18-03-26 13:08:31, Daniil Tatianin wrote:
> > > On 3/18/26 1:01 PM, Michal Hocko wrote:
> > > > On Wed 18-03-26 12:25:17, Daniil Tatianin wrote:
> > > > > On 3/18/26 12:20 PM, Michal Hocko wrote:
> > > > [...]
> > > > > > Shouldn't those use mlock?
> > > > > Absolutely, mlock is required to mark a folio as unevictable. Note that
> > > > > unevictable folios are still
> > > > > perfectly eligible for compaction. This new property makes it so a cgroup
> > > > > can say whether its
> > > > > unevictable pages should be compacted (same as the global
> > > > > compact_unevictable_allowed sysctl).
> > > > If the mlock is already used then why do we need a per memcg control as
> > > > well? Do we have different classes of mlocked pages some with acceptable
> > > > compaction while others without?
> > OK, I have misread the intention and this is exactly focused at mlock
> > rather than general protection of all memcg charged memory. Now
> > 
> > > The way it works is mlock(2) only prevents pages from being evicted
> > > from the page cache by setting unevictable | mlocked flags on the
> > > page. Such pages, however, are still allowed for compaction by
> > > default, unless /proc/sys/vm/compact_unevictable_allowed is set to 0.
> > > That property essentially "promotes" ALL such (unevictable) pages to a
> > > new synthetic tier by making compaction skip them. The per-cgroup
> > > property works similarly, however, it allows the scope to be much
> > > smaller: from a global setting that promotes literally ALL unevictable
> > > (mlocked) pages to this tier, to only promoting pages belonging to the
> > > cgroup that has memory.compact_unevictable_allowed as 0.
> > This is clear but what is not really clear to me is whether this is
> > worth having as mlock workloads are already quite specific, the amount
> > of mlocked memory shouldn't really consume huge portion of the memory so
> > you still need to have a solid usecase where such a micro management
> > really is worth it. In other words why a global
> > compact_unevictable_allowed is not sufficient.
> 
> In my opinion both mlocked memory and non-compactible memory have the right
> to
> co-exist on the same host without a global switch that turns one into the
> other. I agree
> that it's not a super common thing, but I still think it can be beneficial.
> 
> Some examples include but not limited to: security: so that sensitive data
> is never swapped
> to disk yet we have no problem if it gets compacted and the actual physical
> page gets replaced,
> performance for some apps: so that we can e.g. memlock a large binary in
> memory to keep it in
> page cache and improve startup time, but again don't care much if the actual
> backing pages are
> replaced via compaction.
> 
> On the other hand, some critically important/real time applications do need
> protection from compaction
> as well on top of the regular mlock, so that they have predictable latency
> and response time, which can
> really fluctuate during heavy compaction. Both of these cases can coexist on
> the same physical machine.
> 

IMO we should actually deprecate compact_unevictable_allowed and always allow
compaction for unevictable memory. We should decouple the notion of mlocked
memory from that of pinned/unmovable memory. Pinned memory has far more
consequences for the system, in terms of fragmentation and availability of
larger folios, than mlocked memory. If there are applications which need
unmovable memory, they should request it explicitly. I don't think there is
an API for such memory, but for such use-cases it makes sense to have an
explicit API.





^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18 14:03                   ` Daniil Tatianin
  2026-03-18 19:55                     ` Shakeel Butt
@ 2026-03-19  8:24                     ` Michal Hocko
  1 sibling, 0 replies; 14+ messages in thread
From: Michal Hocko @ 2026-03-19  8:24 UTC (permalink / raw)
  To: Daniil Tatianin
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Wed 18-03-26 17:03:53, Daniil Tatianin wrote:
> 
> On 3/18/26 2:47 PM, Michal Hocko wrote:
> > On Wed 18-03-26 13:08:31, Daniil Tatianin wrote:
> > > On 3/18/26 1:01 PM, Michal Hocko wrote:
> > > > On Wed 18-03-26 12:25:17, Daniil Tatianin wrote:
> > > > > On 3/18/26 12:20 PM, Michal Hocko wrote:
> > > > [...]
> > > > > > Shouldn't those use mlock?
> > > > > Absolutely, mlock is required to mark a folio as unevictable. Note that
> > > > > unevictable folios are still
> > > > > perfectly eligible for compaction. This new property makes it so a cgroup
> > > > > can say whether its
> > > > > unevictable pages should be compacted (same as the global
> > > > > compact_unevictable_allowed sysctl).
> > > > If the mlock is already used then why do we need a per memcg control as
> > > > well? Do we have different classes of mlocked pages some with acceptable
> > > > compaction while others without?
> > OK, I have misread the intention and this is exactly focused at mlock
> > rather than general protection of all memcg charged memory. Now
> > 
> > > The way it works is mlock(2) only prevents pages from being evicted
> > > from the page cache by setting unevictable | mlocked flags on the
> > > page. Such pages, however, are still allowed for compaction by
> > > default, unless /proc/sys/vm/compact_unevictable_allowed is set to 0.
> > > That property essentially "promotes" ALL such (unevictable) pages to a
> > > new synthetic tier by making compaction skip them. The per-cgroup
> > > property works similarly, however, it allows the scope to be much
> > > smaller: from a global setting that promotes literally ALL unevictable
> > > (mlocked) pages to this tier, to only promoting pages belonging to the
> > > cgroup that has memory.compact_unevictable_allowed as 0.
> > This is clear but what is not really clear to me is whether this is
> > worth having as mlock workloads are already quite specific, the amount
> > of mlocked memory shouldn't really consume huge portion of the memory so
> > you still need to have a solid usecase where such a micro management
> > really is worth it. In other words why a global
> > compact_unevictable_allowed is not sufficient.
> 
> In my opinion both mlocked memory and non-compactible memory have the right
> to
> co-exist on the same host without a global switch that turns one into the
> other. I agree
> that it's not a super common thing, but I still think it can be beneficial.
> 
> Some examples include but not limited to: security: so that sensitive data
> is never swapped
> to disk yet we have no problem if it gets compacted and the actual physical
> page gets replaced,
> performance for some apps: so that we can e.g. memlock a large binary in
> memory to keep it in
> page cache and improve startup time, but again don't care much if the actual
> backing pages are
> replaced via compaction.
> 
> On the other hand, some critically important/real time applications do need
> protection from compaction
> as well on top of the regular mlock, so that they have predictable latency
> and response time, which can
> really fluctuate during heavy compaction. Both of these cases can coexist on
> the same physical machine.

This is a very weak justification for adding a user API.
NAK to this.

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
  2026-03-18 19:55                     ` Shakeel Butt
@ 2026-03-19  8:35                       ` Michal Hocko
  0 siblings, 0 replies; 14+ messages in thread
From: Michal Hocko @ 2026-03-19  8:35 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Daniil Tatianin, Andrew Morton, Johannes Weiner, Roman Gushchin,
	Muchun Song, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Brendan Jackman, Zi Yan,
	cgroups, linux-mm, linux-kernel, yc-core

On Wed 18-03-26 12:55:49, Shakeel Butt wrote:
[...]
> IMO we should actually deprecate compact_unevictable_allowed and always allow
> compaction for unevictable memory. We should decouple the notion of mlocked
> memory from the pinned/unmovable memory. Pinned memory has much more
> consequences on the system related to fragmentation and availability of larger
> folios than mlocked memory. If there are applications which need unmovable
> memory, they should request it explicitly. I don't think there is an API for
> such memory but for such use-cases, it makes sense to have an explicit API.

That would be really hard to do in a backward compatible way, and there
are workloads (e.g. RT) where mlock is supposed to imply no faults at
all, not even minor ones.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2026-03-19  8:35 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-17 10:00 [PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute Daniil Tatianin
2026-03-17 19:17 ` Andrew Morton
2026-03-17 20:17   ` Daniil Tatianin
2026-03-18  8:25     ` Michal Hocko
2026-03-18  9:09       ` Daniil Tatianin
     [not found]       ` <7ca9876c-f3fa-441c-9a21-ae0ee5523318@yandex-team.ru>
2026-03-18  9:20         ` Michal Hocko
2026-03-18  9:25           ` Daniil Tatianin
2026-03-18 10:01             ` Michal Hocko
2026-03-18 10:08               ` Daniil Tatianin
2026-03-18 11:47                 ` Michal Hocko
2026-03-18 14:03                   ` Daniil Tatianin
2026-03-18 19:55                     ` Shakeel Butt
2026-03-19  8:35                       ` Michal Hocko
2026-03-19  8:24                     ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox