Linux cgroups development
 help / color / mirror / Atom feed
From: Joshua Hahn <joshua.hahnjy@gmail.com>
To: linux-mm@kvack.org
Cc: Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Shakeel Butt <shakeel.butt@linux.dev>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Muchun Song <muchun.song@linux.dev>,
	Lorenzo Stoakes <ljs@kernel.org>,
	"Liam R. Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	kernel-team@meta.com
Subject: [RFC PATCH 5/9 v2] mm/memcontrol: Set toptier limits proportional to memory limits
Date: Thu, 23 Apr 2026 13:34:39 -0700	[thread overview]
Message-ID: <20260423203445.2914963-6-joshua.hahnjy@gmail.com> (raw)
In-Reply-To: <20260423203445.2914963-1-joshua.hahnjy@gmail.com>

Compute toptier limits proportional to the memory limits whenever
users write to the memory limit cgroup files, or when memory hotplug
changes the ratio of toptier capacity to total capacity.

Also introduce new read-only cgroup files memory.toptier_{min,low,high,max}
to expose the derived toptier limits.

Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
 include/linux/memcontrol.h | 12 +++++
 mm/memcontrol.c            | 93 ++++++++++++++++++++++++++++++++++++++
 mm/memory-tiers.c          |  8 +++-
 3 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0cdb6cd1955dc..6bcb866440075 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -543,6 +543,14 @@ static inline bool mem_cgroup_tiered_limits(void)
 #endif
 }
 
+/*
+ * Recompute the memcg toptier limits. mm/memcontrol.c defines this with
+ * no CONFIG_NUMA guard, so a static inline stub here would be a conflicting
+ * redefinition on CONFIG_NUMA=n builds. No stub is needed anyway: the
+ * function returns early when mem_cgroup_tiered_limits() is false.
+ */
+void update_memcg_toptier_limits(void);
+
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
 					 struct mem_cgroup *memcg,
 					 unsigned long *min,
@@ -1099,6 +1107,10 @@ static inline bool mem_cgroup_tiered_limits(void)
 	return false;
 }
 
+/* Empty stand-in; mem_cgroup_tiered_limits() is stubbed to false here too. */
+static inline void update_memcg_toptier_limits(void)
+{ }
+
 static inline void memcg_memory_event(struct mem_cgroup *memcg,
 				      enum memcg_memory_event event)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d891cf77cf6d6..3acb06388405c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3875,6 +3875,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		return ERR_CAST(memcg);
 
 	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
+	page_counter_set_high(&memcg->toptier, PAGE_COUNTER_MAX);
 	memcg1_soft_limit_reset(memcg);
 #ifdef CONFIG_ZSWAP
 	memcg->zswap_max = PAGE_COUNTER_MAX;
@@ -4092,6 +4093,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	page_counter_set_max(&memcg->memory, PAGE_COUNTER_MAX);
+	page_counter_set_max(&memcg->toptier, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX);
 #ifdef CONFIG_MEMCG_V1
 	page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
@@ -4100,6 +4102,9 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
+	page_counter_set_min(&memcg->toptier, 0);
+	page_counter_set_low(&memcg->toptier, 0);
+	page_counter_set_high(&memcg->toptier, PAGE_COUNTER_MAX);
 	memcg1_soft_limit_reset(memcg);
 	page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
 	memcg_wb_domain_size_changed(memcg);
@@ -4438,12 +4443,51 @@ static ssize_t memory_peak_write(struct kernfs_open_file *of, char *buf,
 
 #undef OFP_PEAK_UNSET
 
+/* PAGE_COUNTER_MAX passes through; else scale via mt_scale_by_toptier(). */
+static inline unsigned long page_counter_max_or_scale(unsigned long val)
+{
+	return val != PAGE_COUNTER_MAX ? mt_scale_by_toptier(val) : val;
+}
+
+/*
+ * Re-derive the toptier limits of every non-root memcg from its memory limits.
+ */
+void update_memcg_toptier_limits(void)
+{
+	struct mem_cgroup *memcg;
+
+	if (!mem_cgroup_tiered_limits())
+		return;
+
+	for_each_mem_cgroup(memcg) {
+		unsigned long min, low, high, max;
+
+		if (memcg == root_mem_cgroup)
+			continue;
+
+		min = page_counter_max_or_scale(READ_ONCE(memcg->memory.min));
+		low = page_counter_max_or_scale(READ_ONCE(memcg->memory.low));
+		high = page_counter_max_or_scale(READ_ONCE(memcg->memory.high));
+		max = page_counter_max_or_scale(READ_ONCE(memcg->memory.max));
+		page_counter_set_min(&memcg->toptier, min);
+		page_counter_set_low(&memcg->toptier, low);
+		page_counter_set_high(&memcg->toptier, high);
+		xchg(&memcg->toptier.max, max);
+	}
+}
+
 static int memory_min_show(struct seq_file *m, void *v)
 {
 	return seq_puts_memcg_tunable(m,
 		READ_ONCE(mem_cgroup_from_seq(m)->memory.min));
 }
 
+/* seq_show for memory.toptier_min: print this memcg's toptier.min. */
+static int toptier_min_show(struct seq_file *m, void *v)
+{
+	return seq_puts_memcg_tunable(m, READ_ONCE(mem_cgroup_from_seq(m)->toptier.min));
+}
+
 static ssize_t memory_min_write(struct kernfs_open_file *of,
 				char *buf, size_t nbytes, loff_t off)
 {
@@ -4457,6 +4501,9 @@ static ssize_t memory_min_write(struct kernfs_open_file *of,
 		return err;
 
 	page_counter_set_min(&memcg->memory, min);
+	if (mem_cgroup_tiered_limits())
+		page_counter_set_min(&memcg->toptier,
+				     page_counter_max_or_scale(min));
 
 	return nbytes;
 }
@@ -4467,6 +4514,12 @@ static int memory_low_show(struct seq_file *m, void *v)
 		READ_ONCE(mem_cgroup_from_seq(m)->memory.low));
 }
 
+/* seq_show for memory.toptier_low: print this memcg's toptier.low. */
+static int toptier_low_show(struct seq_file *m, void *v)
+{
+	return seq_puts_memcg_tunable(m, READ_ONCE(mem_cgroup_from_seq(m)->toptier.low));
+}
+
 static ssize_t memory_low_write(struct kernfs_open_file *of,
 				char *buf, size_t nbytes, loff_t off)
 {
@@ -4480,6 +4533,9 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
 		return err;
 
 	page_counter_set_low(&memcg->memory, low);
+	if (mem_cgroup_tiered_limits())
+		page_counter_set_low(&memcg->toptier,
+				     page_counter_max_or_scale(low));
 
 	return nbytes;
 }
@@ -4490,6 +4546,12 @@ static int memory_high_show(struct seq_file *m, void *v)
 		READ_ONCE(mem_cgroup_from_seq(m)->memory.high));
 }
 
+/* seq_show for memory.toptier_high: print this memcg's toptier.high. */
+static int toptier_high_show(struct seq_file *m, void *v)
+{
+	return seq_puts_memcg_tunable(m, READ_ONCE(mem_cgroup_from_seq(m)->toptier.high));
+}
+
 static ssize_t memory_high_write(struct kernfs_open_file *of,
 				 char *buf, size_t nbytes, loff_t off)
 {
@@ -4505,6 +4567,9 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 		return err;
 
 	page_counter_set_high(&memcg->memory, high);
+	if (mem_cgroup_tiered_limits())
+		page_counter_set_high(&memcg->toptier,
+				      page_counter_max_or_scale(high));
 
 	if (of->file->f_flags & O_NONBLOCK)
 		goto out;
@@ -4542,6 +4607,12 @@ static int memory_max_show(struct seq_file *m, void *v)
 		READ_ONCE(mem_cgroup_from_seq(m)->memory.max));
 }
 
+/* seq_show for memory.toptier_max: print this memcg's toptier.max. */
+static int toptier_max_show(struct seq_file *m, void *v)
+{
+	return seq_puts_memcg_tunable(m, READ_ONCE(mem_cgroup_from_seq(m)->toptier.max));
+}
+
 static ssize_t memory_max_write(struct kernfs_open_file *of,
 				char *buf, size_t nbytes, loff_t off)
 {
@@ -4557,6 +4628,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 		return err;
 
 	xchg(&memcg->memory.max, max);
+	if (mem_cgroup_tiered_limits())
+		xchg(&memcg->toptier.max, page_counter_max_or_scale(max));
 
 	if (of->file->f_flags & O_NONBLOCK)
 		goto out;
@@ -4762,6 +4835,26 @@ static struct cftype memory_files[] = {
 		.seq_show = memory_max_show,
 		.write = memory_max_write,
 	},
+	{
+		.name = "toptier_min",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = toptier_min_show,
+	},
+	{
+		.name = "toptier_low",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = toptier_low_show,
+	},
+	{
+		.name = "toptier_high",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = toptier_high_show,
+	},
+	{
+		.name = "toptier_max",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = toptier_max_show,
+	},
 	{
 		.name = "events",
 		.flags = CFTYPE_NOT_ON_ROOT,
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index acc02679e312d..ddcc11e3919da 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -924,15 +924,19 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
 	switch (action) {
 	case NODE_REMOVED_LAST_MEMORY:
 		mutex_lock(&memory_tier_lock);
-		if (clear_node_memory_tier(nn->nid))
+		if (clear_node_memory_tier(nn->nid)) {
 			establish_demotion_targets();
+			update_memcg_toptier_limits();
+		}
 		mutex_unlock(&memory_tier_lock);
 		break;
 	case NODE_ADDED_FIRST_MEMORY:
 		mutex_lock(&memory_tier_lock);
 		memtier = set_node_memory_tier(nn->nid);
-		if (!IS_ERR(memtier))
+		if (!IS_ERR(memtier)) {
 			establish_demotion_targets();
+			update_memcg_toptier_limits();
+		}
 		mutex_unlock(&memory_tier_lock);
 		break;
 	}
-- 
2.52.0


  parent reply	other threads:[~2026-04-23 20:34 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-23 20:34 [RFC PATCH 0/9 v2] mm/memcontrol: Make memory cgroup limits tier-aware Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 1/9 v2] cgroup: Introduce memory_tiered_limits cgroup mount option Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 3/9 v2] mm/memcontrol: Refactor page_counter charging in try_charge_memcg Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 4/9 v2] mm/memcontrol: charge/uncharge toptier memory to mem_cgroup Joshua Hahn
2026-04-23 20:34 ` Joshua Hahn [this message]
2026-04-23 20:34 ` [RFC PATCH 6/9 v2] mm/vmscan, memcontrol: Add nodemask to try_to_free_mem_cgroup_pages Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 7/9 v2] mm/memcontrol: Make memory.low and memory.min tier-aware Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 8/9 v2] mm/memcontrol: Make memory.high tier-aware Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 9/9 v2] mm/memcontrol: Make memory.max tier-aware Joshua Hahn
2026-05-11 15:56 ` [RFC PATCH 0/9 v2] mm/memcontrol: Make memory cgroup limits tier-aware David Hildenbrand (Arm)
2026-05-11 20:03   ` Joshua Hahn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260423203445.2914963-6-joshua.hahnjy@gmail.com \
    --to=joshua.hahnjy@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=david@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=rppt@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox