Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Joshua Hahn <joshua.hahnjy@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Lorenzo Stoakes <ljs@kernel.org>,
	"Liam R. Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@kernel.org>,
	linux-kernel@vger.kernel.org, kernel-team@meta.com
Subject: [RFC PATCH 2/9 v2] mm/memory-tiers: Introduce toptier utility functions
Date: Thu, 23 Apr 2026 13:34:36 -0700	[thread overview]
Message-ID: <20260423203445.2914963-3-joshua.hahnjy@gmail.com> (raw)
In-Reply-To: <20260423203445.2914963-1-joshua.hahnjy@gmail.com>

This patch introduces two toptier-related utility functions,
get_toptier_nodemask() and mt_scale_by_toptier().

Tier-aware limits will introduce new memcg thresholds on toptier nodes
for systems with multiple memory tiers. To simplify the calculation
for these new thresholds, introduce a function mt_scale_by_toptier()
to scale memory limits by the ratio of toptier capacity and total
capacity available on the system.

For single-node / single-tier systems, the scaling operation will be a
no-op since capacity updates are hooked into establish_demotion_targets().

Note that the ratio is static for the entire system. Explicitly, it does
not take cgroups' cpuset.mems into consideration, meaning even cgroups
limited to toptier nodes only will still get a scaled-down toptier
limit.

This is to ensure that all cgroups are limited to their fair share of
toptier memory, regardless of what nodes they are restricted to. This
also has the added benefit of preventing accidental or unintentional
overcommitting of toptier memory, since every cgroup shares the same
toptier ratio.

get_toptier_nodemask() extends the existing node_is_toptier() check to
return a nodemask of all N_MEMORY nodes living on toptier.
For !CONFIG_NUMA_MIGRATION or !CONFIG_NUMA systems, it will just return
all N_MEMORY nodes.

Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
 include/linux/memory-tiers.h | 17 ++++++++++++++++
 mm/memory-tiers.c            | 38 ++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 7999c58629eeb..f21525c50a5ff 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -52,10 +52,12 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist);
 struct memory_dev_type *mt_find_alloc_memory_type(int adist,
 						  struct list_head *memory_types);
 void mt_put_memory_types(struct list_head *memory_types);
+unsigned long mt_scale_by_toptier(unsigned long val);
 #ifdef CONFIG_NUMA_MIGRATION
 int next_demotion_node(int node, const nodemask_t *allowed_mask);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
 bool node_is_toptier(int node);
+void get_toptier_nodemask(nodemask_t *mask);
 #else
 static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
 {
@@ -71,6 +73,11 @@ static inline bool node_is_toptier(int node)
 {
 	return true;
 }
+
+static inline void get_toptier_nodemask(nodemask_t *mask)
+{
+	*mask = node_states[N_MEMORY];
+}
 #endif
 
 #else
@@ -116,6 +123,11 @@ static inline bool node_is_toptier(int node)
 	return true;
 }
 
+static inline void get_toptier_nodemask(nodemask_t *mask)
+{
+	*mask = node_states[N_MEMORY];
+}
+
 static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
 {
 	return 0;
@@ -151,5 +163,10 @@ static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist,
 static inline void mt_put_memory_types(struct list_head *memory_types)
 {
 }
+
+static inline unsigned long mt_scale_by_toptier(unsigned long val)
+{
+	return val;
+}
 #endif	/* CONFIG_NUMA */
 #endif  /* _LINUX_MEMORY_TIERS_H */
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 54851d8a195b0..acc02679e312d 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -46,6 +46,8 @@ static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
 struct memory_dev_type *default_dram_type;
 nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE;
 
+static unsigned long toptier_capacity;
+
 static const struct bus_type memory_tier_subsys = {
 	.name = "memory_tiering",
 	.dev_name = "memory_tier",
@@ -299,6 +301,17 @@ bool node_is_toptier(int node)
 	return toptier;
 }
 
+void get_toptier_nodemask(nodemask_t *mask)
+{
+	int node;
+
+	nodes_clear(*mask);
+	for_each_node_state(node, N_MEMORY) {
+		if (node_is_toptier(node))
+			node_set(node, *mask);
+	}
+}
+
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
 {
 	struct memory_tier *memtier;
@@ -428,6 +441,7 @@ static void establish_demotion_targets(void)
 	struct demotion_nodes *nd;
 	int target = NUMA_NO_NODE, node;
 	int distance, best_distance;
+	int i;
 	nodemask_t tier_nodes, lower_tier;
 
 	lockdep_assert_held_once(&memory_tier_lock);
@@ -496,6 +510,19 @@ static void establish_demotion_targets(void)
 			break;
 		}
 	}
+
+	toptier_capacity = 0;
+	for_each_node_state(node, N_MEMORY) {
+		if (!node_is_toptier(node))
+			continue;
+
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *z = &NODE_DATA(node)->node_zones[i];
+
+			toptier_capacity += zone_managed_pages(z);
+		}
+	}
+
 	/*
 	 * Now build the lower_tier mask for each node collecting node mask from
 	 * all memory tier below it. This allows us to fallback demotion page
@@ -878,6 +905,16 @@ int mt_calc_adistance(int node, int *adist)
 }
 EXPORT_SYMBOL_GPL(mt_calc_adistance);
 
+unsigned long mt_scale_by_toptier(unsigned long val)
+{
+	unsigned long total_capacity = totalram_pages();
+
+	if (!total_capacity)
+		return 0;
+
+	return mult_frac(val, toptier_capacity, total_capacity);
+}
+
 static int __meminit memtier_hotplug_callback(struct notifier_block *self,
 					      unsigned long action, void *_arg)
 {
@@ -932,6 +969,7 @@ static int __init memory_tier_init(void)
 		  node_states[N_CPU]);
 
 	hotplug_node_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
+	toptier_capacity = totalram_pages();
 	return 0;
 }
 subsys_initcall(memory_tier_init);
-- 
2.52.0



  parent reply	other threads:[~2026-04-23 20:34 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-23 20:34 [RFC PATCH 0/9 v2] mm/memcontrol: Make memory cgroup limits tier-aware Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 1/9 v2] cgroup: Introduce memory_tiered_limits cgroup mount option Joshua Hahn
2026-04-23 20:34 ` Joshua Hahn [this message]
2026-04-23 20:34 ` [RFC PATCH 3/9 v2] mm/memcontrol: Refactor page_counter charging in try_charge_memcg Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 4/9 v2] mm/memcontrol: charge/uncharge toptier memory to mem_cgroup Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 5/9 v2] mm/memcontrol: Set toptier limits proportional to memory limits Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 6/9 v2] mm/vmscan, memcontrol: Add nodemask to try_to_free_mem_cgroup_pages Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 7/9 v2] mm/memcontrol: Make memory.low and memory.min tier-aware Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 8/9 v2] mm/memcontrol: Make memory.high tier-aware Joshua Hahn
2026-04-23 20:34 ` [RFC PATCH 9/9 v2] mm/memcontrol: Make memory.max tier-aware Joshua Hahn
2026-05-11 15:56 ` [RFC PATCH 0/9 v2] mm/memcontrol: Make memory cgroup limits tier-aware David Hildenbrand (Arm)
2026-05-11 20:03   ` Joshua Hahn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260423203445.2914963-3-joshua.hahnjy@gmail.com \
    --to=joshua.hahnjy@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox