public inbox for linux-mm@kvack.org
 help / color / mirror / Atom feed
* [PATCH 0/3] correct the parameter type of some mm functions
@ 2026-03-24 11:31 Qi Zheng
  2026-03-24 11:31 ` [PATCH] fix: mm: memcontrol: convert objcg to be per-memcg per-node type Qi Zheng
                   ` (4 more replies)
  0 siblings, 5 replies; 26+ messages in thread
From: Qi Zheng @ 2026-03-24 11:31 UTC (permalink / raw)
  To: hannes, hughd, mhocko, roman.gushchin, shakeel.butt, muchun.song,
	david, lorenzo.stoakes, ziy, harry.yoo, yosry.ahmed, imran.f.khan,
	kamalesh.babulal, axelrasmussen, yuanchu, weixugc, chenridong,
	mkoutny, akpm, hamzamahfooz, apais, lance.yang, bhe, usamaarif642
  Cc: linux-mm, linux-kernel, Qi Zheng

From: Qi Zheng <zhengqi.arch@bytedance.com>

Hi all,

As Harry Yoo pointed out [1], in the case of reparenting LRU folios, the value
passed to functions such as __mod_memcg{_lruvec}_state() may exceed the upper
limit of int, so this series aims to correct the parameter type of these
functions.

This series is based on the next-20260323.

Comments and suggestions are welcome!

Thanks,
Qi

[1]. https://lore.kernel.org/all/acDxaEgnqPI-Z4be@hyeyoo/

Qi Zheng (3):
  mm: memcontrol: correct the type of stats_updates to unsigned long
  mm: memcontrol: correct the parameter type of
    __mod_memcg{_lruvec}_state()
  mm: memcontrol: correct the nr_pages parameter type of
    mem_cgroup_update_lru_size()

 include/linux/memcontrol.h   |  2 +-
 include/trace/events/memcg.h | 10 +++++-----
 mm/memcontrol.c              | 28 ++++++++++++++--------------
 3 files changed, 20 insertions(+), 20 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 26+ messages in thread
* [PATCH v6 31/33] mm: memcontrol: convert objcg to be per-memcg per-node type
@ 2026-03-05 11:52 Qi Zheng
  2026-03-09 11:29 ` [PATCH] fix: " Qi Zheng
  0 siblings, 1 reply; 26+ messages in thread
From: Qi Zheng @ 2026-03-05 11:52 UTC (permalink / raw)
  To: hannes, hughd, mhocko, roman.gushchin, shakeel.butt, muchun.song,
	david, lorenzo.stoakes, ziy, harry.yoo, yosry.ahmed, imran.f.khan,
	kamalesh.babulal, axelrasmussen, yuanchu, weixugc, chenridong,
	mkoutny, akpm, hamzamahfooz, apais, lance.yang, bhe, usamaarif642
  Cc: linux-mm, linux-kernel, cgroups, Qi Zheng

From: Qi Zheng <zhengqi.arch@bytedance.com>

Convert objcg to be a per-memcg per-node type, so that when reparenting
LRU folios later, we can hold the lru lock at the node level, thus
avoiding holding too many lru locks at once.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 include/linux/memcontrol.h | 23 +++++------
 include/linux/sched.h      |  2 +-
 mm/memcontrol.c            | 79 +++++++++++++++++++++++---------------
 3 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d2748e672fd88..57d86decf2830 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -116,6 +116,16 @@ struct mem_cgroup_per_node {
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 	struct mem_cgroup_reclaim_iter	iter;
 
+	/*
+	 * objcg is wiped out as a part of the objcg reparenting process.
+	 * orig_objcg preserves a pointer (and a reference) to the original
+	 * objcg until the end of life of memcg.
+	 */
+	struct obj_cgroup __rcu	*objcg;
+	struct obj_cgroup	*orig_objcg;
+	/* list of inherited objcgs, protected by objcg_lock */
+	struct list_head objcg_list;
+
 #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
 	/* slab stats for nmi context */
 	atomic_t		slab_reclaimable;
@@ -180,6 +190,7 @@ struct obj_cgroup {
 		struct list_head list; /* protected by objcg_lock */
 		struct rcu_head rcu;
 	};
+	bool is_root;
 };
 
 /*
@@ -258,15 +269,6 @@ struct mem_cgroup {
 	seqlock_t		socket_pressure_seqlock;
 #endif
 	int kmemcg_id;
-	/*
-	 * memcg->objcg is wiped out as a part of the objcg repaprenting
-	 * process. memcg->orig_objcg preserves a pointer (and a reference)
-	 * to the original objcg until the end of live of memcg.
-	 */
-	struct obj_cgroup __rcu	*objcg;
-	struct obj_cgroup	*orig_objcg;
-	/* list of inherited objcgs, protected by objcg_lock */
-	struct list_head objcg_list;
 
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
@@ -333,7 +335,6 @@ struct mem_cgroup {
 #define MEMCG_CHARGE_BATCH 64U
 
 extern struct mem_cgroup *root_mem_cgroup;
-extern struct obj_cgroup *root_obj_cgroup;
 
 enum page_memcg_data_flags {
 	/* page->memcg_data is a pointer to an slabobj_ext vector */
@@ -552,7 +553,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
 {
-	return objcg == root_obj_cgroup;
+	return objcg->is_root;
 }
 
 static inline bool mem_cgroup_disabled(void)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f0..7b63b7b74f414 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,7 +1533,7 @@ struct task_struct {
 	/* Used by memcontrol for targeted memcg charge: */
 	struct mem_cgroup		*active_memcg;
 
-	/* Cache for current->cgroups->memcg->objcg lookups: */
+	/* Cache for current->cgroups->memcg->nodeinfo[nid]->objcg lookups: */
 	struct obj_cgroup		*objcg;
 #endif
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b0519a16f5684..e31c58bc89188 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -84,8 +84,6 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 EXPORT_SYMBOL(root_mem_cgroup);
 
-struct obj_cgroup *root_obj_cgroup __read_mostly;
-
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
@@ -210,18 +208,21 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
 }
 
 static inline struct obj_cgroup *__memcg_reparent_objcgs(struct mem_cgroup *memcg,
-							 struct mem_cgroup *parent)
+							 struct mem_cgroup *parent,
+							 int nid)
 {
 	struct obj_cgroup *objcg, *iter;
+	struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+	struct mem_cgroup_per_node *parent_pn = parent->nodeinfo[nid];
 
-	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
+	objcg = rcu_replace_pointer(pn->objcg, NULL, true);
 	/* 1) Ready to reparent active objcg. */
-	list_add(&objcg->list, &memcg->objcg_list);
+	list_add(&objcg->list, &pn->objcg_list);
 	/* 2) Reparent active objcg and already reparented objcgs to parent. */
-	list_for_each_entry(iter, &memcg->objcg_list, list)
+	list_for_each_entry(iter, &pn->objcg_list, list)
 		WRITE_ONCE(iter->memcg, parent);
 	/* 3) Move already reparented objcgs to the parent's list */
-	list_splice(&memcg->objcg_list, &parent->objcg_list);
+	list_splice(&pn->objcg_list, &parent_pn->objcg_list);
 
 	return objcg;
 }
@@ -268,14 +269,17 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
 {
 	struct obj_cgroup *objcg;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+	int nid;
 
-	reparent_locks(memcg, parent);
+	for_each_node(nid) {
+		reparent_locks(memcg, parent);
 
-	objcg = __memcg_reparent_objcgs(memcg, parent);
+		objcg = __memcg_reparent_objcgs(memcg, parent, nid);
 
-	reparent_unlocks(memcg, parent);
+		reparent_unlocks(memcg, parent);
 
-	percpu_ref_kill(&objcg->refcnt);
+		percpu_ref_kill(&objcg->refcnt);
+	}
 }
 
 /*
@@ -2877,8 +2881,10 @@ struct mem_cgroup *mem_cgroup_from_virt(void *p)
 
 static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 {
+	int nid = numa_node_id();
+
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
+		struct obj_cgroup *objcg = rcu_dereference(memcg->nodeinfo[nid]->objcg);
 
 		if (likely(objcg && obj_cgroup_tryget(objcg)))
 			return objcg;
@@ -2942,6 +2948,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 {
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
+	int nid = numa_node_id();
 
 	if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
 		return NULL;
@@ -2958,14 +2965,14 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * Objcg reference is kept by the task, so it's safe
 		 * to use the objcg by the current task.
 		 */
-		return objcg ? : root_obj_cgroup;
+		return objcg ? : rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 	}
 
 	memcg = this_cpu_read(int_active_memcg);
 	if (unlikely(memcg))
 		goto from_memcg;
 
-	return root_obj_cgroup;
+	return rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 
 from_memcg:
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
@@ -2975,12 +2982,12 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * away and can be used within the scope without any additional
 		 * protection.
 		 */
-		objcg = rcu_dereference_check(memcg->objcg, 1);
+		objcg = rcu_dereference_check(memcg->nodeinfo[nid]->objcg, 1);
 		if (likely(objcg))
 			return objcg;
 	}
 
-	return root_obj_cgroup;
+	return rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 }
 
 struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
@@ -3877,6 +3884,8 @@ static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn->lruvec_stats_percpu)
 		goto fail;
 
+	INIT_LIST_HEAD(&pn->objcg_list);
+
 	lruvec_init(&pn->lruvec);
 	pn->memcg = memcg;
 
@@ -3891,10 +3900,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
-	obj_cgroup_put(memcg->orig_objcg);
+	for_each_node(node) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
-	for_each_node(node)
-		free_mem_cgroup_per_node_info(memcg->nodeinfo[node]);
+		obj_cgroup_put(pn->orig_objcg);
+		free_mem_cgroup_per_node_info(pn);
+	}
 	memcg1_free_events(memcg);
 	kfree(memcg->vmstats);
 	free_percpu(memcg->vmstats_percpu);
@@ -3965,7 +3976,6 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 #endif
 	memcg1_memcg_init(memcg);
 	memcg->kmemcg_id = -1;
-	INIT_LIST_HEAD(&memcg->objcg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
@@ -4042,6 +4052,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct obj_cgroup *objcg;
+	int nid;
 
 	memcg_online_kmem(memcg);
 
@@ -4053,17 +4064,19 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (alloc_shrinker_info(memcg))
 		goto offline_kmem;
 
-	objcg = obj_cgroup_alloc();
-	if (!objcg)
-		goto free_shrinker;
+	for_each_node(nid) {
+		objcg = obj_cgroup_alloc();
+		if (!objcg)
+			goto free_objcg;
 
-	if (unlikely(mem_cgroup_is_root(memcg)))
-		root_obj_cgroup = objcg;
+		if (unlikely(mem_cgroup_is_root(memcg)))
+			objcg->is_root = true;
 
-	objcg->memcg = memcg;
-	rcu_assign_pointer(memcg->objcg, objcg);
-	obj_cgroup_get(objcg);
-	memcg->orig_objcg = objcg;
+		objcg->memcg = memcg;
+		rcu_assign_pointer(memcg->nodeinfo[nid]->objcg, objcg);
+		obj_cgroup_get(objcg);
+		memcg->nodeinfo[nid]->orig_objcg = objcg;
+	}
 
 	if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
 		queue_delayed_work(system_dfl_wq, &stats_flush_dwork,
@@ -4087,7 +4100,13 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	xa_store(&mem_cgroup_private_ids, memcg->id.id, memcg, GFP_KERNEL);
 
 	return 0;
-free_shrinker:
+free_objcg:
+	for_each_node(nid) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+
+		if (pn && pn->orig_objcg)
+			obj_cgroup_put(pn->orig_objcg);
+	}
 	free_shrinker_info(memcg);
 offline_kmem:
 	memcg_offline_kmem(memcg);
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2026-03-26  7:49 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-24 11:31 [PATCH 0/3] correct the parameter type of some mm functions Qi Zheng
2026-03-24 11:31 ` [PATCH] fix: mm: memcontrol: convert objcg to be per-memcg per-node type Qi Zheng
2026-03-24 11:34   ` Qi Zheng
2026-03-24 11:31 ` [PATCH 1/3] mm: memcontrol: correct the type of stats_updates to unsigned long Qi Zheng
2026-03-24 12:20   ` Lorenzo Stoakes (Oracle)
2026-03-24 11:31 ` [PATCH 2/3] mm: memcontrol: correct the parameter type of __mod_memcg{_lruvec}_state() Qi Zheng
2026-03-24 12:21   ` Lorenzo Stoakes (Oracle)
2026-03-24 14:12     ` Matthew Wilcox
2026-03-24 14:24       ` Lorenzo Stoakes (Oracle)
2026-03-25  1:43   ` Harry Yoo (Oracle)
2026-03-25  3:25     ` Qi Zheng
2026-03-25  5:17       ` Harry Yoo (Oracle)
2026-03-25  7:26         ` Qi Zheng
2026-03-25  7:36           ` Harry Yoo (Oracle)
2026-03-25  7:39             ` Qi Zheng
2026-03-26  7:49     ` Harry Yoo (Oracle)
2026-03-24 11:31 ` [PATCH 3/3] mm: memcontrol: correct the nr_pages parameter type of mem_cgroup_update_lru_size() Qi Zheng
2026-03-24 12:28   ` Lorenzo Stoakes (Oracle)
2026-03-25  0:27     ` Harry Yoo (Oracle)
2026-03-25  3:34       ` Qi Zheng
2026-03-26  2:35   ` kernel test robot
2026-03-26  3:36     ` Andrew Morton
2026-03-24 11:57 ` [PATCH 0/3] correct the parameter type of some mm functions Lorenzo Stoakes (Oracle)
  -- strict thread matches above, loose matches on Subject: below --
2026-03-05 11:52 [PATCH v6 31/33] mm: memcontrol: convert objcg to be per-memcg per-node type Qi Zheng
2026-03-09 11:29 ` [PATCH] fix: " Qi Zheng
2026-03-09 11:33   ` Usama Arif
2026-03-09 11:43     ` Qi Zheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox