Linux MM tree latest commits
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@linux-foundation.org>
To: mm-commits@vger.kernel.org,roman.gushchin@linux.dev,qi.zheng@linux.dev,oliver.sang@intel.com,muchun.song@linux.dev,mhocko@kernel.org,joshua.hahnjy@gmail.com,harry@kernel.org,hannes@cmpxchg.org,alex@ghiti.fr,shakeel.butt@linux.dev,akpm@linux-foundation.org
Subject: [merged mm-stable] memcg-store-node_id-instead-of-pglist_data-pointer.patch removed from -mm tree
Date: Tue, 02 Jun 2026 15:26:49 -0700	[thread overview]
Message-ID: <20260602222649.64FC71F00893@smtp.kernel.org> (raw)


The quilt patch titled
     Subject: memcg: store node_id instead of pglist_data pointer
has been removed from the -mm tree.  Its filename was
     memcg-store-node_id-instead-of-pglist_data-pointer.patch

This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

------------------------------------------------------
From: Shakeel Butt <shakeel.butt@linux.dev>
Subject: memcg: store node_id instead of pglist_data pointer
Date: Mon, 25 May 2026 20:39:28 -0700

Patch series "memcg: shrink obj_stock_pcp and cache multiple objcgs", v3.

Commit 01b9da291c49 ("mm: memcontrol: convert objcg to be per-memcg
per-node type") split a memcg's single obj_cgroup into one per NUMA node
so that reparenting LRU folios can take per-node lru locks.  As a side
effect, the per-CPU obj_stock_pcp -- which caches a single cached_objcg
pointer -- thrashes on workloads where threads of the same memcg run on
different NUMA nodes.  The kernel test robot reported a 67.7% regression
on stress-ng.switch.ops_per_sec from this pattern.

Commit d0211878ce06 ("memcg: cache obj_stock by memcg, not by objcg
pointer") landed as a temporary fix that treats sibling per-node objcgs as
equivalent for the cache lookup; it was meant to be reverted once per-node
kmem accounting is introduced.  This series takes a more general approach:
cache multiple objcgs per CPU using the multi-slot pattern memcg_stock_pcp
already uses, so the per-node objcg variants of one memcg can all coexist
in the stock without ever forcing a drain.  The temporary fix can then be
reverted.

To avoid increasing the per-CPU cache footprint, the first three patches
shrink the existing single-slot obj_stock_pcp fields.  The final patch
converts cached_objcg and nr_bytes into NR_OBJ_STOCK=5 slot arrays and
reorders the struct so the entire consume/refill/account hot path fits
within a single 64-byte cache line on non-debug 64-bit builds (verified
with pahole).


This patch (of 4):

The struct obj_stock_pcp stores a pointer to pglist_data for the slab
stats cached on the cpu.  On 64-bit machines, this costs 8 bytes.  The
pointer is not strictly required: NODE_DATA() can recover it from the node
id.  Replace cached_pgdat with int16_t node_id and use NUMA_NO_NODE as the
"no stats cached" sentinel.

At the moment all the archs limit MAX_NUMNODES to at most 1024, so int16_t
is plenty; a BUILD_BUG_ON() makes sure we notice if that ever changes.

Link: https://lore.kernel.org/20260526033931.1760588-1-shakeel.butt@linux.dev
Link: https://lore.kernel.org/20260526033931.1760588-2-shakeel.butt@linux.dev
Fixes: 01b9da291c49 ("mm: memcontrol: convert objcg to be per-memcg per-node type")
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Tested-by: kernel test robot <oliver.sang@intel.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Acked-by: Qi Zheng <qi.zheng@linux.dev>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/memcontrol.c |   26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

--- a/mm/memcontrol.c~memcg-store-node_id-instead-of-pglist_data-pointer
+++ a/mm/memcontrol.c
@@ -2022,7 +2022,7 @@ struct obj_stock_pcp {
 	local_trylock_t lock;
 	unsigned int nr_bytes;
 	struct obj_cgroup *cached_objcg;
-	struct pglist_data *cached_pgdat;
+	int16_t node_id;
 	int nr_slab_reclaimable_b;
 	int nr_slab_unreclaimable_b;
 
@@ -2032,6 +2032,7 @@ struct obj_stock_pcp {
 
 static DEFINE_PER_CPU_ALIGNED(struct obj_stock_pcp, obj_stock) = {
 	.lock = INIT_LOCAL_TRYLOCK(lock),
+	.node_id = NUMA_NO_NODE,
 };
 
 static DEFINE_MUTEX(percpu_charge_mutex);
@@ -3162,6 +3163,13 @@ static void __account_obj_stock(struct o
 {
 	int *bytes;
 
+	/*
+	 * MAX_NUMNODES is currently <= 1024 on all archs, but make sure it
+	 * stays below S16_MAX; otherwise the int16_t node_id in struct
+	 * obj_stock_pcp must be widened.
+	 */
+	BUILD_BUG_ON(MAX_NUMNODES >= S16_MAX);
+
 	if (!stock || READ_ONCE(stock->cached_objcg) != objcg)
 		goto direct;
 
@@ -3169,9 +3177,11 @@ static void __account_obj_stock(struct o
 	 * Save vmstat data in stock and skip vmstat array update unless
 	 * accumulating over a page of vmstat data or when pgdat changes.
 	 */
-	if (stock->cached_pgdat != pgdat) {
+	if (stock->node_id == NUMA_NO_NODE) {
+		stock->node_id = pgdat->node_id;
+	} else if (stock->node_id != pgdat->node_id) {
 		/* Flush the existing cached vmstat data */
-		struct pglist_data *oldpg = stock->cached_pgdat;
+		struct pglist_data *oldpg = NODE_DATA(stock->node_id);
 
 		if (stock->nr_slab_reclaimable_b) {
 			mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B,
@@ -3183,7 +3193,7 @@ static void __account_obj_stock(struct o
 					  stock->nr_slab_unreclaimable_b);
 			stock->nr_slab_unreclaimable_b = 0;
 		}
-		stock->cached_pgdat = pgdat;
+		stock->node_id = pgdat->node_id;
 	}
 
 	bytes = (idx == NR_SLAB_RECLAIMABLE_B) ? &stock->nr_slab_reclaimable_b
@@ -3279,19 +3289,21 @@ static void drain_obj_stock(struct obj_s
 	 * Flush the vmstat data in current stock
 	 */
 	if (stock->nr_slab_reclaimable_b || stock->nr_slab_unreclaimable_b) {
+		struct pglist_data *oldpg = NODE_DATA(stock->node_id);
+
 		if (stock->nr_slab_reclaimable_b) {
-			mod_objcg_mlstate(old, stock->cached_pgdat,
+			mod_objcg_mlstate(old, oldpg,
 					  NR_SLAB_RECLAIMABLE_B,
 					  stock->nr_slab_reclaimable_b);
 			stock->nr_slab_reclaimable_b = 0;
 		}
 		if (stock->nr_slab_unreclaimable_b) {
-			mod_objcg_mlstate(old, stock->cached_pgdat,
+			mod_objcg_mlstate(old, oldpg,
 					  NR_SLAB_UNRECLAIMABLE_B,
 					  stock->nr_slab_unreclaimable_b);
 			stock->nr_slab_unreclaimable_b = 0;
 		}
-		stock->cached_pgdat = NULL;
+		stock->node_id = NUMA_NO_NODE;
 	}
 
 	WRITE_ONCE(stock->cached_objcg, NULL);
_

Patches currently in -mm which might be from shakeel.butt@linux.dev are

mm-list_lru-drain-before-clearing-xarray-entry-on-reparent.patch


                 reply	other threads:[~2026-06-02 22:26 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260602222649.64FC71F00893@smtp.kernel.org \
    --to=akpm@linux-foundation.org \
    --cc=alex@ghiti.fr \
    --cc=hannes@cmpxchg.org \
    --cc=harry@kernel.org \
    --cc=joshua.hahnjy@gmail.com \
    --cc=mhocko@kernel.org \
    --cc=mm-commits@vger.kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=oliver.sang@intel.com \
    --cc=qi.zheng@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox