From: JP Kobryn <inwardvessel@gmail.com>
To: linux-mm@kvack.org
Cc: apopple@nvidia.com, akpm@linux-foundation.org,
axelrasmussen@google.com, byungchul@sk.com,
cgroups@vger.kernel.org, david@kernel.org, eperezma@redhat.com,
gourry@gourry.net, jasowang@redhat.com, hannes@cmpxchg.org,
joshua.hahnjy@gmail.com, Liam.Howlett@oracle.com,
linux-kernel@vger.kernel.org, lorenzo.stoakes@oracle.com,
matthew.brost@intel.com, mst@redhat.com, mhocko@suse.com,
rppt@kernel.org, muchun.song@linux.dev,
zhengqi.arch@bytedance.com, rakie.kim@sk.com,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
surenb@google.com, virtualization@lists.linux.dev,
vbabka@suse.cz, weixugc@google.com, xuanzhuo@linux.alibaba.com,
ying.huang@linux.alibaba.com, yuanchu@google.com, ziy@nvidia.com,
kernel-team@meta.com
Subject: [PATCH 1/2] mm/mempolicy: track page allocations per mempolicy
Date: Wed, 11 Feb 2026 20:51:08 -0800 [thread overview]
Message-ID: <20260212045109.255391-2-inwardvessel@gmail.com> (raw)
In-Reply-To: <20260212045109.255391-1-inwardvessel@gmail.com>
It would be useful to see a breakdown of allocations to understand which
NUMA policies are driving them. For example, when investigating memory
pressure, having policy-specific counts could show that allocations were
bound to the affected node (via MPOL_BIND).
Add per-policy page allocation counters as new node stat items. These
counters can provide correlation between a mempolicy and pressure on a
given node.
Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/mmzone.h | 9 +++++++++
mm/mempolicy.c | 30 ++++++++++++++++++++++++++++--
mm/vmstat.c | 9 +++++++++
3 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc5d6c88d2f0..762609d5f0af 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -255,6 +255,15 @@ enum node_stat_item {
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
PGDEMOTE_PROACTIVE,
+#ifdef CONFIG_NUMA
+ PGALLOC_MPOL_DEFAULT,
+ PGALLOC_MPOL_PREFERRED,
+ PGALLOC_MPOL_BIND,
+ PGALLOC_MPOL_INTERLEAVE,
+ PGALLOC_MPOL_LOCAL,
+ PGALLOC_MPOL_PREFERRED_MANY,
+ PGALLOC_MPOL_WEIGHTED_INTERLEAVE,
+#endif
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 68a98ba57882..3c64784af761 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -217,6 +217,21 @@ static void reduce_interleave_weights(unsigned int *bw, u8 *new_iw)
new_iw[nid] /= iw_gcd;
}
+#define CHECK_MPOL_NODE_STAT_OFFSET(mpol) \
+ BUILD_BUG_ON(PGALLOC_##mpol - mpol != PGALLOC_MPOL_DEFAULT)
+/* Map an MPOL_* mode to its PGALLOC_MPOL_* stat item (layout checked below). */
+static enum node_stat_item mpol_node_stat(unsigned short mode)
+{
+ CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED);
+ CHECK_MPOL_NODE_STAT_OFFSET(MPOL_BIND);
+ CHECK_MPOL_NODE_STAT_OFFSET(MPOL_INTERLEAVE);
+ CHECK_MPOL_NODE_STAT_OFFSET(MPOL_LOCAL);
+ CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED_MANY);
+ CHECK_MPOL_NODE_STAT_OFFSET(MPOL_WEIGHTED_INTERLEAVE);
+ /* NOTE(review): mode is not range-checked; callers pass pol->mode or MPOL_*. */
+ return PGALLOC_MPOL_DEFAULT + mode;
+}
+
+
int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords)
{
struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL;
@@ -2446,8 +2461,14 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
nodemask = policy_nodemask(gfp, pol, ilx, &nid);
- if (pol->mode == MPOL_PREFERRED_MANY)
- return alloc_pages_preferred_many(gfp, order, nid, nodemask);
+ if (pol->mode == MPOL_PREFERRED_MANY) {
+ page = alloc_pages_preferred_many(gfp, order, nid, nodemask);
+ if (page)
+ __mod_node_page_state(page_pgdat(page),
+ mpol_node_stat(MPOL_PREFERRED_MANY), 1 << order);
+
+ return page;
+ }
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
/* filter "hugepage" allocation, unless from alloc_pages() */
@@ -2472,6 +2493,9 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
page = __alloc_frozen_pages_noprof(
gfp | __GFP_THISNODE | __GFP_NORETRY, order,
nid, NULL);
+ if (page)
+ __mod_node_page_state(page_pgdat(page),
+ mpol_node_stat(pol->mode), 1 << order);
if (page || !(gfp & __GFP_DIRECT_RECLAIM))
return page;
/*
@@ -2484,6 +2508,8 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
}
page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask);
+ if (page)
+ __mod_node_page_state(page_pgdat(page), mpol_node_stat(pol->mode), 1 << order);
if (unlikely(pol->mode == MPOL_INTERLEAVE ||
pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 65de88cdf40e..74e0ddde1e93 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1291,6 +1291,15 @@ const char * const vmstat_text[] = {
[I(PGDEMOTE_DIRECT)] = "pgdemote_direct",
[I(PGDEMOTE_KHUGEPAGED)] = "pgdemote_khugepaged",
[I(PGDEMOTE_PROACTIVE)] = "pgdemote_proactive",
+#ifdef CONFIG_NUMA
+ [I(PGALLOC_MPOL_DEFAULT)] = "pgalloc_mpol_default",
+ [I(PGALLOC_MPOL_PREFERRED)] = "pgalloc_mpol_preferred",
+ [I(PGALLOC_MPOL_BIND)] = "pgalloc_mpol_bind",
+ [I(PGALLOC_MPOL_INTERLEAVE)] = "pgalloc_mpol_interleave",
+ [I(PGALLOC_MPOL_LOCAL)] = "pgalloc_mpol_local",
+ [I(PGALLOC_MPOL_PREFERRED_MANY)] = "pgalloc_mpol_preferred_many",
+ [I(PGALLOC_MPOL_WEIGHTED_INTERLEAVE)] = "pgalloc_mpol_weighted_interleave",
+#endif
#ifdef CONFIG_HUGETLB_PAGE
[I(NR_HUGETLB)] = "nr_hugetlb",
#endif
--
2.47.3
next prev parent reply other threads:[~2026-02-12 4:51 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-12 4:51 [PATCH 0/2] improve per-node allocation and reclaim visibility JP Kobryn
2026-02-12 4:51 ` JP Kobryn [this message]
2026-02-12 7:29 ` [PATCH 1/2] mm/mempolicy: track page allocations per mempolicy Michal Hocko
2026-02-12 21:22 ` JP Kobryn
2026-02-16 8:26 ` Michal Hocko
2026-02-16 17:50 ` JP Kobryn (Meta)
2026-02-16 21:07 ` Michal Hocko
2026-02-17 7:48 ` JP Kobryn (Meta)
2026-02-17 12:37 ` Michal Hocko
2026-02-17 18:19 ` JP Kobryn (Meta)
2026-02-17 18:52 ` Michal Hocko
2026-02-12 15:07 ` Shakeel Butt
2026-02-12 21:23 ` JP Kobryn
2026-02-12 15:24 ` Vlastimil Babka
2026-02-12 21:25 ` JP Kobryn
2026-02-13 8:54 ` Vlastimil Babka
2026-02-13 19:56 ` JP Kobryn (Meta)
2026-02-18 4:25 ` kernel test robot
2026-02-12 4:51 ` [PATCH 2/2] mm: move pgscan and pgsteal to node stats JP Kobryn
2026-02-12 7:08 ` Michael S. Tsirkin
2026-02-12 21:23 ` JP Kobryn
2026-02-12 7:29 ` Michal Hocko
2026-02-12 21:20 ` JP Kobryn
2026-02-12 4:57 ` [PATCH 0/2] improve per-node allocation and reclaim visibility Matthew Wilcox
2026-02-12 21:22 ` JP Kobryn
2026-02-12 21:53 ` Matthew Wilcox
2026-02-12 18:08 ` [syzbot ci] " syzbot ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260212045109.255391-2-inwardvessel@gmail.com \
--to=inwardvessel@gmail.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=axelrasmussen@google.com \
--cc=byungchul@sk.com \
--cc=cgroups@vger.kernel.org \
--cc=david@kernel.org \
--cc=eperezma@redhat.com \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=jasowang@redhat.com \
--cc=joshua.hahnjy@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=matthew.brost@intel.com \
--cc=mhocko@suse.com \
--cc=mst@redhat.com \
--cc=muchun.song@linux.dev \
--cc=rakie.kim@sk.com \
--cc=roman.gushchin@linux.dev \
--cc=rppt@kernel.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=virtualization@lists.linux.dev \
--cc=weixugc@google.com \
--cc=xuanzhuo@linux.alibaba.com \
--cc=ying.huang@linux.alibaba.com \
--cc=yuanchu@google.com \
--cc=zhengqi.arch@bytedance.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.