public inbox for linux-mm@kvack.org
 help / color / mirror / Atom feed
From: Hao Ge <hao.ge@linux.dev>
To: Suren Baghdasaryan <surenb@google.com>,
	Kent Overstreet <kent.overstreet@linux.dev>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Hao Ge <hao.ge@linux.dev>
Subject: [PATCH v3] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
Date: Thu, 23 Apr 2026 16:37:56 +0800	[thread overview]
Message-ID: <20260423083756.157902-1-hao.ge@linux.dev> (raw)

Pages allocated before page_ext is available have their codetag left
uninitialized. Track these early PFNs and clear their codetag in
clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
warnings when they are freed later.

Currently a fixed-size array of 8192 entries is used, with a warning if
the limit is exceeded. However, the number of early allocations scales
with the number of CPUs and can exceed 8192.

Replace the fixed-size array with a dynamically allocated linked list
of pages. Each page stores PFNs in its page body and uses page->lru
to link to the next page, with page->private tracking the number of
used slots.

The list pages themselves are allocated via alloc_page(), which would
trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and recurse
indefinitely. Introduce __GFP_NO_CODETAG (reuses the %__GFP_NO_OBJ_EXT
bit) and pass gfp_flags through pgalloc_tag_add() so that the early path
can skip recording allocations that carry this flag.

Signed-off-by: Hao Ge <hao.ge@linux.dev>
---
v3:
- Simplify linked list: use page->lru for chaining and page->private as
  slot counter, removing the early_pfn_node struct and freelist (suggested
  by Suren Baghdasaryan)
- Pass gfp_flags through alloc_tag_add_early_pfn() but strip
  __GFP_DIRECT_RECLAIM instead of selecting GFP_KERNEL/GFP_ATOMIC,
  because __alloc_tag_add_early_pfn() is invoked under rcu_read_lock().

v2:
- Use cmpxchg to atomically update early_pfn_pages, preventing a page leak under concurrent allocation
- Pass gfp_flags through the full call chain and use gfpflags_allow_blocking()
  to select GFP_KERNEL vs GFP_ATOMIC, avoiding unnecessary GFP_ATOMIC in process context
---
 include/linux/alloc_tag.h |  22 ++++++-
 lib/alloc_tag.c           | 118 +++++++++++++++++++++-----------------
 mm/page_alloc.c           |  23 ++++----
 3 files changed, 97 insertions(+), 66 deletions(-)

diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 02de2ede560f..ce9b478033bf 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -150,6 +150,23 @@ static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
 }
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+/*
+ * Skip early PFN recording for a page allocation.  Reuses the
+ * %__GFP_NO_OBJ_EXT bit.  Used by __alloc_tag_add_early_pfn() to avoid
+ * recursion when allocating pages for the early PFN tracking list
+ * itself.
+ *
+ * Callers must set the codetag to CODETAG_EMPTY (via
+ * clear_page_tag_ref()) before freeing pages allocated with this
+ * flag once page_ext becomes available, otherwise
+ * alloc_tag_sub_check() will trigger a warning.
+ */
+#define __GFP_NO_CODETAG	__GFP_NO_OBJ_EXT
+
+static inline bool should_record_early_pfn(gfp_t gfp_flags)
+{
+	return !(gfp_flags & __GFP_NO_CODETAG);
+}
 static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
 {
 	WARN_ONCE(ref && ref->ct && !is_codetag_empty(ref),
@@ -163,11 +180,12 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
 {
 	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
 }
-void alloc_tag_add_early_pfn(unsigned long pfn);
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
 #else
 static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
 static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
-static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
+static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
+static inline bool should_record_early_pfn(gfp_t gfp_flags) { return false; }
 #endif
 
 /* Caller should verify both ref and tag to be valid */
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index ed1bdcf1f8ab..a00dc5a867e4 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -766,45 +766,49 @@ static __init bool need_page_alloc_tagging(void)
  * Some pages are allocated before page_ext becomes available, leaving
  * their codetag uninitialized. Track these early PFNs so we can clear
  * their codetag refs later to avoid warnings when they are freed.
- *
- * Early allocations include:
- *   - Base allocations independent of CPU count
- *   - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
- *     such as trace ring buffers, scheduler per-cpu data)
- *
- * For simplicity, we fix the size to 8192.
- * If insufficient, a warning will be triggered to alert the user.
- *
- * TODO: Replace fixed-size array with dynamic allocation using
- * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
  */
-#define EARLY_ALLOC_PFN_MAX		8192
+#define EARLY_PFNS_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
 
-static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
-static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
+static struct page *early_pfn_current __initdata;
 
-static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
+static void __init __alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
 {
-	int old_idx, new_idx;
+	struct page *page;
+	unsigned long idx;
 
 	do {
-		old_idx = atomic_read(&early_pfn_count);
-		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
-			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
-				      EARLY_ALLOC_PFN_MAX);
-			return;
+		page = READ_ONCE(early_pfn_current);
+		if (!page || READ_ONCE(page->private) >= EARLY_PFNS_PER_PAGE) {
+			gfp_t gfp = gfp_flags & ~__GFP_DIRECT_RECLAIM;
+			struct page *new = alloc_page(gfp | __GFP_NO_CODETAG);
+
+			if (!new) {
+				pr_warn_once("early PFN tracking page allocation failed\n");
+				return;
+			}
+			new->lru.next = (struct list_head *)page;
+			if (cmpxchg(&early_pfn_current, page, new) != page) {
+				__free_page(new);
+				continue;
+			}
+			page = new;
 		}
-		new_idx = old_idx + 1;
-	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
+		idx = READ_ONCE(page->private);
+		if (idx >= EARLY_PFNS_PER_PAGE)
+			continue;
+		if (cmpxchg(&page->private, idx, idx + 1) != idx)
+			continue;
+		break;
+	} while (1);
 
-	early_pfns[old_idx] = pfn;
+	((unsigned long *)page_address(page))[idx] = pfn;
 }
 
-typedef void alloc_tag_add_func(unsigned long pfn);
+typedef void alloc_tag_add_func(unsigned long pfn, gfp_t gfp_flags);
 static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
 	RCU_INITIALIZER(__alloc_tag_add_early_pfn);
 
-void alloc_tag_add_early_pfn(unsigned long pfn)
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
 {
 	alloc_tag_add_func *alloc_tag_add;
 
@@ -814,13 +818,14 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
 	rcu_read_lock();
 	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
 	if (alloc_tag_add)
-		alloc_tag_add(pfn);
+		alloc_tag_add(pfn, gfp_flags);
 	rcu_read_unlock();
 }
 
 static void __init clear_early_alloc_pfn_tag_refs(void)
 {
-	unsigned int i;
+	struct page *page, *next;
+	unsigned long i;
 
 	if (static_key_enabled(&mem_profiling_compressed))
 		return;
@@ -829,37 +834,42 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
 	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
 	synchronize_rcu();
 
-	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
-		unsigned long pfn = early_pfns[i];
-
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			union pgtag_ref_handle handle;
-			union codetag_ref ref;
-
-			if (get_page_tag_ref(page, &ref, &handle)) {
-				/*
-				 * An early-allocated page could be freed and reallocated
-				 * after its page_ext is initialized but before we clear it.
-				 * In that case, it already has a valid tag set.
-				 * We should not overwrite that valid tag with CODETAG_EMPTY.
-				 *
-				 * Note: there is still a small race window between checking
-				 * ref.ct and calling set_codetag_empty(). We accept this
-				 * race as it's unlikely and the extra complexity of atomic
-				 * cmpxchg is not worth it for this debug-only code path.
-				 */
-				if (ref.ct) {
+	for (page = early_pfn_current; page; page = next) {
+		for (i = 0; i < page->private; i++) {
+			unsigned long pfn = ((unsigned long *)page_address(page))[i];
+
+			if (pfn_valid(pfn)) {
+				union pgtag_ref_handle handle;
+				union codetag_ref ref;
+
+				if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
+					/*
+					 * An early-allocated page could be freed and reallocated
+					 * after its page_ext is initialized but before we clear it.
+					 * In that case, it already has a valid tag set.
+					 * We should not overwrite that valid tag
+					 * with CODETAG_EMPTY.
+					 *
+					 * Note: there is still a small race window between checking
+					 * ref.ct and calling set_codetag_empty(). We accept this
+					 * race as it's unlikely and the extra complexity of atomic
+					 * cmpxchg is not worth it for this debug-only code path.
+					 */
+					if (ref.ct) {
+						put_page_tag_ref(handle);
+						continue;
+					}
+
+					set_codetag_empty(&ref);
+					update_page_tag_ref(handle, &ref);
 					put_page_tag_ref(handle);
-					continue;
 				}
-
-				set_codetag_empty(&ref);
-				update_page_tag_ref(handle, &ref);
-				put_page_tag_ref(handle);
 			}
 		}
 
+		next = (struct page *)page->lru.next;
+		clear_page_tag_ref(page);
+		__free_page(page);
 	}
 }
 #else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 04494bc2e46f..5b7b234967a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1284,7 +1284,7 @@ void __clear_page_tag_ref(struct page *page)
 /* Should be called only if mem_alloc_profiling_enabled() */
 static noinline
 void __pgalloc_tag_add(struct page *page, struct task_struct *task,
-		       unsigned int nr)
+		       unsigned int nr, gfp_t gfp_flags)
 {
 	union pgtag_ref_handle handle;
 	union codetag_ref ref;
@@ -1294,21 +1294,24 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
 		update_page_tag_ref(handle, &ref);
 		put_page_tag_ref(handle);
 	} else {
-		/*
-		 * page_ext is not available yet, record the pfn so we can
-		 * clear the tag ref later when page_ext is initialized.
-		 */
-		alloc_tag_add_early_pfn(page_to_pfn(page));
+
 		if (task->alloc_tag)
 			alloc_tag_set_inaccurate(task->alloc_tag);
+
+		/*
+		 * page_ext is not available yet, record the pfn so the
+		 * tag ref can be cleared later when page_ext is initialized.
+		 */
+		if (should_record_early_pfn(gfp_flags))
+			alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
 	}
 }
 
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr)
+				   unsigned int nr, gfp_t gfp_flags)
 {
 	if (mem_alloc_profiling_enabled())
-		__pgalloc_tag_add(page, task, nr);
+		__pgalloc_tag_add(page, task, nr, gfp_flags);
 }
 
 /* Should be called only if mem_alloc_profiling_enabled() */
@@ -1341,7 +1344,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr) {}
+				   unsigned int nr, gfp_t gfp_flags) {}
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
 static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
 
@@ -1896,7 +1899,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
 	set_page_owner(page, order, gfp_flags);
 	page_table_check_alloc(page, order);
-	pgalloc_tag_add(page, current, 1 << order);
+	pgalloc_tag_add(page, current, 1 << order, gfp_flags);
 }
 
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
-- 
2.25.1



                 reply	other threads:[~2026-04-23  8:39 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260423083756.157902-1-hao.ge@linux.dev \
    --to=hao.ge@linux.dev \
    --cc=akpm@linux-foundation.org \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=surenb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox