The Linux Kernel Mailing List
 help / color / mirror / Atom feed
* [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
@ 2026-06-04  2:40 Hao Ge
  2026-06-04  9:59 ` Hao Ge
  0 siblings, 1 reply; 3+ messages in thread
From: Hao Ge @ 2026-06-04  2:40 UTC (permalink / raw)
  To: Suren Baghdasaryan, Kent Overstreet, Andrew Morton; +Cc: linux-kernel, Hao Ge

Pages allocated before page_ext is available have their codetag left
uninitialized. Track these early PFNs and clear their codetag in
clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
warnings when they are freed later.

Currently a fixed-size array of 8192 entries is used, with a warning if
the limit is exceeded. However, the number of early allocations depends
on the number of CPUs and can be larger than 8192.

Replace the fixed-size array with a dynamically allocated linked list
of pfn_pool structs. Each node is allocated via alloc_page() and mapped
to a pfn_pool containing a next pointer, an atomic slot counter, and a
PFN array that fills the remainder of the page.

The tracking pages themselves are allocated via alloc_page(), which
would trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and
recurse indefinitely. Introduce __GFP_NO_CODETAG (reuses the
%__GFP_NO_OBJ_EXT bit) and pass gfp_flags through pgalloc_tag_add()
so that the early path can skip recording allocations that carry this flag.

Suggested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Hao Ge <hao.ge@linux.dev>
---
 include/linux/alloc_tag.h |   4 +-
 lib/alloc_tag.c           | 140 +++++++++++++++++++++++++-------------
 mm/page_alloc.c           |  12 ++--
 3 files changed, 99 insertions(+), 57 deletions(-)

diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 02de2ede560f..068ba2e77c5d 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -163,11 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
 {
 	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
 }
-void alloc_tag_add_early_pfn(unsigned long pfn);
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
 #else
 static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
 static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
-static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
+static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
 #endif
 
 /* Caller should verify both ref and tag to be valid */
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index ed1bdcf1f8ab..f2f574bcf383 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -767,50 +767,82 @@ static __init bool need_page_alloc_tagging(void)
  * their codetag uninitialized. Track these early PFNs so we can clear
  * their codetag refs later to avoid warnings when they are freed.
  *
- * Early allocations include:
- *   - Base allocations independent of CPU count
- *   - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
- *     such as trace ring buffers, scheduler per-cpu data)
- *
- * For simplicity, we fix the size to 8192.
- * If insufficient, a warning will be triggered to alert the user.
+ * Each page is cast to a pfn_pool: the first few bytes hold metadata
+ * (next pointer and slot count), the remainder stores PFNs.
+ */
+struct pfn_pool {
+	struct pfn_pool *next;
+	atomic_t count;
+	unsigned long pfns[];
+};
+
+#define PFN_POOL_SIZE			((PAGE_SIZE - offsetof(struct pfn_pool, pfns)) / \
+					 sizeof(unsigned long))
+
+/*
+ * Skip early PFN recording for a page allocation.  Reuses the
+ * %__GFP_NO_OBJ_EXT bit.  Used by __alloc_tag_add_early_pfn() to avoid
+ * recursion when allocating pages for the early PFN tracking list
+ * itself.
  *
- * TODO: Replace fixed-size array with dynamic allocation using
- * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
+ * Codetags of the pages allocated with __GFP_NO_CODETAG should be
+ * cleared (via clear_page_tag_ref()) before freeing the pages to prevent
+ * alloc_tag_sub_check() from triggering a warning.
  */
-#define EARLY_ALLOC_PFN_MAX		8192
+#define __GFP_NO_CODETAG		__GFP_NO_OBJ_EXT
 
-static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
-static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
+static struct pfn_pool *current_pfn_pool __initdata;
 
 static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
 {
-	int old_idx, new_idx;
+	struct pfn_pool *pool;
+	int idx;
 
 	do {
-		old_idx = atomic_read(&early_pfn_count);
-		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
-			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
-				      EARLY_ALLOC_PFN_MAX);
-			return;
+		pool = READ_ONCE(current_pfn_pool);
+		if (!pool || atomic_read(&pool->count) >= PFN_POOL_SIZE) {
+			struct page *new_page = alloc_page(__GFP_HIGH | __GFP_NO_CODETAG);
+			struct pfn_pool *new;
+
+			if (!new_page) {
+				pr_warn_once("early PFN tracking page allocation failed\n");
+				return;
+			}
+			new = page_address(new_page);
+			new->next = pool;
+			atomic_set(&new->count, 0);
+			if (cmpxchg(&current_pfn_pool, pool, new) != pool) {
+				clear_page_tag_ref(new_page);
+				__free_page(new_page);
+				continue;
+			}
+			pool = new;
 		}
-		new_idx = old_idx + 1;
-	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
+		idx = atomic_read(&pool->count);
+		if (idx >= PFN_POOL_SIZE)
+			continue;
+		if (atomic_cmpxchg(&pool->count, idx, idx + 1) == idx)
+			break;
+	} while (1);
 
-	early_pfns[old_idx] = pfn;
+	pool->pfns[idx] = pfn;
 }
 
 typedef void alloc_tag_add_func(unsigned long pfn);
 static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
 	RCU_INITIALIZER(__alloc_tag_add_early_pfn);
 
-void alloc_tag_add_early_pfn(unsigned long pfn)
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
 {
 	alloc_tag_add_func *alloc_tag_add;
 
 	if (static_key_enabled(&mem_profiling_compressed))
 		return;
 
+	/* Skip allocations for the tracking list itself to avoid recursion. */
+	if (gfp_flags & __GFP_NO_CODETAG)
+		return;
+
 	rcu_read_lock();
 	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
 	if (alloc_tag_add)
@@ -820,7 +852,9 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
 
 static void __init clear_early_alloc_pfn_tag_refs(void)
 {
-	unsigned int i;
+	struct pfn_pool *pool, *next;
+	struct page *page;
+	int i;
 
 	if (static_key_enabled(&mem_profiling_compressed))
 		return;
@@ -829,37 +863,45 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
 	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
 	synchronize_rcu();
 
-	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
-		unsigned long pfn = early_pfns[i];
-
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			union pgtag_ref_handle handle;
-			union codetag_ref ref;
-
-			if (get_page_tag_ref(page, &ref, &handle)) {
-				/*
-				 * An early-allocated page could be freed and reallocated
-				 * after its page_ext is initialized but before we clear it.
-				 * In that case, it already has a valid tag set.
-				 * We should not overwrite that valid tag with CODETAG_EMPTY.
-				 *
-				 * Note: there is still a small race window between checking
-				 * ref.ct and calling set_codetag_empty(). We accept this
-				 * race as it's unlikely and the extra complexity of atomic
-				 * cmpxchg is not worth it for this debug-only code path.
-				 */
-				if (ref.ct) {
+	for (pool = current_pfn_pool; pool; pool = next) {
+		int nr_pfns = atomic_read(&pool->count);
+
+		for (i = 0; i < nr_pfns; i++) {
+			unsigned long pfn = pool->pfns[i];
+
+			if (pfn_valid(pfn)) {
+				union pgtag_ref_handle handle;
+				union codetag_ref ref;
+
+				if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
+					/*
+					 * An early-allocated page could be freed and reallocated
+					 * after its page_ext is initialized but before we clear it.
+					 * In that case, it already has a valid tag set.
+					 * We should not overwrite that valid tag
+					 * with CODETAG_EMPTY.
+					 *
+					 * Note: there is still a small race window between checking
+					 * ref.ct and calling set_codetag_empty(). We accept this
+					 * race as it's unlikely and the extra complexity of atomic
+					 * cmpxchg is not worth it for this debug-only code path.
+					 */
+					if (ref.ct) {
+						put_page_tag_ref(handle);
+						continue;
+					}
+
+					set_codetag_empty(&ref);
+					update_page_tag_ref(handle, &ref);
 					put_page_tag_ref(handle);
-					continue;
 				}
-
-				set_codetag_empty(&ref);
-				update_page_tag_ref(handle, &ref);
-				put_page_tag_ref(handle);
 			}
 		}
 
+		next = pool->next;
+		page = virt_to_page(pool);
+		clear_page_tag_ref(page);
+		__free_page(page);
 	}
 }
 #else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d49c254174da..50b2bc8f42d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1240,7 +1240,7 @@ void __clear_page_tag_ref(struct page *page)
 /* Should be called only if mem_alloc_profiling_enabled() */
 static noinline
 void __pgalloc_tag_add(struct page *page, struct task_struct *task,
-		       unsigned int nr)
+		       unsigned int nr, gfp_t gfp_flags)
 {
 	union pgtag_ref_handle handle;
 	union codetag_ref ref;
@@ -1254,17 +1254,17 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
 		 * page_ext is not available yet, record the pfn so we can
 		 * clear the tag ref later when page_ext is initialized.
 		 */
-		alloc_tag_add_early_pfn(page_to_pfn(page));
+		alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
 		if (task->alloc_tag)
 			alloc_tag_set_inaccurate(task->alloc_tag);
 	}
 }
 
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr)
+				   unsigned int nr, gfp_t gfp_flags)
 {
 	if (mem_alloc_profiling_enabled())
-		__pgalloc_tag_add(page, task, nr);
+		__pgalloc_tag_add(page, task, nr, gfp_flags);
 }
 
 /* Should be called only if mem_alloc_profiling_enabled() */
@@ -1297,7 +1297,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr) {}
+				   unsigned int nr, gfp_t gfp_flags) {}
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
 static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
 
@@ -1852,7 +1852,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
 	set_page_owner(page, order, gfp_flags);
 	page_table_check_alloc(page, order);
-	pgalloc_tag_add(page, current, 1 << order);
+	pgalloc_tag_add(page, current, 1 << order, gfp_flags);
 }
 
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
  2026-06-04  2:40 [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list Hao Ge
@ 2026-06-04  9:59 ` Hao Ge
  2026-06-04 23:56   ` Suren Baghdasaryan
  0 siblings, 1 reply; 3+ messages in thread
From: Hao Ge @ 2026-06-04  9:59 UTC (permalink / raw)
  To: Suren Baghdasaryan, Kent Overstreet, Andrew Morton
  Cc: linux-kernel, Linux Memory Management List


Adding Cc: linux-mm@kvack.org, and adding back the part of the changelog
that was lost because of a local Git environment glitch.

Sorry about this.


On 2026/6/4 10:40, Hao Ge wrote:
> Pages allocated before page_ext is available have their codetag left
> uninitialized. Track these early PFNs and clear their codetag in
> clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
> warnings when they are freed later.
>
> Currently a fixed-size array of 8192 entries is used, with a warning if
> the limit is exceeded. However, the number of early allocations depends
> on the number of CPUs and can be larger than 8192.
>
> Replace the fixed-size array with a dynamically allocated linked list
> of pfn_pool structs. Each node is allocated via alloc_page() and mapped
> to a pfn_pool containing a next pointer, an atomic slot counter, and a
> PFN array that fills the remainder of the page.
>
> The tracking pages themselves are allocated via alloc_page(), which
> would trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and
> recurse indefinitely. Introduce __GFP_NO_CODETAG (reuses the
> %__GFP_NO_OBJ_EXT bit) and pass gfp_flags through pgalloc_tag_add()
> so that the early path can skip recording allocations that carry this flag.
>
> Suggested-by: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Hao Ge <hao.ge@linux.dev>

---

v6:
- Use hardcoded __GFP_HIGH | __GFP_NO_CODETAG instead of inheriting
   caller's gfp_flags for internal pfn_pool page allocation.

> ---
>   include/linux/alloc_tag.h |   4 +-
>   lib/alloc_tag.c           | 140 +++++++++++++++++++++++++-------------
>   mm/page_alloc.c           |  12 ++--
>   3 files changed, 99 insertions(+), 57 deletions(-)
>
> diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
> index 02de2ede560f..068ba2e77c5d 100644
> --- a/include/linux/alloc_tag.h
> +++ b/include/linux/alloc_tag.h
> @@ -163,11 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
>   {
>   	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
>   }
> -void alloc_tag_add_early_pfn(unsigned long pfn);
> +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
>   #else
>   static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
>   static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
> -static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
> +static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
>   #endif
>   
>   /* Caller should verify both ref and tag to be valid */
> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> index ed1bdcf1f8ab..f2f574bcf383 100644
> --- a/lib/alloc_tag.c
> +++ b/lib/alloc_tag.c
> @@ -767,50 +767,82 @@ static __init bool need_page_alloc_tagging(void)
>    * their codetag uninitialized. Track these early PFNs so we can clear
>    * their codetag refs later to avoid warnings when they are freed.
>    *
> - * Early allocations include:
> - *   - Base allocations independent of CPU count
> - *   - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
> - *     such as trace ring buffers, scheduler per-cpu data)
> - *
> - * For simplicity, we fix the size to 8192.
> - * If insufficient, a warning will be triggered to alert the user.
> + * Each page is cast to a pfn_pool: the first few bytes hold metadata
> + * (next pointer and slot count), the remainder stores PFNs.
> + */
> +struct pfn_pool {
> +	struct pfn_pool *next;
> +	atomic_t count;
> +	unsigned long pfns[];
> +};
> +
> +#define PFN_POOL_SIZE			((PAGE_SIZE - offsetof(struct pfn_pool, pfns)) / \
> +					 sizeof(unsigned long))
> +
> +/*
> + * Skip early PFN recording for a page allocation.  Reuses the
> + * %__GFP_NO_OBJ_EXT bit.  Used by __alloc_tag_add_early_pfn() to avoid
> + * recursion when allocating pages for the early PFN tracking list
> + * itself.
>    *
> - * TODO: Replace fixed-size array with dynamic allocation using
> - * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
> + * Codetags of the pages allocated with __GFP_NO_CODETAG should be
> + * cleared (via clear_page_tag_ref()) before freeing the pages to prevent
> + * alloc_tag_sub_check() from triggering a warning.
>    */
> -#define EARLY_ALLOC_PFN_MAX		8192
> +#define __GFP_NO_CODETAG		__GFP_NO_OBJ_EXT
>   
> -static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
> -static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
> +static struct pfn_pool *current_pfn_pool __initdata;
>   
>   static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
>   {
> -	int old_idx, new_idx;
> +	struct pfn_pool *pool;
> +	int idx;
>   
>   	do {
> -		old_idx = atomic_read(&early_pfn_count);
> -		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
> -			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
> -				      EARLY_ALLOC_PFN_MAX);
> -			return;
> +		pool = READ_ONCE(current_pfn_pool);
> +		if (!pool || atomic_read(&pool->count) >= PFN_POOL_SIZE) {
> +			struct page *new_page = alloc_page(__GFP_HIGH | __GFP_NO_CODETAG);
> +			struct pfn_pool *new;
> +
> +			if (!new_page) {
> +				pr_warn_once("early PFN tracking page allocation failed\n");
> +				return;
> +			}
> +			new = page_address(new_page);
> +			new->next = pool;
> +			atomic_set(&new->count, 0);
> +			if (cmpxchg(&current_pfn_pool, pool, new) != pool) {
> +				clear_page_tag_ref(new_page);
> +				__free_page(new_page);
> +				continue;
> +			}
> +			pool = new;
>   		}
> -		new_idx = old_idx + 1;
> -	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
> +		idx = atomic_read(&pool->count);
> +		if (idx >= PFN_POOL_SIZE)
> +			continue;
> +		if (atomic_cmpxchg(&pool->count, idx, idx + 1) == idx)
> +			break;
> +	} while (1);
>   
> -	early_pfns[old_idx] = pfn;
> +	pool->pfns[idx] = pfn;
>   }
>   
>   typedef void alloc_tag_add_func(unsigned long pfn);
>   static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
>   	RCU_INITIALIZER(__alloc_tag_add_early_pfn);
>   
> -void alloc_tag_add_early_pfn(unsigned long pfn)
> +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
>   {
>   	alloc_tag_add_func *alloc_tag_add;
>   
>   	if (static_key_enabled(&mem_profiling_compressed))
>   		return;
>   
> +	/* Skip allocations for the tracking list itself to avoid recursion. */
> +	if (gfp_flags & __GFP_NO_CODETAG)
> +		return;
> +
>   	rcu_read_lock();
>   	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
>   	if (alloc_tag_add)
> @@ -820,7 +852,9 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
>   
>   static void __init clear_early_alloc_pfn_tag_refs(void)
>   {
> -	unsigned int i;
> +	struct pfn_pool *pool, *next;
> +	struct page *page;
> +	int i;
>   
>   	if (static_key_enabled(&mem_profiling_compressed))
>   		return;
> @@ -829,37 +863,45 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
>   	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
>   	synchronize_rcu();
>   
> -	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
> -		unsigned long pfn = early_pfns[i];
> -
> -		if (pfn_valid(pfn)) {
> -			struct page *page = pfn_to_page(pfn);
> -			union pgtag_ref_handle handle;
> -			union codetag_ref ref;
> -
> -			if (get_page_tag_ref(page, &ref, &handle)) {
> -				/*
> -				 * An early-allocated page could be freed and reallocated
> -				 * after its page_ext is initialized but before we clear it.
> -				 * In that case, it already has a valid tag set.
> -				 * We should not overwrite that valid tag with CODETAG_EMPTY.
> -				 *
> -				 * Note: there is still a small race window between checking
> -				 * ref.ct and calling set_codetag_empty(). We accept this
> -				 * race as it's unlikely and the extra complexity of atomic
> -				 * cmpxchg is not worth it for this debug-only code path.
> -				 */
> -				if (ref.ct) {
> +	for (pool = current_pfn_pool; pool; pool = next) {
> +		int nr_pfns = atomic_read(&pool->count);
> +
> +		for (i = 0; i < nr_pfns; i++) {
> +			unsigned long pfn = pool->pfns[i];
> +
> +			if (pfn_valid(pfn)) {
> +				union pgtag_ref_handle handle;
> +				union codetag_ref ref;
> +
> +				if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
> +					/*
> +					 * An early-allocated page could be freed and reallocated
> +					 * after its page_ext is initialized but before we clear it.
> +					 * In that case, it already has a valid tag set.
> +					 * We should not overwrite that valid tag
> +					 * with CODETAG_EMPTY.
> +					 *
> +					 * Note: there is still a small race window between checking
> +					 * ref.ct and calling set_codetag_empty(). We accept this
> +					 * race as it's unlikely and the extra complexity of atomic
> +					 * cmpxchg is not worth it for this debug-only code path.
> +					 */
> +					if (ref.ct) {
> +						put_page_tag_ref(handle);
> +						continue;
> +					}
> +
> +					set_codetag_empty(&ref);
> +					update_page_tag_ref(handle, &ref);
>   					put_page_tag_ref(handle);
> -					continue;
>   				}
> -
> -				set_codetag_empty(&ref);
> -				update_page_tag_ref(handle, &ref);
> -				put_page_tag_ref(handle);
>   			}
>   		}
>   
> +		next = pool->next;
> +		page = virt_to_page(pool);
> +		clear_page_tag_ref(page);
> +		__free_page(page);
>   	}
>   }
>   #else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d49c254174da..50b2bc8f42d9 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1240,7 +1240,7 @@ void __clear_page_tag_ref(struct page *page)
>   /* Should be called only if mem_alloc_profiling_enabled() */
>   static noinline
>   void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> -		       unsigned int nr)
> +		       unsigned int nr, gfp_t gfp_flags)
>   {
>   	union pgtag_ref_handle handle;
>   	union codetag_ref ref;
> @@ -1254,17 +1254,17 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
>   		 * page_ext is not available yet, record the pfn so we can
>   		 * clear the tag ref later when page_ext is initialized.
>   		 */
> -		alloc_tag_add_early_pfn(page_to_pfn(page));
> +		alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
>   		if (task->alloc_tag)
>   			alloc_tag_set_inaccurate(task->alloc_tag);
>   	}
>   }
>   
>   static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> -				   unsigned int nr)
> +				   unsigned int nr, gfp_t gfp_flags)
>   {
>   	if (mem_alloc_profiling_enabled())
> -		__pgalloc_tag_add(page, task, nr);
> +		__pgalloc_tag_add(page, task, nr, gfp_flags);
>   }
>   
>   /* Should be called only if mem_alloc_profiling_enabled() */
> @@ -1297,7 +1297,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
>   #else /* CONFIG_MEM_ALLOC_PROFILING */
>   
>   static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> -				   unsigned int nr) {}
> +				   unsigned int nr, gfp_t gfp_flags) {}
>   static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
>   static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
>   
> @@ -1852,7 +1852,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
>   
>   	set_page_owner(page, order, gfp_flags);
>   	page_table_check_alloc(page, order);
> -	pgalloc_tag_add(page, current, 1 << order);
> +	pgalloc_tag_add(page, current, 1 << order, gfp_flags);
>   }
>   
>   static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
  2026-06-04  9:59 ` Hao Ge
@ 2026-06-04 23:56   ` Suren Baghdasaryan
  0 siblings, 0 replies; 3+ messages in thread
From: Suren Baghdasaryan @ 2026-06-04 23:56 UTC (permalink / raw)
  To: Hao Ge
  Cc: Kent Overstreet, Andrew Morton, linux-kernel,
	Linux Memory Management List

On Thu, Jun 4, 2026 at 2:59 AM Hao Ge <hao.ge@linux.dev> wrote:
>
>
> Add cc:linux-mm@kvack.org and lost part of the changelog
>
> because of local Git environment glitch.
>
> Sorry for this.
>
>
> On 2026/6/4 10:40, Hao Ge wrote:
> > Pages allocated before page_ext is available have their codetag left
> > uninitialized. Track these early PFNs and clear their codetag in
> > clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
> > warnings when they are freed later.
> >
> > Currently a fixed-size array of 8192 entries is used, with a warning if
> > the limit is exceeded. However, the number of early allocations depends
> > on the number of CPUs and can be larger than 8192.
> >
> > Replace the fixed-size array with a dynamically allocated linked list
> > of pfn_pool structs. Each node is allocated via alloc_page() and mapped
> > to a pfn_pool containing a next pointer, an atomic slot counter, and a
> > PFN array that fills the remainder of the page.
> >
> > The tracking pages themselves are allocated via alloc_page(), which
> > would trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and
> > recurse indefinitely. Introduce __GFP_NO_CODETAG (reuses the
> > %__GFP_NO_OBJ_EXT bit) and pass gfp_flags through pgalloc_tag_add()
> > so that the early path can skip recording allocations that carry this flag.
> >
> > Suggested-by: Suren Baghdasaryan <surenb@google.com>
> > Signed-off-by: Hao Ge <hao.ge@linux.dev>

Acked-by: Suren Baghdasaryan <surenb@google.com>

>
> ---
>
> v6:
> - Use hardcoded __GFP_HIGH | __GFP_NO_CODETAG instead of inheriting
>    caller's gfp_flags for internal pfn_pool page allocation.
>
> > ---
> >   include/linux/alloc_tag.h |   4 +-
> >   lib/alloc_tag.c           | 140 +++++++++++++++++++++++++-------------
> >   mm/page_alloc.c           |  12 ++--
> >   3 files changed, 99 insertions(+), 57 deletions(-)
> >
> > diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
> > index 02de2ede560f..068ba2e77c5d 100644
> > --- a/include/linux/alloc_tag.h
> > +++ b/include/linux/alloc_tag.h
> > @@ -163,11 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
> >   {
> >       WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
> >   }
> > -void alloc_tag_add_early_pfn(unsigned long pfn);
> > +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
> >   #else
> >   static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
> >   static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
> > -static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
> > +static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
> >   #endif
> >
> >   /* Caller should verify both ref and tag to be valid */
> > diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> > index ed1bdcf1f8ab..f2f574bcf383 100644
> > --- a/lib/alloc_tag.c
> > +++ b/lib/alloc_tag.c
> > @@ -767,50 +767,82 @@ static __init bool need_page_alloc_tagging(void)
> >    * their codetag uninitialized. Track these early PFNs so we can clear
> >    * their codetag refs later to avoid warnings when they are freed.
> >    *
> > - * Early allocations include:
> > - *   - Base allocations independent of CPU count
> > - *   - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
> > - *     such as trace ring buffers, scheduler per-cpu data)
> > - *
> > - * For simplicity, we fix the size to 8192.
> > - * If insufficient, a warning will be triggered to alert the user.
> > + * Each page is cast to a pfn_pool: the first few bytes hold metadata
> > + * (next pointer and slot count), the remainder stores PFNs.
> > + */
> > +struct pfn_pool {
> > +     struct pfn_pool *next;
> > +     atomic_t count;
> > +     unsigned long pfns[];
> > +};
> > +
> > +#define PFN_POOL_SIZE                        ((PAGE_SIZE - offsetof(struct pfn_pool, pfns)) / \
> > +                                      sizeof(unsigned long))
> > +
> > +/*
> > + * Skip early PFN recording for a page allocation.  Reuses the
> > + * %__GFP_NO_OBJ_EXT bit.  Used by __alloc_tag_add_early_pfn() to avoid
> > + * recursion when allocating pages for the early PFN tracking list
> > + * itself.
> >    *
> > - * TODO: Replace fixed-size array with dynamic allocation using
> > - * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
> > + * Codetags of the pages allocated with __GFP_NO_CODETAG should be
> > + * cleared (via clear_page_tag_ref()) before freeing the pages to prevent
> > + * alloc_tag_sub_check() from triggering a warning.
> >    */
> > -#define EARLY_ALLOC_PFN_MAX          8192
> > +#define __GFP_NO_CODETAG             __GFP_NO_OBJ_EXT
> >
> > -static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
> > -static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
> > +static struct pfn_pool *current_pfn_pool __initdata;
> >
> >   static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
> >   {
> > -     int old_idx, new_idx;
> > +     struct pfn_pool *pool;
> > +     int idx;
> >
> >       do {
> > -             old_idx = atomic_read(&early_pfn_count);
> > -             if (old_idx >= EARLY_ALLOC_PFN_MAX) {
> > -                     pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
> > -                                   EARLY_ALLOC_PFN_MAX);
> > -                     return;
> > +             pool = READ_ONCE(current_pfn_pool);
> > +             if (!pool || atomic_read(&pool->count) >= PFN_POOL_SIZE) {
> > +                     struct page *new_page = alloc_page(__GFP_HIGH | __GFP_NO_CODETAG);
> > +                     struct pfn_pool *new;
> > +
> > +                     if (!new_page) {
> > +                             pr_warn_once("early PFN tracking page allocation failed\n");
> > +                             return;
> > +                     }
> > +                     new = page_address(new_page);
> > +                     new->next = pool;
> > +                     atomic_set(&new->count, 0);
> > +                     if (cmpxchg(&current_pfn_pool, pool, new) != pool) {
> > +                             clear_page_tag_ref(new_page);
> > +                             __free_page(new_page);
> > +                             continue;
> > +                     }
> > +                     pool = new;
> >               }
> > -             new_idx = old_idx + 1;
> > -     } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
> > +             idx = atomic_read(&pool->count);
> > +             if (idx >= PFN_POOL_SIZE)
> > +                     continue;
> > +             if (atomic_cmpxchg(&pool->count, idx, idx + 1) == idx)
> > +                     break;
> > +     } while (1);
> >
> > -     early_pfns[old_idx] = pfn;
> > +     pool->pfns[idx] = pfn;
> >   }
> >
> >   typedef void alloc_tag_add_func(unsigned long pfn);
> >   static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
> >       RCU_INITIALIZER(__alloc_tag_add_early_pfn);
> >
> > -void alloc_tag_add_early_pfn(unsigned long pfn)
> > +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
> >   {
> >       alloc_tag_add_func *alloc_tag_add;
> >
> >       if (static_key_enabled(&mem_profiling_compressed))
> >               return;
> >
> > +     /* Skip allocations for the tracking list itself to avoid recursion. */
> > +     if (gfp_flags & __GFP_NO_CODETAG)
> > +             return;
> > +
> >       rcu_read_lock();
> >       alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
> >       if (alloc_tag_add)
> > @@ -820,7 +852,9 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
> >
> >   static void __init clear_early_alloc_pfn_tag_refs(void)
> >   {
> > -     unsigned int i;
> > +     struct pfn_pool *pool, *next;
> > +     struct page *page;
> > +     int i;
> >
> >       if (static_key_enabled(&mem_profiling_compressed))
> >               return;
> > @@ -829,37 +863,45 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
> >       /* Make sure we are not racing with __alloc_tag_add_early_pfn() */
> >       synchronize_rcu();
> >
> > -     for (i = 0; i < atomic_read(&early_pfn_count); i++) {
> > -             unsigned long pfn = early_pfns[i];
> > -
> > -             if (pfn_valid(pfn)) {
> > -                     struct page *page = pfn_to_page(pfn);
> > -                     union pgtag_ref_handle handle;
> > -                     union codetag_ref ref;
> > -
> > -                     if (get_page_tag_ref(page, &ref, &handle)) {
> > -                             /*
> > -                              * An early-allocated page could be freed and reallocated
> > -                              * after its page_ext is initialized but before we clear it.
> > -                              * In that case, it already has a valid tag set.
> > -                              * We should not overwrite that valid tag with CODETAG_EMPTY.
> > -                              *
> > -                              * Note: there is still a small race window between checking
> > -                              * ref.ct and calling set_codetag_empty(). We accept this
> > -                              * race as it's unlikely and the extra complexity of atomic
> > -                              * cmpxchg is not worth it for this debug-only code path.
> > -                              */
> > -                             if (ref.ct) {
> > +     for (pool = current_pfn_pool; pool; pool = next) {
> > +             int nr_pfns = atomic_read(&pool->count);
> > +
> > +             for (i = 0; i < nr_pfns; i++) {
> > +                     unsigned long pfn = pool->pfns[i];
> > +
> > +                     if (pfn_valid(pfn)) {
> > +                             union pgtag_ref_handle handle;
> > +                             union codetag_ref ref;
> > +
> > +                             if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
> > +                                     /*
> > +                                      * An early-allocated page could be freed and reallocated
> > +                                      * after its page_ext is initialized but before we clear it.
> > +                                      * In that case, it already has a valid tag set.
> > +                                      * We should not overwrite that valid tag
> > +                                      * with CODETAG_EMPTY.
> > +                                      *
> > +                                      * Note: there is still a small race window between checking
> > +                                      * ref.ct and calling set_codetag_empty(). We accept this
> > +                                      * race as it's unlikely and the extra complexity of atomic
> > +                                      * cmpxchg is not worth it for this debug-only code path.
> > +                                      */
> > +                                     if (ref.ct) {
> > +                                             put_page_tag_ref(handle);
> > +                                             continue;
> > +                                     }
> > +
> > +                                     set_codetag_empty(&ref);
> > +                                     update_page_tag_ref(handle, &ref);
> >                                       put_page_tag_ref(handle);
> > -                                     continue;
> >                               }
> > -
> > -                             set_codetag_empty(&ref);
> > -                             update_page_tag_ref(handle, &ref);
> > -                             put_page_tag_ref(handle);
> >                       }
> >               }
> >
> > +             next = pool->next;
> > +             page = virt_to_page(pool);
> > +             clear_page_tag_ref(page);
> > +             __free_page(page);
> >       }
> >   }
> >   #else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index d49c254174da..50b2bc8f42d9 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -1240,7 +1240,7 @@ void __clear_page_tag_ref(struct page *page)
> >   /* Should be called only if mem_alloc_profiling_enabled() */
> >   static noinline
> >   void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> > -                    unsigned int nr)
> > +                    unsigned int nr, gfp_t gfp_flags)
> >   {
> >       union pgtag_ref_handle handle;
> >       union codetag_ref ref;
> > @@ -1254,17 +1254,17 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> >                * page_ext is not available yet, record the pfn so we can
> >                * clear the tag ref later when page_ext is initialized.
> >                */
> > -             alloc_tag_add_early_pfn(page_to_pfn(page));
> > +             alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
> >               if (task->alloc_tag)
> >                       alloc_tag_set_inaccurate(task->alloc_tag);
> >       }
> >   }
> >
> >   static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> > -                                unsigned int nr)
> > +                                unsigned int nr, gfp_t gfp_flags)
> >   {
> >       if (mem_alloc_profiling_enabled())
> > -             __pgalloc_tag_add(page, task, nr);
> > +             __pgalloc_tag_add(page, task, nr, gfp_flags);
> >   }
> >
> >   /* Should be called only if mem_alloc_profiling_enabled() */
> > @@ -1297,7 +1297,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
> >   #else /* CONFIG_MEM_ALLOC_PROFILING */
> >
> >   static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> > -                                unsigned int nr) {}
> > +                                unsigned int nr, gfp_t gfp_flags) {}
> >   static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
> >   static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
> >
> > @@ -1852,7 +1852,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
> >
> >       set_page_owner(page, order, gfp_flags);
> >       page_table_check_alloc(page, order);
> > -     pgalloc_tag_add(page, current, 1 << order);
> > +     pgalloc_tag_add(page, current, 1 << order, gfp_flags);
> >   }
> >
> >   static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-06-04 23:56 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-06-04  2:40 [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list Hao Ge
2026-06-04  9:59 ` Hao Ge
2026-06-04 23:56   ` Suren Baghdasaryan

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.