* [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
@ 2026-06-04 2:40 Hao Ge
2026-06-04 9:59 ` Hao Ge
0 siblings, 1 reply; 3+ messages in thread
From: Hao Ge @ 2026-06-04 2:40 UTC (permalink / raw)
To: Suren Baghdasaryan, Kent Overstreet, Andrew Morton; +Cc: linux-kernel, Hao Ge
Pages allocated before page_ext is available have their codetag left
uninitialized. Track these early PFNs and clear their codetag in
clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
warnings when they are freed later.
Currently a fixed-size array of 8192 entries is used, with a warning if
the limit is exceeded. However, the number of early allocations depends
on the number of CPUs and can be larger than 8192.
Replace the fixed-size array with a dynamically allocated linked list
of pfn_pool structs. Each node is allocated via alloc_page() and mapped
to a pfn_pool containing a next pointer, an atomic slot counter, and a
PFN array that fills the remainder of the page.
The tracking pages themselves are allocated via alloc_page(), which
would trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and
recurse indefinitely. Introduce __GFP_NO_CODETAG (reuses the
%__GFP_NO_OBJ_EXT bit) and pass gfp_flags through pgalloc_tag_add()
so that the early path can skip recording allocations that carry this flag.
Suggested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Hao Ge <hao.ge@linux.dev>
---
include/linux/alloc_tag.h | 4 +-
lib/alloc_tag.c | 140 +++++++++++++++++++++++++-------------
mm/page_alloc.c | 12 ++--
3 files changed, 99 insertions(+), 57 deletions(-)
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 02de2ede560f..068ba2e77c5d 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -163,11 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
{
WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
}
-void alloc_tag_add_early_pfn(unsigned long pfn);
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
#else
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
-static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
+static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
#endif
/* Caller should verify both ref and tag to be valid */
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index ed1bdcf1f8ab..f2f574bcf383 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -767,50 +767,82 @@ static __init bool need_page_alloc_tagging(void)
* their codetag uninitialized. Track these early PFNs so we can clear
* their codetag refs later to avoid warnings when they are freed.
*
- * Early allocations include:
- * - Base allocations independent of CPU count
- * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
- * such as trace ring buffers, scheduler per-cpu data)
- *
- * For simplicity, we fix the size to 8192.
- * If insufficient, a warning will be triggered to alert the user.
+ * Each page is cast to a pfn_pool: the first few bytes hold metadata
+ * (next pointer and slot count), the remainder stores PFNs.
+ */
+struct pfn_pool {
+ struct pfn_pool *next;
+ atomic_t count;
+ unsigned long pfns[];
+};
+
+#define PFN_POOL_SIZE ((PAGE_SIZE - offsetof(struct pfn_pool, pfns)) / \
+ sizeof(unsigned long))
+
+/*
+ * Skip early PFN recording for a page allocation. Reuses the
+ * %__GFP_NO_OBJ_EXT bit. Used by __alloc_tag_add_early_pfn() to avoid
+ * recursion when allocating pages for the early PFN tracking list
+ * itself.
*
- * TODO: Replace fixed-size array with dynamic allocation using
- * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
+ * Codetags of the pages allocated with __GFP_NO_CODETAG should be
+ * cleared (via clear_page_tag_ref()) before freeing the pages to prevent
+ * alloc_tag_sub_check() from triggering a warning.
*/
-#define EARLY_ALLOC_PFN_MAX 8192
+#define __GFP_NO_CODETAG __GFP_NO_OBJ_EXT
-static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
-static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
+static struct pfn_pool *current_pfn_pool __initdata;
static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
{
- int old_idx, new_idx;
+ struct pfn_pool *pool;
+ int idx;
do {
- old_idx = atomic_read(&early_pfn_count);
- if (old_idx >= EARLY_ALLOC_PFN_MAX) {
- pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
- EARLY_ALLOC_PFN_MAX);
- return;
+ pool = READ_ONCE(current_pfn_pool);
+ if (!pool || atomic_read(&pool->count) >= PFN_POOL_SIZE) {
+ struct page *new_page = alloc_page(__GFP_HIGH | __GFP_NO_CODETAG);
+ struct pfn_pool *new;
+
+ if (!new_page) {
+ pr_warn_once("early PFN tracking page allocation failed\n");
+ return;
+ }
+ new = page_address(new_page);
+ new->next = pool;
+ atomic_set(&new->count, 0);
+ if (cmpxchg(&current_pfn_pool, pool, new) != pool) {
+ clear_page_tag_ref(new_page);
+ __free_page(new_page);
+ continue;
+ }
+ pool = new;
}
- new_idx = old_idx + 1;
- } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
+ idx = atomic_read(&pool->count);
+ if (idx >= PFN_POOL_SIZE)
+ continue;
+ if (atomic_cmpxchg(&pool->count, idx, idx + 1) == idx)
+ break;
+ } while (1);
- early_pfns[old_idx] = pfn;
+ pool->pfns[idx] = pfn;
}
typedef void alloc_tag_add_func(unsigned long pfn);
static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
RCU_INITIALIZER(__alloc_tag_add_early_pfn);
-void alloc_tag_add_early_pfn(unsigned long pfn)
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
{
alloc_tag_add_func *alloc_tag_add;
if (static_key_enabled(&mem_profiling_compressed))
return;
+ /* Skip allocations for the tracking list itself to avoid recursion. */
+ if (gfp_flags & __GFP_NO_CODETAG)
+ return;
+
rcu_read_lock();
alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
if (alloc_tag_add)
@@ -820,7 +852,9 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
static void __init clear_early_alloc_pfn_tag_refs(void)
{
- unsigned int i;
+ struct pfn_pool *pool, *next;
+ struct page *page;
+ int i;
if (static_key_enabled(&mem_profiling_compressed))
return;
@@ -829,37 +863,45 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
synchronize_rcu();
- for (i = 0; i < atomic_read(&early_pfn_count); i++) {
- unsigned long pfn = early_pfns[i];
-
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- union pgtag_ref_handle handle;
- union codetag_ref ref;
-
- if (get_page_tag_ref(page, &ref, &handle)) {
- /*
- * An early-allocated page could be freed and reallocated
- * after its page_ext is initialized but before we clear it.
- * In that case, it already has a valid tag set.
- * We should not overwrite that valid tag with CODETAG_EMPTY.
- *
- * Note: there is still a small race window between checking
- * ref.ct and calling set_codetag_empty(). We accept this
- * race as it's unlikely and the extra complexity of atomic
- * cmpxchg is not worth it for this debug-only code path.
- */
- if (ref.ct) {
+ for (pool = current_pfn_pool; pool; pool = next) {
+ int nr_pfns = atomic_read(&pool->count);
+
+ for (i = 0; i < nr_pfns; i++) {
+ unsigned long pfn = pool->pfns[i];
+
+ if (pfn_valid(pfn)) {
+ union pgtag_ref_handle handle;
+ union codetag_ref ref;
+
+ if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
+ /*
+ * An early-allocated page could be freed and reallocated
+ * after its page_ext is initialized but before we clear it.
+ * In that case, it already has a valid tag set.
+ * We should not overwrite that valid tag
+ * with CODETAG_EMPTY.
+ *
+ * Note: there is still a small race window between checking
+ * ref.ct and calling set_codetag_empty(). We accept this
+ * race as it's unlikely and the extra complexity of atomic
+ * cmpxchg is not worth it for this debug-only code path.
+ */
+ if (ref.ct) {
+ put_page_tag_ref(handle);
+ continue;
+ }
+
+ set_codetag_empty(&ref);
+ update_page_tag_ref(handle, &ref);
put_page_tag_ref(handle);
- continue;
}
-
- set_codetag_empty(&ref);
- update_page_tag_ref(handle, &ref);
- put_page_tag_ref(handle);
}
}
+ next = pool->next;
+ page = virt_to_page(pool);
+ clear_page_tag_ref(page);
+ __free_page(page);
}
}
#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d49c254174da..50b2bc8f42d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1240,7 +1240,7 @@ void __clear_page_tag_ref(struct page *page)
/* Should be called only if mem_alloc_profiling_enabled() */
static noinline
void __pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr)
+ unsigned int nr, gfp_t gfp_flags)
{
union pgtag_ref_handle handle;
union codetag_ref ref;
@@ -1254,17 +1254,17 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
* page_ext is not available yet, record the pfn so we can
* clear the tag ref later when page_ext is initialized.
*/
- alloc_tag_add_early_pfn(page_to_pfn(page));
+ alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
if (task->alloc_tag)
alloc_tag_set_inaccurate(task->alloc_tag);
}
}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr)
+ unsigned int nr, gfp_t gfp_flags)
{
if (mem_alloc_profiling_enabled())
- __pgalloc_tag_add(page, task, nr);
+ __pgalloc_tag_add(page, task, nr, gfp_flags);
}
/* Should be called only if mem_alloc_profiling_enabled() */
@@ -1297,7 +1297,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
#else /* CONFIG_MEM_ALLOC_PROFILING */
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr) {}
+ unsigned int nr, gfp_t gfp_flags) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
@@ -1852,7 +1852,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_owner(page, order, gfp_flags);
page_table_check_alloc(page, order);
- pgalloc_tag_add(page, current, 1 << order);
+ pgalloc_tag_add(page, current, 1 << order, gfp_flags);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
2026-06-04 2:40 [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list Hao Ge
@ 2026-06-04 9:59 ` Hao Ge
2026-06-04 23:56 ` Suren Baghdasaryan
0 siblings, 1 reply; 3+ messages in thread
From: Hao Ge @ 2026-06-04 9:59 UTC (permalink / raw)
To: Suren Baghdasaryan, Kent Overstreet, Andrew Morton
Cc: linux-kernel, Linux Memory Management List
Adding Cc: linux-mm@kvack.org and including the part of the changelog that was lost
because of a local Git environment glitch.
Sorry about this.
On 2026/6/4 10:40, Hao Ge wrote:
> Pages allocated before page_ext is available have their codetag left
> uninitialized. Track these early PFNs and clear their codetag in
> clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
> warnings when they are freed later.
>
> Currently a fixed-size array of 8192 entries is used, with a warning if
> the limit is exceeded. However, the number of early allocations depends
> on the number of CPUs and can be larger than 8192.
>
> Replace the fixed-size array with a dynamically allocated linked list
> of pfn_pool structs. Each node is allocated via alloc_page() and mapped
> to a pfn_pool containing a next pointer, an atomic slot counter, and a
> PFN array that fills the remainder of the page.
>
> The tracking pages themselves are allocated via alloc_page(), which
> would trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and
> recurse indefinitely. Introduce __GFP_NO_CODETAG (reuses the
> %__GFP_NO_OBJ_EXT bit) and pass gfp_flags through pgalloc_tag_add()
> so that the early path can skip recording allocations that carry this flag.
>
> Suggested-by: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Hao Ge <hao.ge@linux.dev>
---
v6:
- Use hardcoded __GFP_HIGH | __GFP_NO_CODETAG instead of inheriting
caller's gfp_flags for internal pfn_pool page allocation.
> ---
> include/linux/alloc_tag.h | 4 +-
> lib/alloc_tag.c | 140 +++++++++++++++++++++++++-------------
> mm/page_alloc.c | 12 ++--
> 3 files changed, 99 insertions(+), 57 deletions(-)
>
> diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
> index 02de2ede560f..068ba2e77c5d 100644
> --- a/include/linux/alloc_tag.h
> +++ b/include/linux/alloc_tag.h
> @@ -163,11 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
> {
> WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
> }
> -void alloc_tag_add_early_pfn(unsigned long pfn);
> +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
> #else
> static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
> static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
> -static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
> +static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
> #endif
>
> /* Caller should verify both ref and tag to be valid */
> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> index ed1bdcf1f8ab..f2f574bcf383 100644
> --- a/lib/alloc_tag.c
> +++ b/lib/alloc_tag.c
> @@ -767,50 +767,82 @@ static __init bool need_page_alloc_tagging(void)
> * their codetag uninitialized. Track these early PFNs so we can clear
> * their codetag refs later to avoid warnings when they are freed.
> *
> - * Early allocations include:
> - * - Base allocations independent of CPU count
> - * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
> - * such as trace ring buffers, scheduler per-cpu data)
> - *
> - * For simplicity, we fix the size to 8192.
> - * If insufficient, a warning will be triggered to alert the user.
> + * Each page is cast to a pfn_pool: the first few bytes hold metadata
> + * (next pointer and slot count), the remainder stores PFNs.
> + */
> +struct pfn_pool {
> + struct pfn_pool *next;
> + atomic_t count;
> + unsigned long pfns[];
> +};
> +
> +#define PFN_POOL_SIZE ((PAGE_SIZE - offsetof(struct pfn_pool, pfns)) / \
> + sizeof(unsigned long))
> +
> +/*
> + * Skip early PFN recording for a page allocation. Reuses the
> + * %__GFP_NO_OBJ_EXT bit. Used by __alloc_tag_add_early_pfn() to avoid
> + * recursion when allocating pages for the early PFN tracking list
> + * itself.
> *
> - * TODO: Replace fixed-size array with dynamic allocation using
> - * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
> + * Codetags of the pages allocated with __GFP_NO_CODETAG should be
> + * cleared (via clear_page_tag_ref()) before freeing the pages to prevent
> + * alloc_tag_sub_check() from triggering a warning.
> */
> -#define EARLY_ALLOC_PFN_MAX 8192
> +#define __GFP_NO_CODETAG __GFP_NO_OBJ_EXT
>
> -static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
> -static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
> +static struct pfn_pool *current_pfn_pool __initdata;
>
> static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
> {
> - int old_idx, new_idx;
> + struct pfn_pool *pool;
> + int idx;
>
> do {
> - old_idx = atomic_read(&early_pfn_count);
> - if (old_idx >= EARLY_ALLOC_PFN_MAX) {
> - pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
> - EARLY_ALLOC_PFN_MAX);
> - return;
> + pool = READ_ONCE(current_pfn_pool);
> + if (!pool || atomic_read(&pool->count) >= PFN_POOL_SIZE) {
> + struct page *new_page = alloc_page(__GFP_HIGH | __GFP_NO_CODETAG);
> + struct pfn_pool *new;
> +
> + if (!new_page) {
> + pr_warn_once("early PFN tracking page allocation failed\n");
> + return;
> + }
> + new = page_address(new_page);
> + new->next = pool;
> + atomic_set(&new->count, 0);
> + if (cmpxchg(&current_pfn_pool, pool, new) != pool) {
> + clear_page_tag_ref(new_page);
> + __free_page(new_page);
> + continue;
> + }
> + pool = new;
> }
> - new_idx = old_idx + 1;
> - } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
> + idx = atomic_read(&pool->count);
> + if (idx >= PFN_POOL_SIZE)
> + continue;
> + if (atomic_cmpxchg(&pool->count, idx, idx + 1) == idx)
> + break;
> + } while (1);
>
> - early_pfns[old_idx] = pfn;
> + pool->pfns[idx] = pfn;
> }
>
> typedef void alloc_tag_add_func(unsigned long pfn);
> static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
> RCU_INITIALIZER(__alloc_tag_add_early_pfn);
>
> -void alloc_tag_add_early_pfn(unsigned long pfn)
> +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
> {
> alloc_tag_add_func *alloc_tag_add;
>
> if (static_key_enabled(&mem_profiling_compressed))
> return;
>
> + /* Skip allocations for the tracking list itself to avoid recursion. */
> + if (gfp_flags & __GFP_NO_CODETAG)
> + return;
> +
> rcu_read_lock();
> alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
> if (alloc_tag_add)
> @@ -820,7 +852,9 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
>
> static void __init clear_early_alloc_pfn_tag_refs(void)
> {
> - unsigned int i;
> + struct pfn_pool *pool, *next;
> + struct page *page;
> + int i;
>
> if (static_key_enabled(&mem_profiling_compressed))
> return;
> @@ -829,37 +863,45 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
> /* Make sure we are not racing with __alloc_tag_add_early_pfn() */
> synchronize_rcu();
>
> - for (i = 0; i < atomic_read(&early_pfn_count); i++) {
> - unsigned long pfn = early_pfns[i];
> -
> - if (pfn_valid(pfn)) {
> - struct page *page = pfn_to_page(pfn);
> - union pgtag_ref_handle handle;
> - union codetag_ref ref;
> -
> - if (get_page_tag_ref(page, &ref, &handle)) {
> - /*
> - * An early-allocated page could be freed and reallocated
> - * after its page_ext is initialized but before we clear it.
> - * In that case, it already has a valid tag set.
> - * We should not overwrite that valid tag with CODETAG_EMPTY.
> - *
> - * Note: there is still a small race window between checking
> - * ref.ct and calling set_codetag_empty(). We accept this
> - * race as it's unlikely and the extra complexity of atomic
> - * cmpxchg is not worth it for this debug-only code path.
> - */
> - if (ref.ct) {
> + for (pool = current_pfn_pool; pool; pool = next) {
> + int nr_pfns = atomic_read(&pool->count);
> +
> + for (i = 0; i < nr_pfns; i++) {
> + unsigned long pfn = pool->pfns[i];
> +
> + if (pfn_valid(pfn)) {
> + union pgtag_ref_handle handle;
> + union codetag_ref ref;
> +
> + if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
> + /*
> + * An early-allocated page could be freed and reallocated
> + * after its page_ext is initialized but before we clear it.
> + * In that case, it already has a valid tag set.
> + * We should not overwrite that valid tag
> + * with CODETAG_EMPTY.
> + *
> + * Note: there is still a small race window between checking
> + * ref.ct and calling set_codetag_empty(). We accept this
> + * race as it's unlikely and the extra complexity of atomic
> + * cmpxchg is not worth it for this debug-only code path.
> + */
> + if (ref.ct) {
> + put_page_tag_ref(handle);
> + continue;
> + }
> +
> + set_codetag_empty(&ref);
> + update_page_tag_ref(handle, &ref);
> put_page_tag_ref(handle);
> - continue;
> }
> -
> - set_codetag_empty(&ref);
> - update_page_tag_ref(handle, &ref);
> - put_page_tag_ref(handle);
> }
> }
>
> + next = pool->next;
> + page = virt_to_page(pool);
> + clear_page_tag_ref(page);
> + __free_page(page);
> }
> }
> #else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d49c254174da..50b2bc8f42d9 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1240,7 +1240,7 @@ void __clear_page_tag_ref(struct page *page)
> /* Should be called only if mem_alloc_profiling_enabled() */
> static noinline
> void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> - unsigned int nr)
> + unsigned int nr, gfp_t gfp_flags)
> {
> union pgtag_ref_handle handle;
> union codetag_ref ref;
> @@ -1254,17 +1254,17 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> * page_ext is not available yet, record the pfn so we can
> * clear the tag ref later when page_ext is initialized.
> */
> - alloc_tag_add_early_pfn(page_to_pfn(page));
> + alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
> if (task->alloc_tag)
> alloc_tag_set_inaccurate(task->alloc_tag);
> }
> }
>
> static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> - unsigned int nr)
> + unsigned int nr, gfp_t gfp_flags)
> {
> if (mem_alloc_profiling_enabled())
> - __pgalloc_tag_add(page, task, nr);
> + __pgalloc_tag_add(page, task, nr, gfp_flags);
> }
>
> /* Should be called only if mem_alloc_profiling_enabled() */
> @@ -1297,7 +1297,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
> #else /* CONFIG_MEM_ALLOC_PROFILING */
>
> static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> - unsigned int nr) {}
> + unsigned int nr, gfp_t gfp_flags) {}
> static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
> static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
>
> @@ -1852,7 +1852,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
>
> set_page_owner(page, order, gfp_flags);
> page_table_check_alloc(page, order);
> - pgalloc_tag_add(page, current, 1 << order);
> + pgalloc_tag_add(page, current, 1 << order, gfp_flags);
> }
>
> static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
2026-06-04 9:59 ` Hao Ge
@ 2026-06-04 23:56 ` Suren Baghdasaryan
0 siblings, 0 replies; 3+ messages in thread
From: Suren Baghdasaryan @ 2026-06-04 23:56 UTC (permalink / raw)
To: Hao Ge
Cc: Kent Overstreet, Andrew Morton, linux-kernel,
Linux Memory Management List
On Thu, Jun 4, 2026 at 2:59 AM Hao Ge <hao.ge@linux.dev> wrote:
>
>
> Add cc:linux-mm@kvack.org and lost part of the changelog
>
> because of local Git environment glitch.
>
> Sorry for this.
>
>
> On 2026/6/4 10:40, Hao Ge wrote:
> > Pages allocated before page_ext is available have their codetag left
> > uninitialized. Track these early PFNs and clear their codetag in
> > clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
> > warnings when they are freed later.
> >
> > Currently a fixed-size array of 8192 entries is used, with a warning if
> > the limit is exceeded. However, the number of early allocations depends
> > on the number of CPUs and can be larger than 8192.
> >
> > Replace the fixed-size array with a dynamically allocated linked list
> > of pfn_pool structs. Each node is allocated via alloc_page() and mapped
> > to a pfn_pool containing a next pointer, an atomic slot counter, and a
> > PFN array that fills the remainder of the page.
> >
> > The tracking pages themselves are allocated via alloc_page(), which
> > would trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() and
> > recurse indefinitely. Introduce __GFP_NO_CODETAG (reuses the
> > %__GFP_NO_OBJ_EXT bit) and pass gfp_flags through pgalloc_tag_add()
> > so that the early path can skip recording allocations that carry this flag.
> >
> > Suggested-by: Suren Baghdasaryan <surenb@google.com>
> > Signed-off-by: Hao Ge <hao.ge@linux.dev>
Acked-by: Suren Baghdasaryan <surenb@google.com>
>
> ---
>
> v6:
> - Use hardcoded __GFP_HIGH | __GFP_NO_CODETAG instead of inheriting
> caller's gfp_flags for internal pfn_pool page allocation.
>
> > ---
> > include/linux/alloc_tag.h | 4 +-
> > lib/alloc_tag.c | 140 +++++++++++++++++++++++++-------------
> > mm/page_alloc.c | 12 ++--
> > 3 files changed, 99 insertions(+), 57 deletions(-)
> >
> > diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
> > index 02de2ede560f..068ba2e77c5d 100644
> > --- a/include/linux/alloc_tag.h
> > +++ b/include/linux/alloc_tag.h
> > @@ -163,11 +163,11 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
> > {
> > WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
> > }
> > -void alloc_tag_add_early_pfn(unsigned long pfn);
> > +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
> > #else
> > static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
> > static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
> > -static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
> > +static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
> > #endif
> >
> > /* Caller should verify both ref and tag to be valid */
> > diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> > index ed1bdcf1f8ab..f2f574bcf383 100644
> > --- a/lib/alloc_tag.c
> > +++ b/lib/alloc_tag.c
> > @@ -767,50 +767,82 @@ static __init bool need_page_alloc_tagging(void)
> > * their codetag uninitialized. Track these early PFNs so we can clear
> > * their codetag refs later to avoid warnings when they are freed.
> > *
> > - * Early allocations include:
> > - * - Base allocations independent of CPU count
> > - * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
> > - * such as trace ring buffers, scheduler per-cpu data)
> > - *
> > - * For simplicity, we fix the size to 8192.
> > - * If insufficient, a warning will be triggered to alert the user.
> > + * Each page is cast to a pfn_pool: the first few bytes hold metadata
> > + * (next pointer and slot count), the remainder stores PFNs.
> > + */
> > +struct pfn_pool {
> > + struct pfn_pool *next;
> > + atomic_t count;
> > + unsigned long pfns[];
> > +};
> > +
> > +#define PFN_POOL_SIZE ((PAGE_SIZE - offsetof(struct pfn_pool, pfns)) / \
> > + sizeof(unsigned long))
> > +
> > +/*
> > + * Skip early PFN recording for a page allocation. Reuses the
> > + * %__GFP_NO_OBJ_EXT bit. Used by __alloc_tag_add_early_pfn() to avoid
> > + * recursion when allocating pages for the early PFN tracking list
> > + * itself.
> > *
> > - * TODO: Replace fixed-size array with dynamic allocation using
> > - * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
> > + * Codetags of the pages allocated with __GFP_NO_CODETAG should be
> > + * cleared (via clear_page_tag_ref()) before freeing the pages to prevent
> > + * alloc_tag_sub_check() from triggering a warning.
> > */
> > -#define EARLY_ALLOC_PFN_MAX 8192
> > +#define __GFP_NO_CODETAG __GFP_NO_OBJ_EXT
> >
> > -static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
> > -static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
> > +static struct pfn_pool *current_pfn_pool __initdata;
> >
> > static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
> > {
> > - int old_idx, new_idx;
> > + struct pfn_pool *pool;
> > + int idx;
> >
> > do {
> > - old_idx = atomic_read(&early_pfn_count);
> > - if (old_idx >= EARLY_ALLOC_PFN_MAX) {
> > - pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
> > - EARLY_ALLOC_PFN_MAX);
> > - return;
> > + pool = READ_ONCE(current_pfn_pool);
> > + if (!pool || atomic_read(&pool->count) >= PFN_POOL_SIZE) {
> > + struct page *new_page = alloc_page(__GFP_HIGH | __GFP_NO_CODETAG);
> > + struct pfn_pool *new;
> > +
> > + if (!new_page) {
> > + pr_warn_once("early PFN tracking page allocation failed\n");
> > + return;
> > + }
> > + new = page_address(new_page);
> > + new->next = pool;
> > + atomic_set(&new->count, 0);
> > + if (cmpxchg(&current_pfn_pool, pool, new) != pool) {
> > + clear_page_tag_ref(new_page);
> > + __free_page(new_page);
> > + continue;
> > + }
> > + pool = new;
> > }
> > - new_idx = old_idx + 1;
> > - } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
> > + idx = atomic_read(&pool->count);
> > + if (idx >= PFN_POOL_SIZE)
> > + continue;
> > + if (atomic_cmpxchg(&pool->count, idx, idx + 1) == idx)
> > + break;
> > + } while (1);
> >
> > - early_pfns[old_idx] = pfn;
> > + pool->pfns[idx] = pfn;
> > }
> >
> > typedef void alloc_tag_add_func(unsigned long pfn);
> > static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
> > RCU_INITIALIZER(__alloc_tag_add_early_pfn);
> >
> > -void alloc_tag_add_early_pfn(unsigned long pfn)
> > +void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
> > {
> > alloc_tag_add_func *alloc_tag_add;
> >
> > if (static_key_enabled(&mem_profiling_compressed))
> > return;
> >
> > + /* Skip allocations for the tracking list itself to avoid recursion. */
> > + if (gfp_flags & __GFP_NO_CODETAG)
> > + return;
> > +
> > rcu_read_lock();
> > alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
> > if (alloc_tag_add)
> > @@ -820,7 +852,9 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
> >
> > static void __init clear_early_alloc_pfn_tag_refs(void)
> > {
> > - unsigned int i;
> > + struct pfn_pool *pool, *next;
> > + struct page *page;
> > + int i;
> >
> > if (static_key_enabled(&mem_profiling_compressed))
> > return;
> > @@ -829,37 +863,45 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
> > /* Make sure we are not racing with __alloc_tag_add_early_pfn() */
> > synchronize_rcu();
> >
> > - for (i = 0; i < atomic_read(&early_pfn_count); i++) {
> > - unsigned long pfn = early_pfns[i];
> > -
> > - if (pfn_valid(pfn)) {
> > - struct page *page = pfn_to_page(pfn);
> > - union pgtag_ref_handle handle;
> > - union codetag_ref ref;
> > -
> > - if (get_page_tag_ref(page, &ref, &handle)) {
> > - /*
> > - * An early-allocated page could be freed and reallocated
> > - * after its page_ext is initialized but before we clear it.
> > - * In that case, it already has a valid tag set.
> > - * We should not overwrite that valid tag with CODETAG_EMPTY.
> > - *
> > - * Note: there is still a small race window between checking
> > - * ref.ct and calling set_codetag_empty(). We accept this
> > - * race as it's unlikely and the extra complexity of atomic
> > - * cmpxchg is not worth it for this debug-only code path.
> > - */
> > - if (ref.ct) {
> > + for (pool = current_pfn_pool; pool; pool = next) {
> > + int nr_pfns = atomic_read(&pool->count);
> > +
> > + for (i = 0; i < nr_pfns; i++) {
> > + unsigned long pfn = pool->pfns[i];
> > +
> > + if (pfn_valid(pfn)) {
> > + union pgtag_ref_handle handle;
> > + union codetag_ref ref;
> > +
> > + if (get_page_tag_ref(pfn_to_page(pfn), &ref, &handle)) {
> > + /*
> > + * An early-allocated page could be freed and reallocated
> > + * after its page_ext is initialized but before we clear it.
> > + * In that case, it already has a valid tag set.
> > + * We should not overwrite that valid tag
> > + * with CODETAG_EMPTY.
> > + *
> > + * Note: there is still a small race window between checking
> > + * ref.ct and calling set_codetag_empty(). We accept this
> > + * race as it's unlikely and the extra complexity of atomic
> > + * cmpxchg is not worth it for this debug-only code path.
> > + */
> > + if (ref.ct) {
> > + put_page_tag_ref(handle);
> > + continue;
> > + }
> > +
> > + set_codetag_empty(&ref);
> > + update_page_tag_ref(handle, &ref);
> > put_page_tag_ref(handle);
> > - continue;
> > }
> > -
> > - set_codetag_empty(&ref);
> > - update_page_tag_ref(handle, &ref);
> > - put_page_tag_ref(handle);
> > }
> > }
> >
> > + next = pool->next;
> > + page = virt_to_page(pool);
> > + clear_page_tag_ref(page);
> > + __free_page(page);
> > }
> > }
> > #else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index d49c254174da..50b2bc8f42d9 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -1240,7 +1240,7 @@ void __clear_page_tag_ref(struct page *page)
> > /* Should be called only if mem_alloc_profiling_enabled() */
> > static noinline
> > void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> > - unsigned int nr)
> > + unsigned int nr, gfp_t gfp_flags)
> > {
> > union pgtag_ref_handle handle;
> > union codetag_ref ref;
> > @@ -1254,17 +1254,17 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
> > * page_ext is not available yet, record the pfn so we can
> > * clear the tag ref later when page_ext is initialized.
> > */
> > - alloc_tag_add_early_pfn(page_to_pfn(page));
> > + alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
> > if (task->alloc_tag)
> > alloc_tag_set_inaccurate(task->alloc_tag);
> > }
> > }
> >
> > static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> > - unsigned int nr)
> > + unsigned int nr, gfp_t gfp_flags)
> > {
> > if (mem_alloc_profiling_enabled())
> > - __pgalloc_tag_add(page, task, nr);
> > + __pgalloc_tag_add(page, task, nr, gfp_flags);
> > }
> >
> > /* Should be called only if mem_alloc_profiling_enabled() */
> > @@ -1297,7 +1297,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
> > #else /* CONFIG_MEM_ALLOC_PROFILING */
> >
> > static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
> > - unsigned int nr) {}
> > + unsigned int nr, gfp_t gfp_flags) {}
> > static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
> > static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
> >
> > @@ -1852,7 +1852,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
> >
> > set_page_owner(page, order, gfp_flags);
> > page_table_check_alloc(page, order);
> > - pgalloc_tag_add(page, current, 1 << order);
> > + pgalloc_tag_add(page, current, 1 << order, gfp_flags);
> > }
> >
> > static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-06-04 23:56 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-06-04 2:40 [PATCH v6] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list Hao Ge
2026-06-04 9:59 ` Hao Ge
2026-06-04 23:56 ` Suren Baghdasaryan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox