From: Muchun Song <songmuchun@bytedance.com>
To: Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@kernel.org>,
Muchun Song <muchun.song@linux.dev>,
Oscar Salvador <osalvador@suse.de>,
Michael Ellerman <mpe@ellerman.id.au>,
Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Vlastimil Babka <vbabka@kernel.org>,
Mike Rapoport <rppt@kernel.org>,
Suren Baghdasaryan <surenb@google.com>,
Michal Hocko <mhocko@suse.com>,
Nicholas Piggin <npiggin@gmail.com>,
Christophe Leroy <chleroy@kernel.org>,
Ackerley Tng <ackerleytng@google.com>,
Frank van der Linden <fvdl@google.com>,
aneesh.kumar@linux.ibm.com, joao.m.martins@oracle.com,
linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,
linux-kernel@vger.kernel.org,
Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH v2 37/69] mm/sparse-vmemmap: Factor out shared vmemmap page allocation
Date: Wed, 13 May 2026 21:05:05 +0800 [thread overview]
Message-ID: <20260513130542.35604-38-songmuchun@bytedance.com> (raw)
In-Reply-To: <20260513130542.35604-1-songmuchun@bytedance.com>
HugeTLB and sparse-vmemmap each have their own helper to allocate the
shared tail page used by vmemmap optimization.
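Factor that logic into a common vmemmap_shared_tail_page() helper in
sparse-vmemmap.c. It allocates the page through
vmemmap_alloc_block_zero(), initializes the tail struct pages, and uses
cmpxchg() to install the per-zone shared page. If another caller has
already installed a page, the newly allocated one is freed (via
__free_page() once slab is available, memblock_free() otherwise) and the
installed page is returned instead.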
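This removes the duplicated allocation logic while still handling both
the early boot and runtime paths through the same helper. As part of
this, vmemmap_alloc_block() is switched from __meminit to __ref,
vmemmap_alloc_block_zero() drops its __meminit annotation, and the
__earlyonly_bootmem_alloc() wrapper is folded into vmemmap_alloc_block().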
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/mm.h | 1 +
mm/hugetlb_vmemmap.c | 28 +-----------------
mm/sparse-vmemmap.c | 67 ++++++++++++++++++--------------------------
3 files changed, 29 insertions(+), 67 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fef39be8acd2..5281f073230c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4866,6 +4866,7 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node,
void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
unsigned long headsize);
void vmemmap_populate_print_last(void);
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone);
#ifdef CONFIG_MEMORY_HOTPLUG
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 66362e553870..d24143dd6051 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -499,32 +499,6 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
return vmemmap_should_optimize(h);
}
-static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
-{
- const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
- struct page *tail, *p;
- int node = zone_to_nid(zone);
-
- tail = READ_ONCE(zone->vmemmap_tails[idx]);
- if (likely(tail))
- return tail;
-
- tail = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
- if (!tail)
- return NULL;
-
- p = page_to_virt(tail);
- for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
- init_compound_tail(p + i, NULL, order, zone);
-
- if (cmpxchg(&zone->vmemmap_tails[idx], NULL, tail)) {
- __free_page(tail);
- tail = READ_ONCE(zone->vmemmap_tails[idx]);
- }
-
- return tail;
-}
-
static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
struct folio *folio,
struct list_head *vmemmap_pages,
@@ -541,7 +515,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
return ret;
nid = folio_nid(folio);
- vmemmap_tail = vmemmap_get_tail(h->order, folio_zone(folio));
+ vmemmap_tail = vmemmap_shared_tail_page(h->order, folio_zone(folio));
if (!vmemmap_tail)
return -ENOMEM;
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index dde4486195ad..53a341fcde74 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -34,27 +34,13 @@
#include "internal.h"
-/*
- * Allocate a block of memory to be used to back the virtual memory map
- * or to back the page tables that are used to create the mapping.
- * Uses the main allocators if they are available, else bootmem.
- */
-
-static void * __ref __earlyonly_bootmem_alloc(int node,
- unsigned long size,
- unsigned long align,
- unsigned long goal)
-{
- return memmap_alloc(size, align, goal, node, false);
-}
-
-void * __meminit vmemmap_alloc_block(unsigned long size, int node)
+void __ref *vmemmap_alloc_block(unsigned long size, int node)
{
/* If the main allocator is up use that, fallback to bootmem. */
if (slab_is_available()) {
gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
int order = get_order(size);
- static bool warned __meminitdata;
+ static bool warned;
struct page *page;
page = alloc_pages_node(node, gfp_mask, order);
@@ -68,8 +54,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
}
return NULL;
} else
- return __earlyonly_bootmem_alloc(node, size, size,
- __pa(MAX_DMA_ADDRESS));
+ return memmap_alloc(size, size, __pa(MAX_DMA_ADDRESS), node, false);
}
static void * __meminit altmap_alloc_block_buf(unsigned long size,
@@ -138,8 +123,6 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
start, end - 1);
}
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone);
-
static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
struct vmem_altmap *altmap,
unsigned long ptpfn)
@@ -158,7 +141,7 @@ static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, in
if (WARN_ON_ONCE(!zone))
return NULL;
- page = vmemmap_get_tail(section_order(ms), zone);
+ page = vmemmap_shared_tail_page(section_order(ms), zone);
if (!page)
return NULL;
ptpfn = page_to_pfn(page);
@@ -190,7 +173,7 @@ static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, in
return pte;
}
-static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
+static void *vmemmap_alloc_block_zero(unsigned long size, int node)
{
void *p = vmemmap_alloc_block(size, node);
@@ -329,32 +312,36 @@ void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end,
}
}
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
+struct page __ref *vmemmap_shared_tail_page(unsigned int order, struct zone *zone)
{
- struct page *p, *tail;
- unsigned int idx;
- int node = zone_to_nid(zone);
+ void *addr;
+ struct page *page;
+ const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
- if (WARN_ON_ONCE(order < OPTIMIZABLE_FOLIO_MIN_ORDER))
- return NULL;
- if (WARN_ON_ONCE(order > MAX_FOLIO_ORDER))
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(zone->vmemmap_tails)))
return NULL;
- idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
- tail = zone->vmemmap_tails[idx];
- if (tail)
- return tail;
+ page = READ_ONCE(zone->vmemmap_tails[idx]);
+ if (likely(page))
+ return page;
- p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
- if (!p)
+ addr = vmemmap_alloc_block_zero(PAGE_SIZE, zone_to_nid(zone));
+ if (!addr)
return NULL;
- for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
- init_compound_tail(p + i, NULL, order, zone);
- tail = virt_to_page(p);
- zone->vmemmap_tails[idx] = tail;
+ for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
+ init_compound_tail((struct page *)addr + i, NULL, order, zone);
+
+ page = virt_to_page(addr);
+ if (cmpxchg(&zone->vmemmap_tails[idx], NULL, page) != NULL) {
+ if (slab_is_available())
+ __free_page(page);
+ else
+ memblock_free(page_to_virt(page), PAGE_SIZE);
+ page = READ_ONCE(zone->vmemmap_tails[idx]);
+ }
- return tail;
+ return page;
}
void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
--
2.54.0
next prev parent reply other threads:[~2026-05-13 13:11 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-13 13:04 [PATCH v2 00/69] mm: Generalize HVO for HugeTLB and device DAX Muchun Song
2026-05-13 13:04 ` [PATCH v2 01/69] mm/hugetlb: Fix boot panic with CONFIG_DEBUG_VM and HVO bootmem pages Muchun Song
2026-05-13 13:04 ` [PATCH v2 02/69] mm/hugetlb_vmemmap: Fix __hugetlb_vmemmap_optimize_folios() Muchun Song
2026-05-13 13:04 ` [PATCH v2 03/69] powerpc/mm: Fix wrong addr_pfn tracking in compound vmemmap population Muchun Song
2026-05-13 13:04 ` [PATCH v2 04/69] mm/hugetlb: Initialize gigantic bootmem hugepage struct pages earlier Muchun Song
2026-05-13 13:04 ` [PATCH v2 05/69] mm/mm_init: Simplify deferred_free_pages() migratetype init Muchun Song
2026-05-13 13:04 ` [PATCH v2 06/69] mm/sparse: Panic on memmap and usemap allocation failure Muchun Song
2026-05-13 13:04 ` [PATCH v2 07/69] mm/sparse: Move subsection_map_init() into sparse_init() Muchun Song
2026-05-13 13:04 ` [PATCH v2 08/69] mm/mm_init: Defer sparse_init() until after zone initialization Muchun Song
2026-05-13 13:04 ` [PATCH v2 09/69] mm/mm_init: Defer hugetlb reservation " Muchun Song
2026-05-13 13:04 ` [PATCH v2 10/69] mm/mm_init: Remove set_pageblock_order() call from sparse_init() Muchun Song
2026-05-13 13:04 ` [PATCH v2 11/69] mm/sparse: Move sparse_vmemmap_init_nid_late() into sparse_init_nid() Muchun Song
2026-05-13 13:04 ` [PATCH v2 12/69] mm/hugetlb_cma: Validate hugetlb CMA range by zone at reserve time Muchun Song
2026-05-13 13:04 ` [PATCH v2 13/69] mm/hugetlb: Refactor early boot gigantic hugepage allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 14/69] mm/hugetlb: Free cross-zone bootmem gigantic pages after allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 15/69] mm/hugetlb_vmemmap: Move bootmem HVO setup to early init Muchun Song
2026-05-13 13:04 ` [PATCH v2 16/69] mm/hugetlb: Remove obsolete bootmem cross-zone checks Muchun Song
2026-05-13 13:04 ` [PATCH v2 17/69] mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late() Muchun Song
2026-05-13 13:04 ` [PATCH v2 18/69] mm/hugetlb: Remove unused bootmem cma field Muchun Song
2026-05-13 13:04 ` [PATCH v2 19/69] mm/mm_init: Make __init_page_from_nid() static Muchun Song
2026-05-13 13:04 ` [PATCH v2 20/69] mm/sparse-vmemmap: Drop VMEMMAP_POPULATE_PAGEREF Muchun Song
2026-05-13 13:04 ` [PATCH v2 21/69] mm: Rename vmemmap optimization macros around folio semantics Muchun Song
2026-05-13 13:04 ` [PATCH v2 22/69] mm/sparse: Drop power-of-2 size requirement for struct mem_section Muchun Song
2026-05-13 13:04 ` [PATCH v2 23/69] mm/sparse-vmemmap: track compound page order in " Muchun Song
2026-05-13 13:04 ` [PATCH v2 24/69] mm/mm_init: Skip initializing shared vmemmap tail pages Muchun Song
2026-05-13 13:04 ` [PATCH v2 25/69] mm/sparse-vmemmap: Initialize shared tail vmemmap pages on allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 26/69] mm/sparse-vmemmap: Support section-based vmemmap accounting Muchun Song
2026-05-13 13:04 ` [PATCH v2 27/69] mm/sparse-vmemmap: Support section-based vmemmap optimization Muchun Song
2026-05-13 13:04 ` [PATCH v2 28/69] mm/hugetlb: Use generic vmemmap optimization macros Muchun Song
2026-05-13 13:04 ` [PATCH v2 29/69] mm/sparse: Mark memblocks present earlier Muchun Song
2026-05-13 13:04 ` [PATCH v2 30/69] mm/hugetlb: Switch HugeTLB to section-based vmemmap optimization Muchun Song
2026-05-13 13:04 ` [PATCH v2 31/69] mm/sparse: Remove section_map_size() Muchun Song
2026-05-13 13:05 ` [PATCH v2 32/69] mm/mm_init: Factor out pfn_to_zone() as a shared helper Muchun Song
2026-05-13 13:05 ` [PATCH v2 33/69] mm/sparse: Remove SPARSEMEM_VMEMMAP_PREINIT Muchun Song
2026-05-13 13:05 ` [PATCH v2 34/69] mm/sparse: Inline usemap allocation into sparse_init_nid() Muchun Song
2026-05-13 13:05 ` [PATCH v2 35/69] mm/hugetlb: Remove HUGE_BOOTMEM_HVO Muchun Song
2026-05-13 13:05 ` [PATCH v2 36/69] mm/hugetlb: Remove HUGE_BOOTMEM_CMA Muchun Song
2026-05-13 13:05 ` Muchun Song [this message]
2026-05-13 13:05 ` [PATCH v2 38/69] mm/sparse-vmemmap: Introduce CONFIG_SPARSEMEM_VMEMMAP_OPTIMIZATION Muchun Song
2026-05-13 13:05 ` [PATCH v2 39/69] mm/sparse-vmemmap: Switch DAX to vmemmap_shared_tail_page() Muchun Song
2026-05-13 13:05 ` [PATCH v2 40/69] powerpc/mm: " Muchun Song
2026-05-13 13:05 ` [PATCH v2 41/69] mm/sparse-vmemmap: Drop the extra tail page from DAX reservation Muchun Song
2026-05-13 13:05 ` [PATCH v2 42/69] mm/sparse-vmemmap: Switch DAX to section-based vmemmap optimization Muchun Song
2026-05-13 13:05 ` [PATCH v2 43/69] mm/sparse-vmemmap: Unify DAX and HugeTLB population paths Muchun Song
2026-05-13 13:05 ` [PATCH v2 44/69] mm/sparse-vmemmap: Remove the unused ptpfn argument Muchun Song
2026-05-13 13:05 ` [PATCH v2 45/69] powerpc/mm: Make vmemmap_populate_compound_pages() static Muchun Song
2026-05-13 13:05 ` [PATCH v2 46/69] mm/sparse-vmemmap: Map shared vmemmap tail pages read-only Muchun Song
2026-05-13 13:20 ` [PATCH v2 47/69] powerpc/mm: " Muchun Song
2026-05-13 13:20 ` [PATCH v2 48/69] mm/sparse-vmemmap: Inline vmemmap_populate_address() into its caller Muchun Song
2026-05-13 13:20 ` [PATCH v2 49/69] mm/hugetlb_vmemmap: Remove vmemmap_wrprotect_hvo() Muchun Song
2026-05-13 13:20 ` [PATCH v2 50/69] mm/sparse: Simplify section_nr_vmemmap_pages() Muchun Song
2026-05-13 13:20 ` [PATCH v2 51/69] mm/sparse-vmemmap: Introduce vmemmap_nr_struct_pages() Muchun Song
2026-05-13 13:20 ` [PATCH v2 52/69] powerpc/mm: Drop powerpc vmemmap_can_optimize() Muchun Song
2026-05-13 13:20 ` [PATCH v2 53/69] mm/sparse-vmemmap: Drop vmemmap_can_optimize() Muchun Song
2026-05-13 13:20 ` [PATCH v2 54/69] mm/sparse-vmemmap: Drop @pgmap from vmemmap population APIs Muchun Song
2026-05-13 13:20 ` [PATCH v2 55/69] mm/sparse: Decouple section activation from ZONE_DEVICE Muchun Song
2026-05-13 13:20 ` [PATCH v2 56/69] mm: Redefine HVO as Hugepage Vmemmap Optimization Muchun Song
2026-05-13 13:20 ` [PATCH v2 57/69] mm/sparse-vmemmap: Consolidate HVO enable checks Muchun Song
2026-05-13 13:20 ` [PATCH v2 58/69] mm/hugetlb: Make HVO optimizable checks depend on generic logic Muchun Song
2026-05-13 13:20 ` [PATCH v2 59/69] mm/sparse-vmemmap: Localize init_compound_tail() Muchun Song
2026-05-13 13:20 ` [PATCH v2 60/69] mm/mm_init: Check zone consistency on optimized vmemmap sections Muchun Song
2026-05-13 13:20 ` [PATCH v2 61/69] mm/hugetlb: Drop boot-time HVO handling for gigantic folios Muchun Song
2026-05-13 13:20 ` [PATCH v2 62/69] mm/hugetlb: Simplify hugetlb_folio_init_vmemmap() Muchun Song
2026-05-13 13:20 ` [PATCH v2 63/69] mm/hugetlb: Initialize the full bootmem hugepage in hugetlb code Muchun Song
2026-05-13 13:20 ` [PATCH v2 64/69] mm/mm_init: Factor out compound page initialization Muchun Song
2026-05-13 13:20 ` [PATCH v2 65/69] mm/mm_init: Make __init_single_page() static Muchun Song
2026-05-13 13:20 ` [PATCH v2 66/69] mm/cma: Move CMA pageblock initialization into cma_activate_area() Muchun Song
2026-05-13 13:20 ` [PATCH v2 67/69] mm/cma: Move init_cma_pageblock() into cma.c Muchun Song
2026-05-13 13:20 ` [PATCH v2 68/69] mm/mm_init: Initialize pageblock migratetype in memmap init helpers Muchun Song
2026-05-13 13:20 ` [PATCH v2 69/69] Documentation/mm: Rewrite vmemmap_dedup.rst for unified HVO Muchun Song
2026-05-13 17:46 ` [PATCH v2 00/69] mm: Generalize HVO for HugeTLB and device DAX Andrew Morton
2026-05-13 18:26 ` Oscar Salvador
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260513130542.35604-38-songmuchun@bytedance.com \
--to=songmuchun@bytedance.com \
--cc=Liam.Howlett@oracle.com \
--cc=ackerleytng@google.com \
--cc=akpm@linux-foundation.org \
--cc=aneesh.kumar@linux.ibm.com \
--cc=chleroy@kernel.org \
--cc=david@kernel.org \
--cc=fvdl@google.com \
--cc=joao.m.martins@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=ljs@kernel.org \
--cc=maddy@linux.ibm.com \
--cc=mhocko@suse.com \
--cc=mpe@ellerman.id.au \
--cc=muchun.song@linux.dev \
--cc=npiggin@gmail.com \
--cc=osalvador@suse.de \
--cc=rppt@kernel.org \
--cc=surenb@google.com \
--cc=vbabka@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox