From: Muchun Song <songmuchun@bytedance.com>
To: Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@kernel.org>,
Muchun Song <muchun.song@linux.dev>,
Oscar Salvador <osalvador@suse.de>,
Michael Ellerman <mpe@ellerman.id.au>,
Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Vlastimil Babka <vbabka@kernel.org>,
Mike Rapoport <rppt@kernel.org>,
Suren Baghdasaryan <surenb@google.com>,
Michal Hocko <mhocko@suse.com>,
Nicholas Piggin <npiggin@gmail.com>,
Christophe Leroy <chleroy@kernel.org>,
Ackerley Tng <ackerleytng@google.com>,
Frank van der Linden <fvdl@google.com>,
aneesh.kumar@linux.ibm.com, joao.m.martins@oracle.com,
linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,
linux-kernel@vger.kernel.org,
Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH v2 13/69] mm/hugetlb: Refactor early boot gigantic hugepage allocation
Date: Wed, 13 May 2026 21:04:41 +0800 [thread overview]
Message-ID: <20260513130542.35604-14-songmuchun@bytedance.com> (raw)
In-Reply-To: <20260513130542.35604-1-songmuchun@bytedance.com>
The early boot gigantic hugepage allocation helpers currently mix
allocation with huge_bootmem_page setup, and leave part of the
initialization flow in architecture code.
Refactor the interface to return the allocated huge page pointer and
move the huge_bootmem_page setup into the generic hugetlb code. This
makes the architecture-specific paths focus only on finding memory,
while the common code handles node placement and early page metadata
setup in one place.
This also lets powerpc benefit from memblock_reserved_mark_noinit(),
which it did not use before.
In addition, the upcoming cross-zone validation for boot-time gigantic
hugetlb reservations is not architecture-specific. With this refactoring,
that logic can stay in the generic code instead of being duplicated in
each architecture-specific path.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
arch/powerpc/mm/hugetlbpage.c | 11 ++--
include/linux/hugetlb.h | 8 +--
mm/hugetlb.c | 95 ++++++++++++++---------------------
mm/hugetlb_cma.c | 12 ++---
mm/hugetlb_cma.h | 4 +-
5 files changed, 52 insertions(+), 78 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 558fafb82b8a..ff8c5ec831bb 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -104,17 +104,14 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
}
}
-static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
+static __init void *pseries_alloc_bootmem_huge_page(struct hstate *hstate)
{
struct huge_bootmem_page *m;
if (nr_gpages == 0)
- return 0;
+ return NULL;
m = phys_to_virt(gpage_freearray[--nr_gpages]);
gpage_freearray[nr_gpages] = 0;
- list_add(&m->list, &huge_boot_pages[0]);
- m->hstate = hstate;
- m->flags = 0;
- return 1;
+ return m;
}
bool __init hugetlb_node_alloc_supported(void)
@@ -124,7 +121,7 @@ bool __init hugetlb_node_alloc_supported(void)
#endif
-int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid)
{
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 52a2c30f866c..9a65271d167c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -720,8 +720,8 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
unsigned long address, struct folio *folio);
/* arch callback */
-int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
-int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
+void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid);
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);
void __init hugetlb_add_hstate(unsigned order);
@@ -1152,9 +1152,9 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
return NULL;
}
-static inline int __alloc_bootmem_huge_page(struct hstate *h)
+static inline void *__alloc_bootmem_huge_page(struct hstate *h, int nid)
{
- return 0;
+ return NULL;
}
static inline struct hstate *hstate_file(struct file *f)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b4999653a156..e9ba0be2eb17 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3044,79 +3044,58 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact)
{
- struct huge_bootmem_page *m;
- int listnode = nid;
-
if (hugetlb_early_cma(h))
- m = hugetlb_cma_alloc_bootmem(h, &listnode, node_exact);
- else {
- if (node_exact)
- m = memblock_alloc_exact_nid_raw(huge_page_size(h),
+ return hugetlb_cma_alloc_bootmem(h, nid, node_exact);
+
+ if (node_exact)
+ return memblock_alloc_exact_nid_raw(huge_page_size(h),
huge_page_size(h), 0,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
- else {
- m = memblock_alloc_try_nid_raw(huge_page_size(h),
+
+ return memblock_alloc_try_nid_raw(huge_page_size(h),
huge_page_size(h), 0,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
- /*
- * For pre-HVO to work correctly, pages need to be on
- * the list for the node they were actually allocated
- * from. That node may be different in the case of
- * fallback by memblock_alloc_try_nid_raw. So,
- * extract the actual node first.
- */
- if (m)
- listnode = early_pfn_to_nid(PHYS_PFN(__pa(m)));
- }
-
- if (m) {
- m->flags = 0;
- m->cma = NULL;
- }
- }
-
- if (m) {
- /*
- * Use the beginning of the huge page to store the
- * huge_bootmem_page struct (until gather_bootmem
- * puts them into the mem_map).
- *
- * Put them into a private list first because mem_map
- * is not up yet.
- */
- INIT_LIST_HEAD(&m->list);
- list_add(&m->list, &huge_boot_pages[listnode]);
- m->hstate = h;
- }
-
- return m;
}
-int alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid)
__attribute__ ((weak, alias("__alloc_bootmem_huge_page")));
-int __alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid)
{
- struct huge_bootmem_page *m = NULL; /* initialize for clang */
int nr_nodes, node = nid;
/* do node specific alloc */
- if (nid != NUMA_NO_NODE) {
- m = alloc_bootmem(h, node, true);
- if (!m)
- return 0;
- goto found;
- }
+ if (nid != NUMA_NO_NODE)
+ return alloc_bootmem(h, node, true);
/* allocate from next node when distributing huge pages */
for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node,
- &hugetlb_bootmem_nodes) {
- m = alloc_bootmem(h, node, false);
- if (!m)
- return 0;
- goto found;
- }
+ &hugetlb_bootmem_nodes)
+ return alloc_bootmem(h, node, false);
-found:
+ return NULL;
+}
+
+static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+{
+ struct huge_bootmem_page *m = arch_alloc_bootmem_huge_page(h, nid);
+
+ if (!m)
+ return false;
+
+ nid = early_pfn_to_nid(PHYS_PFN(__pa(m)));
+ /*
+ * Use the beginning of the huge page to store the huge_bootmem_page
+ * struct (until gather_bootmem puts them into the mem_map).
+ *
+ * Put them into a private list first because mem_map is not up yet.
+ */
+ INIT_LIST_HEAD(&m->list);
+ list_add(&m->list, &huge_boot_pages[nid]);
+ m->hstate = h;
+ if (!hugetlb_early_cma(h)) {
+ m->cma = NULL;
+ m->flags = 0;
+ }
/*
* Only initialize the head struct page in memmap_init_reserved_pages,
@@ -3128,7 +3107,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE),
huge_page_size(h) - PAGE_SIZE);
- return 1;
+ return true;
}
/* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index 57a7b3acc758..6b5c2aec4449 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -57,13 +57,13 @@ struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
}
struct huge_bootmem_page * __init
-hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact)
+hugetlb_cma_alloc_bootmem(struct hstate *h, int nid, bool node_exact)
{
struct cma *cma;
struct huge_bootmem_page *m;
- int node = *nid;
+ int node;
- cma = hugetlb_cma[*nid];
+ cma = hugetlb_cma[nid];
m = cma_reserve_early(cma, huge_page_size(h));
if (!m) {
if (node_exact)
@@ -71,13 +71,11 @@ hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact)
for_each_node_mask(node, hugetlb_bootmem_nodes) {
cma = hugetlb_cma[node];
- if (!cma || node == *nid)
+ if (!cma || node == nid)
continue;
m = cma_reserve_early(cma, huge_page_size(h));
- if (m) {
- *nid = node;
+ if (m)
break;
- }
}
}
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index c619c394b1ae..057852c792bd 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -6,7 +6,7 @@
void hugetlb_cma_free_frozen_folio(struct folio *folio);
struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
int nid, nodemask_t *nodemask);
-struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
+struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int nid,
bool node_exact);
bool hugetlb_cma_exclusive_alloc(void);
unsigned long hugetlb_cma_total_size(void);
@@ -24,7 +24,7 @@ static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
}
static inline
-struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
+struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int nid,
bool node_exact)
{
return NULL;
--
2.54.0
next prev parent reply other threads:[~2026-05-13 13:08 UTC|newest]
Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-13 13:04 [PATCH v2 00/69] mm: Generalize HVO for HugeTLB and device DAX Muchun Song
2026-05-13 13:04 ` [PATCH v2 01/69] mm/hugetlb: Fix boot panic with CONFIG_DEBUG_VM and HVO bootmem pages Muchun Song
2026-05-13 13:04 ` [PATCH v2 02/69] mm/hugetlb_vmemmap: Fix __hugetlb_vmemmap_optimize_folios() Muchun Song
2026-05-13 13:04 ` [PATCH v2 03/69] powerpc/mm: Fix wrong addr_pfn tracking in compound vmemmap population Muchun Song
2026-05-13 13:04 ` [PATCH v2 04/69] mm/hugetlb: Initialize gigantic bootmem hugepage struct pages earlier Muchun Song
2026-05-13 13:04 ` [PATCH v2 05/69] mm/mm_init: Simplify deferred_free_pages() migratetype init Muchun Song
2026-05-13 13:04 ` [PATCH v2 06/69] mm/sparse: Panic on memmap and usemap allocation failure Muchun Song
2026-05-13 13:04 ` [PATCH v2 07/69] mm/sparse: Move subsection_map_init() into sparse_init() Muchun Song
2026-05-13 13:04 ` [PATCH v2 08/69] mm/mm_init: Defer sparse_init() until after zone initialization Muchun Song
2026-05-13 13:04 ` [PATCH v2 09/69] mm/mm_init: Defer hugetlb reservation " Muchun Song
2026-05-13 13:04 ` [PATCH v2 10/69] mm/mm_init: Remove set_pageblock_order() call from sparse_init() Muchun Song
2026-05-13 13:04 ` [PATCH v2 11/69] mm/sparse: Move sparse_vmemmap_init_nid_late() into sparse_init_nid() Muchun Song
2026-05-13 13:04 ` [PATCH v2 12/69] mm/hugetlb_cma: Validate hugetlb CMA range by zone at reserve time Muchun Song
2026-05-13 13:04 ` Muchun Song [this message]
2026-05-13 13:04 ` [PATCH v2 14/69] mm/hugetlb: Free cross-zone bootmem gigantic pages after allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 15/69] mm/hugetlb_vmemmap: Move bootmem HVO setup to early init Muchun Song
2026-05-13 13:04 ` [PATCH v2 16/69] mm/hugetlb: Remove obsolete bootmem cross-zone checks Muchun Song
2026-05-13 13:04 ` [PATCH v2 17/69] mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late() Muchun Song
2026-05-13 13:04 ` [PATCH v2 18/69] mm/hugetlb: Remove unused bootmem cma field Muchun Song
2026-05-13 13:04 ` [PATCH v2 19/69] mm/mm_init: Make __init_page_from_nid() static Muchun Song
2026-05-13 13:04 ` [PATCH v2 20/69] mm/sparse-vmemmap: Drop VMEMMAP_POPULATE_PAGEREF Muchun Song
2026-05-13 13:04 ` [PATCH v2 21/69] mm: Rename vmemmap optimization macros around folio semantics Muchun Song
2026-05-13 13:04 ` [PATCH v2 22/69] mm/sparse: Drop power-of-2 size requirement for struct mem_section Muchun Song
2026-05-13 13:04 ` [PATCH v2 23/69] mm/sparse-vmemmap: track compound page order in " Muchun Song
2026-05-13 13:04 ` [PATCH v2 24/69] mm/mm_init: Skip initializing shared vmemmap tail pages Muchun Song
2026-05-13 13:04 ` [PATCH v2 25/69] mm/sparse-vmemmap: Initialize shared tail vmemmap pages on allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 26/69] mm/sparse-vmemmap: Support section-based vmemmap accounting Muchun Song
2026-05-13 13:04 ` [PATCH v2 27/69] mm/sparse-vmemmap: Support section-based vmemmap optimization Muchun Song
2026-05-13 13:04 ` [PATCH v2 28/69] mm/hugetlb: Use generic vmemmap optimization macros Muchun Song
2026-05-13 13:04 ` [PATCH v2 29/69] mm/sparse: Mark memblocks present earlier Muchun Song
2026-05-13 13:04 ` [PATCH v2 30/69] mm/hugetlb: Switch HugeTLB to section-based vmemmap optimization Muchun Song
2026-05-13 13:04 ` [PATCH v2 31/69] mm/sparse: Remove section_map_size() Muchun Song
2026-05-13 13:05 ` [PATCH v2 32/69] mm/mm_init: Factor out pfn_to_zone() as a shared helper Muchun Song
2026-05-13 13:05 ` [PATCH v2 33/69] mm/sparse: Remove SPARSEMEM_VMEMMAP_PREINIT Muchun Song
2026-05-13 13:05 ` [PATCH v2 34/69] mm/sparse: Inline usemap allocation into sparse_init_nid() Muchun Song
2026-05-13 13:05 ` [PATCH v2 35/69] mm/hugetlb: Remove HUGE_BOOTMEM_HVO Muchun Song
2026-05-13 13:05 ` [PATCH v2 36/69] mm/hugetlb: Remove HUGE_BOOTMEM_CMA Muchun Song
2026-05-13 13:05 ` [PATCH v2 37/69] mm/sparse-vmemmap: Factor out shared vmemmap page allocation Muchun Song
2026-05-13 13:05 ` [PATCH v2 38/69] mm/sparse-vmemmap: Introduce CONFIG_SPARSEMEM_VMEMMAP_OPTIMIZATION Muchun Song
2026-05-13 13:05 ` [PATCH v2 39/69] mm/sparse-vmemmap: Switch DAX to vmemmap_shared_tail_page() Muchun Song
2026-05-13 13:05 ` [PATCH v2 40/69] powerpc/mm: " Muchun Song
2026-05-13 13:05 ` [PATCH v2 41/69] mm/sparse-vmemmap: Drop the extra tail page from DAX reservation Muchun Song
2026-05-13 13:05 ` [PATCH v2 42/69] mm/sparse-vmemmap: Switch DAX to section-based vmemmap optimization Muchun Song
2026-05-13 13:05 ` [PATCH v2 43/69] mm/sparse-vmemmap: Unify DAX and HugeTLB population paths Muchun Song
2026-05-13 13:05 ` [PATCH v2 44/69] mm/sparse-vmemmap: Remove the unused ptpfn argument Muchun Song
2026-05-13 13:05 ` [PATCH v2 45/69] powerpc/mm: Make vmemmap_populate_compound_pages() static Muchun Song
2026-05-13 13:05 ` [PATCH v2 46/69] mm/sparse-vmemmap: Map shared vmemmap tail pages read-only Muchun Song
2026-05-13 13:20 ` [PATCH v2 47/69] powerpc/mm: " Muchun Song
2026-05-13 13:20 ` [PATCH v2 48/69] mm/sparse-vmemmap: Inline vmemmap_populate_address() into its caller Muchun Song
2026-05-13 13:20 ` [PATCH v2 49/69] mm/hugetlb_vmemmap: Remove vmemmap_wrprotect_hvo() Muchun Song
2026-05-13 13:20 ` [PATCH v2 50/69] mm/sparse: Simplify section_nr_vmemmap_pages() Muchun Song
2026-05-13 13:20 ` [PATCH v2 51/69] mm/sparse-vmemmap: Introduce vmemmap_nr_struct_pages() Muchun Song
2026-05-13 13:20 ` [PATCH v2 52/69] powerpc/mm: Drop powerpc vmemmap_can_optimize() Muchun Song
2026-05-13 13:20 ` [PATCH v2 53/69] mm/sparse-vmemmap: Drop vmemmap_can_optimize() Muchun Song
2026-05-13 13:20 ` [PATCH v2 54/69] mm/sparse-vmemmap: Drop @pgmap from vmemmap population APIs Muchun Song
2026-05-13 13:20 ` [PATCH v2 55/69] mm/sparse: Decouple section activation from ZONE_DEVICE Muchun Song
2026-05-13 13:20 ` [PATCH v2 56/69] mm: Redefine HVO as Hugepage Vmemmap Optimization Muchun Song
2026-05-13 13:20 ` [PATCH v2 57/69] mm/sparse-vmemmap: Consolidate HVO enable checks Muchun Song
2026-05-13 13:20 ` [PATCH v2 58/69] mm/hugetlb: Make HVO optimizable checks depend on generic logic Muchun Song
2026-05-13 13:20 ` [PATCH v2 59/69] mm/sparse-vmemmap: Localize init_compound_tail() Muchun Song
2026-05-13 13:20 ` [PATCH v2 60/69] mm/mm_init: Check zone consistency on optimized vmemmap sections Muchun Song
2026-05-13 13:20 ` [PATCH v2 61/69] mm/hugetlb: Drop boot-time HVO handling for gigantic folios Muchun Song
2026-05-13 13:20 ` [PATCH v2 62/69] mm/hugetlb: Simplify hugetlb_folio_init_vmemmap() Muchun Song
2026-05-13 13:20 ` [PATCH v2 63/69] mm/hugetlb: Initialize the full bootmem hugepage in hugetlb code Muchun Song
2026-05-13 13:20 ` [PATCH v2 64/69] mm/mm_init: Factor out compound page initialization Muchun Song
2026-05-13 13:20 ` [PATCH v2 65/69] mm/mm_init: Make __init_single_page() static Muchun Song
2026-05-13 13:20 ` [PATCH v2 66/69] mm/cma: Move CMA pageblock initialization into cma_activate_area() Muchun Song
2026-05-13 13:20 ` [PATCH v2 67/69] mm/cma: Move init_cma_pageblock() into cma.c Muchun Song
2026-05-13 13:20 ` [PATCH v2 68/69] mm/mm_init: Initialize pageblock migratetype in memmap init helpers Muchun Song
2026-05-13 13:20 ` [PATCH v2 69/69] Documentation/mm: Rewrite vmemmap_dedup.rst for unified HVO Muchun Song
2026-05-13 17:46 ` [PATCH v2 00/69] mm: Generalize HVO for HugeTLB and device DAX Andrew Morton
2026-05-13 18:26 ` Oscar Salvador
2026-05-14 2:37 ` Muchun Song
2026-05-14 2:34 ` Muchun Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260513130542.35604-14-songmuchun@bytedance.com \
--to=songmuchun@bytedance.com \
--cc=Liam.Howlett@oracle.com \
--cc=ackerleytng@google.com \
--cc=akpm@linux-foundation.org \
--cc=aneesh.kumar@linux.ibm.com \
--cc=chleroy@kernel.org \
--cc=david@kernel.org \
--cc=fvdl@google.com \
--cc=joao.m.martins@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=ljs@kernel.org \
--cc=maddy@linux.ibm.com \
--cc=mhocko@suse.com \
--cc=mpe@ellerman.id.au \
--cc=muchun.song@linux.dev \
--cc=npiggin@gmail.com \
--cc=osalvador@suse.de \
--cc=rppt@kernel.org \
--cc=surenb@google.com \
--cc=vbabka@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox