Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Muchun Song <muchun.song@linux.dev>,
	Oscar Salvador <osalvador@suse.de>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Lorenzo Stoakes <ljs@kernel.org>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@suse.com>,
	Nicholas Piggin <npiggin@gmail.com>,
	Christophe Leroy <chleroy@kernel.org>,
	Ackerley Tng <ackerleytng@google.com>,
	Frank van der Linden <fvdl@google.com>,
	aneesh.kumar@linux.ibm.com, joao.m.martins@oracle.com,
	linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org,
	Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH v2 64/69] mm/mm_init: Factor out compound page initialization
Date: Wed, 13 May 2026 21:20:29 +0800	[thread overview]
Message-ID: <20260513132044.41690-18-songmuchun@bytedance.com> (raw)
In-Reply-To: <20260513132044.41690-1-songmuchun@bytedance.com>

The compound struct page initialization needed by boot-time gigantic hugetlb
folios is currently open-coded in hugetlb code, while ZONE_DEVICE has its own
separate initialization path in mm_init.c.

Factor the common compound memmap setup into memmap_init_compound_page_frozen()
so both paths can share the same frozen page initialization logic. This removes
duplicated open-coded compound page setup and keeps the initialization rules
in one place.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/hugetlb.c  |  25 +-----------
 mm/internal.h |   2 +
 mm/mm_init.c  | 111 +++++++++++++++++++-------------------------------
 3 files changed, 45 insertions(+), 93 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 10f04fa95d43..7e9f49882395 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3118,28 +3118,6 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
 	return true;
 }
 
-static void __init hugetlb_folio_init_vmemmap(struct page *head, unsigned long pfn,
-		enum zone_type zone, int nid, unsigned int order, unsigned int nr_pages)
-{
-	/*
-	 * This is an open-coded prep_compound_page() whereby we avoid
-	 * walking pages twice by initializing/preparing+freezing them in the
-	 * same go.
-	 */
-	__init_single_page(head, pfn, zone, nid);
-	set_page_count(head, 0);
-
-	__SetPageHead(head);
-	for (int i = 1; i < nr_pages; i++) {
-		struct page *page = head + i;
-
-		__init_single_page(page, pfn + i, zone, nid);
-		prep_compound_tail(page, head, order);
-		set_page_count(page, 0);
-	}
-	prep_compound_head(head, order);
-}
-
 /*
  * memblock-allocated pageblocks might not have the migrate type set
  * if marked with the 'noinit' flag. Set it to the default (MIGRATE_MOVABLE)
@@ -3210,8 +3188,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
 
 		VM_BUG_ON(!hstate_is_gigantic(h));
 
-		hugetlb_folio_init_vmemmap(page, pfn, zone, nid, huge_page_order(h),
-					   vmemmap_nr_struct_pages(pfn, nr_pages));
+		memmap_init_compound_page_frozen(page, pfn, zone, nid, huge_page_order(h));
 		init_new_hugetlb_folio(folio);
 
 		if (order_vmemmap_optimizable(pfn_to_section_order(pfn))) {
diff --git a/mm/internal.h b/mm/internal.h
index 416afdf7b2ec..2c67ae25124b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1793,6 +1793,8 @@ static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte
 
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
 				unsigned long zone, int nid);
+void __meminit memmap_init_compound_page_frozen(struct page *head, unsigned long pfn,
+		enum zone_type zone, int nid, unsigned int order);
 
 /* shrinker related functions */
 unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 95422e92ede8..9b23c31db8c6 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1018,79 +1018,46 @@ static void __init memmap_init(void)
 		init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
 }
 
-#ifdef CONFIG_ZONE_DEVICE
-static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
-					  unsigned long zone_idx, int nid,
-					  struct dev_pagemap *pgmap)
+static void __meminit init_single_page_frozen(struct page *page, unsigned long pfn,
+		enum zone_type zone, int nid)
 {
+	__init_single_page(page, pfn, zone, nid);
+	if (zone_is_zone_device(&NODE_DATA(nid)->node_zones[zone])) {
+		/*
+		 * ZONE_DEVICE pages are not managed by the page allocator, mark
+		 * them reserved to prevent them from being touched elsewhere.
+		 *
+		 * We can use the non-atomic __set_bit operation for setting
+		 * the flag as we are still initializing the pages.
+		 */
+		__SetPageReserved(page);
 
-	__init_single_page(page, pfn, zone_idx, nid);
-
-	/*
-	 * Mark page reserved as it will need to wait for onlining
-	 * phase for it to be fully associated with a zone.
-	 *
-	 * We can use the non-atomic __set_bit operation for setting
-	 * the flag as we are still initializing the pages.
-	 */
-	__SetPageReserved(page);
-
-	/*
-	 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
-	 * and zone_device_data.  It is a bug if a ZONE_DEVICE page is
-	 * ever freed or placed on a driver-private list.
-	 */
-	page_folio(page)->pgmap = pgmap;
-	page->zone_device_data = NULL;
-
-	/*
-	 * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC are released
-	 * directly to the driver page allocator which will set the page count
-	 * to 1 when allocating the page.
-	 *
-	 * MEMORY_TYPE_GENERIC and MEMORY_TYPE_FS_DAX pages automatically have
-	 * their refcount reset to one whenever they are freed (ie. after
-	 * their refcount drops to 0).
-	 */
-	switch (pgmap->type) {
-	case MEMORY_DEVICE_FS_DAX:
-	case MEMORY_DEVICE_PRIVATE:
-	case MEMORY_DEVICE_COHERENT:
-	case MEMORY_DEVICE_PCI_P2PDMA:
-		set_page_count(page, 0);
-		break;
-
-	case MEMORY_DEVICE_GENERIC:
-		break;
+		/*
+		 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
+		 * and zone_device_data.  It is a bug if a ZONE_DEVICE page is
+		 * ever freed or placed on a driver-private list.
+		 */
+		page->zone_device_data = NULL;
 	}
+	set_page_count(page, 0);
 }
 
-static void __ref memmap_init_compound(struct page *head,
-				       unsigned long head_pfn,
-				       unsigned long zone_idx, int nid,
-				       struct dev_pagemap *pgmap,
-				       unsigned long nr_pages)
+void __meminit memmap_init_compound_page_frozen(struct page *head, unsigned long pfn,
+		enum zone_type zone, int nid, unsigned int order)
 {
-	unsigned long pfn, end_pfn = head_pfn + nr_pages;
-	unsigned int order = pgmap->vmemmap_shift;
+	int nr_pages = vmemmap_nr_struct_pages(pfn, 1UL << order);
 
-	/*
-	 * We have to initialize the pages, including setting up page links.
-	 * prep_compound_page() does not take care of that, so instead we
-	 * open-code prep_compound_page() so we can take care of initializing
-	 * the pages in the same go.
-	 */
-	__SetPageHead(head);
-	for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
-		struct page *page = pfn_to_page(pfn);
+	init_single_page_frozen(head, pfn, zone, nid);
 
-		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
-		prep_compound_tail(page, head, order);
-		set_page_count(page, 0);
+	__SetPageHead(head);
+	for (int i = 1; i < nr_pages; i++) {
+		init_single_page_frozen(head + i, pfn + i, zone, nid);
+		prep_compound_tail(head + i, head, order);
 	}
 	prep_compound_head(head, order);
 }
 
+#ifdef CONFIG_ZONE_DEVICE
 void __ref memmap_init_zone_device(struct zone *zone,
 				   unsigned long start_pfn,
 				   unsigned long nr_pages,
@@ -1118,18 +1085,24 @@ void __ref memmap_init_zone_device(struct zone *zone,
 	}
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
-		struct page *page = pfn_to_page(pfn);
-
-		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
+		struct page *head = pfn_to_page(pfn);
 
 		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
 			cond_resched();
 
-		if (pfns_per_compound == 1)
-			continue;
-
-		memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
-				     vmemmap_nr_struct_pages(pfn, pfns_per_compound));
+		if (pgmap->vmemmap_shift)
+			memmap_init_compound_page_frozen(head, pfn, zone_idx, nid,
+							 pgmap->vmemmap_shift);
+		else
+			init_single_page_frozen(head, pfn, zone_idx, nid);
+		/*
+		 * ZONE_DEVICE pages other than MEMORY_DEVICE_GENERIC are released
+		 * directly to the driver page allocator which will set the page
+		 * count to 1 when allocating the page.
+		 */
+		if (pgmap->type == MEMORY_DEVICE_GENERIC)
+			init_page_count(head);
+		((struct folio *)head)->pgmap = pgmap;
 	}
 
 	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);
-- 
2.54.0



  parent reply	other threads:[~2026-05-13 13:22 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-13 13:04 [PATCH v2 00/69] mm: Generalize HVO for HugeTLB and device DAX Muchun Song
2026-05-13 13:04 ` [PATCH v2 01/69] mm/hugetlb: Fix boot panic with CONFIG_DEBUG_VM and HVO bootmem pages Muchun Song
2026-05-14  7:51   ` Oscar Salvador
2026-05-14  8:13     ` Muchun Song
2026-05-13 13:04 ` [PATCH v2 02/69] mm/hugetlb_vmemmap: Fix __hugetlb_vmemmap_optimize_folios() Muchun Song
2026-05-14  7:56   ` Oscar Salvador
2026-05-14  8:19     ` Muchun Song
2026-05-13 13:04 ` [PATCH v2 03/69] powerpc/mm: Fix wrong addr_pfn tracking in compound vmemmap population Muchun Song
2026-05-14  8:00   ` Oscar Salvador
2026-05-13 13:04 ` [PATCH v2 04/69] mm/hugetlb: Initialize gigantic bootmem hugepage struct pages earlier Muchun Song
2026-05-14  8:05   ` Oscar Salvador
2026-05-14  8:16     ` Muchun Song
2026-05-13 13:04 ` [PATCH v2 05/69] mm/mm_init: Simplify deferred_free_pages() migratetype init Muchun Song
2026-05-14  8:12   ` Oscar Salvador
2026-05-13 13:04 ` [PATCH v2 06/69] mm/sparse: Panic on memmap and usemap allocation failure Muchun Song
2026-05-14  8:15   ` Oscar Salvador
2026-05-13 13:04 ` [PATCH v2 07/69] mm/sparse: Move subsection_map_init() into sparse_init() Muchun Song
2026-05-14  8:19   ` Oscar Salvador
2026-05-13 13:04 ` [PATCH v2 08/69] mm/mm_init: Defer sparse_init() until after zone initialization Muchun Song
2026-05-13 13:04 ` [PATCH v2 09/69] mm/mm_init: Defer hugetlb reservation " Muchun Song
2026-05-13 13:04 ` [PATCH v2 10/69] mm/mm_init: Remove set_pageblock_order() call from sparse_init() Muchun Song
2026-05-13 13:04 ` [PATCH v2 11/69] mm/sparse: Move sparse_vmemmap_init_nid_late() into sparse_init_nid() Muchun Song
2026-05-13 13:04 ` [PATCH v2 12/69] mm/hugetlb_cma: Validate hugetlb CMA range by zone at reserve time Muchun Song
2026-05-13 13:04 ` [PATCH v2 13/69] mm/hugetlb: Refactor early boot gigantic hugepage allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 14/69] mm/hugetlb: Free cross-zone bootmem gigantic pages after allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 15/69] mm/hugetlb_vmemmap: Move bootmem HVO setup to early init Muchun Song
2026-05-13 13:04 ` [PATCH v2 16/69] mm/hugetlb: Remove obsolete bootmem cross-zone checks Muchun Song
2026-05-13 13:04 ` [PATCH v2 17/69] mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late() Muchun Song
2026-05-13 13:04 ` [PATCH v2 18/69] mm/hugetlb: Remove unused bootmem cma field Muchun Song
2026-05-13 13:04 ` [PATCH v2 19/69] mm/mm_init: Make __init_page_from_nid() static Muchun Song
2026-05-13 13:04 ` [PATCH v2 20/69] mm/sparse-vmemmap: Drop VMEMMAP_POPULATE_PAGEREF Muchun Song
2026-05-13 13:04 ` [PATCH v2 21/69] mm: Rename vmemmap optimization macros around folio semantics Muchun Song
2026-05-13 13:04 ` [PATCH v2 22/69] mm/sparse: Drop power-of-2 size requirement for struct mem_section Muchun Song
2026-05-13 13:04 ` [PATCH v2 23/69] mm/sparse-vmemmap: track compound page order in " Muchun Song
2026-05-13 13:04 ` [PATCH v2 24/69] mm/mm_init: Skip initializing shared vmemmap tail pages Muchun Song
2026-05-13 13:04 ` [PATCH v2 25/69] mm/sparse-vmemmap: Initialize shared tail vmemmap pages on allocation Muchun Song
2026-05-13 13:04 ` [PATCH v2 26/69] mm/sparse-vmemmap: Support section-based vmemmap accounting Muchun Song
2026-05-13 13:04 ` [PATCH v2 27/69] mm/sparse-vmemmap: Support section-based vmemmap optimization Muchun Song
2026-05-13 13:04 ` [PATCH v2 28/69] mm/hugetlb: Use generic vmemmap optimization macros Muchun Song
2026-05-13 13:04 ` [PATCH v2 29/69] mm/sparse: Mark memblocks present earlier Muchun Song
2026-05-13 13:04 ` [PATCH v2 30/69] mm/hugetlb: Switch HugeTLB to section-based vmemmap optimization Muchun Song
2026-05-13 13:04 ` [PATCH v2 31/69] mm/sparse: Remove section_map_size() Muchun Song
2026-05-13 13:05 ` [PATCH v2 32/69] mm/mm_init: Factor out pfn_to_zone() as a shared helper Muchun Song
2026-05-13 13:05 ` [PATCH v2 33/69] mm/sparse: Remove SPARSEMEM_VMEMMAP_PREINIT Muchun Song
2026-05-13 13:05 ` [PATCH v2 34/69] mm/sparse: Inline usemap allocation into sparse_init_nid() Muchun Song
2026-05-13 13:05 ` [PATCH v2 35/69] mm/hugetlb: Remove HUGE_BOOTMEM_HVO Muchun Song
2026-05-13 13:05 ` [PATCH v2 36/69] mm/hugetlb: Remove HUGE_BOOTMEM_CMA Muchun Song
2026-05-13 13:05 ` [PATCH v2 37/69] mm/sparse-vmemmap: Factor out shared vmemmap page allocation Muchun Song
2026-05-13 13:05 ` [PATCH v2 38/69] mm/sparse-vmemmap: Introduce CONFIG_SPARSEMEM_VMEMMAP_OPTIMIZATION Muchun Song
2026-05-13 13:05 ` [PATCH v2 39/69] mm/sparse-vmemmap: Switch DAX to vmemmap_shared_tail_page() Muchun Song
2026-05-13 13:05 ` [PATCH v2 40/69] powerpc/mm: " Muchun Song
2026-05-13 13:05 ` [PATCH v2 41/69] mm/sparse-vmemmap: Drop the extra tail page from DAX reservation Muchun Song
2026-05-13 13:05 ` [PATCH v2 42/69] mm/sparse-vmemmap: Switch DAX to section-based vmemmap optimization Muchun Song
2026-05-13 13:05 ` [PATCH v2 43/69] mm/sparse-vmemmap: Unify DAX and HugeTLB population paths Muchun Song
2026-05-13 13:05 ` [PATCH v2 44/69] mm/sparse-vmemmap: Remove the unused ptpfn argument Muchun Song
2026-05-13 13:05 ` [PATCH v2 45/69] powerpc/mm: Make vmemmap_populate_compound_pages() static Muchun Song
2026-05-13 13:05 ` [PATCH v2 46/69] mm/sparse-vmemmap: Map shared vmemmap tail pages read-only Muchun Song
2026-05-13 13:20 ` [PATCH v2 47/69] powerpc/mm: " Muchun Song
2026-05-13 13:20   ` [PATCH v2 48/69] mm/sparse-vmemmap: Inline vmemmap_populate_address() into its caller Muchun Song
2026-05-13 13:20   ` [PATCH v2 49/69] mm/hugetlb_vmemmap: Remove vmemmap_wrprotect_hvo() Muchun Song
2026-05-13 13:20   ` [PATCH v2 50/69] mm/sparse: Simplify section_nr_vmemmap_pages() Muchun Song
2026-05-13 13:20   ` [PATCH v2 51/69] mm/sparse-vmemmap: Introduce vmemmap_nr_struct_pages() Muchun Song
2026-05-13 13:20   ` [PATCH v2 52/69] powerpc/mm: Drop powerpc vmemmap_can_optimize() Muchun Song
2026-05-13 13:20   ` [PATCH v2 53/69] mm/sparse-vmemmap: Drop vmemmap_can_optimize() Muchun Song
2026-05-13 13:20   ` [PATCH v2 54/69] mm/sparse-vmemmap: Drop @pgmap from vmemmap population APIs Muchun Song
2026-05-13 13:20   ` [PATCH v2 55/69] mm/sparse: Decouple section activation from ZONE_DEVICE Muchun Song
2026-05-13 13:20   ` [PATCH v2 56/69] mm: Redefine HVO as Hugepage Vmemmap Optimization Muchun Song
2026-05-13 13:20   ` [PATCH v2 57/69] mm/sparse-vmemmap: Consolidate HVO enable checks Muchun Song
2026-05-13 13:20   ` [PATCH v2 58/69] mm/hugetlb: Make HVO optimizable checks depend on generic logic Muchun Song
2026-05-13 13:20   ` [PATCH v2 59/69] mm/sparse-vmemmap: Localize init_compound_tail() Muchun Song
2026-05-13 13:20   ` [PATCH v2 60/69] mm/mm_init: Check zone consistency on optimized vmemmap sections Muchun Song
2026-05-13 13:20   ` [PATCH v2 61/69] mm/hugetlb: Drop boot-time HVO handling for gigantic folios Muchun Song
2026-05-13 13:20   ` [PATCH v2 62/69] mm/hugetlb: Simplify hugetlb_folio_init_vmemmap() Muchun Song
2026-05-13 13:20   ` [PATCH v2 63/69] mm/hugetlb: Initialize the full bootmem hugepage in hugetlb code Muchun Song
2026-05-13 13:20   ` Muchun Song [this message]
2026-05-13 13:20   ` [PATCH v2 65/69] mm/mm_init: Make __init_single_page() static Muchun Song
2026-05-13 13:20   ` [PATCH v2 66/69] mm/cma: Move CMA pageblock initialization into cma_activate_area() Muchun Song
2026-05-13 13:20   ` [PATCH v2 67/69] mm/cma: Move init_cma_pageblock() into cma.c Muchun Song
2026-05-13 13:20   ` [PATCH v2 68/69] mm/mm_init: Initialize pageblock migratetype in memmap init helpers Muchun Song
2026-05-13 13:20   ` [PATCH v2 69/69] Documentation/mm: Rewrite vmemmap_dedup.rst for unified HVO Muchun Song
2026-05-13 17:46 ` [PATCH v2 00/69] mm: Generalize HVO for HugeTLB and device DAX Andrew Morton
2026-05-13 18:26   ` Oscar Salvador
2026-05-14  2:37     ` Muchun Song
2026-05-14  2:34   ` Muchun Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260513132044.41690-18-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=ackerleytng@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=chleroy@kernel.org \
    --cc=david@kernel.org \
    --cc=fvdl@google.com \
    --cc=joao.m.martins@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=ljs@kernel.org \
    --cc=maddy@linux.ibm.com \
    --cc=mhocko@suse.com \
    --cc=mpe@ellerman.id.au \
    --cc=muchun.song@linux.dev \
    --cc=npiggin@gmail.com \
    --cc=osalvador@suse.de \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox