linux-mm.kvack.org archive mirror
* [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio
@ 2025-10-13 13:38 Kefeng Wang
  2025-10-13 13:38 ` [PATCH v3 1/6] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

Introduce alloc_contig_frozen_pages() and cma_alloc_frozen_compound(),
which avoid atomic operations on the page refcount, and then convert
hugetlb to allocate frozen gigantic folios with the new helpers,
cleaning up alloc_gigantic_folio().
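
For illustration only (not part of the series), the common pattern with
the new helpers is that the pages come back with a refcount of zero, so
the caller either publishes them with set_page_refcounted() or frees
them while still frozen (order/nid/nodemask below are placeholders):

  struct page *page;

  /* order-N compound pages; the refcount stays at 0 on success */
  page = alloc_contig_frozen_pages(1 << order, GFP_KERNEL | __GFP_COMP,
                                   nid, nodemask);
  if (page)
          set_page_refcounted(page);      /* publish only when needed */

cma_alloc_frozen_compound() follows the same rule for CMA areas, and
freeing while still frozen goes through free_frozen_pages() or
cma_release_frozen() instead of folio_put().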

v3:
- Fix build warnings/errors found by the lkp test
- Address some of David's comments:
  - Focus on the frozen part and drop the optimization part
  - Rename split_non_compound_pages() to __split_page()
  - Add back the debug print/WARN_ON if no cma range is found or the
    pfn range of the page does not fully match the cma range

v2:
- Optimize gigantic folio allocation speed
- Use HPAGE_PUD_ORDER in debug_vm_pgtable
- Address some of David's comments:
  - kill folio_alloc_gigantic()
  - add generic cma_alloc_frozen{_compound}() instead of
    cma_{alloc,free}_folio

Kefeng Wang (6):
  mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page()
  mm: page_alloc: add __split_page()
  mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  mm: cma: add __cma_release()
  mm: cma: add cma_alloc_frozen{_compound}()
  mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()

 include/linux/cma.h   |  26 ++----
 include/linux/gfp.h   |  52 +++++------
 mm/cma.c              | 109 +++++++++++-----------
 mm/debug_vm_pgtable.c |  38 ++++----
 mm/hugetlb.c          |  58 +++---------
 mm/hugetlb_cma.c      |  27 +++---
 mm/hugetlb_cma.h      |  10 +--
 mm/internal.h         |   6 ++
 mm/page_alloc.c       | 204 ++++++++++++++++++++++++++++--------------
 9 files changed, 271 insertions(+), 259 deletions(-)

-- 
2.27.0



^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v3 1/6] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page()
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
@ 2025-10-13 13:38 ` Kefeng Wang
  2025-10-13 13:38 ` [PATCH v3 2/6] mm: page_alloc: add __split_page() Kefeng Wang
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

Add a new helper to free the huge page, for consistency with
debug_vm_pgtable_alloc_huge_page(), and use HPAGE_PUD_ORDER
instead of open-coding it.

Also move free_contig_range() under CONFIG_CONTIG_ALLOC since
all callers are built with CONFIG_CONTIG_ALLOC.

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/gfp.h   |  2 +-
 mm/debug_vm_pgtable.c | 38 +++++++++++++++++---------------------
 mm/page_alloc.c       |  2 +-
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0ceb4e09306c..1fefb63e0480 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -437,8 +437,8 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
 					      int nid, nodemask_t *nodemask);
 #define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
 
-#endif
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
+#endif
 
 #ifdef CONFIG_CONTIG_ALLOC
 static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 830107b6dd08..d7f82aa58711 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -946,22 +946,26 @@ static unsigned long __init get_random_vaddr(void)
 	return random_vaddr;
 }
 
-static void __init destroy_args(struct pgtable_debug_args *args)
+static void __init
+debug_vm_pgtable_free_huge_page(struct pgtable_debug_args *args,
+		unsigned long pfn, int order)
 {
-	struct page *page = NULL;
+#ifdef CONFIG_CONTIG_ALLOC
+	if (args->is_contiguous_page) {
+		free_contig_range(pfn, 1 << order);
+		return;
+	}
+#endif
+	__free_pages(pfn_to_page(pfn), order);
+}
 
+static void __init destroy_args(struct pgtable_debug_args *args)
+{
 	/* Free (huge) page */
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 	    has_transparent_pud_hugepage() &&
 	    args->pud_pfn != ULONG_MAX) {
-		if (args->is_contiguous_page) {
-			free_contig_range(args->pud_pfn,
-					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
-		} else {
-			page = pfn_to_page(args->pud_pfn);
-			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
-		}
-
+		debug_vm_pgtable_free_huge_page(args, args->pud_pfn, HPAGE_PUD_ORDER);
 		args->pud_pfn = ULONG_MAX;
 		args->pmd_pfn = ULONG_MAX;
 		args->pte_pfn = ULONG_MAX;
@@ -970,20 +974,13 @@ static void __init destroy_args(struct pgtable_debug_args *args)
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 	    has_transparent_hugepage() &&
 	    args->pmd_pfn != ULONG_MAX) {
-		if (args->is_contiguous_page) {
-			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
-		} else {
-			page = pfn_to_page(args->pmd_pfn);
-			__free_pages(page, HPAGE_PMD_ORDER);
-		}
-
+		debug_vm_pgtable_free_huge_page(args, args->pmd_pfn, HPAGE_PMD_ORDER);
 		args->pmd_pfn = ULONG_MAX;
 		args->pte_pfn = ULONG_MAX;
 	}
 
 	if (args->pte_pfn != ULONG_MAX) {
-		page = pfn_to_page(args->pte_pfn);
-		__free_page(page);
+		__free_page(pfn_to_page(args->pte_pfn));
 
 		args->pte_pfn = ULONG_MAX;
 	}
@@ -1215,8 +1212,7 @@ static int __init init_args(struct pgtable_debug_args *args)
 	 */
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 	    has_transparent_pud_hugepage()) {
-		page = debug_vm_pgtable_alloc_huge_page(args,
-				HPAGE_PUD_SHIFT - PAGE_SHIFT);
+		page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PUD_ORDER);
 		if (page) {
 			args->pud_pfn = page_to_pfn(page);
 			args->pmd_pfn = args->pud_pfn;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 600d9e981c23..949b01f293d4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7121,7 +7121,6 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 	}
 	return NULL;
 }
-#endif /* CONFIG_CONTIG_ALLOC */
 
 void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 {
@@ -7148,6 +7147,7 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 	WARN(count != 0, "%lu pages are still in use!\n", count);
 }
 EXPORT_SYMBOL(free_contig_range);
+#endif /* CONFIG_CONTIG_ALLOC */
 
 /*
  * Effectively disable pcplists for the zone by setting the high limit to 0
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 2/6] mm: page_alloc: add __split_page()
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
  2025-10-13 13:38 ` [PATCH v3 1/6] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
@ 2025-10-13 13:38 ` Kefeng Wang
  2025-10-13 19:44   ` David Hildenbrand
  2025-10-13 13:38 ` [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

Factor out the splitting of a non-compound page from make_alloc_exact()
and split_page() into a new helper function, __split_page().

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/page_alloc.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 949b01f293d4..646a6c2293f9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3042,6 +3042,15 @@ void free_unref_folios(struct folio_batch *folios)
 	folio_batch_reinit(folios);
 }
 
+static void __split_page(struct page *page, unsigned int order)
+{
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+
+	split_page_owner(page, order, 0);
+	pgalloc_tag_split(page_folio(page), order, 0);
+	split_page_memcg(page, order);
+}
+
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
@@ -3054,14 +3063,12 @@ void split_page(struct page *page, unsigned int order)
 {
 	int i;
 
-	VM_BUG_ON_PAGE(PageCompound(page), page);
 	VM_BUG_ON_PAGE(!page_count(page), page);
 
 	for (i = 1; i < (1 << order); i++)
 		set_page_refcounted(page + i);
-	split_page_owner(page, order, 0);
-	pgalloc_tag_split(page_folio(page), order, 0);
-	split_page_memcg(page, order);
+
+	__split_page(page, order);
 }
 EXPORT_SYMBOL_GPL(split_page);
 
@@ -5339,9 +5346,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
 		struct page *page = virt_to_page((void *)addr);
 		struct page *last = page + nr;
 
-		split_page_owner(page, order, 0);
-		pgalloc_tag_split(page_folio(page), order, 0);
-		split_page_memcg(page, order);
+		__split_page(page, order);
 		while (page < --last)
 			set_page_refcounted(last);
 
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
  2025-10-13 13:38 ` [PATCH v3 1/6] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
  2025-10-13 13:38 ` [PATCH v3 2/6] mm: page_alloc: add __split_page() Kefeng Wang
@ 2025-10-13 13:38 ` Kefeng Wang
  2025-10-16 20:53   ` David Hildenbrand
  2025-10-13 13:38 ` [PATCH v3 4/6] mm: cma: add __cma_release() Kefeng Wang
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

In order to allocate a given range of pages, or compound pages,
without incrementing their refcount, add two new helpers,
alloc_contig_{range_frozen,frozen_pages}(), which may be beneficial
to some users (e.g. hugetlb). free_contig_range_frozen() is also
provided to match alloc_contig_range_frozen(), but it is better to
use free_frozen_pages() to free frozen compound pages.
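
An illustrative usage sketch (not part of this patch) of how the new
allocate/free helpers are meant to pair up; start_pfn, end_pfn, order
and nid below are placeholders:

  /* a specific PFN range; every page is left with refcount == 0 */
  if (!alloc_contig_range_frozen(start_pfn, end_pfn, ACR_FLAGS_NONE,
                                 GFP_KERNEL))
          free_contig_range_frozen(start_pfn, end_pfn - start_pfn);

  /* a frozen compound allocation is better freed with free_frozen_pages() */
  page = alloc_contig_frozen_pages(1 << order, GFP_KERNEL | __GFP_COMP,
                                   nid, NULL);
  if (page)
          free_frozen_pages(page, order);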

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/gfp.h |  29 +++++--
 mm/page_alloc.c     | 183 +++++++++++++++++++++++++++++---------------
 2 files changed, 143 insertions(+), 69 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1fefb63e0480..fbbdd8c88483 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -429,14 +429,27 @@ typedef unsigned int __bitwise acr_flags_t;
 #define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA
 
 /* The below functions must be run on a range from a single zone. */
-extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
-				     acr_flags_t alloc_flags, gfp_t gfp_mask);
-#define alloc_contig_range(...)			alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
-
-extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
-					      int nid, nodemask_t *nodemask);
-#define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
-
+int alloc_contig_range_frozen_noprof(unsigned long start, unsigned long end,
+		acr_flags_t alloc_flags, gfp_t gfp_mask);
+#define alloc_contig_range_frozen(...)	\
+	alloc_hooks(alloc_contig_range_frozen_noprof(__VA_ARGS__))
+
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
+		acr_flags_t alloc_flags, gfp_t gfp_mask);
+#define alloc_contig_range(...)	\
+	alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
+
+struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask);
+#define alloc_contig_frozen_pages(...) \
+	alloc_hooks(alloc_contig_frozen_pages_noprof(__VA_ARGS__))
+
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask);
+#define alloc_contig_pages(...)	\
+	alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
+
+void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages);
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 #endif
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 646a6c2293f9..3db3fe9881ac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6806,7 +6806,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	return (ret < 0) ? ret : 0;
 }
 
-static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
+static void split_free_frozen_pages(struct list_head *list, gfp_t gfp_mask)
 {
 	int order;
 
@@ -6818,11 +6818,10 @@ static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
 			int i;
 
 			post_alloc_hook(page, order, gfp_mask);
-			set_page_refcounted(page);
 			if (!order)
 				continue;
 
-			split_page(page, order);
+			__split_page(page, order);
 
 			/* Add all subpages to the order-0 head, in sequence. */
 			list_del(&page->lru);
@@ -6866,28 +6865,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
 	return 0;
 }
 
-/**
- * alloc_contig_range() -- tries to allocate given range of pages
- * @start:	start PFN to allocate
- * @end:	one-past-the-last PFN to allocate
- * @alloc_flags:	allocation information
- * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
- *		action and reclaim modifiers are supported. Reclaim modifiers
- *		control allocation behavior during compaction/migration/reclaim.
- *
- * The PFN range does not have to be pageblock aligned. The PFN range must
- * belong to a single zone.
- *
- * The first thing this routine does is attempt to MIGRATE_ISOLATE all
- * pageblocks in the range.  Once isolated, the pageblocks should not
- * be modified by others.
- *
- * Return: zero on success or negative error code.  On success all
- * pages which PFN is in [start, end) are allocated for the caller and
- * need to be freed with free_contig_range().
- */
-int alloc_contig_range_noprof(unsigned long start, unsigned long end,
-			      acr_flags_t alloc_flags, gfp_t gfp_mask)
+int alloc_contig_range_frozen_noprof(unsigned long start, unsigned long end,
+		acr_flags_t alloc_flags, gfp_t gfp_mask)
 {
 	const unsigned int order = ilog2(end - start);
 	unsigned long outer_start, outer_end;
@@ -7003,19 +6982,18 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 	}
 
 	if (!(gfp_mask & __GFP_COMP)) {
-		split_free_pages(cc.freepages, gfp_mask);
+		split_free_frozen_pages(cc.freepages, gfp_mask);
 
 		/* Free head and tail (if any) */
 		if (start != outer_start)
-			free_contig_range(outer_start, start - outer_start);
+			free_contig_range_frozen(outer_start, start - outer_start);
 		if (end != outer_end)
-			free_contig_range(end, outer_end - end);
+			free_contig_range_frozen(end, outer_end - end);
 	} else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
 		struct page *head = pfn_to_page(start);
 
 		check_new_pages(head, order);
 		prep_new_page(head, order, gfp_mask, 0);
-		set_page_refcounted(head);
 	} else {
 		ret = -EINVAL;
 		WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
@@ -7025,16 +7003,48 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 	undo_isolate_page_range(start, end);
 	return ret;
 }
-EXPORT_SYMBOL(alloc_contig_range_noprof);
 
-static int __alloc_contig_pages(unsigned long start_pfn,
-				unsigned long nr_pages, gfp_t gfp_mask)
+/**
+ * alloc_contig_range() -- tries to allocate given range of pages
+ * @start:	start PFN to allocate
+ * @end:	one-past-the-last PFN to allocate
+ * @alloc_flags:	allocation information
+ * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
+ *		action and reclaim modifiers are supported. Reclaim modifiers
+ *		control allocation behavior during compaction/migration/reclaim.
+ *
+ * The PFN range does not have to be pageblock aligned. The PFN range must
+ * belong to a single zone.
+ *
+ * The first thing this routine does is attempt to MIGRATE_ISOLATE all
+ * pageblocks in the range.  Once isolated, the pageblocks should not
+ * be modified by others.
+ *
+ * Return: zero on success or negative error code.  On success all
+ * pages which PFN is in [start, end) are allocated for the caller and
+ * need to be freed with free_contig_range().
+ */
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
+			      acr_flags_t alloc_flags, gfp_t gfp_mask)
 {
-	unsigned long end_pfn = start_pfn + nr_pages;
+	int ret;
+
+	ret = alloc_contig_range_frozen_noprof(start, end, alloc_flags, gfp_mask);
+	if (ret)
+		return ret;
+
+	if (gfp_mask & __GFP_COMP) {
+		set_page_refcounted(pfn_to_page(start));
+	} else {
+		unsigned long pfn;
+
+		for (pfn = start; pfn < end; pfn++)
+			set_page_refcounted(pfn_to_page(pfn));
+	}
 
-	return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
-					 gfp_mask);
+	return 0;
 }
+EXPORT_SYMBOL(alloc_contig_range_noprof);
 
 static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
 				   unsigned long nr_pages)
@@ -7067,31 +7077,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
 	return zone_spans_pfn(zone, last_pfn);
 }
 
-/**
- * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
- * @nr_pages:	Number of contiguous pages to allocate
- * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
- *		action and reclaim modifiers are supported. Reclaim modifiers
- *		control allocation behavior during compaction/migration/reclaim.
- * @nid:	Target node
- * @nodemask:	Mask for other possible nodes
- *
- * This routine is a wrapper around alloc_contig_range(). It scans over zones
- * on an applicable zonelist to find a contiguous pfn range which can then be
- * tried for allocation with alloc_contig_range(). This routine is intended
- * for allocation requests which can not be fulfilled with the buddy allocator.
- *
- * The allocated memory is always aligned to a page boundary. If nr_pages is a
- * power of two, then allocated range is also guaranteed to be aligned to same
- * nr_pages (e.g. 1GB request would be aligned to 1GB).
- *
- * Allocated pages can be freed with free_contig_range() or by manually calling
- * __free_page() on each allocated page.
- *
- * Return: pointer to contiguous pages on success, or NULL if not successful.
- */
-struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
-				 int nid, nodemask_t *nodemask)
+struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
 {
 	unsigned long ret, pfn, flags;
 	struct zonelist *zonelist;
@@ -7114,7 +7101,9 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 				 * and cause alloc_contig_range() to fail...
 				 */
 				spin_unlock_irqrestore(&zone->lock, flags);
-				ret = __alloc_contig_pages(pfn, nr_pages,
+				ret = alloc_contig_range_frozen_noprof(pfn,
+							pfn + nr_pages,
+							ACR_FLAGS_NONE,
 							gfp_mask);
 				if (!ret)
 					return pfn_to_page(pfn);
@@ -7126,6 +7115,78 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(alloc_contig_range_frozen_noprof);
+
+void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages)
+{
+	struct folio *folio = pfn_folio(pfn);
+
+	if (folio_test_large(folio)) {
+		int expected = folio_nr_pages(folio);
+
+		WARN_ON(folio_ref_count(folio));
+
+		if (nr_pages == expected)
+			free_frozen_pages(&folio->page, folio_order(folio));
+		else
+			WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
+			     pfn, nr_pages, expected);
+		return;
+	}
+
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON(page_ref_count(page));
+		free_frozen_pages(page, 0);
+	}
+}
+EXPORT_SYMBOL(free_contig_range_frozen);
+
+/**
+ * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
+ * @nr_pages:	Number of contiguous pages to allocate
+ * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
+ *		action and reclaim modifiers are supported. Reclaim modifiers
+ *		control allocation behavior during compaction/migration/reclaim.
+ * @nid:	Target node
+ * @nodemask:	Mask for other possible nodes
+ *
+ * This routine is a wrapper around alloc_contig_range(). It scans over zones
+ * on an applicable zonelist to find a contiguous pfn range which can then be
+ * tried for allocation with alloc_contig_range(). This routine is intended
+ * for allocation requests which can not be fulfilled with the buddy allocator.
+ *
+ * The allocated memory is always aligned to a page boundary. If nr_pages is a
+ * power of two, then allocated range is also guaranteed to be aligned to same
+ * nr_pages (e.g. 1GB request would be aligned to 1GB).
+ *
+ * Allocated pages can be freed with free_contig_range() or by manually calling
+ * __free_page() on each allocated page.
+ *
+ * Return: pointer to contiguous pages on success, or NULL if not successful.
+ */
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
+{
+	struct page *page;
+
+	page =  alloc_contig_frozen_pages_noprof(nr_pages, gfp_mask, nid,
+						 nodemask);
+	if (!page)
+		return NULL;
+
+	if (gfp_mask & __GFP_COMP) {
+		set_page_refcounted(page);
+	} else {
+		unsigned long pfn = page_to_pfn(page);
+
+		for (; nr_pages--; pfn++)
+			set_page_refcounted(pfn_to_page(pfn));
+	}
+
+	return page;
+}
 
 void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 {
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 4/6] mm: cma: add __cma_release()
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
                   ` (2 preceding siblings ...)
  2025-10-13 13:38 ` [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
@ 2025-10-13 13:38 ` Kefeng Wang
  2025-10-13 19:48   ` David Hildenbrand
  2025-10-13 13:38 ` [PATCH v3 5/6] mm: cma: add cma_alloc_frozen{_compound}() Kefeng Wang
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

Kill cma_pages_valid(), which is only used in cma_release(), and
clean up the code duplication between the cma pages validity check
and the cma memrange lookup. Add a __cma_release() helper to prepare
for the upcoming frozen page release.

Reviewed-by: Jane Chu <jane.chu@oracle.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/cma.h |  1 -
 mm/cma.c            | 62 +++++++++++++++------------------------------
 2 files changed, 21 insertions(+), 42 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 62d9c1cf6326..e5745d2aec55 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -49,7 +49,6 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
-extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
 
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
diff --git a/mm/cma.c b/mm/cma.c
index 813e6dc7b095..88016f4aef7f 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -942,34 +942,43 @@ struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
 	return page ? page_folio(page) : NULL;
 }
 
-bool cma_pages_valid(struct cma *cma, const struct page *pages,
-		     unsigned long count)
+static bool __cma_release(struct cma *cma, const struct page *pages,
+			  unsigned long count)
 {
 	unsigned long pfn, end;
 	int r;
 	struct cma_memrange *cmr;
-	bool ret;
+
+	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
 
 	if (!cma || !pages || count > cma->count)
 		return false;
 
 	pfn = page_to_pfn(pages);
-	ret = false;
 
 	for (r = 0; r < cma->nranges; r++) {
 		cmr = &cma->ranges[r];
 		end = cmr->base_pfn + cmr->count;
 		if (pfn >= cmr->base_pfn && pfn < end) {
-			ret = pfn + count <= end;
-			break;
+			if (pfn + count <= end)
+				break;
+
+			VM_WARN_ON_ONCE(1);
 		}
 	}
 
-	if (!ret)
-		pr_debug("%s(page %p, count %lu)\n",
-				__func__, (void *)pages, count);
+	if (r == cma->nranges) {
+		pr_debug("%s(no cma range match the page %p)\n",
+			 __func__, (void *)pages);
+		return false;
+	}
 
-	return ret;
+	free_contig_range(pfn, count);
+	cma_clear_bitmap(cma, cmr, pfn, count);
+	cma_sysfs_account_release_pages(cma, count);
+	trace_cma_release(cma->name, pfn, pages, count);
+
+	return true;
 }
 
 /**
@@ -985,36 +994,7 @@ bool cma_pages_valid(struct cma *cma, const struct page *pages,
 bool cma_release(struct cma *cma, const struct page *pages,
 		 unsigned long count)
 {
-	struct cma_memrange *cmr;
-	unsigned long pfn, end_pfn;
-	int r;
-
-	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
-
-	if (!cma_pages_valid(cma, pages, count))
-		return false;
-
-	pfn = page_to_pfn(pages);
-	end_pfn = pfn + count;
-
-	for (r = 0; r < cma->nranges; r++) {
-		cmr = &cma->ranges[r];
-		if (pfn >= cmr->base_pfn &&
-		    pfn < (cmr->base_pfn + cmr->count)) {
-			VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count);
-			break;
-		}
-	}
-
-	if (r == cma->nranges)
-		return false;
-
-	free_contig_range(pfn, count);
-	cma_clear_bitmap(cma, cmr, pfn, count);
-	cma_sysfs_account_release_pages(cma, count);
-	trace_cma_release(cma->name, pfn, pages, count);
-
-	return true;
+	return __cma_release(cma, pages, count);
 }
 
 bool cma_free_folio(struct cma *cma, const struct folio *folio)
@@ -1022,7 +1002,7 @@ bool cma_free_folio(struct cma *cma, const struct folio *folio)
 	if (WARN_ON(!folio_test_large(folio)))
 		return false;
 
-	return cma_release(cma, &folio->page, folio_nr_pages(folio));
+	return __cma_release(cma, &folio->page, folio_nr_pages(folio));
 }
 
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 5/6] mm: cma: add cma_alloc_frozen{_compound}()
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
                   ` (3 preceding siblings ...)
  2025-10-13 13:38 ` [PATCH v3 4/6] mm: cma: add __cma_release() Kefeng Wang
@ 2025-10-13 13:38 ` Kefeng Wang
  2025-10-13 13:38 ` [PATCH v3 6/6] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
  2025-10-16  1:20 ` [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
  6 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

Introduce the cma_alloc_frozen{_compound}() helpers to allocate pages
without incrementing their refcount, and convert hugetlb cma to use
cma_alloc_frozen_compound(). Also move cma_validate_zones() into
mm/internal.h since it has no outside users.
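
For illustration (not part of this patch), the expected calling pattern,
mirroring the hugetlb_cma conversion below:

  /* order-N compound pages from a CMA area, refcount left at 0 */
  struct page *page = cma_alloc_frozen_compound(cma, order);

  if (page) {
          struct folio *folio = page_folio(page);

          /* use the frozen folio, then release it while still frozen */
          cma_release_frozen(cma, &folio->page, folio_nr_pages(folio));
  }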

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/cma.h | 25 +++++----------------
 mm/cma.c            | 55 +++++++++++++++++++++++++++++----------------
 mm/hugetlb_cma.c    | 22 ++++++++++--------
 mm/internal.h       |  6 +++++
 4 files changed, 60 insertions(+), 48 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index e5745d2aec55..4981c151ef84 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -51,29 +51,14 @@ extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int
 			      bool no_warn);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
 
+struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
+		unsigned int align, bool no_warn);
+bool cma_release_frozen(struct cma *cma, const struct page *pages,
+		unsigned long count);
+
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
 extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end);
 
 extern void cma_reserve_pages_on_error(struct cma *cma);
 
-#ifdef CONFIG_CMA
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
-bool cma_free_folio(struct cma *cma, const struct folio *folio);
-bool cma_validate_zones(struct cma *cma);
-#else
-static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
-{
-	return NULL;
-}
-
-static inline bool cma_free_folio(struct cma *cma, const struct folio *folio)
-{
-	return false;
-}
-static inline bool cma_validate_zones(struct cma *cma)
-{
-	return false;
-}
-#endif
-
 #endif
diff --git a/mm/cma.c b/mm/cma.c
index 88016f4aef7f..ec64a3a88bba 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -836,7 +836,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 		spin_unlock_irq(&cma->lock);
 
 		mutex_lock(&cma->alloc_mutex);
-		ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
+		ret = alloc_contig_range_frozen(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
 		mutex_unlock(&cma->alloc_mutex);
 		if (!ret)
 			break;
@@ -856,8 +856,8 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 	return ret;
 }
 
-static struct page *__cma_alloc(struct cma *cma, unsigned long count,
-		       unsigned int align, gfp_t gfp)
+static struct page *__cma_alloc_frozen(struct cma *cma,
+		unsigned long count, unsigned int align, gfp_t gfp)
 {
 	struct page *page = NULL;
 	int ret = -ENOMEM, r;
@@ -914,6 +914,21 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 	return page;
 }
 
+struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
+		unsigned int align, bool no_warn)
+{
+	gfp_t gfp = GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0);
+
+	return __cma_alloc_frozen(cma, count, align, gfp);
+}
+
+struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order)
+{
+	gfp_t gfp = GFP_KERNEL | __GFP_COMP | __GFP_NOWARN;
+
+	return __cma_alloc_frozen(cma, 1 << order, order, gfp);
+}
+
 /**
  * cma_alloc() - allocate pages from contiguous area
  * @cma:   Contiguous memory region for which the allocation is performed.
@@ -927,23 +942,23 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 struct page *cma_alloc(struct cma *cma, unsigned long count,
 		       unsigned int align, bool no_warn)
 {
-	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
-}
-
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
-{
+	unsigned long pfn;
 	struct page *page;
 
-	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
+	page = cma_alloc_frozen(cma, count, align, no_warn);
+	if (!page)
 		return NULL;
 
-	page = __cma_alloc(cma, 1 << order, order, gfp);
+	pfn = page_to_pfn(page);
 
-	return page ? page_folio(page) : NULL;
+	for (; count--; pfn++)
+		set_page_refcounted(pfn_to_page(pfn));
+
+	return page;
 }
 
 static bool __cma_release(struct cma *cma, const struct page *pages,
-			  unsigned long count)
+			  unsigned long count, bool frozen)
 {
 	unsigned long pfn, end;
 	int r;
@@ -973,7 +988,11 @@ static bool __cma_release(struct cma *cma, const struct page *pages,
 		return false;
 	}
 
-	free_contig_range(pfn, count);
+	if (frozen)
+		free_contig_range_frozen(pfn, count);
+	else
+		free_contig_range(pfn, count);
+
 	cma_clear_bitmap(cma, cmr, pfn, count);
 	cma_sysfs_account_release_pages(cma, count);
 	trace_cma_release(cma->name, pfn, pages, count);
@@ -994,15 +1013,13 @@ static bool __cma_release(struct cma *cma, const struct page *pages,
 bool cma_release(struct cma *cma, const struct page *pages,
 		 unsigned long count)
 {
-	return __cma_release(cma, pages, count);
+	return __cma_release(cma, pages, count, false);
 }
 
-bool cma_free_folio(struct cma *cma, const struct folio *folio)
+bool cma_release_frozen(struct cma *cma, const struct page *pages,
+		unsigned long count)
 {
-	if (WARN_ON(!folio_test_large(folio)))
-		return false;
-
-	return __cma_release(cma, &folio->page, folio_nr_pages(folio));
+	return __cma_release(cma, pages, count, true);
 }
 
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index e8e4dc7182d5..fc41f3b949f8 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -22,33 +22,37 @@ void hugetlb_cma_free_folio(struct folio *folio)
 {
 	int nid = folio_nid(folio);
 
-	WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio));
+	WARN_ON_ONCE(!cma_release(hugetlb_cma[nid], &folio->page,
+				  folio_nr_pages(folio)));
 }
 
-
 struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask)
 {
 	int node;
-	struct folio *folio = NULL;
+	struct folio *folio;
+	struct page *page = NULL;
 
 	if (hugetlb_cma[nid])
-		folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask);
+		page = cma_alloc_frozen_compound(hugetlb_cma[nid], order);
 
-	if (!folio && !(gfp_mask & __GFP_THISNODE)) {
+	if (!page && !(gfp_mask & __GFP_THISNODE)) {
 		for_each_node_mask(node, *nodemask) {
 			if (node == nid || !hugetlb_cma[node])
 				continue;
 
-			folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask);
-			if (folio)
+			page = cma_alloc_frozen_compound(hugetlb_cma[nid], order);
+			if (page)
 				break;
 		}
 	}
 
-	if (folio)
-		folio_set_hugetlb_cma(folio);
+	if (!page)
+		return NULL;
 
+	set_page_refcounted(page);
+	folio = page_folio(page);
+	folio_set_hugetlb_cma(folio);
 	return folio;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 65148cb98b9c..5f4fe54a5ba7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -936,9 +936,15 @@ void init_cma_reserved_pageblock(struct page *page);
 struct cma;
 
 #ifdef CONFIG_CMA
+struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order);
+bool cma_validate_zones(struct cma *cma);
 void *cma_reserve_early(struct cma *cma, unsigned long size);
 void init_cma_pageblock(struct page *page);
 #else
+static inline bool cma_validate_zones(struct cma *cma)
+{
+	return false;
+}
 static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
 {
 	return NULL;
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 6/6] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
                   ` (4 preceding siblings ...)
  2025-10-13 13:38 ` [PATCH v3 5/6] mm: cma: add cma_alloc_frozen{_compound}() Kefeng Wang
@ 2025-10-13 13:38 ` Kefeng Wang
  2025-10-16  1:20 ` [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
  6 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-13 13:38 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Kefeng Wang

alloc_gigantic_folio() allocates a folio via alloc_contig_range()
with its refcount incremented and then freezes it. Convert it to
allocate a frozen folio directly, which removes the atomic operations
on the folio refcount and also saves an atomic operation in
__update_and_free_hugetlb_folio(). Also rename
hugetlb_cma_{alloc,free}_folio() to include "frozen", which makes
them more self-explanatory.
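
Roughly, the allocation side changes as follows (simplified sketch based
on the diff below; retry and error handling omitted):

  /* before: allocate with refcount == 1, then freeze it */
  folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
  if (folio && !folio_ref_freeze(folio, 1)) {
          /* unexpected extra reference: warn, free and retry */
  }

  /* after: the folio is frozen (refcount == 0) from the start */
  folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
  if (!folio && !hugetlb_cma_exclusive_alloc())
          folio = (struct folio *)alloc_contig_frozen_pages(1 << order,
                                          gfp_mask, nid, nodemask);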

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/gfp.h | 23 ------------------
 mm/hugetlb.c        | 58 +++++++++++----------------------------------
 mm/hugetlb_cma.c    | 11 ++++-----
 mm/hugetlb_cma.h    | 10 ++++----
 4 files changed, 24 insertions(+), 78 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fbbdd8c88483..82aba162f352 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -453,27 +453,4 @@ void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages);
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 #endif
 
-#ifdef CONFIG_CONTIG_ALLOC
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-							int nid, nodemask_t *node)
-{
-	struct page *page;
-
-	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
-		return NULL;
-
-	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
-
-	return page ? page_folio(page) : NULL;
-}
-#else
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-							int nid, nodemask_t *node)
-{
-	return NULL;
-}
-#endif
-/* This should be paired with folio_put() rather than free_contig_range(). */
-#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
-
 #endif /* __LINUX_GFP_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 85b2dac79d25..528754683dd4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -125,16 +125,6 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
-static void hugetlb_free_folio(struct folio *folio)
-{
-	if (folio_test_hugetlb_cma(folio)) {
-		hugetlb_cma_free_folio(folio);
-		return;
-	}
-
-	folio_put(folio);
-}
-
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -1471,46 +1461,24 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-#ifdef CONFIG_CONTIG_ALLOC
+#if defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) && defined(CONFIG_CONTIG_ALLOC)
 static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	bool retried = false;
 
-retry:
-	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
-	if (!folio) {
-		if (hugetlb_cma_exclusive_alloc())
-			return NULL;
-
-		folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
-		if (!folio)
-			return NULL;
-	}
-
-	if (folio_ref_freeze(folio, 1))
+	folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
+	if (folio)
 		return folio;
 
-	pr_warn("HugeTLB: unexpected refcount on PFN %lu\n", folio_pfn(folio));
-	hugetlb_free_folio(folio);
-	if (!retried) {
-		retried = true;
-		goto retry;
-	}
-	return NULL;
-}
+	if (hugetlb_cma_exclusive_alloc())
+		return NULL;
 
-#else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
-					  nodemask_t *nodemask)
-{
-	return NULL;
+	folio = (struct folio *)alloc_contig_frozen_pages(1 << order, gfp_mask,
+							  nid, nodemask);
+	return folio;
 }
-#endif /* CONFIG_CONTIG_ALLOC */
-
-#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
+#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE || !CONFIG_CONTIG_ALLOC */
 static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
 					  nodemask_t *nodemask)
 {
@@ -1641,9 +1609,11 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 	if (unlikely(folio_test_hwpoison(folio)))
 		folio_clear_hugetlb_hwpoison(folio);
 
-	folio_ref_unfreeze(folio, 1);
-
-	hugetlb_free_folio(folio);
+	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+	if (folio_test_hugetlb_cma(folio))
+		hugetlb_cma_free_frozen_folio(folio);
+	else
+		free_frozen_pages(&folio->page, folio_order(folio));
 }
 
 /*
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index fc41f3b949f8..af9caaf007e4 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -18,16 +18,16 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static bool hugetlb_cma_only;
 static unsigned long hugetlb_cma_size __initdata;
 
-void hugetlb_cma_free_folio(struct folio *folio)
+void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 	int nid = folio_nid(folio);
 
-	WARN_ON_ONCE(!cma_release(hugetlb_cma[nid], &folio->page,
-				  folio_nr_pages(folio)));
+	WARN_ON_ONCE(!cma_release_frozen(hugetlb_cma[nid], &folio->page,
+					 folio_nr_pages(folio)));
 }
 
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-				      int nid, nodemask_t *nodemask)
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
 {
 	int node;
 	struct folio *folio;
@@ -50,7 +50,6 @@ struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
 	if (!page)
 		return NULL;
 
-	set_page_refcounted(page);
 	folio = page_folio(page);
 	folio_set_hugetlb_cma(folio);
 	return folio;
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index 2c2ec8a7e134..3bc295c8c38e 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -3,8 +3,8 @@
 #define _LINUX_HUGETLB_CMA_H
 
 #ifdef CONFIG_CMA
-void hugetlb_cma_free_folio(struct folio *folio);
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+void hugetlb_cma_free_frozen_folio(struct folio *folio);
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask);
 struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
 						    bool node_exact);
@@ -14,12 +14,12 @@ unsigned long hugetlb_cma_total_size(void);
 void hugetlb_cma_validate_params(void);
 bool hugetlb_early_cma(struct hstate *h);
 #else
-static inline void hugetlb_cma_free_folio(struct folio *folio)
+static inline void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 }
 
-static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
+		gfp_t gfp_mask,	int nid, nodemask_t *nodemask)
 {
 	return NULL;
 }
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 2/6] mm: page_alloc: add __split_page()
  2025-10-13 13:38 ` [PATCH v3 2/6] mm: page_alloc: add __split_page() Kefeng Wang
@ 2025-10-13 19:44   ` David Hildenbrand
  2025-10-14  3:45     ` Kefeng Wang
  0 siblings, 1 reply; 17+ messages in thread
From: David Hildenbrand @ 2025-10-13 19:44 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm

On 13.10.25 15:38, Kefeng Wang wrote:
> Factor out the splitting of a non-compound page from make_alloc_exact()
> and split_page() into a new helper function, __split_page().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/page_alloc.c | 19 ++++++++++++-------
>   1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 949b01f293d4..646a6c2293f9 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3042,6 +3042,15 @@ void free_unref_folios(struct folio_batch *folios)
>   	folio_batch_reinit(folios);
>   }
>   
> +static void __split_page(struct page *page, unsigned int order)
> +{
> +	VM_BUG_ON_PAGE(PageCompound(page), page);

While at it, make that a VM_WARN_ON_ONCE(). Unfortunately there is no
_PAGE() variant, maybe there should be one.

With that

Acked-by: David Hildenbrand <david@redhat.com>

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 4/6] mm: cma: add __cma_release()
  2025-10-13 13:38 ` [PATCH v3 4/6] mm: cma: add __cma_release() Kefeng Wang
@ 2025-10-13 19:48   ` David Hildenbrand
  2025-10-14  3:45     ` Kefeng Wang
  0 siblings, 1 reply; 17+ messages in thread
From: David Hildenbrand @ 2025-10-13 19:48 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm

On 13.10.25 15:38, Kefeng Wang wrote:
> Kill cma_pages_valid(), which is only used in cma_release(), and
> clean up the code duplication between the cma pages validity check
> and the cma memrange lookup. Add a __cma_release() helper to prepare
> for the upcoming frozen page release.
> 
> Reviewed-by: Jane Chu <jane.chu@oracle.com>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   include/linux/cma.h |  1 -
>   mm/cma.c            | 62 +++++++++++++++------------------------------
>   2 files changed, 21 insertions(+), 42 deletions(-)
> 
> diff --git a/include/linux/cma.h b/include/linux/cma.h
> index 62d9c1cf6326..e5745d2aec55 100644
> --- a/include/linux/cma.h
> +++ b/include/linux/cma.h
> @@ -49,7 +49,6 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
>   					struct cma **res_cma);
>   extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
>   			      bool no_warn);
> -extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
>   extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
>   
>   extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
> diff --git a/mm/cma.c b/mm/cma.c
> index 813e6dc7b095..88016f4aef7f 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -942,34 +942,43 @@ struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
>   	return page ? page_folio(page) : NULL;
>   }
>   
> -bool cma_pages_valid(struct cma *cma, const struct page *pages,
> -		     unsigned long count)
> +static bool __cma_release(struct cma *cma, const struct page *pages,
> +			  unsigned long count)
>   {
>   	unsigned long pfn, end;
>   	int r;
>   	struct cma_memrange *cmr;
> -	bool ret;
> +
> +	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
>   
>   	if (!cma || !pages || count > cma->count)
>   		return false;
>   
>   	pfn = page_to_pfn(pages);
> -	ret = false;
>   
>   	for (r = 0; r < cma->nranges; r++) {
>   		cmr = &cma->ranges[r];
>   		end = cmr->base_pfn + cmr->count;
>   		if (pfn >= cmr->base_pfn && pfn < end) {
> -			ret = pfn + count <= end;
> -			break;
> +			if (pfn + count <= end)
> +				break;
> +
> +			VM_WARN_ON_ONCE(1);
>   		}
>   	}
>   
> -	if (!ret)
> -		pr_debug("%s(page %p, count %lu)\n",
> -				__func__, (void *)pages, count);
> +	if (r == cma->nranges) {
> +		pr_debug("%s(no cma range match the page %p)\n",
>

".. matches the page range ..." ?

With that

Acked-by: David Hildenbrand <david@redhat.com>

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 2/6] mm: page_alloc: add __split_page()
  2025-10-13 19:44   ` David Hildenbrand
@ 2025-10-14  3:45     ` Kefeng Wang
  0 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-14  3:45 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm



On 2025/10/14 3:44, David Hildenbrand wrote:
> On 13.10.25 15:38, Kefeng Wang wrote:
>> Factor out the splitting of a non-compound page from make_alloc_exact()
>> and split_page() into a new helper function, __split_page().
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/page_alloc.c | 19 ++++++++++++-------
>>   1 file changed, 12 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 949b01f293d4..646a6c2293f9 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -3042,6 +3042,15 @@ void free_unref_folios(struct folio_batch *folios)
>>       folio_batch_reinit(folios);
>>   }
>> +static void __split_page(struct page *page, unsigned int order)
>> +{
>> +    VM_BUG_ON_PAGE(PageCompound(page), page);
> 
> While at it, make that a VM_WARN_ON_ONCE(). Unfortunately there is no
> _PAGE() variant, maybe there should be one.

OK, it's strange, we have VM_WARN_ON_FOLIO but not a _PAGE() variant,
I will add a new one and use it.

> 
> With that
> 
> Acked-by: David Hildenbrand <david@redhat.com>
> 


Thanks


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 4/6] mm: cma: add __cma_release()
  2025-10-13 19:48   ` David Hildenbrand
@ 2025-10-14  3:45     ` Kefeng Wang
  0 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-14  3:45 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm



On 2025/10/14 3:48, David Hildenbrand wrote:
> On 13.10.25 15:38, Kefeng Wang wrote:
>> Kill cma_pages_valid(), which is only used in cma_release(), and
>> clean up the code duplication between the cma pages validity check
>> and the cma memrange lookup. Add a __cma_release() helper to prepare
>> for the upcoming frozen page release.
>>
>> Reviewed-by: Jane Chu <jane.chu@oracle.com>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   include/linux/cma.h |  1 -
>>   mm/cma.c            | 62 +++++++++++++++------------------------------
>>   2 files changed, 21 insertions(+), 42 deletions(-)
>>
>> diff --git a/include/linux/cma.h b/include/linux/cma.h
>> index 62d9c1cf6326..e5745d2aec55 100644
>> --- a/include/linux/cma.h
>> +++ b/include/linux/cma.h
>> @@ -49,7 +49,6 @@ extern int cma_init_reserved_mem(phys_addr_t base, 
>> phys_addr_t size,
>>                       struct cma **res_cma);
>>   extern struct page *cma_alloc(struct cma *cma, unsigned long count, 
>> unsigned int align,
>>                     bool no_warn);
>> -extern bool cma_pages_valid(struct cma *cma, const struct page 
>> *pages, unsigned long count);
>>   extern bool cma_release(struct cma *cma, const struct page *pages, 
>> unsigned long count);
>>   extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), 
>> void *data);
>> diff --git a/mm/cma.c b/mm/cma.c
>> index 813e6dc7b095..88016f4aef7f 100644
>> --- a/mm/cma.c
>> +++ b/mm/cma.c
>> @@ -942,34 +942,43 @@ struct folio *cma_alloc_folio(struct cma *cma, 
>> int order, gfp_t gfp)
>>       return page ? page_folio(page) : NULL;
>>   }
>> -bool cma_pages_valid(struct cma *cma, const struct page *pages,
>> -             unsigned long count)
>> +static bool __cma_release(struct cma *cma, const struct page *pages,
>> +              unsigned long count)
>>   {
>>       unsigned long pfn, end;
>>       int r;
>>       struct cma_memrange *cmr;
>> -    bool ret;
>> +
>> +    pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, 
>> count);
>>       if (!cma || !pages || count > cma->count)
>>           return false;
>>       pfn = page_to_pfn(pages);
>> -    ret = false;
>>       for (r = 0; r < cma->nranges; r++) {
>>           cmr = &cma->ranges[r];
>>           end = cmr->base_pfn + cmr->count;
>>           if (pfn >= cmr->base_pfn && pfn < end) {
>> -            ret = pfn + count <= end;
>> -            break;
>> +            if (pfn + count <= end)
>> +                break;
>> +
>> +            VM_WARN_ON_ONCE(1);
>>           }
>>       }
>> -    if (!ret)
>> -        pr_debug("%s(page %p, count %lu)\n",
>> -                __func__, (void *)pages, count);
>> +    if (r == cma->nranges) {
>> +        pr_debug("%s(no cma range match the page %p)\n",
>>
> 
> ".. matches the page range ..." ?

Yeah, will update, thanks
> 
> With that
> 
> Acked-by: David Hildenbrand <david@redhat.com>
> 



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio
  2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
                   ` (5 preceding siblings ...)
  2025-10-13 13:38 ` [PATCH v3 6/6] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
@ 2025-10-16  1:20 ` Kefeng Wang
  6 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-16  1:20 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm

Hello,

On 2025/10/13 21:38, Kefeng Wang wrote:
> Introduce alloc_contig_frozen_pages() and cma_alloc_frozen_compound(),
> which avoid atomic operations on the page refcount, and then convert
> hugetlb to allocate frozen gigantic folios with the new helpers,
> cleaning up alloc_gigantic_folio().
> 
> v3:
> - Fix build warnings/errors found by the lkp test
> - Address some of David's comments:
>    - Focus on the frozen part and drop the optimization part
>    - Rename split_non_compound_pages() to __split_page()
>    - Add back the debug print/WARN_ON if no cma range is found or the
>      pfn range of the page does not fully match the cma range
> 
> v2:
> - Optimize gigantic folio allocation speed
> - Use HPAGE_PUD_ORDER in debug_vm_pgtable
> - Address some of David's comments:
>    - kill folio_alloc_gigantic()
>    - add generic cma_alloc_frozen{_compound}() instead of
>      cma_{alloc,free}_folio
> 
> Kefeng Wang (6):
>    mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page()
>    mm: page_alloc: add __split_page()
>    mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
>    mm: cma: add __cma_release()
>    mm: cma: add cma_alloc_frozen{_compound}()
>    mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()


Any comments on patches 3/5/6? I will post a new version this week.

Thanks for all the comments.

> 
>   include/linux/cma.h   |  26 ++----
>   include/linux/gfp.h   |  52 +++++------
>   mm/cma.c              | 109 +++++++++++-----------
>   mm/debug_vm_pgtable.c |  38 ++++----
>   mm/hugetlb.c          |  58 +++---------
>   mm/hugetlb_cma.c      |  27 +++---
>   mm/hugetlb_cma.h      |  10 +--
>   mm/internal.h         |   6 ++
>   mm/page_alloc.c       | 204 ++++++++++++++++++++++++++++--------------
>   9 files changed, 271 insertions(+), 259 deletions(-)
> 



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-10-13 13:38 ` [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
@ 2025-10-16 20:53   ` David Hildenbrand
  2025-10-17  7:19     ` Kefeng Wang
  0 siblings, 1 reply; 17+ messages in thread
From: David Hildenbrand @ 2025-10-16 20:53 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm

On 13.10.25 15:38, Kefeng Wang wrote:
> In order to allocate a given range of pages, or compound pages,
> without incrementing their refcount, add two new helpers,
> alloc_contig_{range_frozen,frozen_pages}(), which may be beneficial
> to some users (e.g. hugetlb). free_contig_range_frozen() is also
> provided to match alloc_contig_range_frozen(), but it is better to
> use free_frozen_pages() to free frozen compound pages.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   include/linux/gfp.h |  29 +++++--
>   mm/page_alloc.c     | 183 +++++++++++++++++++++++++++++---------------
>   2 files changed, 143 insertions(+), 69 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 1fefb63e0480..fbbdd8c88483 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -429,14 +429,27 @@ typedef unsigned int __bitwise acr_flags_t;
>   #define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA
>   
>   /* The below functions must be run on a range from a single zone. */
> -extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> -				     acr_flags_t alloc_flags, gfp_t gfp_mask);
> -#define alloc_contig_range(...)			alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
> -
> -extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> -					      int nid, nodemask_t *nodemask);
> -#define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
> -
> +int alloc_contig_range_frozen_noprof(unsigned long start, unsigned long end,
> +		acr_flags_t alloc_flags, gfp_t gfp_mask);

Just wondering: given alloc_contig_pages() vs. alloc_contig_frozen_pages()

Shouldn't it be alloc_contig_range() vs. alloc_contig_frozen_range()

And then free_contig_frozen_range()?


Do we want kerneldoc here as well?
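
Something along these lines maybe (just sketching, using the naming
suggested above; wording up for grabs):

/**
 * alloc_contig_frozen_range() - allocate a PFN range without elevating
 *				 page refcounts
 * @start:	start PFN to allocate
 * @end:	one-past-the-last PFN to allocate
 * @alloc_flags:	allocation information
 * @gfp_mask:	GFP mask, see alloc_contig_range()
 *
 * Like alloc_contig_range(), but all pages in [start, end) are handed to
 * the caller with a refcount of zero and have to be freed with
 * free_contig_frozen_range() (or free_frozen_pages() for a __GFP_COMP
 * allocation).
 *
 * Return: zero on success or a negative error code.
 */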

> +int alloc_contig_range_frozen_noprof(unsigned long start, unsigned long end,
> +		acr_flags_t alloc_flags, gfp_t gfp_mask)
>   {
>   	const unsigned int order = ilog2(end - start);
>   	unsigned long outer_start, outer_end;
> @@ -7003,19 +6982,18 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
>   	}
>   
>   	if (!(gfp_mask & __GFP_COMP)) {
> -		split_free_pages(cc.freepages, gfp_mask);
> +		split_free_frozen_pages(cc.freepages, gfp_mask);
>   
>   		/* Free head and tail (if any) */
>   		if (start != outer_start)
> -			free_contig_range(outer_start, start - outer_start);
> +			free_contig_range_frozen(outer_start, start - outer_start);
>   		if (end != outer_end)
> -			free_contig_range(end, outer_end - end);
> +			free_contig_range_frozen(end, outer_end - end);
>   	} else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
>   		struct page *head = pfn_to_page(start);
>   
>   		check_new_pages(head, order);
>   		prep_new_page(head, order, gfp_mask, 0);
> -		set_page_refcounted(head);
>   	} else {
>   		ret = -EINVAL;
>   		WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
> @@ -7025,16 +7003,48 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
>   	undo_isolate_page_range(start, end);
>   	return ret;
>   }
> -EXPORT_SYMBOL(alloc_contig_range_noprof);
>   
> -static int __alloc_contig_pages(unsigned long start_pfn,
> -				unsigned long nr_pages, gfp_t gfp_mask)
> +/**
> + * alloc_contig_range() -- tries to allocate given range of pages
> + * @start:	start PFN to allocate
> + * @end:	one-past-the-last PFN to allocate
> + * @alloc_flags:	allocation information
> + * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
> + *		action and reclaim modifiers are supported. Reclaim modifiers
> + *		control allocation behavior during compaction/migration/reclaim.
> + *
> + * The PFN range does not have to be pageblock aligned. The PFN range must
> + * belong to a single zone.
> + *
> + * The first thing this routine does is attempt to MIGRATE_ISOLATE all
> + * pageblocks in the range.  Once isolated, the pageblocks should not
> + * be modified by others.
> + *
> + * Return: zero on success or negative error code.  On success all
> + * pages which PFN is in [start, end) are allocated for the caller and
> + * need to be freed with free_contig_range().
> + */
> +int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> +			      acr_flags_t alloc_flags, gfp_t gfp_mask)
>   {
> -	unsigned long end_pfn = start_pfn + nr_pages;
> +	int ret;
> +
> +	ret = alloc_contig_range_frozen_noprof(start, end, alloc_flags, gfp_mask);
> +	if (ret)
> +		return ret;
> +
> +	if (gfp_mask & __GFP_COMP) {
> +		set_page_refcounted(pfn_to_page(start));
> +	} else {
> +		unsigned long pfn;
> +
> +		for (pfn = start; pfn < end; pfn++)
> +			set_page_refcounted(pfn_to_page(pfn));
> +	}

Might read better as

unsigned long pfn;

...

if (gfp_mask & __GFP_COMP) {
	set_page_refcounted(pfn_to_page(start));
	return 0;
}

for (pfn = start; pfn < end; pfn++)
	set_page_refcounted(pfn_to_page(pfn));
return 0;


One could also do something fancy like

unsigned long pfn;
...

for (pfn = start; pfn < end; pfn++) {
	set_page_refcounted(pfn_to_page(pfn));
	if (gfp_mask & __GFP_COMP)
		break;
}
return 0;


>   
> -	return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
> -					 gfp_mask);
> +	return 0;
>   }
> +EXPORT_SYMBOL(alloc_contig_range_noprof);
>   
>   static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
>   				   unsigned long nr_pages)
> @@ -7067,31 +7077,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
>   	return zone_spans_pfn(zone, last_pfn);
>   }


... kerneldoc? :)

> +struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
> +		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
>   {
>   	unsigned long ret, pfn, flags;
>   	struct zonelist *zonelist;
> @@ -7114,7 +7101,9 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
>   				 * and cause alloc_contig_range() to fail...
>   				 */
>   				spin_unlock_irqrestore(&zone->lock, flags);
> -				ret = __alloc_contig_pages(pfn, nr_pages,
> +				ret = alloc_contig_range_frozen_noprof(pfn,
> +							pfn + nr_pages,
> +							ACR_FLAGS_NONE,
>   							gfp_mask);
>   				if (!ret)
>   					return pfn_to_page(pfn);
> @@ -7126,6 +7115,78 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
>   	}
>   	return NULL;
>   }
> +EXPORT_SYMBOL(alloc_contig_range_frozen_noprof);
> +

kerneldoc? :)

> +void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages)
> +{
> +	struct folio *folio = pfn_folio(pfn);
> +
> +	if (folio_test_large(folio)) {
> +		int expected = folio_nr_pages(folio);
> +
> +		WARN_ON(folio_ref_count(folio));
> +
> +		if (nr_pages == expected)
> +			free_frozen_pages(&folio->page, folio_order(folio));
> +		else
> +			WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
> +			     pfn, nr_pages, expected);
> +		return;
> +	}
> +
> +	for (; nr_pages--; pfn++) {
> +		struct page *page = pfn_to_page(pfn);
> +
> +		WARN_ON(page_ref_count(page));
> +		free_frozen_pages(page, 0);
> +	}

That's mostly a copy-and-paste of free_contig_range().

I wonder if there is some way to avoid duplicating a lot of
free_contig_range() here. Hmmm.

Also, the folio stuff in there looks a bit weird I'm afraid.

Can't we just refuse to free compound pages through this interface and
free_contig_range() ? IIRC only hugetlb uses it and uses folio_put() either way?

Then we can just document that compound allocations are to be freed differently.

And do something like

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 600d9e981c23d..776b4addc3685 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7123,29 +7123,25 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
  }
  #endif /* CONFIG_CONTIG_ALLOC */
  
-void free_contig_range(unsigned long pfn, unsigned long nr_pages)
+static inline void __free_contig_range(unsigned long pfn, unsigned long nr_pages, bool put_ref)
  {
-       unsigned long count = 0;
-       struct folio *folio = pfn_folio(pfn);
-
-       if (folio_test_large(folio)) {
-               int expected = folio_nr_pages(folio);
-
-               if (nr_pages == expected)
-                       folio_put(folio);
-               else
-                       WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
-                            pfn, nr_pages, expected);
-               return;
-       }
+       if (WARN_ON_ONCE(PageHead(pfn_to_page(pfn))))
+               return;
  
         for (; nr_pages--; pfn++) {
                 struct page *page = pfn_to_page(pfn);
  
-               count += page_count(page) != 1;
-               __free_page(page);
+               if (put_ref)
+                       page_ref_dec(page);
+               if (WARN_ON_ONCE(page_count(page)))
+                       continue;
+               free_frozen_pages(page, 0);
         }
-       WARN(count != 0, "%lu pages are still in use!\n", count);
+}
+
+void free_contig_range(unsigned long pfn, unsigned long nr_pages)
+{
+       return __free_contig_range(pfn, nr_pages, /* put_ref= */ true);
  }
  EXPORT_SYMBOL(free_contig_range);
  

Just a thought, I dislike current free_contig_range() and the duplicated
variant.

> +}
> +EXPORT_SYMBOL(free_contig_range_frozen);
> +
> +/**
> + * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
> + * @nr_pages:	Number of contiguous pages to allocate
> + * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
> + *		action and reclaim modifiers are supported. Reclaim modifiers
> + *		control allocation behavior during compaction/migration/reclaim.
> + * @nid:	Target node
> + * @nodemask:	Mask for other possible nodes
> + *
> + * This routine is a wrapper around alloc_contig_range(). It scans over zones
> + * on an applicable zonelist to find a contiguous pfn range which can then be
> + * tried for allocation with alloc_contig_range(). This routine is intended
> + * for allocation requests which can not be fulfilled with the buddy allocator.
> + *
> + * The allocated memory is always aligned to a page boundary. If nr_pages is a
> + * power of two, then allocated range is also guaranteed to be aligned to same
> + * nr_pages (e.g. 1GB request would be aligned to 1GB).
> + *
> + * Allocated pages can be freed with free_contig_range() or by manually calling
> + * __free_page() on each allocated page.
> + *
> + * Return: pointer to contiguous pages on success, or NULL if not successful.
> + */
> +struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> +		int nid, nodemask_t *nodemask)
> +{
> +	struct page *page;
> +
> +	page =  alloc_contig_frozen_pages_noprof(nr_pages, gfp_mask, nid,
> +						 nodemask);
> +	if (!page)
> +		return NULL;
> +
> +	if (gfp_mask & __GFP_COMP) {
> +		set_page_refcounted(page);
> +	} else {
> +		unsigned long pfn = page_to_pfn(page);
> +
> +		for (; nr_pages--; pfn++)
> +			set_page_refcounted(pfn_to_page(pfn));
> +	}
> +
> +	return page;

Same here, might be able to make it easier to read like I suggested for the
alloc_contig_range_noprof().

Or that part can just be factored out?

void set_pages_refcounted(struct page *page, unsigned long nr_pages, gfp_t gfp_mask)

or better

void set_pages_refcounted(struct page *page, unsigned long nr_pages)

And deriving __GFP_COMP from PageHead().
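
Something like this maybe (sketch only, deriving the compound case from
PageHead() as said, so no gfp_mask argument needed):

static void set_pages_refcounted(struct page *page, unsigned long nr_pages)
{
	unsigned long pfn = page_to_pfn(page);

	/* compound allocation: only the head page carries a refcount */
	if (PageHead(page)) {
		set_page_refcounted(page);
		return;
	}

	for (; nr_pages--; pfn++)
		set_page_refcounted(pfn_to_page(pfn));
}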

-- 
Cheers

David / dhildenb



^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-10-16 20:53   ` David Hildenbrand
@ 2025-10-17  7:19     ` Kefeng Wang
  2025-10-20 13:07       ` David Hildenbrand
  0 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2025-10-17  7:19 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm



On 2025/10/17 4:53, David Hildenbrand wrote:
> On 13.10.25 15:38, Kefeng Wang wrote:
>> In order to allocate given range of pages or allocate compound
>> pages without incrementing their refcount, adding two new helper
>> alloc_contig_{range_frozen,frozen_pages}() which may be beneficial
>> to some users (eg hugetlb), also free_contig_range_frozen() is
>> provided to match alloc_contig_range_frozen(), but it is better to
>> use free_frozen_pages() to free frozen compound pages.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   include/linux/gfp.h |  29 +++++--
>>   mm/page_alloc.c     | 183 +++++++++++++++++++++++++++++---------------
>>   2 files changed, 143 insertions(+), 69 deletions(-)
>>
>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>> index 1fefb63e0480..fbbdd8c88483 100644
>> --- a/include/linux/gfp.h
>> +++ b/include/linux/gfp.h
>> @@ -429,14 +429,27 @@ typedef unsigned int __bitwise acr_flags_t;
>>   #define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA
>>   /* The below functions must be run on a range from a single zone. */
>> -extern int alloc_contig_range_noprof(unsigned long start, unsigned 
>> long end,
>> -                     acr_flags_t alloc_flags, gfp_t gfp_mask);
>> -#define alloc_contig_range(...)            
>> alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
>> -
>> -extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, 
>> gfp_t gfp_mask,
>> -                          int nid, nodemask_t *nodemask);
>> -#define alloc_contig_pages(...)            
>> alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
>> -
>> +int alloc_contig_range_frozen_noprof(unsigned long start, unsigned 
>> long end,
>> +        acr_flags_t alloc_flags, gfp_t gfp_mask);
> 
> Just wondering: given alloc_contig_pages() vs. alloc_contig_frozen_pages()
> 
> Shouldn't it be alloc_contig_range() vs. alloc_contig_frozen_range()
> 
> And then free_contig_frozen_range()?
> 

Sure, it's better to align them; I hadn't thought much about the naming.

> 
> Do we want kerneldoc here as well?

OK, will add a short kerneldoc for this one.

> 
>> +int alloc_contig_range_frozen_noprof(unsigned long start, unsigned 
>> long end,
>> +        acr_flags_t alloc_flags, gfp_t gfp_mask)
>>   {
>>       const unsigned int order = ilog2(end - start);
>>       unsigned long outer_start, outer_end;
>> @@ -7003,19 +6982,18 @@ int alloc_contig_range_noprof(unsigned long 
>> start, unsigned long end,
>>       }
>>       if (!(gfp_mask & __GFP_COMP)) {
>> -        split_free_pages(cc.freepages, gfp_mask);
>> +        split_free_frozen_pages(cc.freepages, gfp_mask);
>>           /* Free head and tail (if any) */
>>           if (start != outer_start)
>> -            free_contig_range(outer_start, start - outer_start);
>> +            free_contig_range_frozen(outer_start, start - outer_start);
>>           if (end != outer_end)
>> -            free_contig_range(end, outer_end - end);
>> +            free_contig_range_frozen(end, outer_end - end);
>>       } else if (start == outer_start && end == outer_end && 
>> is_power_of_2(end - start)) {
>>           struct page *head = pfn_to_page(start);
>>           check_new_pages(head, order);
>>           prep_new_page(head, order, gfp_mask, 0);
>> -        set_page_refcounted(head);
>>       } else {
>>           ret = -EINVAL;
>>           WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, 
>> %lu)\n",
>> @@ -7025,16 +7003,48 @@ int alloc_contig_range_noprof(unsigned long 
>> start, unsigned long end,
>>       undo_isolate_page_range(start, end);
>>       return ret;
>>   }
>> -EXPORT_SYMBOL(alloc_contig_range_noprof);
>> -static int __alloc_contig_pages(unsigned long start_pfn,
>> -                unsigned long nr_pages, gfp_t gfp_mask)
>> +/**
>> + * alloc_contig_range() -- tries to allocate given range of pages
>> + * @start:    start PFN to allocate
>> + * @end:    one-past-the-last PFN to allocate
>> + * @alloc_flags:    allocation information
>> + * @gfp_mask:    GFP mask. Node/zone/placement hints are ignored; 
>> only some
>> + *        action and reclaim modifiers are supported. Reclaim modifiers
>> + *        control allocation behavior during compaction/migration/ 
>> reclaim.
>> + *
>> + * The PFN range does not have to be pageblock aligned. The PFN range 
>> must
>> + * belong to a single zone.
>> + *
>> + * The first thing this routine does is attempt to MIGRATE_ISOLATE all
>> + * pageblocks in the range.  Once isolated, the pageblocks should not
>> + * be modified by others.
>> + *
>> + * Return: zero on success or negative error code.  On success all
>> + * pages which PFN is in [start, end) are allocated for the caller and
>> + * need to be freed with free_contig_range().
>> + */
>> +int alloc_contig_range_noprof(unsigned long start, unsigned long end,
>> +                  acr_flags_t alloc_flags, gfp_t gfp_mask)
>>   {
>> -    unsigned long end_pfn = start_pfn + nr_pages;
>> +    int ret;
>> +
>> +    ret = alloc_contig_range_frozen_noprof(start, end, alloc_flags, 
>> gfp_mask);
>> +    if (ret)
>> +        return ret;
>> +
>> +    if (gfp_mask & __GFP_COMP) {
>> +        set_page_refcounted(pfn_to_page(start));
>> +    } else {
>> +        unsigned long pfn;
>> +
>> +        for (pfn = start; pfn < end; pfn++)
>> +            set_page_refcounted(pfn_to_page(pfn));
>> +    }
> 
> Might read better as
> 
> unsigned long pfn;
> 
> ...
> 
> if (gfp_mask & __GFP_COMP) {
>      set_page_refcounted(pfn_to_page(start));
>      return 0;
> }
> 
> for (pfn = start; pfn < end; pfn++)
>      set_page_refcounted(pfn_to_page(pfn));
> return 0;
> 
> 

I like this one, it avoids the gfp_mask check in the loop.
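
So the wrapper would end up roughly like this (just combining the patch
with your suggestion, for reference):

int alloc_contig_range_noprof(unsigned long start, unsigned long end,
			      acr_flags_t alloc_flags, gfp_t gfp_mask)
{
	unsigned long pfn;
	int ret;

	ret = alloc_contig_range_frozen_noprof(start, end, alloc_flags,
					       gfp_mask);
	if (ret)
		return ret;

	/* compound allocation: only the head page carries a refcount */
	if (gfp_mask & __GFP_COMP) {
		set_page_refcounted(pfn_to_page(start));
		return 0;
	}

	for (pfn = start; pfn < end; pfn++)
		set_page_refcounted(pfn_to_page(pfn));
	return 0;
}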

> One could also do something fancy like
> 
> unsigned long pfn;
> ...
> 
> for (pfn = start; pfn < end; pfn++) {
>      set_page_refcounted(pfn_to_page(pfn));
>      if (gfp_mask & __GFP_COMP)
>          break;
> }
> return 0;
> 
> 
>> -    return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
>> -                     gfp_mask);
>> +    return 0;
>>   }
>> +EXPORT_SYMBOL(alloc_contig_range_noprof);
>>   static bool pfn_range_valid_contig(struct zone *z, unsigned long 
>> start_pfn,
>>                      unsigned long nr_pages)
>> @@ -7067,31 +7077,8 @@ static bool zone_spans_last_pfn(const struct 
>> zone *zone,
>>       return zone_spans_pfn(zone, last_pfn);
>>   }
> 
> 
> ... kerneldoc? :)

Sure.

> 
>> +struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
>> +        gfp_t gfp_mask, int nid, nodemask_t *nodemask)
>>   {
>>       unsigned long ret, pfn, flags;
>>       struct zonelist *zonelist;
>> @@ -7114,7 +7101,9 @@ struct page *alloc_contig_pages_noprof(unsigned 
>> long nr_pages, gfp_t gfp_mask,
>>                    * and cause alloc_contig_range() to fail...
>>                    */
>>                   spin_unlock_irqrestore(&zone->lock, flags);
>> -                ret = __alloc_contig_pages(pfn, nr_pages,
>> +                ret = alloc_contig_range_frozen_noprof(pfn,
>> +                            pfn + nr_pages,
>> +                            ACR_FLAGS_NONE,
>>                               gfp_mask);
>>                   if (!ret)
>>                       return pfn_to_page(pfn);
>> @@ -7126,6 +7115,78 @@ struct page *alloc_contig_pages_noprof(unsigned 
>> long nr_pages, gfp_t gfp_mask,
>>       }
>>       return NULL;
>>   }
>> +EXPORT_SYMBOL(alloc_contig_range_frozen_noprof);
>> +
> 
> kerneldoc? :)
> 
>> +void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages)
>> +{
>> +    struct folio *folio = pfn_folio(pfn);
>> +
>> +    if (folio_test_large(folio)) {
>> +        int expected = folio_nr_pages(folio);
>> +
>> +        WARN_ON(folio_ref_count(folio));
>> +
>> +        if (nr_pages == expected)
>> +            free_frozen_pages(&folio->page, folio_order(folio));
>> +        else
>> +            WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
>> +                 pfn, nr_pages, expected);
>> +        return;
>> +    }
>> +
>> +    for (; nr_pages--; pfn++) {
>> +        struct page *page = pfn_to_page(pfn);
>> +
>> +        WARN_ON(page_ref_count(page));
>> +        free_frozen_pages(page, 0);
>> +    }
> 
> That's mostly a copy-and-paste of free_contig_range().
> 
> I wonder if there is some way to avoid duplicating a lot of
> free_contig_range() here. Hmmm.
> 
> Also, the folio stuff in there looks a bit weird I'm afraid.
> 
>> Can't we just refuse to free compound pages through this interface and
> free_contig_range() ? IIRC only hugetlb uses it and uses folio_put() 
> either way?
> 
> Then we can just document that compound allocations are to be freed 
> differently.


There is a case for cma_free_folio, which calls free_contig_range for 
both in cma_release(), but I will try to check whether we could avoid
the folio stuff in free_contig_range().
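
For reference, the current path is roughly the following (paraphrased
from memory, details may differ); both the compound and the non-compound
case funnel into free_contig_range():

bool cma_free_folio(struct cma *cma, const struct folio *folio)
{
	if (WARN_ON(!folio_test_large(folio)))
		return false;

	/* cma_release() does free_contig_range() + cma_clear_bitmap() */
	return cma_release(cma, &folio->page, folio_nr_pages(folio));
}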

> 
> And do something like
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 600d9e981c23d..776b4addc3685 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -7123,29 +7123,25 @@ struct page *alloc_contig_pages_noprof(unsigned 
> long nr_pages, gfp_t gfp_mask,
>   }
>   #endif /* CONFIG_CONTIG_ALLOC */
> 
> -void free_contig_range(unsigned long pfn, unsigned long nr_pages)
> +static inline void __free_contig_range(unsigned long pfn, unsigned long 
> nr_pages, bool put_ref)
>   {
> -       unsigned long count = 0;
> -       struct folio *folio = pfn_folio(pfn);
> -
> -       if (folio_test_large(folio)) {
> -               int expected = folio_nr_pages(folio);
> -
> -               if (nr_pages == expected)
> -                       folio_put(folio);
> -               else
> -                       WARN(true, "PFN %lu: nr_pages %lu != expected 
> %d\n",
> -                            pfn, nr_pages, expected);
> -               return;
> -       }
> +       if (WARN_ON_ONCE(PageHead(pfn_to_page(pfn))))
> +               return;
> 
>          for (; nr_pages--; pfn++) {
>                  struct page *page = pfn_to_page(pfn);
> 
> -               count += page_count(page) != 1;
> -               __free_page(page);
> +               if (put_ref)
> +                       page_ref_dec(page);
> +               if (WARN_ON_ONCE(page_count(page)))
> +                       continue;
> +               free_frozen_pages(page, 0);
>          }
> -       WARN(count != 0, "%lu pages are still in use!\n", count);
> +}
> +
> +void free_contig_range(unsigned long pfn, unsigned long nr_pages)
> +{
> +       return __free_contig_range(pfn, nr_pages, /* put_ref= */ true);
>   }
>   EXPORT_SYMBOL(free_contig_range);
> 
> 
> Just a thought, I dislike current free_contig_range() and the duplicated
> variant.
> 
>> +}
>> +EXPORT_SYMBOL(free_contig_range_frozen);
>> +
>> +/**
>> + * alloc_contig_pages() -- tries to find and allocate contiguous 
>> range of pages
>> + * @nr_pages:    Number of contiguous pages to allocate
>> + * @gfp_mask:    GFP mask. Node/zone/placement hints limit the 
>> search; only some
>> + *        action and reclaim modifiers are supported. Reclaim modifiers
>> + *        control allocation behavior during compaction/migration/ 
>> reclaim.
>> + * @nid:    Target node
>> + * @nodemask:    Mask for other possible nodes
>> + *
>> + * This routine is a wrapper around alloc_contig_range(). It scans 
>> over zones
>> + * on an applicable zonelist to find a contiguous pfn range which can 
>> then be
>> + * tried for allocation with alloc_contig_range(). This routine is 
>> intended
>> + * for allocation requests which can not be fulfilled with the buddy 
>> allocator.
>> + *
>> + * The allocated memory is always aligned to a page boundary. If 
>> nr_pages is a
>> + * power of two, then allocated range is also guaranteed to be 
>> aligned to same
>> + * nr_pages (e.g. 1GB request would be aligned to 1GB).
>> + *
>> + * Allocated pages can be freed with free_contig_range() or by 
>> manually calling
>> + * __free_page() on each allocated page.
>> + *
>> + * Return: pointer to contiguous pages on success, or NULL if not 
>> successful.
>> + */
>> +struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t 
>> gfp_mask,
>> +        int nid, nodemask_t *nodemask)
>> +{
>> +    struct page *page;
>> +
>> +    page =  alloc_contig_frozen_pages_noprof(nr_pages, gfp_mask, nid,
>> +                         nodemask);
>> +    if (!page)
>> +        return NULL;
>> +
>> +    if (gfp_mask & __GFP_COMP) {
>> +        set_page_refcounted(page);
>> +    } else {
>> +        unsigned long pfn = page_to_pfn(page);
>> +
>> +        for (; nr_pages--; pfn++)
>> +            set_page_refcounted(pfn_to_page(pfn));
>> +    }
>> +
>> +    return page;
> 
> Same here, might be able to make it easier to read like I suggested for the
> alloc_contig_range_noprof().
> 
> Or that part can just be factored out?
> 
> void set_pages_refcounted(struct page *page, unsigned long nr_pages, 
> gfp_t gfp_mask)
> 
> or better
> 
> void set_pages_refcounted(struct page *page, unsigned long nr_pages)
> 
> And deriving __GFP_COMP from PageHead().
> 

Will try to factor it out.

Thanks.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-10-17  7:19     ` Kefeng Wang
@ 2025-10-20 13:07       ` David Hildenbrand
  2025-10-20 15:21         ` Kefeng Wang
  0 siblings, 1 reply; 17+ messages in thread
From: David Hildenbrand @ 2025-10-20 13:07 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm


>>
>>> +void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages)
>>> +{
>>> +    struct folio *folio = pfn_folio(pfn);
>>> +
>>> +    if (folio_test_large(folio)) {
>>> +        int expected = folio_nr_pages(folio);
>>> +
>>> +        WARN_ON(folio_ref_count(folio));
>>> +
>>> +        if (nr_pages == expected)
>>> +            free_frozen_pages(&folio->page, folio_order(folio));
>>> +        else
>>> +            WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
>>> +                 pfn, nr_pages, expected);
>>> +        return;
>>> +    }
>>> +
>>> +    for (; nr_pages--; pfn++) {
>>> +        struct page *page = pfn_to_page(pfn);
>>> +
>>> +        WARN_ON(page_ref_count(page));
>>> +        free_frozen_pages(page, 0);
>>> +    }
>>
>> That's mostly a copy-and-paste of free_contig_range().
>>
>> I wonder if there is some way to avoid duplicating a lot of
>> free_contig_range() here. Hmmm.
>>
>> Also, the folio stuff in there looks a bit weird I'm afraid.
>>
>> Can't we just refuse to free compound pages through this interface and
>> free_contig_range() ? IIRC only hugetlb uses it and uses folio_put()
>> either way?
>>
>> Then we can just document that compound allocations are to be freed
>> differently.
> 
> 
> There is a case for cma_free_folio, which calls free_contig_range for
> both in cma_release(), but I will try to check whether we could avoid
> the folio stuff in free_contig_range().


Ah, right, there is hugetlb_cma_free_folio()->cma_free_folio().

And we need that, because we have to make sure that CMA stats are 
updated properly.

All compound page handling in the freeing path is just nasty and not 
particularly future-proof regarding memdescs.

I wonder if we could just teach alloc_contig to never hand out compound 
pages and then let the freeing path similarly assert that there are no 
compound pages.

Whoever wants a compound page (currently only hugetlb?) can create that 
from a frozen range. Before returning the frozen range the compound page 
can be dissolved. That way also any memdesc can be allocated/freed by 
the caller later.
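
On the hugetlb side that could look roughly like this (hand-wavy sketch;
the helper name is made up, alloc_contig_frozen_range() per the naming
discussed earlier, and it ignores the page_owner/page_table_check part
mentioned below):

/* hypothetical helper, only to illustrate the idea */
static struct folio *hugetlb_alloc_frozen_gigantic(unsigned long pfn,
		unsigned int order, gfp_t gfp_mask)
{
	/* allocate a plain (non-compound) frozen range ... */
	if (alloc_contig_frozen_range(pfn, pfn + (1UL << order),
				      ACR_FLAGS_NONE, gfp_mask & ~__GFP_COMP))
		return NULL;

	/* ... and build the compound page / memdesc in the caller */
	prep_compound_page(pfn_to_page(pfn), order);
	return page_folio(pfn_to_page(pfn));
}

/*
 * On freeing, the compound page would be dissolved again (as hugetlb
 * already does for gigantic folios) before handing the range back via
 * free_contig_frozen_range().
 */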

The only nasty thing is the handling of splitting/merging of 
set_page_owner/page_table_check_alloc etc. :(



As an alternative, we could only allow compound pages for frozen pages. 
This way, we'd force any caller to handle the allocation/freeing of the 
memdesc in the future manually.

Essentially, only allow __GFP_COMP on the frozen interface, which we 
would convert hugetlb to.

That means that we can simplify free_contig_range() [no need to handle 
compound pages]. For free_contig_frozen_range() we would skip refcount 
checks on that level and do something like:

void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages)
{
	struct page *first_page = pfn_to_page(pfn);
	const unsigned int order = ilog2(nr_pages);

	if (PageHead(first_page)) {
		WARN_ON_ONCE(order != compound_order(first_page));
		free_frozen_pages(first_page, order);
		return;
	}

	for (; nr_pages--; pfn++)
		free_frozen_pages(pfn_to_page(pfn), 0);
}

CCing Willy, I don't know yet what will be better in the future. But the 
folio stuff in there screams for problems.

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-10-20 13:07       ` David Hildenbrand
@ 2025-10-20 15:21         ` Kefeng Wang
  2025-10-23 12:06           ` Kefeng Wang
  0 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2025-10-20 15:21 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Matthew Wilcox

+ Matthew

On 2025/10/20 21:07, David Hildenbrand wrote:
> 
>>>
>>>> +void free_contig_range_frozen(unsigned long pfn, unsigned long 
>>>> nr_pages)
>>>> +{
>>>> +    struct folio *folio = pfn_folio(pfn);
>>>> +
>>>> +    if (folio_test_large(folio)) {
>>>> +        int expected = folio_nr_pages(folio);
>>>> +
>>>> +        WARN_ON(folio_ref_count(folio));
>>>> +
>>>> +        if (nr_pages == expected)
>>>> +            free_frozen_pages(&folio->page, folio_order(folio));
>>>> +        else
>>>> +            WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
>>>> +                 pfn, nr_pages, expected);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    for (; nr_pages--; pfn++) {
>>>> +        struct page *page = pfn_to_page(pfn);
>>>> +
>>>> +        WARN_ON(page_ref_count(page));
>>>> +        free_frozen_pages(page, 0);
>>>> +    }
>>>
>>> That's mostly a copy-and-paste of free_contig_range().
>>>
>>> I wonder if there is some way to avoid duplicating a lot of
>>> free_contig_range() here. Hmmm.
>>>
>>> Also, the folio stuff in there looks a bit weird I'm afraid.
>>>
>>> Can't we just refuse to free compound pages through this interface and
>>> free_contig_range() ? IIRC only hugetlb uses it and uses folio_put()
>>> either way?
>>>
>>> Then we can just document that compound allocations are to be freed
>>> differently.
>>
>>
>> There is a case for cma_free_folio, which calls free_contig_range for
>> both in cma_release(), but I will try to check whether we could avoid
>> the folio stuff in free_contig_range().
> 
> 
> Ah, right, there is hugetlb_cma_free_folio()->cma_free_folio().
> 
> And we need that, because we have to make sure that CMA stats are 
> updated properly.
> 
> All compound page handling in the freeing path is just nasty and not 
> particularly future-proof regarding memdescs.
> 
> I wonder if we could just teach alloc_contig to never hand out compound 
> pages and then let the freeing path similarly assert that there are no 
> compound pages.
> 
> Whoever wants a compound page (currently only hugetlb?) can create that 
> from a frozen range. Before returning the frozen range the compound page 
> can be dissolved. That way also any memdesc can be allocated/freed by 
> the caller later.
> 
> The only nasty thing is the handling of splitting/merging of 
> set_page_owner/page_table_check_alloc etc. :(
> 
> 
> 
> As an alternative, we could only allow compound pages for frozen pages. 
> This way, we'd force any caller to handle the allocation/freeing of the 
> memdesc in the future manually.
> 
> Essentially, only allow __GFP_COMP on the frozen interface, which we 
> would convert hugetlb to.
> 
> That means that we can simplify free_contig_range() [no need to handle 
> compound pages]. For free_contig_frozen_range() we would skip refcount 
> checks on that level and do something like:
> 
> void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages)
> {
>      struct page *first_page = pfn_to_page(pfn)
>      const unsigned int order = ilog2(nr_pages);
> 
>      if (PageHead(first_page)) {
>          WARN_ON_ONCE(order != compound_order(first_page));
>          free_frozen_pages(first_page, order);
>          return;
>      }
> 
>      for (; nr_pages--; pfn++)
>          free_frozen_pages(pfn_to_page(pfn), 0);
> }
> 
> CCing Willy, I don't know yet what will be better in the future. But the 
> folio stuff in there screams for problems.
> 

Sorry, I forgot to add the cc in v3; the full link is [1].

[1] https://lore.kernel.org/linux-mm/20251013133854.2466530-1-wangkefeng.wang@huawei.com/





^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-10-20 15:21         ` Kefeng Wang
@ 2025-10-23 12:06           ` Kefeng Wang
  0 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2025-10-23 12:06 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song
  Cc: sidhartha.kumar, jane.chu, Zi Yan, Vlastimil Babka,
	Brendan Jackman, Johannes Weiner, linux-mm, Matthew Wilcox



On 2025/10/20 23:21, Kefeng Wang wrote:
> + Matthew
> 
> On 2025/10/20 21:07, David Hildenbrand wrote:
>>
>>>>
>>>>> +void free_contig_range_frozen(unsigned long pfn, unsigned long 
>>>>> nr_pages)
>>>>> +{
>>>>> +    struct folio *folio = pfn_folio(pfn);
>>>>> +
>>>>> +    if (folio_test_large(folio)) {
>>>>> +        int expected = folio_nr_pages(folio);
>>>>> +
>>>>> +        WARN_ON(folio_ref_count(folio));
>>>>> +
>>>>> +        if (nr_pages == expected)
>>>>> +            free_frozen_pages(&folio->page, folio_order(folio));
>>>>> +        else
>>>>> +            WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
>>>>> +                 pfn, nr_pages, expected);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    for (; nr_pages--; pfn++) {
>>>>> +        struct page *page = pfn_to_page(pfn);
>>>>> +
>>>>> +        WARN_ON(page_ref_count(page));
>>>>> +        free_frozen_pages(page, 0);
>>>>> +    }
>>>>
>>>> That's mostly a copy-and-paste of free_contig_range().
>>>>
>>>> I wonder if there is some way to avoid duplicating a lot of
>>>> free_contig_range() here. Hmmm.
>>>>
>>>> Also, the folio stuff in there looks a bit weird I'm afraid.
>>>>
>>>> Can't we just refuse to free compound pages through this interface and
>>>> free_contig_range() ? IIRC only hugetlb uses it and uses folio_put()
>>>> either way?
>>>>
>>>> Then we can just document that compound allocations are to be freed
>>>> differently.
>>>
>>>
>>> There is a case for cma_free_folio, which calls free_contig_range for
>>> both in cma_release(), but I will try to check whether we could avoid
>>> the folio stuff in free_contig_range().
>>
>>
>> Ah, right, there is hugetlb_cma_free_folio()->cma_free_folio().
>>
>> And we need that, because we have to make sure that CMA stats are 
>> updated properly.
>>
>> All compound page handling in the freeing path is just nasty and not 
>> particularly future-proof regarding memdescs.
>>
>> I wonder if we could just teach alloc_contig to never hand out 
>> compound pages and then let the freeing path similarly assert that 
>> there are no compound pages.
>>
>> Whoever wants a compound page (currently only hugetlb?) can create 
>> that from a frozen range. Before returning the frozen range the 
>> compound page can be dissolved. That way also any memdesc can be 
>> allocated/freed by the caller later.
>>
>> The only nasty thing is the handling of splitting/merging of 
>> set_page_owner/page_table_check_alloc etc. :(
>>
>>
>>
>> As an alternative, we could only allow compound pages for frozen 
>> pages. This way, we'd force any caller to handle the allocation/ 
>> freeing of the memdesc in the future manually.
>>
>> Essentially, only allow __GFP_COMP on the frozen interface, which we 
>> would convert hugetlb to.
>>
>> That means that we can simplify free_contig_range() [no need to handle 
>> compound pages]. For free_contig_frozen_range() we would skip refcount 
>> checks on that level and do something like:


I tried to only allocate/free non-compound pages in
alloc_contig_{range,pages}() and free_contig_range().

The newly added alloc_contig_frozen_{range,pages}() can allocate both
compound and non-compound frozen pages; let's discuss it in the new
version [1].

[1] https://lore.kernel.org/linux-mm/20251023115940.3573158-1-wangkefeng.wang@huawei.com/
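
On the hugetlb side the usage then becomes roughly (sketch, exact helper
names per the series):

	struct folio *folio;
	struct page *page;

	/* allocate a frozen gigantic folio, refcount stays zero until
	 * the folio is published */
	page = alloc_contig_frozen_pages(1UL << order,
					 gfp_mask | __GFP_COMP,
					 nid, nodemask);
	if (!page)
		return NULL;
	folio = page_folio(page);

	/* freed later via free_frozen_pages()/cma, not free_contig_range() */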

>>
>> void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages)
>> {
>>      struct page *first_page = pfn_to_page(pfn)
>>      const unsigned int order = ilog2(nr_pages);
>>
>>      if (PageHead(first_page)) {
>>          WARN_ON_ONCE(order != compound_order(first_page));
>>          free_frozen_pages(first_page, order);
>>          return;
>>      }
>>
>>      for (; nr_pages--; pfn++)
>>          free_frozen_pages(pfn_to_page(pfn), 0);
>> }
>>
>> CCing Willy, I don't know yet what will be better in the future. But 
>> the folio stuff in there screams for problems.
>>
> 
> Sorry forget to add cc in v3, the full link[1],
> 
> [1] https://lore.kernel.org/linux-mm/20251013133854.2466530-1- 
> wangkefeng.wang@huawei.com/
> 
> 
> 



^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2025-10-23 12:06 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-13 13:38 [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
2025-10-13 13:38 ` [PATCH v3 1/6] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
2025-10-13 13:38 ` [PATCH v3 2/6] mm: page_alloc: add __split_page() Kefeng Wang
2025-10-13 19:44   ` David Hildenbrand
2025-10-14  3:45     ` Kefeng Wang
2025-10-13 13:38 ` [PATCH v3 3/6] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
2025-10-16 20:53   ` David Hildenbrand
2025-10-17  7:19     ` Kefeng Wang
2025-10-20 13:07       ` David Hildenbrand
2025-10-20 15:21         ` Kefeng Wang
2025-10-23 12:06           ` Kefeng Wang
2025-10-13 13:38 ` [PATCH v3 4/6] mm: cma: add __cma_release() Kefeng Wang
2025-10-13 19:48   ` David Hildenbrand
2025-10-14  3:45     ` Kefeng Wang
2025-10-13 13:38 ` [PATCH v3 5/6] mm: cma: add cma_alloc_frozen{_compound}() Kefeng Wang
2025-10-13 13:38 ` [PATCH v3 6/6] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
2025-10-16  1:20 ` [PATCH v3 0/6] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).