* [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio
@ 2025-08-02  7:31 Kefeng Wang
  2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
                   ` (6 more replies)
  0 siblings, 7 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

The first six patches clean up the hugetlb folio allocation paths, and the
last patch converts gigantic folio allocation to allocate frozen folios,
similar to alloc_buddy_hugetlb_folio().
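
For orientation, the common allocation helper at the end of the series looks
roughly as below. This is only a condensed sketch of the helpers as they
appear in the later diffs, not literal tree code; both branches now hand back
a frozen folio (refcount of the head page and all tail pages is zero):

static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
		gfp_t gfp_mask, int nid, nodemask_t *nmask,
		nodemask_t *node_alloc_noretry)
{
	int order = huge_page_order(h);
	struct folio *folio;

	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	if (order > MAX_PAGE_ORDER)
		/* gigantic: CMA or alloc_contig_range(), returned frozen */
		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
	else
		/* buddy: __alloc_frozen_pages(), also returned frozen */
		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
						  node_alloc_noretry);
	if (folio)
		init_new_hugetlb_folio(folio);
	return folio;
}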

Kefeng Wang (7):
  mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  mm: hugetlb: convert to prep_account_new_hugetlb_folio()
  mm: hugetlb: simplify alloc_buddy_hugetlb_folio()
  mm: hugetlb: directly pass order when allocating a hugetlb folio
  mm: hugetlb: remove struct hstate from init_new_hugetlb_folio()
  mm: hugetlb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio()
  mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()

 include/linux/cma.h |  20 -----
 include/linux/gfp.h |  23 ------
 mm/cma.c            |   4 +-
 mm/hugetlb.c        | 182 +++++++++++++++++---------------------------
 mm/hugetlb_cma.c    |  13 ++--
 mm/hugetlb_cma.h    |  10 +--
 mm/internal.h       |  37 +++++++++
 mm/page_alloc.c     |   8 +-
 8 files changed, 124 insertions(+), 173 deletions(-)

-- 
2.27.0




* [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-04 15:41   ` Sidhartha Kumar
                     ` (2 more replies)
  2025-08-02  7:31 ` [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio() Kefeng Wang
                   ` (5 subsequent siblings)
  6 siblings, 3 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

alloc_fresh_hugetlb_folio() is now only called by
alloc_migrate_hugetlb_folio(), so clean it up by converting it to
alloc_fresh_hugetlb_hvo_folio(). Also simplify
alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio(),
which lets us remove prep_new_hugetlb_folio() and
__prep_new_hugetlb_folio().

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/hugetlb.c | 48 +++++++++++++++---------------------------------
 1 file changed, 15 insertions(+), 33 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 753f99b4c718..5b4c19e7a5f7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1906,20 +1906,6 @@ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 	set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
-static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
-{
-	init_new_hugetlb_folio(h, folio);
-	hugetlb_vmemmap_optimize_folio(h, folio);
-}
-
-static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
-{
-	__prep_new_hugetlb_folio(h, folio);
-	spin_lock_irq(&hugetlb_lock);
-	__prep_account_new_huge_page(h, nid);
-	spin_unlock_irq(&hugetlb_lock);
-}
-
 /*
  * Find and lock address space (mapping) in write mode.
  *
@@ -2005,25 +1991,20 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 }
 
 /*
- * Common helper to allocate a fresh hugetlb page. All specific allocators
- * should use this function to get new hugetlb pages
+ * Common helper to allocate a fresh hugetlb folio. All specific allocators
+ * should use this function to get new hugetlb folio
  *
- * Note that returned page is 'frozen':  ref count of head page and all tail
+ * Note that returned folio is 'frozen':  ref count of head page and all tail
  * pages is zero.
  */
-static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
+static struct folio *alloc_fresh_hugetlb_hvo_folio(struct hstate *h,
 		gfp_t gfp_mask, int nid, nodemask_t *nmask)
 {
 	struct folio *folio;
 
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
-	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
-	if (!folio)
-		return NULL;
-
-	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
+	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (folio)
+		hugetlb_vmemmap_optimize_folio(h, folio);
 	return folio;
 }
 
@@ -2241,12 +2222,10 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
-	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
 	if (!folio)
 		return NULL;
 
-	hugetlb_vmemmap_optimize_folio(h, folio);
-
 	spin_lock_irq(&hugetlb_lock);
 	/*
 	 * nr_huge_pages needs to be adjusted within the same lock cycle
@@ -2286,10 +2265,14 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
 	if (hstate_is_gigantic(h))
 		return NULL;
 
-	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
+	folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
 	if (!folio)
 		return NULL;
 
+	spin_lock_irq(&hugetlb_lock);
+	__prep_account_new_huge_page(h, folio_nid(folio));
+	spin_unlock_irq(&hugetlb_lock);
+
 	/* fresh huge pages are frozen */
 	folio_ref_unfreeze(folio, 1);
 	/*
@@ -2836,11 +2819,10 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 		if (!new_folio) {
 			spin_unlock_irq(&hugetlb_lock);
 			gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-			new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
-							      NULL, NULL);
+			new_folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask,
+								  nid, NULL);
 			if (!new_folio)
 				return -ENOMEM;
-			__prep_new_hugetlb_folio(h, new_folio);
 			goto retry;
 		}
 
-- 
2.27.0




* [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio()
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
  2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-04 15:54   ` Sidhartha Kumar
  2025-08-04 20:36   ` Vishal Moola (Oracle)
  2025-08-02  7:31 ` [PATCH 3/7] mm: hugetlb: simplify alloc_buddy_hugetlb_folio() Kefeng Wang
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

To avoid passing a wrong nid into the accounting, move the folio_nid()
call into prep_account_new_hugetlb_folio().

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/hugetlb.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5b4c19e7a5f7..afec5a6a8aca 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1890,11 +1890,11 @@ void free_huge_folio(struct folio *folio)
 /*
  * Must be called with the hugetlb lock held
  */
-static void __prep_account_new_huge_page(struct hstate *h, int nid)
+static void prep_account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
 	lockdep_assert_held(&hugetlb_lock);
 	h->nr_huge_pages++;
-	h->nr_huge_pages_node[nid]++;
+	h->nr_huge_pages_node[folio_nid(folio)]++;
 }
 
 static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
@@ -2020,7 +2020,7 @@ static void prep_and_add_allocated_folios(struct hstate *h,
 	/* Add all new pool pages to free lists in one lock cycle */
 	spin_lock_irqsave(&hugetlb_lock, flags);
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
-		__prep_account_new_huge_page(h, folio_nid(folio));
+		prep_account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
 	}
 	spin_unlock_irqrestore(&hugetlb_lock, flags);
@@ -2232,7 +2232,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
 	 * as surplus_pages, otherwise it might confuse
 	 * persistent_huge_pages() momentarily.
 	 */
-	__prep_account_new_huge_page(h, folio_nid(folio));
+	prep_account_new_hugetlb_folio(h, folio);
 
 	/*
 	 * We could have raced with the pool size change.
@@ -2270,7 +2270,7 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
 		return NULL;
 
 	spin_lock_irq(&hugetlb_lock);
-	__prep_account_new_huge_page(h, folio_nid(folio));
+	prep_account_new_hugetlb_folio(h, folio);
 	spin_unlock_irq(&hugetlb_lock);
 
 	/* fresh huge pages are frozen */
@@ -2829,7 +2829,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 		/*
 		 * Ok, old_folio is still a genuine free hugepage. Remove it from
 		 * the freelist and decrease the counters. These will be
-		 * incremented again when calling __prep_account_new_huge_page()
+		 * incremented again when calling prep_account_new_hugetlb_folio()
 		 * and enqueue_hugetlb_folio() for new_folio. The counters will
 		 * remain stable since this happens under the lock.
 		 */
@@ -2839,7 +2839,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 		 * Ref count on new_folio is already zero as it was dropped
 		 * earlier.  It can be directly added to the pool free list.
 		 */
-		__prep_account_new_huge_page(h, nid);
+		prep_account_new_hugetlb_folio(h, new_folio);
 		enqueue_hugetlb_folio(h, new_folio);
 
 		/*
@@ -3309,7 +3309,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
 		hugetlb_bootmem_init_migratetype(folio, h);
 		/* Subdivide locks to achieve better parallel performance */
 		spin_lock_irqsave(&hugetlb_lock, flags);
-		__prep_account_new_huge_page(h, folio_nid(folio));
+		prep_account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 	}
-- 
2.27.0




* [PATCH 3/7] mm: hugetlb: simplify alloc_buddy_hugetlb_folio()
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
  2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
  2025-08-02  7:31 ` [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio() Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-04 15:57   ` Sidhartha Kumar
  2025-08-04 20:56   ` Vishal Moola (Oracle)
  2025-08-02  7:31 ` [PATCH 4/7] mm: hugetlb: directly pass order when allocating a hugetlb folio Kefeng Wang
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

Check folio once instead of three times in alloc_buddy_hugetlb_folio().

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/hugetlb.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index afec5a6a8aca..436403fb0bed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1949,29 +1949,26 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
 		nid = numa_mem_id();
 
 	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
-
-	/*
-	 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
-	 * folio this indicates an overall state change.  Clear bit so
-	 * that we resume normal 'try hard' allocations.
-	 */
-	if (node_alloc_noretry && folio && !alloc_try_hard)
-		node_clear(nid, *node_alloc_noretry);
-
-	/*
-	 * If we tried hard to get a folio but failed, set bit so that
-	 * subsequent attempts will not try as hard until there is an
-	 * overall state change.
-	 */
-	if (node_alloc_noretry && !folio && alloc_try_hard)
-		node_set(nid, *node_alloc_noretry);
-
-	if (!folio) {
+	if (folio) {
+		/*
+		 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
+		 * folio this indicates an overall state change.  Clear bit so
+		 * that we resume normal 'try hard' allocations.
+		 */
+		if (node_alloc_noretry && !alloc_try_hard)
+			node_clear(nid, *node_alloc_noretry);
+		__count_vm_event(HTLB_BUDDY_PGALLOC);
+	} else {
+		/*
+		 * If we tried hard to get a folio but failed, set bit so that
+		 * subsequent attempts will not try as hard until there is an
+		 * overall state change.
+		 */
+		if (node_alloc_noretry && alloc_try_hard)
+			node_set(nid, *node_alloc_noretry);
 		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-		return NULL;
 	}
 
-	__count_vm_event(HTLB_BUDDY_PGALLOC);
 	return folio;
 }
 
-- 
2.27.0




* [PATCH 4/7] mm: hugetlb: directly pass order when allocating a hugetlb folio
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
                   ` (2 preceding siblings ...)
  2025-08-02  7:31 ` [PATCH 3/7] mm: hugetlb: simplify alloc_buddy_hugetlb_folio() Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-04 16:22   ` Sidhartha Kumar
  2025-08-06 20:05   ` jane.chu
  2025-08-02  7:31 ` [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio() Kefeng Wang
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

Pass the order instead of struct hstate to remove the huge_page_order()
call from all hugetlb folio allocation paths.
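
For reference, hstate_is_gigantic() is just an order check (assuming its
usual definition in include/linux/hugetlb.h, quoted below), which is why the
order > MAX_PAGE_ORDER test in only_alloc_fresh_hugetlb_folio() is an
equivalent replacement once the order is passed directly:

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return huge_page_order(h) > MAX_PAGE_ORDER;
}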

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/hugetlb.c     | 27 +++++++++++++--------------
 mm/hugetlb_cma.c |  3 +--
 mm/hugetlb_cma.h |  6 +++---
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 436403fb0bed..e174a9269f52 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1473,17 +1473,16 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 #ifdef CONFIG_CONTIG_ALLOC
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	int order = huge_page_order(h);
 	bool retried = false;
 
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 retry:
-	folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
+	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
 	if (!folio) {
 		if (hugetlb_cma_exclusive_alloc())
 			return NULL;
@@ -1506,16 +1505,16 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
-					int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+					  nodemask_t *nodemask)
 {
 	return NULL;
 }
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
-					int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+					  nodemask_t *nodemask)
 {
 	return NULL;
 }
@@ -1926,11 +1925,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
 	return NULL;
 }
 
-static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
-		gfp_t gfp_mask, int nid, nodemask_t *nmask,
-		nodemask_t *node_alloc_noretry)
+static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
 {
-	int order = huge_page_order(h);
 	struct folio *folio;
 	bool alloc_try_hard = true;
 
@@ -1977,11 +1974,13 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 		nodemask_t *node_alloc_noretry)
 {
 	struct folio *folio;
+	int order = huge_page_order(h);
 
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
+	if (order > MAX_PAGE_ORDER)
+		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
 	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
+		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
+						  node_alloc_noretry);
 	if (folio)
 		init_new_hugetlb_folio(h, folio);
 	return folio;
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index f58ef4969e7a..e8e4dc7182d5 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -26,11 +26,10 @@ void hugetlb_cma_free_folio(struct folio *folio)
 }
 
 
-struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
+struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask)
 {
 	int node;
-	int order = huge_page_order(h);
 	struct folio *folio = NULL;
 
 	if (hugetlb_cma[nid])
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index f7d7fb9880a2..2c2ec8a7e134 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -4,7 +4,7 @@
 
 #ifdef CONFIG_CMA
 void hugetlb_cma_free_folio(struct folio *folio);
-struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
+struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask);
 struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
 						    bool node_exact);
@@ -18,8 +18,8 @@ static inline void hugetlb_cma_free_folio(struct folio *folio)
 {
 }
 
-static inline struct folio *hugetlb_cma_alloc_folio(struct hstate *h,
-	    gfp_t gfp_mask, int nid, nodemask_t *nodemask)
+static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
 {
 	return NULL;
 }
-- 
2.27.0




* [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio()
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
                   ` (3 preceding siblings ...)
  2025-08-02  7:31 ` [PATCH 4/7] mm: hugetlb: directly pass order when allocating a hugetlb folio Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-04 16:13   ` Sidhartha Kumar
  2025-08-06 20:08   ` jane.chu
  2025-08-02  7:31 ` [PATCH 6/7] mm: hugetlb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio() Kefeng Wang
  2025-08-02  7:31 ` [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
  6 siblings, 2 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

The struct hstate argument is never used, so remove it.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/hugetlb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e174a9269f52..b16011c9645d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1896,7 +1896,7 @@ static void prep_account_new_hugetlb_folio(struct hstate *h, struct folio *folio
 	h->nr_huge_pages_node[folio_nid(folio)]++;
 }
 
-static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
+static void init_new_hugetlb_folio(struct folio *folio)
 {
 	__folio_set_hugetlb(folio);
 	INIT_LIST_HEAD(&folio->lru);
@@ -1982,7 +1982,7 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
 						  node_alloc_noretry);
 	if (folio)
-		init_new_hugetlb_folio(h, folio);
+		init_new_hugetlb_folio(folio);
 	return folio;
 }
 
@@ -3401,7 +3401,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
 
 		hugetlb_folio_init_vmemmap(folio, h,
 					   HUGETLB_VMEMMAP_RESERVE_PAGES);
-		init_new_hugetlb_folio(h, folio);
+		init_new_hugetlb_folio(folio);
 
 		if (hugetlb_bootmem_page_prehvo(m))
 			/*
@@ -4013,7 +4013,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
 			prep_compound_page(page, dst->order);
 
 			new_folio->mapping = NULL;
-			init_new_hugetlb_folio(dst, new_folio);
+			init_new_hugetlb_folio(new_folio);
 			/* Copy the CMA flag so that it is freed correctly */
 			if (cma)
 				folio_set_hugetlb_cma(new_folio);
-- 
2.27.0




* [PATCH 6/7] mm: hugetlb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio()
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
                   ` (4 preceding siblings ...)
  2025-08-02  7:31 ` [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio() Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-04 19:09   ` Sidhartha Kumar
  2025-08-06 20:43   ` jane.chu
  2025-08-02  7:31 ` [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
  6 siblings, 2 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

Move the NUMA_NO_NODE check out of the buddy and gigantic folio
allocation paths to clean up the code a bit.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/hugetlb.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b16011c9645d..4f73b74a2cff 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1479,8 +1479,6 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 	struct folio *folio;
 	bool retried = false;
 
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
 retry:
 	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
 	if (!folio) {
@@ -1942,8 +1940,6 @@ static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
 		alloc_try_hard = false;
 	if (alloc_try_hard)
 		gfp_mask |= __GFP_RETRY_MAYFAIL;
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
 
 	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
 	if (folio) {
@@ -1976,6 +1972,9 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 	struct folio *folio;
 	int order = huge_page_order(h);
 
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
 	if (order > MAX_PAGE_ORDER)
 		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
 	else
-- 
2.27.0




* [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
                   ` (5 preceding siblings ...)
  2025-08-02  7:31 ` [PATCH 6/7] mm: hugetlb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio() Kefeng Wang
@ 2025-08-02  7:31 ` Kefeng Wang
  2025-08-07  1:22   ` jane.chu
  6 siblings, 1 reply; 33+ messages in thread
From: Kefeng Wang @ 2025-08-02  7:31 UTC (permalink / raw)
  To: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand
  Cc: linux-mm, Kefeng Wang

alloc_gigantic_folio() currently allocates a folio via alloc_contig_range()
with its refcount raised and then freezes it. Convert it to allocate a
frozen folio directly, which removes the atomic operations on the folio
refcount and cleans up alloc_gigantic_folio() a bit.

Also move folio_alloc_frozen_gigantic(), cma_alloc/free_frozen_folio() and
cma_validate_zones() into mm/internal.h, since only hugetlb uses them.
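
The resulting refcount contract, condensed from the hunks below together
with the existing unfreeze in alloc_migrate_hugetlb_folio() (visible in the
context of patch 1); the example_*() helpers are hypothetical and only
illustrate the flow:

/* Allocation returns a frozen folio: refcount of head and tail pages is 0. */
static struct folio *example_unfreeze_for_use(int order, gfp_t gfp_mask,
					      int nid, nodemask_t *nmask)
{
	struct folio *folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);

	if (!folio)
		return NULL;

	/* A reference is taken only when the folio is actually handed out. */
	folio_ref_unfreeze(folio, 1);
	return folio;
}

/*
 * The free side (tail of __update_and_free_hugetlb_folio() after this
 * patch) expects the folio to be frozen again and never calls folio_put().
 */
static void example_free_frozen(struct folio *folio)
{
	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);

	if (folio_test_hugetlb_cma(folio))
		hugetlb_cma_free_frozen_folio(folio);
	else
		free_frozen_pages(&folio->page, folio_order(folio));
}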

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/cma.h | 20 --------------------
 include/linux/gfp.h | 23 -----------------------
 mm/cma.c            |  4 ++--
 mm/hugetlb.c        | 43 +++++++++++--------------------------------
 mm/hugetlb_cma.c    | 12 ++++++------
 mm/hugetlb_cma.h    | 10 +++++-----
 mm/internal.h       | 37 +++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c     |  8 +++++---
 8 files changed, 66 insertions(+), 91 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 62d9c1cf6326..f116e23629ee 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -57,24 +57,4 @@ extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long e
 
 extern void cma_reserve_pages_on_error(struct cma *cma);
 
-#ifdef CONFIG_CMA
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
-bool cma_free_folio(struct cma *cma, const struct folio *folio);
-bool cma_validate_zones(struct cma *cma);
-#else
-static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
-{
-	return NULL;
-}
-
-static inline bool cma_free_folio(struct cma *cma, const struct folio *folio)
-{
-	return false;
-}
-static inline bool cma_validate_zones(struct cma *cma)
-{
-	return false;
-}
-#endif
-
 #endif
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5ebf26fcdcfa..5ad78c296344 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -440,27 +440,4 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
 #endif
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 
-#ifdef CONFIG_CONTIG_ALLOC
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-							int nid, nodemask_t *node)
-{
-	struct page *page;
-
-	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
-		return NULL;
-
-	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
-
-	return page ? page_folio(page) : NULL;
-}
-#else
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-							int nid, nodemask_t *node)
-{
-	return NULL;
-}
-#endif
-/* This should be paired with folio_put() rather than free_contig_range(). */
-#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
-
 #endif /* __LINUX_GFP_H */
diff --git a/mm/cma.c b/mm/cma.c
index 2ffa4befb99a..9539fd5700b6 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -921,7 +921,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
 	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
 }
 
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
+struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
 {
 	struct page *page;
 
@@ -1008,7 +1008,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
 	return true;
 }
 
-bool cma_free_folio(struct cma *cma, const struct folio *folio)
+bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
 {
 	if (WARN_ON(!folio_test_large(folio)))
 		return false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4f73b74a2cff..eea9dc782007 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -125,16 +125,6 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
-static void hugetlb_free_folio(struct folio *folio)
-{
-	if (folio_test_hugetlb_cma(folio)) {
-		hugetlb_cma_free_folio(folio);
-		return;
-	}
-
-	folio_put(folio);
-}
-
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -1477,29 +1467,15 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	bool retried = false;
-
-retry:
-	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
-	if (!folio) {
-		if (hugetlb_cma_exclusive_alloc())
-			return NULL;
-
-		folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
-		if (!folio)
-			return NULL;
-	}
 
-	if (folio_ref_freeze(folio, 1))
+	folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
+	if (folio)
 		return folio;
 
-	pr_warn("HugeTLB: unexpected refcount on PFN %lu\n", folio_pfn(folio));
-	hugetlb_free_folio(folio);
-	if (!retried) {
-		retried = true;
-		goto retry;
-	}
-	return NULL;
+	if (hugetlb_cma_exclusive_alloc())
+		return NULL;
+
+	return folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
@@ -1641,9 +1617,12 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 	if (unlikely(folio_test_hwpoison(folio)))
 		folio_clear_hugetlb_hwpoison(folio);
 
-	folio_ref_unfreeze(folio, 1);
+	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
 
-	hugetlb_free_folio(folio);
+	if (folio_test_hugetlb_cma(folio))
+		hugetlb_cma_free_frozen_folio(folio);
+	else
+		free_frozen_pages(&folio->page, folio_order(folio));
 }
 
 /*
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index e8e4dc7182d5..337776786ecf 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -18,29 +18,29 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static bool hugetlb_cma_only;
 static unsigned long hugetlb_cma_size __initdata;
 
-void hugetlb_cma_free_folio(struct folio *folio)
+void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 	int nid = folio_nid(folio);
 
-	WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio));
+	WARN_ON_ONCE(!cma_free_frozen_folio(hugetlb_cma[nid], folio));
 }
 
-
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask)
 {
 	int node;
 	struct folio *folio = NULL;
 
 	if (hugetlb_cma[nid])
-		folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask);
+		folio = cma_alloc_frozen_folio(hugetlb_cma[nid], order, gfp_mask);
 
 	if (!folio && !(gfp_mask & __GFP_THISNODE)) {
 		for_each_node_mask(node, *nodemask) {
 			if (node == nid || !hugetlb_cma[node])
 				continue;
 
-			folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask);
+			folio = cma_alloc_frozen_folio(hugetlb_cma[node],
+						       order, gfp_mask);
 			if (folio)
 				break;
 		}
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index 2c2ec8a7e134..71db3544816e 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -3,8 +3,8 @@
 #define _LINUX_HUGETLB_CMA_H
 
 #ifdef CONFIG_CMA
-void hugetlb_cma_free_folio(struct folio *folio);
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+void hugetlb_cma_free_frozen_folio(struct folio *folio);
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask);
 struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
 						    bool node_exact);
@@ -14,12 +14,12 @@ unsigned long hugetlb_cma_total_size(void);
 void hugetlb_cma_validate_params(void);
 bool hugetlb_early_cma(struct hstate *h);
 #else
-static inline void hugetlb_cma_free_folio(struct folio *folio)
+static inline void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 }
 
-static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
 {
 	return NULL;
 }
diff --git a/mm/internal.h b/mm/internal.h
index 1da16d550a45..b12cd23e88c4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -933,6 +933,9 @@ struct cma;
 #ifdef CONFIG_CMA
 void *cma_reserve_early(struct cma *cma, unsigned long size);
 void init_cma_pageblock(struct page *page);
+struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp);
+bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio);
+bool cma_validate_zones(struct cma *cma);
 #else
 static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
 {
@@ -941,8 +944,42 @@ static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
 static inline void init_cma_pageblock(struct page *page)
 {
 }
+static inline struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
+{
+	return NULL;
+}
+static inline bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
+{
+	return false;
+}
+static inline bool cma_validate_zones(struct cma *cma)
+{
+	return false;
+}
 #endif
 
+#ifdef CONFIG_CONTIG_ALLOC
+static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
+		gfp_t gfp, int nid, nodemask_t *node)
+{
+	struct page *page;
+
+	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
+		return NULL;
+
+	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
+
+	return page ? page_folio(page) : NULL;
+}
+#else
+static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
+		gfp_t gfp, int nid, nodemask_t *node)
+{
+	return NULL;
+}
+#endif
+/* This should be paired with free_frozen_pages() rather than free_contig_range(). */
+#define folio_alloc_frozen_gigantic(...) alloc_hooks(folio_alloc_frozen_gigantic_noprof(__VA_ARGS__))
 
 int find_suitable_fallback(struct free_area *area, unsigned int order,
 			   int migratetype, bool claimable);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1d037f97c5f..c542ababb8dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6822,6 +6822,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
  * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
  *		action and reclaim modifiers are supported. Reclaim modifiers
  *		control allocation behavior during compaction/migration/reclaim.
+ *		If gfp_mask contains __GFP_COMP, the refcount of compound page
+ *		will be not increased.
  *
  * The PFN range does not have to be pageblock aligned. The PFN range must
  * belong to a single zone.
@@ -6955,7 +6957,6 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 
 		check_new_pages(head, order);
 		prep_new_page(head, order, gfp_mask, 0);
-		set_page_refcounted(head);
 	} else {
 		ret = -EINVAL;
 		WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
@@ -7074,10 +7075,11 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 	struct folio *folio = pfn_folio(pfn);
 
 	if (folio_test_large(folio)) {
-		int expected = folio_nr_pages(folio);
+		int order = folio_order(folio);
+		int expected = 1 << order;
 
 		if (nr_pages == expected)
-			folio_put(folio);
+			free_frozen_pages(&folio->page, order);
 		else
 			WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
 			     pfn, nr_pages, expected);
-- 
2.27.0




* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
@ 2025-08-04 15:41   ` Sidhartha Kumar
  2025-08-04 20:27   ` Vishal Moola (Oracle)
  2025-08-05 22:18   ` jane.chu
  2 siblings, 0 replies; 33+ messages in thread
From: Sidhartha Kumar @ 2025-08-04 15:41 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm

On 8/2/25 3:31 AM, Kefeng Wang wrote:
> alloc_fresh_hugetlb_folio() is now only called by
> alloc_migrate_hugetlb_folio(), so clean it up by converting it to
> alloc_fresh_hugetlb_hvo_folio(). Also simplify
> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio(),
> which lets us remove prep_new_hugetlb_folio() and
> __prep_new_hugetlb_folio().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

> ---
>   mm/hugetlb.c | 48 +++++++++++++++---------------------------------
>   1 file changed, 15 insertions(+), 33 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 753f99b4c718..5b4c19e7a5f7 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1906,20 +1906,6 @@ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
>   	set_hugetlb_cgroup_rsvd(folio, NULL);
>   }
>   
> -static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
> -{
> -	init_new_hugetlb_folio(h, folio);
> -	hugetlb_vmemmap_optimize_folio(h, folio);
> -}
> -
> -static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
> -{
> -	__prep_new_hugetlb_folio(h, folio);
> -	spin_lock_irq(&hugetlb_lock);
> -	__prep_account_new_huge_page(h, nid);
> -	spin_unlock_irq(&hugetlb_lock);
> -}
> -
>   /*
>    * Find and lock address space (mapping) in write mode.
>    *
> @@ -2005,25 +1991,20 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   }
>   
>   /*
> - * Common helper to allocate a fresh hugetlb page. All specific allocators
> - * should use this function to get new hugetlb pages
> + * Common helper to allocate a fresh hugetlb folio. All specific allocators
> + * should use this function to get new hugetlb folio
>    *
> - * Note that returned page is 'frozen':  ref count of head page and all tail
> + * Note that returned folio is 'frozen':  ref count of head page and all tail
>    * pages is zero.
>    */
> -static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
> +static struct folio *alloc_fresh_hugetlb_hvo_folio(struct hstate *h,
>   		gfp_t gfp_mask, int nid, nodemask_t *nmask)
>   {
>   	struct folio *folio;
>   
> -	if (hstate_is_gigantic(h))
> -		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
> -	else
> -		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> -	if (!folio)
> -		return NULL;
> -
> -	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> +	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> +	if (folio)
> +		hugetlb_vmemmap_optimize_folio(h, folio);
>   	return folio;
>   }
>   
> @@ -2241,12 +2222,10 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
>   		goto out_unlock;
>   	spin_unlock_irq(&hugetlb_lock);
>   
> -	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> +	folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
>   	if (!folio)
>   		return NULL;
>   
> -	hugetlb_vmemmap_optimize_folio(h, folio);
> -
>   	spin_lock_irq(&hugetlb_lock);
>   	/*
>   	 * nr_huge_pages needs to be adjusted within the same lock cycle
> @@ -2286,10 +2265,14 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
>   	if (hstate_is_gigantic(h))
>   		return NULL;
>   
> -	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
> +	folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
>   	if (!folio)
>   		return NULL;
>   
> +	spin_lock_irq(&hugetlb_lock);
> +	__prep_account_new_huge_page(h, folio_nid(folio));
> +	spin_unlock_irq(&hugetlb_lock);
> +
>   	/* fresh huge pages are frozen */
>   	folio_ref_unfreeze(folio, 1);
>   	/*
> @@ -2836,11 +2819,10 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
>   		if (!new_folio) {
>   			spin_unlock_irq(&hugetlb_lock);
>   			gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
> -			new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
> -							      NULL, NULL);
> +			new_folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask,
> +								  nid, NULL);
>   			if (!new_folio)
>   				return -ENOMEM;
> -			__prep_new_hugetlb_folio(h, new_folio);
>   			goto retry;
>   		}
>   




* Re: [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio() Kefeng Wang
@ 2025-08-04 15:54   ` Sidhartha Kumar
  2025-08-04 20:36   ` Vishal Moola (Oracle)
  1 sibling, 0 replies; 33+ messages in thread
From: Sidhartha Kumar @ 2025-08-04 15:54 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm

On 8/2/25 3:31 AM, Kefeng Wang wrote:
> To avoid passing a wrong nid into the accounting, move the folio_nid()
> call into prep_account_new_hugetlb_folio().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

> ---
>   mm/hugetlb.c | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 5b4c19e7a5f7..afec5a6a8aca 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1890,11 +1890,11 @@ void free_huge_folio(struct folio *folio)
>   /*
>    * Must be called with the hugetlb lock held
>    */
> -static void __prep_account_new_huge_page(struct hstate *h, int nid)
> +static void prep_account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
>   {
>   	lockdep_assert_held(&hugetlb_lock);
>   	h->nr_huge_pages++;
> -	h->nr_huge_pages_node[nid]++;
> +	h->nr_huge_pages_node[folio_nid(folio)]++;
>   }
>   
>   static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
> @@ -2020,7 +2020,7 @@ static void prep_and_add_allocated_folios(struct hstate *h,
>   	/* Add all new pool pages to free lists in one lock cycle */
>   	spin_lock_irqsave(&hugetlb_lock, flags);
>   	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
> -		__prep_account_new_huge_page(h, folio_nid(folio));
> +		prep_account_new_hugetlb_folio(h, folio);
>   		enqueue_hugetlb_folio(h, folio);
>   	}
>   	spin_unlock_irqrestore(&hugetlb_lock, flags);
> @@ -2232,7 +2232,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
>   	 * as surplus_pages, otherwise it might confuse
>   	 * persistent_huge_pages() momentarily.
>   	 */
> -	__prep_account_new_huge_page(h, folio_nid(folio));
> +	prep_account_new_hugetlb_folio(h, folio);
>   
>   	/*
>   	 * We could have raced with the pool size change.
> @@ -2270,7 +2270,7 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
>   		return NULL;
>   
>   	spin_lock_irq(&hugetlb_lock);
> -	__prep_account_new_huge_page(h, folio_nid(folio));
> +	prep_account_new_hugetlb_folio(h, folio);
>   	spin_unlock_irq(&hugetlb_lock);
>   
>   	/* fresh huge pages are frozen */
> @@ -2829,7 +2829,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
>   		/*
>   		 * Ok, old_folio is still a genuine free hugepage. Remove it from
>   		 * the freelist and decrease the counters. These will be
> -		 * incremented again when calling __prep_account_new_huge_page()
> +		 * incremented again when calling prep_account_new_hugetlb_folio()
>   		 * and enqueue_hugetlb_folio() for new_folio. The counters will
>   		 * remain stable since this happens under the lock.
>   		 */
> @@ -2839,7 +2839,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
>   		 * Ref count on new_folio is already zero as it was dropped
>   		 * earlier.  It can be directly added to the pool free list.
>   		 */
> -		__prep_account_new_huge_page(h, nid);
> +		prep_account_new_hugetlb_folio(h, new_folio);
>   		enqueue_hugetlb_folio(h, new_folio);
>   
>   		/*
> @@ -3309,7 +3309,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
>   		hugetlb_bootmem_init_migratetype(folio, h);
>   		/* Subdivide locks to achieve better parallel performance */
>   		spin_lock_irqsave(&hugetlb_lock, flags);
> -		__prep_account_new_huge_page(h, folio_nid(folio));
> +		prep_account_new_hugetlb_folio(h, folio);
>   		enqueue_hugetlb_folio(h, folio);
>   		spin_unlock_irqrestore(&hugetlb_lock, flags);
>   	}




* Re: [PATCH 3/7] mm: hugetlb: simplify alloc_buddy_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 3/7] mm: hugetlb: simplify alloc_buddy_hugetlb_folio() Kefeng Wang
@ 2025-08-04 15:57   ` Sidhartha Kumar
  2025-08-04 20:56   ` Vishal Moola (Oracle)
  1 sibling, 0 replies; 33+ messages in thread
From: Sidhartha Kumar @ 2025-08-04 15:57 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm

On 8/2/25 3:31 AM, Kefeng Wang wrote:
> Check folio once instead of three times in alloc_buddy_hugetlb_folio().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

> ---
>   mm/hugetlb.c | 37 +++++++++++++++++--------------------
>   1 file changed, 17 insertions(+), 20 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index afec5a6a8aca..436403fb0bed 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1949,29 +1949,26 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
>   		nid = numa_mem_id();
>   
>   	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
> -
> -	/*
> -	 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
> -	 * folio this indicates an overall state change.  Clear bit so
> -	 * that we resume normal 'try hard' allocations.
> -	 */
> -	if (node_alloc_noretry && folio && !alloc_try_hard)
> -		node_clear(nid, *node_alloc_noretry);
> -
> -	/*
> -	 * If we tried hard to get a folio but failed, set bit so that
> -	 * subsequent attempts will not try as hard until there is an
> -	 * overall state change.
> -	 */
> -	if (node_alloc_noretry && !folio && alloc_try_hard)
> -		node_set(nid, *node_alloc_noretry);
> -
> -	if (!folio) {
> +	if (folio) {
> +		/*
> +		 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
> +		 * folio this indicates an overall state change.  Clear bit so
> +		 * that we resume normal 'try hard' allocations.
> +		 */
> +		if (node_alloc_noretry && !alloc_try_hard)
> +			node_clear(nid, *node_alloc_noretry);
> +		__count_vm_event(HTLB_BUDDY_PGALLOC);
> +	} else {
> +		/*
> +		 * If we tried hard to get a folio but failed, set bit so that
> +		 * subsequent attempts will not try as hard until there is an
> +		 * overall state change.
> +		 */
> +		if (node_alloc_noretry && alloc_try_hard)
> +			node_set(nid, *node_alloc_noretry);
>   		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> -		return NULL;
>   	}
>   
> -	__count_vm_event(HTLB_BUDDY_PGALLOC);
>   	return folio;
>   }
>   




* Re: [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio() Kefeng Wang
@ 2025-08-04 16:13   ` Sidhartha Kumar
  2025-08-06 20:08   ` jane.chu
  1 sibling, 0 replies; 33+ messages in thread
From: Sidhartha Kumar @ 2025-08-04 16:13 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm

On 8/2/25 3:31 AM, Kefeng Wang wrote:
> The struct hstate argument is never used, so remove it.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

> ---
>   mm/hugetlb.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index e174a9269f52..b16011c9645d 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1896,7 +1896,7 @@ static void prep_account_new_hugetlb_folio(struct hstate *h, struct folio *folio
>   	h->nr_huge_pages_node[folio_nid(folio)]++;
>   }
>   
> -static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
> +static void init_new_hugetlb_folio(struct folio *folio)
>   {
>   	__folio_set_hugetlb(folio);
>   	INIT_LIST_HEAD(&folio->lru);
> @@ -1982,7 +1982,7 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
>   						  node_alloc_noretry);
>   	if (folio)
> -		init_new_hugetlb_folio(h, folio);
> +		init_new_hugetlb_folio(folio);
>   	return folio;
>   }
>   
> @@ -3401,7 +3401,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
>   
>   		hugetlb_folio_init_vmemmap(folio, h,
>   					   HUGETLB_VMEMMAP_RESERVE_PAGES);
> -		init_new_hugetlb_folio(h, folio);
> +		init_new_hugetlb_folio(folio);
>   
>   		if (hugetlb_bootmem_page_prehvo(m))
>   			/*
> @@ -4013,7 +4013,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
>   			prep_compound_page(page, dst->order);
>   
>   			new_folio->mapping = NULL;
> -			init_new_hugetlb_folio(dst, new_folio);
> +			init_new_hugetlb_folio(new_folio);
>   			/* Copy the CMA flag so that it is freed correctly */
>   			if (cma)
>   				folio_set_hugetlb_cma(new_folio);




* Re: [PATCH 4/7] mm: hugetlb: directly pass order when allocating a hugetlb folio
  2025-08-02  7:31 ` [PATCH 4/7] mm: hugetlb: directly pass order when allocating a hugetlb folio Kefeng Wang
@ 2025-08-04 16:22   ` Sidhartha Kumar
  2025-08-06 20:05   ` jane.chu
  1 sibling, 0 replies; 33+ messages in thread
From: Sidhartha Kumar @ 2025-08-04 16:22 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm

On 8/2/25 3:31 AM, Kefeng Wang wrote:
> Pass the order instead of struct hstate to remove the huge_page_order()
> call from all hugetlb folio allocation paths.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

> ---
>   mm/hugetlb.c     | 27 +++++++++++++--------------
>   mm/hugetlb_cma.c |  3 +--
>   mm/hugetlb_cma.h |  6 +++---
>   3 files changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 436403fb0bed..e174a9269f52 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1473,17 +1473,16 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
>   
>   #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
>   #ifdef CONFIG_CONTIG_ALLOC
> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
>   		int nid, nodemask_t *nodemask)
>   {
>   	struct folio *folio;
> -	int order = huge_page_order(h);
>   	bool retried = false;
>   
>   	if (nid == NUMA_NO_NODE)
>   		nid = numa_mem_id();
>   retry:
> -	folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
> +	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
>   	if (!folio) {
>   		if (hugetlb_cma_exclusive_alloc())
>   			return NULL;
> @@ -1506,16 +1505,16 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
>   }
>   
>   #else /* !CONFIG_CONTIG_ALLOC */
> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
> -					int nid, nodemask_t *nodemask)
> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
> +					  nodemask_t *nodemask)
>   {
>   	return NULL;
>   }
>   #endif /* CONFIG_CONTIG_ALLOC */
>   
>   #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
> -					int nid, nodemask_t *nodemask)
> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
> +					  nodemask_t *nodemask)
>   {
>   	return NULL;
>   }
> @@ -1926,11 +1925,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
>   	return NULL;
>   }
>   
> -static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
> -		gfp_t gfp_mask, int nid, nodemask_t *nmask,
> -		nodemask_t *node_alloc_noretry)
> +static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
> +		int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
>   {
> -	int order = huge_page_order(h);
>   	struct folio *folio;
>   	bool alloc_try_hard = true;
>   
> @@ -1977,11 +1974,13 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   		nodemask_t *node_alloc_noretry)
>   {
>   	struct folio *folio;
> +	int order = huge_page_order(h);
>   
> -	if (hstate_is_gigantic(h))
> -		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
> +	if (order > MAX_PAGE_ORDER)
> +		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
>   	else
> -		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
> +		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
> +						  node_alloc_noretry);
>   	if (folio)
>   		init_new_hugetlb_folio(h, folio);
>   	return folio;
> diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
> index f58ef4969e7a..e8e4dc7182d5 100644
> --- a/mm/hugetlb_cma.c
> +++ b/mm/hugetlb_cma.c
> @@ -26,11 +26,10 @@ void hugetlb_cma_free_folio(struct folio *folio)
>   }
>   
>   
> -struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
> +struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
>   				      int nid, nodemask_t *nodemask)
>   {
>   	int node;
> -	int order = huge_page_order(h);
>   	struct folio *folio = NULL;
>   
>   	if (hugetlb_cma[nid])
> diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
> index f7d7fb9880a2..2c2ec8a7e134 100644
> --- a/mm/hugetlb_cma.h
> +++ b/mm/hugetlb_cma.h
> @@ -4,7 +4,7 @@
>   
>   #ifdef CONFIG_CMA
>   void hugetlb_cma_free_folio(struct folio *folio);
> -struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
> +struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
>   				      int nid, nodemask_t *nodemask);
>   struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
>   						    bool node_exact);
> @@ -18,8 +18,8 @@ static inline void hugetlb_cma_free_folio(struct folio *folio)
>   {
>   }
>   
> -static inline struct folio *hugetlb_cma_alloc_folio(struct hstate *h,
> -	    gfp_t gfp_mask, int nid, nodemask_t *nodemask)
> +static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
> +		int nid, nodemask_t *nodemask)
>   {
>   	return NULL;
>   }




* Re: [PATCH 6/7] mm: hugetlb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 6/7] mm: hugetlb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio() Kefeng Wang
@ 2025-08-04 19:09   ` Sidhartha Kumar
  2025-08-06 20:43   ` jane.chu
  1 sibling, 0 replies; 33+ messages in thread
From: Sidhartha Kumar @ 2025-08-04 19:09 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm

On 8/2/25 3:31 AM, Kefeng Wang wrote:
> Move the NUMA_NO_NODE check out of the buddy and gigantic folio
> allocation paths to clean up the code a bit.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>

> ---
>   mm/hugetlb.c | 7 +++----
>   1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index b16011c9645d..4f73b74a2cff 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1479,8 +1479,6 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
>   	struct folio *folio;
>   	bool retried = false;
>   
> -	if (nid == NUMA_NO_NODE)
> -		nid = numa_mem_id();
>   retry:
>   	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
>   	if (!folio) {
> @@ -1942,8 +1940,6 @@ static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
>   		alloc_try_hard = false;
>   	if (alloc_try_hard)
>   		gfp_mask |= __GFP_RETRY_MAYFAIL;
> -	if (nid == NUMA_NO_NODE)
> -		nid = numa_mem_id();
>   
>   	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
>   	if (folio) {
> @@ -1976,6 +1972,9 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   	struct folio *folio;
>   	int order = huge_page_order(h);
>   
> +	if (nid == NUMA_NO_NODE)
> +		nid = numa_mem_id();
> +
>   	if (order > MAX_PAGE_ORDER)
>   		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
>   	else




* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
  2025-08-04 15:41   ` Sidhartha Kumar
@ 2025-08-04 20:27   ` Vishal Moola (Oracle)
  2025-08-05 14:21     ` Kefeng Wang
  2025-08-05 22:18   ` jane.chu
  2 siblings, 1 reply; 33+ messages in thread
From: Vishal Moola (Oracle) @ 2025-08-04 20:27 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm

On Sat, Aug 02, 2025 at 03:31:01PM +0800, Kefeng Wang wrote:
> alloc_fresh_hugetlb_folio() is now only called by
> alloc_migrate_hugetlb_folio(), so clean it up by converting it to
> alloc_fresh_hugetlb_hvo_folio(). Also simplify
> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio(),
> which lets us remove prep_new_hugetlb_folio() and
> __prep_new_hugetlb_folio().

Have you considered renaming alloc_fresh_hugetlb_hvo_folio() to
alloc_fresh_hvo_folio() instead? The word hugetlb is already a part of
the hvo acronym.



* Re: [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio() Kefeng Wang
  2025-08-04 15:54   ` Sidhartha Kumar
@ 2025-08-04 20:36   ` Vishal Moola (Oracle)
  2025-08-05 14:21     ` Kefeng Wang
  2025-08-06  0:53     ` jane.chu
  1 sibling, 2 replies; 33+ messages in thread
From: Vishal Moola (Oracle) @ 2025-08-04 20:36 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm

On Sat, Aug 02, 2025 at 03:31:02PM +0800, Kefeng Wang wrote:
> In order to avoid the wrong nid passed into the account, it's better
> to move folio_nid() into prep_account_new_hugetlb_folio().

I like the patch, but I think we can rename prep_account_new_huge_page()
to something better. I don't think we need the "prep" part anymore; could
we go with something like account_new_hugetlb_folio()?
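
For illustration, a minimal sketch of what the renamed helper could look
like once folio_nid() moves inside it (the name follows the suggestion
above and is hypothetical; the counters and the locking expectation mirror
the existing __prep_account_new_huge_page()):

	/* Caller is expected to hold hugetlb_lock, as today. */
	static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
	{
		lockdep_assert_held(&hugetlb_lock);

		h->nr_huge_pages++;
		h->nr_huge_pages_node[folio_nid(folio)]++;
	}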


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/7] mm; hugetlb: simpify alloc_buddy_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 3/7] mm; hugetlb: simpify alloc_buddy_hugetlb_folio() Kefeng Wang
  2025-08-04 15:57   ` Sidhartha Kumar
@ 2025-08-04 20:56   ` Vishal Moola (Oracle)
  2025-08-05 14:22     ` Kefeng Wang
  1 sibling, 1 reply; 33+ messages in thread
From: Vishal Moola (Oracle) @ 2025-08-04 20:56 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm

On Sat, Aug 02, 2025 at 03:31:03PM +0800, Kefeng Wang wrote:
> Check folio once instead of three times in alloc_buddy_hugetlb_folio().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  mm/hugetlb.c | 37 +++++++++++++++++--------------------
>  1 file changed, 17 insertions(+), 20 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index afec5a6a8aca..436403fb0bed 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1949,29 +1949,26 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
>  		nid = numa_mem_id();
>  
>  	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
> -
> -	/*
> -	 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
> -	 * folio this indicates an overall state change.  Clear bit so
> -	 * that we resume normal 'try hard' allocations.
> -	 */
> -	if (node_alloc_noretry && folio && !alloc_try_hard)
> -		node_clear(nid, *node_alloc_noretry);
> -
> -	/*
> -	 * If we tried hard to get a folio but failed, set bit so that
> -	 * subsequent attempts will not try as hard until there is an
> -	 * overall state change.
> -	 */
> -	if (node_alloc_noretry && !folio && alloc_try_hard)
> -		node_set(nid, *node_alloc_noretry);
> -
> -	if (!folio) {

I really don't like the way you've done this below. I feel like it makes
the code harder to understand since it feels like we've effectively
divided this function into 2 different ones (depending on whether we
successfully got a folio).

I don't have a better idea, but I also don't think eliminating those 2
extra checks is worth increasing the complexity like this.

> +	if (folio) {
> +		/*
> +		 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
> +		 * folio this indicates an overall state change.  Clear bit so
> +		 * that we resume normal 'try hard' allocations.
> +		 */
> +		if (node_alloc_noretry && !alloc_try_hard)
> +			node_clear(nid, *node_alloc_noretry);
> +		__count_vm_event(HTLB_BUDDY_PGALLOC);
> +	} else {
> +		/*
> +		 * If we tried hard to get a folio but failed, set bit so that
> +		 * subsequent attempts will not try as hard until there is an
> +		 * overall state change.
> +		 */
> +		if (node_alloc_noretry && alloc_try_hard)
> +			node_set(nid, *node_alloc_noretry);
>  		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> -		return NULL;
>  	}
>  
> -	__count_vm_event(HTLB_BUDDY_PGALLOC);
>  	return folio;
>  }
>  
> -- 
> 2.27.0
> 
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-04 20:27   ` Vishal Moola (Oracle)
@ 2025-08-05 14:21     ` Kefeng Wang
  2025-08-05 17:56       ` Vishal Moola (Oracle)
  0 siblings, 1 reply; 33+ messages in thread
From: Kefeng Wang @ 2025-08-05 14:21 UTC (permalink / raw)
  To: Vishal Moola (Oracle)
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm



On 2025/8/5 4:27, Vishal Moola (Oracle) wrote:
> On Sat, Aug 02, 2025 at 03:31:01PM +0800, Kefeng Wang wrote:
>> Now alloc_fresh_hugetlb_folio() is only called by
>> alloc_migrate_hugetlb_folio(), cleanup it by converting to
>> alloc_fresh_hugetlb_hvo_folio(), also simplify the
>> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio()
>> too which help us to remove prep_new_hugetlb_folio() and
>> __prep_new_hugetlb_folio().
> 
> Have you considered renaming alloc_fresh_hugetlb_hvo_folio() to
> alloc_fresh_hvo_folio() instead? The word hugetlb is already a part of
> the hvo acronym.

Maybe THP will support HVO in the future, so I think we could keep the
hugetlb word?


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio()
  2025-08-04 20:36   ` Vishal Moola (Oracle)
@ 2025-08-05 14:21     ` Kefeng Wang
  2025-08-06  0:53     ` jane.chu
  1 sibling, 0 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-05 14:21 UTC (permalink / raw)
  To: Vishal Moola (Oracle)
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm



On 2025/8/5 4:36, Vishal Moola (Oracle) wrote:
> On Sat, Aug 02, 2025 at 03:31:02PM +0800, Kefeng Wang wrote:
>> In order to avoid the wrong nid passed into the account, it's better
>> to move folio_nid() into prep_account_new_hugetlb_folio().
> 
> I like the patch, but I think we can rename prep_account_new_huge_page()
> something better. I don't think we need the "prep" part anymore, could
> we go with something like account_new_hugetlb_folio()?

OK, will update in next version.



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/7] mm; hugetlb: simpify alloc_buddy_hugetlb_folio()
  2025-08-04 20:56   ` Vishal Moola (Oracle)
@ 2025-08-05 14:22     ` Kefeng Wang
  0 siblings, 0 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-05 14:22 UTC (permalink / raw)
  To: Vishal Moola (Oracle)
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm



On 2025/8/5 4:56, Vishal Moola (Oracle) wrote:
> On Sat, Aug 02, 2025 at 03:31:03PM +0800, Kefeng Wang wrote:
>> Check folio once instead of three times in alloc_buddy_hugetlb_folio().
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/hugetlb.c | 37 +++++++++++++++++--------------------
>>   1 file changed, 17 insertions(+), 20 deletions(-)

...


> 
> I really don't like the way you've done this below. I feel like it makes
> the code harder to understand since it feels like we've effectively
> divided this function into 2 different ones (depending on if we
> successfully got a folio).
> 
> I don't have a better idea, but I also don't think eliminating those 2
> extra checks is worth increasing the complexity like this.

Sure, we could drop these changes.



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-05 14:21     ` Kefeng Wang
@ 2025-08-05 17:56       ` Vishal Moola (Oracle)
  0 siblings, 0 replies; 33+ messages in thread
From: Vishal Moola (Oracle) @ 2025-08-05 17:56 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm

On Tue, Aug 05, 2025 at 10:21:17PM +0800, Kefeng Wang wrote:
> 
> 
> On 2025/8/5 4:27, Vishal Moola (Oracle) wrote:
> > On Sat, Aug 02, 2025 at 03:31:01PM +0800, Kefeng Wang wrote:
> > > Now alloc_fresh_hugetlb_folio() is only called by
> > > alloc_migrate_hugetlb_folio(), cleanup it by converting to
> > > alloc_fresh_hugetlb_hvo_folio(), also simplify the
> > > alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio()
> > > too which help us to remove prep_new_hugetlb_folio() and
> > > __prep_new_hugetlb_folio().
> > 
> > Have you considered renaming alloc_fresh_hugetlb_hvo_folio() to
> > alloc_fresh_hvo_folio() instead? The word hugetlb is already a part of
> > the hvo acronym.
> 
> Maybe THP will support HVO in the future, so I think we could keep hugetlb
> word?

Hmmm, it's alright either way, I just feel like hugetlb + hvo is redundant.
There are just so many words in the name now :(


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
  2025-08-04 15:41   ` Sidhartha Kumar
  2025-08-04 20:27   ` Vishal Moola (Oracle)
@ 2025-08-05 22:18   ` jane.chu
  2025-08-06  0:33     ` jane.chu
  2 siblings, 1 reply; 33+ messages in thread
From: jane.chu @ 2025-08-05 22:18 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm


On 8/2/2025 12:31 AM, Kefeng Wang wrote:
> Now alloc_fresh_hugetlb_folio() is only called by
> alloc_migrate_hugetlb_folio(), cleanup it by converting to
> alloc_fresh_hugetlb_hvo_folio(), also simplify the
> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio()
> too which help us to remove prep_new_hugetlb_folio() and
> __prep_new_hugetlb_folio().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/hugetlb.c | 48 +++++++++++++++---------------------------------
>   1 file changed, 15 insertions(+), 33 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 753f99b4c718..5b4c19e7a5f7 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1906,20 +1906,6 @@ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
>   	set_hugetlb_cgroup_rsvd(folio, NULL);
>   }
>   
> -static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
> -{
> -	init_new_hugetlb_folio(h, folio);
> -	hugetlb_vmemmap_optimize_folio(h, folio);
> -}
> -
> -static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
> -{
> -	__prep_new_hugetlb_folio(h, folio);
> -	spin_lock_irq(&hugetlb_lock);
> -	__prep_account_new_huge_page(h, nid);
> -	spin_unlock_irq(&hugetlb_lock);
> -}
> -
>   /*
>    * Find and lock address space (mapping) in write mode.
>    *
> @@ -2005,25 +1991,20 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   }
>   
>   /*
> - * Common helper to allocate a fresh hugetlb page. All specific allocators
> - * should use this function to get new hugetlb pages
> + * Common helper to allocate a fresh hugetlb folio. All specific allocators
> + * should use this function to get new hugetlb folio
>    *
> - * Note that returned page is 'frozen':  ref count of head page and all tail
> + * Note that returned folio is 'frozen':  ref count of head page and all tail
>    * pages is zero.
>    */
> -static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
> +static struct folio *alloc_fresh_hugetlb_hvo_folio(struct hstate *h,
>   		gfp_t gfp_mask, int nid, nodemask_t *nmask)
>   {
>   	struct folio *folio;
>   
> -	if (hstate_is_gigantic(h))
> -		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
> -	else
> -		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> -	if (!folio)
> -		return NULL;
> -
> -	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> +	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> +	if (folio)
> +		hugetlb_vmemmap_optimize_folio(h, folio);
>   	return folio;
>   }
>   
> @@ -2241,12 +2222,10 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
>   		goto out_unlock;
>   	spin_unlock_irq(&hugetlb_lock);
>   
> -	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> +	folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
>   	if (!folio)
>   		return NULL;
>   
> -	hugetlb_vmemmap_optimize_folio(h, folio);
> -
>   	spin_lock_irq(&hugetlb_lock);
>   	/*
>   	 * nr_huge_pages needs to be adjusted within the same lock cycle
> @@ -2286,10 +2265,14 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
>   	if (hstate_is_gigantic(h))
>   		return NULL;
>   
> -	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
> +	folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
>   	if (!folio)
>   		return NULL;
>   
> +	spin_lock_irq(&hugetlb_lock);
> +	__prep_account_new_huge_page(h, folio_nid(folio));
> +	spin_unlock_irq(&hugetlb_lock);
> +
>   	/* fresh huge pages are frozen */
>   	folio_ref_unfreeze(folio, 1);
>   	/*
> @@ -2836,11 +2819,10 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
>   		if (!new_folio) {
>   			spin_unlock_irq(&hugetlb_lock);
>   			gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
> -			new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
> -							      NULL, NULL);
> +			new_folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask,
> +								  nid, NULL);
>   			if (!new_folio)
>   				return -ENOMEM;
> -			__prep_new_hugetlb_folio(h, new_folio);
>   			goto retry;
>   		}
>   

Thanks for trying to clean up the clunky names and eliminate a couple of
helpers that aren't really needed.

The decision to do HVO is baked into __hugetlb_vmemmap_optimize_folio();
ultimately all hugetlb pages go through __hugetlb_vmemmap_optimize_folio(),
so let's not add 'hvo' to the function name to suggest an additional
decision.

The difference between only_alloc_fresh_hugetlb_folio() and
alloc_fresh_hugetlb_folio() is that the latter invokes
__hugetlb_vmemmap_optimize_folio(), while the former is used for batch
huge folio allocation, where __hugetlb_vmemmap_optimize_folio() is then
applied to a list of folios back to back.

Therefore, to hopefully make the names sound a bit more natural,
how about
   s/only_alloc_fresh_hugetlb_folio/alloc_hugetlb_folio_pre/
   s/alloc_fresh_hugetlb_folio/alloc_hugetlb_folio/
and dropping "fresh"? How is a newly allocated folio not fresh?
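
To make the single-shot vs. batch contrast above concrete, a rough sketch
of the two call patterns (the loop bound nr_to_alloc and the local
variables are made up and error handling is omitted; the function names
are the ones used in the series):

	struct folio *folio;
	LIST_HEAD(folio_list);
	unsigned long i;

	/* Single-folio path: allocate one folio, optimize it right away. */
	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
	if (folio)
		hugetlb_vmemmap_optimize_folio(h, folio);

	/* Batch path: allocate a list of folios first, then run the
	 * vmemmap optimization over the whole list back to back. */
	for (i = 0; i < nr_to_alloc; i++) {
		folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
		if (!folio)
			break;
		list_add(&folio->lru, &folio_list);
	}
	hugetlb_vmemmap_optimize_folios(h, &folio_list);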

Thanks!
-jane




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-05 22:18   ` jane.chu
@ 2025-08-06  0:33     ` jane.chu
  2025-08-06  0:56       ` Kefeng Wang
  0 siblings, 1 reply; 33+ messages in thread
From: jane.chu @ 2025-08-06  0:33 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 8/5/2025 3:18 PM, jane.chu@oracle.com wrote:
> 
> On 8/2/2025 12:31 AM, Kefeng Wang wrote:
>> Now alloc_fresh_hugetlb_folio() is only called by
>> alloc_migrate_hugetlb_folio(), cleanup it by converting to
>> alloc_fresh_hugetlb_hvo_folio(), also simplify the
>> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio()
>> too which help us to remove prep_new_hugetlb_folio() and
>> __prep_new_hugetlb_folio().
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/hugetlb.c | 48 +++++++++++++++---------------------------------
>>   1 file changed, 15 insertions(+), 33 deletions(-)
>>
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 753f99b4c718..5b4c19e7a5f7 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -1906,20 +1906,6 @@ static void init_new_hugetlb_folio(struct 
>> hstate *h, struct folio *folio)
>>       set_hugetlb_cgroup_rsvd(folio, NULL);
>>   }
>> -static void __prep_new_hugetlb_folio(struct hstate *h, struct folio 
>> *folio)
>> -{
>> -    init_new_hugetlb_folio(h, folio);
>> -    hugetlb_vmemmap_optimize_folio(h, folio);
>> -}
>> -
>> -static void prep_new_hugetlb_folio(struct hstate *h, struct folio 
>> *folio, int nid)
>> -{
>> -    __prep_new_hugetlb_folio(h, folio);
>> -    spin_lock_irq(&hugetlb_lock);
>> -    __prep_account_new_huge_page(h, nid);
>> -    spin_unlock_irq(&hugetlb_lock);
>> -}
>> -
>>   /*
>>    * Find and lock address space (mapping) in write mode.
>>    *
>> @@ -2005,25 +1991,20 @@ static struct folio 
>> *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>>   }
>>   /*
>> - * Common helper to allocate a fresh hugetlb page. All specific 
>> allocators
>> - * should use this function to get new hugetlb pages
>> + * Common helper to allocate a fresh hugetlb folio. All specific 
>> allocators
>> + * should use this function to get new hugetlb folio
>>    *
>> - * Note that returned page is 'frozen':  ref count of head page and 
>> all tail
>> + * Note that returned folio is 'frozen':  ref count of head page and 
>> all tail
>>    * pages is zero.
>>    */
>> -static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
>> +static struct folio *alloc_fresh_hugetlb_hvo_folio(struct hstate *h,
>>           gfp_t gfp_mask, int nid, nodemask_t *nmask)
>>   {
>>       struct folio *folio;
>> -    if (hstate_is_gigantic(h))
>> -        folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
>> -    else
>> -        folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, 
>> NULL);
>> -    if (!folio)
>> -        return NULL;
>> -
>> -    prep_new_hugetlb_folio(h, folio, folio_nid(folio));
>> +    folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, 
>> NULL);
>> +    if (folio)
>> +        hugetlb_vmemmap_optimize_folio(h, folio);
>>       return folio;
>>   }
>> @@ -2241,12 +2222,10 @@ static struct folio 
>> *alloc_surplus_hugetlb_folio(struct hstate *h,
>>           goto out_unlock;
>>       spin_unlock_irq(&hugetlb_lock);
>> -    folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, 
>> NULL);
>> +    folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
>>       if (!folio)
>>           return NULL;
>> -    hugetlb_vmemmap_optimize_folio(h, folio);
>> -
>>       spin_lock_irq(&hugetlb_lock);
>>       /*
>>        * nr_huge_pages needs to be adjusted within the same lock cycle
>> @@ -2286,10 +2265,14 @@ static struct folio 
>> *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
>>       if (hstate_is_gigantic(h))
>>           return NULL;
>> -    folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
>> +    folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask, nid, nmask);
>>       if (!folio)
>>           return NULL;
>> +    spin_lock_irq(&hugetlb_lock);
>> +    __prep_account_new_huge_page(h, folio_nid(folio));
>> +    spin_unlock_irq(&hugetlb_lock);
>> +
>>       /* fresh huge pages are frozen */
>>       folio_ref_unfreeze(folio, 1);
>>       /*
>> @@ -2836,11 +2819,10 @@ static int 
>> alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
>>           if (!new_folio) {
>>               spin_unlock_irq(&hugetlb_lock);
>>               gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
>> -            new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
>> -                                  NULL, NULL);
>> +            new_folio = alloc_fresh_hugetlb_hvo_folio(h, gfp_mask,
>> +                                  nid, NULL);
>>               if (!new_folio)
>>                   return -ENOMEM;
>> -            __prep_new_hugetlb_folio(h, new_folio);
>>               goto retry;
>>           }
> 
> Thanks for trying to clean up the clunky names and eliminate a couple 
> not really needed helpers.
> 
> The decision to do HVO is baked in __hugetlb_vmemmap_optimize_folio(),
> ultimately all hugetlb pages go thru __hugetlb_vmemmap_optimize_folio(), 
> so let's not to add on 'hvo' in function name to indicate additional 
> decision.
> 
> The difference between only_alloc_fresh_hugetlb_folio() and
> alloc_fresh_hugetlb_folio() is that the latter invokes 
> __hugetlb_vmemmap_optimize_folio(), while the former is used for huge
> folio allocation in batch, and then __hugetlb_vmemmap_optimize_folio() 
> is applied to a list of folios back to back.
> 
> Therefore, to hopefully make the names sound a bit more natural,
> how about
>    s/only_alloc_fresh_hugetlb_folio/alloc_hugetlb_folio_pre/
>    s/alloc_fresh_hugetlb_folio/alloc_hugetlb_folio/
> dropping "fresh", how is newly allocated folio not fresh?

My bad, I didn't notice that alloc_hugetlb_folio() is already in use for
allocating a hugetlb folio from the reservation pool.

So, how about just
    s/only_alloc_fresh_hugetlb_folio/alloc_fresh_hugetlb_folio_pre/
while keeping alloc_fresh_hugetlb_folio() as is?

thanks!
-jane

> 
> Thanks!
> -jane
> 
> 
> 





^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio()
  2025-08-04 20:36   ` Vishal Moola (Oracle)
  2025-08-05 14:21     ` Kefeng Wang
@ 2025-08-06  0:53     ` jane.chu
  1 sibling, 0 replies; 33+ messages in thread
From: jane.chu @ 2025-08-06  0:53 UTC (permalink / raw)
  To: Vishal Moola (Oracle), Kefeng Wang
  Cc: Andrew Morton, Muchun Song, Oscar Salvador, David Hildenbrand,
	linux-mm


On 8/4/2025 1:36 PM, Vishal Moola (Oracle) wrote:
> On Sat, Aug 02, 2025 at 03:31:02PM +0800, Kefeng Wang wrote:
>> In order to avoid the wrong nid passed into the account, it's better
>> to move folio_nid() into prep_account_new_hugetlb_folio().
> 
> I like the patch, but I think we can rename prep_account_new_huge_page()
> something better. I don't think we need the "prep" part anymore, could
> we go with something like account_new_hugetlb_folio()?
> 

or, hugetlb_account_new_folio() ?

Unfortunately, the names of the hugetlb helpers are all over the place. 
Would it be nice to name most of them with the prefix "hugetlb_",
followed by a brief description of what they do?  For example,
   hugetlb_init_new_folio (replacing init_new_hugetlb_folio)
   hugetlb_prep_new_folio (replacing prep_new_hugetlb_folio)
   hugetlb_alloc_fresh_folio_pre (replacing only_alloc_fresh_hugetlb_folio)
   hugetlb_alloc_fresh_folio (replacing alloc_fresh_hugetlb_folio)
etc.


thanks,
-jane


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-06  0:33     ` jane.chu
@ 2025-08-06  0:56       ` Kefeng Wang
  2025-08-06 18:16         ` jane.chu
  0 siblings, 1 reply; 33+ messages in thread
From: Kefeng Wang @ 2025-08-06  0:56 UTC (permalink / raw)
  To: jane.chu, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 2025/8/6 8:33, jane.chu@oracle.com wrote:
> 
> 
> On 8/5/2025 3:18 PM, jane.chu@oracle.com wrote:
>>
>> On 8/2/2025 12:31 AM, Kefeng Wang wrote:
>>> Now alloc_fresh_hugetlb_folio() is only called by
>>> alloc_migrate_hugetlb_folio(), cleanup it by converting to
>>> alloc_fresh_hugetlb_hvo_folio(), also simplify the
>>> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio()
>>> too which help us to remove prep_new_hugetlb_folio() and
>>> __prep_new_hugetlb_folio().
>>>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---

...

>> Thanks for trying to clean up the clunky names and eliminate a couple 
>> not really needed helpers.
>>
>> The decision to do HVO is baked in __hugetlb_vmemmap_optimize_folio(),
>> ultimately all hugetlb pages go thru 
>> __hugetlb_vmemmap_optimize_folio(), so let's not to add on 'hvo' in 
>> function name to indicate additional decision.
>>
>> The difference between only_alloc_fresh_hugetlb_folio() and
>> alloc_fresh_hugetlb_folio() is that the latter invokes 
>> __hugetlb_vmemmap_optimize_folio(), while the former is used for huge
>> folio allocation in batch, and then __hugetlb_vmemmap_optimize_folio() 
>> is applied to a list of folios back to back.
>>
>> Therefore, to hopefully make the names sound a bit more natural,
>> how about
>>    s/only_alloc_fresh_hugetlb_folio/alloc_hugetlb_folio_pre/
>>    s/alloc_fresh_hugetlb_folio/alloc_hugetlb_folio/
>> dropping "fresh", how is newly allocated folio not fresh?
> 
> MY bad, didn't notice that alloc_hugetlb_folio() is already in use for 
> allocating hugetlb folio from the reservation pool.
> 
> So, how about just
>     s/only_alloc_fresh_hugetlb_folio/alloc_fresh_hugetlb_folio_pre/
> while keep alloc_fresh_hugetlb_folio() as is?
> 


Hi Oscar/Jane, thanks for your comments. What about

s/only_alloc_fresh_hugetlb_folio/__alloc_fresh_hugetlb_folio/

while keeping alloc_fresh_hugetlb_folio() as is?


Thanks.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio()
  2025-08-06  0:56       ` Kefeng Wang
@ 2025-08-06 18:16         ` jane.chu
  0 siblings, 0 replies; 33+ messages in thread
From: jane.chu @ 2025-08-06 18:16 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 8/5/2025 5:56 PM, Kefeng Wang wrote:
> 
> 
> On 2025/8/6 8:33, jane.chu@oracle.com wrote:
>>
>>
>> On 8/5/2025 3:18 PM, jane.chu@oracle.com wrote:
>>>
>>> On 8/2/2025 12:31 AM, Kefeng Wang wrote:
>>>> Now alloc_fresh_hugetlb_folio() is only called by
>>>> alloc_migrate_hugetlb_folio(), cleanup it by converting to
>>>> alloc_fresh_hugetlb_hvo_folio(), also simplify the
>>>> alloc_and_dissolve_hugetlb_folio() and alloc_surplus_hugetlb_folio()
>>>> too which help us to remove prep_new_hugetlb_folio() and
>>>> __prep_new_hugetlb_folio().
>>>>
>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>> ---
> 
> ...
> 
>>> Thanks for trying to clean up the clunky names and eliminate a couple 
>>> not really needed helpers.
>>>
>>> The decision to do HVO is baked in __hugetlb_vmemmap_optimize_folio(),
>>> ultimately all hugetlb pages go thru 
>>> __hugetlb_vmemmap_optimize_folio(), so let's not to add on 'hvo' in 
>>> function name to indicate additional decision.
>>>
>>> The difference between only_alloc_fresh_hugetlb_folio() and
>>> alloc_fresh_hugetlb_folio() is that the latter invokes 
>>> __hugetlb_vmemmap_optimize_folio(), while the former is used for huge
>>> folio allocation in batch, and then 
>>> __hugetlb_vmemmap_optimize_folio() is applied to a list of folios 
>>> back to back.
>>>
>>> Therefore, to hopefully make the names sound a bit more natural,
>>> how about
>>>    s/only_alloc_fresh_hugetlb_folio/alloc_hugetlb_folio_pre/
>>>    s/alloc_fresh_hugetlb_folio/alloc_hugetlb_folio/
>>> dropping "fresh", how is newly allocated folio not fresh?
>>
>> MY bad, didn't notice that alloc_hugetlb_folio() is already in use for 
>> allocating hugetlb folio from the reservation pool.
>>
>> So, how about just
>>     s/only_alloc_fresh_hugetlb_folio/alloc_fresh_hugetlb_folio_pre/
>> while keep alloc_fresh_hugetlb_folio() as is?
>>
> 
> 
> Hi Oscar/Jane, thanks for your comments, what about
> 
> s/only_alloc_fresh_hugetlb_folio/__alloc_fresh_hugetlb_folio/
> alloc_fresh_hugetlb_folio
> 
> 

IMHO, the __alloc_fresh_hugetlb_folio naming style typically means it's
the function that does the real work, while by comparison the
alloc_fresh_hugetlb_folio name sounds more like a front end. So that's
not quite what we have here.
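
For readers less familiar with that convention, a made-up, minimal
illustration of the usual __foo()/foo() split (the function name and the
counter update are hypothetical; only the locking relationship matters):

	/* __foo() does the real work; the caller must hold the lock. */
	static void __adjust_surplus_count(struct hstate *h, int delta)
	{
		lockdep_assert_held(&hugetlb_lock);
		h->surplus_huge_pages += delta;
	}

	/* foo() is the thin front end that only adds the locking. */
	static void adjust_surplus_count(struct hstate *h, int delta)
	{
		spin_lock_irq(&hugetlb_lock);
		__adjust_surplus_count(h, delta);
		spin_unlock_irq(&hugetlb_lock);
	}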

That said, it's just naming; the patch itself is great.
I'll defer to Oscar.

thanks,
-jane

> Thanks.




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/7] mm: hugetlb: directly pass order when allocate a hugetlb folio
  2025-08-02  7:31 ` [PATCH 4/7] mm: hugetlb: directly pass order when allocate a hugetlb folio Kefeng Wang
  2025-08-04 16:22   ` Sidhartha Kumar
@ 2025-08-06 20:05   ` jane.chu
  2025-08-06 20:17     ` jane.chu
  1 sibling, 1 reply; 33+ messages in thread
From: jane.chu @ 2025-08-06 20:05 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 8/2/2025 12:31 AM, Kefeng Wang wrote:
> Use order instead of struct hstate to remove huge_page_order() call
> from all hugetlb folio allocation.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/hugetlb.c     | 27 +++++++++++++--------------
>   mm/hugetlb_cma.c |  3 +--
>   mm/hugetlb_cma.h |  6 +++---
>   3 files changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 436403fb0bed..e174a9269f52 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1473,17 +1473,16 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
>   
>   #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
>   #ifdef CONFIG_CONTIG_ALLOC
> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
>   		int nid, nodemask_t *nodemask)
>   {
>   	struct folio *folio;
> -	int order = huge_page_order(h);
>   	bool retried = false;
>   
>   	if (nid == NUMA_NO_NODE)
>   		nid = numa_mem_id();
>   retry:
> -	folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
> +	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
>   	if (!folio) {
>   		if (hugetlb_cma_exclusive_alloc())
>   			return NULL;
> @@ -1506,16 +1505,16 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
>   }
>   
>   #else /* !CONFIG_CONTIG_ALLOC */
> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
> -					int nid, nodemask_t *nodemask)
> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
> +					  nodemask_t *nodemask)
>   {
>   	return NULL;
>   }
>   #endif /* CONFIG_CONTIG_ALLOC */
>   
>   #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
> -					int nid, nodemask_t *nodemask)
> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
> +					  nodemask_t *nodemask)
>   {
>   	return NULL;
>   }
> @@ -1926,11 +1925,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
>   	return NULL;
>   }
>   
> -static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
> -		gfp_t gfp_mask, int nid, nodemask_t *nmask,
> -		nodemask_t *node_alloc_noretry)
> +static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
> +		int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
>   {
> -	int order = huge_page_order(h);
>   	struct folio *folio;
>   	bool alloc_try_hard = true;
>   
> @@ -1977,11 +1974,13 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   		nodemask_t *node_alloc_noretry)
>   {
>   	struct folio *folio;
> +	int order = huge_page_order(h);
>   
> -	if (hstate_is_gigantic(h))
> -		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
> +	if (order > MAX_PAGE_ORDER)
> +		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);

But hstate_is_gigantic() has two definitions depending on whether
CONFIG_HUGETLB_PAGE is selected.

>   	else
> -		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
> +		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
> +						  node_alloc_noretry);
>   	if (folio)
>   		init_new_hugetlb_folio(h, folio);
>   	return folio;
> diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
> index f58ef4969e7a..e8e4dc7182d5 100644
> --- a/mm/hugetlb_cma.c
> +++ b/mm/hugetlb_cma.c
> @@ -26,11 +26,10 @@ void hugetlb_cma_free_folio(struct folio *folio)
>   }
>   
>   
> -struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
> +struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
>   				      int nid, nodemask_t *nodemask)
>   {
>   	int node;
> -	int order = huge_page_order(h);
>   	struct folio *folio = NULL;
>   
>   	if (hugetlb_cma[nid])
> diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
> index f7d7fb9880a2..2c2ec8a7e134 100644
> --- a/mm/hugetlb_cma.h
> +++ b/mm/hugetlb_cma.h
> @@ -4,7 +4,7 @@
>   
>   #ifdef CONFIG_CMA
>   void hugetlb_cma_free_folio(struct folio *folio);
> -struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
> +struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
>   				      int nid, nodemask_t *nodemask);
>   struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
>   						    bool node_exact);
> @@ -18,8 +18,8 @@ static inline void hugetlb_cma_free_folio(struct folio *folio)
>   {
>   }
>   
> -static inline struct folio *hugetlb_cma_alloc_folio(struct hstate *h,
> -	    gfp_t gfp_mask, int nid, nodemask_t *nodemask)
> +static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
> +		int nid, nodemask_t *nodemask)
>   {
>   	return NULL;
>   }
The rest look okay.
thanks,
-jane



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio() Kefeng Wang
  2025-08-04 16:13   ` Sidhartha Kumar
@ 2025-08-06 20:08   ` jane.chu
  1 sibling, 0 replies; 33+ messages in thread
From: jane.chu @ 2025-08-06 20:08 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 8/2/2025 12:31 AM, Kefeng Wang wrote:
> The struct hstate is never used, remove it.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/hugetlb.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index e174a9269f52..b16011c9645d 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1896,7 +1896,7 @@ static void prep_account_new_hugetlb_folio(struct hstate *h, struct folio *folio
>   	h->nr_huge_pages_node[folio_nid(folio)]++;
>   }
>   
> -static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
> +static void init_new_hugetlb_folio(struct folio *folio)
>   {
>   	__folio_set_hugetlb(folio);
>   	INIT_LIST_HEAD(&folio->lru);
> @@ -1982,7 +1982,7 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
>   						  node_alloc_noretry);
>   	if (folio)
> -		init_new_hugetlb_folio(h, folio);
> +		init_new_hugetlb_folio(folio);
>   	return folio;
>   }
>   
> @@ -3401,7 +3401,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
>   
>   		hugetlb_folio_init_vmemmap(folio, h,
>   					   HUGETLB_VMEMMAP_RESERVE_PAGES);
> -		init_new_hugetlb_folio(h, folio);
> +		init_new_hugetlb_folio(folio);
>   
>   		if (hugetlb_bootmem_page_prehvo(m))
>   			/*
> @@ -4013,7 +4013,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
>   			prep_compound_page(page, dst->order);
>   
>   			new_folio->mapping = NULL;
> -			init_new_hugetlb_folio(dst, new_folio);
> +			init_new_hugetlb_folio(new_folio);
>   			/* Copy the CMA flag so that it is freed correctly */
>   			if (cma)
>   				folio_set_hugetlb_cma(new_folio);

Looks okay.
Reviewed-by: Jane Chu <jane.chu@oracle.com>




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/7] mm: hugetlb: directly pass order when allocate a hugetlb folio
  2025-08-06 20:05   ` jane.chu
@ 2025-08-06 20:17     ` jane.chu
  0 siblings, 0 replies; 33+ messages in thread
From: jane.chu @ 2025-08-06 20:17 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 8/6/2025 1:05 PM, jane.chu@oracle.com wrote:
> 
> 
> On 8/2/2025 12:31 AM, Kefeng Wang wrote:
>> Use order instead of struct hstate to remove huge_page_order() call
>> from all hugetlb folio allocation.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/hugetlb.c     | 27 +++++++++++++--------------
>>   mm/hugetlb_cma.c |  3 +--
>>   mm/hugetlb_cma.h |  6 +++---
>>   3 files changed, 17 insertions(+), 19 deletions(-)
>>
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 436403fb0bed..e174a9269f52 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -1473,17 +1473,16 @@ static int hstate_next_node_to_free(struct 
>> hstate *h, nodemask_t *nodes_allowed)
>>   #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
>>   #ifdef CONFIG_CONTIG_ALLOC
>> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t 
>> gfp_mask,
>> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
>>           int nid, nodemask_t *nodemask)
>>   {
>>       struct folio *folio;
>> -    int order = huge_page_order(h);
>>       bool retried = false;
>>       if (nid == NUMA_NO_NODE)
>>           nid = numa_mem_id();
>>   retry:
>> -    folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
>> +    folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
>>       if (!folio) {
>>           if (hugetlb_cma_exclusive_alloc())
>>               return NULL;
>> @@ -1506,16 +1505,16 @@ static struct folio 
>> *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
>>   }
>>   #else /* !CONFIG_CONTIG_ALLOC */
>> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t 
>> gfp_mask,
>> -                    int nid, nodemask_t *nodemask)
>> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, 
>> int nid,
>> +                      nodemask_t *nodemask)
>>   {
>>       return NULL;
>>   }
>>   #endif /* CONFIG_CONTIG_ALLOC */
>>   #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
>> -static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t 
>> gfp_mask,
>> -                    int nid, nodemask_t *nodemask)
>> +static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, 
>> int nid,
>> +                      nodemask_t *nodemask)
>>   {
>>       return NULL;
>>   }
>> @@ -1926,11 +1925,9 @@ struct address_space 
>> *hugetlb_folio_mapping_lock_write(struct folio *folio)
>>       return NULL;
>>   }
>> -static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
>> -        gfp_t gfp_mask, int nid, nodemask_t *nmask,
>> -        nodemask_t *node_alloc_noretry)
>> +static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t 
>> gfp_mask,
>> +        int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
>>   {
>> -    int order = huge_page_order(h);
>>       struct folio *folio;
>>       bool alloc_try_hard = true;
>> @@ -1977,11 +1974,13 @@ static struct folio 
>> *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>>           nodemask_t *node_alloc_noretry)
>>   {
>>       struct folio *folio;
>> +    int order = huge_page_order(h);
>> -    if (hstate_is_gigantic(h))
>> -        folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
>> +    if (order > MAX_PAGE_ORDER)
>> +        folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
> 
> But hstate_is_gigantic() has two definitions depends on whether 
> CONFIG_HUGETLB_PAGE is selected.

Sorry, cross that out, I don't know what I was thinking.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

thanks!

> 
>>       else
>> -        folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, 
>> node_alloc_noretry);
>> +        folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
>> +                          node_alloc_noretry);
>>       if (folio)
>>           init_new_hugetlb_folio(h, folio);
>>       return folio;
>> diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
>> index f58ef4969e7a..e8e4dc7182d5 100644
>> --- a/mm/hugetlb_cma.c
>> +++ b/mm/hugetlb_cma.c
>> @@ -26,11 +26,10 @@ void hugetlb_cma_free_folio(struct folio *folio)
>>   }
>> -struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
>> +struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
>>                         int nid, nodemask_t *nodemask)
>>   {
>>       int node;
>> -    int order = huge_page_order(h);
>>       struct folio *folio = NULL;
>>       if (hugetlb_cma[nid])
>> diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
>> index f7d7fb9880a2..2c2ec8a7e134 100644
>> --- a/mm/hugetlb_cma.h
>> +++ b/mm/hugetlb_cma.h
>> @@ -4,7 +4,7 @@
>>   #ifdef CONFIG_CMA
>>   void hugetlb_cma_free_folio(struct folio *folio);
>> -struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
>> +struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
>>                         int nid, nodemask_t *nodemask);
>>   struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate 
>> *h, int *nid,
>>                               bool node_exact);
>> @@ -18,8 +18,8 @@ static inline void hugetlb_cma_free_folio(struct 
>> folio *folio)
>>   {
>>   }
>> -static inline struct folio *hugetlb_cma_alloc_folio(struct hstate *h,
>> -        gfp_t gfp_mask, int nid, nodemask_t *nodemask)
>> +static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t 
>> gfp_mask,
>> +        int nid, nodemask_t *nodemask)
>>   {
>>       return NULL;
>>   }
> The rest look okay.
> thanks,
> -jane
> 
> 



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/7] mm: hugeltb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio()
  2025-08-02  7:31 ` [PATCH 6/7] mm: hugeltb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio() Kefeng Wang
  2025-08-04 19:09   ` Sidhartha Kumar
@ 2025-08-06 20:43   ` jane.chu
  2025-08-12 12:13     ` Kefeng Wang
  1 sibling, 1 reply; 33+ messages in thread
From: jane.chu @ 2025-08-06 20:43 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 8/2/2025 12:31 AM, Kefeng Wang wrote:
> Move the NUMA_NO_NODE check out of buddy and gigantic folio allocation
> to cleanup code a bit.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/hugetlb.c | 7 +++----
>   1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index b16011c9645d..4f73b74a2cff 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1479,8 +1479,6 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
>   	struct folio *folio;
>   	bool retried = false;
>   
> -	if (nid == NUMA_NO_NODE)
> -		nid = numa_mem_id();
>   retry:
>   	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
>   	if (!folio) {
> @@ -1942,8 +1940,6 @@ static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
>   		alloc_try_hard = false;
>   	if (alloc_try_hard)
>   		gfp_mask |= __GFP_RETRY_MAYFAIL;
> -	if (nid == NUMA_NO_NODE)
> -		nid = numa_mem_id();

Looks like you're also fixing a bug, because the code block was
        if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))  <--
                 alloc_try_hard = false;
         if (alloc_try_hard)
                 gfp_mask |= __GFP_RETRY_MAYFAIL;
         if (nid == NUMA_NO_NODE)
                 nid = numa_mem_id();
In theory, NUMA_NO_NODE (-1) could be passed as 'nid' to node_isset().
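
Schematically, the ordering difference being pointed out (the comments are
added for illustration; the statements themselves come from the existing
function and from patch 6):

	/* Before (in alloc_buddy_hugetlb_folio()): nid can still be
	 * NUMA_NO_NODE (-1) at this point, so node_isset() could in theory
	 * be handed an invalid node id. */
	if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))
		alloc_try_hard = false;
	/* ... */
	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	/* After patch 6: only_alloc_fresh_hugetlb_folio() resolves
	 * NUMA_NO_NODE to numa_mem_id() before calling
	 * alloc_buddy_hugetlb_folio(), so the node_isset() check only ever
	 * sees a real node id. */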

>   
>   	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
>   	if (folio) {
> @@ -1976,6 +1972,9 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>   	struct folio *folio;
>   	int order = huge_page_order(h);
>   
> +	if (nid == NUMA_NO_NODE)
> +		nid = numa_mem_id();
> +
>   	if (order > MAX_PAGE_ORDER)
>   		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
>   	else
Looks good, thanks!

Reviewed-by: Jane Chu <jane.chu@oracle.com>


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-08-02  7:31 ` [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
@ 2025-08-07  1:22   ` jane.chu
  2025-08-12 12:11     ` Kefeng Wang
  0 siblings, 1 reply; 33+ messages in thread
From: jane.chu @ 2025-08-07  1:22 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm


On 8/2/2025 12:31 AM, Kefeng Wang wrote:
> The alloc_gigantic_folio() will allocate a folio by alloc_contig_range()
> with refcount increased and then freeze it, convert to allocate a frozen
> folio directly to remove the atomic operation about folio refcount and
> cleanup alloc_gigantic_folio() a bit.
> 
> Also move folio_alloc_frozen_gigantic(), cma_alloc/free_frozen_folio() and
> cma_validate_zones() into mm/internal.h since only hugetlb use it.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   include/linux/cma.h | 20 --------------------
>   include/linux/gfp.h | 23 -----------------------
>   mm/cma.c            |  4 ++--
>   mm/hugetlb.c        | 43 +++++++++++--------------------------------
>   mm/hugetlb_cma.c    | 12 ++++++------
>   mm/hugetlb_cma.h    | 10 +++++-----
>   mm/internal.h       | 37 +++++++++++++++++++++++++++++++++++++
>   mm/page_alloc.c     |  8 +++++---
>   8 files changed, 66 insertions(+), 91 deletions(-)
> 
> diff --git a/include/linux/cma.h b/include/linux/cma.h
> index 62d9c1cf6326..f116e23629ee 100644
> --- a/include/linux/cma.h
> +++ b/include/linux/cma.h
> @@ -57,24 +57,4 @@ extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long e
>   
>   extern void cma_reserve_pages_on_error(struct cma *cma);
>   
> -#ifdef CONFIG_CMA
> -struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
> -bool cma_free_folio(struct cma *cma, const struct folio *folio);
> -bool cma_validate_zones(struct cma *cma);
> -#else
> -static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
> -{
> -	return NULL;
> -}
> -
> -static inline bool cma_free_folio(struct cma *cma, const struct folio *folio)
> -{
> -	return false;
> -}
> -static inline bool cma_validate_zones(struct cma *cma)
> -{
> -	return false;
> -}
> -#endif
> -
>   #endif
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 5ebf26fcdcfa..5ad78c296344 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -440,27 +440,4 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
>   #endif
>   void free_contig_range(unsigned long pfn, unsigned long nr_pages);
>   
> -#ifdef CONFIG_CONTIG_ALLOC
> -static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
> -							int nid, nodemask_t *node)
> -{
> -	struct page *page;
> -
> -	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
> -		return NULL;
> -
> -	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
> -
> -	return page ? page_folio(page) : NULL;
> -}
> -#else
> -static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
> -							int nid, nodemask_t *node)
> -{
> -	return NULL;
> -}
> -#endif
> -/* This should be paired with folio_put() rather than free_contig_range(). */
> -#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
> -
>   #endif /* __LINUX_GFP_H */
> diff --git a/mm/cma.c b/mm/cma.c
> index 2ffa4befb99a..9539fd5700b6 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -921,7 +921,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
>   	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
>   }
>   
> -struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
> +struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
>   {
>   	struct page *page;
>   
> @@ -1008,7 +1008,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
>   	return true;
>   }
>   
> -bool cma_free_folio(struct cma *cma, const struct folio *folio)
> +bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
>   {
>   	if (WARN_ON(!folio_test_large(folio)))
>   		return false;
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 4f73b74a2cff..eea9dc782007 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -125,16 +125,6 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
>   		unsigned long start, unsigned long end, bool take_locks);
>   static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
>   
> -static void hugetlb_free_folio(struct folio *folio)
> -{
> -	if (folio_test_hugetlb_cma(folio)) {
> -		hugetlb_cma_free_folio(folio);
> -		return;
> -	}
> -
> -	folio_put(folio);
> -}
> -
>   static inline bool subpool_is_free(struct hugepage_subpool *spool)
>   {
>   	if (spool->count)
> @@ -1477,29 +1467,15 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
>   		int nid, nodemask_t *nodemask)
>   {
>   	struct folio *folio;
> -	bool retried = false;
> -
> -retry:
> -	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
> -	if (!folio) {
> -		if (hugetlb_cma_exclusive_alloc())
> -			return NULL;
> -
> -		folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
> -		if (!folio)
> -			return NULL;
> -	}
>   
> -	if (folio_ref_freeze(folio, 1))
> +	folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
> +	if (folio)
>   		return folio;
>   
> -	pr_warn("HugeTLB: unexpected refcount on PFN %lu\n", folio_pfn(folio));
> -	hugetlb_free_folio(folio);
> -	if (!retried) {
> -		retried = true;
> -		goto retry;
> -	}
> -	return NULL;
> +	if (hugetlb_cma_exclusive_alloc())
> +		return NULL;
> +
> +	return folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);
>   }
>   
>   #else /* !CONFIG_CONTIG_ALLOC */
> @@ -1641,9 +1617,12 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
>   	if (unlikely(folio_test_hwpoison(folio)))
>   		folio_clear_hugetlb_hwpoison(folio);
>   
> -	folio_ref_unfreeze(folio, 1);
> +	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
>   
> -	hugetlb_free_folio(folio);
> +	if (folio_test_hugetlb_cma(folio))
> +		hugetlb_cma_free_frozen_folio(folio);
> +	else
> +		free_frozen_pages(&folio->page, folio_order(folio));
>   }
>   
>   /*
> diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
> index e8e4dc7182d5..337776786ecf 100644
> --- a/mm/hugetlb_cma.c
> +++ b/mm/hugetlb_cma.c
> @@ -18,29 +18,29 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
>   static bool hugetlb_cma_only;
>   static unsigned long hugetlb_cma_size __initdata;
>   
> -void hugetlb_cma_free_folio(struct folio *folio)
> +void hugetlb_cma_free_frozen_folio(struct folio *folio)
>   {
>   	int nid = folio_nid(folio);
>   
> -	WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio));
> +	WARN_ON_ONCE(!cma_free_frozen_folio(hugetlb_cma[nid], folio));
>   }
>   
> -
> -struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
> +struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
>   				      int nid, nodemask_t *nodemask)
>   {
>   	int node;
>   	struct folio *folio = NULL;
>   
>   	if (hugetlb_cma[nid])
> -		folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask);
> +		folio = cma_alloc_frozen_folio(hugetlb_cma[nid], order, gfp_mask);
>   
>   	if (!folio && !(gfp_mask & __GFP_THISNODE)) {
>   		for_each_node_mask(node, *nodemask) {
>   			if (node == nid || !hugetlb_cma[node])
>   				continue;
>   
> -			folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask);
> +			folio = cma_alloc_frozen_folio(hugetlb_cma[node],
> +						       order, gfp_mask);
>   			if (folio)
>   				break;
>   		}
> diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
> index 2c2ec8a7e134..71db3544816e 100644
> --- a/mm/hugetlb_cma.h
> +++ b/mm/hugetlb_cma.h
> @@ -3,8 +3,8 @@
>   #define _LINUX_HUGETLB_CMA_H
>   
>   #ifdef CONFIG_CMA
> -void hugetlb_cma_free_folio(struct folio *folio);
> -struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
> +void hugetlb_cma_free_frozen_folio(struct folio *folio);
> +struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
>   				      int nid, nodemask_t *nodemask);
>   struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
>   						    bool node_exact);
> @@ -14,12 +14,12 @@ unsigned long hugetlb_cma_total_size(void);
>   void hugetlb_cma_validate_params(void);
>   bool hugetlb_early_cma(struct hstate *h);
>   #else
> -static inline void hugetlb_cma_free_folio(struct folio *folio)
> +static inline void hugetlb_cma_free_frozen_folio(struct folio *folio)
>   {
>   }
>   
> -static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
> -		int nid, nodemask_t *nodemask)
> +static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
> +		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
>   {
>   	return NULL;
>   }
> diff --git a/mm/internal.h b/mm/internal.h
> index 1da16d550a45..b12cd23e88c4 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -933,6 +933,9 @@ struct cma;
>   #ifdef CONFIG_CMA
>   void *cma_reserve_early(struct cma *cma, unsigned long size);
>   void init_cma_pageblock(struct page *page);
> +struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp);
> +bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio);
> +bool cma_validate_zones(struct cma *cma);
>   #else
>   static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
>   {
> @@ -941,8 +944,42 @@ static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
>   static inline void init_cma_pageblock(struct page *page)
>   {
>   }
> +static inline struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
> +{
> +	return NULL;
> +}
> +static inline bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
> +{
> +	return false;
> +}
> +static inline bool cma_validate_zones(struct cma *cma)
> +{
> +	return false;
> +}
>   #endif
>   
> +#ifdef CONFIG_CONTIG_ALLOC
> +static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
> +		gfp_t gfp, int nid, nodemask_t *node)
> +{
> +	struct page *page;
> +
> +	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
> +		return NULL;
> +
> +	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
> +
> +	return page ? page_folio(page) : NULL;
> +}
> +#else
> +static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
> +		gfp_t gfp, int nid, nodemask_t *node)
> +{
> +	return NULL;
> +}
> +#endif
> +/* This should be paired with free_frozen_pages() rather than free_contig_range(). */
> +#define folio_alloc_frozen_gigantic(...) alloc_hooks(folio_alloc_frozen_gigantic_noprof(__VA_ARGS__))
>   
>   int find_suitable_fallback(struct free_area *area, unsigned int order,
>   			   int migratetype, bool claimable);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d1d037f97c5f..c542ababb8dc 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6822,6 +6822,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
>    * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
>    *		action and reclaim modifiers are supported. Reclaim modifiers
>    *		control allocation behavior during compaction/migration/reclaim.
> + *		If gfp_mask contains __GFP_COMP, the refcount of compound page
> + *		will be not increased.
>    *
>    * The PFN range does not have to be pageblock aligned. The PFN range must
>    * belong to a single zone.
> @@ -6955,7 +6957,6 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
>   
>   		check_new_pages(head, order);
>   		prep_new_page(head, order, gfp_mask, 0);
> -		set_page_refcounted(head);
>   	} else {
>   		ret = -EINVAL;
>   		WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
> @@ -7074,10 +7075,11 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
>   	struct folio *folio = pfn_folio(pfn);
>   
>   	if (folio_test_large(folio)) {
> -		int expected = folio_nr_pages(folio);
> +		int order = folio_order(folio);
> +		int expected = 1 << order;
>   
>   		if (nr_pages == expected)
> -			folio_put(folio);
> +			free_frozen_pages(&folio->page, order);
>   		else
>   			WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
>   			     pfn, nr_pages, expected);

Is this patch solely for the purpose of saving a few back-and-forth
refcount-setting calls?

It seems to me that altering the behavior of alloc_contig_range_noprof()
to the contrary of its contemporaries, such as __alloc_pages_noprof() and
alloc_pages_bulk_noprof() in mm/page_alloc.c, might be a source of
confusion to unaware callers.  E.g. virtio_mem_fake_offline() calls
alloc_contig_range(), for now, without setting __GFP_COMP, but if it
does in the future, it could be tripped up.

I guess it's helpful to keep the existing convention so that these
alloc_*()s from page_alloc.c behave in a similar way, in that the head
page is returned refcounted.
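
For context, a simplified sketch of the two conventions being contrasted
(error handling and the CMA path are omitted; the "before" half mirrors
the current alloc_gigantic_folio(), the "after" half what patch 7 does):

	/* Before: the gigantic allocation comes back refcounted, so
	 * hugetlb has to freeze it before treating it as a fresh folio. */
	folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
	if (folio && !folio_ref_freeze(folio, 1)) {
		/* lost a race against a speculative reference: free and retry */
	}

	/* After: the __GFP_COMP allocation is returned frozen (refcount 0),
	 * and a consumer that publishes the folio, e.g.
	 * alloc_migrate_hugetlb_folio(), unfreezes it explicitly. */
	folio = folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);
	/* ... */
	folio_ref_unfreeze(folio, 1);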

thanks,
-jane



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-08-07  1:22   ` jane.chu
@ 2025-08-12 12:11     ` Kefeng Wang
  0 siblings, 0 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-12 12:11 UTC (permalink / raw)
  To: jane.chu, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 2025/8/7 9:22, jane.chu@oracle.com wrote:
> 
> On 8/2/2025 12:31 AM, Kefeng Wang wrote:
>> The alloc_gigantic_folio() will allocate a folio by alloc_contig_range()
>> with refcount increased and then freeze it, convert to allocate a frozen
>> folio directly to remove the atomic operation about folio refcount and
>> cleanup alloc_gigantic_folio() a bit.
>>
>> Also move folio_alloc_frozen_gigantic(), cma_alloc/free_frozen_folio() and
>> cma_validate_zones() into mm/internal.h since only hugetlb uses them.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   include/linux/cma.h | 20 --------------------
>>   include/linux/gfp.h | 23 -----------------------
>>   mm/cma.c            |  4 ++--
>>   mm/hugetlb.c        | 43 +++++++++++--------------------------------
>>   mm/hugetlb_cma.c    | 12 ++++++------
>>   mm/hugetlb_cma.h    | 10 +++++-----
>>   mm/internal.h       | 37 +++++++++++++++++++++++++++++++++++++
>>   mm/page_alloc.c     |  8 +++++---
>>   8 files changed, 66 insertions(+), 91 deletions(-)
>>

...

>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index d1d037f97c5f..c542ababb8dc 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -6822,6 +6822,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t 
>> gfp_mask, gfp_t *gfp_cc_mask)
>>    * @gfp_mask:    GFP mask. Node/zone/placement hints are ignored; 
>> only some
>>    *        action and reclaim modifiers are supported. Reclaim modifiers
>>    *        control allocation behavior during compaction/migration/ 
>> reclaim.
>> + *        If gfp_mask contains __GFP_COMP, the refcount of the compound
>> + *        page will not be increased.
>>    *
>>    * The PFN range does not have to be pageblock aligned. The PFN 
>> range must
>>    * belong to a single zone.
>> @@ -6955,7 +6957,6 @@ int alloc_contig_range_noprof(unsigned long 
>> start, unsigned long end,
>>           check_new_pages(head, order);
>>           prep_new_page(head, order, gfp_mask, 0);
>> -        set_page_refcounted(head);
>>       } else {
>>           ret = -EINVAL;
>>           WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, 
>> %lu)\n",
>> @@ -7074,10 +7075,11 @@ void free_contig_range(unsigned long pfn, 
>> unsigned long nr_pages)
>>       struct folio *folio = pfn_folio(pfn);
>>       if (folio_test_large(folio)) {
>> -        int expected = folio_nr_pages(folio);
>> +        int order = folio_order(folio);
>> +        int expected = 1 << order;
>>           if (nr_pages == expected)
>> -            folio_put(folio);
>> +            free_frozen_pages(&folio->page, order);
>>           else
>>               WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
>>                    pfn, nr_pages, expected);
> 
> Is this patch solely for the purpose of saving a few back-and-forth
> refcount set/freeze calls?
> 
> It seems to me that altering the behavior of alloc_contig_range_noprof()
> contrary to its contemporaries, such as __alloc_pages_noprof() and
> alloc_pages_bulk_noprof() in mm/page_alloc.c, might be a source of
> confusion to unaware callers.  E.g. virtio_mem_fake_offline() calls
> alloc_contig_range(), for now, without setting __GFP_COMP, but if it
> does in the future, it could be tripped up.

OK, we may optimize some drivers in the future by adding __GFP_COMP.
> 
> I guess it's helpful to keep the existing convention so that these
> alloc_*() functions in page_alloc.c behave in a similar way, in that the
> head page is returned refcounted.

I will try to keep the original behavior of alloc_contig_* in the next
version, thanks for the suggestion.
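
For reference, a minimal sketch of one way to do that (it mirrors the
pre-series alloc_gigantic_folio() flow rather than this patch): let
folio_alloc_gigantic() keep returning a refcounted folio and freeze the
reference inside hugetlb:

#include <linux/gfp.h>
#include <linux/mm.h>

/*
 * Sketch only, assuming folio_alloc_gigantic() keeps its current
 * behavior of returning a refcounted folio: hugetlb freezes the single
 * expected reference itself, so alloc_contig_range() and friends keep
 * their existing convention.
 */
static struct folio *demo_alloc_frozen_gigantic(int order, gfp_t gfp_mask,
						int nid, nodemask_t *nodemask)
{
	struct folio *folio;

	folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
	if (!folio)
		return NULL;

	/* Expect exactly the allocator's reference and freeze it to zero. */
	if (folio_ref_freeze(folio, 1))
		return folio;

	/* An unexpected extra reference exists; give the folio back. */
	folio_put(folio);
	return NULL;
}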

> 
> thanks,
> -jane
> 
> 



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/7] mm: hugeltb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio()
  2025-08-06 20:43   ` jane.chu
@ 2025-08-12 12:13     ` Kefeng Wang
  0 siblings, 0 replies; 33+ messages in thread
From: Kefeng Wang @ 2025-08-12 12:13 UTC (permalink / raw)
  To: jane.chu, Andrew Morton, Muchun Song, Oscar Salvador,
	David Hildenbrand
  Cc: linux-mm



On 2025/8/7 4:43, jane.chu@oracle.com wrote:
> 
> 
> On 8/2/2025 12:31 AM, Kefeng Wang wrote:
>> Move the NUMA_NO_NODE check out of buddy and gigantic folio allocation
>> to clean up the code a bit.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/hugetlb.c | 7 +++----
>>   1 file changed, 3 insertions(+), 4 deletions(-)
>>
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index b16011c9645d..4f73b74a2cff 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -1479,8 +1479,6 @@ static struct folio *alloc_gigantic_folio(int 
>> order, gfp_t gfp_mask,
>>       struct folio *folio;
>>       bool retried = false;
>> -    if (nid == NUMA_NO_NODE)
>> -        nid = numa_mem_id();
>>   retry:
>>       folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
>>       if (!folio) {
>> @@ -1942,8 +1940,6 @@ static struct folio 
>> *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
>>           alloc_try_hard = false;
>>       if (alloc_try_hard)
>>           gfp_mask |= __GFP_RETRY_MAYFAIL;
>> -    if (nid == NUMA_NO_NODE)
>> -        nid = numa_mem_id();
> 
> Looks like you're also fixing a bug, because the code block was
>          if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))   <--
>                  alloc_try_hard = false;
>          if (alloc_try_hard)
>                  gfp_mask |= __GFP_RETRY_MAYFAIL;
>          if (nid == NUMA_NO_NODE)
>                  nid = numa_mem_id();
> In theory, NUMA_NO_NODE (-1) could be passed as 'nid' to node_isset().

alloc_surplus_hugetlb_folio() and hugetlb_hstate_alloc_pages_onenode() call
alloc_buddy_hugetlb_folio() with node_alloc_noretry = NULL, and
alloc_pool_huge_folio() calls alloc_buddy_hugetlb_folio() with nid !=
NUMA_NO_NODE, so there is no issue for now, but moving the check ahead
avoids the possible issue in the future.
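
To spell out the hazard, a simplified fragment (not the exact hugetlb
code) of how the ordering looks after the change:

	/* In only_alloc_fresh_hugetlb_folio(), before dispatching: */
	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	/*
	 * Later, in alloc_buddy_hugetlb_folio(), nid is always a real node
	 * id, so node_isset() can no longer be handed -1, which would index
	 * before the start of the nodemask bitmap.
	 */
	if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))
		alloc_try_hard = false;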


> 
>>       folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, 
>> nid, nmask);
>>       if (folio) {
>> @@ -1976,6 +1972,9 @@ static struct folio 
>> *only_alloc_fresh_hugetlb_folio(struct hstate *h,
>>       struct folio *folio;
>>       int order = huge_page_order(h);
>> +    if (nid == NUMA_NO_NODE)
>> +        nid = numa_mem_id();
>> +
>>       if (order > MAX_PAGE_ORDER)
>>           folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
>>       else
> Looks good, thanks!
> 
> Reviewed-by: Jane Chu <jane.chu@oracle.com>

Thanks.



^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread (newest message: 2025-08-12 12:13 UTC)

Thread overview: 33+ messages
2025-08-02  7:31 [PATCH 0/7] mm: hugetlb: cleanup and allocate frozen hugetlb folio Kefeng Wang
2025-08-02  7:31 ` [PATCH 1/7] mm: hugetlb: convert to alloc_fresh_hugetlb_hvo_folio() Kefeng Wang
2025-08-04 15:41   ` Sidhartha Kumar
2025-08-04 20:27   ` Vishal Moola (Oracle)
2025-08-05 14:21     ` Kefeng Wang
2025-08-05 17:56       ` Vishal Moola (Oracle)
2025-08-05 22:18   ` jane.chu
2025-08-06  0:33     ` jane.chu
2025-08-06  0:56       ` Kefeng Wang
2025-08-06 18:16         ` jane.chu
2025-08-02  7:31 ` [PATCH 2/7] mm: hugetlb: convert to prep_account_new_hugetlb_folio() Kefeng Wang
2025-08-04 15:54   ` Sidhartha Kumar
2025-08-04 20:36   ` Vishal Moola (Oracle)
2025-08-05 14:21     ` Kefeng Wang
2025-08-06  0:53     ` jane.chu
2025-08-02  7:31 ` [PATCH 3/7] mm; hugetlb: simpify alloc_buddy_hugetlb_folio() Kefeng Wang
2025-08-04 15:57   ` Sidhartha Kumar
2025-08-04 20:56   ` Vishal Moola (Oracle)
2025-08-05 14:22     ` Kefeng Wang
2025-08-02  7:31 ` [PATCH 4/7] mm: hugetlb: directly pass order when allocate a hugetlb folio Kefeng Wang
2025-08-04 16:22   ` Sidhartha Kumar
2025-08-06 20:05   ` jane.chu
2025-08-06 20:17     ` jane.chu
2025-08-02  7:31 ` [PATCH 5/7] mm: hugetlb: remove struct hstate from init_new_hugetlb_folio() Kefeng Wang
2025-08-04 16:13   ` Sidhartha Kumar
2025-08-06 20:08   ` jane.chu
2025-08-02  7:31 ` [PATCH 6/7] mm: hugeltb: check NUMA_NO_NODE in only_alloc_fresh_hugetlb_folio() Kefeng Wang
2025-08-04 19:09   ` Sidhartha Kumar
2025-08-06 20:43   ` jane.chu
2025-08-12 12:13     ` Kefeng Wang
2025-08-02  7:31 ` [PATCH 7/7] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
2025-08-07  1:22   ` jane.chu
2025-08-12 12:11     ` Kefeng Wang
