Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation
@ 2026-06-28 19:01 Sourav Panda
  2026-06-28 19:38 ` Andrew Morton
  2026-06-30 17:39 ` Usama Arif
  0 siblings, 2 replies; 4+ messages in thread
From: Sourav Panda @ 2026-06-28 19:01 UTC (permalink / raw)
  To: muchun.song, osalvador, akpm
  Cc: david, surenb, fvdl, gthelen, rientjes, souravpanda, linux-mm,
	linux-kernel

Currently, hugetlb_cma reservation only supports absolute sizes (e.g.,
hugetlb_cma=2G or hugetlb_cma=0:1G,1:1G). This can be restrictive in
heterogeneous environments or when deploying common kernel command lines
across machines with different memory capacities.

Add support for percentage-based hugetlb_cma reservation (e.g.,
hugetlb_cma=20% or hugetlb_cma=0:20%,1:10%).

The percentage is calculated against the total memory (for global
settings) or against the node-specific memory (for node-specific
settings) using memblock APIs during early boot.

Signed-off-by: Sourav Panda <souravpanda@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202606262023.IKUrn01I-lkp@intel.com/
---
Link: https://lore.kernel.org/linux-mm/20260625215900.2151690-1-souravpanda@google.com/ [v1]

v2:
- Use mul_u64_u32_div() instead of mult_frac() to avoid 64-bit division on 32-bit architectures (Reported by kernel test robot).
- Fix physical memory size truncation on 32-bit PAE by using phys_addr_t in memblock_node_memory_size() (Sashiko).
- Fix boot parameter override logic to respect standard last-one-wins precedence by clearing opposite values during parsing and clearing node-specific values when global values are parsed.
 .../admin-guide/kernel-parameters.txt         |  7 +-
 mm/hugetlb_cma.c                              | 96 ++++++++++++++++++-
 2 files changed, 97 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5a05b48d1684..846940ce2858 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2064,8 +2064,11 @@ Kernel parameters
 	hugetlb_cma=	[HW,CMA,EARLY] The size of a CMA area used for allocation
 			of gigantic hugepages. Or using node format, the size
 			of a CMA area per node can be specified.
-			Format: nn[KMGTPE] or (node format)
-				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
+			The size can be an absolute value (e.g., 2G) or a
+			percentage of the total memory or node memory (e.g., 20%).
+			Format: nn[KMGTPE] or nn% or (node format)
+				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]] or
+				<node>:nn%[,<node>:nn%]
 
 			Reserve a CMA area of given size and allocate gigantic
 			hugepages using the CMA allocator. If enabled, the
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index 7693ccefd0c6..684ffa3537d2 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -9,6 +9,9 @@
 #include <asm/setup.h>
 
 #include <linux/hugetlb.h>
+#include <linux/memblock.h>
+#include <linux/math.h>
+#include <linux/math64.h>
 #include "internal.h"
 #include "hugetlb_cma.h"
 
@@ -18,6 +21,28 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static bool hugetlb_cma_only __ro_after_init;
 static unsigned long hugetlb_cma_size __ro_after_init;
 
+static unsigned int hugetlb_cma_percent __initdata;
+static unsigned int hugetlb_cma_percent_in_node[MAX_NUMNODES] __initdata;
+
+#ifdef CONFIG_NUMA
+static phys_addr_t __init memblock_node_memory_size(int nid)
+{
+	struct memblock_region *reg;
+	phys_addr_t size = 0;
+
+	for_each_mem_region(reg) {
+		if (reg->nid == nid)
+			size += reg->size;
+	}
+	return size;
+}
+#else
+static phys_addr_t __init memblock_node_memory_size(int nid)
+{
+	return memblock_phys_mem_size();
+}
+#endif
+
 void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 	WARN_ON_ONCE(!cma_release_frozen(hugetlb_cma[folio_nid(folio)],
@@ -100,14 +125,28 @@ static int __init cmdline_parse_hugetlb_cma(char *p)
 			break;
 
 		if (s[count] == ':') {
+			char *next;
+
 			if (tmp >= MAX_NUMNODES)
 				break;
 			nid = array_index_nospec(tmp, MAX_NUMNODES);
 
 			s += count + 1;
-			tmp = memparse(s, &s);
-			hugetlb_cma_size_in_node[nid] = tmp;
-			hugetlb_cma_size += tmp;
+			tmp = memparse(s, &next);
+			if (*next == '%') {
+				if (tmp > 100) {
+					pr_warn("hugetlb_cma: invalid percentage %lu for node %d\n",
+						tmp, nid);
+					break;
+				}
+				hugetlb_cma_percent_in_node[nid] = tmp;
+				hugetlb_cma_size_in_node[nid] = 0;
+				s = next + 1;
+			} else {
+				hugetlb_cma_size_in_node[nid] = tmp;
+				hugetlb_cma_percent_in_node[nid] = 0;
+				s = next;
+			}
 
 			/*
 			 * Skip the separator if have one, otherwise
@@ -118,7 +157,28 @@ static int __init cmdline_parse_hugetlb_cma(char *p)
 			else
 				break;
 		} else {
-			hugetlb_cma_size = memparse(p, &p);
+			char *next;
+
+			tmp = memparse(p, &next);
+			if (*next == '%') {
+				if (tmp > 100) {
+					pr_warn("hugetlb_cma: invalid percentage %lu\n", tmp);
+				} else {
+					hugetlb_cma_percent = tmp;
+					hugetlb_cma_size = 0;
+					for (nid = 0; nid < MAX_NUMNODES; nid++) {
+						hugetlb_cma_size_in_node[nid] = 0;
+						hugetlb_cma_percent_in_node[nid] = 0;
+					}
+				}
+			} else {
+				hugetlb_cma_size = tmp;
+				hugetlb_cma_percent = 0;
+				for (nid = 0; nid < MAX_NUMNODES; nid++) {
+					hugetlb_cma_size_in_node[nid] = 0;
+					hugetlb_cma_percent_in_node[nid] = 0;
+				}
+			}
 			break;
 		}
 	}
@@ -144,8 +204,36 @@ void __init hugetlb_cma_reserve(void)
 {
 	unsigned long size, reserved, per_node, order;
 	bool node_specific_cma_alloc = false;
+	bool has_node_specific_param = false;
 	int nid;
 
+	for (nid = 0; nid < MAX_NUMNODES; nid++) {
+		if (hugetlb_cma_size_in_node[nid] || hugetlb_cma_percent_in_node[nid]) {
+			has_node_specific_param = true;
+			break;
+		}
+	}
+
+	if (has_node_specific_param) {
+		hugetlb_cma_size = 0;
+		for (nid = 0; nid < MAX_NUMNODES; nid++) {
+			if (hugetlb_cma_percent_in_node[nid]) {
+				phys_addr_t node_gfp_mem = memblock_node_memory_size(nid);
+				u64 s;
+
+				s = mul_u64_u32_div((u64)node_gfp_mem,
+						    hugetlb_cma_percent_in_node[nid],
+						    100);
+
+				hugetlb_cma_size_in_node[nid] = s;
+			}
+			hugetlb_cma_size += hugetlb_cma_size_in_node[nid];
+		}
+	} else if (hugetlb_cma_percent) {
+		hugetlb_cma_size = mul_u64_u32_div((u64)memblock_phys_mem_size(),
+						   hugetlb_cma_percent, 100);
+	}
+
 	if (!hugetlb_cma_size)
 		return;
 
-- 
2.55.0.rc0.799.gd6f94ed593-goog



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation
  2026-06-28 19:01 [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation Sourav Panda
@ 2026-06-28 19:38 ` Andrew Morton
  2026-06-28 19:42   ` Sourav Panda
  2026-06-30 17:39 ` Usama Arif
  1 sibling, 1 reply; 4+ messages in thread
From: Andrew Morton @ 2026-06-28 19:38 UTC (permalink / raw)
  To: Sourav Panda
  Cc: muchun.song, osalvador, david, surenb, fvdl, gthelen, rientjes,
	linux-mm, linux-kernel

On Sun, 28 Jun 2026 19:01:55 +0000 Sourav Panda <souravpanda@google.com> wrote:

> Currently, hugetlb_cma reservation only supports absolute sizes (e.g.,
> hugetlb_cma=2G or hugetlb_cma=0:1G,1:1G). This can be restrictive in
> heterogeneous environments or when deploying common kernel command lines
> across machines with different memory capacities.
> 
> Add support for percentage-based hugetlb_cma reservation (e.g.,
> hugetlb_cma=20% or hugetlb_cma=0:20%,1:10%).
> 
> The percentage is calculated against the total memory (for global
> settings) or against the node-specific memory (for node-specific
> settings) using memblock APIs during early boot.

Thanks, I'll queue this for testing and additional review.

There are a couple of pr_infos in this code, to give people an
understanding of how the kernel has sized these things.  Would it make
sense to enhance those, to help our operator to understand/confirm what
effect the chosen percentage configuration had?

> Signed-off-by: Sourav Panda <souravpanda@google.com>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202606262023.IKUrn01I-lkp@intel.com/

Robot found issues in the v1 patch.  This development-time info isn't
something which we (at least, I) normally include in the permanent
kernel record.



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation
  2026-06-28 19:38 ` Andrew Morton
@ 2026-06-28 19:42   ` Sourav Panda
  0 siblings, 0 replies; 4+ messages in thread
From: Sourav Panda @ 2026-06-28 19:42 UTC (permalink / raw)
  To: Andrew Morton
  Cc: muchun.song, osalvador, david, surenb, fvdl, gthelen, rientjes,
	linux-mm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1595 bytes --]

On Sun, Jun 28, 2026 at 12:38 PM Andrew Morton <akpm@linux-foundation.org>
wrote:

> On Sun, 28 Jun 2026 19:01:55 +0000 Sourav Panda <souravpanda@google.com>
> wrote:
>
> > Currently, hugetlb_cma reservation only supports absolute sizes (e.g.,
> > hugetlb_cma=2G or hugetlb_cma=0:1G,1:1G). This can be restrictive in
> > heterogeneous environments or when deploying common kernel command lines
> > across machines with different memory capacities.
> >
> > Add support for percentage-based hugetlb_cma reservation (e.g.,
> > hugetlb_cma=20% or hugetlb_cma=0:20%,1:10%).
> >
> > The percentage is calculated against the total memory (for global
> > settings) or against the node-specific memory (for node-specific
> > settings) using memblock APIs during early boot.
>
> Thanks, I'll queue this for testing and additional review.
>

Thank you very much!


>
> There are a couple of pr_infos in this code, to give people an
> understanding of how the kernel has sized these things.  Would it make
> sense to enhance those, to help our operator to understand/confirm what
> effect the chosen percentage configuration had?
>

Absolutely, will add them!


>
> > Signed-off-by: Sourav Panda <souravpanda@google.com>
> > Reported-by: kernel test robot <lkp@intel.com>
> > Closes:
> https://lore.kernel.org/oe-kbuild-all/202606262023.IKUrn01I-lkp@intel.com/
>
> Robot found issues in the v1 patch.  This development-time info isn't
> something which we (at least, I) normally include in the permanent
> kernel record.
>

Gotcha! Thanks for the feedback :)

[-- Attachment #2: Type: text/html, Size: 2691 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation
  2026-06-28 19:01 [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation Sourav Panda
  2026-06-28 19:38 ` Andrew Morton
@ 2026-06-30 17:39 ` Usama Arif
  1 sibling, 0 replies; 4+ messages in thread
From: Usama Arif @ 2026-06-30 17:39 UTC (permalink / raw)
  To: Sourav Panda
  Cc: Usama Arif, muchun.song, osalvador, akpm, david, surenb, fvdl,
	gthelen, rientjes, linux-mm, linux-kernel

On Sun, 28 Jun 2026 19:01:55 +0000 Sourav Panda <souravpanda@google.com> wrote:

> Currently, hugetlb_cma reservation only supports absolute sizes (e.g.,
> hugetlb_cma=2G or hugetlb_cma=0:1G,1:1G). This can be restrictive in
> heterogeneous environments or when deploying common kernel command lines
> across machines with different memory capacities.
> 
> Add support for percentage-based hugetlb_cma reservation (e.g.,
> hugetlb_cma=20% or hugetlb_cma=0:20%,1:10%).
> 
> The percentage is calculated against the total memory (for global
> settings) or against the node-specific memory (for node-specific
> settings) using memblock APIs during early boot.
> 
> Signed-off-by: Sourav Panda <souravpanda@google.com>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202606262023.IKUrn01I-lkp@intel.com/
> ---
> Link: https://lore.kernel.org/linux-mm/20260625215900.2151690-1-souravpanda@google.com/ [v1]
> 
> v2:
> - Use mul_u64_u32_div() instead of mult_frac() to avoid 64-bit division on 32-bit architectures (Reported by kernel test robot).
> - Fix physical memory size truncation on 32-bit PAE by using phys_addr_t in memblock_node_memory_size() (Sashiko).
> - Fix boot parameter override logic to respect standard last-one-wins precedence by clearing opposite values during parsing and clearing node-specific values when global values are parsed.
>  .../admin-guide/kernel-parameters.txt         |  7 +-
>  mm/hugetlb_cma.c                              | 96 ++++++++++++++++++-
>  2 files changed, 97 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 5a05b48d1684..846940ce2858 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2064,8 +2064,11 @@ Kernel parameters
>  	hugetlb_cma=	[HW,CMA,EARLY] The size of a CMA area used for allocation
>  			of gigantic hugepages. Or using node format, the size
>  			of a CMA area per node can be specified.
> -			Format: nn[KMGTPE] or (node format)
> -				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
> +			The size can be an absolute value (e.g., 2G) or a
> +			percentage of the total memory or node memory (e.g., 20%).
> +			Format: nn[KMGTPE] or nn% or (node format)
> +				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]] or
> +				<node>:nn%[,<node>:nn%]
>  
>  			Reserve a CMA area of given size and allocate gigantic
>  			hugepages using the CMA allocator. If enabled, the
> diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
> index 7693ccefd0c6..684ffa3537d2 100644
> --- a/mm/hugetlb_cma.c
> +++ b/mm/hugetlb_cma.c
> @@ -9,6 +9,9 @@
>  #include <asm/setup.h>
>  
>  #include <linux/hugetlb.h>
> +#include <linux/memblock.h>
> +#include <linux/math.h>
> +#include <linux/math64.h>
>  #include "internal.h"
>  #include "hugetlb_cma.h"
>  
> @@ -18,6 +21,28 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
>  static bool hugetlb_cma_only __ro_after_init;
>  static unsigned long hugetlb_cma_size __ro_after_init;
>  
> +static unsigned int hugetlb_cma_percent __initdata;
> +static unsigned int hugetlb_cma_percent_in_node[MAX_NUMNODES] __initdata;
> +
> +#ifdef CONFIG_NUMA
> +static phys_addr_t __init memblock_node_memory_size(int nid)
> +{
> +	struct memblock_region *reg;
> +	phys_addr_t size = 0;
> +
> +	for_each_mem_region(reg) {
> +		if (reg->nid == nid)
> +			size += reg->size;
> +	}
> +	return size;
> +}
> +#else
> +static phys_addr_t __init memblock_node_memory_size(int nid)
> +{
> +	return memblock_phys_mem_size();
> +}
> +#endif
> +
>  void hugetlb_cma_free_frozen_folio(struct folio *folio)
>  {
>  	WARN_ON_ONCE(!cma_release_frozen(hugetlb_cma[folio_nid(folio)],
> @@ -100,14 +125,28 @@ static int __init cmdline_parse_hugetlb_cma(char *p)
>  			break;
>  
>  		if (s[count] == ':') {
> +			char *next;
> +
>  			if (tmp >= MAX_NUMNODES)
>  				break;
>  			nid = array_index_nospec(tmp, MAX_NUMNODES);
>  
>  			s += count + 1;
> -			tmp = memparse(s, &s);
> -			hugetlb_cma_size_in_node[nid] = tmp;
> -			hugetlb_cma_size += tmp;
> +			tmp = memparse(s, &next);
> +			if (*next == '%') {
> +				if (tmp > 100) {
> +					pr_warn("hugetlb_cma: invalid percentage %lu for node %d\n",
> +						tmp, nid);
> +					break;
> +				}
> +				hugetlb_cma_percent_in_node[nid] = tmp;
> +				hugetlb_cma_size_in_node[nid] = 0;
> +				s = next + 1;
> +			} else {
> +				hugetlb_cma_size_in_node[nid] = tmp;
> +				hugetlb_cma_percent_in_node[nid] = 0;
> +				s = next;
> +			}
>  
>  			/*
>  			 * Skip the separator if have one, otherwise
> @@ -118,7 +157,28 @@ static int __init cmdline_parse_hugetlb_cma(char *p)
>  			else
>  				break;
>  		} else {
> -			hugetlb_cma_size = memparse(p, &p);
> +			char *next;
> +
> +			tmp = memparse(p, &next);
> +			if (*next == '%') {
> +				if (tmp > 100) {
> +					pr_warn("hugetlb_cma: invalid percentage %lu\n", tmp);
> +				} else {
> +					hugetlb_cma_percent = tmp;
> +					hugetlb_cma_size = 0;
> +					for (nid = 0; nid < MAX_NUMNODES; nid++) {
> +						hugetlb_cma_size_in_node[nid] = 0;
> +						hugetlb_cma_percent_in_node[nid] = 0;
> +					}
> +				}
> +			} else {
> +				hugetlb_cma_size = tmp;
> +				hugetlb_cma_percent = 0;
> +				for (nid = 0; nid < MAX_NUMNODES; nid++) {
> +					hugetlb_cma_size_in_node[nid] = 0;
> +					hugetlb_cma_percent_in_node[nid] = 0;
> +				}
> +			}
>  			break;
>  		}
>  	}
> @@ -144,8 +204,36 @@ void __init hugetlb_cma_reserve(void)
>  {
>  	unsigned long size, reserved, per_node, order;
>  	bool node_specific_cma_alloc = false;
> +	bool has_node_specific_param = false;
>  	int nid;
>  
> +	for (nid = 0; nid < MAX_NUMNODES; nid++) {
> +		if (hugetlb_cma_size_in_node[nid] || hugetlb_cma_percent_in_node[nid]) {
> +			has_node_specific_param = true;
> +			break;
> +		}
> +	}
> +
> +	if (has_node_specific_param) {
> +		hugetlb_cma_size = 0;
> +		for (nid = 0; nid < MAX_NUMNODES; nid++) {
> +			if (hugetlb_cma_percent_in_node[nid]) {
> +				phys_addr_t node_gfp_mem = memblock_node_memory_size(nid);
> +				u64 s;
> +
> +				s = mul_u64_u32_div((u64)node_gfp_mem,
> +						    hugetlb_cma_percent_in_node[nid],
> +						    100);
> +
> +				hugetlb_cma_size_in_node[nid] = s;
> +			}
> +			hugetlb_cma_size += hugetlb_cma_size_in_node[nid];
> +		}
> +	} else if (hugetlb_cma_percent) {
> +		hugetlb_cma_size = mul_u64_u32_div((u64)memblock_phys_mem_size(),
> +						   hugetlb_cma_percent, 100);
> +	}
> +
>  	if (!hugetlb_cma_size)
>  		return;

Can this round percentage-derived sizes to the gigantic page size before the
existing alignment checks run?

For example, on an x86_64 machine with 64 GiB of memory, hugetlb_cma=20%
computes 12.8 GiB.  The later alignment check in hugetlb_cma_reserve() requires
the total to be a multiple of the gigantic page size, so wouldn't the
documented 20% example be rejected and the reservation disabled?

>  
> -- 
> 2.55.0.rc0.799.gd6f94ed593-goog
> 
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-30 17:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-28 19:01 [PATCH v2] mm/hugetlb_cma: support percentage-based hugetlb_cma reservation Sourav Panda
2026-06-28 19:38 ` Andrew Morton
2026-06-28 19:42   ` Sourav Panda
2026-06-30 17:39 ` Usama Arif

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.