public inbox for linux-cxl@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ACPI: add a boot parameter to disable parsing CFMWS during NUMA init
@ 2026-03-04  8:06 Haifeng Xu
  2026-03-04 14:10 ` kernel test robot
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Haifeng Xu @ 2026-03-04  8:06 UTC (permalink / raw)
  To: rafael, lenb, dan.j.williams, jonathan.cameron
  Cc: dave, dave.jiang, alison.schofield, vishal.l.verma, ira.weiny,
	linux-cxl, linux-acpi, linux-kernel, Haifeng Xu

On an Intel(R) Xeon(R) 6746E machine that supports CXL memory,
there are 20 possible nodes (0-19). However, only two NUMA nodes (0-1)
have memory; the remaining nodes (2-19) detected by CEDT are memoryless.

The problem is that when creating many pods, the shrinker map size
needs to be expanded for all memory cgroups in expand_shrinker_info().
If the number of possible nodes is too large, the holding time of
the shrinker lock grows significantly.

In this case, there is no CXL memory installed in the machine, so those
memoryless nodes are useless to us and there is no need to set them
in 'numa_nodes_parsed'. After disabling CFMWS parsing, the pod creation
time is reduced from over 10 minutes to approximately 150 seconds in
our internal test.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
---
 arch/x86/mm/numa.c       |  2 ++
 drivers/acpi/numa/srat.c | 29 ++++++++++++++++++++++-------
 include/acpi/acpi_numa.h |  6 ++++++
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 7a97327140df..b127bb65d360 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -37,6 +37,8 @@ static __init int numa_setup(char *opt)
 		disable_srat();
 	if (!strncmp(opt, "nohmat", 6))
 		disable_hmat();
+	if (!strncmp(opt, "nocfmws", 7))
+		disable_cfmws();
 	return 0;
 }
 early_param("numa", numa_setup);
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index aa87ee1583a4..8716d70043fe 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -31,6 +31,7 @@ static int node_to_pxm_map[MAX_NUMNODES]
 
 unsigned char acpi_srat_revision __initdata;
 static int acpi_numa __initdata;
+static int cfmws_numa __initdata;
 
 static int last_real_pxm;
 
@@ -39,6 +40,12 @@ void __init disable_srat(void)
 	acpi_numa = -1;
 }
 
+void __init disable_cfmws(void)
+{
+	cfmws_numa = -1;
+}
+
+
 int pxm_to_node(int pxm)
 {
 	if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off)
@@ -313,6 +320,12 @@ int __init srat_disabled(void)
 	return acpi_numa < 0;
 }
 
+int __init cfmws_disabled(void)
+{
+	return cfmws_numa < 0;
+}
+
+
 __weak int __init numa_fill_memblks(u64 start, u64 end)
 {
 	return NUMA_NO_MEMBLK;
@@ -648,14 +661,16 @@ int __init acpi_numa_init(void)
 	 */
 
 	/* fake_pxm is the next unused PXM value after SRAT parsing */
-	for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) {
-		if (node_to_pxm_map[i] > fake_pxm)
-			fake_pxm = node_to_pxm_map[i];
+	if (!cfmws_disabled()) {
+		for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) {
+			if (node_to_pxm_map[i] > fake_pxm)
+				fake_pxm = node_to_pxm_map[i];
+		}
+		last_real_pxm = fake_pxm;
+		fake_pxm++;
+		acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
+				      &fake_pxm);
 	}
-	last_real_pxm = fake_pxm;
-	fake_pxm++;
-	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
-			      &fake_pxm);
 
 	if (cnt < 0)
 		return cnt;
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index 99b960bd473c..2435f60e56ce 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -21,6 +21,7 @@ extern int fix_pxm_node_maps(int max_nid);
 
 extern void bad_srat(void);
 extern int srat_disabled(void);
+extern void disable_cfmws(void);
 
 #else				/* CONFIG_ACPI_NUMA */
 static inline int fix_pxm_node_maps(int max_nid)
@@ -30,6 +31,11 @@ static inline int fix_pxm_node_maps(int max_nid)
 static inline void disable_srat(void)
 {
 }
+
+static inline void disable_cfmws(void)
+{
+}
+
 static inline int pxm_to_node(int pxm)
 {
 	return 0;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH] ACPI: add a boot parameter to disable parsing CFMWS during NUMA init
@ 2026-03-04  6:46 Haifeng Xu
  2026-03-04  7:55 ` Haifeng Xu
  0 siblings, 1 reply; 12+ messages in thread
From: Haifeng Xu @ 2026-03-04  6:46 UTC (permalink / raw)
  To: rafael, lenb, dan.j.williams, jonathan.cameron
  Cc: dave, dave.jiang, alison.schofield, vishal.l.verma, ira.weiny,
	linux-cxl, linux-acpi, linux-kernel, Haifeng Xu

On an Intel(R) Xeon(R) 6746E machine that supports CXL memory,
there are 20 possible nodes (0-19). However, only two NUMA nodes (0-1)
have memory; the remaining nodes (2-19) detected by CEDT are memoryless.

The problem is that when creating many pods, the shrinker map size
needs to be expanded for all memory cgroups in expand_shrinker_info().
If the number of possible nodes is too large, the holding time of
the shrinker lock grows significantly.

In this case, there is no CXL memory installed in the machine, so those
memoryless nodes are useless to us and there is no need to set them
in 'numa_nodes_parsed'. After disabling CFMWS parsing, the pod creation
time is reduced from over 10 minutes to approximately 150 seconds in
our internal test.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
---
 drivers/acpi/numa/srat.c | 29 ++++++++++++++++++++++-------
 include/acpi/acpi_numa.h |  6 ++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index aa87ee1583a4..8716d70043fe 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -31,6 +31,7 @@ static int node_to_pxm_map[MAX_NUMNODES]
 
 unsigned char acpi_srat_revision __initdata;
 static int acpi_numa __initdata;
+static int cfmws_numa __initdata;
 
 static int last_real_pxm;
 
@@ -39,6 +40,12 @@ void __init disable_srat(void)
 	acpi_numa = -1;
 }
 
+void __init disable_cfmws(void)
+{
+	cfmws_numa = -1;
+}
+
+
 int pxm_to_node(int pxm)
 {
 	if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off)
@@ -313,6 +320,12 @@ int __init srat_disabled(void)
 	return acpi_numa < 0;
 }
 
+int __init cfmws_disabled(void)
+{
+	return cfmws_numa < 0;
+}
+
+
 __weak int __init numa_fill_memblks(u64 start, u64 end)
 {
 	return NUMA_NO_MEMBLK;
@@ -648,14 +661,16 @@ int __init acpi_numa_init(void)
 	 */
 
 	/* fake_pxm is the next unused PXM value after SRAT parsing */
-	for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) {
-		if (node_to_pxm_map[i] > fake_pxm)
-			fake_pxm = node_to_pxm_map[i];
+	if (!cfmws_disabled()) {
+		for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) {
+			if (node_to_pxm_map[i] > fake_pxm)
+				fake_pxm = node_to_pxm_map[i];
+		}
+		last_real_pxm = fake_pxm;
+		fake_pxm++;
+		acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
+				      &fake_pxm);
 	}
-	last_real_pxm = fake_pxm;
-	fake_pxm++;
-	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
-			      &fake_pxm);
 
 	if (cnt < 0)
 		return cnt;
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index 99b960bd473c..2435f60e56ce 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -21,6 +21,7 @@ extern int fix_pxm_node_maps(int max_nid);
 
 extern void bad_srat(void);
 extern int srat_disabled(void);
+extern void disable_cfmws(void);
 
 #else				/* CONFIG_ACPI_NUMA */
 static inline int fix_pxm_node_maps(int max_nid)
@@ -30,6 +31,11 @@ static inline int fix_pxm_node_maps(int max_nid)
 static inline void disable_srat(void)
 {
 }
+
+static inline void disable_cfmws(void)
+{
+}
+
 static inline int pxm_to_node(int pxm)
 {
 	return 0;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2026-03-05 23:01 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-04  8:06 [PATCH] ACPI: add a boot parameter to disable parsing CFMWS during NUMA init Haifeng Xu
2026-03-04 14:10 ` kernel test robot
2026-03-04 15:11 ` kernel test robot
2026-03-04 17:16 ` Gregory Price
2026-03-05  4:18   ` Haifeng Xu
2026-03-05  6:30     ` Gregory Price
2026-03-05  7:43       ` Haifeng Xu
2026-03-05 19:57 ` kernel test robot
2026-03-05 20:19 ` kernel test robot
2026-03-05 23:00 ` kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2026-03-04  6:46 Haifeng Xu
2026-03-04  7:55 ` Haifeng Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox