All of lore.kernel.org
 help / color / mirror / Atom feed
From: Srikar Dronamraju <srikar@linux.ibm.com>
To: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>,
	Madhavan Srinivasan <maddy@linux.ibm.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Nicholas Piggin <npiggin@gmail.com>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	Naveen N Rao <naveen@kernel.org>
Cc: skiboot@lists.ozlags.org, arbab@linux.ibm.com,
	mahesh@linux.ibm.com, Srikar Dronamraju <srikar@linux.ibm.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH] powerpc/topology: Support coregroup in PowerNV
Date: Sun, 24 May 2026 06:30:16 +0530	[thread overview]
Message-ID: <20260524010017.140408-1-srikar@linux.ibm.com> (raw)

Coregroup support was only available on PowerPC on PowerVM LPARs.
However if firmware were to expose coregroup-id to the kernel, then
coregroup can even be supported on PowerNV too.

PowerPC Linux kernel will detect support for coregroup by looking at the
primary_domain_index. Till now on PowerNV systems, primary_domain_index
has been the penultimate domain in cpunode ibm,associativity device-tree
property. This would be taken as hint that coregroup support is not
available in the firmware.

If on PowerNV systems, primary_domain_index is not the penultimate
domain in cpunode ibm,associativity device-tree property, then it would
be taken as a hint that coregroup support is available in the firmware.

This logic makes it compatible with PowerVM Systems, where
primary_domain_index is not the penultimate domain in cpunode
ibm,associativity device-tree property.

$ lscpu
Architecture:        ppc64le
Byte Order:          Little Endian
CPU(s):              480
On-line CPU(s) list: 0-479
Thread(s) per core:  8
Core(s) per socket:  15
Socket(s):           4
NUMA node(s):        4
Model:               2.0 (pvr 0080 0200)
Model name:          POWER10, altivec supported
CPU max MHz:         3249.0000
CPU min MHz:         3249.0000
L1d cache:           32K
L1i cache:           48K
L2 cache:            1024K
L3 cache:            4096K
NUMA node0 CPU(s):   0-119
NUMA node1 CPU(s):   120-239
NUMA node2 CPU(s):   240-359
NUMA node3 CPU(s):   360-479

with-out patched firmware and/or Linux-kernel
---------------------------------------------
$ grep -h -r . /sys/devices/system/cpu/*/topology/die_id |sort | uniq -c | sort -n -r
    120 27
    120 18
    120 9
    120 0

$ grep -h -r . /sys/devices/system/cpu/*/topology/die_cpus_list |sort | uniq -c | sort -n -r
    120 360-479
    120 240-359
    120 120-239
    120 0-119

with patched firmware and Linux-kernel
--------------------------------------
grep -h -r . /sys/devices/system/cpu/*/topology/die_id |sort | uniq -c | sort -n -r
     64 6
     64 4
     64 2
     64 0
     56 7
     56 5
     56 3
     56 1
grep -h -r . /sys/devices/system/cpu/*/topology/die_cpus_list |sort | uniq -c | sort -n -r
     64 360-375,392-407,424-439,456-471
     64 240-255,272-287,296-311,328-343
     64 120-135,152-167,184-199,208-223
     64 0-15,32-47,64-79,88-103
     56 376-391,408-423,440-455,472-479
     56 256-271,288-295,312-327,344-359
     56 16-31,48-63,80-87,104-119
     56 136-151,168-183,200-207,224-239

Observation:
Without the patched kernel and/or skiboot, die-id and numa were same.
With patched kernel and/or skiboot, we see 2 die-id per node.
Signed-off-by: Srikar Dronamraju <srikar@linux.ibm.com>
---
 arch/powerpc/include/asm/topology.h | 15 +++--
 arch/powerpc/mm/numa.c              | 87 ++++++++++++++++++++++-------
 2 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 66ed5fe1b718..568e6bc55726 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -71,6 +71,7 @@ extern void map_cpu_to_node(int cpu, int node);
 extern void unmap_cpu_from_node(unsigned long cpu);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+extern int cpu_to_coregroup_id(int cpu);
 #else
 
 static inline int early_cpu_to_node(int cpu) { return 0; }
@@ -107,14 +108,6 @@ static inline void map_cpu_to_node(int cpu, int node) {}
 static inline void unmap_cpu_from_node(unsigned long cpu) {}
 #endif /* CONFIG_HOTPLUG_CPU */
 #endif /* CONFIG_SMP */
-
-#endif /* CONFIG_NUMA */
-
-#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
-void find_and_update_cpu_nid(int cpu);
-extern int cpu_to_coregroup_id(int cpu);
-#else
-static inline void find_and_update_cpu_nid(int cpu) {}
 static inline int cpu_to_coregroup_id(int cpu)
 {
 #ifdef CONFIG_SMP
@@ -124,6 +117,12 @@ static inline int cpu_to_coregroup_id(int cpu)
 #endif
 }
 
+#endif /* CONFIG_NUMA */
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
+void find_and_update_cpu_nid(int cpu);
+#else
+static inline void find_and_update_cpu_nid(int cpu) {}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #include <asm-generic/topology.h>
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f4cf3ae036de..9b45cc9e1f27 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -432,7 +432,7 @@ static void __init initialize_form2_numa_distance_lookup_table(void)
 
 static int __init find_primary_domain_index(void)
 {
-	int index;
+	int index = -1;
 	struct device_node *root;
 
 	/*
@@ -502,12 +502,9 @@ static int __init find_primary_domain_index(void)
 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
 	}
 
-	of_node_put(root);
-	return index;
-
 err:
 	of_node_put(root);
-	return -1;
+	return index;
 }
 
 static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
@@ -892,12 +889,32 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
 	return 0;
 }
 
+/*
+ * If hierarchy extends beyond primary_domain_index + 1, then next
+ * level corresponds to coregroup.
+ */
+static int detect_and_enable_coregroup(const __be32 *associativity, int index)
+{
+	if (!associativity)
+		return -1;
+
+	if (!index) {
+		index = of_read_number(associativity, 1);
+
+		if (index > primary_domain_index + 1)
+			coregroup_enabled = 1;
+		else
+			index = -1;
+	}
+	return index;
+}
+
 static int __init parse_numa_properties(void)
 {
 	struct device_node *memory, *pci;
-	int default_nid = 0;
-	unsigned long i;
+	int default_nid = 0, index = 0;
 	const __be32 *associativity;
+	unsigned long i;
 
 	if (numa_enabled == 0) {
 		pr_warn("disabled by user\n");
@@ -930,7 +947,6 @@ static int __init parse_numa_properties(void)
 	 */
 	for_each_present_cpu(i) {
 		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
-		struct device_node *cpu;
 		int nid = NUMA_NO_NODE;
 
 		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
@@ -938,7 +954,10 @@ static int __init parse_numa_properties(void)
 		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
 			nid = associativity_to_nid(vphn_assoc);
 			initialize_form1_numa_distance(vphn_assoc);
+			if (!index)
+				index = detect_and_enable_coregroup(vphn_assoc, index);
 		} else {
+			struct device_node *cpu;
 
 			/*
 			 * Don't fall back to default_nid yet -- we will plug
@@ -951,6 +970,8 @@ static int __init parse_numa_properties(void)
 			associativity = of_get_associativity(cpu);
 			if (associativity) {
 				nid = associativity_to_nid(associativity);
+				if (!index)
+					index = detect_and_enable_coregroup(associativity, index);
 				initialize_form1_numa_distance(associativity);
 			}
 			of_node_put(cpu);
@@ -1431,9 +1452,26 @@ void find_and_update_cpu_nid(int cpu)
 	pr_debug("%s:%d cpu %d nid %d\n", __func__, __LINE__, cpu, new_nid);
 }
 
+static int topology_update_init(void)
+{
+	topology_inited = 1;
+	return 0;
+}
+device_initcall(topology_update_init);
+
+#else
+static long vphn_get_associativity(unsigned long cpu,
+					__be32 *associativity)
+{
+	return -1;
+}
+#endif /* CONFIG_PPC_SPLPAR */
+
 int cpu_to_coregroup_id(int cpu)
 {
-	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int coregroup_id = cpu_to_core_id(cpu);
+	struct device_node *cpunode = NULL;
+	const __be32 *associativity;
 	int index;
 
 	if (cpu < 0 || cpu > nr_cpu_ids)
@@ -1442,24 +1480,31 @@ int cpu_to_coregroup_id(int cpu)
 	if (!coregroup_enabled)
 		goto out;
 
-	if (!firmware_has_feature(FW_FEATURE_VPHN))
-		goto out;
+	if (firmware_has_feature(FW_FEATURE_VPHN)) {
+		__be32 tmp[VPHN_ASSOC_BUFSIZE] = {0};
 
-	if (vphn_get_associativity(cpu, associativity))
+		if (vphn_get_associativity(cpu, tmp))
+			goto out;
+
+		associativity = tmp;
+
+	} else {
+		cpunode = of_get_cpu_node(cpu, NULL);
+		if (!cpunode)
+			goto out;
+
+		associativity = of_get_associativity(cpunode);
+	}
+	if (!associativity)
 		goto out;
 
 	index = of_read_number(associativity, 1);
 	if (index > primary_domain_index + 1)
-		return of_read_number(&associativity[index - 1], 1);
+		coregroup_id = of_read_number(&associativity[index - 1], 1);
 
 out:
-	return cpu_to_core_id(cpu);
-}
+	if (cpunode)
+		of_node_put(cpunode);
 
-static int topology_update_init(void)
-{
-	topology_inited = 1;
-	return 0;
+	return coregroup_id;
 }
-device_initcall(topology_update_init);
-#endif /* CONFIG_PPC_SPLPAR */
-- 
2.43.0



             reply	other threads:[~2026-05-24  1:01 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-24  1:00 Srikar Dronamraju [this message]
2026-05-29  7:58 ` [PATCH] powerpc/topology: Support coregroup in PowerNV Christophe Leroy (CS GROUP)
2026-05-29 10:31   ` Srikar Dronamraju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260524010017.140408-1-srikar@linux.ibm.com \
    --to=srikar@linux.ibm.com \
    --cc=arbab@linux.ibm.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=maddy@linux.ibm.com \
    --cc=mahesh@linux.ibm.com \
    --cc=mpe@ellerman.id.au \
    --cc=naveen@kernel.org \
    --cc=npiggin@gmail.com \
    --cc=skiboot@lists.ozlags.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.