* [PATCH] powerpc/smp: Dynamically build powerpc topology
@ 2023-08-30 12:26 Srikar Dronamraju
2023-09-04 22:10 ` Peter Zijlstra
2023-10-20 12:10 ` Michael Ellerman
0 siblings, 2 replies; 5+ messages in thread
From: Srikar Dronamraju @ 2023-08-30 12:26 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Mark Rutland, Srikar Dronamraju, Peter Zijlstra,
ndesaulniers, linux-kernel, Nicholas Piggin, linuxppc-dev,
Josh Poimboeuf
Currently there are four powerpc specific sched topologies. These are
all statically defined. However not all these topologies are used by
all powerpc systems.
To avoid unnecessary degenerations by the scheduler, masks and flags
are compared. However, if the sched topologies are built dynamically then
the code is simpler and there are greater chances of avoiding
degenerations.
Even x86 builds its sched topologies dynamically and new changes are
very similar to the way x86 is building its topologies.
System Configuration
type=Shared mode=Uncapped smt=8 lcpu=128 mem=1063126592 kB cpus=96 ent=40.00
$ lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 1024
On-line CPU(s) list: 0-1023
Model name: POWER10 (architected), altivec supported
Model: 2.0 (pvr 0080 0200)
Thread(s) per core: 8
Core(s) per socket: 32
Socket(s): 4
Hypervisor vendor: pHyp
Virtualization type: para
L1d cache: 8 MiB (256 instances)
L1i cache: 12 MiB (256 instances)
NUMA node(s): 4
From dmesg of v6.5
[ 0.174444] smp: Bringing up secondary CPUs ...
[ 3.918535] smp: Brought up 4 nodes, 1024 CPUs
[ 38.001402] sysrq: Changing Loglevel
[ 38.001446] sysrq: Loglevel set to 9
From dmesg of v6.5 + patch
[ 0.174462] smp: Bringing up secondary CPUs ...
[ 3.421462] smp: Brought up 4 nodes, 1024 CPUs
[ 35.417917] sysrq: Changing Loglevel
[ 35.417959] sysrq: Loglevel set to 9
5 runs of ppc64_cpu --smt=1 (time measured: lower is better)
Kernel N Min Max Median Avg Stddev %Change
v6.5 5 518.08 574.27 528.61 535.388 22.341542
+patch 5 481.73 495.47 484.21 486.402 5.7997 -9.14963
5 runs of ppc64_cpu --smt=8 (time measured: lower is better)
Kernel N Min Max Median Avg Stddev %Change
v6.5 5 1094.12 1117.1 1108.97 1106.3 8.606361
+patch 5 1067.5 1090.03 1073.89 1076.574 9.4189347 -2.68697
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
arch/powerpc/kernel/smp.c | 78 ++++++++++++++-------------------------
1 file changed, 28 insertions(+), 50 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 48b8161179a8..c16443a04c26 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -92,15 +92,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);
-enum {
-#ifdef CONFIG_SCHED_SMT
- smt_idx,
-#endif
- cache_idx,
- mc_idx,
- die_idx,
-};
-
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
#define THREAD_GROUP_SHARE_L2_L3 2
@@ -1048,16 +1039,6 @@ static const struct cpumask *cpu_mc_mask(int cpu)
return cpu_coregroup_mask(cpu);
}
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
- { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
static int __init init_big_cores(void)
{
int cpu;
@@ -1676,9 +1657,11 @@ void start_secondary(void *unused)
BUG();
}
-static void __init fixup_topology(void)
+static struct sched_domain_topology_level powerpc_topology[6];
+
+static void __init build_sched_topology(void)
{
- int i;
+ int i = 0;
if (is_shared_processor()) {
asym_pack_flag = SD_ASYM_PACKING;
@@ -1690,36 +1673,33 @@ static void __init fixup_topology(void)
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- powerpc_topology[smt_idx].mask = smallcore_smt_mask;
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
+ } else {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
}
#endif
+ if (shared_caches) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
+ };
+ }
+ if (has_coregroup_support()) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
+ };
+ }
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE)
+ };
- if (!has_coregroup_support())
- powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
-
- /*
- * Try to consolidate topology levels here instead of
- * allowing scheduler to degenerate.
- * - Dont consolidate if masks are different.
- * - Dont consolidate if sd_flags exists and are different.
- */
- for (i = 1; i <= die_idx; i++) {
- if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
- continue;
-
- if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
- powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
- continue;
-
- if (!powerpc_topology[i - 1].sd_flags)
- powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+ /* There must be one trailing NULL entry left. */
+ BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
- powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
- powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
-#ifdef CONFIG_SCHED_DEBUG
- powerpc_topology[i].name = powerpc_topology[i + 1].name;
-#endif
- }
+ set_sched_topology(powerpc_topology);
}
void __init smp_cpus_done(unsigned int max_cpus)
@@ -1734,9 +1714,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
smp_ops->bringup_done();
dump_numa_cpu_topology();
-
- fixup_topology();
- set_sched_topology(powerpc_topology);
+ build_sched_topology();
}
#ifdef CONFIG_HOTPLUG_CPU
--
2.41.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH] powerpc/smp: Dynamically build powerpc topology
2023-08-30 12:26 [PATCH] powerpc/smp: Dynamically build powerpc topology Srikar Dronamraju
@ 2023-09-04 22:10 ` Peter Zijlstra
2023-09-05 5:37 ` Srikar Dronamraju
2023-10-20 12:10 ` Michael Ellerman
1 sibling, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2023-09-04 22:10 UTC (permalink / raw)
To: Srikar Dronamraju
Cc: Nathan Lynch, Mark Rutland, ndesaulniers, linux-kernel,
Nicholas Piggin, linuxppc-dev, Josh Poimboeuf
On Wed, Aug 30, 2023 at 05:56:14PM +0530, Srikar Dronamraju wrote:
> Currently there are four powerpc specific sched topologies. These are
> all statically defined. However not all these topologies are used by
> all powerpc systems.
>
> To avoid unnecessary degenerations by the scheduler , masks and flags
> are compared. However if the sched topologies are build dynamically then
> the code is simpler and there are greater chances of avoiding
> degenerations.
>
> Even x86 builds its sched topologies dynamically and new changes are
> very similar to the way x86 is building its topologies.
You're not stating it explicitly, but you're doing this as a performance
optimization, right? The x86 thing didn't particularly care about
avoiding degenerate topologies -- it's just that the fixed tables method
grew unwieldy due to combinatorics.
And how does this patch relate to the other series touching this?
powerpc/smp: Shared processor sched optimizations
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] powerpc/smp: Dynamically build powerpc topology
2023-09-04 22:10 ` Peter Zijlstra
@ 2023-09-05 5:37 ` Srikar Dronamraju
0 siblings, 0 replies; 5+ messages in thread
From: Srikar Dronamraju @ 2023-09-05 5:37 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Nathan Lynch, Mark Rutland, ndesaulniers, linux-kernel,
Nicholas Piggin, linuxppc-dev, Josh Poimboeuf
* Peter Zijlstra <peterz@infradead.org> [2023-09-05 00:10:04]:
> On Wed, Aug 30, 2023 at 05:56:14PM +0530, Srikar Dronamraju wrote:
> > Currently there are four powerpc specific sched topologies. These are
> > all statically defined. However not all these topologies are used by
> > all powerpc systems.
> >
> > To avoid unnecessary degenerations by the scheduler , masks and flags
> > are compared. However if the sched topologies are build dynamically then
> > the code is simpler and there are greater chances of avoiding
> > degenerations.
> >
> > Even x86 builds its sched topologies dynamically and new changes are
> > very similar to the way x86 is building its topologies.
>
Thanks Peter for taking a look.
> You're not stating it explicitly, but you're doing this as a performance
> optimization, right? The x86 thing didn't particularly care about
> avoiding degenerate topologies -- it's just that the fixed tables method
> grew unwieldy due to combinatorics.
>
Yes, it's an optimization. On powerpc there is a utility, ppc64_cpu, which users
use to set their SMT mode, and whenever they do so we end up recreating
the topology. Hence avoiding degenerate domains, especially on large systems, should help.
Also, dynamic addition of CPUs is more common on powerpc. Hence there too we
would avoid degenerating unnecessary domains.
> And how does this patch relate to the other series touching this?
>
> powerpc/smp: Shared processor sched optimizations
>
This patch will work independently of that patchset.
However, the Shared processor sched optimization patchset makes the MC domain
avoid degeneration. Hence this patch will benefit from that patchset.
i.e., without the Shared processor sched patchset, has_coregroup_support()
will return true on Power10 even for shared processors, and hence the
scheduler will create and destroy MC domains. If the patchset is already
present, on Power10 for shared processors, we will avoid MC domains.
Other than this, there won't be any change.
--
Thanks and Regards
Srikar Dronamraju
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] powerpc/smp: Dynamically build powerpc topology
2023-08-30 12:26 [PATCH] powerpc/smp: Dynamically build powerpc topology Srikar Dronamraju
2023-09-04 22:10 ` Peter Zijlstra
@ 2023-10-20 12:10 ` Michael Ellerman
2023-10-20 13:21 ` Srikar Dronamraju
1 sibling, 1 reply; 5+ messages in thread
From: Michael Ellerman @ 2023-10-20 12:10 UTC (permalink / raw)
To: Srikar Dronamraju
Cc: Nathan Lynch, Mark Rutland, Srikar Dronamraju, Peter Zijlstra,
ndesaulniers, linux-kernel, Nicholas Piggin, linuxppc-dev,
Josh Poimboeuf
Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> Currently there are four powerpc specific sched topologies. These are
> all statically defined. However not all these topologies are used by
> all powerpc systems.
>
> To avoid unnecessary degenerations by the scheduler , masks and flags
> are compared. However if the sched topologies are build dynamically then
> the code is simpler and there are greater chances of avoiding
> degenerations.
>
> Even x86 builds its sched topologies dynamically and new changes are
> very similar to the way x86 is building its topologies.
>
> System Configuration
> type=Shared mode=Uncapped smt=8 lcpu=128 mem=1063126592 kB cpus=96 ent=40.00
>
> $ lscpu
> Architecture: ppc64le
> Byte Order: Little Endian
> CPU(s): 1024
> On-line CPU(s) list: 0-1023
> Model name: POWER10 (architected), altivec supported
> Model: 2.0 (pvr 0080 0200)
> Thread(s) per core: 8
> Core(s) per socket: 32
> Socket(s): 4
> Hypervisor vendor: pHyp
> Virtualization type: para
> L1d cache: 8 MiB (256 instances)
> L1i cache: 12 MiB (256 instances)
> NUMA node(s): 4
>
> From dmesg of v6.5
> [ 0.174444] smp: Bringing up secondary CPUs ...
> [ 3.918535] smp: Brought up 4 nodes, 1024 CPUs
> [ 38.001402] sysrq: Changing Loglevel
> [ 38.001446] sysrq: Loglevel set to 9
>
> From dmesg of v6.5 + patch
> [ 0.174462] smp: Bringing up secondary CPUs ...
> [ 3.421462] smp: Brought up 4 nodes, 1024 CPUs
> [ 35.417917] sysrq: Changing Loglevel
> [ 35.417959] sysrq: Loglevel set to 9
>
> 5 runs of ppc64_cpu --smt=1 (time measured: lesser is better)
> Kernel N Min Max Median Avg Stddev %Change
> v6.5 5 518.08 574.27 528.61 535.388 22.341542
> +patch 5 481.73 495.47 484.21 486.402 5.7997 -9.14963
>
> 5 runs of ppc64_cpu --smt=8 (time measured: lesser is better)
> Kernel N Min Max Median Avg Stddev %Change
> v6.5 5 1094.12 1117.1 1108.97 1106.3 8.606361
> +patch 5 1067.5 1090.03 1073.89 1076.574 9.4189347 -2.68697
>
> Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> ---
> arch/powerpc/kernel/smp.c | 78 ++++++++++++++-------------------------
> 1 file changed, 28 insertions(+), 50 deletions(-)
>
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 48b8161179a8..c16443a04c26 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -92,15 +92,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
> EXPORT_PER_CPU_SYMBOL(cpu_core_map);
> EXPORT_SYMBOL_GPL(has_big_cores);
>
> -enum {
> -#ifdef CONFIG_SCHED_SMT
> - smt_idx,
> -#endif
> - cache_idx,
> - mc_idx,
> - die_idx,
> -};
> -
> #define MAX_THREAD_LIST_SIZE 8
> #define THREAD_GROUP_SHARE_L1 1
> #define THREAD_GROUP_SHARE_L2_L3 2
> @@ -1048,16 +1039,6 @@ static const struct cpumask *cpu_mc_mask(int cpu)
> return cpu_coregroup_mask(cpu);
> }
>
> -static struct sched_domain_topology_level powerpc_topology[] = {
> -#ifdef CONFIG_SCHED_SMT
> - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> -#endif
> - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> - { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
> - { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
> - { NULL, },
> -};
This doesn't apply on my next or upstream.
It looks like it depends on your other 6-patch series. Please append
this patch to that series.
cheers
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] powerpc/smp: Dynamically build powerpc topology
2023-10-20 12:10 ` Michael Ellerman
@ 2023-10-20 13:21 ` Srikar Dronamraju
0 siblings, 0 replies; 5+ messages in thread
From: Srikar Dronamraju @ 2023-10-20 13:21 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Mark Rutland, Peter Zijlstra, ndesaulniers,
linux-kernel, Nicholas Piggin, linuxppc-dev, Josh Poimboeuf
* Michael Ellerman <mpe@ellerman.id.au> [2023-10-20 23:10:55]:
> Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> > Currently there are four powerpc specific sched topologies. These are
> > all statically defined. However not all these topologies are used by
> > all powerpc systems.
> >
> > To avoid unnecessary degenerations by the scheduler , masks and flags
> > are compared. However if the sched topologies are build dynamically then
> > the code is simpler and there are greater chances of avoiding
> > degenerations.
> >
> > Even x86 builds its sched topologies dynamically and new changes are
> > very similar to the way x86 is building its topologies.
> >
> > System Configuration
> > type=Shared mode=Uncapped smt=8 lcpu=128 mem=1063126592 kB cpus=96 ent=40.00
> >
> > $ lscpu
> > Architecture: ppc64le
> > Byte Order: Little Endian
> > CPU(s): 1024
> > On-line CPU(s) list: 0-1023
> > Model name: POWER10 (architected), altivec supported
> > Model: 2.0 (pvr 0080 0200)
> > Thread(s) per core: 8
> > Core(s) per socket: 32
> > Socket(s): 4
> > Hypervisor vendor: pHyp
> > Virtualization type: para
> > L1d cache: 8 MiB (256 instances)
> > L1i cache: 12 MiB (256 instances)
> > NUMA node(s): 4
> >
> > From dmesg of v6.5
> > [ 0.174444] smp: Bringing up secondary CPUs ...
> > [ 3.918535] smp: Brought up 4 nodes, 1024 CPUs
> > [ 38.001402] sysrq: Changing Loglevel
> > [ 38.001446] sysrq: Loglevel set to 9
> >
> > From dmesg of v6.5 + patch
> > [ 0.174462] smp: Bringing up secondary CPUs ...
> > [ 3.421462] smp: Brought up 4 nodes, 1024 CPUs
> > [ 35.417917] sysrq: Changing Loglevel
> > [ 35.417959] sysrq: Loglevel set to 9
> >
> > 5 runs of ppc64_cpu --smt=1 (time measured: lesser is better)
> > Kernel N Min Max Median Avg Stddev %Change
> > v6.5 5 518.08 574.27 528.61 535.388 22.341542
> > +patch 5 481.73 495.47 484.21 486.402 5.7997 -9.14963
> >
> > 5 runs of ppc64_cpu --smt=8 (time measured: lesser is better)
> > Kernel N Min Max Median Avg Stddev %Change
> > v6.5 5 1094.12 1117.1 1108.97 1106.3 8.606361
> > +patch 5 1067.5 1090.03 1073.89 1076.574 9.4189347 -2.68697
> >
> > Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> > ---
> > arch/powerpc/kernel/smp.c | 78 ++++++++++++++-------------------------
> > 1 file changed, 28 insertions(+), 50 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> > index 48b8161179a8..c16443a04c26 100644
> > --- a/arch/powerpc/kernel/smp.c
> > +++ b/arch/powerpc/kernel/smp.c
> > @@ -92,15 +92,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
> > EXPORT_PER_CPU_SYMBOL(cpu_core_map);
> > EXPORT_SYMBOL_GPL(has_big_cores);
> >
> > -enum {
> > -#ifdef CONFIG_SCHED_SMT
> > - smt_idx,
> > -#endif
> > - cache_idx,
> > - mc_idx,
> > - die_idx,
> > -};
> > -
> > #define MAX_THREAD_LIST_SIZE 8
> > #define THREAD_GROUP_SHARE_L1 1
> > #define THREAD_GROUP_SHARE_L2_L3 2
> > @@ -1048,16 +1039,6 @@ static const struct cpumask *cpu_mc_mask(int cpu)
> > return cpu_coregroup_mask(cpu);
> > }
> >
> > -static struct sched_domain_topology_level powerpc_topology[] = {
> > -#ifdef CONFIG_SCHED_SMT
> > - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> > -#endif
> > - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> > - { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
> > - { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
> > - { NULL, },
> > -};
>
> This doesn't apply on my next or upstream.
>
> It looks like it depends on your other 6-patch series. Please append
> this patch to that series.
>
> cheers
Ok, will do the needful in the next iteration.
--
Thanks and Regards
Srikar Dronamraju
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-10-20 13:22 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-30 12:26 [PATCH] powerpc/smp: Dynamically build powerpc topology Srikar Dronamraju
2023-09-04 22:10 ` Peter Zijlstra
2023-09-05 5:37 ` Srikar Dronamraju
2023-10-20 12:10 ` Michael Ellerman
2023-10-20 13:21 ` Srikar Dronamraju
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).