* [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
@ 2025-04-13 19:12 Gregory CLEMENT
2025-04-27 7:53 ` Thomas Bogendoerfer
2025-04-27 10:13 ` Huacai Chen
0 siblings, 2 replies; 9+ messages in thread
From: Gregory CLEMENT @ 2025-04-13 19:12 UTC (permalink / raw)
To: Thomas Bogendoerfer
Cc: Jiaxun Yang, Vladimir Kondratiev, Théo Lebrun, Tawfik Bayouk,
Thomas Petazzoni, linux-mips, linux-kernel, Gregory CLEMENT
Added support for starting CPUs in parallel on EyeQ to speed up boot time.
On EyeQ5, booting 8 CPUs is now ~90ms faster.
On EyeQ6, booting 32 CPUs is now ~650ms faster.
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
Hello,
This patch allows CPUs to start in parallel. It has been tested on
EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
systems use CPS to support SMP.
As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
faster.
Currently, this support is only for EyeQ SoC. However, it should also
work for other CPUs using CPS. I am less sure about MT ASE support,
but this patch can be a good starting point. If anyone wants to add
support for other systems, I can share some ideas, especially for the
MIPS_GENERIC setup that needs to handle both types of SMP setups.
Gregory
---
arch/mips/Kconfig | 2 ++
arch/mips/include/asm/topology.h | 3 +++
arch/mips/kernel/smp-cps.c | 2 ++
arch/mips/kernel/smp.c | 18 ++++++++++++++++++
4 files changed, 25 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -617,6 +617,7 @@ config EYEQ
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
+ select HOTPLUG_PARALLEL if SMP
help
Select this to build a kernel supporting EyeQ SoC from Mobileye.
@@ -2287,6 +2288,7 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
+ select HOTPLUG_SMT if HOTPLUG_PARALLEL
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
--- a/arch/mips/include/asm/topology.h
+++ b/arch/mips/include/asm/topology.h
@@ -16,6 +16,9 @@
#define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
#endif
#endif /* __ASM_TOPOLOGY_H */
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
/* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
if (!cl && !c)
smp_num_siblings = core_vpes;
+ cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
cpu_set_cluster(&cpu_data[nvpes + v], cl);
@@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
cl = cpu_cluster(¤t_cpu_data);
c = cpu_core(¤t_cpu_data);
cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
+ cpu_smt_set_num_threads(core_vpes, core_vpes);
core_bootcfg = &cluster_bootcfg->core_config[c];
bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1);
atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data));
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
+#ifndef CONFIG_HOTPLUG_PARALLEL
static DECLARE_COMPLETION(cpu_starting);
static DECLARE_COMPLETION(cpu_running);
+#endif
/*
* A logical cpu mask containing only one VPE per core to
@@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
unsigned int smp_max_threads __initdata = UINT_MAX;
static int __init early_nosmt(char *s)
@@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
set_cpu_core_map(cpu);
cpumask_set_cpu(cpu, &cpu_coherent_mask);
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ cpuhp_ap_sync_alive();
+#endif
notify_cpu_starting(cpu);
+#ifndef CONFIG_HOTPLUG_PARALLEL
/* Notify boot CPU that we're starting & ready to sync counters */
complete(&cpu_starting);
+#endif
synchronise_count_slave(cpu);
@@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
calculate_cpu_foreign_map();
+#ifndef CONFIG_HOTPLUG_PARALLEL
/*
* Notify boot CPU that we're up & online and it can safely return
* from __cpu_up
*/
complete(&cpu_running);
+#endif
/*
* irq will be enabled in ->smp_finish(), enabling it too early
@@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
set_cpu_online(0, true);
}
+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ return mp_ops->boot_secondary(cpu, tidle);
+}
+#else
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int err;
@@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
wait_for_completion(&cpu_running);
return 0;
}
+#endif
#ifdef CONFIG_PROFILING
/* Not really SMP stuff ... */
---
base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
change-id: 20250411-parallel-cpu-bringup-78999a9235ea
Best regards,
--
Grégory CLEMENT, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-13 19:12 [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ Gregory CLEMENT
@ 2025-04-27 7:53 ` Thomas Bogendoerfer
2025-04-27 10:13 ` Huacai Chen
1 sibling, 0 replies; 9+ messages in thread
From: Thomas Bogendoerfer @ 2025-04-27 7:53 UTC (permalink / raw)
To: Gregory CLEMENT
Cc: Jiaxun Yang, Vladimir Kondratiev, Théo Lebrun, Tawfik Bayouk,
Thomas Petazzoni, linux-mips, linux-kernel
On Sun, Apr 13, 2025 at 09:12:32PM +0200, Gregory CLEMENT wrote:
> Added support for starting CPUs in parallel on EyeQ to speed up boot time.
>
> On EyeQ5, booting 8 CPUs is now ~90ms faster.
> On EyeQ6, booting 32 CPUs is now ~650ms faster.
>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
> Hello,
>
> This patch allows CPUs to start in parallel. It has been tested on
> EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> systems use CPS to support SMP.
>
> As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> faster.
>
> Currently, this support is only for EyeQ SoC. However, it should also
> work for other CPUs using CPS. I am less sure about MT ASE support,
> but this patch can be a good starting point. If anyone wants to add
> support for other systems, I can share some ideas, especially for the
> MIPS_GENERIC setup that needs to handle both types of SMP setups.
>
> Gregory
> ---
> arch/mips/Kconfig | 2 ++
> arch/mips/include/asm/topology.h | 3 +++
> arch/mips/kernel/smp-cps.c | 2 ++
> arch/mips/kernel/smp.c | 18 ++++++++++++++++++
> 4 files changed, 25 insertions(+)
applied to mips-next.
Thomas.
--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-13 19:12 [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ Gregory CLEMENT
2025-04-27 7:53 ` Thomas Bogendoerfer
@ 2025-04-27 10:13 ` Huacai Chen
2025-04-30 3:20 ` Huacai Chen
1 sibling, 1 reply; 9+ messages in thread
From: Huacai Chen @ 2025-04-27 10:13 UTC (permalink / raw)
To: Gregory CLEMENT
Cc: Thomas Bogendoerfer, Jiaxun Yang, Vladimir Kondratiev,
Théo Lebrun, Tawfik Bayouk, Thomas Petazzoni, linux-mips,
linux-kernel
Hi, Gregory and Thomas,
I'm sorry I'm late, but I have some questions about this patch.
On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
<gregory.clement@bootlin.com> wrote:
>
> Added support for starting CPUs in parallel on EyeQ to speed up boot time.
>
> On EyeQ5, booting 8 CPUs is now ~90ms faster.
> On EyeQ6, booting 32 CPUs is now ~650ms faster.
>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
> Hello,
>
> This patch allows CPUs to start in parallel. It has been tested on
> EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> systems use CPS to support SMP.
>
> As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> faster.
>
> Currently, this support is only for EyeQ SoC. However, it should also
> work for other CPUs using CPS. I am less sure about MT ASE support,
> but this patch can be a good starting point. If anyone wants to add
> support for other systems, I can share some ideas, especially for the
> MIPS_GENERIC setup that needs to handle both types of SMP setups.
>
> Gregory
> ---
> arch/mips/Kconfig | 2 ++
> arch/mips/include/asm/topology.h | 3 +++
> arch/mips/kernel/smp-cps.c | 2 ++
> arch/mips/kernel/smp.c | 18 ++++++++++++++++++
> 4 files changed, 25 insertions(+)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -617,6 +617,7 @@ config EYEQ
> select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
> select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
> select USE_OF
> + select HOTPLUG_PARALLEL if SMP
> help
> Select this to build a kernel supporting EyeQ SoC from Mobileye.
>
> @@ -2287,6 +2288,7 @@ config MIPS_CPS
> select MIPS_CM
> select MIPS_CPS_PM if HOTPLUG_CPU
> select SMP
> + select HOTPLUG_SMT if HOTPLUG_PARALLEL
> select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
> select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
> select SYS_SUPPORTS_HOTPLUG_CPU
> diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
> index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
> --- a/arch/mips/include/asm/topology.h
> +++ b/arch/mips/include/asm/topology.h
> @@ -16,6 +16,9 @@
> #define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
> #define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
> #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
> +
> +extern struct cpumask __cpu_primary_thread_mask;
> +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
> #endif
>
> #endif /* __ASM_TOPOLOGY_H */
> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
> --- a/arch/mips/kernel/smp-cps.c
> +++ b/arch/mips/kernel/smp-cps.c
> @@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
> /* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
> if (!cl && !c)
> smp_num_siblings = core_vpes;
> + cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
>
> for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
> cpu_set_cluster(&cpu_data[nvpes + v], cl);
> @@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
> cl = cpu_cluster(¤t_cpu_data);
> c = cpu_core(¤t_cpu_data);
> cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
> + cpu_smt_set_num_threads(core_vpes, core_vpes);
> core_bootcfg = &cluster_bootcfg->core_config[c];
> bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1);
> atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data));
> diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
> cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
> EXPORT_SYMBOL(cpu_core_map);
>
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> static DECLARE_COMPLETION(cpu_starting);
> static DECLARE_COMPLETION(cpu_running);
> +#endif
>
> /*
> * A logical cpu mask containing only one VPE per core to
> @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
>
> cpumask_t cpu_coherent_mask;
>
> +struct cpumask __cpu_primary_thread_mask __read_mostly;
> +
> unsigned int smp_max_threads __initdata = UINT_MAX;
>
> static int __init early_nosmt(char *s)
> @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> set_cpu_core_map(cpu);
>
> cpumask_set_cpu(cpu, &cpu_coherent_mask);
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> + cpuhp_ap_sync_alive();
This is a "synchronization point" due to the description from commit
9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
before this point and serialized after this point.
But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
be executed in parallel. Maybe you haven't observed problems, but in
theory it's not correct.
Huacai
> +#endif
> notify_cpu_starting(cpu);
>
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> /* Notify boot CPU that we're starting & ready to sync counters */
> complete(&cpu_starting);
> +#endif
>
> synchronise_count_slave(cpu);
>
> @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
>
> calculate_cpu_foreign_map();
>
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> /*
> * Notify boot CPU that we're up & online and it can safely return
> * from __cpu_up
> */
> complete(&cpu_running);
> +#endif
>
> /*
> * irq will be enabled in ->smp_finish(), enabling it too early
> @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
> set_cpu_online(0, true);
> }
>
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
> +{
> + return mp_ops->boot_secondary(cpu, tidle);
> +}
> +#else
> int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> {
> int err;
> @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> wait_for_completion(&cpu_running);
> return 0;
> }
> +#endif
>
> #ifdef CONFIG_PROFILING
> /* Not really SMP stuff ... */
>
> ---
> base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
> change-id: 20250411-parallel-cpu-bringup-78999a9235ea
>
> Best regards,
> --
> Grégory CLEMENT, Bootlin
> Embedded Linux and Kernel engineering
> https://bootlin.com
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-27 10:13 ` Huacai Chen
@ 2025-04-30 3:20 ` Huacai Chen
2025-04-30 7:09 ` Gregory CLEMENT
0 siblings, 1 reply; 9+ messages in thread
From: Huacai Chen @ 2025-04-30 3:20 UTC (permalink / raw)
To: Gregory CLEMENT
Cc: Thomas Bogendoerfer, Jiaxun Yang, Vladimir Kondratiev,
Théo Lebrun, Tawfik Bayouk, Thomas Petazzoni, linux-mips,
linux-kernel
Hi, Gregory,
On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@kernel.org> wrote:
>
> Hi, Gregory and Thomas,
>
> I'm sorry I'm late, but I have some questions about this patch.
>
> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
> <gregory.clement@bootlin.com> wrote:
> >
> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
> >
> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
> >
> > Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> > ---
> > Hello,
> >
> > This patch allows CPUs to start in parallel. It has been tested on
> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> > systems use CPS to support SMP.
> >
> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> > faster.
> >
> > Currently, this support is only for EyeQ SoC. However, it should also
> > work for other CPUs using CPS. I am less sure about MT ASE support,
> > but this patch can be a good starting point. If anyone wants to add
> > support for other systems, I can share some ideas, especially for the
> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
> >
> > Gregory
> > ---
> > arch/mips/Kconfig | 2 ++
> > arch/mips/include/asm/topology.h | 3 +++
> > arch/mips/kernel/smp-cps.c | 2 ++
> > arch/mips/kernel/smp.c | 18 ++++++++++++++++++
> > 4 files changed, 25 insertions(+)
> >
> > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> > index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
> > --- a/arch/mips/Kconfig
> > +++ b/arch/mips/Kconfig
> > @@ -617,6 +617,7 @@ config EYEQ
> > select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
> > select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
> > select USE_OF
> > + select HOTPLUG_PARALLEL if SMP
> > help
> > Select this to build a kernel supporting EyeQ SoC from Mobileye.
> >
> > @@ -2287,6 +2288,7 @@ config MIPS_CPS
> > select MIPS_CM
> > select MIPS_CPS_PM if HOTPLUG_CPU
> > select SMP
> > + select HOTPLUG_SMT if HOTPLUG_PARALLEL
> > select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
> > select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
> > select SYS_SUPPORTS_HOTPLUG_CPU
> > diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
> > index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
> > --- a/arch/mips/include/asm/topology.h
> > +++ b/arch/mips/include/asm/topology.h
> > @@ -16,6 +16,9 @@
> > #define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
> > #define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
> > #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
> > +
> > +extern struct cpumask __cpu_primary_thread_mask;
> > +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
> > #endif
> >
> > #endif /* __ASM_TOPOLOGY_H */
> > diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> > index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
> > --- a/arch/mips/kernel/smp-cps.c
> > +++ b/arch/mips/kernel/smp-cps.c
> > @@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
> > /* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
> > if (!cl && !c)
> > smp_num_siblings = core_vpes;
> > + cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
> >
> > for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
> > cpu_set_cluster(&cpu_data[nvpes + v], cl);
> > @@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
> > cl = cpu_cluster(¤t_cpu_data);
> > c = cpu_core(¤t_cpu_data);
> > cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
> > + cpu_smt_set_num_threads(core_vpes, core_vpes);
> > core_bootcfg = &cluster_bootcfg->core_config[c];
> > bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1);
> > atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data));
> > diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> > index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
> > --- a/arch/mips/kernel/smp.c
> > +++ b/arch/mips/kernel/smp.c
> > @@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
> > cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
> > EXPORT_SYMBOL(cpu_core_map);
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> > static DECLARE_COMPLETION(cpu_starting);
> > static DECLARE_COMPLETION(cpu_running);
> > +#endif
> >
> > /*
> > * A logical cpu mask containing only one VPE per core to
> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
> >
> > cpumask_t cpu_coherent_mask;
> >
> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
> > +
> > unsigned int smp_max_threads __initdata = UINT_MAX;
> >
> > static int __init early_nosmt(char *s)
> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> > set_cpu_core_map(cpu);
> >
> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> > + cpuhp_ap_sync_alive();
> This is a "synchronization point" due to the description from commit
> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
> before this point and serialized after this point.
>
> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
> be executed in parallel. Maybe you haven't observed problems, but in
> theory it's not correct.
I don't know whether you have done reboot tests (for ~1000 times),
Jiaxun Yang submitted similar patches for LoongArch [1], but during
reboot tests we encountered problems that I have described in my
previous reply.
[1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/
Huacai
>
> Huacai
>
> > +#endif
> > notify_cpu_starting(cpu);
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> > /* Notify boot CPU that we're starting & ready to sync counters */
> > complete(&cpu_starting);
> > +#endif
> >
> > synchronise_count_slave(cpu);
> >
> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
> >
> > calculate_cpu_foreign_map();
> >
> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> > /*
> > * Notify boot CPU that we're up & online and it can safely return
> > * from __cpu_up
> > */
> > complete(&cpu_running);
> > +#endif
> >
> > /*
> > * irq will be enabled in ->smp_finish(), enabling it too early
> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
> > set_cpu_online(0, true);
> > }
> >
> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
> > +{
> > + return mp_ops->boot_secondary(cpu, tidle);
> > +}
> > +#else
> > int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> > {
> > int err;
> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> > wait_for_completion(&cpu_running);
> > return 0;
> > }
> > +#endif
> >
> > #ifdef CONFIG_PROFILING
> > /* Not really SMP stuff ... */
> >
> > ---
> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
> >
> > Best regards,
> > --
> > Grégory CLEMENT, Bootlin
> > Embedded Linux and Kernel engineering
> > https://bootlin.com
> >
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-30 3:20 ` Huacai Chen
@ 2025-04-30 7:09 ` Gregory CLEMENT
2025-04-30 7:21 ` Huacai Chen
0 siblings, 1 reply; 9+ messages in thread
From: Gregory CLEMENT @ 2025-04-30 7:09 UTC (permalink / raw)
To: Huacai Chen
Cc: Thomas Bogendoerfer, Jiaxun Yang, Vladimir Kondratiev,
Théo Lebrun, Tawfik Bayouk, Thomas Petazzoni, linux-mips,
linux-kernel
Hello Huacai,
> Hi, Gregory,
>
> On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@kernel.org> wrote:
>>
>> Hi, Gregory and Thomas,
>>
>> I'm sorry I'm late, but I have some questions about this patch.
>>
>> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
>> <gregory.clement@bootlin.com> wrote:
>> >
>> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
>> >
>> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
>> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
>> >
>> > Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> > ---
>> > Hello,
>> >
>> > This patch allows CPUs to start in parallel. It has been tested on
>> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
>> > systems use CPS to support SMP.
>> >
>> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
>> > faster.
>> >
>> > Currently, this support is only for EyeQ SoC. However, it should also
>> > work for other CPUs using CPS. I am less sure about MT ASE support,
>> > but this patch can be a good starting point. If anyone wants to add
>> > support for other systems, I can share some ideas, especially for the
>> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
>> >
[...]
>> > * A logical cpu mask containing only one VPE per core to
>> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
>> >
>> > cpumask_t cpu_coherent_mask;
>> >
>> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
>> > +
>> > unsigned int smp_max_threads __initdata = UINT_MAX;
>> >
>> > static int __init early_nosmt(char *s)
>> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
>> > set_cpu_core_map(cpu);
>> >
>> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
>> > +#ifdef CONFIG_HOTPLUG_PARALLEL
>> > + cpuhp_ap_sync_alive();
>> This is a "synchronization point" due to the description from commit
>> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
>> before this point and serialized after this point.
>>
>> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
>> be executed in parallel. Maybe you haven't observed problems, but in
>> theory it's not correct.
I am working on it. To address your remark, I have a few options that I
evaluate.
> I don't know whether you have done reboot tests (for ~1000 times),
> Jiaxun Yang submitted similar patches for LoongArch [1], but during
> reboot tests we encountered problems that I have described in my
> previous reply.
>
> [1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/
I saw that series and I wondered why the last patch was not merged.
I performed around 100 tests so far without encountering any issues; I
plan to automate them further to gather more data.
Gregpory
>
> Huacai
>
>>
>> Huacai
>>
>> > +#endif
>> > notify_cpu_starting(cpu);
>> >
>> > +#ifndef CONFIG_HOTPLUG_PARALLEL
>> > /* Notify boot CPU that we're starting & ready to sync counters */
>> > complete(&cpu_starting);
>> > +#endif
>> >
>> > synchronise_count_slave(cpu);
>> >
>> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
>> >
>> > calculate_cpu_foreign_map();
>> >
>> > +#ifndef CONFIG_HOTPLUG_PARALLEL
>> > /*
>> > * Notify boot CPU that we're up & online and it can safely return
>> > * from __cpu_up
>> > */
>> > complete(&cpu_running);
>> > +#endif
>> >
>> > /*
>> > * irq will be enabled in ->smp_finish(), enabling it too early
>> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
>> > set_cpu_online(0, true);
>> > }
>> >
>> > +#ifdef CONFIG_HOTPLUG_PARALLEL
>> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
>> > +{
>> > + return mp_ops->boot_secondary(cpu, tidle);
>> > +}
>> > +#else
>> > int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>> > {
>> > int err;
>> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>> > wait_for_completion(&cpu_running);
>> > return 0;
>> > }
>> > +#endif
>> >
>> > #ifdef CONFIG_PROFILING
>> > /* Not really SMP stuff ... */
>> >
>> > ---
>> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
>> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
>> >
>> > Best regards,
>> > --
>> > Grégory CLEMENT, Bootlin
>> > Embedded Linux and Kernel engineering
>> > https://bootlin.com
>> >
>> >
--
Grégory CLEMENT, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-30 7:09 ` Gregory CLEMENT
@ 2025-04-30 7:21 ` Huacai Chen
2025-04-30 7:31 ` Gregory CLEMENT
0 siblings, 1 reply; 9+ messages in thread
From: Huacai Chen @ 2025-04-30 7:21 UTC (permalink / raw)
To: Gregory CLEMENT
Cc: Thomas Bogendoerfer, Jiaxun Yang, Vladimir Kondratiev,
Théo Lebrun, Tawfik Bayouk, Thomas Petazzoni, linux-mips,
linux-kernel
On Wed, Apr 30, 2025 at 3:09 PM Gregory CLEMENT
<gregory.clement@bootlin.com> wrote:
>
> Hello Huacai,
>
> > Hi, Gregory,
> >
> > On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@kernel.org> wrote:
> >>
> >> Hi, Gregory and Thomas,
> >>
> >> I'm sorry I'm late, but I have some questions about this patch.
> >>
> >> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
> >> <gregory.clement@bootlin.com> wrote:
> >> >
> >> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
> >> >
> >> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
> >> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
> >> >
> >> > Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> >> > ---
> >> > Hello,
> >> >
> >> > This patch allows CPUs to start in parallel. It has been tested on
> >> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> >> > systems use CPS to support SMP.
> >> >
> >> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> >> > faster.
> >> >
> >> > Currently, this support is only for EyeQ SoC. However, it should also
> >> > work for other CPUs using CPS. I am less sure about MT ASE support,
> >> > but this patch can be a good starting point. If anyone wants to add
> >> > support for other systems, I can share some ideas, especially for the
> >> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
> >> >
> [...]
> >> > * A logical cpu mask containing only one VPE per core to
> >> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
> >> >
> >> > cpumask_t cpu_coherent_mask;
> >> >
> >> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
> >> > +
> >> > unsigned int smp_max_threads __initdata = UINT_MAX;
> >> >
> >> > static int __init early_nosmt(char *s)
> >> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> >> > set_cpu_core_map(cpu);
> >> >
> >> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> >> > + cpuhp_ap_sync_alive();
> >> This is a "synchronization point" due to the description from commit
> >> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
> >> before this point and serialized after this point.
> >>
> >> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
> >> be executed in parallel. Maybe you haven't observed problems, but in
> >> theory it's not correct.
>
> I am working on it. To address your remark, I have a few options that I
> evaluate.
I suggest to revert this patch temporary in mips-next.
Huacai
>
> > I don't know whether you have done reboot tests (for ~1000 times),
> > Jiaxun Yang submitted similar patches for LoongArch [1], but during
> > reboot tests we encountered problems that I have described in my
> > previous reply.
> >
> > [1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/
>
> I saw that series and I wondered why the last patch was not merged.
>
> I performed around 100 tests so far without encountering any issues; I
> plan to automate them further to gather more data.
>
> Gregpory
>
> >
> > Huacai
> >
> >>
> >> Huacai
> >>
> >> > +#endif
> >> > notify_cpu_starting(cpu);
> >> >
> >> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> >> > /* Notify boot CPU that we're starting & ready to sync counters */
> >> > complete(&cpu_starting);
> >> > +#endif
> >> >
> >> > synchronise_count_slave(cpu);
> >> >
> >> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
> >> >
> >> > calculate_cpu_foreign_map();
> >> >
> >> > +#ifndef CONFIG_HOTPLUG_PARALLEL
> >> > /*
> >> > * Notify boot CPU that we're up & online and it can safely return
> >> > * from __cpu_up
> >> > */
> >> > complete(&cpu_running);
> >> > +#endif
> >> >
> >> > /*
> >> > * irq will be enabled in ->smp_finish(), enabling it too early
> >> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
> >> > set_cpu_online(0, true);
> >> > }
> >> >
> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> >> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
> >> > +{
> >> > + return mp_ops->boot_secondary(cpu, tidle);
> >> > +}
> >> > +#else
> >> > int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> >> > {
> >> > int err;
> >> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> >> > wait_for_completion(&cpu_running);
> >> > return 0;
> >> > }
> >> > +#endif
> >> >
> >> > #ifdef CONFIG_PROFILING
> >> > /* Not really SMP stuff ... */
> >> >
> >> > ---
> >> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
> >> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
> >> >
> >> > Best regards,
> >> > --
> >> > Grégory CLEMENT, Bootlin
> >> > Embedded Linux and Kernel engineering
> >> > https://bootlin.com
> >> >
> >> >
>
> --
> Grégory CLEMENT, Bootlin
> Embedded Linux and Kernel engineering
> https://bootlin.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-30 7:21 ` Huacai Chen
@ 2025-04-30 7:31 ` Gregory CLEMENT
2025-05-02 15:32 ` Gregory CLEMENT
0 siblings, 1 reply; 9+ messages in thread
From: Gregory CLEMENT @ 2025-04-30 7:31 UTC (permalink / raw)
To: Huacai Chen
Cc: Thomas Bogendoerfer, Jiaxun Yang, Vladimir Kondratiev,
Théo Lebrun, Tawfik Bayouk, Thomas Petazzoni, linux-mips,
linux-kernel
Huacai Chen <chenhuacai@kernel.org> writes:
> On Wed, Apr 30, 2025 at 3:09 PM Gregory CLEMENT
> <gregory.clement@bootlin.com> wrote:
>>
>> Hello Huacai,
>>
>> > Hi, Gregory,
>> >
>> > On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@kernel.org> wrote:
>> >>
>> >> Hi, Gregory and Thomas,
>> >>
>> >> I'm sorry I'm late, but I have some questions about this patch.
>> >>
>> >> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
>> >> <gregory.clement@bootlin.com> wrote:
>> >> >
>> >> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
>> >> >
>> >> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
>> >> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
>> >> >
>> >> > Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> >> > ---
>> >> > Hello,
>> >> >
>> >> > This patch allows CPUs to start in parallel. It has been tested on
>> >> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
>> >> > systems use CPS to support SMP.
>> >> >
>> >> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
>> >> > faster.
>> >> >
>> >> > Currently, this support is only for EyeQ SoC. However, it should also
>> >> > work for other CPUs using CPS. I am less sure about MT ASE support,
>> >> > but this patch can be a good starting point. If anyone wants to add
>> >> > support for other systems, I can share some ideas, especially for the
>> >> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
>> >> >
>> [...]
>> >> > * A logical cpu mask containing only one VPE per core to
>> >> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
>> >> >
>> >> > cpumask_t cpu_coherent_mask;
>> >> >
>> >> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
>> >> > +
>> >> > unsigned int smp_max_threads __initdata = UINT_MAX;
>> >> >
>> >> > static int __init early_nosmt(char *s)
>> >> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
>> >> > set_cpu_core_map(cpu);
>> >> >
>> >> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
>> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
>> >> > + cpuhp_ap_sync_alive();
>> >> This is a "synchronization point" due to the description from commit
>> >> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
>> >> before this point and serialized after this point.
>> >>
>> >> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
>> >> be executed in parallel. Maybe you haven't observed problems, but in
>> >> theory it's not correct.
>>
>> I am working on it. To address your remark, I have a few options that I
>> evaluate.
> I suggest to revert this patch temporary in mips-next.
As I previously mentioned, I haven't observed any issues until now. What
I'm evaluating is whether there is a real problem with this
implementation. Let's examine whether we need a new patch or if this one
is sufficient.
I will have the resutls at the end of the week.
Gregory
>
> Huacai
>
>>
>> > I don't know whether you have done reboot tests (for ~1000 times),
>> > Jiaxun Yang submitted similar patches for LoongArch [1], but during
>> > reboot tests we encountered problems that I have described in my
>> > previous reply.
>> >
>> > [1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/
>>
>> I saw that series and I wondered why the last patch was not merged.
>>
>> I performed around 100 tests so far without encountering any issues; I
>> plan to automate them further to gather more data.
>>
>> Gregpory
>>
>> >
>> > Huacai
>> >
>> >>
>> >> Huacai
>> >>
>> >> > +#endif
>> >> > notify_cpu_starting(cpu);
>> >> >
>> >> > +#ifndef CONFIG_HOTPLUG_PARALLEL
>> >> > /* Notify boot CPU that we're starting & ready to sync counters */
>> >> > complete(&cpu_starting);
>> >> > +#endif
>> >> >
>> >> > synchronise_count_slave(cpu);
>> >> >
>> >> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
>> >> >
>> >> > calculate_cpu_foreign_map();
>> >> >
>> >> > +#ifndef CONFIG_HOTPLUG_PARALLEL
>> >> > /*
>> >> > * Notify boot CPU that we're up & online and it can safely return
>> >> > * from __cpu_up
>> >> > */
>> >> > complete(&cpu_running);
>> >> > +#endif
>> >> >
>> >> > /*
>> >> > * irq will be enabled in ->smp_finish(), enabling it too early
>> >> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
>> >> > set_cpu_online(0, true);
>> >> > }
>> >> >
>> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
>> >> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
>> >> > +{
>> >> > + return mp_ops->boot_secondary(cpu, tidle);
>> >> > +}
>> >> > +#else
>> >> > int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>> >> > {
>> >> > int err;
>> >> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>> >> > wait_for_completion(&cpu_running);
>> >> > return 0;
>> >> > }
>> >> > +#endif
>> >> >
>> >> > #ifdef CONFIG_PROFILING
>> >> > /* Not really SMP stuff ... */
>> >> >
>> >> > ---
>> >> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
>> >> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
>> >> >
>> >> > Best regards,
>> >> > --
>> >> > Grégory CLEMENT, Bootlin
>> >> > Embedded Linux and Kernel engineering
>> >> > https://bootlin.com
>> >> >
>> >> >
>>
>> --
>> Grégory CLEMENT, Bootlin
>> Embedded Linux and Kernel engineering
>> https://bootlin.com
--
Grégory CLEMENT, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-04-30 7:31 ` Gregory CLEMENT
@ 2025-05-02 15:32 ` Gregory CLEMENT
2025-05-02 22:03 ` Thomas Bogendoerfer
0 siblings, 1 reply; 9+ messages in thread
From: Gregory CLEMENT @ 2025-05-02 15:32 UTC (permalink / raw)
To: Huacai Chen
Cc: Thomas Bogendoerfer, Jiaxun Yang, Vladimir Kondratiev,
Théo Lebrun, Tawfik Bayouk, Thomas Petazzoni, linux-mips,
linux-kernel
Hello,
> Huacai Chen <chenhuacai@kernel.org> writes:
>
>> On Wed, Apr 30, 2025 at 3:09 PM Gregory CLEMENT
>> <gregory.clement@bootlin.com> wrote:
>>>
>>> Hello Huacai,
>>>
>>> > Hi, Gregory,
>>> >
>>> > On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@kernel.org> wrote:
>>> >>
>>> >> Hi, Gregory and Thomas,
>>> >>
>>> >> I'm sorry I'm late, but I have some questions about this patch.
>>> >>
>>> >> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
>>> >> <gregory.clement@bootlin.com> wrote:
>>> >> >
>>> >> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
>>> >> >
>>> >> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
>>> >> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
>>> >> >
>>> >> > Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>>> >> > ---
>>> >> > Hello,
>>> >> >
>>> >> > This patch allows CPUs to start in parallel. It has been tested on
>>> >> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
>>> >> > systems use CPS to support SMP.
>>> >> >
>>> >> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
>>> >> > faster.
>>> >> >
>>> >> > Currently, this support is only for EyeQ SoC. However, it should also
>>> >> > work for other CPUs using CPS. I am less sure about MT ASE support,
>>> >> > but this patch can be a good starting point. If anyone wants to add
>>> >> > support for other systems, I can share some ideas, especially for the
>>> >> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
>>> >> >
>>> [...]
>>> >> > * A logical cpu mask containing only one VPE per core to
>>> >> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
>>> >> >
>>> >> > cpumask_t cpu_coherent_mask;
>>> >> >
>>> >> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
>>> >> > +
>>> >> > unsigned int smp_max_threads __initdata = UINT_MAX;
>>> >> >
>>> >> > static int __init early_nosmt(char *s)
>>> >> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
>>> >> > set_cpu_core_map(cpu);
>>> >> >
>>> >> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
>>> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
>>> >> > + cpuhp_ap_sync_alive();
>>> >> This is a "synchronization point" due to the description from commit
>>> >> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
>>> >> before this point and serialized after this point.
>>> >>
>>> >> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
>>> >> be executed in parallel. Maybe you haven't observed problems, but in
>>> >> theory it's not correct.
>>>
>>> I am working on it. To address your remark, I have a few options that I
>>> evaluate.
>> I suggest to revert this patch temporary in mips-next.
>
>
> As I previously mentioned, I haven't observed any issues until now. What
> I'm evaluating is whether there is a real problem with this
> implementation. Let's examine whether we need a new patch or if this one
> is sufficient.
>
> I will have the resutls at the end of the week.
After hundreds of reboots on the EyeQ5, I did not encounter any failures
during boot. However, while executing the set_cpu_core_map() and
set_cpu_sibling_map() functions in parallel, modifications to shared
resources could result in issues. To address this, I proposed the
following fix:
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 1726744f2b2ec..5f30611f45a1c 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -374,13 +377,13 @@ asmlinkage void start_secondary(void)
calibrate_delay();
cpu_data[cpu].udelay_val = loops_per_jiffy;
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ cpuhp_ap_sync_alive();
+#endif
set_cpu_sibling_map(cpu);
set_cpu_core_map(cpu);
cpumask_set_cpu(cpu, &cpu_coherent_mask);
-#ifdef CONFIG_HOTPLUG_PARALLEL
- cpuhp_ap_sync_alive();
-#endif
notify_cpu_starting(cpu);
#ifndef CONFIG_HOTPLUG_PARALLEL
It moved these two functions back in the serialized part of the boot. I
was concerned about potential slowdowns during the boot process, but I
didn't notice any issues during my test on EyeQ5. Therefore, we can make
this change.
Thomas,
how would you like to proceed? Do you want to squash this patch
into the current commit, or do you prefere I create a separate patch for
it, or a new version of the patch including this change?
Gregory
>
> Gregory
>
>>
>> Huacai
>>
>>>
>>> > I don't know whether you have done reboot tests (for ~1000 times),
>>> > Jiaxun Yang submitted similar patches for LoongArch [1], but during
>>> > reboot tests we encountered problems that I have described in my
>>> > previous reply.
>>> >
>>> > [1] https://lore.kernel.org/loongarch/20240716-loongarch-hotplug-v3-0-af59b3bb35c8@flygoat.com/
>>>
>>> I saw that series and I wondered why the last patch was not merged.
>>>
>>> I performed around 100 tests so far without encountering any issues; I
>>> plan to automate them further to gather more data.
>>>
>>> Gregpory
>>>
>>> >
>>> > Huacai
>>> >
>>> >>
>>> >> Huacai
>>> >>
>>> >> > +#endif
>>> >> > notify_cpu_starting(cpu);
>>> >> >
>>> >> > +#ifndef CONFIG_HOTPLUG_PARALLEL
>>> >> > /* Notify boot CPU that we're starting & ready to sync counters */
>>> >> > complete(&cpu_starting);
>>> >> > +#endif
>>> >> >
>>> >> > synchronise_count_slave(cpu);
>>> >> >
>>> >> > @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
>>> >> >
>>> >> > calculate_cpu_foreign_map();
>>> >> >
>>> >> > +#ifndef CONFIG_HOTPLUG_PARALLEL
>>> >> > /*
>>> >> > * Notify boot CPU that we're up & online and it can safely return
>>> >> > * from __cpu_up
>>> >> > */
>>> >> > complete(&cpu_running);
>>> >> > +#endif
>>> >> >
>>> >> > /*
>>> >> > * irq will be enabled in ->smp_finish(), enabling it too early
>>> >> > @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
>>> >> > set_cpu_online(0, true);
>>> >> > }
>>> >> >
>>> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
>>> >> > +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
>>> >> > +{
>>> >> > + return mp_ops->boot_secondary(cpu, tidle);
>>> >> > +}
>>> >> > +#else
>>> >> > int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>>> >> > {
>>> >> > int err;
>>> >> > @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>>> >> > wait_for_completion(&cpu_running);
>>> >> > return 0;
>>> >> > }
>>> >> > +#endif
>>> >> >
>>> >> > #ifdef CONFIG_PROFILING
>>> >> > /* Not really SMP stuff ... */
>>> >> >
>>> >> > ---
>>> >> > base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
>>> >> > change-id: 20250411-parallel-cpu-bringup-78999a9235ea
>>> >> >
>>> >> > Best regards,
>>> >> > --
>>> >> > Grégory CLEMENT, Bootlin
>>> >> > Embedded Linux and Kernel engineering
>>> >> > https://bootlin.com
>>> >> >
>>> >> >
>>>
>>> --
>>> Grégory CLEMENT, Bootlin
>>> Embedded Linux and Kernel engineering
>>> https://bootlin.com
>
> --
> Grégory CLEMENT, Bootlin
> Embedded Linux and Kernel engineering
> https://bootlin.com
--
Grégory CLEMENT, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
2025-05-02 15:32 ` Gregory CLEMENT
@ 2025-05-02 22:03 ` Thomas Bogendoerfer
0 siblings, 0 replies; 9+ messages in thread
From: Thomas Bogendoerfer @ 2025-05-02 22:03 UTC (permalink / raw)
To: Gregory CLEMENT
Cc: Huacai Chen, Jiaxun Yang, Vladimir Kondratiev, Théo Lebrun,
Tawfik Bayouk, Thomas Petazzoni, linux-mips, linux-kernel
On Fri, May 02, 2025 at 05:32:54PM +0200, Gregory CLEMENT wrote:
> Hello,
>
> > Huacai Chen <chenhuacai@kernel.org> writes:
> >
> >> On Wed, Apr 30, 2025 at 3:09 PM Gregory CLEMENT
> >> <gregory.clement@bootlin.com> wrote:
> >>>
> >>> Hello Huacai,
> >>>
> >>> > Hi, Gregory,
> >>> >
> >>> > On Sun, Apr 27, 2025 at 6:13 PM Huacai Chen <chenhuacai@kernel.org> wrote:
> >>> >>
> >>> >> Hi, Gregory and Thomas,
> >>> >>
> >>> >> I'm sorry I'm late, but I have some questions about this patch.
> >>> >>
> >>> >> On Mon, Apr 14, 2025 at 3:12 AM Gregory CLEMENT
> >>> >> <gregory.clement@bootlin.com> wrote:
> >>> >> >
> >>> >> > Added support for starting CPUs in parallel on EyeQ to speed up boot time.
> >>> >> >
> >>> >> > On EyeQ5, booting 8 CPUs is now ~90ms faster.
> >>> >> > On EyeQ6, booting 32 CPUs is now ~650ms faster.
> >>> >> >
> >>> >> > Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> >>> >> > ---
> >>> >> > Hello,
> >>> >> >
> >>> >> > This patch allows CPUs to start in parallel. It has been tested on
> >>> >> > EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
> >>> >> > systems use CPS to support SMP.
> >>> >> >
> >>> >> > As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
> >>> >> > faster.
> >>> >> >
> >>> >> > Currently, this support is only for EyeQ SoC. However, it should also
> >>> >> > work for other CPUs using CPS. I am less sure about MT ASE support,
> >>> >> > but this patch can be a good starting point. If anyone wants to add
> >>> >> > support for other systems, I can share some ideas, especially for the
> >>> >> > MIPS_GENERIC setup that needs to handle both types of SMP setups.
> >>> >> >
> >>> [...]
> >>> >> > * A logical cpu mask containing only one VPE per core to
> >>> >> > @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
> >>> >> >
> >>> >> > cpumask_t cpu_coherent_mask;
> >>> >> >
> >>> >> > +struct cpumask __cpu_primary_thread_mask __read_mostly;
> >>> >> > +
> >>> >> > unsigned int smp_max_threads __initdata = UINT_MAX;
> >>> >> >
> >>> >> > static int __init early_nosmt(char *s)
> >>> >> > @@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
> >>> >> > set_cpu_core_map(cpu);
> >>> >> >
> >>> >> > cpumask_set_cpu(cpu, &cpu_coherent_mask);
> >>> >> > +#ifdef CONFIG_HOTPLUG_PARALLEL
> >>> >> > + cpuhp_ap_sync_alive();
> >>> >> This is a "synchronization point" due to the description from commit
> >>> >> 9244724fbf8ab394a7210e8e93bf037abc, which means things are parallel
> >>> >> before this point and serialized after this point.
> >>> >>
> >>> >> But unfortunately, set_cpu_sibling_map() and set_cpu_core_map() cannot
> >>> >> be executed in parallel. Maybe you haven't observed problems, but in
> >>> >> theory it's not correct.
> >>>
> >>> I am working on it. To address your remark, I have a few options that I
> >>> evaluate.
> >> I suggest to revert this patch temporary in mips-next.
> >
> >
> > As I previously mentioned, I haven't observed any issues until now. What
> > I'm evaluating is whether there is a real problem with this
> > implementation. Let's examine whether we need a new patch or if this one
> > is sufficient.
> >
> > I will have the resutls at the end of the week.
>
> After hundreds of reboots on the EyeQ5, I did not encounter any failures
> during boot. However, while executing the set_cpu_core_map() and
> set_cpu_sibling_map() functions in parallel, modifications to shared
> resources could result in issues. To address this, I proposed the
> following fix:
>
> diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> index 1726744f2b2ec..5f30611f45a1c 100644
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -374,13 +377,13 @@ asmlinkage void start_secondary(void)
> calibrate_delay();
> cpu_data[cpu].udelay_val = loops_per_jiffy;
>
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> + cpuhp_ap_sync_alive();
> +#endif
> set_cpu_sibling_map(cpu);
> set_cpu_core_map(cpu);
>
> cpumask_set_cpu(cpu, &cpu_coherent_mask);
> -#ifdef CONFIG_HOTPLUG_PARALLEL
> - cpuhp_ap_sync_alive();
> -#endif
> notify_cpu_starting(cpu);
>
> #ifndef CONFIG_HOTPLUG_PARALLEL
>
> It moved these two functions back in the serialized part of the boot. I
> was concerned about potential slowdowns during the boot process, but I
> didn't notice any issues during my test on EyeQ5. Therefore, we can make
> this change.
>
>
> Thomas,
>
> how would you like to proceed? Do you want to squash this patch
> into the current commit, or do you prefere I create a separate patch for
> it, or a new version of the patch including this change?
please send a seperate patch with just the fix
Thomas.
--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-05-02 22:03 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-13 19:12 [PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ Gregory CLEMENT
2025-04-27 7:53 ` Thomas Bogendoerfer
2025-04-27 10:13 ` Huacai Chen
2025-04-30 3:20 ` Huacai Chen
2025-04-30 7:09 ` Gregory CLEMENT
2025-04-30 7:21 ` Huacai Chen
2025-04-30 7:31 ` Gregory CLEMENT
2025-05-02 15:32 ` Gregory CLEMENT
2025-05-02 22:03 ` Thomas Bogendoerfer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).