* [PATCH RFC 0/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-11 13:38 ` Jinjie Ruan
0 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
Support for parallel secondary CPU bringup is already utilized by x86,
MIPS, and RISC-V. This patch brings this capability to the arm64
architecture.
Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST so that architectures which do
not need it can avoid the constraint that primary SMT threads boot first.
Also add a 'cpu' parameter to update_cpu_boot_status() to allow updating the
boot status at a per-CPU granularity during parallel bringup.
Rework the global `secondary_data` accessed during early boot into
a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
enabling the early boot code in head.S to resolve each secondary CPU's
logical ID concurrently.
Jinjie Ruan (3):
cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
arm64: smp: Pass CPU ID to update_cpu_boot_status()
arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
arch/Kconfig | 4 ++++
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/smp.h | 14 +++++++++++---
arch/arm64/kernel/cpufeature.c | 2 +-
arch/arm64/kernel/head.S | 23 ++++++++++++++++++++++
arch/arm64/kernel/smp.c | 35 ++++++++++++++++++++++++++++++----
arch/arm64/mm/context.c | 2 +-
arch/mips/Kconfig | 1 +
arch/riscv/Kconfig | 1 +
arch/x86/Kconfig | 1 +
kernel/cpu.c | 6 +++++-
11 files changed, 80 insertions(+), 10 deletions(-)
--
2.34.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH RFC 1/3] cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-11 13:38 ` Jinjie Ruan
-1 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
During parallel CPU bringup, x86 requires primary SMT threads to boot
first to avoid siblings stopping during microcode updates. This constraint
is architecture-specific and unnecessary for other platforms
like arm64.
Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST to decouple this constraint.
Platforms requiring this temporal order (e.g., x86) can select it
in Kconfig. Other architectures (e.g., arm64) can leave it unselected
to entirely bypass the SMT branch via the preprocessor.
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/Kconfig | 4 ++++
arch/mips/Kconfig | 1 +
arch/riscv/Kconfig | 1 +
arch/x86/Kconfig | 1 +
kernel/cpu.c | 6 +++++-
5 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index e86880045158..0365d2df2659 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,6 +102,10 @@ config HOTPLUG_PARALLEL
bool
select HOTPLUG_SPLIT_STARTUP
+config PARALLEL_SMT_PRIMARY_FIRST
+ bool
+ depends on HOTPLUG_PARALLEL
+
config GENERIC_IRQ_ENTRY
bool
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4364f3dba688..84e11ac0cf71 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -642,6 +642,7 @@ config EYEQ
select MIPS_CPU_SCACHE
select MIPS_GIC
select MIPS_L1_CACHE_SHIFT_7
+ select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
select PCI_DRIVERS_GENERIC
select SMP_UP if SMP
select SWAP_IO_SPACE
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d235396c4514..0cc49aecc841 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -210,6 +210,7 @@ config RISCV
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
+ select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
select PCI_DOMAINS_GENERIC if PCI
select PCI_ECAM if (ACPI && PCI)
select PCI_MSI if PCI
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f3f7cb01d69d..3ad4115ad051 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -314,6 +314,7 @@ config X86
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
select NUMA_MEMBLKS if NUMA
+ select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bc4f7a9ba64e..7ef8cdf4d239 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1792,6 +1792,7 @@ static int __init parallel_bringup_parse_param(char *arg)
}
early_param("cpuhp.parallel", parallel_bringup_parse_param);
+#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
#ifdef CONFIG_HOTPLUG_SMT
static inline bool cpuhp_smt_aware(void)
{
@@ -1811,7 +1812,8 @@ static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
{
return cpu_none_mask;
}
-#endif
+#endif /* CONFIG_HOTPLUG_SMT */
+#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
bool __weak arch_cpuhp_init_parallel_bringup(void)
{
@@ -1837,6 +1839,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
if (!__cpuhp_parallel_bringup)
return false;
+#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
if (cpuhp_smt_aware()) {
const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
static struct cpumask tmp_mask __initdata;
@@ -1857,6 +1860,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
cpumask_andnot(&tmp_mask, mask, pmask);
mask = &tmp_mask;
}
+#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
/* Bring the not-yet started CPUs up */
cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH RFC 1/3] cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
@ 2026-06-11 13:38 ` Jinjie Ruan
0 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
During parallel CPU bringup, x86 requires primary SMT threads to boot
first to avoid siblings stopping during microcode updates. This constraint
is architecture-specific and unnecessary for other platforms
like arm64.
Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST to decouple this constraint.
Platforms requiring this temporal order (e.g., x86) can select it
in Kconfig. Other architectures (e.g., arm64) can leave it unselected
to entirely bypass the SMT branch via the preprocessor.
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/Kconfig | 4 ++++
arch/mips/Kconfig | 1 +
arch/riscv/Kconfig | 1 +
arch/x86/Kconfig | 1 +
kernel/cpu.c | 6 +++++-
5 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index e86880045158..0365d2df2659 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,6 +102,10 @@ config HOTPLUG_PARALLEL
bool
select HOTPLUG_SPLIT_STARTUP
+config PARALLEL_SMT_PRIMARY_FIRST
+ bool
+ depends on HOTPLUG_PARALLEL
+
config GENERIC_IRQ_ENTRY
bool
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4364f3dba688..84e11ac0cf71 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -642,6 +642,7 @@ config EYEQ
select MIPS_CPU_SCACHE
select MIPS_GIC
select MIPS_L1_CACHE_SHIFT_7
+ select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
select PCI_DRIVERS_GENERIC
select SMP_UP if SMP
select SWAP_IO_SPACE
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d235396c4514..0cc49aecc841 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -210,6 +210,7 @@ config RISCV
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
+ select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
select PCI_DOMAINS_GENERIC if PCI
select PCI_ECAM if (ACPI && PCI)
select PCI_MSI if PCI
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f3f7cb01d69d..3ad4115ad051 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -314,6 +314,7 @@ config X86
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
select NUMA_MEMBLKS if NUMA
+ select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bc4f7a9ba64e..7ef8cdf4d239 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1792,6 +1792,7 @@ static int __init parallel_bringup_parse_param(char *arg)
}
early_param("cpuhp.parallel", parallel_bringup_parse_param);
+#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
#ifdef CONFIG_HOTPLUG_SMT
static inline bool cpuhp_smt_aware(void)
{
@@ -1811,7 +1812,8 @@ static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
{
return cpu_none_mask;
}
-#endif
+#endif /* CONFIG_HOTPLUG_SMT */
+#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
bool __weak arch_cpuhp_init_parallel_bringup(void)
{
@@ -1837,6 +1839,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
if (!__cpuhp_parallel_bringup)
return false;
+#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
if (cpuhp_smt_aware()) {
const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
static struct cpumask tmp_mask __initdata;
@@ -1857,6 +1860,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
cpumask_andnot(&tmp_mask, mask, pmask);
mask = &tmp_mask;
}
+#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
/* Bring the not-yet started CPUs up */
cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
--
2.34.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 1/3] cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-18 15:17 ` Thomas Gleixner
-1 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2026-06-18 15:17 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas, will, tsbogend, pjw, palmer, aou,
alex, mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw,
ojeda, ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
On Thu, Jun 11 2026 at 21:38, Jinjie Ruan wrote:
> During parallel CPU bringup, x86 requires primary SMT threads to boot
> first to avoid siblings stopping during microcode updates. This constraint
> is architecture-specific and unnecessary for other platforms
> like arm64.
>
> Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST to decouple this constraint.
> Platforms requiring this temporal order (e.g., x86) can select it
> in Kconfig. Other architectures (e.g., arm64) can leave it unselected
> to entirely bypass the SMT branch via the preprocessor.
>
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> ---
> arch/Kconfig | 4 ++++
> arch/mips/Kconfig | 1 +
> arch/riscv/Kconfig | 1 +
> arch/x86/Kconfig | 1 +
> kernel/cpu.c | 6 +++++-
> 5 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index e86880045158..0365d2df2659 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -102,6 +102,10 @@ config HOTPLUG_PARALLEL
> bool
> select HOTPLUG_SPLIT_STARTUP
>
> +config PARALLEL_SMT_PRIMARY_FIRST
Proper namespaces are overrated, right?
All related options start with HOTPLUG_....
> + bool
> + depends on HOTPLUG_PARALLEL
> +
> config GENERIC_IRQ_ENTRY
> bool
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 4364f3dba688..84e11ac0cf71 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -642,6 +642,7 @@ config EYEQ
> select MIPS_CPU_SCACHE
> select MIPS_GIC
> select MIPS_L1_CACHE_SHIFT_7
> + select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
> select PCI_DRIVERS_GENERIC
> select SMP_UP if SMP
> select SWAP_IO_SPACE
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index d235396c4514..0cc49aecc841 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -210,6 +210,7 @@ config RISCV
> select OF
> select OF_EARLY_FLATTREE
> select OF_IRQ
> + select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
Why does RISCV need this? It does not select HOTPLUG_SMT to begin with.
> +#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
> #ifdef CONFIG_HOTPLUG_SMT
More #ifdeffery is better, right?
> static inline bool cpuhp_smt_aware(void)
> {
> @@ -1811,7 +1812,8 @@ static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
> {
> return cpu_none_mask;
> }
> -#endif
> +#endif /* CONFIG_HOTPLUG_SMT */
> +#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
>
> bool __weak arch_cpuhp_init_parallel_bringup(void)
> {
> @@ -1837,6 +1839,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
> if (!__cpuhp_parallel_bringup)
> return false;
>
> +#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
Seriously?
> if (cpuhp_smt_aware()) {
> const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
> static struct cpumask tmp_mask __initdata;
> @@ -1857,6 +1860,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
> cpumask_andnot(&tmp_mask, mask, pmask);
> mask = &tmp_mask;
> }
> +#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
Something simple like the uncompiled below should just work, no?
---
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,6 +102,10 @@ config HOTPLUG_PARALLEL
bool
select HOTPLUG_SPLIT_STARTUP
+config HOTPLUG_PARALLEL_SMT
+ bool
+ select HOTPLUG_PARALLEL
+
config GENERIC_IRQ_ENTRY
bool
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -657,7 +657,7 @@ config EYEQ
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
- select HOTPLUG_PARALLEL if HOTPLUG_CPU
+ select HOTPLUG_PARALLEL_SMT if HOTPLUG_CPU
help
Select this to build a kernel supporting EyeQ SoC from Mobileye.
@@ -2295,7 +2295,6 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
- select HOTPLUG_SMT if HOTPLUG_PARALLEL
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -304,7 +304,7 @@ config X86
select HAVE_USER_RETURN_NOTIFIER
select HAVE_GENERIC_VDSO
select VDSO_GETRANDOM if X86_64
- select HOTPLUG_PARALLEL if SMP && X86_64
+ select HOTPLUG_PARALLEL_SMT if SMP && X86_64
select HOTPLUG_SMT if SMP
select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
select IRQ_FORCED_THREADING
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1792,7 +1792,7 @@ static int __init parallel_bringup_parse
}
early_param("cpuhp.parallel", parallel_bringup_parse_param);
-#ifdef CONFIG_HOTPLUG_SMT
+#ifdef CONFIG_HOTPLUG_PARALLEL_SMT
static inline bool cpuhp_smt_aware(void)
{
return cpu_smt_max_threads > 1;
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 1/3] cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
@ 2026-06-18 15:17 ` Thomas Gleixner
0 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2026-06-18 15:17 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas, will, tsbogend, pjw, palmer, aou,
alex, mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw,
ojeda, ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
On Thu, Jun 11 2026 at 21:38, Jinjie Ruan wrote:
> During parallel CPU bringup, x86 requires primary SMT threads to boot
> first to avoid siblings stopping during microcode updates. This constraint
> is architecture-specific and unnecessary for other platforms
> like arm64.
>
> Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST to decouple this constraint.
> Platforms requiring this temporal order (e.g., x86) can select it
> in Kconfig. Other architectures (e.g., arm64) can leave it unselected
> to entirely bypass the SMT branch via the preprocessor.
>
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> ---
> arch/Kconfig | 4 ++++
> arch/mips/Kconfig | 1 +
> arch/riscv/Kconfig | 1 +
> arch/x86/Kconfig | 1 +
> kernel/cpu.c | 6 +++++-
> 5 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index e86880045158..0365d2df2659 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -102,6 +102,10 @@ config HOTPLUG_PARALLEL
> bool
> select HOTPLUG_SPLIT_STARTUP
>
> +config PARALLEL_SMT_PRIMARY_FIRST
Proper namespaces are overrated, right?
All related options start with HOTPLUG_....
> + bool
> + depends on HOTPLUG_PARALLEL
> +
> config GENERIC_IRQ_ENTRY
> bool
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 4364f3dba688..84e11ac0cf71 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -642,6 +642,7 @@ config EYEQ
> select MIPS_CPU_SCACHE
> select MIPS_GIC
> select MIPS_L1_CACHE_SHIFT_7
> + select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
> select PCI_DRIVERS_GENERIC
> select SMP_UP if SMP
> select SWAP_IO_SPACE
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index d235396c4514..0cc49aecc841 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -210,6 +210,7 @@ config RISCV
> select OF
> select OF_EARLY_FLATTREE
> select OF_IRQ
> + select PARALLEL_SMT_PRIMARY_FIRST if HOTPLUG_PARALLEL
Why does RISCV need this? It does not select HOTPLUG_SMT to begin with.
> +#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
> #ifdef CONFIG_HOTPLUG_SMT
More #ifdeffery is better, right?
> static inline bool cpuhp_smt_aware(void)
> {
> @@ -1811,7 +1812,8 @@ static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
> {
> return cpu_none_mask;
> }
> -#endif
> +#endif /* CONFIG_HOTPLUG_SMT */
> +#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
>
> bool __weak arch_cpuhp_init_parallel_bringup(void)
> {
> @@ -1837,6 +1839,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
> if (!__cpuhp_parallel_bringup)
> return false;
>
> +#ifdef CONFIG_PARALLEL_SMT_PRIMARY_FIRST
Seriously?
> if (cpuhp_smt_aware()) {
> const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
> static struct cpumask tmp_mask __initdata;
> @@ -1857,6 +1860,7 @@ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
> cpumask_andnot(&tmp_mask, mask, pmask);
> mask = &tmp_mask;
> }
> +#endif /* CONFIG_PARALLEL_SMT_PRIMARY_FIRST */
Something simple like the uncompiled below should just work, no?
---
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -102,6 +102,10 @@ config HOTPLUG_PARALLEL
bool
select HOTPLUG_SPLIT_STARTUP
+config HOTPLUG_PARALLEL_SMT
+ bool
+ select HOTPLUG_PARALLEL
+
config GENERIC_IRQ_ENTRY
bool
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -657,7 +657,7 @@ config EYEQ
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
- select HOTPLUG_PARALLEL if HOTPLUG_CPU
+ select HOTPLUG_PARALLEL_SMT if HOTPLUG_CPU
help
Select this to build a kernel supporting EyeQ SoC from Mobileye.
@@ -2295,7 +2295,6 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
- select HOTPLUG_SMT if HOTPLUG_PARALLEL
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -304,7 +304,7 @@ config X86
select HAVE_USER_RETURN_NOTIFIER
select HAVE_GENERIC_VDSO
select VDSO_GETRANDOM if X86_64
- select HOTPLUG_PARALLEL if SMP && X86_64
+ select HOTPLUG_PARALLEL_SMT if SMP && X86_64
select HOTPLUG_SMT if SMP
select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
select IRQ_FORCED_THREADING
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1792,7 +1792,7 @@ static int __init parallel_bringup_parse
}
early_param("cpuhp.parallel", parallel_bringup_parse_param);
-#ifdef CONFIG_HOTPLUG_SMT
+#ifdef CONFIG_HOTPLUG_PARALLEL_SMT
static inline bool cpuhp_smt_aware(void)
{
return cpu_smt_max_threads > 1;
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH RFC 2/3] arm64: smp: Pass CPU ID to update_cpu_boot_status()
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-11 13:38 ` Jinjie Ruan
-1 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
To support CONFIG_HOTPLUG_PARALLEL, the CPU boot status tracking must
be refactored from a single global variable (secondary_data.status)
to a per-CPU tracking structure to prevent multi-core race conditions.
Add a 'cpu' parameter to update_cpu_boot_status() and update all its
callsites to pass the corresponding CPU ID. This allows updating the
boot status at a per-CPU granularity during parallel bringup.
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/arm64/include/asm/smp.h | 6 +++---
arch/arm64/kernel/cpufeature.c | 2 +-
arch/arm64/kernel/smp.c | 8 ++++----
arch/arm64/mm/context.c | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 10ea4f543069..e2151a01731f 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -122,7 +122,7 @@ static inline void __noreturn cpu_park_loop(void)
}
}
-static inline void update_cpu_boot_status(int val)
+static inline void update_cpu_boot_status(unsigned int cpu, int val)
{
WRITE_ONCE(secondary_data.status, val);
/* Ensure the visibility of the status update */
@@ -134,9 +134,9 @@ static inline void update_cpu_boot_status(int val)
* which calls for a kernel panic. Update the boot status and park the calling
* CPU.
*/
-static inline void __noreturn cpu_panic_kernel(void)
+static inline void __noreturn cpu_panic_kernel(unsigned int cpu)
{
- update_cpu_boot_status(CPU_PANIC_KERNEL);
+ update_cpu_boot_status(cpu, CPU_PANIC_KERNEL);
cpu_park_loop();
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6d53bb15cf7b..0552202702bf 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3674,7 +3674,7 @@ static void verify_local_cpu_caps(u16 scope_mask)
caps->desc, system_has_cap, cpu_has_cap);
if (cpucap_panic_on_conflict(caps))
- cpu_panic_kernel();
+ cpu_panic_kernel(smp_processor_id());
else
cpu_die_early();
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 24f8448e1fbb..6bc90ee4820a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -118,7 +118,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
- update_cpu_boot_status(CPU_MMU_OFF);
+ update_cpu_boot_status(cpu, CPU_MMU_OFF);
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
@@ -252,7 +252,7 @@ asmlinkage notrace void secondary_start_kernel(void)
pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
cpu, (unsigned long)mpidr,
read_cpuid_id());
- update_cpu_boot_status(CPU_BOOT_SUCCESS);
+ update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -411,11 +411,11 @@ void __noreturn cpu_die_early(void)
rcutree_report_cpu_dead();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
- update_cpu_boot_status(CPU_KILL_ME);
+ update_cpu_boot_status(cpu, CPU_KILL_ME);
__cpu_try_die(cpu);
}
- update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+ update_cpu_boot_status(cpu, CPU_STUCK_IN_KERNEL);
cpu_park_loop();
}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 0f4a28b87469..6b8a3245f393 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -72,7 +72,7 @@ void verify_cpu_asid_bits(void)
*/
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
smp_processor_id(), asid, asid_bits);
- cpu_panic_kernel();
+ cpu_panic_kernel(smp_processor_id());
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH RFC 2/3] arm64: smp: Pass CPU ID to update_cpu_boot_status()
@ 2026-06-11 13:38 ` Jinjie Ruan
0 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
To support CONFIG_HOTPLUG_PARALLEL, the CPU boot status tracking must
be refactored from a single global variable (secondary_data.status)
to a per-CPU tracking structure to prevent multi-core race conditions.
Add a 'cpu' parameter to update_cpu_boot_status() and update all its
callsites to pass the corresponding CPU ID. This allows updating the
boot status at a per-CPU granularity during parallel bringup.
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/arm64/include/asm/smp.h | 6 +++---
arch/arm64/kernel/cpufeature.c | 2 +-
arch/arm64/kernel/smp.c | 8 ++++----
arch/arm64/mm/context.c | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 10ea4f543069..e2151a01731f 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -122,7 +122,7 @@ static inline void __noreturn cpu_park_loop(void)
}
}
-static inline void update_cpu_boot_status(int val)
+static inline void update_cpu_boot_status(unsigned int cpu, int val)
{
WRITE_ONCE(secondary_data.status, val);
/* Ensure the visibility of the status update */
@@ -134,9 +134,9 @@ static inline void update_cpu_boot_status(int val)
* which calls for a kernel panic. Update the boot status and park the calling
* CPU.
*/
-static inline void __noreturn cpu_panic_kernel(void)
+static inline void __noreturn cpu_panic_kernel(unsigned int cpu)
{
- update_cpu_boot_status(CPU_PANIC_KERNEL);
+ update_cpu_boot_status(cpu, CPU_PANIC_KERNEL);
cpu_park_loop();
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6d53bb15cf7b..0552202702bf 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3674,7 +3674,7 @@ static void verify_local_cpu_caps(u16 scope_mask)
caps->desc, system_has_cap, cpu_has_cap);
if (cpucap_panic_on_conflict(caps))
- cpu_panic_kernel();
+ cpu_panic_kernel(smp_processor_id());
else
cpu_die_early();
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 24f8448e1fbb..6bc90ee4820a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -118,7 +118,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
- update_cpu_boot_status(CPU_MMU_OFF);
+ update_cpu_boot_status(cpu, CPU_MMU_OFF);
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
@@ -252,7 +252,7 @@ asmlinkage notrace void secondary_start_kernel(void)
pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
cpu, (unsigned long)mpidr,
read_cpuid_id());
- update_cpu_boot_status(CPU_BOOT_SUCCESS);
+ update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -411,11 +411,11 @@ void __noreturn cpu_die_early(void)
rcutree_report_cpu_dead();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
- update_cpu_boot_status(CPU_KILL_ME);
+ update_cpu_boot_status(cpu, CPU_KILL_ME);
__cpu_try_die(cpu);
}
- update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+ update_cpu_boot_status(cpu, CPU_STUCK_IN_KERNEL);
cpu_park_loop();
}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 0f4a28b87469..6b8a3245f393 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -72,7 +72,7 @@ void verify_cpu_asid_bits(void)
*/
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
smp_processor_id(), asid, asid_bits);
- cpu_panic_kernel();
+ cpu_panic_kernel(smp_processor_id());
}
}
--
2.34.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-11 13:38 ` Jinjie Ruan
-1 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
Support for parallel secondary CPU bringup is already utilized by x86,
MIPS, and RISC-V. This patch brings this capability to the arm64
architecture.
Rework the global `secondary_data` accessed during early boot into
a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
enabling the early boot code in head.S to resolve each secondary CPU's
logical ID concurrently.
To fully enable HOTPLUG_PARALLEL, this patch implements:
1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
| test kernel | secondary CPUs boot time |
| --------------------- | -------------------- |
| Without this patch | 155.672 |
| cpuhp.parallel=0 | 62.897 |
| cpuhp.parallel=1 | 166.703 |
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/smp.h | 8 ++++++++
arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
4 files changed, 59 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9091c67e1cc2..8735e9d8ed13 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -113,6 +113,7 @@ config ARM64
select CPUMASK_OFFSTACK if NR_CPUS > 256
select DCACHE_WORD_ACCESS
select HAVE_EXTRA_IPI_TRACEPOINTS
+ select HOTPLUG_PARALLEL if SMP && HOTPLUG_CPU
select DYNAMIC_FTRACE if FUNCTION_TRACER
select DMA_BOUNCE_UNALIGNED_KMALLOC
select DMA_DIRECT_REMAP
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index e2151a01731f..c39d4c0f8a37 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -92,6 +92,10 @@ struct secondary_data {
long status;
};
+#ifdef CONFIG_HOTPLUG_PARALLEL
+extern struct secondary_data cpu_boot_data[NR_CPUS];
+#endif
+
extern struct secondary_data secondary_data;
extern long __early_cpu_boot_status;
extern void secondary_entry(void);
@@ -124,7 +128,11 @@ static inline void __noreturn cpu_park_loop(void)
static inline void update_cpu_boot_status(unsigned int cpu, int val)
{
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ WRITE_ONCE(cpu_boot_data[cpu].status, val);
+#else
WRITE_ONCE(secondary_data.status, val);
+#endif
/* Ensure the visibility of the status update */
dsb(ishst);
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 87a822e5c4ca..8f6c1c5e66d2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <linux/threads.h>
#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
@@ -391,7 +392,29 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
msr vbar_el1, x5
isb
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ mrs x0, mpidr_el1
+ mov_q x1, MPIDR_HWID_BITMASK
+ and x0, x0, x1
+
+ adr_l x1, __mpidr_to_cpuid_table
+ mov x19, #0
+.Lfind_cpuid:
+ ldr x3, [x1, x19, lsl #3]
+ cmp x3, x0
+ b.eq .Lfound_cpuid
+ add x19, x19, #1
+ cmp x19, #NR_CPUS
+ b.ne .Lfind_cpuid
+ b .
+
+.Lfound_cpuid:
+ adr_l x0, cpu_boot_data
+ lsl x3, x19, #4
+ add x0, x0, x3
+#else
adr_l x0, secondary_data
+#endif
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6bc90ee4820a..c909bf1c5119 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -61,6 +61,12 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+struct secondary_data cpu_boot_data[NR_CPUS] ____cacheline_aligned __ro_after_init;
+unsigned long __mpidr_to_cpuid_table[NR_CPUS] ____cacheline_aligned;
+#endif
+
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
@@ -106,8 +112,19 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
return -EOPNOTSUPP;
}
+#ifndef CONFIG_HOTPLUG_PARALLEL
static DECLARE_COMPLETION(cpu_running);
+#endif
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ cpu_boot_data[cpu].task = tidle;
+ update_cpu_boot_status(cpu, CPU_MMU_OFF);
+ return boot_secondary(cpu, tidle);
+}
+#else
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
@@ -172,6 +189,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
return -EIO;
}
+#endif /* CONFIG_HOTPLUG_PARALLEL */
static void init_gic_priority_masking(void)
{
@@ -206,6 +224,10 @@ asmlinkage notrace void secondary_start_kernel(void)
mmgrab(mm);
current->active_mm = mm;
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ cpuhp_ap_sync_alive();
+#endif
+
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -254,7 +276,9 @@ asmlinkage notrace void secondary_start_kernel(void)
read_cpuid_id());
update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
set_cpu_online(cpu, true);
+#ifndef CONFIG_HOTPLUG_PARALLEL
complete(&cpu_running);
+#endif
/*
* Secondary CPUs enter the kernel with all DAIF exceptions masked.
@@ -762,6 +786,9 @@ void __init smp_init_cpus(void)
*/
for (i = 1; i < nr_cpu_ids; i++) {
if (cpu_logical_map(i) != INVALID_HWID) {
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ __mpidr_to_cpuid_table[i] = cpu_logical_map(i);
+#endif
if (smp_cpu_setup(i))
set_cpu_logical_map(i, INVALID_HWID);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-11 13:38 ` Jinjie Ruan
0 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-11 13:38 UTC (permalink / raw)
To: catalin.marinas, will, tsbogend, pjw, palmer, aou, alex, tglx,
mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw, ojeda,
ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
Support for parallel secondary CPU bringup is already utilized by x86,
MIPS, and RISC-V. This patch brings this capability to the arm64
architecture.
Rework the global `secondary_data` accessed during early boot into
a per-CPU array, `cpu_boot_data`. A companion table,
`__mpidr_to_cpuid_table`, maps logical CPU IDs to MPIDR_EL1 values,
enabling the early boot code in head.S to resolve each secondary CPU's
logical ID concurrently.
To fully enable HOTPLUG_PARALLEL, this patch implements:
1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
2) A call to cpuhp_ap_sync_alive() inside secondary_start_kernel().
Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
| test kernel | secondary CPUs boot time |
| --------------------- | -------------------- |
| Without this patch | 155.672 |
| cpuhp.parallel=0 | 62.897 |
| cpuhp.parallel=1 | 166.703 |
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/smp.h | 8 ++++++++
arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
4 files changed, 59 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9091c67e1cc2..8735e9d8ed13 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -113,6 +113,7 @@ config ARM64
select CPUMASK_OFFSTACK if NR_CPUS > 256
select DCACHE_WORD_ACCESS
select HAVE_EXTRA_IPI_TRACEPOINTS
+ select HOTPLUG_PARALLEL if SMP && HOTPLUG_CPU
select DYNAMIC_FTRACE if FUNCTION_TRACER
select DMA_BOUNCE_UNALIGNED_KMALLOC
select DMA_DIRECT_REMAP
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index e2151a01731f..c39d4c0f8a37 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -92,6 +92,10 @@ struct secondary_data {
long status;
};
+#ifdef CONFIG_HOTPLUG_PARALLEL
+extern struct secondary_data cpu_boot_data[NR_CPUS];
+#endif
+
extern struct secondary_data secondary_data;
extern long __early_cpu_boot_status;
extern void secondary_entry(void);
@@ -124,7 +128,11 @@ static inline void __noreturn cpu_park_loop(void)
static inline void update_cpu_boot_status(unsigned int cpu, int val)
{
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ WRITE_ONCE(cpu_boot_data[cpu].status, val);
+#else
WRITE_ONCE(secondary_data.status, val);
+#endif
/* Ensure the visibility of the status update */
dsb(ishst);
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 87a822e5c4ca..8f6c1c5e66d2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <linux/threads.h>
#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
@@ -391,7 +392,29 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
msr vbar_el1, x5
isb
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ mrs x0, mpidr_el1
+ mov_q x1, MPIDR_HWID_BITMASK
+ and x0, x0, x1
+
+ adr_l x1, __mpidr_to_cpuid_table
+ mov x19, #0
+.Lfind_cpuid:
+ ldr x3, [x1, x19, lsl #3]
+ cmp x3, x0
+ b.eq .Lfound_cpuid
+ add x19, x19, #1
+ cmp x19, #NR_CPUS
+ b.ne .Lfind_cpuid
+ b .
+
+.Lfound_cpuid:
+ adr_l x0, cpu_boot_data
+ lsl x3, x19, #4
+ add x0, x0, x3
+#else
adr_l x0, secondary_data
+#endif
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6bc90ee4820a..c909bf1c5119 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -61,6 +61,12 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+struct secondary_data cpu_boot_data[NR_CPUS] ____cacheline_aligned __ro_after_init;
+unsigned long __mpidr_to_cpuid_table[NR_CPUS] ____cacheline_aligned;
+#endif
+
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
@@ -106,8 +112,19 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
return -EOPNOTSUPP;
}
+#ifndef CONFIG_HOTPLUG_PARALLEL
static DECLARE_COMPLETION(cpu_running);
+#endif
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ cpu_boot_data[cpu].task = tidle;
+ update_cpu_boot_status(cpu, CPU_MMU_OFF);
+ return boot_secondary(cpu, tidle);
+}
+#else
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
@@ -172,6 +189,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
return -EIO;
}
+#endif /* CONFIG_HOTPLUG_PARALLEL */
static void init_gic_priority_masking(void)
{
@@ -206,6 +224,10 @@ asmlinkage notrace void secondary_start_kernel(void)
mmgrab(mm);
current->active_mm = mm;
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ cpuhp_ap_sync_alive();
+#endif
+
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -254,7 +276,9 @@ asmlinkage notrace void secondary_start_kernel(void)
read_cpuid_id());
update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
set_cpu_online(cpu, true);
+#ifndef CONFIG_HOTPLUG_PARALLEL
complete(&cpu_running);
+#endif
/*
* Secondary CPUs enter the kernel with all DAIF exceptions masked.
@@ -762,6 +786,9 @@ void __init smp_init_cpus(void)
*/
for (i = 1; i < nr_cpu_ids; i++) {
if (cpu_logical_map(i) != INVALID_HWID) {
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ __mpidr_to_cpuid_table[i] = cpu_logical_map(i);
+#endif
if (smp_cpu_setup(i))
set_cpu_logical_map(i, INVALID_HWID);
}
--
2.34.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 24+ messages in thread
* RE: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-12 15:45 ` Michael Kelley
-1 siblings, 0 replies; 24+ messages in thread
From: Michael Kelley @ 2026-06-12 15:45 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>
> Support for parallel secondary CPU bringup is already utilized by x86,
> MIPS, and RISC-V. This patch brings this capability to the arm64
> architecture.
>
> Rework the global `secondary_data` accessed during early boot into
> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
> enabling the early boot code in head.S to resolve each secondary CPU's
> logical ID concurrently.
>
> To fully enable HOTPLUG_PARALLEL, this patch implements:
> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
>
> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
>
> | test kernel | secondary CPUs boot time |
> | --------------------- | -------------------- |
> | Without this patch | 155.672 |
> | cpuhp.parallel=0 | 62.897 |
> | cpuhp.parallel=1 | 166.703 |
The last two rows seem mixed up. I would expect parallel=0 to
result in a longer boot time.
Michael
>
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> ---
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/smp.h | 8 ++++++++
> arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
> 4 files changed, 59 insertions(+)
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-12 15:45 ` Michael Kelley
0 siblings, 0 replies; 24+ messages in thread
From: Michael Kelley @ 2026-06-12 15:45 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>
> Support for parallel secondary CPU bringup is already utilized by x86,
> MIPS, and RISC-V. This patch brings this capability to the arm64
> architecture.
>
> Rework the global `secondary_data` accessed during early boot into
> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
> enabling the early boot code in head.S to resolve each secondary CPU's
> logical ID concurrently.
>
> To fully enable HOTPLUG_PARALLEL, this patch implements:
> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
>
> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
>
> | test kernel | secondary CPUs boot time |
> | --------------------- | -------------------- |
> | Without this patch | 155.672 |
> | cpuhp.parallel=0 | 62.897 |
> | cpuhp.parallel=1 | 166.703 |
The last two rows seem mixed up. I would expect parallel=0 to
result in a longer boot time.
Michael
>
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> ---
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/smp.h | 8 ++++++++
> arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
> 4 files changed, 59 insertions(+)
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-12 15:45 ` Michael Kelley
@ 2026-06-15 8:51 ` Jinjie Ruan
-1 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-15 8:51 UTC (permalink / raw)
To: Michael Kelley, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
On 6/12/2026 11:45 PM, Michael Kelley wrote:
> From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>>
>> Support for parallel secondary CPU bringup is already utilized by x86,
>> MIPS, and RISC-V. This patch brings this capability to the arm64
>> architecture.
>>
>> Rework the global `secondary_data` accessed during early boot into
>> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
>> enabling the early boot code in head.S to resolve each secondary CPU's
>> logical ID concurrently.
>>
>> To fully enable HOTPLUG_PARALLEL, this patch implements:
>> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
>> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
>>
>> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
>>
>> | test kernel | secondary CPUs boot time |
>> | --------------------- | -------------------- |
>> | Without this patch | 155.672 |
>> | cpuhp.parallel=0 | 62.897 |
>> | cpuhp.parallel=1 | 166.703 |
>
> The last two rows seem mixed up. I would expect parallel=0 to
> result in a longer boot time.
Hi, Michael,
The results are correct and not mixed up.
Compared to the original non-HOTPLUG_PARALLEL approach, the advantage of
cpuhp.parallel=0 lies in its use of cpu_relax() (`yield` on arm64) instead
of the wait_for_completion_timeout() mechanism (which may sleep and cause
context switches). This significantly reduces the overhead of VM
exits and context switches in a KVM guest, thereby cutting the secondary
CPU boot time by more than half.
Regarding cpuhp.parallel=1, I believe the reason it fails to optimize
boot time is that when a large number of CPUs issue the KICK_AP call
simultaneously, it results in severe lock contention within KVM, which
paradoxically slows down secondary CPU bringup. However, this needs
further investigation into the PSCI_CPU_ON code in KVM.
I'm also testing these performance aspects on physical hardware, where the
results might be somewhat different because secondary CPU bringup
requires trapping into the ATF firmware.
Best regards,
Jinjie
>
> Michael
>
>>
>> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
>> ---
>> arch/arm64/Kconfig | 1 +
>> arch/arm64/include/asm/smp.h | 8 ++++++++
>> arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
>> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
>> 4 files changed, 59 insertions(+)
>>
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-15 8:51 ` Jinjie Ruan
0 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-15 8:51 UTC (permalink / raw)
To: Michael Kelley, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
On 6/12/2026 11:45 PM, Michael Kelley wrote:
> From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>>
>> Support for parallel secondary CPU bringup is already utilized by x86,
>> MIPS, and RISC-V. This patch brings this capability to the arm64
>> architecture.
>>
>> Rework the global `secondary_data` accessed during early boot into
>> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
>> enabling the early boot code in head.S to resolve each secondary CPU's
>> logical ID concurrently.
>>
>> To fully enable HOTPLUG_PARALLEL, this patch implements:
>> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
>> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
>>
>> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
>>
>> | test kernel | secondary CPUs boot time |
>> | --------------------- | -------------------- |
>> | Without this patch | 155.672 |
>> | cpuhp.parallel=0 | 62.897 |
>> | cpuhp.parallel=1 | 166.703 |
>
> The last two rows seem mixed up. I would expect parallel=0 to
> result in a longer boot time.
Hi, Michael,
The results are correct and not mixed up.
Compared to the original non-HOTPLUG_PARALLEL approach, the advantage of
cpuhp.parallel=0 lies in its use of cpu_relax() (`yield` on arm64) instead
of the wait_for_completion_timeout() mechanism (which may sleep and cause
context switches). This significantly reduces the overhead of VM
exits and context switches in a KVM guest, thereby cutting the secondary
CPU boot time by more than half.
Regarding cpuhp.parallel=1, I believe the reason it fails to optimize
boot time is that when a large number of CPUs issue the KICK_AP call
simultaneously, it results in severe lock contention within KVM, which
paradoxically slows down secondary CPU bringup. However, this needs
further investigation into the PSCI_CPU_ON code in KVM.
I'm also testing these performance aspects on physical hardware, where the
results might be somewhat different because secondary CPU bringup
requires trapping into the ATF firmware.
Best regards,
Jinjie
>
> Michael
>
>>
>> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
>> ---
>> arch/arm64/Kconfig | 1 +
>> arch/arm64/include/asm/smp.h | 8 ++++++++
>> arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
>> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
>> 4 files changed, 59 insertions(+)
>>
>
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-15 8:51 ` Jinjie Ruan
@ 2026-06-18 12:21 ` Will Deacon
-1 siblings, 0 replies; 24+ messages in thread
From: Will Deacon @ 2026-06-18 12:21 UTC (permalink / raw)
To: Jinjie Ruan
Cc: Michael Kelley, catalin.marinas@arm.com,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
Hi Jinjie,
On Mon, Jun 15, 2026 at 04:51:48PM +0800, Jinjie Ruan wrote:
> On 6/12/2026 11:45 PM, Michael Kelley wrote:
> > From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
> >>
> >> Support for parallel secondary CPU bringup is already utilized by x86,
> >> MIPS, and RISC-V. This patch brings this capability to the arm64
> >> architecture.
> >>
> >> Rework the global `secondary_data` accessed during early boot into
> >> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
> >> enabling the early boot code in head.S to resolve each secondary CPU's
> >> logical ID concurrently.
> >>
> >> To fully enable HOTPLUG_PARALLEL, this patch implements:
> >> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
> >> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
> >>
> >> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
> >>
> >> | test kernel | secondary CPUs boot time |
> >> | --------------------- | -------------------- |
> >> | Without this patch | 155.672 |
> >> | cpuhp.parallel=0 | 62.897 |
> >> | cpuhp.parallel=1 | 166.703 |
> >
> > The last two rows seem mixed up. I would expect parallel=0 to
> > result in a longer boot time.
>
> Hi, Michael,
>
> The results are correct and not mixed up.
>
> Compared to the original non‑HOTPLUG_PARALLEL approach, the advantage of
> cpuhp.parallel=0 lies in its use of cpu_relax(`yield` on arm64) instead
> of the wait_for_completion_timeout() mechanism (which may cause sleep
> and context switching). This significantly reduces the overhead of VM
> exits and context switches in a KVM guest, thereby cutting the secondary
> CPU boot time by more than half.
I don't think that's a particularly compelling reason to enable this for
arm64, in all honesty. The yield instruction typically doesn't do
anything on actual arm64 silicon, so this probably means that you're
introducing busy-loops which tend to be bad for power and scalability.
I implemented this a while ago [1] but didn't manage to see much in terms
of performance improvement and so I didn't bother to send the patches out
after talking about it at KVM forum [2]. However, as mentioned at the end
of that talk, it _is_ still useful for confidential VMs using PSCI so
let me dust off my old series and send it out to see what you think.
It relies on PSCI v0.2, which means we don't need the NR_CPUS size array
for secondary_data and I also have some support for error handling (it
doesn't look like you handle __early_cpu_boot_status properly).
It looks like I could include your first patch, though!
Will
[1] https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=cpu-hotplug
[2] https://www.youtube.com/watch?v=Q6kOshnnQuE
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-18 12:21 ` Will Deacon
0 siblings, 0 replies; 24+ messages in thread
From: Will Deacon @ 2026-06-18 12:21 UTC (permalink / raw)
To: Jinjie Ruan
Cc: Michael Kelley, catalin.marinas@arm.com,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
Hi Jinjie,
On Mon, Jun 15, 2026 at 04:51:48PM +0800, Jinjie Ruan wrote:
> On 6/12/2026 11:45 PM, Michael Kelley wrote:
> > From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
> >>
> >> Support for parallel secondary CPU bringup is already utilized by x86,
> >> MIPS, and RISC-V. This patch brings this capability to the arm64
> >> architecture.
> >>
> >> Rework the global `secondary_data` accessed during early boot into
> >> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
> >> enabling the early boot code in head.S to resolve each secondary CPU's
> >> logical ID concurrently.
> >>
> >> To fully enable HOTPLUG_PARALLEL, this patch implements:
> >> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
> >> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
> >>
> >> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
> >>
> >> | test kernel | secondary CPUs boot time |
> >> | --------------------- | -------------------- |
> >> | Without this patch | 155.672 |
> >> | cpuhp.parallel=0 | 62.897 |
> >> | cpuhp.parallel=1 | 166.703 |
> >
> > The last two rows seem mixed up. I would expect parallel=0 to
> > result in a longer boot time.
>
> Hi, Michael,
>
> The results are correct and not mixed up.
>
> Compared to the original non‑HOTPLUG_PARALLEL approach, the advantage of
> cpuhp.parallel=0 lies in its use of cpu_relax(`yield` on arm64) instead
> of the wait_for_completion_timeout() mechanism (which may cause sleep
> and context switching). This significantly reduces the overhead of VM
> exits and context switches in a KVM guest, thereby cutting the secondary
> CPU boot time by more than half.
I don't think that's a particularly compelling reason to enable this for
arm64, in all honesty. The yield instruction typically doesn't do
anything on actual arm64 silicon, so this probably means that you're
introducing busy-loops which tend to be bad for power and scalability.
I implemented this a while ago [1] but didn't manage to see much in terms
of performance improvement and so I didn't bother to send the patches out
after talking about it at KVM forum [2]. However, as mentioned at the end
of that talk, it _is_ still useful for confidential VMs using PSCI so
let me dust off my old series and send it out to see what you think.
It relies on PSCI v0.2, which means we don't need the NR_CPUS size array
for secondary_data and I also have some support for error handling (it
doesn't look like you handle __early_cpu_boot_status properly).
It looks like I could include your first patch, though!
Will
[1] https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=cpu-hotplug
[2] https://www.youtube.com/watch?v=Q6kOshnnQuE
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-12 15:45 ` Michael Kelley
@ 2026-06-15 9:57 ` Jinjie Ruan
-1 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-15 9:57 UTC (permalink / raw)
To: Michael Kelley, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
On 6/12/2026 11:45 PM, Michael Kelley wrote:
> From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>>
>> Support for parallel secondary CPU bringup is already utilized by x86,
>> MIPS, and RISC-V. This patch brings this capability to the arm64
>> architecture.
>>
>> Rework the global `secondary_data` accessed during early boot into
>> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
>> enabling the early boot code in head.S to resolve each secondary CPU's
>> logical ID concurrently.
>>
>> To fully enable HOTPLUG_PARALLEL, this patch implements:
>> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
>> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
>>
>> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
>>
>> | test kernel | secondary CPUs boot time |
>> | --------------------- | -------------------- |
>> | Without this patch | 155.672 |
>> | cpuhp.parallel=0 | 62.897 |
>> | cpuhp.parallel=1 | 166.703 |
>
> The last two rows seem mixed up. I would expect parallel=0 to
> result in a longer boot time.
Without this patch:
KVM event statistics (6 entries)
Event name  Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
DABT_LOW     323112   75.00%   1669148000  17.00%            5165
WFx           85817   19.00%    723215800   7.00%            8427
SYS64         14914    3.00%    419934530   4.00%           28157
IRQ            5643    1.00%   6732439250  70.00%         1193060
HVC64           282    0.00%     35543970   0.00%          126042
IABT_LOW          1    0.00%         6130   0.00%            6130
cpuhp.parallel=0:
Event name  Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
DABT_LOW     308175   80.00%    643628050   6.00%            2088
WFx           55208   14.00%    261925270   2.00%            4744
SYS64         14975    3.00%    155727880   1.00%           10399
IRQ            4755    1.00%   8496162210  88.00%         1786784
HVC64           280    0.00%     19429900   0.00%           69392
IABT_LOW          1    0.00%         5850   0.00%            5850
cpuhp.parallel=1:
Event name  Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
DABT_LOW     307923   77.00%    692965050   2.00%            2250
WFx           59549   15.00%    287888960   0.00%            4834
SYS64         15127    3.00%    334366230   1.00%           22103
IRQ           12861    3.00%  29784004970  95.00%         2315838
HVC64           280    0.00%     21869940   0.00%           78106
IABT_LOW          1    0.00%         9320   0.00%            9320
- Default (no patch): Slowest HVC64 handling (126 μs), highest WFx count
(85k), and most total VM‑exits.
- cpuhp.parallel=1: HVC64 latency improved to 78 μs (close to
cpuhp.parallel=0), but IRQ exits increased dramatically (12.9k, 2.7×
that of `cpuhp.parallel=0`), accounting for 95% of event time and
becoming the new bottleneck.
- cpuhp.parallel=0: Fastest HVC64 (69 μs), lowest IRQ exits (4.8k), and
lowest total samples, delivering the best overall boot performance.
Therefore, `cpuhp.parallel=1` reduces HVC cost but suffers from a massive
increase in IRQ exits, while `cpuhp.parallel=0` avoids this interrupt
storm and therefore performs best in a KVM guest.
>
> Michael
>
>>
>> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
>> ---
>> arch/arm64/Kconfig | 1 +
>> arch/arm64/include/asm/smp.h | 8 ++++++++
>> arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
>> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
>> 4 files changed, 59 insertions(+)
>>
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-15 9:57 ` Jinjie Ruan
0 siblings, 0 replies; 24+ messages in thread
From: Jinjie Ruan @ 2026-06-15 9:57 UTC (permalink / raw)
To: Michael Kelley, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
On 6/12/2026 11:45 PM, Michael Kelley wrote:
> From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>>
>> Support for parallel secondary CPU bringup is already utilized by x86,
>> MIPS, and RISC-V. This patch brings this capability to the arm64
>> architecture.
>>
>> Rework the global `secondary_data` accessed during early boot into
>> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
>> enabling the early boot code in head.S to resolve each secondary CPU's
>> logical ID concurrently.
>>
>> To fully enable HOTPLUG_PARALLEL, this patch implements:
>> 1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
>> 2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().
>>
>> Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).
>>
>> | test kernel | secondary CPUs boot time |
>> | --------------------- | -------------------- |
>> | Without this patch | 155.672 |
>> | cpuhp.parallel=0 | 62.897 |
>> | cpuhp.parallel=1 | 166.703 |
>
> The last two rows seem mixed up. I would expect parallel=0 to
> result in a longer boot time.
Without this patch:
KVM event statistics (6 entries)
Event name  Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
DABT_LOW     323112   75.00%   1669148000  17.00%            5165
WFx           85817   19.00%    723215800   7.00%            8427
SYS64         14914    3.00%    419934530   4.00%           28157
IRQ            5643    1.00%   6732439250  70.00%         1193060
HVC64           282    0.00%     35543970   0.00%          126042
IABT_LOW          1    0.00%         6130   0.00%            6130
cpuhp.parallel=0:
Event name  Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
DABT_LOW     308175   80.00%    643628050   6.00%            2088
WFx           55208   14.00%    261925270   2.00%            4744
SYS64         14975    3.00%    155727880   1.00%           10399
IRQ            4755    1.00%   8496162210  88.00%         1786784
HVC64           280    0.00%     19429900   0.00%           69392
IABT_LOW          1    0.00%         5850   0.00%            5850
cpuhp.parallel=1:
Event name  Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
DABT_LOW     307923   77.00%    692965050   2.00%            2250
WFx           59549   15.00%    287888960   0.00%            4834
SYS64         15127    3.00%    334366230   1.00%           22103
IRQ           12861    3.00%  29784004970  95.00%         2315838
HVC64           280    0.00%     21869940   0.00%           78106
IABT_LOW          1    0.00%         9320   0.00%            9320
- Default (no patch): Slowest HVC64 handling (126 μs), highest WFx count
(85k), and most total VM‑exits.
- cpuhp.parallel=1: HVC64 latency improved to 78 μs (close to
cpuhp.parallel=0), but IRQ exits increased dramatically (12.9k, 2.7×
that of `cpuhp.parallel=0`), accounting for 95% of event time and
becoming the new bottleneck.
- cpuhp.parallel=0: Fastest HVC64 (69 μs), lowest IRQ exits (4.8k), and
lowest total samples, delivering the best overall boot performance.
Therefore, `cpuhp.parallel=1` reduces HVC cost but suffers from a massive
increase in IRQ exits, while `cpuhp.parallel=0` avoids this interrupt
storm and therefore performs best in a KVM guest.
>
> Michael
>
>>
>> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
>> ---
>> arch/arm64/Kconfig | 1 +
>> arch/arm64/include/asm/smp.h | 8 ++++++++
>> arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++
>> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++++++
>> 4 files changed, 59 insertions(+)
>>
>
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-15 9:57 ` Jinjie Ruan
@ 2026-06-18 15:53 ` Thomas Gleixner
-1 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2026-06-18 15:53 UTC (permalink / raw)
To: Jinjie Ruan, Michael Kelley, catalin.marinas@arm.com,
will@kernel.org, tsbogend@alpha.franken.de, pjw@kernel.org,
palmer@dabbelt.com, aou@eecs.berkeley.edu, alex@ghiti.fr,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
On Mon, Jun 15 2026 at 17:57, Jinjie Ruan wrote:
> On 6/12/2026 11:45 PM, Michael Kelley wrote:
>
> - Default (no patch): Slowest HVC64 handling (126 μs), highest WFx count
> (85k), and most total VM‑exits.
>
> - cpuhp.parallel=1: HVC64 latency improved to 78 μs (close to
> cpuhp.parallel=0), but IRQ exits increased dramatically (12.9k, 2.7×
> that of `cpuhp.parallel=0`), accounting for 95% of event time and
> becoming the new bottleneck.
>
> - cpuhp.parallel=0: Fastest HVC64 (69 μs), lowest IRQ exits (4.8k), and
> lowest total samples, delivering the best overall boot performance.
>
> Therefor, `cpuhp.parallel=1` reduces HVC cost but suffers from a massive
> increase in IRQ exits, while `cpuhp.parallel=0` avoids this interrupt
> storm and therefore performs best in a KVM guest.
What's the cause for having this massive interrupt exit rate in parallel
mode? Just because parallel does not give a useful explanation.
Thanks,
tglx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-18 15:53 ` Thomas Gleixner
0 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2026-06-18 15:53 UTC (permalink / raw)
To: Jinjie Ruan, Michael Kelley, catalin.marinas@arm.com,
will@kernel.org, tsbogend@alpha.franken.de, pjw@kernel.org,
palmer@dabbelt.com, aou@eecs.berkeley.edu, alex@ghiti.fr,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
On Mon, Jun 15 2026 at 17:57, Jinjie Ruan wrote:
> On 6/12/2026 11:45 PM, Michael Kelley wrote:
>
> - Default (no patch): Slowest HVC64 handling (126 μs), highest WFx count
> (85k), and most total VM‑exits.
>
> - cpuhp.parallel=1: HVC64 latency improved to 78 μs (close to
> cpuhp.parallel=0), but IRQ exits increased dramatically (12.9k, 2.7×
> that of `cpuhp.parallel=0`), accounting for 95% of event time and
> becoming the new bottleneck.
>
> - cpuhp.parallel=0: Fastest HVC64 (69 μs), lowest IRQ exits (4.8k), and
> lowest total samples, delivering the best overall boot performance.
>
> Therefor, `cpuhp.parallel=1` reduces HVC cost but suffers from a massive
> increase in IRQ exits, while `cpuhp.parallel=0` avoids this interrupt
> storm and therefore performs best in a KVM guest.
What's the cause for having this massive interrupt exit rate in parallel
mode? Just because parallel does not give a useful explanation.
Thanks,
tglx
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-18 15:49 ` Thomas Gleixner
-1 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2026-06-18 15:49 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas, will, tsbogend, pjw, palmer, aou,
alex, mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw,
ojeda, ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
On Thu, Jun 11 2026 at 21:38, Jinjie Ruan wrote:
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -113,6 +113,7 @@ config ARM64
> select CPUMASK_OFFSTACK if NR_CPUS > 256
> select DCACHE_WORD_ACCESS
> select HAVE_EXTRA_IPI_TRACEPOINTS
> + select HOTPLUG_PARALLEL if SMP && HOTPLUG_CPU
Why do you tie that to HOTPLUG_CPU? HOTPLUG_CPU lets you unplug/plug
CPUs at runtime, but if it's disabled then an SMP system still has to
bring up the APs. So why should that fall back to the existing variant?
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> +extern struct secondary_data cpu_boot_data[NR_CPUS];
> +#endif
> +
> extern struct secondary_data secondary_data;
> extern long __early_cpu_boot_status;
> extern void secondary_entry(void);
> @@ -124,7 +128,11 @@ static inline void __noreturn cpu_park_loop(void)
>
> static inline void update_cpu_boot_status(unsigned int cpu, int val)
> {
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> + WRITE_ONCE(cpu_boot_data[cpu].status, val);
> +#else
> WRITE_ONCE(secondary_data.status, val);
> +#endif
You're really a great fan of #ifdefs, right?
Just convert it over to the parallel mode unconditionally and get rid of
the existing cruft.
> /*
> * TTBR0 is only used for the identity mapping at this stage. Make it
> * point to zero page to avoid speculatively fetching new entries.
> @@ -254,7 +276,9 @@ asmlinkage notrace void secondary_start_kernel(void)
> read_cpuid_id());
> update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
> set_cpu_online(cpu, true);
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> complete(&cpu_running);
> +#endif
Just for the record. You can get rid of this completion w/o PARALLEL
hotplug by selecting HOTPLUG_SPLIT_STARTUP and implementing the
kick/sync parts.
Thanks,
tglx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-18 15:49 ` Thomas Gleixner
0 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2026-06-18 15:49 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas, will, tsbogend, pjw, palmer, aou,
alex, mingo, bp, dave.hansen, hpa, peterz, kees, nathan, linusw,
ojeda, ruanjinjie, david.kaplan, lukas.bulwahn, ryan.roberts, maz,
timothy.hayes, lpieralisi, thuth, oupton, yeoreum.yun,
miko.lenczewski, broonie, kevin.brodsky, james.clark, tabba,
mrigendra.chaubey, arnd, anshuman.khandual, x86, linux-kernel,
linux-arm-kernel, linux-mips, linux-riscv
On Thu, Jun 11 2026 at 21:38, Jinjie Ruan wrote:
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -113,6 +113,7 @@ config ARM64
> select CPUMASK_OFFSTACK if NR_CPUS > 256
> select DCACHE_WORD_ACCESS
> select HAVE_EXTRA_IPI_TRACEPOINTS
> + select HOTPLUG_PARALLEL if SMP && HOTPLUG_CPU
Why do you tie that to HOTPLUG_CPU? HOTPLUG_CPU lets you unplug/plug
CPUs at runtime, but if it's disabled then an SMP system still has to
bring up the APs. So why should that fall back to the existing variant?
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> +extern struct secondary_data cpu_boot_data[NR_CPUS];
> +#endif
> +
> extern struct secondary_data secondary_data;
> extern long __early_cpu_boot_status;
> extern void secondary_entry(void);
> @@ -124,7 +128,11 @@ static inline void __noreturn cpu_park_loop(void)
>
> static inline void update_cpu_boot_status(unsigned int cpu, int val)
> {
> +#ifdef CONFIG_HOTPLUG_PARALLEL
> + WRITE_ONCE(cpu_boot_data[cpu].status, val);
> +#else
> WRITE_ONCE(secondary_data.status, val);
> +#endif
You're really a great fan of #ifdefs, right?
Just convert it over to the parallel mode unconditionally and get rid of
the existing cruft.
> /*
> * TTBR0 is only used for the identity mapping at this stage. Make it
> * point to zero page to avoid speculatively fetching new entries.
> @@ -254,7 +276,9 @@ asmlinkage notrace void secondary_start_kernel(void)
> read_cpuid_id());
> update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
> set_cpu_online(cpu, true);
> +#ifndef CONFIG_HOTPLUG_PARALLEL
> complete(&cpu_running);
> +#endif
Just for the record. You can get rid of this completion w/o PARALLEL
hotplug by selecting HOTPLUG_SPLIT_STARTUP and implementing the
kick/sync parts.
Thanks,
tglx
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH RFC 0/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
2026-06-11 13:38 ` Jinjie Ruan
@ 2026-06-12 15:51 ` Michael Kelley
-1 siblings, 0 replies; 24+ messages in thread
From: Michael Kelley @ 2026-06-12 15:51 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>
> Support for parallel secondary CPU bringup is already utilized by x86,
> MIPS, and RISC-V. This patch brings this capability to the arm64
> architecture.
>
> Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST to avoid primary SMT threads
> to boot first constraint.
>
> And Add a 'cpu' parameter to update_cpu_boot_status() to allow updating the
> boot status at a per-CPU granularity during parallel bringup.
>
> Rework the global `secondary_data` accessed during early boot into
> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
> enabling the early boot code in head.S to resolve each secondary CPU's
> logical ID concurrently.
I tested the series on an ARM64 VM running on Hyper-V in the Azure cloud.
Tested with 16 vCPUs in the VM and with 96 vCPUs in the VM. No issues.
I mainly wanted to make sure nothing unexpected happened with Hyper-V as
the host.
With 96 vCPUs, the secondary CPU startup time drops from ~140 milliseconds
to ~130 milliseconds. That improvement is not as dramatic as you saw on
QEMU, so I presume the difference is due to the hypervisor implementation
of the PSCI calls.
Tested-by: Michael Kelley <mhklinux@outlook.com>
>
> Jinjie Ruan (3):
> cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
> arm64: smp: Pass CPU ID to update_cpu_boot_status()
> arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
>
> arch/Kconfig | 4 ++++
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/smp.h | 14 +++++++++++---
> arch/arm64/kernel/cpufeature.c | 2 +-
> arch/arm64/kernel/head.S | 23 ++++++++++++++++++++++
> arch/arm64/kernel/smp.c | 35 ++++++++++++++++++++++++++++++----
> arch/arm64/mm/context.c | 2 +-
> arch/mips/Kconfig | 1 +
> arch/riscv/Kconfig | 1 +
> arch/x86/Kconfig | 1 +
> kernel/cpu.c | 6 +++++-
> 11 files changed, 80 insertions(+), 10 deletions(-)
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* RE: [PATCH RFC 0/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
@ 2026-06-12 15:51 ` Michael Kelley
0 siblings, 0 replies; 24+ messages in thread
From: Michael Kelley @ 2026-06-12 15:51 UTC (permalink / raw)
To: Jinjie Ruan, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, pjw@kernel.org, palmer@dabbelt.com,
aou@eecs.berkeley.edu, alex@ghiti.fr, tglx@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, peterz@infradead.org, kees@kernel.org,
nathan@kernel.org, linusw@kernel.org, ojeda@kernel.org,
david.kaplan@amd.com, lukas.bulwahn@redhat.com,
ryan.roberts@arm.com, maz@kernel.org, timothy.hayes@arm.com,
lpieralisi@kernel.org, thuth@redhat.com, oupton@kernel.org,
yeoreum.yun@arm.com, miko.lenczewski@arm.com, broonie@kernel.org,
kevin.brodsky@arm.com, james.clark@linaro.org, tabba@google.com,
mrigendra.chaubey@gmail.com, arnd@arndb.de,
anshuman.khandual@arm.com, x86@kernel.org,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mips@vger.kernel.org,
linux-riscv@lists.infradead.org
From: Jinjie Ruan <ruanjinjie@huawei.com> Sent: Thursday, June 11, 2026 6:38 AM
>
> Support for parallel secondary CPU bringup is already utilized by x86,
> MIPS, and RISC-V. This patch brings this capability to the arm64
> architecture.
>
> Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST to avoid primary SMT threads
> to boot first constraint.
>
> And Add a 'cpu' parameter to update_cpu_boot_status() to allow updating the
> boot status at a per-CPU granularity during parallel bringup.
>
> Rework the global `secondary_data` accessed during early boot into
> a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
> enabling the early boot code in head.S to resolve each secondary CPU's
> logical ID concurrently.
I tested the series on an ARM64 VM running on Hyper-V in the Azure cloud.
Tested with 16 vCPUs in the VM and with 96 vCPUs in the VM. No issues.
I mainly wanted to make sure nothing unexpected happened with Hyper-V as
the host.
With 96 vCPUs, the secondary CPU startup time drops from ~140 milliseconds
to ~130 milliseconds. That improvement is not as dramatic as you saw on
QEMU, so I presume the difference is due to the hypervisor implementation
of the PSCI calls.
Tested-by: Michael Kelley <mhklinux@outlook.com>
>
> Jinjie Ruan (3):
> cpu/hotplug: Introduce CONFIG_PARALLEL_SMT_PRIMARY_FIRST
> arm64: smp: Pass CPU ID to update_cpu_boot_status()
> arm64: Add HOTPLUG_PARALLEL support for secondary CPUs
>
> arch/Kconfig | 4 ++++
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/smp.h | 14 +++++++++++---
> arch/arm64/kernel/cpufeature.c | 2 +-
> arch/arm64/kernel/head.S | 23 ++++++++++++++++++++++
> arch/arm64/kernel/smp.c | 35 ++++++++++++++++++++++++++++++----
> arch/arm64/mm/context.c | 2 +-
> arch/mips/Kconfig | 1 +
> arch/riscv/Kconfig | 1 +
> arch/x86/Kconfig | 1 +
> kernel/cpu.c | 6 +++++-
> 11 files changed, 80 insertions(+), 10 deletions(-)
>
> --
> 2.34.1
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 24+ messages in thread