* [PATCHv5 1/3] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt
2023-09-08 14:19 [PATCHv5 0/3] enable nr_cpus for powerpc Pingfan Liu
@ 2023-09-08 14:19 ` Pingfan Liu
2023-09-08 15:47 ` kernel test robot
2023-09-08 14:19 ` [PATCHv5 2/3] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus Pingfan Liu
2023-09-08 14:19 ` [PATCHv5 3/3] powerpc/setup: alloc extra paca_ptrs to hold boot_cpuid Pingfan Liu
2 siblings, 1 reply; 5+ messages in thread
From: Pingfan Liu @ 2023-09-08 14:19 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Wen Xiong, Nicholas Piggin
*** Idea ***
For kexec -p, the boot cpu may not be cpu0, which causes a problem when
allocating memory for paca_ptrs[]. In theory, however, there is no
requirement that a cpu's logical id match the order in which it appears
in the device tree. But helpers such as cpu_first_thread_sibling() make
assumptions about the mapping inside a core. Hence this patch only
partially loosens the mapping, i.e. it unbinds the mapping between cores
while keeping the mapping of threads inside a core.
*** Implement ***
At this early stage, there is plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorders the cpus by changing the list head. As a result, there is a
rotational shift between the sequence number in the dt and the cpu
logical number.
*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).
Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/prom.c | 23 ++++----
arch/powerpc/kernel/setup-common.c | 87 +++++++++++++++++++++++-------
2 files changed, 83 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 0b5878c3125b..72be75d4f003 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -331,8 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
const __be32 *intserv;
int i, nthreads;
int len;
- int found = -1;
- int found_thread = 0;
+ bool found = false;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -355,8 +354,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
for (i = 0; i < nthreads; i++) {
if (be32_to_cpu(intserv[i]) ==
fdt_boot_cpuid_phys(initial_boot_params)) {
- found = boot_cpu_count;
- found_thread = i;
+ /*
+ * always map the boot-cpu logical id into the
+ * range of [0, thread_per_core)
+ */
+ boot_cpuid = i;
+ found = true;
+ /* This works around the hole in paca_ptrs[]. */
+ if (nr_cpu_ids < nthreads)
+ nr_cpu_ids = nthreads;
}
#ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
@@ -365,15 +371,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
}
/* Not the boot CPU */
- if (found < 0)
+ if (!found)
return 0;
- DBG("boot cpu: logical %d physical %d\n", found,
- be32_to_cpu(intserv[found_thread]));
- boot_cpuid = found;
+ DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
+ be32_to_cpu(intserv[boot_cpuid]));
if (IS_ENABLED(CONFIG_PPC64))
- boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+ boot_cpu_hwid = be32_to_cpu(intserv[boot_cpuid]);
/*
* PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d2a446216444..a07af8de6674 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -36,6 +36,7 @@
#include <linux/of_platform.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
+#include <linux/list.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/processor.h>
@@ -427,6 +428,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
u32 *cpu_to_phys_id = NULL;
+struct interrupt_server_node {
+ struct list_head node;
+ bool avail;
+ int len;
+ __be32 *intserv;
+};
+
/**
* setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
@@ -448,11 +456,16 @@ u32 *cpu_to_phys_id = NULL;
void __init smp_setup_cpu_maps(void)
{
struct device_node *dn;
- int cpu = 0;
- int nthreads = 1;
+ int shift = 0, cpu = 0;
+ int j, nthreads = 1;
+ int len;
+ struct interrupt_server_node *intserv_node, *n;
+ struct list_head *bt_node, head;
+ bool avail, found_boot_cpu = false;
DBG("smp_setup_cpu_maps()\n");
+ INIT_LIST_HEAD(&head);
cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
__alignof__(u32));
if (!cpu_to_phys_id)
@@ -462,7 +475,6 @@ void __init smp_setup_cpu_maps(void)
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
- int j, len;
DBG(" * %pOF...\n", dn);
@@ -482,29 +494,68 @@ void __init smp_setup_cpu_maps(void)
}
}
- nthreads = len / sizeof(int);
-
- for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
- bool avail;
+ avail = of_device_is_available(dn);
+ if (!avail)
+ avail = !of_property_match_string(dn,
+ "enable-method", "spin-table");
- DBG(" thread %d -> cpu %d (hard id %d)\n",
- j, cpu, be32_to_cpu(intserv[j]));
- avail = of_device_is_available(dn);
- if (!avail)
- avail = !of_property_match_string(dn,
- "enable-method", "spin-table");
+ intserv_node = memblock_alloc(sizeof(struct interrupt_server_node) + len,
+ __alignof__(u32));
+ if (!intserv_node)
+ panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
+ __func__,
+ sizeof(struct interrupt_server_node) + len,
+ __alignof__(u32));
+ intserv_node->intserv = (__be32 *)((char *)intserv_node +
+ sizeof(struct interrupt_server_node));
+ intserv_node->len = len;
+ memcpy(intserv_node->intserv, intserv, len);
+ intserv_node->avail = avail;
+ INIT_LIST_HEAD(&intserv_node->node);
+ list_add_tail(&intserv_node->node, &head);
+
+ if (!found_boot_cpu) {
+ nthreads = len / sizeof(int);
+ for (j = 0 ; j < nthreads; j++) {
+ if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
+ bt_node = &intserv_node->node;
+ found_boot_cpu = true;
+ /*
+ * Record the round-shift between dt
+ * seq and cpu logical number
+ */
+ shift = cpu - j;
+ break;
+ }
+
+ cpu++;
+ }
+ }
+ }
+ cpu = 0;
+ list_del_init(&head);
+ /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
+ list_add_tail(&head, bt_node);
+ pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
+ list_for_each_entry(intserv_node, &head, node) {
+
+ avail = intserv_node->avail;
+ nthreads = intserv_node->len / sizeof(int);
+ for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
set_cpu_present(cpu, avail);
set_cpu_possible(cpu, true);
- cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
+ cpu_to_phys_id[cpu] = be32_to_cpu(intserv_node->intserv[j]);
+ DBG(" thread %d -> cpu %d (hard id %d)\n",
+ j, cpu, be32_to_cpu(intserv[j]));
cpu++;
}
+ }
- if (cpu >= nr_cpu_ids) {
- of_node_put(dn);
- break;
- }
+ list_for_each_entry_safe(intserv_node, n, &head, node) {
+ len = sizeof(struct interrupt_server_node) + intserv_node->len;
+ memblock_free(intserv_node, len);
}
/* If no SMT supported, nthreads is forced to 1 */
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCHv5 2/3] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
2023-09-08 14:19 [PATCHv5 0/3] enable nr_cpus for powerpc Pingfan Liu
2023-09-08 14:19 ` [PATCHv5 1/3] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt Pingfan Liu
@ 2023-09-08 14:19 ` Pingfan Liu
2023-09-08 14:19 ` [PATCHv5 3/3] powerpc/setup: alloc extra paca_ptrs to hold boot_cpuid Pingfan Liu
2 siblings, 0 replies; 5+ messages in thread
From: Pingfan Liu @ 2023-09-08 14:19 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Wen Xiong, Nicholas Piggin
If boot_cpuid is greater than or equal to nr_cpus, extra effort is
required to ensure that the boot cpu is in cpu_present_mask. This can be
achieved by reserving the last slot of the nr_cpus quota for the boot
cpu.
Note: the restriction on nr_cpus will be lifted, with more effort, in
the next patch.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index a07af8de6674..58a988c64dd2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -456,8 +456,8 @@ struct interrupt_server_node {
void __init smp_setup_cpu_maps(void)
{
struct device_node *dn;
- int shift = 0, cpu = 0;
- int j, nthreads = 1;
+ int terminate, shift = 0, cpu = 0;
+ int j, bt_thread = 0, nthreads = 1;
int len;
struct interrupt_server_node *intserv_node, *n;
struct list_head *bt_node, head;
@@ -520,6 +520,7 @@ void __init smp_setup_cpu_maps(void)
for (j = 0 ; j < nthreads; j++) {
if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
bt_node = &intserv_node->node;
+ bt_thread = j;
found_boot_cpu = true;
/*
* Record the round-shift between dt
@@ -539,11 +540,21 @@ void __init smp_setup_cpu_maps(void)
/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
list_add_tail(&head, bt_node);
pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
+ terminate = nr_cpu_ids;
list_for_each_entry(intserv_node, &head, node) {
+ j = 0;
+ /* Choose a start point to cover the boot cpu */
+ if (nr_cpu_ids - 1 < bt_thread) {
+ /*
+ * The processor core puts assumption on the thread id,
+ * not to breach the assumption.
+ */
+ terminate = nr_cpu_ids - 1;
+ }
avail = intserv_node->avail;
nthreads = intserv_node->len / sizeof(int);
- for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+ for (; j < nthreads && cpu < terminate; j++) {
set_cpu_present(cpu, avail);
set_cpu_possible(cpu, true);
cpu_to_phys_id[cpu] = be32_to_cpu(intserv_node->intserv[j]);
@@ -551,6 +562,14 @@ void __init smp_setup_cpu_maps(void)
j, cpu, be32_to_cpu(intserv[j]));
cpu++;
}
+ /* Online the boot cpu */
+ if (nr_cpu_ids - 1 < bt_thread) {
+ set_cpu_present(bt_thread, avail);
+ set_cpu_possible(bt_thread, true);
+ cpu_to_phys_id[bt_thread] = be32_to_cpu(intserv_node->intserv[bt_thread]);
+ DBG(" thread %d -> cpu %d (hard id %d)\n",
+ bt_thread, bt_thread, be32_to_cpu(intserv[bt_thread]));
+ }
}
list_for_each_entry_safe(intserv_node, n, &head, node) {
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCHv5 3/3] powerpc/setup: alloc extra paca_ptrs to hold boot_cpuid
2023-09-08 14:19 [PATCHv5 0/3] enable nr_cpus for powerpc Pingfan Liu
2023-09-08 14:19 ` [PATCHv5 1/3] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt Pingfan Liu
2023-09-08 14:19 ` [PATCHv5 2/3] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus Pingfan Liu
@ 2023-09-08 14:19 ` Pingfan Liu
2 siblings, 0 replies; 5+ messages in thread
From: Pingfan Liu @ 2023-09-08 14:19 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Wen Xiong, Nicholas Piggin
paca_ptrs should be large enough to hold an entry for boot_cpuid; hence
its number of entries is set to the larger of boot_cpuid + 1 and
nr_cpus.
On the other hand, some kernel components make the following
assumptions:
  1. The timer code assumes cpu0 is online, since the timer_list->flags
     subfield 'TIMER_CPUMASK' is zero if it is not initialized to a
     proper present cpu.
  2. power9_idle_stop() assumes the primary thread's paca is allocated.
Hence raise nr_cpu_ids from one to two to ensure cpu0 is onlined if the
boot cpu is not cpu0.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/paca.c | 10 ++++++----
arch/powerpc/kernel/prom.c | 9 ++++++---
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index cda4e00b67c1..91e2401de1bd 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -242,9 +242,10 @@ static int __initdata paca_struct_size;
void __init allocate_paca_ptrs(void)
{
- paca_nr_cpu_ids = nr_cpu_ids;
+ int n = (boot_cpuid + 1) > nr_cpu_ids ? (boot_cpuid + 1) : nr_cpu_ids;
- paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ paca_nr_cpu_ids = n;
+ paca_ptrs_size = sizeof(struct paca_struct *) * n;
paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
if (!paca_ptrs)
panic("Failed to allocate %d bytes for paca pointers\n",
@@ -287,13 +288,14 @@ void __init allocate_paca(int cpu)
void __init free_unused_pacas(void)
{
int new_ptrs_size;
+ int n = (boot_cpuid + 1) > nr_cpu_ids ? (boot_cpuid + 1) : nr_cpu_ids;
- new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ new_ptrs_size = sizeof(struct paca_struct *) * n;
if (new_ptrs_size < paca_ptrs_size)
memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
paca_ptrs_size - new_ptrs_size);
- paca_nr_cpu_ids = nr_cpu_ids;
+ paca_nr_cpu_ids = n;
paca_ptrs_size = new_ptrs_size;
#ifdef CONFIG_PPC_64S_HASH_MMU
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 72be75d4f003..eca6a1568749 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -360,9 +360,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
*/
boot_cpuid = i;
found = true;
- /* This works around the hole in paca_ptrs[]. */
- if (nr_cpu_ids < nthreads)
- nr_cpu_ids = nthreads;
+ /*
+ * Ideally, nr_cpus=1 can be achieved if each kernel
+ * component does not assume cpu0 is onlined.
+ */
+ if (boot_cpuid != 0 && nr_cpu_ids < 2)
+ nr_cpu_ids = 2;
}
#ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread