* [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-09 11:30 [PATCHv8 0/5] enable nr_cpus for powerpc Pingfan Liu
@ 2023-10-09 11:30 ` Pingfan Liu
2023-10-10 4:44 ` Sourabh Jain
2023-10-09 11:30 ` [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt Pingfan Liu
` (3 subsequent siblings)
4 siblings, 1 reply; 19+ messages in thread
From: Pingfan Liu @ 2023-10-09 11:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Nicholas Piggin, Wen Xiong
In order to identify the boot cpu, its intserv[] should be recorded and
checked in smp_setup_cpu_maps().
smp_setup_cpu_maps() is shared between PPC64 and PPC32. Since PPC64
already uses boot_cpu_hwid to carry that information, enable this
variable on PPC32 as well, so that the coming patch can use it to carry
the same information for PPC32.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@us.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/include/asm/smp.h | 2 +-
arch/powerpc/kernel/prom.c | 3 +--
arch/powerpc/kernel/setup-common.c | 2 --
3 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index aaaa576d0e15..5db9178cc800 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -26,7 +26,7 @@
#include <asm/percpu.h>
extern int boot_cpuid;
-extern int boot_cpu_hwid; /* PPC64 only */
+extern int boot_cpu_hwid;
extern int spinning_secondaries;
extern u32 *cpu_to_phys_id;
extern bool coregroup_enabled;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 0b5878c3125b..ec82f5bda908 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -372,8 +372,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
- if (IS_ENABLED(CONFIG_PPC64))
- boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+ boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
/*
* PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d2a446216444..1b19a9815672 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -87,9 +87,7 @@ EXPORT_SYMBOL(machine_id);
int boot_cpuid = -1;
EXPORT_SYMBOL_GPL(boot_cpuid);
-#ifdef CONFIG_PPC64
int boot_cpu_hwid = -1;
-#endif
/*
* These are used in binfmt_elf.c to put aux entries on the stack
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-09 11:30 ` [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32 Pingfan Liu
@ 2023-10-10 4:44 ` Sourabh Jain
2023-10-10 8:24 ` Sourabh Jain
2023-10-10 9:08 ` Sourabh Jain
0 siblings, 2 replies; 19+ messages in thread
From: Sourabh Jain @ 2023-10-10 4:44 UTC (permalink / raw)
To: Pingfan Liu, linuxppc-dev
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong
Hello Pingfan,
With this patch series applied, the kdump kernel fails to boot on
powerpc with nr_cpus=1.
Console logs:
-------------------
[root]# echo c > /proc/sysrq-trigger
[ 74.783235] sysrq: Trigger a crash
[ 74.783244] Kernel panic - not syncing: sysrq triggered crash
[ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
6.6.0-rc5pf-nr-cpus+ #3
[ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
[ 74.783275] Call Trace:
[ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
dump_stack_lvl+0x6c/0x9c (unreliable)
[ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
[ 74.783298] [c00000020f4ebb90] [c000000000936d48]
sysrq_handle_crash+0x28/0x30
[ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
__handle_sysrq+0x10c/0x250
[ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
write_sysrq_trigger+0xc8/0x168
[ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
proc_reg_write+0x10c/0x1b0
[ 74.783321] [c00000020f4ebd00] [c00000000058da54] vfs_write+0x104/0x4b0
[ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc] ksys_write+0x7c/0x140
[ 74.783331] [c00000020f4ebe10] [c000000000033a64]
system_call_exception+0x144/0x3a0
[ 74.783337] [c00000020f4ebe50] [c00000000000c554]
system_call_common+0xf4/0x258
[ 74.783343] --- interrupt: c00 at 0x7fffa0721594
[ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
0000000000000000
[ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
(6.6.0-rc5pf-nr-cpus+)
[ 74.783376] MSR: 800000000280f033
<SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
[ 74.783394] IRQMASK: 0
[ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800 00007fffa0807300
0000000000000001
[ 74.783394] GPR04: 000000013549ea60 0000000000000002 0000000000000010
0000000000000000
[ 74.783394] GPR08: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70 0000000040000000
000000011a0f9798
[ 74.783394] GPR16: 000000011a0f9724 000000011a097688 000000011a02ff70
000000011a0fd568
[ 74.783394] GPR20: 0000000135554bf0 0000000000000001 000000011a0aa478
00007ffffc4b6a24
[ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94 0000000000000002
000000013549ea60
[ 74.783394] GPR28: 0000000000000002 00007fffa08017a0 000000013549ea60
0000000000000002
[ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
[ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
[ 74.783447] --- interrupt: c00
I'm in purgatory
[ 0.000000] radix-mmu: Page sizes from device-tree:
[ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
[ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
[ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
[ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
[ 0.000000] Activating Kernel Userspace Access Prevention
[ 0.000000] Activating Kernel Userspace Execution Prevention
[ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
with 64.0 KiB pages (exec)
[ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
with 64.0 KiB pages
[ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
with 2.00 MiB pages
[ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
with 2.00 MiB pages (exec)
[ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
with 2.00 MiB pages
[ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
with 1.00 GiB pages
[ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
with 2.00 MiB pages
[ 0.000000] lpar: Using radix MMU under hypervisor
[ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
(root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514 (Red
Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
41 CDT 2023
[ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
[ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
[ 0.000000] printk: bootconsole [udbg0] enabled
[ 0.000000] the round shift between dt seq and the cpu logic number: 56
[ 0.000000] BUG: Unable to handle kernel data access on write at
0xc0000001a0000000
[ 0.000000] Faulting instruction address: 0xc000000022009c64
[ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
[ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
[ 0.000000] Modules linked in:
[ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
6.6.0-rc5pf-nr-cpus+ #3
[ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
[ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
c0000000201ff348
[ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
(6.6.0-rc5pf-nr-cpus+)
[ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR: 28222824
XER: 00000001
[ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
42000000 IRQMASK: 1
[ 0.000000] GPR00: c000000022009ba0 c000000022aebda0 c0000000213d1300
0000000000000004
[ 0.000000] GPR04: 0000000000000001 c000000022aebbc0 c000000022aebbb8
0000000000000000
[ 0.000000] GPR08: 0000000000000001 c00000019ffffff8 000000000000003a
c0000000229c8a78
[ 0.000000] GPR12: 0000000000002000 c000000022e4a800 c0000000211d34b8
c0000000211d3aa8
[ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0 c0000000225f3b98
0000000000000000
[ 0.000000] GPR20: 0000000000000001 0000000000000001 0000000000000001
0000000000000001
[ 0.000000] GPR24: 0000000000000008 0000000000000000 0000000000000001
c00000019ffffdc0
[ 0.000000] GPR28: 0000000000000002 c000000022b368e0 c000000022aebe08
0000000000000008
[ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
[ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
[ 0.000000] Call Trace:
[ 0.000000] [c000000022aebda0] [c000000022009ba0]
smp_setup_cpu_maps+0x35c/0x724 (unreliable)
[ 0.000000] [c000000022aebeb0] [c00000002200a19c] setup_arch+0x1b8/0x54c
[ 0.000000] [c000000022aebf30] [c000000022003f88] start_kernel+0xb0/0x768
[ 0.000000] [c000000022aebfe0] [c00000002000d888]
start_here_common+0x1c/0x20
[ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
<7d49c12e> eb7b0000 7e99a378 4bffff3c
[ 0.000000] ---[ end trace 0000000000000000 ]---
[ 0.000000]
[ 0.000000] Kernel panic - not syncing: Fatal exception
[ 0.000000] Rebooting in 180 seconds..
However, the kdump kernel boots fine if the kernel crashes on CPU 0.
Thanks,
Sourabh Jain
On 09/10/23 17:00, Pingfan Liu wrote:
> In order to identify the boot cpu, its intserv[] should be recorded and
> checked in smp_setup_cpu_maps().
>
> smp_setup_cpu_maps() is shared between PPC64 and PPC32. Since PPC64 has
> already used boot_cpu_hwid to carry that information, enabling this
> variable on PPC32 so later it can also be used to carry that information
> for PPC32 in the coming patch.
>
> Signed-off-by: Pingfan Liu <piliu@redhat.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Nicholas Piggin <npiggin@gmail.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Cc: Wen Xiong <wenxiong@us.ibm.com>
> Cc: Baoquan He <bhe@redhat.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: kexec@lists.infradead.org
> To: linuxppc-dev@lists.ozlabs.org
> ---
> arch/powerpc/include/asm/smp.h | 2 +-
> arch/powerpc/kernel/prom.c | 3 +--
> arch/powerpc/kernel/setup-common.c | 2 --
> 3 files changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index aaaa576d0e15..5db9178cc800 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -26,7 +26,7 @@
> #include <asm/percpu.h>
>
> extern int boot_cpuid;
> -extern int boot_cpu_hwid; /* PPC64 only */
> +extern int boot_cpu_hwid;
> extern int spinning_secondaries;
> extern u32 *cpu_to_phys_id;
> extern bool coregroup_enabled;
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index 0b5878c3125b..ec82f5bda908 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -372,8 +372,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> be32_to_cpu(intserv[found_thread]));
> boot_cpuid = found;
>
> - if (IS_ENABLED(CONFIG_PPC64))
> - boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
> + boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
>
> /*
> * PAPR defines "logical" PVR values for cpus that
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index d2a446216444..1b19a9815672 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -87,9 +87,7 @@ EXPORT_SYMBOL(machine_id);
> int boot_cpuid = -1;
> EXPORT_SYMBOL_GPL(boot_cpuid);
>
> -#ifdef CONFIG_PPC64
> int boot_cpu_hwid = -1;
> -#endif
>
> /*
> * These are used in binfmt_elf.c to put aux entries on the stack
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-10 4:44 ` Sourabh Jain
@ 2023-10-10 8:24 ` Sourabh Jain
2023-10-10 9:08 ` Sourabh Jain
1 sibling, 0 replies; 19+ messages in thread
From: Sourabh Jain @ 2023-10-10 8:24 UTC (permalink / raw)
To: Pingfan Liu, linuxppc-dev
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong
Hello Pingfan,
>
> With this patch series applied, the kdump kernel fails to boot on
> powerpc with nr_cpus=1.
>
> Console logs:
> -------------------
> [root]# echo c > /proc/sysrq-trigger
> [ 74.783235] sysrq: Trigger a crash
> [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
> [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
> 6.6.0-rc5pf-nr-cpus+ #3
> [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
> [ 74.783275] Call Trace:
> [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
> dump_stack_lvl+0x6c/0x9c (unreliable)
> [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
> [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
> sysrq_handle_crash+0x28/0x30
> [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
> __handle_sysrq+0x10c/0x250
> [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
> write_sysrq_trigger+0xc8/0x168
> [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
> proc_reg_write+0x10c/0x1b0
> [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
> vfs_write+0x104/0x4b0
> [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
> ksys_write+0x7c/0x140
> [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
> system_call_exception+0x144/0x3a0
> [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
> system_call_common+0xf4/0x258
> [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
> [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
> 0000000000000000
> [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
> (6.6.0-rc5pf-nr-cpus+)
> [ 74.783376] MSR: 800000000280f033
> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> [ 74.783394] IRQMASK: 0
> [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800
> 00007fffa0807300 0000000000000001
> [ 74.783394] GPR04: 000000013549ea60 0000000000000002
> 0000000000000010 0000000000000000
> [ 74.783394] GPR08: 0000000000000000 0000000000000000
> 0000000000000000 0000000000000000
> [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70
> 0000000040000000 000000011a0f9798
> [ 74.783394] GPR16: 000000011a0f9724 000000011a097688
> 000000011a02ff70 000000011a0fd568
> [ 74.783394] GPR20: 0000000135554bf0 0000000000000001
> 000000011a0aa478 00007ffffc4b6a24
> [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94
> 0000000000000002 000000013549ea60
> [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0
> 000000013549ea60 0000000000000002
> [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
> [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
> [ 74.783447] --- interrupt: c00
> I'm in purgatory
> [ 0.000000] radix-mmu: Page sizes from device-tree:
> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> [ 0.000000] Activating Kernel Userspace Access Prevention
> [ 0.000000] Activating Kernel Userspace Execution Prevention
> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> with 64.0 KiB pages (exec)
> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> with 64.0 KiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> with 2.00 MiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> with 2.00 MiB pages (exec)
> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> with 2.00 MiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
> with 1.00 GiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
> with 2.00 MiB pages
> [ 0.000000] lpar: Using radix MMU under hypervisor
> [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
> (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514
> (Red Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
> 41 CDT 2023
> [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
> [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
> 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
> [ 0.000000] printk: bootconsole [udbg0] enabled
> [ 0.000000] the round shift between dt seq and the cpu logic
> number: 56
> [ 0.000000] BUG: Unable to handle kernel data access on write at
> 0xc0000001a0000000
> [ 0.000000] Faulting instruction address: 0xc000000022009c64
> [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
> [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> [ 0.000000] Modules linked in:
> [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
> 6.6.0-rc5pf-nr-cpus+ #3
> [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
> [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
> c0000000201ff348
> [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
> (6.6.0-rc5pf-nr-cpus+)
> [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR:
> 28222824 XER: 00000001
> [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
> 42000000 IRQMASK: 1
> [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0
> c0000000213d1300 0000000000000004
> [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0
> c000000022aebbb8 0000000000000000
> [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8
> 000000000000003a c0000000229c8a78
> [ 0.000000] GPR12: 0000000000002000 c000000022e4a800
> c0000000211d34b8 c0000000211d3aa8
> [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0
> c0000000225f3b98 0000000000000000
> [ 0.000000] GPR20: 0000000000000001 0000000000000001
> 0000000000000001 0000000000000001
> [ 0.000000] GPR24: 0000000000000008 0000000000000000
> 0000000000000001 c00000019ffffdc0
> [ 0.000000] GPR28: 0000000000000002 c000000022b368e0
> c000000022aebe08 0000000000000008
> [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
> [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
> [ 0.000000] Call Trace:
> [ 0.000000] [c000000022aebda0] [c000000022009ba0]
> smp_setup_cpu_maps+0x35c/0x724 (unreliable)
> [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
> setup_arch+0x1b8/0x54c
> [ 0.000000] [c000000022aebf30] [c000000022003f88]
> start_kernel+0xb0/0x768
> [ 0.000000] [c000000022aebfe0] [c00000002000d888]
> start_here_common+0x1c/0x20
> [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
> 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
> <7d49c12e> eb7b0000 7e99a378 4bffff3c
> [ 0.000000] ---[ end trace 0000000000000000 ]---
> [ 0.000000]
> [ 0.000000] Kernel panic - not syncing: Fatal exception
> [ 0.000000] Rebooting in 180 seconds..
>
> However, the kdump kernel boots fine if the kernel crashes on CPU 0.
Found a pattern in kdump kernel failure with nr_cpus=1.
On CPU 0, 8, 16, 24, 32, 40, it boots fine.
On CPUs 1-7, 9-15, 17-23, 25-31, 33-39, it fails to boot.
Hope this helps.
Thanks,
Sourabh
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-10 4:44 ` Sourabh Jain
2023-10-10 8:24 ` Sourabh Jain
@ 2023-10-10 9:08 ` Sourabh Jain
2023-10-11 2:30 ` Pingfan Liu
1 sibling, 1 reply; 19+ messages in thread
From: Sourabh Jain @ 2023-10-10 9:08 UTC (permalink / raw)
To: Pingfan Liu, linuxppc-dev
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong
Hello Pingfan,
>
> With this patch series applied, the kdump kernel fails to boot on
> powerpc with nr_cpus=1.
>
> Console logs:
> -------------------
> [root]# echo c > /proc/sysrq-trigger
> [ 74.783235] sysrq: Trigger a crash
> [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
> [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
> 6.6.0-rc5pf-nr-cpus+ #3
> [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
> [ 74.783275] Call Trace:
> [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
> dump_stack_lvl+0x6c/0x9c (unreliable)
> [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
> [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
> sysrq_handle_crash+0x28/0x30
> [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
> __handle_sysrq+0x10c/0x250
> [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
> write_sysrq_trigger+0xc8/0x168
> [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
> proc_reg_write+0x10c/0x1b0
> [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
> vfs_write+0x104/0x4b0
> [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
> ksys_write+0x7c/0x140
> [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
> system_call_exception+0x144/0x3a0
> [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
> system_call_common+0xf4/0x258
> [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
> [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
> 0000000000000000
> [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
> (6.6.0-rc5pf-nr-cpus+)
> [ 74.783376] MSR: 800000000280f033
> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> [ 74.783394] IRQMASK: 0
> [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800
> 00007fffa0807300 0000000000000001
> [ 74.783394] GPR04: 000000013549ea60 0000000000000002
> 0000000000000010 0000000000000000
> [ 74.783394] GPR08: 0000000000000000 0000000000000000
> 0000000000000000 0000000000000000
> [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70
> 0000000040000000 000000011a0f9798
> [ 74.783394] GPR16: 000000011a0f9724 000000011a097688
> 000000011a02ff70 000000011a0fd568
> [ 74.783394] GPR20: 0000000135554bf0 0000000000000001
> 000000011a0aa478 00007ffffc4b6a24
> [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94
> 0000000000000002 000000013549ea60
> [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0
> 000000013549ea60 0000000000000002
> [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
> [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
> [ 74.783447] --- interrupt: c00
> I'm in purgatory
> [ 0.000000] radix-mmu: Page sizes from device-tree:
> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> [ 0.000000] Activating Kernel Userspace Access Prevention
> [ 0.000000] Activating Kernel Userspace Execution Prevention
> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> with 64.0 KiB pages (exec)
> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> with 64.0 KiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> with 2.00 MiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> with 2.00 MiB pages (exec)
> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> with 2.00 MiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
> with 1.00 GiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
> with 2.00 MiB pages
> [ 0.000000] lpar: Using radix MMU under hypervisor
> [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
> (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514
> (Red Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
> 41 CDT 2023
> [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
> [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
> 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
> [ 0.000000] printk: bootconsole [udbg0] enabled
> [ 0.000000] the round shift between dt seq and the cpu logic
> number: 56
> [ 0.000000] BUG: Unable to handle kernel data access on write at
> 0xc0000001a0000000
> [ 0.000000] Faulting instruction address: 0xc000000022009c64
> [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
> [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> [ 0.000000] Modules linked in:
> [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
> 6.6.0-rc5pf-nr-cpus+ #3
> [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
> [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
> c0000000201ff348
> [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
> (6.6.0-rc5pf-nr-cpus+)
> [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR:
> 28222824 XER: 00000001
> [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
> 42000000 IRQMASK: 1
> [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0
> c0000000213d1300 0000000000000004
> [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0
> c000000022aebbb8 0000000000000000
> [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8
> 000000000000003a c0000000229c8a78
> [ 0.000000] GPR12: 0000000000002000 c000000022e4a800
> c0000000211d34b8 c0000000211d3aa8
> [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0
> c0000000225f3b98 0000000000000000
> [ 0.000000] GPR20: 0000000000000001 0000000000000001
> 0000000000000001 0000000000000001
> [ 0.000000] GPR24: 0000000000000008 0000000000000000
> 0000000000000001 c00000019ffffdc0
> [ 0.000000] GPR28: 0000000000000002 c000000022b368e0
> c000000022aebe08 0000000000000008
> [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
> [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
> [ 0.000000] Call Trace:
> [ 0.000000] [c000000022aebda0] [c000000022009ba0]
> smp_setup_cpu_maps+0x35c/0x724 (unreliable)
> [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
> setup_arch+0x1b8/0x54c
> [ 0.000000] [c000000022aebf30] [c000000022003f88]
> start_kernel+0xb0/0x768
> [ 0.000000] [c000000022aebfe0] [c00000002000d888]
> start_here_common+0x1c/0x20
> [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
> 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
> <7d49c12e> eb7b0000 7e99a378 4bffff3c
The faulting instruction address, 0xc000000022009c64, corresponds to the
code below:
File:
arch/powerpc/kernel/setup-common.c
Function:
void __init smp_setup_cpu_maps(void)
{
...
cpu_to_phys_id[bt_thread] =
be32_to_cpu(intserv_node->intserv[bt_thread]);
...
}
Hope it helps.
Thanks,
Sourabh Jain
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-10 9:08 ` Sourabh Jain
@ 2023-10-11 2:30 ` Pingfan Liu
2023-10-11 10:53 ` Sourabh Jain
0 siblings, 1 reply; 19+ messages in thread
From: Pingfan Liu @ 2023-10-11 2:30 UTC (permalink / raw)
To: Sourabh Jain
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
linuxppc-dev, Wen Xiong
On Tue, Oct 10, 2023 at 02:38:40PM +0530, Sourabh Jain wrote:
> Hello Pingfan,
>
> >
> > With this patch series applied, the kdump kernel fails to boot on
> > powerpc with nr_cpus=1.
> >
> > Console logs:
> > -------------------
> > [root]# echo c > /proc/sysrq-trigger
> > [ 74.783235] sysrq: Trigger a crash
> > [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
> > [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
> > 6.6.0-rc5pf-nr-cpus+ #3
> > [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
> > [ 74.783275] Call Trace:
> > [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
> > dump_stack_lvl+0x6c/0x9c (unreliable)
> > [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
> > [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
> > sysrq_handle_crash+0x28/0x30
> > [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
> > __handle_sysrq+0x10c/0x250
> > [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
> > write_sysrq_trigger+0xc8/0x168
> > [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
> > proc_reg_write+0x10c/0x1b0
> > [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
> > vfs_write+0x104/0x4b0
> > [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
> > ksys_write+0x7c/0x140
> > [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
> > system_call_exception+0x144/0x3a0
> > [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
> > system_call_common+0xf4/0x258
> > [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
> > [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
> > 0000000000000000
> > [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
> > (6.6.0-rc5pf-nr-cpus+)
> > [ 74.783376] MSR: 800000000280f033
> > <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> > [ 74.783394] IRQMASK: 0
> > [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800 00007fffa0807300
> > 0000000000000001
> > [ 74.783394] GPR04: 000000013549ea60 0000000000000002 0000000000000010
> > 0000000000000000
> > [ 74.783394] GPR08: 0000000000000000 0000000000000000 0000000000000000
> > 0000000000000000
> > [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70 0000000040000000
> > 000000011a0f9798
> > [ 74.783394] GPR16: 000000011a0f9724 000000011a097688 000000011a02ff70
> > 000000011a0fd568
> > [ 74.783394] GPR20: 0000000135554bf0 0000000000000001 000000011a0aa478
> > 00007ffffc4b6a24
> > [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94 0000000000000002
> > 000000013549ea60
> > [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0 000000013549ea60
> > 0000000000000002
> > [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
> > [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
> > [ 74.783447] --- interrupt: c00
> > I'm in purgatory
> > [ 0.000000] radix-mmu: Page sizes from device-tree:
> > [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> > [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> > [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> > [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> > [ 0.000000] Activating Kernel Userspace Access Prevention
> > [ 0.000000] Activating Kernel Userspace Execution Prevention
> > [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> > with 64.0 KiB pages (exec)
> > [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> > with 64.0 KiB pages
> > [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> > with 2.00 MiB pages
> > [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> > with 2.00 MiB pages (exec)
> > [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> > with 2.00 MiB pages
> > [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
> > with 1.00 GiB pages
> > [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
> > with 2.00 MiB pages
> > [ 0.000000] lpar: Using radix MMU under hypervisor
> > [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
> > (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514 (Red
> > Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
> > 41 CDT 2023
> > [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
> > [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
> > 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
> > [ 0.000000] printk: bootconsole [udbg0] enabled
> > [ 0.000000] the round shift between dt seq and the cpu logic number:
> > 56
> > [ 0.000000] BUG: Unable to handle kernel data access on write at
> > 0xc0000001a0000000
> > [ 0.000000] Faulting instruction address: 0xc000000022009c64
> > [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
> > [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> > [ 0.000000] Modules linked in:
> > [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
> > 6.6.0-rc5pf-nr-cpus+ #3
> > [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
> > [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
> > c0000000201ff348
> > [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
> > (6.6.0-rc5pf-nr-cpus+)
> > [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR: 28222824
> > XER: 00000001
> > [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
> > 42000000 IRQMASK: 1
> > [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0 c0000000213d1300
> > 0000000000000004
> > [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0 c000000022aebbb8
> > 0000000000000000
> > [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8 000000000000003a
> > c0000000229c8a78
> > [ 0.000000] GPR12: 0000000000002000 c000000022e4a800 c0000000211d34b8
> > c0000000211d3aa8
> > [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0 c0000000225f3b98
> > 0000000000000000
> > [ 0.000000] GPR20: 0000000000000001 0000000000000001 0000000000000001
> > 0000000000000001
> > [ 0.000000] GPR24: 0000000000000008 0000000000000000 0000000000000001
> > c00000019ffffdc0
> > [ 0.000000] GPR28: 0000000000000002 c000000022b368e0 c000000022aebe08
> > 0000000000000008
> > [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
> > [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
> > [ 0.000000] Call Trace:
> > [ 0.000000] [c000000022aebda0] [c000000022009ba0]
> > smp_setup_cpu_maps+0x35c/0x724 (unreliable)
> > [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
> > setup_arch+0x1b8/0x54c
> > [ 0.000000] [c000000022aebf30] [c000000022003f88]
> > start_kernel+0xb0/0x768
> > [ 0.000000] [c000000022aebfe0] [c00000002000d888]
> > start_here_common+0x1c/0x20
> > [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
> > 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
> > <7d49c12e> eb7b0000 7e99a378 4bffff3c
>
> The faulting instruction address, 0xc000000022009c6, corresponds to the code
> below:
>
> File:
> arch/powerpc/kernel/setup-common.c
>
> Function
> void __init smp_setup_cpu_maps(void)
> {
> ...
> cpu_to_phys_id[bt_thread] =
> be32_to_cpu(intserv_node->intserv[bt_thread]);
> ...
> }
>
> Hope it helps.
>
Appreciate your help.
This issue is most likely related to the capacity of cpu_to_phys_id[]:
the array is allocated with nr_cpu_ids entries but is indexed by
bt_thread, which can be larger than that.
Could you please try the fix suggested at the end of this email?
It is meant as a fix for
[PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
Thanks,
Pingfan
---
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index bd7853a4bc91..849adc7a4b47 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -464,12 +464,6 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
INIT_LIST_HEAD(&head);
- cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
- __alignof__(u32));
- if (!cpu_to_phys_id)
- panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
- __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
-
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
@@ -533,6 +527,16 @@ void __init smp_setup_cpu_maps(void)
}
}
+
+ /* There may be hole between cpu0 and boot cpu */
+ j = (bt_thread + 1) > nr_cpu_ids ? (bt_thread + 1) : nr_cpu_ids;
+ cpu_to_phys_id = memblock_alloc(j * sizeof(u32),
+ __alignof__(u32));
+ if (!cpu_to_phys_id)
+ panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
+ __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
+
+
cpu = 0;
list_del_init(&head);
/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-11 2:30 ` Pingfan Liu
@ 2023-10-11 10:53 ` Sourabh Jain
2023-10-12 13:20 ` Pingfan Liu
0 siblings, 1 reply; 19+ messages in thread
From: Sourabh Jain @ 2023-10-11 10:53 UTC (permalink / raw)
To: Pingfan Liu, linuxppc-dev
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong
Hello Pingfan,
>>> With this patch series applied, the kdump kernel fails to boot on
>>> powerpc with nr_cpus=1.
>>>
>>> Console logs:
>>> -------------------
>>> [root]# echo c > /proc/sysrq-trigger
>>> [ 74.783235] sysrq: Trigger a crash
>>> [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
>>> [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
>>> 6.6.0-rc5pf-nr-cpus+ #3
>>> [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
>>> [ 74.783275] Call Trace:
>>> [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
>>> dump_stack_lvl+0x6c/0x9c (unreliable)
>>> [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
>>> [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
>>> sysrq_handle_crash+0x28/0x30
>>> [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
>>> __handle_sysrq+0x10c/0x250
>>> [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
>>> write_sysrq_trigger+0xc8/0x168
>>> [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
>>> proc_reg_write+0x10c/0x1b0
>>> [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
>>> vfs_write+0x104/0x4b0
>>> [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
>>> ksys_write+0x7c/0x140
>>> [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
>>> system_call_exception+0x144/0x3a0
>>> [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
>>> system_call_common+0xf4/0x258
>>> [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
>>> [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
>>> 0000000000000000
>>> [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
>>> (6.6.0-rc5pf-nr-cpus+)
>>> [ 74.783376] MSR: 800000000280f033
>>> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
>>> [ 74.783394] IRQMASK: 0
>>> [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800 00007fffa0807300
>>> 0000000000000001
>>> [ 74.783394] GPR04: 000000013549ea60 0000000000000002 0000000000000010
>>> 0000000000000000
>>> [ 74.783394] GPR08: 0000000000000000 0000000000000000 0000000000000000
>>> 0000000000000000
>>> [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70 0000000040000000
>>> 000000011a0f9798
>>> [ 74.783394] GPR16: 000000011a0f9724 000000011a097688 000000011a02ff70
>>> 000000011a0fd568
>>> [ 74.783394] GPR20: 0000000135554bf0 0000000000000001 000000011a0aa478
>>> 00007ffffc4b6a24
>>> [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94 0000000000000002
>>> 000000013549ea60
>>> [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0 000000013549ea60
>>> 0000000000000002
>>> [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
>>> [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
>>> [ 74.783447] --- interrupt: c00
>>> I'm in purgatory
>>> [ 0.000000] radix-mmu: Page sizes from device-tree:
>>> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
>>> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
>>> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
>>> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
>>> [ 0.000000] Activating Kernel Userspace Access Prevention
>>> [ 0.000000] Activating Kernel Userspace Execution Prevention
>>> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
>>> with 64.0 KiB pages (exec)
>>> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
>>> with 64.0 KiB pages
>>> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
>>> with 2.00 MiB pages
>>> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
>>> with 2.00 MiB pages (exec)
>>> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
>>> with 2.00 MiB pages
>>> [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
>>> with 1.00 GiB pages
>>> [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
>>> with 2.00 MiB pages
>>> [ 0.000000] lpar: Using radix MMU under hypervisor
>>> [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
>>> (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514 (Red
>>> Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
>>> 41 CDT 2023
>>> [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
>>> [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
>>> 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
>>> [ 0.000000] printk: bootconsole [udbg0] enabled
>>> [ 0.000000] the round shift between dt seq and the cpu logic number:
>>> 56
>>> [ 0.000000] BUG: Unable to handle kernel data access on write at
>>> 0xc0000001a0000000
>>> [ 0.000000] Faulting instruction address: 0xc000000022009c64
>>> [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
>>> [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
>>> [ 0.000000] Modules linked in:
>>> [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
>>> 6.6.0-rc5pf-nr-cpus+ #3
>>> [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
>>> [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
>>> c0000000201ff348
>>> [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
>>> (6.6.0-rc5pf-nr-cpus+)
>>> [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR: 28222824
>>> XER: 00000001
>>> [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
>>> 42000000 IRQMASK: 1
>>> [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0 c0000000213d1300
>>> 0000000000000004
>>> [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0 c000000022aebbb8
>>> 0000000000000000
>>> [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8 000000000000003a
>>> c0000000229c8a78
>>> [ 0.000000] GPR12: 0000000000002000 c000000022e4a800 c0000000211d34b8
>>> c0000000211d3aa8
>>> [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0 c0000000225f3b98
>>> 0000000000000000
>>> [ 0.000000] GPR20: 0000000000000001 0000000000000001 0000000000000001
>>> 0000000000000001
>>> [ 0.000000] GPR24: 0000000000000008 0000000000000000 0000000000000001
>>> c00000019ffffdc0
>>> [ 0.000000] GPR28: 0000000000000002 c000000022b368e0 c000000022aebe08
>>> 0000000000000008
>>> [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
>>> [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
>>> [ 0.000000] Call Trace:
>>> [ 0.000000] [c000000022aebda0] [c000000022009ba0]
>>> smp_setup_cpu_maps+0x35c/0x724 (unreliable)
>>> [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
>>> setup_arch+0x1b8/0x54c
>>> [ 0.000000] [c000000022aebf30] [c000000022003f88]
>>> start_kernel+0xb0/0x768
>>> [ 0.000000] [c000000022aebfe0] [c00000002000d888]
>>> start_here_common+0x1c/0x20
>>> [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
>>> 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
>>> <7d49c12e> eb7b0000 7e99a378 4bffff3c
>> The faulting instruction address, 0xc000000022009c64, corresponds to the code
>> below:
>>
>> File:
>> arch/powerpc/kernel/setup-common.c
>>
>> Function
>> void __init smp_setup_cpu_maps(void)
>> {
>> ...
>> cpu_to_phys_id[bt_thread] =
>> be32_to_cpu(intserv_node->intserv[bt_thread]);
>> ...
>> }
>>
>> Hope it helps.
>>
> Appreciate your help.
>
> This issue should be linked with the capability of cpu_to_phys_id[].
>
> Could you please to try the fix suggested at the end of the email?
> It should be a fix for
> [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
>
>
> Thanks,
>
> Pingfan
>
> ---
>
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index bd7853a4bc91..849adc7a4b47 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -464,12 +464,6 @@ void __init smp_setup_cpu_maps(void)
> DBG("smp_setup_cpu_maps()\n");
>
> INIT_LIST_HEAD(&head);
> - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
> - __alignof__(u32));
> - if (!cpu_to_phys_id)
> - panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
> -
> for_each_node_by_type(dn, "cpu") {
> const __be32 *intserv;
> __be32 cpu_be;
> @@ -533,6 +527,16 @@ void __init smp_setup_cpu_maps(void)
> }
>
> }
> +
> + /* There may be hole between cpu0 and boot cpu */
> + j = (bt_thread + 1) > nr_cpu_ids ? (bt_thread + 1) : nr_cpu_ids;
> + cpu_to_phys_id = memblock_alloc(j * sizeof(u32),
> + __alignof__(u32));
> + if (!cpu_to_phys_id)
> + panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> + __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
> +
> +
> cpu = 0;
> list_del_init(&head);
> /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
With the above changes applied, the kdump kernel boots fine, but with a WARNING:
[root]# echo c > /proc/sysrq-trigger
[ 310.748248] sysrq: Trigger a crash
[ 310.748256] Kernel panic - not syncing: sysrq triggered crash
[ 310.748266] CPU: 26 PID: 2610 Comm: bash Kdump: loaded Not tainted
6.6.0-rc5-fix-setup-common+ #3
[ 310.748273] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
[ 310.748280] Call Trace:
[ 310.748284] [c000000184717ac0] [c000000000ecf8d8]
dump_stack_lvl+0x6c/0x9c (unreliable)
[ 310.748298] [c000000184717af0] [c000000000150310] panic+0x178/0x438
[ 310.748307] [c000000184717b90] [c00000000092c8b8]
sysrq_handle_crash+0x28/0x30
[ 310.748316] [c000000184717bf0] [c00000000092d2ac]
__handle_sysrq+0x10c/0x250
[ 310.748330] [c000000184717c90] [c00000000092db18]
write_sysrq_trigger+0xc8/0x168
[ 310.748339] [c000000184717cd0] [c00000000065c21c]
proc_reg_write+0x10c/0x1b0
[ 310.748349] [c000000184717d00] [c000000000583f94] vfs_write+0x104/0x4b0
[ 310.748356] [c000000184717dc0] [c00000000058451c] ksys_write+0x7c/0x140
[ 310.748365] [c000000184717e10] [c000000000033a54]
system_call_exception+0x144/0x3a0
[ 310.748377] [c000000184717e50] [c00000000000c554]
system_call_common+0xf4/0x258
[ 310.748389] --- interrupt: c00 at 0x7fff97720c34
[ 310.748395] NIP: 00007fff97720c34 LR: 00007fff97697c74 CTR:
0000000000000000
[ 310.748404] REGS: c000000184717e80 TRAP: 0c00 Not tainted
(6.6.0-rc5-fix-setup-common+)
[ 310.748413] MSR: 800000000280f033
<SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
[ 310.748430] IRQMASK: 0
[ 310.748430] GPR00: 0000000000000004 00007fffffabc510 00007fff97807300
0000000000000001
[ 310.748430] GPR04: 00000001624f7910 0000000000000002 0000000000000010
00007fff97669724
[ 310.748430] GPR08: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 310.748430] GPR12: 0000000000000000 00007fff97a5aee0 0000000040000000
0000000125c39798
[ 310.748430] GPR16: 0000000125c39724 0000000125bd8128 0000000125b70370
0000000125c3d568
[ 310.748430] GPR20: 0000000162551030 0000000000000001 0000000125beaf18
00007fffffabc734
[ 310.748430] GPR24: 00007fffffabc730 0000000125c3af94 0000000000000002
00000001624f7910
[ 310.748430] GPR28: 0000000000000002 00007fff97801798 00000001624f7910
0000000000000002
[ 310.748475] NIP [00007fff97720c34] 0x7fff97720c34
[ 310.748478] LR [00007fff97697c74] 0x7fff97697c74
[ 310.748482] --- interrupt: c00
I'm in purgatory
[ 0.000000] radix-mmu: Page sizes from device-tree:
[ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
[ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
[ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
[ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
[ 0.000000] Activating Kernel Userspace Access Prevention
[ 0.000000] Activating Kernel Userspace Execution Prevention
[ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
with 64.0 KiB pages (exec)
[ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
with 64.0 KiB pages
[ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
with 2.00 MiB pages
[ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
with 2.00 MiB pages (exec)
[ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
with 2.00 MiB pages
Trimmed logs ....
[ 0.001738] Mount-cache hash table entries: 16384 (order: 1, 131072
bytes, linear)
[ 0.001751] Mountpoint-cache hash table entries: 16384 (order: 1,
131072 bytes, linear)
[ 0.007339] ------------[ cut here ]------------
[ 0.007356] WARNING: CPU: 2 PID: 1 at arch/powerpc/kernel/smp.c:941
update_mask_from_threadgroup+0x128/0x1a0
[ 0.007371] Modules linked in:
[ 0.007377] CPU: 2 PID: 1 Comm: swapper/2 Not tainted
6.6.0-rc5-fix-setup-common+ #3
[ 0.007385] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
[ 0.007393] NIP: c000000022011ed8 LR: c000000022011e10 CTR:
0000000000000000
[ 0.007411] REGS: c0000000256338f0 TRAP: 0700 Not tainted
(6.6.0-rc5-fix-setup-common+)
[ 0.007425] MSR: 8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR:
44000842 XER: 0000000c
[ 0.007444] CFAR: c000000022011e78 IRQMASK: 0
[ 0.007444] GPR00: c000000022011e10 c000000025633b90 c0000000213c1300
0000000000000002
[ 0.007444] GPR04: 0000000000000000 0000000000000005 0000000000000001
0000000000000002
[ 0.007444] GPR08: 0000000000000008 0000000000000001 0000000000000002
0000000000000004
[ 0.007444] GPR12: 0000000000000000 c000000022e3ac00 c000000020010138
0000000000000000
[ 0.007444] GPR16: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007444] GPR20: 0000000000000018 c000000022150968 c000000022093580
c0000000253df000
[ 0.007444] GPR24: 0000000000000002 0000000000000000 c000000022b32058
0000000000000000
[ 0.007444] GPR28: c00000015fca0a68 c000000022ba0330 c00000002209352c
0000000000000000
[ 0.007520] NIP [c000000022011ed8]
update_mask_from_threadgroup+0x128/0x1a0
[ 0.007528] LR [c000000022011e10] update_mask_from_threadgroup+0x60/0x1a0
[ 0.007536] Call Trace:
[ 0.007539] [c000000025633b90] [c000000022011e10]
update_mask_from_threadgroup+0x60/0x1a0 (unreliable)
[ 0.007550] [c000000025633be0] [c000000022012210]
init_thread_group_cache_map+0x2c0/0x338
[ 0.007559] [c000000025633c50] [c0000000220125a0]
smp_prepare_cpus+0x318/0x510
[ 0.007568] [c000000025633d10] [c000000022004874]
kernel_init_freeable+0x198/0x3cc
[ 0.007578] [c000000025633de0] [c000000020010164] kernel_init+0x34/0x1b0
[ 0.007586] [c000000025633e50] [c00000002000cd94]
ret_from_kernel_user_thread+0x14/0x1c
[ 0.007596] --- interrupt: 0 at 0x0
[ 0.007601] NIP: 0000000000000000 LR: 0000000000000000 CTR:
0000000000000000
[ 0.007608] REGS: c000000025633e80 TRAP: 0000 Not tainted
(6.6.0-rc5-fix-setup-common+)
[ 0.007632] MSR: 0000000000000000 <> CR: 00000000 XER: 00000000
[ 0.007651] CFAR: 0000000000000000 IRQMASK: 0
[ 0.007651] GPR00: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR04: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR08: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR12: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR16: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR20: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR24: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007651] GPR28: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 0.007742] NIP [0000000000000000] 0x0
[ 0.007756] LR [0000000000000000] 0x0
[ 0.007769] --- interrupt: 0
[ 0.007779] Code: 7ca507b4 79081764 7d1e4214 8108000c 7f882000
409effdc 48000010 38e70001 7ce707b4 4bffffa4 2f8affff 409e0010
<0fe00000> 3860ffc3 4800004c 7f9b5000
[ 0.007805] ---[ end trace 0000000000000000 ]---
[ 0.007997] RCU Tasks Rude: Setting shift to 2 and lim to 1
rcu_task_cb_adjust=1.
[ 0.008018] RCU Tasks Trace: Setting shift to 2 and lim to 1
rcu_task_cb_adjust=1.
[ 0.008043] POWER10 performance monitor hardware support registered
[ 0.008071] rcu: Hierarchical SRCU implementation.
[ 0.008078] rcu: Max phase no-delay instances is 1000.
[ 0.008516] smp: Bringing up secondary CPUs ...
[ 0.008735] smp: Brought up 1 node, 2 CPUs
...
Note: no warning observed if crashing CPU is 0, 8, 16, 24, 32, ....
Code that generates warning:
File: arch/powerpc/kernel/smp.c
Function: update_mask_from_threadgroup
...
if (unlikely(i_group_start == -1)) {
WARN_ON_ONCE(1);
return -ENODATA;
}
Thanks,
Sourabh
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-11 10:53 ` Sourabh Jain
@ 2023-10-12 13:20 ` Pingfan Liu
2023-10-16 6:43 ` Sourabh Jain
0 siblings, 1 reply; 19+ messages in thread
From: Pingfan Liu @ 2023-10-12 13:20 UTC (permalink / raw)
To: Sourabh Jain
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
linuxppc-dev, Wen Xiong
On Wed, Oct 11, 2023 at 6:53 PM Sourabh Jain <sourabhjain@linux.ibm.com> wrote:
>
> Hello Pingfan,
> >>> With this patch series applied, the kdump kernel fails to boot on
> >>> powerpc with nr_cpus=1.
> >>>
> >>> Console logs:
> >>> -------------------
> >>> [root]# echo c > /proc/sysrq-trigger
> >>> [ 74.783235] sysrq: Trigger a crash
> >>> [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
> >>> [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
> >>> 6.6.0-rc5pf-nr-cpus+ #3
> >>> [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
> >>> [ 74.783275] Call Trace:
> >>> [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
> >>> dump_stack_lvl+0x6c/0x9c (unreliable)
> >>> [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
> >>> [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
> >>> sysrq_handle_crash+0x28/0x30
> >>> [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
> >>> __handle_sysrq+0x10c/0x250
> >>> [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
> >>> write_sysrq_trigger+0xc8/0x168
> >>> [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
> >>> proc_reg_write+0x10c/0x1b0
> >>> [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
> >>> vfs_write+0x104/0x4b0
> >>> [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
> >>> ksys_write+0x7c/0x140
> >>> [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
> >>> system_call_exception+0x144/0x3a0
> >>> [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
> >>> system_call_common+0xf4/0x258
> >>> [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
> >>> [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
> >>> 0000000000000000
> >>> [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
> >>> (6.6.0-rc5pf-nr-cpus+)
> >>> [ 74.783376] MSR: 800000000280f033
> >>> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> >>> [ 74.783394] IRQMASK: 0
> >>> [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800 00007fffa0807300
> >>> 0000000000000001
> >>> [ 74.783394] GPR04: 000000013549ea60 0000000000000002 0000000000000010
> >>> 0000000000000000
> >>> [ 74.783394] GPR08: 0000000000000000 0000000000000000 0000000000000000
> >>> 0000000000000000
> >>> [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70 0000000040000000
> >>> 000000011a0f9798
> >>> [ 74.783394] GPR16: 000000011a0f9724 000000011a097688 000000011a02ff70
> >>> 000000011a0fd568
> >>> [ 74.783394] GPR20: 0000000135554bf0 0000000000000001 000000011a0aa478
> >>> 00007ffffc4b6a24
> >>> [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94 0000000000000002
> >>> 000000013549ea60
> >>> [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0 000000013549ea60
> >>> 0000000000000002
> >>> [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
> >>> [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
> >>> [ 74.783447] --- interrupt: c00
> >>> I'm in purgatory
> >>> [ 0.000000] radix-mmu: Page sizes from device-tree:
> >>> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> >>> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> >>> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> >>> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> >>> [ 0.000000] Activating Kernel Userspace Access Prevention
> >>> [ 0.000000] Activating Kernel Userspace Execution Prevention
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> >>> with 64.0 KiB pages (exec)
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> >>> with 64.0 KiB pages
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> >>> with 2.00 MiB pages
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> >>> with 2.00 MiB pages (exec)
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> >>> with 2.00 MiB pages
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
> >>> with 1.00 GiB pages
> >>> [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
> >>> with 2.00 MiB pages
> >>> [ 0.000000] lpar: Using radix MMU under hypervisor
> >>> [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
> >>> (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514 (Red
> >>> Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
> >>> 41 CDT 2023
> >>> [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
> >>> [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
> >>> 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
> >>> [ 0.000000] printk: bootconsole [udbg0] enabled
> >>> [ 0.000000] the round shift between dt seq and the cpu logic number:
> >>> 56
> >>> [ 0.000000] BUG: Unable to handle kernel data access on write at
> >>> 0xc0000001a0000000
> >>> [ 0.000000] Faulting instruction address: 0xc000000022009c64
> >>> [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
> >>> [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> >>> [ 0.000000] Modules linked in:
> >>> [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
> >>> 6.6.0-rc5pf-nr-cpus+ #3
> >>> [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
> >>> [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
> >>> c0000000201ff348
> >>> [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
> >>> (6.6.0-rc5pf-nr-cpus+)
> >>> [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR: 28222824
> >>> XER: 00000001
> >>> [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
> >>> 42000000 IRQMASK: 1
> >>> [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0 c0000000213d1300
> >>> 0000000000000004
> >>> [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0 c000000022aebbb8
> >>> 0000000000000000
> >>> [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8 000000000000003a
> >>> c0000000229c8a78
> >>> [ 0.000000] GPR12: 0000000000002000 c000000022e4a800 c0000000211d34b8
> >>> c0000000211d3aa8
> >>> [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0 c0000000225f3b98
> >>> 0000000000000000
> >>> [ 0.000000] GPR20: 0000000000000001 0000000000000001 0000000000000001
> >>> 0000000000000001
> >>> [ 0.000000] GPR24: 0000000000000008 0000000000000000 0000000000000001
> >>> c00000019ffffdc0
> >>> [ 0.000000] GPR28: 0000000000000002 c000000022b368e0 c000000022aebe08
> >>> 0000000000000008
> >>> [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
> >>> [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
> >>> [ 0.000000] Call Trace:
> >>> [ 0.000000] [c000000022aebda0] [c000000022009ba0]
> >>> smp_setup_cpu_maps+0x35c/0x724 (unreliable)
> >>> [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
> >>> setup_arch+0x1b8/0x54c
> >>> [ 0.000000] [c000000022aebf30] [c000000022003f88]
> >>> start_kernel+0xb0/0x768
> >>> [ 0.000000] [c000000022aebfe0] [c00000002000d888]
> >>> start_here_common+0x1c/0x20
> >>> [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
> >>> 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
> >>> <7d49c12e> eb7b0000 7e99a378 4bffff3c
> >> The faulting instruction address, 0xc000000022009c64, corresponds to the code
> >> below:
> >>
> >> File:
> >> arch/powerpc/kernel/setup-common.c
> >>
> >> Function
> >> void __init smp_setup_cpu_maps(void)
> >> {
> >> ...
> >> cpu_to_phys_id[bt_thread] =
> >> be32_to_cpu(intserv_node->intserv[bt_thread]);
> >> ...
> >> }
> >>
> >> Hope it helps.
> >>
> > Appreciate your help.
> >
> > This issue should be linked with the capability of cpu_to_phys_id[].
> >
> > Could you please to try the fix suggested at the end of the email?
> > It should be a fix for
> > [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
> >
> >
> > Thanks,
> >
> > Pingfan
> >
> > ---
> >
> > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > index bd7853a4bc91..849adc7a4b47 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -464,12 +464,6 @@ void __init smp_setup_cpu_maps(void)
> > DBG("smp_setup_cpu_maps()\n");
> >
> > INIT_LIST_HEAD(&head);
> > - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
> > - __alignof__(u32));
> > - if (!cpu_to_phys_id)
> > - panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> > - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
> > -
> > for_each_node_by_type(dn, "cpu") {
> > const __be32 *intserv;
> > __be32 cpu_be;
> > @@ -533,6 +527,16 @@ void __init smp_setup_cpu_maps(void)
> > }
> >
> > }
> > +
> > + /* There may be hole between cpu0 and boot cpu */
> > + j = (bt_thread + 1) > nr_cpu_ids ? (bt_thread + 1) : nr_cpu_ids;
> > + cpu_to_phys_id = memblock_alloc(j * sizeof(u32),
> > + __alignof__(u32));
> > + if (!cpu_to_phys_id)
> > + panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> > + __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
> > +
> > +
> > cpu = 0;
> > list_del_init(&head);
> > /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
>
> With the above changes applied, kdump kernel boots fine with a WARNING:
>
> [root]# echo c > /proc/sysrq-trigger
> [ 310.748248] sysrq: Trigger a crash
> [ 310.748256] Kernel panic - not syncing: sysrq triggered crash
> [ 310.748266] CPU: 26 PID: 2610 Comm: bash Kdump: loaded Not tainted
> 6.6.0-rc5-fix-setup-common+ #3
> [ 310.748273] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
> [ 310.748280] Call Trace:
> [ 310.748284] [c000000184717ac0] [c000000000ecf8d8]
> dump_stack_lvl+0x6c/0x9c (unreliable)
> [ 310.748298] [c000000184717af0] [c000000000150310] panic+0x178/0x438
> [ 310.748307] [c000000184717b90] [c00000000092c8b8]
> sysrq_handle_crash+0x28/0x30
> [ 310.748316] [c000000184717bf0] [c00000000092d2ac]
> __handle_sysrq+0x10c/0x250
> [ 310.748330] [c000000184717c90] [c00000000092db18]
> write_sysrq_trigger+0xc8/0x168
> [ 310.748339] [c000000184717cd0] [c00000000065c21c]
> proc_reg_write+0x10c/0x1b0
> [ 310.748349] [c000000184717d00] [c000000000583f94] vfs_write+0x104/0x4b0
> [ 310.748356] [c000000184717dc0] [c00000000058451c] ksys_write+0x7c/0x140
> [ 310.748365] [c000000184717e10] [c000000000033a54]
> system_call_exception+0x144/0x3a0
> [ 310.748377] [c000000184717e50] [c00000000000c554]
> system_call_common+0xf4/0x258
> [ 310.748389] --- interrupt: c00 at 0x7fff97720c34
> [ 310.748395] NIP: 00007fff97720c34 LR: 00007fff97697c74 CTR:
> 0000000000000000
> [ 310.748404] REGS: c000000184717e80 TRAP: 0c00 Not tainted
> (6.6.0-rc5-fix-setup-common+)
> [ 310.748413] MSR: 800000000280f033
> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> [ 310.748430] IRQMASK: 0
> [ 310.748430] GPR00: 0000000000000004 00007fffffabc510 00007fff97807300
> 0000000000000001
> [ 310.748430] GPR04: 00000001624f7910 0000000000000002 0000000000000010
> 00007fff97669724
> [ 310.748430] GPR08: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 310.748430] GPR12: 0000000000000000 00007fff97a5aee0 0000000040000000
> 0000000125c39798
> [ 310.748430] GPR16: 0000000125c39724 0000000125bd8128 0000000125b70370
> 0000000125c3d568
> [ 310.748430] GPR20: 0000000162551030 0000000000000001 0000000125beaf18
> 00007fffffabc734
> [ 310.748430] GPR24: 00007fffffabc730 0000000125c3af94 0000000000000002
> 00000001624f7910
> [ 310.748430] GPR28: 0000000000000002 00007fff97801798 00000001624f7910
> 0000000000000002
> [ 310.748475] NIP [00007fff97720c34] 0x7fff97720c34
> [ 310.748478] LR [00007fff97697c74] 0x7fff97697c74
> [ 310.748482] --- interrupt: c00
> I'm in purgatory
> [ 0.000000] radix-mmu: Page sizes from device-tree:
> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> [ 0.000000] Activating Kernel Userspace Access Prevention
> [ 0.000000] Activating Kernel Userspace Execution Prevention
> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> with 64.0 KiB pages (exec)
> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> with 64.0 KiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> with 2.00 MiB pages
> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> with 2.00 MiB pages (exec)
> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> with 2.00 MiB pages
>
> Trimmed logs ....
>
> [ 0.001738] Mount-cache hash table entries: 16384 (order: 1, 131072
> bytes, linear)
> [ 0.001751] Mountpoint-cache hash table entries: 16384 (order: 1,
> 131072 bytes, linear)
> [ 0.007339] ------------[ cut here ]------------
> [ 0.007356] WARNING: CPU: 2 PID: 1 at arch/powerpc/kernel/smp.c:941
> update_mask_from_threadgroup+0x128/0x1a0
> [ 0.007371] Modules linked in:
> [ 0.007377] CPU: 2 PID: 1 Comm: swapper/2 Not tainted
> 6.6.0-rc5-fix-setup-common+ #3
> [ 0.007385] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
> [ 0.007393] NIP: c000000022011ed8 LR: c000000022011e10 CTR:
> 0000000000000000
> [ 0.007411] REGS: c0000000256338f0 TRAP: 0700 Not tainted
> (6.6.0-rc5-fix-setup-common+)
> [ 0.007425] MSR: 8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR:
> 44000842 XER: 0000000c
> [ 0.007444] CFAR: c000000022011e78 IRQMASK: 0
> [ 0.007444] GPR00: c000000022011e10 c000000025633b90 c0000000213c1300
> 0000000000000002
> [ 0.007444] GPR04: 0000000000000000 0000000000000005 0000000000000001
> 0000000000000002
> [ 0.007444] GPR08: 0000000000000008 0000000000000001 0000000000000002
> 0000000000000004
> [ 0.007444] GPR12: 0000000000000000 c000000022e3ac00 c000000020010138
> 0000000000000000
> [ 0.007444] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007444] GPR20: 0000000000000018 c000000022150968 c000000022093580
> c0000000253df000
> [ 0.007444] GPR24: 0000000000000002 0000000000000000 c000000022b32058
> 0000000000000000
> [ 0.007444] GPR28: c00000015fca0a68 c000000022ba0330 c00000002209352c
> 0000000000000000
> [ 0.007520] NIP [c000000022011ed8]
> update_mask_from_threadgroup+0x128/0x1a0
> [ 0.007528] LR [c000000022011e10] update_mask_from_threadgroup+0x60/0x1a0
> [ 0.007536] Call Trace:
> [ 0.007539] [c000000025633b90] [c000000022011e10]
> update_mask_from_threadgroup+0x60/0x1a0 (unreliable)
> [ 0.007550] [c000000025633be0] [c000000022012210]
> init_thread_group_cache_map+0x2c0/0x338
> [ 0.007559] [c000000025633c50] [c0000000220125a0]
> smp_prepare_cpus+0x318/0x510
> [ 0.007568] [c000000025633d10] [c000000022004874]
> kernel_init_freeable+0x198/0x3cc
> [ 0.007578] [c000000025633de0] [c000000020010164] kernel_init+0x34/0x1b0
> [ 0.007586] [c000000025633e50] [c00000002000cd94]
> ret_from_kernel_user_thread+0x14/0x1c
> [ 0.007596] --- interrupt: 0 at 0x0
> [ 0.007601] NIP: 0000000000000000 LR: 0000000000000000 CTR:
> 0000000000000000
> [ 0.007608] REGS: c000000025633e80 TRAP: 0000 Not tainted
> (6.6.0-rc5-fix-setup-common+)
> [ 0.007632] MSR: 0000000000000000 <> CR: 00000000 XER: 00000000
> [ 0.007651] CFAR: 0000000000000000 IRQMASK: 0
> [ 0.007651] GPR00: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR04: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR08: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR12: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR16: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR20: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR24: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007651] GPR28: 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000
> [ 0.007742] NIP [0000000000000000] 0x0
> [ 0.007756] LR [0000000000000000] 0x0
> [ 0.007769] --- interrupt: 0
> [ 0.007779] Code: 7ca507b4 79081764 7d1e4214 8108000c 7f882000
> 409effdc 48000010 38e70001 7ce707b4 4bffffa4 2f8affff 409e0010
> <0fe00000> 3860ffc3 4800004c 7f9b5000
> [ 0.007805] ---[ end trace 0000000000000000 ]---
> [ 0.007997] RCU Tasks Rude: Setting shift to 2 and lim to 1
> rcu_task_cb_adjust=1.
> [ 0.008018] RCU Tasks Trace: Setting shift to 2 and lim to 1
> rcu_task_cb_adjust=1.
> [ 0.008043] POWER10 performance monitor hardware support registered
> [ 0.008071] rcu: Hierarchical SRCU implementation.
> [ 0.008078] rcu: Max phase no-delay instances is 1000.
> [ 0.008516] smp: Bringing up secondary CPUs ...
> [ 0.008735] smp: Brought up 1 node, 2 CPUs
> ...
>
> Note: no warning observed if crashing CPU is 0, 8, 16, 24, 32, ....
>
> Code that generates warning:
>
> File: arch/powerpc/kernel/smp.c
> Function: update_mask_from_threadgroup
> ...
> if (unlikely(i_group_start == -1)) {
> WARN_ON_ONCE(1);
> return -ENODATA;
> }
>
It seems that the crash cpu got past the following check in
init_thread_group_cache_map()
{
if (unlikely(cpu_group_start == -1)) {
WARN_ON_ONCE(1);
return -ENODATA;
}
}
But it raised the warning in the update_mask_from_threadgroup() snippet you
quoted. That means get_cpu_thread_group_start(i, tg) failed for the
@first_thread in update_mask_from_threadgroup(). At present, I have no idea why.
Is this warning also observed when only [1-2/5] are applied?
According to the data I collected, the percpu area costs 1792 kB per cpu.
Forcing all eight threads in a core online therefore costs 10752 kB more than
applying the whole series does. Maybe I can put [3-5/5] aside, as Hari
suggested, and revisit them later if needed.
Thanks,
Pingfan
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-12 13:20 ` Pingfan Liu
@ 2023-10-16 6:43 ` Sourabh Jain
2023-10-17 2:12 ` Pingfan Liu
0 siblings, 1 reply; 19+ messages in thread
From: Sourabh Jain @ 2023-10-16 6:43 UTC (permalink / raw)
To: Pingfan Liu
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
linuxppc-dev, Wen Xiong
Hello Pingfan,
>>>>> With this patch series applied, the kdump kernel fails to boot on
>>>>> powerpc with nr_cpus=1.
>>>>>
>>>>> Console logs:
>>>>> -------------------
>>>>> [root]# echo c > /proc/sysrq-trigger
>>>>> [ 74.783235] sysrq: Trigger a crash
>>>>> [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
>>>>> [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
>>>>> 6.6.0-rc5pf-nr-cpus+ #3
>>>>> [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
>>>>> [ 74.783275] Call Trace:
>>>>> [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
>>>>> dump_stack_lvl+0x6c/0x9c (unreliable)
>>>>> [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
>>>>> [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
>>>>> sysrq_handle_crash+0x28/0x30
>>>>> [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
>>>>> __handle_sysrq+0x10c/0x250
>>>>> [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
>>>>> write_sysrq_trigger+0xc8/0x168
>>>>> [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
>>>>> proc_reg_write+0x10c/0x1b0
>>>>> [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
>>>>> vfs_write+0x104/0x4b0
>>>>> [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
>>>>> ksys_write+0x7c/0x140
>>>>> [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
>>>>> system_call_exception+0x144/0x3a0
>>>>> [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
>>>>> system_call_common+0xf4/0x258
>>>>> [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
>>>>> [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
>>>>> 0000000000000000
>>>>> [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
>>>>> (6.6.0-rc5pf-nr-cpus+)
>>>>> [ 74.783376] MSR: 800000000280f033
>>>>> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
>>>>> [ 74.783394] IRQMASK: 0
>>>>> [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800 00007fffa0807300
>>>>> 0000000000000001
>>>>> [ 74.783394] GPR04: 000000013549ea60 0000000000000002 0000000000000010
>>>>> 0000000000000000
>>>>> [ 74.783394] GPR08: 0000000000000000 0000000000000000 0000000000000000
>>>>> 0000000000000000
>>>>> [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70 0000000040000000
>>>>> 000000011a0f9798
>>>>> [ 74.783394] GPR16: 000000011a0f9724 000000011a097688 000000011a02ff70
>>>>> 000000011a0fd568
>>>>> [ 74.783394] GPR20: 0000000135554bf0 0000000000000001 000000011a0aa478
>>>>> 00007ffffc4b6a24
>>>>> [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94 0000000000000002
>>>>> 000000013549ea60
>>>>> [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0 000000013549ea60
>>>>> 0000000000000002
>>>>> [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
>>>>> [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
>>>>> [ 74.783447] --- interrupt: c00
>>>>> I'm in purgatory
>>>>> [ 0.000000] radix-mmu: Page sizes from device-tree:
>>>>> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
>>>>> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
>>>>> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
>>>>> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
>>>>> [ 0.000000] Activating Kernel Userspace Access Prevention
>>>>> [ 0.000000] Activating Kernel Userspace Execution Prevention
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
>>>>> with 64.0 KiB pages (exec)
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
>>>>> with 64.0 KiB pages
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
>>>>> with 2.00 MiB pages
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
>>>>> with 2.00 MiB pages (exec)
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
>>>>> with 2.00 MiB pages
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
>>>>> with 1.00 GiB pages
>>>>> [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
>>>>> with 2.00 MiB pages
>>>>> [ 0.000000] lpar: Using radix MMU under hypervisor
>>>>> [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
>>>>> (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514 (Red
>>>>> Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
>>>>> 41 CDT 2023
>>>>> [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
>>>>> [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
>>>>> 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
>>>>> [ 0.000000] printk: bootconsole [udbg0] enabled
>>>>> [ 0.000000] the round shift between dt seq and the cpu logic number:
>>>>> 56
>>>>> [ 0.000000] BUG: Unable to handle kernel data access on write at
>>>>> 0xc0000001a0000000
>>>>> [ 0.000000] Faulting instruction address: 0xc000000022009c64
>>>>> [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>> [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
>>>>> [ 0.000000] Modules linked in:
>>>>> [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
>>>>> 6.6.0-rc5pf-nr-cpus+ #3
>>>>> [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
>>>>> [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
>>>>> c0000000201ff348
>>>>> [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
>>>>> (6.6.0-rc5pf-nr-cpus+)
>>>>> [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR: 28222824
>>>>> XER: 00000001
>>>>> [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
>>>>> 42000000 IRQMASK: 1
>>>>> [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0 c0000000213d1300
>>>>> 0000000000000004
>>>>> [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0 c000000022aebbb8
>>>>> 0000000000000000
>>>>> [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8 000000000000003a
>>>>> c0000000229c8a78
>>>>> [ 0.000000] GPR12: 0000000000002000 c000000022e4a800 c0000000211d34b8
>>>>> c0000000211d3aa8
>>>>> [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0 c0000000225f3b98
>>>>> 0000000000000000
>>>>> [ 0.000000] GPR20: 0000000000000001 0000000000000001 0000000000000001
>>>>> 0000000000000001
>>>>> [ 0.000000] GPR24: 0000000000000008 0000000000000000 0000000000000001
>>>>> c00000019ffffdc0
>>>>> [ 0.000000] GPR28: 0000000000000002 c000000022b368e0 c000000022aebe08
>>>>> 0000000000000008
>>>>> [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
>>>>> [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
>>>>> [ 0.000000] Call Trace:
>>>>> [ 0.000000] [c000000022aebda0] [c000000022009ba0]
>>>>> smp_setup_cpu_maps+0x35c/0x724 (unreliable)
>>>>> [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
>>>>> setup_arch+0x1b8/0x54c
>>>>> [ 0.000000] [c000000022aebf30] [c000000022003f88]
>>>>> start_kernel+0xb0/0x768
>>>>> [ 0.000000] [c000000022aebfe0] [c00000002000d888]
>>>>> start_here_common+0x1c/0x20
>>>>> [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
>>>>> 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
>>>>> <7d49c12e> eb7b0000 7e99a378 4bffff3c
>>>> The faulting instruction address, 0xc000000022009c64, corresponds to the code
>>>> below:
>>>>
>>>> File:
>>>> arch/powerpc/kernel/setup-common.c
>>>>
>>>> Function
>>>> void __init smp_setup_cpu_maps(void)
>>>> {
>>>> ...
>>>> cpu_to_phys_id[bt_thread] =
>>>> be32_to_cpu(intserv_node->intserv[bt_thread]);
>>>> ...
>>>> }
>>>>
>>>> Hope it helps.
>>>>
>>> Appreciate your help.
>>>
>>> This issue should be linked with the capacity of cpu_to_phys_id[].
>>>
>>> Could you please try the fix suggested at the end of the email?
>>> It should be a fix for
>>> [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
>>>
>>>
>>> Thanks,
>>>
>>> Pingfan
>>>
>>> ---
>>>
>>> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
>>> index bd7853a4bc91..849adc7a4b47 100644
>>> --- a/arch/powerpc/kernel/setup-common.c
>>> +++ b/arch/powerpc/kernel/setup-common.c
>>> @@ -464,12 +464,6 @@ void __init smp_setup_cpu_maps(void)
>>> DBG("smp_setup_cpu_maps()\n");
>>>
>>> INIT_LIST_HEAD(&head);
>>> - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
>>> - __alignof__(u32));
>>> - if (!cpu_to_phys_id)
>>> - panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
>>> - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
>>> -
>>> for_each_node_by_type(dn, "cpu") {
>>> const __be32 *intserv;
>>> __be32 cpu_be;
>>> @@ -533,6 +527,16 @@ void __init smp_setup_cpu_maps(void)
>>> }
>>>
>>> }
>>> +
>>> + /* There may be hole between cpu0 and boot cpu */
>>> + j = (bt_thread + 1) > nr_cpu_ids ? (bt_thread + 1) : nr_cpu_ids;
>>> + cpu_to_phys_id = memblock_alloc(j * sizeof(u32),
>>> + __alignof__(u32));
>>> + if (!cpu_to_phys_id)
>>> + panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
>>> + __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
>>> +
>>> +
>>> cpu = 0;
>>> list_del_init(&head);
>>> /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
>> With the above changes applied, kdump kernel boots fine with a WARNING:
>>
>> [root]# echo c > /proc/sysrq-trigger
>> [ 310.748248] sysrq: Trigger a crash
>> [ 310.748256] Kernel panic - not syncing: sysrq triggered crash
>> [ 310.748266] CPU: 26 PID: 2610 Comm: bash Kdump: loaded Not tainted
>> 6.6.0-rc5-fix-setup-common+ #3
>> [ 310.748273] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
>> [ 310.748280] Call Trace:
>> [ 310.748284] [c000000184717ac0] [c000000000ecf8d8]
>> dump_stack_lvl+0x6c/0x9c (unreliable)
>> [ 310.748298] [c000000184717af0] [c000000000150310] panic+0x178/0x438
>> [ 310.748307] [c000000184717b90] [c00000000092c8b8]
>> sysrq_handle_crash+0x28/0x30
>> [ 310.748316] [c000000184717bf0] [c00000000092d2ac]
>> __handle_sysrq+0x10c/0x250
>> [ 310.748330] [c000000184717c90] [c00000000092db18]
>> write_sysrq_trigger+0xc8/0x168
>> [ 310.748339] [c000000184717cd0] [c00000000065c21c]
>> proc_reg_write+0x10c/0x1b0
>> [ 310.748349] [c000000184717d00] [c000000000583f94] vfs_write+0x104/0x4b0
>> [ 310.748356] [c000000184717dc0] [c00000000058451c] ksys_write+0x7c/0x140
>> [ 310.748365] [c000000184717e10] [c000000000033a54]
>> system_call_exception+0x144/0x3a0
>> [ 310.748377] [c000000184717e50] [c00000000000c554]
>> system_call_common+0xf4/0x258
>> [ 310.748389] --- interrupt: c00 at 0x7fff97720c34
>> [ 310.748395] NIP: 00007fff97720c34 LR: 00007fff97697c74 CTR:
>> 0000000000000000
>> [ 310.748404] REGS: c000000184717e80 TRAP: 0c00 Not tainted
>> (6.6.0-rc5-fix-setup-common+)
>> [ 310.748413] MSR: 800000000280f033
>> <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
>> [ 310.748430] IRQMASK: 0
>> [ 310.748430] GPR00: 0000000000000004 00007fffffabc510 00007fff97807300
>> 0000000000000001
>> [ 310.748430] GPR04: 00000001624f7910 0000000000000002 0000000000000010
>> 00007fff97669724
>> [ 310.748430] GPR08: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 310.748430] GPR12: 0000000000000000 00007fff97a5aee0 0000000040000000
>> 0000000125c39798
>> [ 310.748430] GPR16: 0000000125c39724 0000000125bd8128 0000000125b70370
>> 0000000125c3d568
>> [ 310.748430] GPR20: 0000000162551030 0000000000000001 0000000125beaf18
>> 00007fffffabc734
>> [ 310.748430] GPR24: 00007fffffabc730 0000000125c3af94 0000000000000002
>> 00000001624f7910
>> [ 310.748430] GPR28: 0000000000000002 00007fff97801798 00000001624f7910
>> 0000000000000002
>> [ 310.748475] NIP [00007fff97720c34] 0x7fff97720c34
>> [ 310.748478] LR [00007fff97697c74] 0x7fff97697c74
>> [ 310.748482] --- interrupt: c00
>> I'm in purgatory
>> [ 0.000000] radix-mmu: Page sizes from device-tree:
>> [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
>> [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
>> [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
>> [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
>> [ 0.000000] Activating Kernel Userspace Access Prevention
>> [ 0.000000] Activating Kernel Userspace Execution Prevention
>> [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
>> with 64.0 KiB pages (exec)
>> [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
>> with 64.0 KiB pages
>> [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
>> with 2.00 MiB pages
>> [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
>> with 2.00 MiB pages (exec)
>> [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
>> with 2.00 MiB pages
>>
>> Trimmed logs ....
>>
>> [ 0.001738] Mount-cache hash table entries: 16384 (order: 1, 131072
>> bytes, linear)
>> [ 0.001751] Mountpoint-cache hash table entries: 16384 (order: 1,
>> 131072 bytes, linear)
>> [ 0.007339] ------------[ cut here ]------------
>> [ 0.007356] WARNING: CPU: 2 PID: 1 at arch/powerpc/kernel/smp.c:941
>> update_mask_from_threadgroup+0x128/0x1a0
>> [ 0.007371] Modules linked in:
>> [ 0.007377] CPU: 2 PID: 1 Comm: swapper/2 Not tainted
>> 6.6.0-rc5-fix-setup-common+ #3
>> [ 0.007385] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
>> [ 0.007393] NIP: c000000022011ed8 LR: c000000022011e10 CTR:
>> 0000000000000000
>> [ 0.007411] REGS: c0000000256338f0 TRAP: 0700 Not tainted
>> (6.6.0-rc5-fix-setup-common+)
>> [ 0.007425] MSR: 8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR:
>> 44000842 XER: 0000000c
>> [ 0.007444] CFAR: c000000022011e78 IRQMASK: 0
>> [ 0.007444] GPR00: c000000022011e10 c000000025633b90 c0000000213c1300
>> 0000000000000002
>> [ 0.007444] GPR04: 0000000000000000 0000000000000005 0000000000000001
>> 0000000000000002
>> [ 0.007444] GPR08: 0000000000000008 0000000000000001 0000000000000002
>> 0000000000000004
>> [ 0.007444] GPR12: 0000000000000000 c000000022e3ac00 c000000020010138
>> 0000000000000000
>> [ 0.007444] GPR16: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007444] GPR20: 0000000000000018 c000000022150968 c000000022093580
>> c0000000253df000
>> [ 0.007444] GPR24: 0000000000000002 0000000000000000 c000000022b32058
>> 0000000000000000
>> [ 0.007444] GPR28: c00000015fca0a68 c000000022ba0330 c00000002209352c
>> 0000000000000000
>> [ 0.007520] NIP [c000000022011ed8]
>> update_mask_from_threadgroup+0x128/0x1a0
>> [ 0.007528] LR [c000000022011e10] update_mask_from_threadgroup+0x60/0x1a0
>> [ 0.007536] Call Trace:
>> [ 0.007539] [c000000025633b90] [c000000022011e10]
>> update_mask_from_threadgroup+0x60/0x1a0 (unreliable)
>> [ 0.007550] [c000000025633be0] [c000000022012210]
>> init_thread_group_cache_map+0x2c0/0x338
>> [ 0.007559] [c000000025633c50] [c0000000220125a0]
>> smp_prepare_cpus+0x318/0x510
>> [ 0.007568] [c000000025633d10] [c000000022004874]
>> kernel_init_freeable+0x198/0x3cc
>> [ 0.007578] [c000000025633de0] [c000000020010164] kernel_init+0x34/0x1b0
>> [ 0.007586] [c000000025633e50] [c00000002000cd94]
>> ret_from_kernel_user_thread+0x14/0x1c
>> [ 0.007596] --- interrupt: 0 at 0x0
>> [ 0.007601] NIP: 0000000000000000 LR: 0000000000000000 CTR:
>> 0000000000000000
>> [ 0.007608] REGS: c000000025633e80 TRAP: 0000 Not tainted
>> (6.6.0-rc5-fix-setup-common+)
>> [ 0.007632] MSR: 0000000000000000 <> CR: 00000000 XER: 00000000
>> [ 0.007651] CFAR: 0000000000000000 IRQMASK: 0
>> [ 0.007651] GPR00: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR04: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR08: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR12: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR16: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR20: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR24: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007651] GPR28: 0000000000000000 0000000000000000 0000000000000000
>> 0000000000000000
>> [ 0.007742] NIP [0000000000000000] 0x0
>> [ 0.007756] LR [0000000000000000] 0x0
>> [ 0.007769] --- interrupt: 0
>> [ 0.007779] Code: 7ca507b4 79081764 7d1e4214 8108000c 7f882000
>> 409effdc 48000010 38e70001 7ce707b4 4bffffa4 2f8affff 409e0010
>> <0fe00000> 3860ffc3 4800004c 7f9b5000
>> [ 0.007805] ---[ end trace 0000000000000000 ]---
>> [ 0.007997] RCU Tasks Rude: Setting shift to 2 and lim to 1
>> rcu_task_cb_adjust=1.
>> [ 0.008018] RCU Tasks Trace: Setting shift to 2 and lim to 1
>> rcu_task_cb_adjust=1.
>> [ 0.008043] POWER10 performance monitor hardware support registered
>> [ 0.008071] rcu: Hierarchical SRCU implementation.
>> [ 0.008078] rcu: Max phase no-delay instances is 1000.
>> [ 0.008516] smp: Bringing up secondary CPUs ...
>> [ 0.008735] smp: Brought up 1 node, 2 CPUs
>> ...
>>
>> Note: no warning observed if crashing CPU is 0, 8, 16, 24, 32, ....
>>
>> Code that generates warning:
>>
>> File: arch/powerpc/kernel/smp.c
>> Function: update_mask_from_threadgroup
>> ...
>> if (unlikely(i_group_start == -1)) {
>> WARN_ON_ONCE(1);
>> return -ENODATA;
>> }
>>
> It seems that the crash cpu passed the statements in
> init_thread_group_cache_map()
> {
> if (unlikely(cpu_group_start == -1)) {
> WARN_ON_ONCE(1);
> return -ENODATA;
> }
>
> }
>
> But raising warn in the above snippet. So it means that
> get_cpu_thread_group_start(i, tg) for the @first_thread failed in
> update_mask_from_threadgroup(). At present, I have no idea about it.
>
>
> And is this warning observed if only applying [1-2/5] ?
No warning observed with just 1-2/5 patches.
>
> According to my collected data, percpu area will cost 1792 kB per cpu.
> Forcing all eight threads in a core online will cost 10752KB more than
> the result if applying the whole series. Maybe I can put [3-5/5] aside
> as Hari suggested, and try them later if needed.
In my experiment 7MB was allocated for Percpu for both nr_cpus=1 and
nr_cpus=8 if only 1-2/5 patches are applied.
Trimmed output of lscpu and cat /proc/meminfo
With nr_cpus=1
============
kdump:/# lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 8
On-line CPU(s) list: 0,3
Off-line CPU(s) list: 1,2,4-7
Model name: POWER10
kdump:/#
kdump:/# cat /proc/meminfo | grep Percpu
Percpu: 7168 kB
kdump:/#
with nr_cpus=8
============
kdump:/# lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 8
On-line CPU(s) list: 0,2
Off-line CPU(s) list: 1,3-7
Model name: POWER10
kdump:/#
kdump:/# cat /proc/meminfo | grep Percpu
Percpu: 7168 kB
Thanks,
Sourabh Jain
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32
2023-10-16 6:43 ` Sourabh Jain
@ 2023-10-17 2:12 ` Pingfan Liu
0 siblings, 0 replies; 19+ messages in thread
From: Pingfan Liu @ 2023-10-17 2:12 UTC (permalink / raw)
To: Sourabh Jain
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
linuxppc-dev, Wen Xiong
On Mon, Oct 16, 2023 at 12:13:53PM +0530, Sourabh Jain wrote:
> Hello Pingfan,
>
> > > > > > With this patch series applied, the kdump kernel fails to boot on
> > > > > > powerpc with nr_cpus=1.
> > > > > >
> > > > > > Console logs:
> > > > > > -------------------
> > > > > > [root]# echo c > /proc/sysrq-trigger
> > > > > > [ 74.783235] sysrq: Trigger a crash
> > > > > > [ 74.783244] Kernel panic - not syncing: sysrq triggered crash
> > > > > > [ 74.783252] CPU: 58 PID: 3838 Comm: bash Kdump: loaded Not tainted
> > > > > > 6.6.0-rc5pf-nr-cpus+ #3
> > > > > > [ 74.783259] Hardware name: POWER10 (raw) phyp pSeries
> > > > > > [ 74.783275] Call Trace:
> > > > > > [ 74.783280] [c00000020f4ebac0] [c000000000ed9f38]
> > > > > > dump_stack_lvl+0x6c/0x9c (unreliable)
> > > > > > [ 74.783291] [c00000020f4ebaf0] [c000000000150300] panic+0x178/0x438
> > > > > > [ 74.783298] [c00000020f4ebb90] [c000000000936d48]
> > > > > > sysrq_handle_crash+0x28/0x30
> > > > > > [ 74.783304] [c00000020f4ebbf0] [c00000000093773c]
> > > > > > __handle_sysrq+0x10c/0x250
> > > > > > [ 74.783309] [c00000020f4ebc90] [c000000000937fa8]
> > > > > > write_sysrq_trigger+0xc8/0x168
> > > > > > [ 74.783314] [c00000020f4ebcd0] [c000000000665d8c]
> > > > > > proc_reg_write+0x10c/0x1b0
> > > > > > [ 74.783321] [c00000020f4ebd00] [c00000000058da54]
> > > > > > vfs_write+0x104/0x4b0
> > > > > > [ 74.783326] [c00000020f4ebdc0] [c00000000058dfdc]
> > > > > > ksys_write+0x7c/0x140
> > > > > > [ 74.783331] [c00000020f4ebe10] [c000000000033a64]
> > > > > > system_call_exception+0x144/0x3a0
> > > > > > [ 74.783337] [c00000020f4ebe50] [c00000000000c554]
> > > > > > system_call_common+0xf4/0x258
> > > > > > [ 74.783343] --- interrupt: c00 at 0x7fffa0721594
> > > > > > [ 74.783352] NIP: 00007fffa0721594 LR: 00007fffa0697bf4 CTR:
> > > > > > 0000000000000000
> > > > > > [ 74.783364] REGS: c00000020f4ebe80 TRAP: 0c00 Not tainted
> > > > > > (6.6.0-rc5pf-nr-cpus+)
> > > > > > [ 74.783376] MSR: 800000000280f033
> > > > > > <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> > > > > > [ 74.783394] IRQMASK: 0
> > > > > > [ 74.783394] GPR00: 0000000000000004 00007ffffc4b6800 00007fffa0807300
> > > > > > 0000000000000001
> > > > > > [ 74.783394] GPR04: 000000013549ea60 0000000000000002 0000000000000010
> > > > > > 0000000000000000
> > > > > > [ 74.783394] GPR08: 0000000000000000 0000000000000000 0000000000000000
> > > > > > 0000000000000000
> > > > > > [ 74.783394] GPR12: 0000000000000000 00007fffa0abaf70 0000000040000000
> > > > > > 000000011a0f9798
> > > > > > [ 74.783394] GPR16: 000000011a0f9724 000000011a097688 000000011a02ff70
> > > > > > 000000011a0fd568
> > > > > > [ 74.783394] GPR20: 0000000135554bf0 0000000000000001 000000011a0aa478
> > > > > > 00007ffffc4b6a24
> > > > > > [ 74.783394] GPR24: 00007ffffc4b6a20 000000011a0faf94 0000000000000002
> > > > > > 000000013549ea60
> > > > > > [ 74.783394] GPR28: 0000000000000002 00007fffa08017a0 000000013549ea60
> > > > > > 0000000000000002
> > > > > > [ 74.783440] NIP [00007fffa0721594] 0x7fffa0721594
> > > > > > [ 74.783443] LR [00007fffa0697bf4] 0x7fffa0697bf4
> > > > > > [ 74.783447] --- interrupt: c00
> > > > > > I'm in purgatory
> > > > > > [ 0.000000] radix-mmu: Page sizes from device-tree:
> > > > > > [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> > > > > > [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> > > > > > [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> > > > > > [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> > > > > > [ 0.000000] Activating Kernel Userspace Access Prevention
> > > > > > [ 0.000000] Activating Kernel Userspace Execution Prevention
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> > > > > > with 64.0 KiB pages (exec)
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> > > > > > with 64.0 KiB pages
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> > > > > > with 2.00 MiB pages
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> > > > > > with 2.00 MiB pages (exec)
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> > > > > > with 2.00 MiB pages
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000040000000-0x0000000180000000
> > > > > > with 1.00 GiB pages
> > > > > > [ 0.000000] radix-mmu: Mapped 0x0000000180000000-0x00000001a0000000
> > > > > > with 2.00 MiB pages
> > > > > > [ 0.000000] lpar: Using radix MMU under hypervisor
> > > > > > [ 0.000000] Linux version 6.6.0-rc5pf-nr-cpus+
> > > > > > (root@ltcever7x0-lp1.aus.stglabs.ibm.com) (gcc (GCC) 8.5.0 20210514 (Red
> > > > > > Hat 8.5.0-20), GNU ld version 2.30-123.el8) #3 SMP Mon Oct 9 11:07:
> > > > > > 41 CDT 2023
> > > > > > [ 0.000000] Found initrd at 0xc000000022e60000:0xc0000000248f08d8
> > > > > > [ 0.000000] Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200
> > > > > > 0xf000006 of:IBM,FW1060.00 (NM1060_016) hv:phyp pSeries
> > > > > > [ 0.000000] printk: bootconsole [udbg0] enabled
> > > > > > [ 0.000000] the round shift between dt seq and the cpu logic number:
> > > > > > 56
> > > > > > [ 0.000000] BUG: Unable to handle kernel data access on write at
> > > > > > 0xc0000001a0000000
> > > > > > [ 0.000000] Faulting instruction address: 0xc000000022009c64
> > > > > > [ 0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
> > > > > > [ 0.000000] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
> > > > > > [ 0.000000] Modules linked in:
> > > > > > [ 0.000000] CPU: 2 PID: 0 Comm: swapper Not tainted
> > > > > > 6.6.0-rc5pf-nr-cpus+ #3
> > > > > > [ 0.000000] Hardware name: POWER10 (raw) hv:phyp pSeries
> > > > > > [ 0.000000] NIP: c000000022009c64 LR: c000000022009c54 CTR:
> > > > > > c0000000201ff348
> > > > > > [ 0.000000] REGS: c000000022aebb00 TRAP: 0300 Not tainted
> > > > > > (6.6.0-rc5pf-nr-cpus+)
> > > > > > [ 0.000000] MSR: 8000000000001033 <SF,ME,IR,DR,RI,LE> CR: 28222824
> > > > > > XER: 00000001
> > > > > > [ 0.000000] CFAR: c000000020031574 DAR: c0000001a0000000 DSISR:
> > > > > > 42000000 IRQMASK: 1
> > > > > > [ 0.000000] GPR00: c000000022009ba0 c000000022aebda0 c0000000213d1300
> > > > > > 0000000000000004
> > > > > > [ 0.000000] GPR04: 0000000000000001 c000000022aebbc0 c000000022aebbb8
> > > > > > 0000000000000000
> > > > > > [ 0.000000] GPR08: 0000000000000001 c00000019ffffff8 000000000000003a
> > > > > > c0000000229c8a78
> > > > > > [ 0.000000] GPR12: 0000000000002000 c000000022e4a800 c0000000211d34b8
> > > > > > c0000000211d3aa8
> > > > > > [ 0.000000] GPR16: c0000000211d75a0 c0000000211d75b0 c0000000225f3b98
> > > > > > 0000000000000000
> > > > > > [ 0.000000] GPR20: 0000000000000001 0000000000000001 0000000000000001
> > > > > > 0000000000000001
> > > > > > [ 0.000000] GPR24: 0000000000000008 0000000000000000 0000000000000001
> > > > > > c00000019ffffdc0
> > > > > > [ 0.000000] GPR28: 0000000000000002 c000000022b368e0 c000000022aebe08
> > > > > > 0000000000000008
> > > > > > [ 0.000000] NIP [c000000022009c64] smp_setup_cpu_maps+0x420/0x724
> > > > > > [ 0.000000] LR [c000000022009c54] smp_setup_cpu_maps+0x410/0x724
> > > > > > [ 0.000000] Call Trace:
> > > > > > [ 0.000000] [c000000022aebda0] [c000000022009ba0]
> > > > > > smp_setup_cpu_maps+0x35c/0x724 (unreliable)
> > > > > > [ 0.000000] [c000000022aebeb0] [c00000002200a19c]
> > > > > > setup_arch+0x1b8/0x54c
> > > > > > [ 0.000000] [c000000022aebf30] [c000000022003f88]
> > > > > > start_kernel+0xb0/0x768
> > > > > > [ 0.000000] [c000000022aebfe0] [c00000002000d888]
> > > > > > start_here_common+0x1c/0x20
> > > > > > [ 0.000000] Code: 3929ffff 7f89e040 409c002c 7ec4b378 7f83e378
> > > > > > 4a027939 7f83e378 4a0278e5 e95b0018 3d22017d e929f028 7d4ac42c
> > > > > > <7d49c12e> eb7b0000 7e99a378 4bffff3c
> > > > > > The faulting instruction address, 0xc000000022009c64, corresponds to the code
> > > > > below:
> > > > >
> > > > > File:
> > > > > arch/powerpc/kernel/setup-common.c
> > > > >
> > > > > Function
> > > > > void __init smp_setup_cpu_maps(void)
> > > > > {
> > > > > ...
> > > > > cpu_to_phys_id[bt_thread] =
> > > > > be32_to_cpu(intserv_node->intserv[bt_thread]);
> > > > > ...
> > > > > }
> > > > >
> > > > > Hope it helps.
> > > > >
> > > > Appreciate your help.
> > > >
> > > > > This issue should be linked with the capacity of cpu_to_phys_id[].
> > > > >
> > > > > Could you please try the fix suggested at the end of the email?
> > > > It should be a fix for
> > > > [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
> > > >
> > > >
> > > > Thanks,
> > > >
> > > > Pingfan
> > > >
> > > > ---
> > > >
> > > > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > > > index bd7853a4bc91..849adc7a4b47 100644
> > > > --- a/arch/powerpc/kernel/setup-common.c
> > > > +++ b/arch/powerpc/kernel/setup-common.c
> > > > @@ -464,12 +464,6 @@ void __init smp_setup_cpu_maps(void)
> > > > DBG("smp_setup_cpu_maps()\n");
> > > >
> > > > INIT_LIST_HEAD(&head);
> > > > - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
> > > > - __alignof__(u32));
> > > > - if (!cpu_to_phys_id)
> > > > - panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> > > > - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
> > > > -
> > > > for_each_node_by_type(dn, "cpu") {
> > > > const __be32 *intserv;
> > > > __be32 cpu_be;
> > > > @@ -533,6 +527,16 @@ void __init smp_setup_cpu_maps(void)
> > > > }
> > > >
> > > > }
> > > > +
> > > > + /* There may be hole between cpu0 and boot cpu */
> > > > + j = (bt_thread + 1) > nr_cpu_ids ? (bt_thread + 1) : nr_cpu_ids;
> > > > + cpu_to_phys_id = memblock_alloc(j * sizeof(u32),
> > > > + __alignof__(u32));
> > > > + if (!cpu_to_phys_id)
> > > > + panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> > > > + __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
> > > > +
> > > > +
> > > > cpu = 0;
> > > > list_del_init(&head);
> > > > /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
> > > With the above changes applied, kdump kernel boots fine with a WARNING:
> > >
> > > [root]# echo c > /proc/sysrq-trigger
> > > [ 310.748248] sysrq: Trigger a crash
> > > [ 310.748256] Kernel panic - not syncing: sysrq triggered crash
> > > [ 310.748266] CPU: 26 PID: 2610 Comm: bash Kdump: loaded Not tainted
> > > 6.6.0-rc5-fix-setup-common+ #3
> > > [ 310.748273] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
> > > [ 310.748280] Call Trace:
> > > [ 310.748284] [c000000184717ac0] [c000000000ecf8d8]
> > > dump_stack_lvl+0x6c/0x9c (unreliable)
> > > [ 310.748298] [c000000184717af0] [c000000000150310] panic+0x178/0x438
> > > [ 310.748307] [c000000184717b90] [c00000000092c8b8]
> > > sysrq_handle_crash+0x28/0x30
> > > [ 310.748316] [c000000184717bf0] [c00000000092d2ac]
> > > __handle_sysrq+0x10c/0x250
> > > [ 310.748330] [c000000184717c90] [c00000000092db18]
> > > write_sysrq_trigger+0xc8/0x168
> > > [ 310.748339] [c000000184717cd0] [c00000000065c21c]
> > > proc_reg_write+0x10c/0x1b0
> > > [ 310.748349] [c000000184717d00] [c000000000583f94] vfs_write+0x104/0x4b0
> > > [ 310.748356] [c000000184717dc0] [c00000000058451c] ksys_write+0x7c/0x140
> > > [ 310.748365] [c000000184717e10] [c000000000033a54]
> > > system_call_exception+0x144/0x3a0
> > > [ 310.748377] [c000000184717e50] [c00000000000c554]
> > > system_call_common+0xf4/0x258
> > > [ 310.748389] --- interrupt: c00 at 0x7fff97720c34
> > > [ 310.748395] NIP: 00007fff97720c34 LR: 00007fff97697c74 CTR:
> > > 0000000000000000
> > > [ 310.748404] REGS: c000000184717e80 TRAP: 0c00 Not tainted
> > > (6.6.0-rc5-fix-setup-common+)
> > > [ 310.748413] MSR: 800000000280f033
> > > <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28222202 XER: 00000000
> > > [ 310.748430] IRQMASK: 0
> > > [ 310.748430] GPR00: 0000000000000004 00007fffffabc510 00007fff97807300
> > > 0000000000000001
> > > [ 310.748430] GPR04: 00000001624f7910 0000000000000002 0000000000000010
> > > 00007fff97669724
> > > [ 310.748430] GPR08: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 310.748430] GPR12: 0000000000000000 00007fff97a5aee0 0000000040000000
> > > 0000000125c39798
> > > [ 310.748430] GPR16: 0000000125c39724 0000000125bd8128 0000000125b70370
> > > 0000000125c3d568
> > > [ 310.748430] GPR20: 0000000162551030 0000000000000001 0000000125beaf18
> > > 00007fffffabc734
> > > [ 310.748430] GPR24: 00007fffffabc730 0000000125c3af94 0000000000000002
> > > 00000001624f7910
> > > [ 310.748430] GPR28: 0000000000000002 00007fff97801798 00000001624f7910
> > > 0000000000000002
> > > [ 310.748475] NIP [00007fff97720c34] 0x7fff97720c34
> > > [ 310.748478] LR [00007fff97697c74] 0x7fff97697c74
> > > [ 310.748482] --- interrupt: c00
> > > I'm in purgatory
> > > [ 0.000000] radix-mmu: Page sizes from device-tree:
> > > [ 0.000000] radix-mmu: Page size shift = 12 AP=0x0
> > > [ 0.000000] radix-mmu: Page size shift = 16 AP=0x5
> > > [ 0.000000] radix-mmu: Page size shift = 21 AP=0x1
> > > [ 0.000000] radix-mmu: Page size shift = 30 AP=0x2
> > > [ 0.000000] Activating Kernel Userspace Access Prevention
> > > [ 0.000000] Activating Kernel Userspace Execution Prevention
> > > [ 0.000000] radix-mmu: Mapped 0x0000000000000000-0x0000000000010000
> > > with 64.0 KiB pages (exec)
> > > [ 0.000000] radix-mmu: Mapped 0x0000000000010000-0x0000000000200000
> > > with 64.0 KiB pages
> > > [ 0.000000] radix-mmu: Mapped 0x0000000000200000-0x0000000020000000
> > > with 2.00 MiB pages
> > > [ 0.000000] radix-mmu: Mapped 0x0000000020000000-0x0000000022600000
> > > with 2.00 MiB pages (exec)
> > > [ 0.000000] radix-mmu: Mapped 0x0000000022600000-0x0000000040000000
> > > with 2.00 MiB pages
> > >
> > > Trimmed logs ....
> > >
> > > [ 0.001738] Mount-cache hash table entries: 16384 (order: 1, 131072
> > > bytes, linear)
> > > [ 0.001751] Mountpoint-cache hash table entries: 16384 (order: 1,
> > > 131072 bytes, linear)
> > > [ 0.007339] ------------[ cut here ]------------
> > > [ 0.007356] WARNING: CPU: 2 PID: 1 at arch/powerpc/kernel/smp.c:941
> > > update_mask_from_threadgroup+0x128/0x1a0
> > > [ 0.007371] Modules linked in:
> > > [ 0.007377] CPU: 2 PID: 1 Comm: swapper/2 Not tainted
> > > 6.6.0-rc5-fix-setup-common+ #3
> > > [ 0.007385] Hardware name: IBM,9043-MRX POWER10 hv:phyp pSeries
> > > [ 0.007393] NIP: c000000022011ed8 LR: c000000022011e10 CTR:
> > > 0000000000000000
> > > [ 0.007411] REGS: c0000000256338f0 TRAP: 0700 Not tainted
> > > (6.6.0-rc5-fix-setup-common+)
> > > [ 0.007425] MSR: 8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR:
> > > 44000842 XER: 0000000c
> > > [ 0.007444] CFAR: c000000022011e78 IRQMASK: 0
> > > [ 0.007444] GPR00: c000000022011e10 c000000025633b90 c0000000213c1300
> > > 0000000000000002
> > > [ 0.007444] GPR04: 0000000000000000 0000000000000005 0000000000000001
> > > 0000000000000002
> > > [ 0.007444] GPR08: 0000000000000008 0000000000000001 0000000000000002
> > > 0000000000000004
> > > [ 0.007444] GPR12: 0000000000000000 c000000022e3ac00 c000000020010138
> > > 0000000000000000
> > > [ 0.007444] GPR16: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007444] GPR20: 0000000000000018 c000000022150968 c000000022093580
> > > c0000000253df000
> > > [ 0.007444] GPR24: 0000000000000002 0000000000000000 c000000022b32058
> > > 0000000000000000
> > > [ 0.007444] GPR28: c00000015fca0a68 c000000022ba0330 c00000002209352c
> > > 0000000000000000
> > > [ 0.007520] NIP [c000000022011ed8]
> > > update_mask_from_threadgroup+0x128/0x1a0
> > > [ 0.007528] LR [c000000022011e10] update_mask_from_threadgroup+0x60/0x1a0
> > > [ 0.007536] Call Trace:
> > > [ 0.007539] [c000000025633b90] [c000000022011e10]
> > > update_mask_from_threadgroup+0x60/0x1a0 (unreliable)
> > > [ 0.007550] [c000000025633be0] [c000000022012210]
> > > init_thread_group_cache_map+0x2c0/0x338
> > > [ 0.007559] [c000000025633c50] [c0000000220125a0]
> > > smp_prepare_cpus+0x318/0x510
> > > [ 0.007568] [c000000025633d10] [c000000022004874]
> > > kernel_init_freeable+0x198/0x3cc
> > > [ 0.007578] [c000000025633de0] [c000000020010164] kernel_init+0x34/0x1b0
> > > [ 0.007586] [c000000025633e50] [c00000002000cd94]
> > > ret_from_kernel_user_thread+0x14/0x1c
> > > [ 0.007596] --- interrupt: 0 at 0x0
> > > [ 0.007601] NIP: 0000000000000000 LR: 0000000000000000 CTR:
> > > 0000000000000000
> > > [ 0.007608] REGS: c000000025633e80 TRAP: 0000 Not tainted
> > > (6.6.0-rc5-fix-setup-common+)
> > > [ 0.007632] MSR: 0000000000000000 <> CR: 00000000 XER: 00000000
> > > [ 0.007651] CFAR: 0000000000000000 IRQMASK: 0
> > > [ 0.007651] GPR00: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR04: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR08: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR12: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR16: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR20: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR24: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007651] GPR28: 0000000000000000 0000000000000000 0000000000000000
> > > 0000000000000000
> > > [ 0.007742] NIP [0000000000000000] 0x0
> > > [ 0.007756] LR [0000000000000000] 0x0
> > > [ 0.007769] --- interrupt: 0
> > > [ 0.007779] Code: 7ca507b4 79081764 7d1e4214 8108000c 7f882000
> > > 409effdc 48000010 38e70001 7ce707b4 4bffffa4 2f8affff 409e0010
> > > <0fe00000> 3860ffc3 4800004c 7f9b5000
> > > [ 0.007805] ---[ end trace 0000000000000000 ]---
> > > [ 0.007997] RCU Tasks Rude: Setting shift to 2 and lim to 1
> > > rcu_task_cb_adjust=1.
> > > [ 0.008018] RCU Tasks Trace: Setting shift to 2 and lim to 1
> > > rcu_task_cb_adjust=1.
> > > [ 0.008043] POWER10 performance monitor hardware support registered
> > > [ 0.008071] rcu: Hierarchical SRCU implementation.
> > > [ 0.008078] rcu: Max phase no-delay instances is 1000.
> > > [ 0.008516] smp: Bringing up secondary CPUs ...
> > > [ 0.008735] smp: Brought up 1 node, 2 CPUs
> > > ...
> > >
> > > Note: no warning observed if crashing CPU is 0, 8, 16, 24, 32, ....
> > >
> > > Code that generates warning:
> > >
> > > File: arch/powerpc/kernel/smp.c
> > > Function: update_mask_from_threadgroup
> > > ...
> > > if (unlikely(i_group_start == -1)) {
> > > WARN_ON_ONCE(1);
> > > return -ENODATA;
> > > }
> > >
> > It seems that the crash cpu passed the statements in
> > init_thread_group_cache_map()
> > {
> > if (unlikely(cpu_group_start == -1)) {
> > WARN_ON_ONCE(1);
> > return -ENODATA;
> > }
> >
> > }
> >
> > But it raised the warning in the snippet above. So it means that
> > get_cpu_thread_group_start(i, tg) for the @first_thread failed in
> > update_mask_from_threadgroup(). At present, I have no idea about it.
> >
> >
> > And is this warning observed if only applying [1-2/5] ?
>
> No warning observed with just 1-2/5 patches.
>
Good to know. I think that [1-2/5] can be a first step.
I will post V9 later, with [3-5/5] trimmed.
> >
> > According to my collected data, percpu area will cost 1792 kB per cpu.
> > Forcing all eight threads in a core online will cost 10752KB more than
> > the result if applying the whole series. Maybe I can put [3-5/5] aside
> > as Hari suggested, and try them later if needed.
> In my experiment 7MB was allocated for Percpu for both nr_cpus=1 and
> nr_cpus=8 if only 1-2/5 patches are applied.
>
> Trimmed output of lscpu and cat /proc/meminfo
>
> With nr_cpus=1
> ============
>
> kdump:/# lscpu
> Architecture: ppc64le
> Byte Order: Little Endian
> CPU(s): 8
> On-line CPU(s) list: 0,3
> Off-line CPU(s) list: 1,2,4-7
nr_cpus has the semantics of possible cpus, not online cpus.
It covers both the On-line and Off-line CPU(s) lists.
So the Percpu area allocates memory for all eight cpus.
> Model name: POWER10
> kdump:/#
> kdump:/# cat /proc/meminfo | grep Percpu
> Percpu: 7168 kB
> kdump:/#
>
>
> with nr_cpus=8
> ============
>
> kdump:/# lscpu
> Architecture: ppc64le
> Byte Order: Little Endian
> CPU(s): 8
> On-line CPU(s) list: 0,2
> Off-line CPU(s) list: 1,3-7
> Model name: POWER10
>
> kdump:/#
> kdump:/# cat /proc/meminfo | grep Percpu
> Percpu: 7168 kB
>
Here Percpu area also allocates memory for eight cpus, hence the size is
identical to the former one.
Again, thank you for your help and precious time.
Regards,
Pingfan
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt
2023-10-09 11:30 [PATCHv8 0/5] enable nr_cpus for powerpc Pingfan Liu
2023-10-09 11:30 ` [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32 Pingfan Liu
@ 2023-10-09 11:30 ` Pingfan Liu
2023-10-10 10:37 ` Hari Bathini
2023-10-09 11:30 ` [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus Pingfan Liu
` (2 subsequent siblings)
4 siblings, 1 reply; 19+ messages in thread
From: Pingfan Liu @ 2023-10-09 11:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Nicholas Piggin, Wen Xiong
*** Idea ***
For kexec -p, the boot cpu may not be cpu0, which causes a problem when
allocating memory for paca_ptrs[]. However, in theory, there is no
requirement that a cpu's logical id match its sequence in the
device tree. But there are helpers such as cpu_first_thread_sibling(),
which make assumptions about the mapping inside a core. Hence partially
loosen the mapping, i.e. unbind the mapping between cores while keeping
the mapping inside a core.
*** Implement ***
At this early stage, there is plenty of memory to utilize. Hence, this
patch allocates interim memory to link the cpu info on a list, then
reorders the cpus by changing the list head. As a result, there is a
rotational shift between the sequence number in dt and the cpu logical number.
*** Result ***
After this patch, a boot-cpu's logical id will always be mapped into the
range [0,threads_per_core).
Besides this, at this phase, all threads in the boot core are forced to
be onlined. This restriction will be lifted in a later patch with
extra effort.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@us.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/prom.c | 25 +++++----
arch/powerpc/kernel/setup-common.c | 87 +++++++++++++++++++++++-------
2 files changed, 85 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ec82f5bda908..87272a2d8c10 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -76,7 +76,9 @@ u64 ppc64_rma_size;
unsigned int boot_cpu_node_count __ro_after_init;
#endif
static phys_addr_t first_memblock_size;
+#ifdef CONFIG_SMP
static int __initdata boot_cpu_count;
+#endif
static int __init early_parse_mem(char *p)
{
@@ -331,8 +333,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
const __be32 *intserv;
int i, nthreads;
int len;
- int found = -1;
- int found_thread = 0;
+ bool found = false;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -355,8 +356,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
for (i = 0; i < nthreads; i++) {
if (be32_to_cpu(intserv[i]) ==
fdt_boot_cpuid_phys(initial_boot_params)) {
- found = boot_cpu_count;
- found_thread = i;
+ /*
+ * always map the boot-cpu logical id into the
+ * range of [0, thread_per_core)
+ */
+ boot_cpuid = i;
+ found = true;
+ /* This works around the hole in paca_ptrs[]. */
+ if (nr_cpu_ids < nthreads)
+ set_nr_cpu_ids(nthreads);
}
#ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
@@ -365,14 +373,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
}
/* Not the boot CPU */
- if (found < 0)
+ if (!found)
return 0;
- DBG("boot cpu: logical %d physical %d\n", found,
- be32_to_cpu(intserv[found_thread]));
- boot_cpuid = found;
+ DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
+ be32_to_cpu(intserv[boot_cpuid]));
- boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+ boot_cpu_hwid = be32_to_cpu(intserv[boot_cpuid]);
/*
* PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1b19a9815672..81291e13dec0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -36,6 +36,7 @@
#include <linux/of_platform.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
+#include <linux/list.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/processor.h>
@@ -425,6 +426,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
u32 *cpu_to_phys_id = NULL;
+struct interrupt_server_node {
+ struct list_head node;
+ bool avail;
+ int len;
+ __be32 *intserv;
+};
+
/**
* setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
@@ -446,11 +454,16 @@ u32 *cpu_to_phys_id = NULL;
void __init smp_setup_cpu_maps(void)
{
struct device_node *dn;
- int cpu = 0;
- int nthreads = 1;
+ int shift = 0, cpu = 0;
+ int j, nthreads = 1;
+ int len;
+ struct interrupt_server_node *intserv_node, *n;
+ struct list_head *bt_node, head;
+ bool avail, found_boot_cpu = false;
DBG("smp_setup_cpu_maps()\n");
+ INIT_LIST_HEAD(&head);
cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
__alignof__(u32));
if (!cpu_to_phys_id)
@@ -460,7 +473,6 @@ void __init smp_setup_cpu_maps(void)
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
- int j, len;
DBG(" * %pOF...\n", dn);
@@ -480,29 +492,68 @@ void __init smp_setup_cpu_maps(void)
}
}
- nthreads = len / sizeof(int);
+ avail = of_device_is_available(dn);
+ if (!avail)
+ avail = !of_property_match_string(dn,
+ "enable-method", "spin-table");
- for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
- bool avail;
- DBG(" thread %d -> cpu %d (hard id %d)\n",
- j, cpu, be32_to_cpu(intserv[j]));
-
- avail = of_device_is_available(dn);
- if (!avail)
- avail = !of_property_match_string(dn,
- "enable-method", "spin-table");
+ intserv_node = memblock_alloc(sizeof(struct interrupt_server_node) + len,
+ __alignof__(u32));
+ if (!intserv_node)
+ panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
+ __func__,
+ sizeof(struct interrupt_server_node) + len,
+ __alignof__(u32));
+ intserv_node->intserv = (__be32 *)((char *)intserv_node +
+ sizeof(struct interrupt_server_node));
+ intserv_node->len = len;
+ memcpy(intserv_node->intserv, intserv, len);
+ intserv_node->avail = avail;
+ INIT_LIST_HEAD(&intserv_node->node);
+ list_add_tail(&intserv_node->node, &head);
+
+ if (!found_boot_cpu) {
+ nthreads = len / sizeof(int);
+ for (j = 0 ; j < nthreads; j++) {
+ if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
+ bt_node = &intserv_node->node;
+ found_boot_cpu = true;
+ /*
+ * Record the round-shift between dt
+ * seq and cpu logical number
+ */
+ shift = cpu - j;
+ break;
+ }
+
+ cpu++;
+ }
+ }
+ }
+ cpu = 0;
+ list_del_init(&head);
+ /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
+ list_add_tail(&head, bt_node);
+ pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
+ list_for_each_entry(intserv_node, &head, node) {
+
+ avail = intserv_node->avail;
+ nthreads = intserv_node->len / sizeof(int);
+ for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
set_cpu_present(cpu, avail);
set_cpu_possible(cpu, true);
- cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
+ cpu_to_phys_id[cpu] = be32_to_cpu(intserv_node->intserv[j]);
+ DBG(" thread %d -> cpu %d (hard id %d)\n",
+ j, cpu, be32_to_cpu(intserv_node->intserv[j]));
cpu++;
}
+ }
- if (cpu >= nr_cpu_ids) {
- of_node_put(dn);
- break;
- }
+ list_for_each_entry_safe(intserv_node, n, &head, node) {
+ len = sizeof(struct interrupt_server_node) + intserv_node->len;
+ memblock_free(intserv_node, len);
}
/* If no SMT supported, nthreads is forced to 1 */
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt
2023-10-09 11:30 ` [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt Pingfan Liu
@ 2023-10-10 10:37 ` Hari Bathini
2023-10-11 3:11 ` Pingfan Liu
0 siblings, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2023-10-10 10:37 UTC (permalink / raw)
To: Pingfan Liu, linuxppc-dev
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong
On 09/10/23 5:00 pm, Pingfan Liu wrote:
> *** Idea ***
> For kexec -p, the boot cpu can be not the cpu0, this causes the problem
> of allocating memory for paca_ptrs[]. However, in theory, there is no
> requirement to assign cpu's logical id as its present sequence in the
> device tree. But there is something like cpu_first_thread_sibling(),
> which makes assumption on the mapping inside a core. Hence partially
> loosening the mapping, i.e. unbind the mapping of core while keep the
> mapping inside a core.
>
> *** Implement ***
> At this early stage, there are plenty of memory to utilize. Hence, this
> patch allocates interim memory to link the cpu info on a list, then
> reorder cpus by changing the list head. As a result, there is a rotate
> shift between the sequence number in dt and the cpu logical number.
>
> *** Result ***
> After this patch, a boot-cpu's logical id will always be mapped into the
> range [0,threads_per_core).
>
> Besides this, at this phase, all threads in the boot core are forced to
> be onlined. This restriction will be lifted in a later patch with
> extra effort.
>
> Signed-off-by: Pingfan Liu <piliu@redhat.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Nicholas Piggin <npiggin@gmail.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Cc: Wen Xiong <wenxiong@us.ibm.com>
> Cc: Baoquan He <bhe@redhat.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: kexec@lists.infradead.org
> To: linuxppc-dev@lists.ozlabs.org
> ---
> arch/powerpc/kernel/prom.c | 25 +++++----
> arch/powerpc/kernel/setup-common.c | 87 +++++++++++++++++++++++-------
> 2 files changed, 85 insertions(+), 27 deletions(-)
>
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index ec82f5bda908..87272a2d8c10 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -76,7 +76,9 @@ u64 ppc64_rma_size;
> unsigned int boot_cpu_node_count __ro_after_init;
> #endif
> static phys_addr_t first_memblock_size;
> +#ifdef CONFIG_SMP
> static int __initdata boot_cpu_count;
> +#endif
>
> static int __init early_parse_mem(char *p)
> {
> @@ -331,8 +333,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> const __be32 *intserv;
> int i, nthreads;
> int len;
> - int found = -1;
> - int found_thread = 0;
> + bool found = false;
>
> /* We are scanning "cpu" nodes only */
> if (type == NULL || strcmp(type, "cpu") != 0)
> @@ -355,8 +356,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> for (i = 0; i < nthreads; i++) {
> if (be32_to_cpu(intserv[i]) ==
> fdt_boot_cpuid_phys(initial_boot_params)) {
> - found = boot_cpu_count;
> - found_thread = i;
> + /*
> + * always map the boot-cpu logical id into the
> + * range of [0, thread_per_core)
> + */
> + boot_cpuid = i;
> + found = true;
> + /* This works around the hole in paca_ptrs[]. */
> + if (nr_cpu_ids < nthreads)
> + set_nr_cpu_ids(nthreads);
> }
> #ifdef CONFIG_SMP
> /* logical cpu id is always 0 on UP kernels */
> @@ -365,14 +373,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> }
>
> /* Not the boot CPU */
> - if (found < 0)
> + if (!found)
> return 0;
>
> - DBG("boot cpu: logical %d physical %d\n", found,
> - be32_to_cpu(intserv[found_thread]));
> - boot_cpuid = found;
> + DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
> + be32_to_cpu(intserv[boot_cpuid]));
>
> - boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
> + boot_cpu_hwid = be32_to_cpu(intserv[boot_cpuid]);
>
> /*
> * PAPR defines "logical" PVR values for cpus that
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 1b19a9815672..81291e13dec0 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -36,6 +36,7 @@
> #include <linux/of_platform.h>
> #include <linux/hugetlb.h>
> #include <linux/pgtable.h>
> +#include <linux/list.h>
> #include <asm/io.h>
> #include <asm/paca.h>
> #include <asm/processor.h>
> @@ -425,6 +426,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
>
> u32 *cpu_to_phys_id = NULL;
>
> +struct interrupt_server_node {
> + struct list_head node;
> + bool avail;
> + int len;
> + __be32 *intserv;
> +};
> +
> /**
> * setup_cpu_maps - initialize the following cpu maps:
> * cpu_possible_mask
> @@ -446,11 +454,16 @@ u32 *cpu_to_phys_id = NULL;
> void __init smp_setup_cpu_maps(void)
> {
> struct device_node *dn;
> - int cpu = 0;
> - int nthreads = 1;
> + int shift = 0, cpu = 0;
> + int j, nthreads = 1;
> + int len;
> + struct interrupt_server_node *intserv_node, *n;
> + struct list_head *bt_node, head;
> + bool avail, found_boot_cpu = false;
>
> DBG("smp_setup_cpu_maps()\n");
>
> + INIT_LIST_HEAD(&head);
> cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
> __alignof__(u32));
> if (!cpu_to_phys_id)
> @@ -460,7 +473,6 @@ void __init smp_setup_cpu_maps(void)
> for_each_node_by_type(dn, "cpu") {
> const __be32 *intserv;
> __be32 cpu_be;
> - int j, len;
>
> DBG(" * %pOF...\n", dn);
>
> @@ -480,29 +492,68 @@ void __init smp_setup_cpu_maps(void)
> }
> }
>
> - nthreads = len / sizeof(int);
> + avail = of_device_is_available(dn);
> + if (!avail)
> + avail = !of_property_match_string(dn,
> + "enable-method", "spin-table");
>
> - for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
> - bool avail;
>
> - DBG(" thread %d -> cpu %d (hard id %d)\n",
> - j, cpu, be32_to_cpu(intserv[j]));
> -
> - avail = of_device_is_available(dn);
> - if (!avail)
> - avail = !of_property_match_string(dn,
> - "enable-method", "spin-table");
> + intserv_node = memblock_alloc(sizeof(struct interrupt_server_node) + len,
> + __alignof__(u32));
> + if (!intserv_node)
> + panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> + __func__,
> + sizeof(struct interrupt_server_node) + len,
> + __alignof__(u32));
> + intserv_node->intserv = (__be32 *)((char *)intserv_node +
> + sizeof(struct interrupt_server_node));
This initialization won't be needed if the member was:
__be32 intserv[];
> + intserv_node->len = len;
> + memcpy(intserv_node->intserv, intserv, len);
> + intserv_node->avail = avail;
> + INIT_LIST_HEAD(&intserv_node->node);
> + list_add_tail(&intserv_node->node, &head);
INIT_LIST_HEAD(&intserv_node->node); is not needed as list_add_tail()
initializes both next & prev anyway.
Thanks
Hari
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt
2023-10-10 10:37 ` Hari Bathini
@ 2023-10-11 3:11 ` Pingfan Liu
0 siblings, 0 replies; 19+ messages in thread
From: Pingfan Liu @ 2023-10-11 3:11 UTC (permalink / raw)
To: Hari Bathini
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
linuxppc-dev, Wen Xiong
On Tue, Oct 10, 2023 at 04:07:00PM +0530, Hari Bathini wrote:
>
>
> On 09/10/23 5:00 pm, Pingfan Liu wrote:
> > *** Idea ***
> > For kexec -p, the boot cpu can be not the cpu0, this causes the problem
> > of allocating memory for paca_ptrs[]. However, in theory, there is no
> > requirement to assign cpu's logical id as its present sequence in the
> > device tree. But there is something like cpu_first_thread_sibling(),
> > which makes assumption on the mapping inside a core. Hence partially
> > loosening the mapping, i.e. unbind the mapping of core while keep the
> > mapping inside a core.
> >
> > *** Implement ***
> > At this early stage, there are plenty of memory to utilize. Hence, this
> > patch allocates interim memory to link the cpu info on a list, then
> > reorder cpus by changing the list head. As a result, there is a rotate
> > shift between the sequence number in dt and the cpu logical number.
> >
> > *** Result ***
> > After this patch, a boot-cpu's logical id will always be mapped into the
> > range [0,threads_per_core).
> >
> > Besides this, at this phase, all threads in the boot core are forced to
> > be onlined. This restriction will be lifted in a later patch with
> > extra effort.
> >
> > Signed-off-by: Pingfan Liu <piliu@redhat.com>
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Nicholas Piggin <npiggin@gmail.com>
> > Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> > Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> > Cc: Wen Xiong <wenxiong@us.ibm.com>
> > Cc: Baoquan He <bhe@redhat.com>
> > Cc: Ming Lei <ming.lei@redhat.com>
> > Cc: kexec@lists.infradead.org
> > To: linuxppc-dev@lists.ozlabs.org
> > ---
> > arch/powerpc/kernel/prom.c | 25 +++++----
> > arch/powerpc/kernel/setup-common.c | 87 +++++++++++++++++++++++-------
> > 2 files changed, 85 insertions(+), 27 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> > index ec82f5bda908..87272a2d8c10 100644
> > --- a/arch/powerpc/kernel/prom.c
> > +++ b/arch/powerpc/kernel/prom.c
> > @@ -76,7 +76,9 @@ u64 ppc64_rma_size;
> > unsigned int boot_cpu_node_count __ro_after_init;
> > #endif
> > static phys_addr_t first_memblock_size;
> > +#ifdef CONFIG_SMP
> > static int __initdata boot_cpu_count;
> > +#endif
> > static int __init early_parse_mem(char *p)
> > {
> > @@ -331,8 +333,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> > const __be32 *intserv;
> > int i, nthreads;
> > int len;
> > - int found = -1;
> > - int found_thread = 0;
> > + bool found = false;
> > /* We are scanning "cpu" nodes only */
> > if (type == NULL || strcmp(type, "cpu") != 0)
> > @@ -355,8 +356,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> > for (i = 0; i < nthreads; i++) {
> > if (be32_to_cpu(intserv[i]) ==
> > fdt_boot_cpuid_phys(initial_boot_params)) {
> > - found = boot_cpu_count;
> > - found_thread = i;
> > + /*
> > + * always map the boot-cpu logical id into the
> > + * range of [0, thread_per_core)
> > + */
> > + boot_cpuid = i;
> > + found = true;
> > + /* This works around the hole in paca_ptrs[]. */
> > + if (nr_cpu_ids < nthreads)
> > + set_nr_cpu_ids(nthreads);
> > }
> > #ifdef CONFIG_SMP
> > /* logical cpu id is always 0 on UP kernels */
> > @@ -365,14 +373,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
> > }
> > /* Not the boot CPU */
> > - if (found < 0)
> > + if (!found)
> > return 0;
> > - DBG("boot cpu: logical %d physical %d\n", found,
> > - be32_to_cpu(intserv[found_thread]));
> > - boot_cpuid = found;
> > + DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
> > + be32_to_cpu(intserv[boot_cpuid]));
> > - boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
> > + boot_cpu_hwid = be32_to_cpu(intserv[boot_cpuid]);
> > /*
> > * PAPR defines "logical" PVR values for cpus that
> > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > index 1b19a9815672..81291e13dec0 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -36,6 +36,7 @@
> > #include <linux/of_platform.h>
> > #include <linux/hugetlb.h>
> > #include <linux/pgtable.h>
> > +#include <linux/list.h>
> > #include <asm/io.h>
> > #include <asm/paca.h>
> > #include <asm/processor.h>
> > @@ -425,6 +426,13 @@ static void __init cpu_init_thread_core_maps(int tpc)
> > u32 *cpu_to_phys_id = NULL;
> > +struct interrupt_server_node {
> > + struct list_head node;
> > + bool avail;
> > + int len;
> > + __be32 *intserv;
> > +};
> > +
> > /**
> > * setup_cpu_maps - initialize the following cpu maps:
> > * cpu_possible_mask
> > @@ -446,11 +454,16 @@ u32 *cpu_to_phys_id = NULL;
> > void __init smp_setup_cpu_maps(void)
> > {
> > struct device_node *dn;
> > - int cpu = 0;
> > - int nthreads = 1;
> > + int shift = 0, cpu = 0;
> > + int j, nthreads = 1;
> > + int len;
> > + struct interrupt_server_node *intserv_node, *n;
> > + struct list_head *bt_node, head;
> > + bool avail, found_boot_cpu = false;
> > DBG("smp_setup_cpu_maps()\n");
> > + INIT_LIST_HEAD(&head);
> > cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
> > __alignof__(u32));
> > if (!cpu_to_phys_id)
> > @@ -460,7 +473,6 @@ void __init smp_setup_cpu_maps(void)
> > for_each_node_by_type(dn, "cpu") {
> > const __be32 *intserv;
> > __be32 cpu_be;
> > - int j, len;
> > DBG(" * %pOF...\n", dn);
> > @@ -480,29 +492,68 @@ void __init smp_setup_cpu_maps(void)
> > }
> > }
> > - nthreads = len / sizeof(int);
> > + avail = of_device_is_available(dn);
> > + if (!avail)
> > + avail = !of_property_match_string(dn,
> > + "enable-method", "spin-table");
> > - for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
> > - bool avail;
> > - DBG(" thread %d -> cpu %d (hard id %d)\n",
> > - j, cpu, be32_to_cpu(intserv[j]));
> > -
> > - avail = of_device_is_available(dn);
> > - if (!avail)
> > - avail = !of_property_match_string(dn,
> > - "enable-method", "spin-table");
> > + intserv_node = memblock_alloc(sizeof(struct interrupt_server_node) + len,
> > + __alignof__(u32));
> > + if (!intserv_node)
> > + panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
> > + __func__,
> > + sizeof(struct interrupt_server_node) + len,
> > + __alignof__(u32));
>
> > + intserv_node->intserv = (__be32 *)((char *)intserv_node +
> > + sizeof(struct interrupt_server_node));
>
> This initialization won't be needed if the member was:
>
> __be32 intserv[];
>
Thank you, I will use your suggestion.
> > + intserv_node->len = len;
> > + memcpy(intserv_node->intserv, intserv, len);
> > + intserv_node->avail = avail;
>
> > + INIT_LIST_HEAD(&intserv_node->node);
> > + list_add_tail(&intserv_node->node, &head);
>
> INIT_LIST_HEAD(&intserv_node->node); is not needed as list_add_tail()
> initializes both next & prev anyway.
>
You are right. I will remove the unnecessary initialization in the next
version.
Thanks,
Pingfan
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
2023-10-09 11:30 [PATCHv8 0/5] enable nr_cpus for powerpc Pingfan Liu
2023-10-09 11:30 ` [PATCHv8 1/5] powerpc/setup : Enable boot_cpu_hwid for PPC32 Pingfan Liu
2023-10-09 11:30 ` [PATCHv8 2/5] powerpc/setup: Loosen the mapping between cpu logical id and its seq in dt Pingfan Liu
@ 2023-10-09 11:30 ` Pingfan Liu
2023-10-10 8:26 ` Hari Bathini
2023-10-09 11:30 ` [PATCHv8 4/5] powerpc/cpu: Skip impossible cpu during iteration on a core Pingfan Liu
2023-10-09 11:30 ` [PATCHv8 5/5] powerpc/setup: alloc extra paca_ptrs to hold boot_cpuid Pingfan Liu
4 siblings, 1 reply; 19+ messages in thread
From: Pingfan Liu @ 2023-10-09 11:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Wen Xiong, Nicholas Piggin
If the boot_cpuid is not smaller than nr_cpus, extra effort is required to
ensure the boot cpu is in cpu_present_mask. This can be achieved by
reserving the last quota for the boot cpu.
Note: the restriction on nr_cpus will be lifted with more effort in the
successive patches.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 81291e13dec0..f9ef0a2666b0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -454,8 +454,8 @@ struct interrupt_server_node {
void __init smp_setup_cpu_maps(void)
{
struct device_node *dn;
- int shift = 0, cpu = 0;
- int j, nthreads = 1;
+ int terminate, shift = 0, cpu = 0;
+ int j, bt_thread = 0, nthreads = 1;
int len;
struct interrupt_server_node *intserv_node, *n;
struct list_head *bt_node, head;
@@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
for (j = 0 ; j < nthreads; j++) {
if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
bt_node = &intserv_node->node;
+ bt_thread = j;
found_boot_cpu = true;
/*
* Record the round-shift between dt
@@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
/* Select the primary thread, the boot cpu's slibing, as the logic 0 */
list_add_tail(&head, bt_node);
pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
+ terminate = nr_cpu_ids;
list_for_each_entry(intserv_node, &head, node) {
+ j = 0;
+ /* Choose a start point to cover the boot cpu */
+ if (nr_cpu_ids - 1 < bt_thread) {
+ /*
+ * The processor core puts assumption on the thread id,
+ * not to breach the assumption.
+ */
+ terminate = nr_cpu_ids - 1;
+ }
avail = intserv_node->avail;
nthreads = intserv_node->len / sizeof(int);
- for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+ for (; j < nthreads && cpu < terminate; j++) {
set_cpu_present(cpu, avail);
set_cpu_possible(cpu, true);
cpu_to_phys_id[cpu] = be32_to_cpu(intserv_node->intserv[j]);
@@ -549,6 +560,14 @@ void __init smp_setup_cpu_maps(void)
j, cpu, be32_to_cpu(intserv_node->intserv[j]));
cpu++;
}
+ /* Online the boot cpu */
+ if (nr_cpu_ids - 1 < bt_thread) {
+ set_cpu_present(bt_thread, avail);
+ set_cpu_possible(bt_thread, true);
+ cpu_to_phys_id[bt_thread] = be32_to_cpu(intserv_node->intserv[bt_thread]);
+ DBG(" thread %d -> cpu %d (hard id %d)\n",
+ bt_thread, bt_thread, be32_to_cpu(intserv_node->intserv[bt_thread]));
+ }
}
list_for_each_entry_safe(intserv_node, n, &head, node) {
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
2023-10-09 11:30 ` [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus Pingfan Liu
@ 2023-10-10 8:26 ` Hari Bathini
2023-10-11 3:05 ` Pingfan Liu
0 siblings, 1 reply; 19+ messages in thread
From: Hari Bathini @ 2023-10-10 8:26 UTC (permalink / raw)
To: Pingfan Liu, linuxppc-dev
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong
On 09/10/23 5:00 pm, Pingfan Liu wrote:
> If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
> ensure the boot_cpu is in cpu_present_mask. This can be achieved by
> reserving the last quota for the boot cpu.
>
> Note: the restriction on nr_cpus will be lifted with more effort in the
> successive patches
>
> Signed-off-by: Pingfan Liu <piliu@redhat.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Nicholas Piggin <npiggin@gmail.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Cc: Wen Xiong <wenxiong@linux.ibm.com>
> Cc: Baoquan He <bhe@redhat.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: kexec@lists.infradead.org
> To: linuxppc-dev@lists.ozlabs.org
> ---
> arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
> 1 file changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 81291e13dec0..f9ef0a2666b0 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -454,8 +454,8 @@ struct interrupt_server_node {
> void __init smp_setup_cpu_maps(void)
> {
> struct device_node *dn;
> - int shift = 0, cpu = 0;
> - int j, nthreads = 1;
> + int terminate, shift = 0, cpu = 0;
> + int j, bt_thread = 0, nthreads = 1;
> int len;
> struct interrupt_server_node *intserv_node, *n;
> struct list_head *bt_node, head;
> @@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
> for (j = 0 ; j < nthreads; j++) {
> if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
> bt_node = &intserv_node->node;
> + bt_thread = j;
> found_boot_cpu = true;
> /*
> * Record the round-shift between dt
> @@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
> /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
> list_add_tail(&head, bt_node);
> pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
> + terminate = nr_cpu_ids;
> list_for_each_entry(intserv_node, &head, node) {
>
> + j = 0;
> + /* Choose a start point to cover the boot cpu */
> + if (nr_cpu_ids - 1 < bt_thread) {
> + /*
> + * The processor core puts assumption on the thread id,
> + * not to breach the assumption.
> + */
> + terminate = nr_cpu_ids - 1;
nthreads is anyway assumed to be same for all cores. So, enforcing
nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
make the code much simpler without the need for above check and the
other complexities addressed in the subsequent patches...
Thanks
Hari
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
2023-10-10 8:26 ` Hari Bathini
@ 2023-10-11 3:05 ` Pingfan Liu
2023-10-12 5:32 ` Hari Bathini
0 siblings, 1 reply; 19+ messages in thread
From: Pingfan Liu @ 2023-10-11 3:05 UTC (permalink / raw)
To: Hari Bathini
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong, linuxppc-dev
On Tue, Oct 10, 2023 at 01:56:13PM +0530, Hari Bathini wrote:
>
>
> On 09/10/23 5:00 pm, Pingfan Liu wrote:
> > If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
> > ensure the boot_cpu is in cpu_present_mask. This can be achieved by
> > reserving the last quota for the boot cpu.
> >
> > Note: the restriction on nr_cpus will be lifted with more effort in the
> > successive patches
> >
> > Signed-off-by: Pingfan Liu <piliu@redhat.com>
> > Cc: Michael Ellerman <mpe@ellerman.id.au>
> > Cc: Nicholas Piggin <npiggin@gmail.com>
> > Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> > Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> > Cc: Wen Xiong <wenxiong@linux.ibm.com>
> > Cc: Baoquan He <bhe@redhat.com>
> > Cc: Ming Lei <ming.lei@redhat.com>
> > Cc: kexec@lists.infradead.org
> > To: linuxppc-dev@lists.ozlabs.org
> > ---
> > arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
> > 1 file changed, 22 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > index 81291e13dec0..f9ef0a2666b0 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -454,8 +454,8 @@ struct interrupt_server_node {
> > void __init smp_setup_cpu_maps(void)
> > {
> > struct device_node *dn;
> > - int shift = 0, cpu = 0;
> > - int j, nthreads = 1;
> > + int terminate, shift = 0, cpu = 0;
> > + int j, bt_thread = 0, nthreads = 1;
> > int len;
> > struct interrupt_server_node *intserv_node, *n;
> > struct list_head *bt_node, head;
> > @@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
> > for (j = 0 ; j < nthreads; j++) {
> > if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
> > bt_node = &intserv_node->node;
> > + bt_thread = j;
> > found_boot_cpu = true;
> > /*
> > * Record the round-shift between dt
> > @@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
> > /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
> > list_add_tail(&head, bt_node);
> > pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
> > + terminate = nr_cpu_ids;
> > list_for_each_entry(intserv_node, &head, node) {
> > + j = 0;
>
> > + /* Choose a start point to cover the boot cpu */
> > + if (nr_cpu_ids - 1 < bt_thread) {
> > + /*
> > + * The processor core puts assumption on the thread id,
> > + * not to breach the assumption.
> > + */
> > + terminate = nr_cpu_ids - 1;
>
> nthreads is anyway assumed to be same for all cores. So, enforcing
> nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
> make the code much simpler without the need for above check and the
> other complexities addressed in the subsequent patches...
>
Indeed, this series can be split into two parts, [1-2/5] and [3-5/5].
In [1-2/5], if smaller, the nr_cpu_ids is enforced to be equal to
nthreads. I will make it align upward on nthreads in the next version.
So [1-2/5] can be totally independent from the rest patches in this
series.
From an engineer's perspective, [3-5/5] are added to maintain the
nr_cpus semantics. (Ultimately, nr_cpus=1 can be achieved, but it requires
effort in other subsystems.)
Testing result on my Power9 machine with SMT=4
-1. taskset -c 4 bash -c 'echo c > /proc/sysrq-trigger'
kdump:/# cat /proc/meminfo | grep Percpu
Percpu: 896 kB
kdump:/# cat /sys/devices/system/cpu/possible
0
-2. taskset -c 5 bash -c 'echo c > /proc/sysrq-trigger'
kdump:/# cat /proc/meminfo | grep Percpu
Percpu: 1792 kB
kdump:/# cat /sys/devices/system/cpu/possible
0-1
-3. taskset -c 6 bash -c 'echo c > /proc/sysrq-trigger'
kdump:/# cat /proc/meminfo | grep Percpu
Percpu: 1792 kB
kdump:/# cat /sys/devices/system/cpu/possible
0,2
-4. taskset -c 7 bash -c 'echo c > /proc/sysrq-trigger'
kdump:/# cat /proc/meminfo | grep Percpu
Percpu: 1792 kB
kdump:/# cat /sys/devices/system/cpu/possible
0,3
Thanks,
Pingfan
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus
2023-10-11 3:05 ` Pingfan Liu
@ 2023-10-12 5:32 ` Hari Bathini
0 siblings, 0 replies; 19+ messages in thread
From: Hari Bathini @ 2023-10-12 5:32 UTC (permalink / raw)
To: Pingfan Liu
Cc: Baoquan He, kexec, Mahesh Salgaonkar, Nicholas Piggin, Ming Lei,
Wen Xiong, linuxppc-dev
On 11/10/23 8:35 am, Pingfan Liu wrote:
> On Tue, Oct 10, 2023 at 01:56:13PM +0530, Hari Bathini wrote:
>>
>>
>> On 09/10/23 5:00 pm, Pingfan Liu wrote:
>>> If the boot_cpuid is smaller than nr_cpus, it requires extra effort to
>>> ensure the boot_cpu is in cpu_present_mask. This can be achieved by
>>> reserving the last quota for the boot cpu.
>>>
>>> Note: the restriction on nr_cpus will be lifted with more effort in the
>>> successive patches
>>>
>>> Signed-off-by: Pingfan Liu <piliu@redhat.com>
>>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>>> Cc: Nicholas Piggin <npiggin@gmail.com>
>>> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
>>> Cc: Wen Xiong <wenxiong@linux.ibm.com>
>>> Cc: Baoquan He <bhe@redhat.com>
>>> Cc: Ming Lei <ming.lei@redhat.com>
>>> Cc: kexec@lists.infradead.org
>>> To: linuxppc-dev@lists.ozlabs.org
>>> ---
>>> arch/powerpc/kernel/setup-common.c | 25 ++++++++++++++++++++++---
>>> 1 file changed, 22 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
>>> index 81291e13dec0..f9ef0a2666b0 100644
>>> --- a/arch/powerpc/kernel/setup-common.c
>>> +++ b/arch/powerpc/kernel/setup-common.c
>>> @@ -454,8 +454,8 @@ struct interrupt_server_node {
>>> void __init smp_setup_cpu_maps(void)
>>> {
>>> struct device_node *dn;
>>> - int shift = 0, cpu = 0;
>>> - int j, nthreads = 1;
>>> + int terminate, shift = 0, cpu = 0;
>>> + int j, bt_thread = 0, nthreads = 1;
>>> int len;
>>> struct interrupt_server_node *intserv_node, *n;
>>> struct list_head *bt_node, head;
>>> @@ -518,6 +518,7 @@ void __init smp_setup_cpu_maps(void)
>>> for (j = 0 ; j < nthreads; j++) {
>>> if (be32_to_cpu(intserv[j]) == boot_cpu_hwid) {
>>> bt_node = &intserv_node->node;
>>> + bt_thread = j;
>>> found_boot_cpu = true;
>>> /*
>>> * Record the round-shift between dt
>>> @@ -537,11 +538,21 @@ void __init smp_setup_cpu_maps(void)
>>> /* Select the primary thread, the boot cpu's slibing, as the logic 0 */
>>> list_add_tail(&head, bt_node);
>>> pr_info("the round shift between dt seq and the cpu logic number: %d\n", shift);
>>> + terminate = nr_cpu_ids;
>>> list_for_each_entry(intserv_node, &head, node) {
>>> + j = 0;
>>
>>> + /* Choose a start point to cover the boot cpu */
>>> + if (nr_cpu_ids - 1 < bt_thread) {
>>> + /*
>>> + * The processor core puts assumption on the thread id,
>>> + * not to breach the assumption.
>>> + */
>>> + terminate = nr_cpu_ids - 1;
>>
>> nthreads is anyway assumed to be same for all cores. So, enforcing
>> nr_cpu_ids to a minimum of nthreads (and multiple of nthreads) should
>> make the code much simpler without the need for above check and the
>> other complexities addressed in the subsequent patches...
>>
>
> Indeed, this series can be splited into two partsk, [1-2/5] and [3-5/5].
> In [1-2/5], if smaller, the nr_cpu_ids is enforced to be equal to
> nthreads. I will make it align upward on nthreads in the next version.
> So [1-2/5] can be totally independent from the rest patches in this
> series.
Yup. Would prefer it that way.
> From an engineer's perspective, [3-5/5] are added to maintain the
> nr_cpus semantics. (Finally, nr_cpus=1 can be achieved but requiring
> effort on other subsystem)
I understand it would be nice to maintain semantics but not worth the
complexity it brings, IMHO. So, my suggestion would be to drop [3-5/5].
Thanks
Hari
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCHv8 4/5] powerpc/cpu: Skip impossible cpu during iteration on a core
2023-10-09 11:30 [PATCHv8 0/5] enable nr_cpus for powerpc Pingfan Liu
` (2 preceding siblings ...)
2023-10-09 11:30 ` [PATCHv8 3/5] powerpc/setup: Handle the case when boot_cpuid greater than nr_cpus Pingfan Liu
@ 2023-10-09 11:30 ` Pingfan Liu
2023-10-09 11:30 ` [PATCHv8 5/5] powerpc/setup: alloc extra paca_ptrs to hold boot_cpuid Pingfan Liu
4 siblings, 0 replies; 19+ messages in thread
From: Pingfan Liu @ 2023-10-09 11:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Wen Xiong, Nicholas Piggin
The threads in a core have equal status, so the code introduces a for
loop pattern to execute the same task on each thread:
for (i = first_thread; i < first_thread + threads_per_core; i++)
Now that some threads may not be in the cpu_possible_mask, the iteration
skips those threads by checking the mask. In this way, the unpopulated
pcpu struct can be skipped and left unaccessed.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/include/asm/cputhreads.h | 6 +++++
arch/powerpc/kernel/smp.c | 2 +-
arch/powerpc/kvm/book3s_hv.c | 7 ++----
arch/powerpc/platforms/powernv/idle.c | 32 ++++++++++++------------
arch/powerpc/platforms/powernv/subcore.c | 5 +++-
5 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index f26c430f3982..fdb71ff7f6a9 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -65,6 +65,12 @@ static inline int cpu_last_thread_sibling(int cpu)
return cpu | (threads_per_core - 1);
}
+#define for_each_possible_cpu_in_core(start, iter) \
+ for (iter = start; iter < start + threads_per_core; iter++) \
+ if (unlikely(!cpu_possible(iter))) \
+ continue; \
+ else
+
/*
* tlb_thread_siblings are siblings which share a TLB. This is not
* architected, is not something a hypervisor could emulate and a future
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index fbbb695bae3d..2936f7a2240d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -933,7 +933,7 @@ static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct threa
zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
- for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ for_each_possible_cpu_in_core(first_thread, i) {
int i_group_start = get_cpu_thread_group_start(i, tg);
if (unlikely(i_group_start == -1)) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 130bafdb1430..ff4b3f8affba 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -6235,12 +6235,9 @@ static int kvm_init_subcore_bitmap(void)
return -ENOMEM;
- for (j = 0; j < threads_per_core; j++) {
- int cpu = first_cpu + j;
-
- paca_ptrs[cpu]->sibling_subcore_state =
+ for_each_possible_cpu_in_core(first_cpu, j)
+ paca_ptrs[j]->sibling_subcore_state =
sibling_subcore_state;
- }
}
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index ad41dffe4d92..79d81ce5cf4c 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -823,36 +823,36 @@ void pnv_power9_force_smt4_catch(void)
cpu = smp_processor_id();
cpu0 = cpu & ~(threads_per_core - 1);
- for (thr = 0; thr < threads_per_core; ++thr) {
- if (cpu != cpu0 + thr)
- atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+ for_each_possible_cpu_in_core(cpu0, thr) {
+ if (cpu != thr)
+ atomic_inc(&paca_ptrs[thr]->dont_stop);
}
/* order setting dont_stop vs testing requested_psscr */
smp_mb();
- for (thr = 0; thr < threads_per_core; ++thr) {
- if (!paca_ptrs[cpu0+thr]->requested_psscr)
+ for_each_possible_cpu_in_core(cpu0, thr) {
+ if (!paca_ptrs[thr]->requested_psscr)
++awake_threads;
else
- poke_threads |= (1 << thr);
+ poke_threads |= (1 << (thr - cpu0));
}
/* If at least 3 threads are awake, the core is in SMT4 already */
if (awake_threads < need_awake) {
/* We have to wake some threads; we'll use msgsnd */
- for (thr = 0; thr < threads_per_core; ++thr) {
- if (poke_threads & (1 << thr)) {
+ for_each_possible_cpu_in_core(cpu0, thr) {
+ if (poke_threads & (1 << (thr - cpu0))) {
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
- paca_ptrs[cpu0+thr]->hw_cpu_id);
+ paca_ptrs[thr]->hw_cpu_id);
}
}
/* now spin until at least 3 threads are awake */
do {
- for (thr = 0; thr < threads_per_core; ++thr) {
- if ((poke_threads & (1 << thr)) &&
- !paca_ptrs[cpu0+thr]->requested_psscr) {
+ for_each_possible_cpu_in_core(cpu0, thr) {
+ if ((poke_threads & (1 << (thr - cpu0))) &&
+ !paca_ptrs[thr]->requested_psscr) {
++awake_threads;
- poke_threads &= ~(1 << thr);
+ poke_threads &= ~(1 << (thr - cpu0));
}
}
} while (awake_threads < need_awake);
@@ -868,9 +868,9 @@ void pnv_power9_force_smt4_release(void)
cpu0 = cpu & ~(threads_per_core - 1);
/* clear all the dont_stop flags */
- for (thr = 0; thr < threads_per_core; ++thr) {
- if (cpu != cpu0 + thr)
- atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
+ for_each_possible_cpu_in_core(cpu0, thr) {
+ if (cpu != thr)
+ atomic_dec(&paca_ptrs[thr]->dont_stop);
}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 191424468f10..b229115c8c0f 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -151,9 +151,12 @@ static void wait_for_sync_step(int step)
{
int i, cpu = smp_processor_id();
- for (i = cpu + 1; i < cpu + threads_per_core; i++)
+ for_each_possible_cpu_in_core(cpu, i) {
+ if (i == cpu)
+ continue;
while(per_cpu(split_state, i).step < step)
barrier();
+ }
/* Order the wait loop vs any subsequent loads/stores. */
mb();
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCHv8 5/5] powerpc/setup: alloc extra paca_ptrs to hold boot_cpuid
2023-10-09 11:30 [PATCHv8 0/5] enable nr_cpus for powerpc Pingfan Liu
` (3 preceding siblings ...)
2023-10-09 11:30 ` [PATCHv8 4/5] powerpc/cpu: Skip impossible cpu during iteration on a core Pingfan Liu
@ 2023-10-09 11:30 ` Pingfan Liu
4 siblings, 0 replies; 19+ messages in thread
From: Pingfan Liu @ 2023-10-09 11:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Baoquan He, Pingfan Liu, kexec, Mahesh Salgaonkar, Ming Lei,
Wen Xiong, Nicholas Piggin
paca_ptrs should be large enough to hold the boot_cpuid, hence, its
lower boundary is set to the bigger one between boot_cpuid+1 and
nr_cpus.
On the other hand, some kernel components make assumptions:
-1. the timer assumes cpu0 is online, since the timer_list->flags subfield
'TIMER_CPUMASK' is zero if not initialized to a proper present cpu.
-2. power9_idle_stop() assumes the primary thread's paca is allocated.
Hence lift nr_cpu_ids from one to two to ensure cpu0 is onlined, if the
boot cpu is not cpu0.
Result:
When nr_cpus=1, taskset -c 14 bash -c 'echo c > /proc/sysrq-trigger'
the kdump kernel brings up two cpus.
Whereas with taskset -c 4 bash -c 'echo c > /proc/sysrq-trigger',
the kdump kernel brings up one cpu.
Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Wen Xiong <wenxiong@linux.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: kexec@lists.infradead.org
To: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/paca.c | 10 ++++++----
arch/powerpc/kernel/prom.c | 9 ++++++---
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index cda4e00b67c1..91e2401de1bd 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -242,9 +242,10 @@ static int __initdata paca_struct_size;
void __init allocate_paca_ptrs(void)
{
- paca_nr_cpu_ids = nr_cpu_ids;
+ int n = (boot_cpuid + 1) > nr_cpu_ids ? (boot_cpuid + 1) : nr_cpu_ids;
- paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ paca_nr_cpu_ids = n;
+ paca_ptrs_size = sizeof(struct paca_struct *) * n;
paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
if (!paca_ptrs)
panic("Failed to allocate %d bytes for paca pointers\n",
@@ -287,13 +288,14 @@ void __init allocate_paca(int cpu)
void __init free_unused_pacas(void)
{
int new_ptrs_size;
+ int n = (boot_cpuid + 1) > nr_cpu_ids ? (boot_cpuid + 1) : nr_cpu_ids;
- new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ new_ptrs_size = sizeof(struct paca_struct *) * n;
if (new_ptrs_size < paca_ptrs_size)
memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
paca_ptrs_size - new_ptrs_size);
- paca_nr_cpu_ids = nr_cpu_ids;
+ paca_nr_cpu_ids = n;
paca_ptrs_size = new_ptrs_size;
#ifdef CONFIG_PPC_64S_HASH_MMU
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 87272a2d8c10..15c994f54bf9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -362,9 +362,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
*/
boot_cpuid = i;
found = true;
- /* This works around the hole in paca_ptrs[]. */
- if (nr_cpu_ids < nthreads)
- set_nr_cpu_ids(nthreads);
+ /*
+ * Ideally, nr_cpus=1 can be achieved if each kernel
+ * component does not assume cpu0 is onlined.
+ */
+ if (boot_cpuid != 0 && nr_cpu_ids < 2)
+ set_nr_cpu_ids(2);
}
#ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread