* [PATCH]: Fix for sparc64 SMP boot failures
@ 2004-12-05 4:12 David S. Miller
2004-12-05 5:07 ` William Lee Irwin III
2004-12-05 23:33 ` Ben Collins
0 siblings, 2 replies; 3+ messages in thread
From: David S. Miller @ 2004-12-05 4:12 UTC (permalink / raw)
To: sparclinux
[-- Attachment #1: Type: text/plain, Size: 747 bytes --]
This bug has been around for quite some time, is present
in both 2.4.x and 2.6.x, and only triggers sporadically.
It depends upon the size of the kernel image and how the
BSS section is laid out. Specifically, both of the following
conditions must hold to hit the bug:
1) The kernel must be > 4MB in size. You can check this via
looking at System.map for the kernel image and checking
if the final symbol "_end" is at an address >= 0x800000
2) The object "p1275buf" must cross a PAGE_SIZE (8K) boundary.
If this is true, secondary cpus will not boot up and instead
will hang in the kernel TLB miss handler.
The attached patches for 2.4.x and 2.6.x fix the problem and I'll
be pushing these upstream right after this email goes out.
[-- Attachment #2: sparc64_smp_24.diff --]
[-- Type: text/plain, Size: 4783 bytes --]
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2004/12/03 20:43:03-08:00 davem@nuts.davemloft.net
# [SPARC64]: Fix SMP cpu bringup bug when bigkernel.
#
# We have to load the bigkernel second TLB entry on
# secondary processors before we move over to use
# the kernel trap table. Otherwise we can take a
# TLB miss somewhere in the post-4MB area and the
# TLB handler is not prepared to service that.
#
# The case that usually occurs is the prom_set_trap_table
# call made by trampoline.S, since p1275buf usually sits
# very near the end of the kernel image. It worked by
# luck most of the time as long as p1275buf sits within
# a single page since earlier code running in trampoline.S
# forced that TLB entry to be loaded by the OBP TLB miss
# handler.
#
# This was not fun to figure out.
#
# Signed-off-by: David S. Miller <davem@davemloft.net>
#
# arch/sparc64/kernel/trampoline.S
# 2004/12/03 20:39:23-08:00 davem@nuts.davemloft.net +84 -1
# [SPARC64]: Fix SMP cpu bringup bug when bigkernel.
#
# arch/sparc64/kernel/smp.c
# 2004/12/03 20:39:23-08:00 davem@nuts.davemloft.net +0 -9
# [SPARC64]: Fix SMP cpu bringup bug when bigkernel.
#
diff -Nru a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
--- a/arch/sparc64/kernel/smp.c 2004-12-03 20:45:30 -08:00
+++ b/arch/sparc64/kernel/smp.c 2004-12-03 20:45:30 -08:00
@@ -137,15 +137,6 @@
void __init smp_callin(void)
{
int cpuid = hard_smp_processor_id();
- extern int bigkernel;
- extern unsigned long kern_locked_tte_data;
-
- if (bigkernel) {
- prom_dtlb_load(sparc64_highest_locked_tlbent()-1,
- kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);
- prom_itlb_load(sparc64_highest_locked_tlbent()-1,
- kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);
- }
inherit_locked_prom_mappings(0);
diff -Nru a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
--- a/arch/sparc64/kernel/trampoline.S 2004-12-03 20:45:30 -08:00
+++ b/arch/sparc64/kernel/trampoline.S 2004-12-03 20:45:30 -08:00
@@ -90,7 +90,9 @@
sllx %g2, 32, %g2
wr %g2, 0, %tick_cmpr
- /* Call OBP by hand to lock KERNBASE into i/d tlbs. */
+ /* Call OBP by hand to lock KERNBASE into i/d tlbs.
+ * We lock 2 consecutive entries if we are 'bigkernel'.
+ */
mov %o0, %l0
sethi %hi(prom_entry_lock), %g2
@@ -136,6 +138,46 @@
call %o1
add %sp, (2047 + 128), %o0
+ sethi %hi(bigkernel), %g2
+ lduw [%g2 + %lo(bigkernel)], %g2
+ cmp %g2, 0
+ be,pt %icc, do_dtlb
+ nop
+
+ sethi %hi(call_method), %g2
+ or %g2, %lo(call_method), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 5, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ sethi %hi(itlb_load), %g2
+ or %g2, %lo(itlb_load), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(mmu_ihandle_cache), %g2
+ lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
+ stx %g2, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(KERNBASE + 0x400000), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x28]
+ sethi %hi(kern_locked_tte_data), %g2
+ ldx [%g2 + %lo(kern_locked_tte_data)], %g2
+ sethi %hi(0x400000), %g1
+ add %g2, %g1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x30]
+
+ mov 14, %g2
+ BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
+
+ mov 62, %g2
+1:
+ stx %g2, [%sp + 2047 + 128 + 0x38]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+do_dtlb:
sethi %hi(call_method), %g2
or %g2, %lo(call_method), %g2
stx %g2, [%sp + 2047 + 128 + 0x00]
@@ -168,6 +210,47 @@
call %o1
add %sp, (2047 + 128), %o0
+ sethi %hi(bigkernel), %g2
+ lduw [%g2 + %lo(bigkernel)], %g2
+ cmp %g2, 0
+ be,pt %icc, do_unlock
+ nop
+
+ sethi %hi(call_method), %g2
+ or %g2, %lo(call_method), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 5, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ sethi %hi(dtlb_load), %g2
+ or %g2, %lo(dtlb_load), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(mmu_ihandle_cache), %g2
+ lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
+ stx %g2, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(KERNBASE + 0x400000), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x28]
+ sethi %hi(kern_locked_tte_data), %g2
+ ldx [%g2 + %lo(kern_locked_tte_data)], %g2
+ sethi %hi(0x400000), %g1
+ add %g2, %g1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x30]
+
+ mov 14, %g2
+ BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
+
+ mov 62, %g2
+1:
+
+ stx %g2, [%sp + 2047 + 128 + 0x38]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+do_unlock:
sethi %hi(prom_entry_lock), %g2
stb %g0, [%g2 + %lo(prom_entry_lock)]
membar #StoreStore | #StoreLoad
[-- Attachment #3: sparc64_smp_26.diff --]
[-- Type: text/plain, Size: 4783 bytes --]
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2004/12/04 19:34:20-08:00 davem@nuts.davemloft.net
# [SPARC64]: Fix SMP cpu bringup bug when bigkernel.
#
# We have to load the bigkernel second TLB entry on
# secondary processors before we move over to use
# the kernel trap table. Otherwise we can take a
# TLB miss somewhere in the post-4MB area and the
# TLB handler is not prepared to service that.
#
# The case that usually occurs is the prom_set_trap_table
# call made by trampoline.S, since p1275buf usually sits
# very near the end of the kernel image. It worked by
# luck most of the time as long as p1275buf sits within
# a single page since earlier code running in trampoline.S
# forced that TLB entry to be loaded by the OBP TLB miss
# handler.
#
# This was not fun to figure out.
#
# Signed-off-by: David S. Miller <davem@davemloft.net>
#
# arch/sparc64/kernel/trampoline.S
# 2004/12/04 19:33:42-08:00 davem@nuts.davemloft.net +84 -1
# [SPARC64]: Fix SMP cpu bringup bug when bigkernel.
#
# arch/sparc64/kernel/smp.c
# 2004/12/04 19:33:42-08:00 davem@nuts.davemloft.net +0 -9
# [SPARC64]: Fix SMP cpu bringup bug when bigkernel.
#
diff -Nru a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
--- a/arch/sparc64/kernel/smp.c 2004-12-04 19:41:44 -08:00
+++ b/arch/sparc64/kernel/smp.c 2004-12-04 19:41:44 -08:00
@@ -103,15 +103,6 @@
void __init smp_callin(void)
{
int cpuid = hard_smp_processor_id();
- extern int bigkernel;
- extern unsigned long kern_locked_tte_data;
-
- if (bigkernel) {
- prom_dtlb_load(sparc64_highest_locked_tlbent()-1,
- kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);
- prom_itlb_load(sparc64_highest_locked_tlbent()-1,
- kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);
- }
inherit_locked_prom_mappings(0);
diff -Nru a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
--- a/arch/sparc64/kernel/trampoline.S 2004-12-04 19:41:44 -08:00
+++ b/arch/sparc64/kernel/trampoline.S 2004-12-04 19:41:44 -08:00
@@ -90,7 +90,9 @@
sllx %g2, 32, %g2
wr %g2, 0, %tick_cmpr
- /* Call OBP by hand to lock KERNBASE into i/d tlbs. */
+ /* Call OBP by hand to lock KERNBASE into i/d tlbs.
+ * We lock 2 consecutive entries if we are 'bigkernel'.
+ */
mov %o0, %l0
sethi %hi(prom_entry_lock), %g2
@@ -136,6 +138,46 @@
call %o1
add %sp, (2047 + 128), %o0
+ sethi %hi(bigkernel), %g2
+ lduw [%g2 + %lo(bigkernel)], %g2
+ cmp %g2, 0
+ be,pt %icc, do_dtlb
+ nop
+
+ sethi %hi(call_method), %g2
+ or %g2, %lo(call_method), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 5, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ sethi %hi(itlb_load), %g2
+ or %g2, %lo(itlb_load), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(mmu_ihandle_cache), %g2
+ lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
+ stx %g2, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(KERNBASE + 0x400000), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x28]
+ sethi %hi(kern_locked_tte_data), %g2
+ ldx [%g2 + %lo(kern_locked_tte_data)], %g2
+ sethi %hi(0x400000), %g1
+ add %g2, %g1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x30]
+
+ mov 14, %g2
+ BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
+
+ mov 62, %g2
+1:
+ stx %g2, [%sp + 2047 + 128 + 0x38]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+do_dtlb:
sethi %hi(call_method), %g2
or %g2, %lo(call_method), %g2
stx %g2, [%sp + 2047 + 128 + 0x00]
@@ -168,6 +210,47 @@
call %o1
add %sp, (2047 + 128), %o0
+ sethi %hi(bigkernel), %g2
+ lduw [%g2 + %lo(bigkernel)], %g2
+ cmp %g2, 0
+ be,pt %icc, do_unlock
+ nop
+
+ sethi %hi(call_method), %g2
+ or %g2, %lo(call_method), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 5, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ sethi %hi(dtlb_load), %g2
+ or %g2, %lo(dtlb_load), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(mmu_ihandle_cache), %g2
+ lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
+ stx %g2, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(KERNBASE + 0x400000), %g2
+ stx %g2, [%sp + 2047 + 128 + 0x28]
+ sethi %hi(kern_locked_tte_data), %g2
+ ldx [%g2 + %lo(kern_locked_tte_data)], %g2
+ sethi %hi(0x400000), %g1
+ add %g2, %g1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x30]
+
+ mov 14, %g2
+ BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
+
+ mov 62, %g2
+1:
+
+ stx %g2, [%sp + 2047 + 128 + 0x38]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
+
+do_unlock:
sethi %hi(prom_entry_lock), %g2
stb %g0, [%g2 + %lo(prom_entry_lock)]
membar #StoreStore | #StoreLoad
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH]: Fix for sparc64 SMP boot failures
2004-12-05 4:12 [PATCH]: Fix for sparc64 SMP boot failures David S. Miller
@ 2004-12-05 5:07 ` William Lee Irwin III
2004-12-05 23:33 ` Ben Collins
1 sibling, 0 replies; 3+ messages in thread
From: William Lee Irwin III @ 2004-12-05 5:07 UTC (permalink / raw)
To: sparclinux
On Sat, Dec 04, 2004 at 08:12:11PM -0800, David S. Miller wrote:
> This bug has been around for quite some time, is present
> in both 2.4.x and 2.6.x, and only triggers sporadically.
> It depends upon the size of the kernel image and how the
> BSS section is layed out. Specifically both of the following
> conditions must hold to hit the bug:
> 1) The kernel must be > 4MB in size. You can check this via
> looking at System.map for the kernel image and checking
> if the final symbol "_end" is at an address >= 0x800000
> 2) The object "p1275buf" must cross a PAGE_SIZE (8K) boundary.
> If this is true, secondary cpus will not boot up and instead
> will hang in the kernel TLB miss handler.
> The attached patches for 2.4.x and 2.6.x fix the problem and I'll
> be pushing these upstream right after this email goes out.
I'll spin this up for a test. I'll try to beef up my .config for
the occasion.
-- wli
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH]: Fix for sparc64 SMP boot failures
2004-12-05 4:12 [PATCH]: Fix for sparc64 SMP boot failures David S. Miller
2004-12-05 5:07 ` William Lee Irwin III
@ 2004-12-05 23:33 ` Ben Collins
1 sibling, 0 replies; 3+ messages in thread
From: Ben Collins @ 2004-12-05 23:33 UTC (permalink / raw)
To: sparclinux
> 1) The kernel must be > 4MB in size. You can check this via
> looking at System.map for the kernel image and checking
> if the final symbol "_end" is at an address >= 0x800000
>
> 2) The object "p1275buf" must cross a PAGE_SIZE (8K) boundary.
>
> If this is true, secondary cpus will not boot up and instead
> will hang in the kernel TLB miss handler.
Cool. This sounds like the bug that hit once during the bigkernel/silo
testing.
--
Debian - http://www.debian.org/
Linux 1394 - http://www.linux1394.org/
Subversion - http://subversion.tigris.org/
WatchGuard - http://www.watchguard.com/
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2004-12-05 23:33 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2004-12-05 4:12 [PATCH]: Fix for sparc64 SMP boot failures David S. Miller
2004-12-05 5:07 ` William Lee Irwin III
2004-12-05 23:33 ` Ben Collins
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.