All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] SMP initialization order fixes.
@ 2006-02-22 19:09 Ralf Baechle
  2006-02-22 21:41 ` Stuart Anderson
  2006-02-23  9:52 ` Rojhalat Ibrahim
  0 siblings, 2 replies; 14+ messages in thread
From: Ralf Baechle @ 2006-02-22 19:09 UTC (permalink / raw)
  To: linux-mips

This one should hopefully fix the SMP problems of recent times.  It
works on Malta with 34K, it seems to work on IP27 (the kernel is
presumably failing due to other issues), so now I'd ask especially
RM9000 & BCM1250 users for testing.  This really needs to be fixed for
2.6.16.

  Ralf

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index d86affa..d9293c5 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -540,6 +540,9 @@ void __init setup_arch(char **cmdline_p)
 	sparse_init();
 	paging_init();
 	resource_init();
+#ifdef CONFIG_SMP
+	plat_smp_setup();
+#endif
 }
 
 int __init fpu_disable(char *s)
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 5e18986..06ed907 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -236,7 +236,7 @@ void __init smp_prepare_cpus(unsigned in
 	init_new_context(current, &init_mm);
 	current_thread_info()->cpu = 0;
 	smp_tune_scheduling();
-	prom_prepare_cpus(max_cpus);
+	plat_prepare_cpus(max_cpus);
 }
 
 /* preload SMP state for boot cpu */
diff --git a/arch/mips/kernel/smp_mt.c b/arch/mips/kernel/smp_mt.c
index c930364..993b8bf 100644
--- a/arch/mips/kernel/smp_mt.c
+++ b/arch/mips/kernel/smp_mt.c
@@ -143,7 +143,7 @@ static struct irqaction irq_call = {
  * Make sure all CPU's are in a sensible state before we boot any of the
  * secondarys
  */
-void prom_prepare_cpus(unsigned int max_cpus)
+void plat_smp_setup(void)
 {
 	unsigned long val;
 	int i, num;
@@ -179,11 +179,9 @@ void prom_prepare_cpus(unsigned int max_
 				write_vpe_c0_vpeconf0(tmp);
 
 				/* Record this as available CPU */
-				if (i < max_cpus) {
-					cpu_set(i, phys_cpu_present_map);
-					__cpu_number_map[i]	= ++num;
-					__cpu_logical_map[num]	= i;
-				}
+				cpu_set(i, phys_cpu_present_map);
+				__cpu_number_map[i]	= ++num;
+				__cpu_logical_map[num]	= i;
 			}
 
 			/* disable multi-threading with TC's */
@@ -241,7 +239,10 @@ void prom_prepare_cpus(unsigned int max_
 		set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
 		set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
 	}
+}
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 	cpu_ipi_resched_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
 	cpu_ipi_call_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ;
 
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 7f8fda9..0e903cc 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -50,37 +50,25 @@ void __init prom_grab_secondary(void)
  * We don't want to start the secondary CPU yet nor do we have a nice probing
  * feature in PMON so we just assume presence of the secondary core.
  */
-static char maxcpus_string[] __initdata =
-	KERN_WARNING "max_cpus set to 0; using 1 instead\n";
-
-void __init prom_prepare_cpus(unsigned int max_cpus)
+void __init plat_smp_setup(void)
 {
-	int enabled = 0, i;
-
-	if (max_cpus == 0) {
-		printk(maxcpus_string);
-		max_cpus = 1;
-	}
+	int i;
 
 	cpus_clear(phys_cpu_present_map);
 
 	for (i = 0; i < 2; i++) {
-		if (i == max_cpus)
-			break;
-
-		/*
-		 * The boot CPU
-		 */
-		cpu_set(i, phys_cpu_present_map);
+		cpu_set(i, cpu_present_map);
 		__cpu_number_map[i]	= i;
 		__cpu_logical_map[i]	= i;
-		enabled++;
 	}
+}
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 	/*
 	 * Be paranoid.  Enable the IPI only if we're really about to go SMP.
 	 */
-	if (enabled > 1)
+	if (cpus_weight(cpu_possible_map))
 		set_c0_status(STATUSF_IP5);
 }
 
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index dbef3f6..09fa7f5 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -140,7 +140,7 @@ static __init void intr_clear_all(nasid_
 		REMOTE_HUB_CLR_INTR(nasid, i);
 }
 
-void __init prom_prepare_cpus(unsigned int max_cpus)
+void __init plat_smp_setup(void)
 {
 	cnodeid_t	cnode;
 
@@ -161,6 +161,11 @@ void __init prom_prepare_cpus(unsigned i
 	alloc_cpupda(0, 0);
 }
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
+	/* We already did everything necessary earlier */
+}
+
 /*
  * Launch a slave into smp_bootstrap().  It doesn't take an argument, and we
  * set sp to the kernel stack of the newly created idle process, gp to the proc
diff --git a/arch/mips/sibyte/cfe/smp.c b/arch/mips/sibyte/cfe/smp.c
index 4477af3..eab20e2 100644
--- a/arch/mips/sibyte/cfe/smp.c
+++ b/arch/mips/sibyte/cfe/smp.c
@@ -31,7 +31,7 @@
  *
  * Common setup before any secondaries are started
  */
-void __init prom_prepare_cpus(unsigned int max_cpus)
+void __init plat_smp_setup(void)
 {
 	int i, num;
 
@@ -40,14 +40,18 @@ void __init prom_prepare_cpus(unsigned i
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
-	for (i=1, num=0; i<NR_CPUS; i++) {
+	for (i = 1, num = 0; i < NR_CPUS; i++) {
 		if (cfe_cpu_stop(i) == 0) {
 			cpu_set(i, phys_cpu_present_map);
 			__cpu_number_map[i] = ++num;
 			__cpu_logical_map[num] = i;
 		}
 	}
-	printk("Detected %i available secondary CPU(s)\n", num);
+	printk(KERN_INFO "Detected %i available secondary CPU(s)\n", num);
+}
+
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 }
 
 /*
diff --git a/include/asm-mips/smp.h b/include/asm-mips/smp.h
index 5618f1e..75c6fe7 100644
--- a/include/asm-mips/smp.h
+++ b/include/asm-mips/smp.h
@@ -58,7 +58,9 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-/* These are defined by the board-specific code. */
+/*
+ * These are defined by the board-specific code.
+ */
 
 /*
  * Cause the function described by call_data to be executed on the passed
@@ -79,7 +81,12 @@ extern void prom_boot_secondary(int cpu,
 extern void prom_init_secondary(void);
 
 /*
- * Detect available CPUs, populate phys_cpu_present_map before smp_init
+ * Populate cpu_possible_map before smp_init, called from setup_arch.
+ */
+extern void plat_smp_setup(void);
+
+/*
+ * Called after init_IRQ but before __cpu_up.
  */
 extern void prom_prepare_cpus(unsigned int max_cpus);
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
  2006-02-22 19:09 [RFC] SMP initialization order fixes Ralf Baechle
@ 2006-02-22 21:41 ` Stuart Anderson
  2006-02-23 11:31   ` Ralf Baechle
  2006-02-24  1:42     ` Martin Michlmayr
  2006-02-23  9:52 ` Rojhalat Ibrahim
  1 sibling, 2 replies; 14+ messages in thread
From: Stuart Anderson @ 2006-02-22 21:41 UTC (permalink / raw)
  To: linux-mips

On Wed, 22 Feb 2006, Ralf Baechle wrote:

> This one should hopefully fix the SMP problems of the resent times.  It
> works on Malta with 34K, it seems to work on IP27 (the kernel is
> presumably failing due to other issues), so now I'd ask especially
> RM9000 & BCM1250 users for testing.  This really needs to be fixed for
> 2.6.16.

I'm not sure if this is the specific fix or not, but I can report that git
as of today (approx 2pm est) is working better than it has since 2.6.14 for
me on a bcm1480. I had tried git a couple of weeks ago, and it still hung
when I stressed it.

I use NFS root, and the stress test that would hang the system is simply
"make -j 4" of the kernel. Previously this would hang the system before
finishing.

Now that things seem to be better, I'll leave this looping for a while,
and then run some other tests. It's time to run the ltp on the 3 different
ABIs again anyway.


                                  Stuart

Stuart R. Anderson                               anderson@netsweng.com
Network & Software Engineering                   http://www.netsweng.com/
1024D/37A79149:                                  0791 D3B8 9A4C 2CDC A31F
                                                   BD03 0A62 E534 37A7 9149

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
  2006-02-22 19:09 [RFC] SMP initialization order fixes Ralf Baechle
  2006-02-22 21:41 ` Stuart Anderson
@ 2006-02-23  9:52 ` Rojhalat Ibrahim
  2006-02-23 11:55   ` Ralf Baechle
  1 sibling, 1 reply; 14+ messages in thread
From: Rojhalat Ibrahim @ 2006-02-23  9:52 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips

Ralf Baechle wrote:
> This one should hopefully fix the SMP problems of the resent times.  It
> works on Malta with 34K, it seems to work on IP27 (the kernel is
> presumably failing due to other issues), so now I'd ask especially
> RM9000 & BCM1250 users for testing.  This really needs to be fixed for
> 2.6.16.
> 
>   Ralf
> 

Works for me with a little fix. You need to set phys_cpu_present_map
in yosemite/smp.c. Therefore the following two lines in the patch
are unnecessary.

> -		cpu_set(i, phys_cpu_present_map);
> +		cpu_set(i, cpu_present_map);

Thanks
Rojhalat Ibrahim

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
  2006-02-22 21:41 ` Stuart Anderson
@ 2006-02-23 11:31   ` Ralf Baechle
  2006-02-24  0:56     ` Stuart Anderson
  2006-02-24  1:42     ` Martin Michlmayr
  1 sibling, 1 reply; 14+ messages in thread
From: Ralf Baechle @ 2006-02-23 11:31 UTC (permalink / raw)
  To: Stuart Anderson; +Cc: linux-mips

On Wed, Feb 22, 2006 at 04:41:43PM -0500, Stuart Anderson wrote:
> Date:	Wed, 22 Feb 2006 16:41:43 -0500 (EST)
> From:	Stuart Anderson <anderson@netsweng.com>
> To:	linux-mips@linux-mips.org
> Subject: Re: [RFC] SMP initialization order fixes.
> Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed
> 
> On Wed, 22 Feb 2006, Ralf Baechle wrote:
> 
> >This one should hopefully fix the SMP problems of the resent times.  It
> >works on Malta with 34K, it seems to work on IP27 (the kernel is
> >presumably failing due to other issues), so now I'd ask especially
> >RM9000 & BCM1250 users for testing.  This really needs to be fixed for
> >2.6.16.
> 
> I'm not sure if this is the specific fix or not, but I can report that git
> as of today (approx 2pm est) is working better than is has since 2.6.14 for
> me on a bcm1480. I had tried git a couple of weeks ago, and it still hung
> when I stressed it.

Seems unrelated then.  This fix should make the difference between working
perfectly or not at all.  There have been numerous other fixes since 2.6.14
so hard to say what made the difference.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
  2006-02-23  9:52 ` Rojhalat Ibrahim
@ 2006-02-23 11:55   ` Ralf Baechle
  0 siblings, 0 replies; 14+ messages in thread
From: Ralf Baechle @ 2006-02-23 11:55 UTC (permalink / raw)
  To: Rojhalat Ibrahim; +Cc: linux-mips

On Thu, Feb 23, 2006 at 10:52:08AM +0100, Rojhalat Ibrahim wrote:

> Works for me with a little fix. You need to set phys_cpu_present_map
> in yosemite/smp.c. Therefore the following two lines in the patch
> are unnecessary.
> 
> > -		cpu_set(i, phys_cpu_present_map);
> > +		cpu_set(i, cpu_present_map);

In include/asm-mips/smp.h we have the define:

  #define cpu_possible_map        phys_cpu_present_map

I meant to get rid of direct references to phys_cpu_present_map so really
should have done this:

> > -		cpu_set(i, phys_cpu_present_map);
> > +		cpu_set(i, cpu_possible_map);

Anyway, I dropped this for now and will commit the patch.

Thanks for testing & fix.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
  2006-02-23 11:31   ` Ralf Baechle
@ 2006-02-24  0:56     ` Stuart Anderson
  0 siblings, 0 replies; 14+ messages in thread
From: Stuart Anderson @ 2006-02-24  0:56 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: linux-mips

On Thu, 23 Feb 2006, Ralf Baechle wrote:

>> I'm not sure if this is the specific fix or not, but I can report that git
>> as of today (approx 2pm est) is working better than is has since 2.6.14 for
>> me on a bcm1480. I had tried git a couple of weeks ago, and it still hung
>> when I stressed it.
>
> Seems unrelated then.  This fix should make the difference between working
> perfectly or not at all.  There have been numerous other fixes since 2.6.14
> so hard to say what made the difference.

You're right, it is unrelated. Shortly after this message went out & came
back, it hung up again like it had been doing 8-(. I should have just kept my
mouth shut and then it would still be working.

It really did run much longer that one time, but I haven't been able to
reproduce a run that lasted that long again. Sigh....


                                 Stuart

Stuart R. Anderson                               anderson@netsweng.com
Network & Software Engineering                   http://www.netsweng.com/
1024D/37A79149:                                  0791 D3B8 9A4C 2CDC A31F
                                                  BD03 0A62 E534 37A7 9149

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
@ 2006-02-24  1:42     ` Martin Michlmayr
  0 siblings, 0 replies; 14+ messages in thread
From: Martin Michlmayr @ 2006-02-24  1:42 UTC (permalink / raw)
  To: linux-mips

* Stuart Anderson <anderson@netsweng.com> [2006-02-22 16:41]:
> >RM9000 & BCM1250 users for testing.  This really needs to be fixed for
> >2.6.16.
> I'm not sure if this is the specific fix or not, but I can report that git
> as of today (approx 2pm est) is working better than is has since 2.6.14 for
> me on a bcm1480.

Strange, I get the following oops during boot with latest git:


CFE> ifconfig eth0 -auto; boot -elf 192.168.1.1:/srv/tftp/mips/bigsur
Device eth0:  hwaddr 00-02-4C-F5-2C-3C, ipaddr 192.168.1.146, mask 255.255.255.0
        gateway 192.168.1.1, nameserver 131.111.8.42, domain cyrius.com
Loader:elf Filesys:tftp Dev:eth0 File:192.168.1.1:/srv/tftp/mips/bigsur Options:(null)
Loading: 0xffffffff80100000/3248680 
eth0: Link speed: 100BaseT FDX^[[J
0xffffffff80419228/286248 Entry at 0x803e9000
Closing network.
Starting program at 0x803e9000
[RUN!]
Broadcom SiByte BCM1480 A3 (pass1) @ 700 MHz (SB-1A rev 0)
Board type: SiByte BCM91x80A/B (BigSur)
[CPU1]
[cpu1]
[CPU2]
[cpu2]
[CPU3]
[cpu3]
Linux version 2.6.16-rc4-Helix64-smp (tbm@deprecation) (gcc version 4.0.3 20051201 (prerelease) (Debian 4.0.2-5)) #2 SMP Fri Feb 24 01:36:35 GMT 2006
CPU revision is: 01041100
FPU revision is: 000f0103
swarm setup: M41T81 RTC detected.
This kernel optimized for board runs with CFE
Determined physical RAM map:
 memory: 000000000fe7ae00 @ 0000000000000000 (usable)
 memory: 000000001ffffe00 @ 0000000080000000 (usable)
 memory: 000000000ffffe00 @ 00000000c0000000 (usable)
 memory: 000000003ffffe00 @ 0000000140000000 (usable)
Detected 3 available secondary CPU(s)
Built 1 zonelists
Kernel command line: root=/dev/hda2
Primary instruction cache 32kB, 4-way, linesize 32 bytes.
Primary data cache 32kB, 4-way, linesize 32 bytes.
Synthesized TLB refill handler (35 instructions).
Synthesized TLB load handler fastpath (49 instructions).
Synthesized TLB store handler fastpath (49 instructions).
Synthesized TLB modify handler fastpath (48 instructions).
PID hash table entries: 4096 (order: 12, 131072 bytes)
Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes)
Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)
Memory: 1991780k/2095580k available (2437k kernel code, 103096k reserved, 539k data, 196k init, 0k highmem)
Mount-cache hash table entries: 256
Checking for 'wait' instruction...  unavailable.
Checking for the multiply/shift bug... no.
Checking for the daddi bug... no.
Checking for the daddiu bug... no.
CPU revision is: 03041100
FPU revision is: 000f0103
CPU 0 Unable to handle kernel paging request at virtual address 0000008b3cb03e00, epc == ffffffff8010b37c, ra == ffffffff8010b2fc
Primary instruction cache 32kB, 4-way, linesize 32 bytes.
Primary data cache 32kB, 4-way, linesize 32 bytes.
Synthesized TLB refill handler (35 instructions).
Oops[#1]:
Cpu 0
$ 0   : 0000000000000000 0000000000000001 0000000003333333 0000008b3cb03e00
$ 4   : ffffffff8041be00 0000000000000001 0000000000000000 ffffffff8041c588
$ 8   : 0000000014001fe1 ffffffff8fefcae0 ffffffff803e9108 0000000000000000
$12   : ffffffffffffffff ffffffff8026f7e8 ffffffff80420000 ffffffff80420000
$16   : 0000000000000001 0000000000000001 0000000000000001 ffffffff8041c5c0
$20   : ffffffff80430000 0000000000000000 0000000000000000 0000000000000000
$24   : ffffffff80430000 ffffffff8fe7dfd4                                  
$28   : ffffffff9fff8000 a80000009fffbf30 0000000000000000 ffffffff8010b2fc
Hi    : 0000000000000000
Lo    : 0000000000000024
epc   : ffffffff8010b37c __cpu_up+0xb4/0x168     Not tainted
ra    : ffffffff8010b2fc __cpu_up+0x34/0x168
Status: 14001fe3    KX SX UX KERNEL EXL IE 
Cause : 00808008
BadVA : 0000008b3cb03e00
PrId  : 01041100
Process swapper (pid: 1, threadinfo=a80000009fff8000, task=a80000000fe79848)
Stack : 0000000000000001 0000000000000001 ffffffff8015eccc ffffffff8015ecb8
        0000000000000001 ffffffff80420000 ffffffff80420000 ffffffff803a0000
        0000000000000000 ffffffff80100e78 0000000000000000 0000000000000000
        0000000000000000 0000000000000000 0000000000000000 0000000000000000
        0000000000000000 0000000000000000 0000000000000000 ffffffff80104c80
        0000000000000000 ffffffff80104c70 0000000000000000 0000000000000000
        0000000000000000 0000000000000000
Call Trace:
 [<ffffffff8015eccc>] cpu_up+0xfc/0x190
 [<ffffffff8015ecb8>] cpu_up+0xe8/0x190
 [<ffffffff80100e78>] init+0x9c8/0x9f0
 [<ffffffff80104c80>] kernel_thread_helper+0x10/0x18
 [<ffffffff80104c70>] kernel_thread_helper+0x0/0x18


Code: 0062182d  3c020333  34423333 <dc640000> 000214b8  3442cccd  00021478  6446995c  00c4001d 
Kernel panic - not syncing: Attempted to kill init!
 <0>Rebooting in 5 seconds..261.63 BogoMIPS (lpj=130816)
Passing control back to CFE...

-- 
Martin Michlmayr
http://www.cyrius.com/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
@ 2006-02-24  1:42     ` Martin Michlmayr
  0 siblings, 0 replies; 14+ messages in thread
From: Martin Michlmayr @ 2006-02-24  1:42 UTC (permalink / raw)
  To: linux-mips

* Stuart Anderson <anderson@netsweng.com> [2006-02-22 16:41]:
> >RM9000 & BCM1250 users for testing.  This really needs to be fixed for
> >2.6.16.
> I'm not sure if this is the specific fix or not, but I can report that git
> as of today (approx 2pm est) is working better than is has since 2.6.14 for
> me on a bcm1480.

Strange, I get the following oops during boot with latest git:


CFE> ifconfig eth0 -auto; boot -elf 192.168.1.1:/srv/tftp/mips/bigsur
Device eth0:  hwaddr 00-02-4C-F5-2C-3C, ipaddr 192.168.1.146, mask 255.255.255.0
        gateway 192.168.1.1, nameserver 131.111.8.42, domain cyrius.com
Loader:elf Filesys:tftp Dev:eth0 File:192.168.1.1:/srv/tftp/mips/bigsur Options:(null)
Loading: 0xffffffff80100000/3248680 
eth0: Link speed: 100BaseT FDX^[[J
0xffffffff80419228/286248 Entry at 0x803e9000
Closing network.
Starting program at 0x803e9000
[RUN!]
Broadcom SiByte BCM1480 A3 (pass1) @ 700 MHz (SB-1A rev 0)
Board type: SiByte BCM91x80A/B (BigSur)
[CPU1]
[cpu1]
[CPU2]
[cpu2]
[CPU3]
[cpu3]
Linux version 2.6.16-rc4-Helix64-smp (tbm@deprecation) (gcc version 4.0.3 20051201 (prerelease) (Debian 4.0.2-5)) #2 SMP Fri Feb 24 01:36:35 GMT 2006
CPU revision is: 01041100
FPU revision is: 000f0103
swarm setup: M41T81 RTC detected.
This kernel optimized for board runs with CFE
Determined physical RAM map:
 memory: 000000000fe7ae00 @ 0000000000000000 (usable)
 memory: 000000001ffffe00 @ 0000000080000000 (usable)
 memory: 000000000ffffe00 @ 00000000c0000000 (usable)
 memory: 000000003ffffe00 @ 0000000140000000 (usable)
Detected 3 available secondary CPU(s)
Built 1 zonelists
Kernel command line: root=/dev/hda2
Primary instruction cache 32kB, 4-way, linesize 32 bytes.
Primary data cache 32kB, 4-way, linesize 32 bytes.
Synthesized TLB refill handler (35 instructions).
Synthesized TLB load handler fastpath (49 instructions).
Synthesized TLB store handler fastpath (49 instructions).
Synthesized TLB modify handler fastpath (48 instructions).
PID hash table entries: 4096 (order: 12, 131072 bytes)
Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes)
Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)
Memory: 1991780k/2095580k available (2437k kernel code, 103096k reserved, 539k data, 196k init, 0k highmem)
Mount-cache hash table entries: 256
Checking for 'wait' instruction...  unavailable.
Checking for the multiply/shift bug... no.
Checking for the daddi bug... no.
Checking for the daddiu bug... no.
CPU revision is: 03041100
FPU revision is: 000f0103
CPU 0 Unable to handle kernel paging request at virtual address 0000008b3cb03e00, epc == ffffffff8010b37c, ra == ffffffff8010b2fc
Primary instruction cache 32kB, 4-way, linesize 32 bytes.
Primary data cache 32kB, 4-way, linesize 32 bytes.
Synthesized TLB refill handler (35 instructions).
Oops[#1]:
Cpu 0
$ 0   : 0000000000000000 0000000000000001 0000000003333333 0000008b3cb03e00
$ 4   : ffffffff8041be00 0000000000000001 0000000000000000 ffffffff8041c588
$ 8   : 0000000014001fe1 ffffffff8fefcae0 ffffffff803e9108 0000000000000000
$12   : ffffffffffffffff ffffffff8026f7e8 ffffffff80420000 ffffffff80420000
$16   : 0000000000000001 0000000000000001 0000000000000001 ffffffff8041c5c0
$20   : ffffffff80430000 0000000000000000 0000000000000000 0000000000000000
$24   : ffffffff80430000 ffffffff8fe7dfd4                                  
$28   : ffffffff9fff8000 a80000009fffbf30 0000000000000000 ffffffff8010b2fc
Hi    : 0000000000000000
Lo    : 0000000000000024
epc   : ffffffff8010b37c __cpu_up+0xb4/0x168     Not tainted
ra    : ffffffff8010b2fc __cpu_up+0x34/0x168
Status: 14001fe3    KX SX UX KERNEL EXL IE 
Cause : 00808008
BadVA : 0000008b3cb03e00
PrId  : 01041100
Process swapper (pid: 1, threadinfo=a80000009fff8000, task=a80000000fe79848)
Stack : 0000000000000001 0000000000000001 ffffffff8015eccc ffffffff8015ecb8
        0000000000000001 ffffffff80420000 ffffffff80420000 ffffffff803a0000
        0000000000000000 ffffffff80100e78 0000000000000000 0000000000000000
        0000000000000000 0000000000000000 0000000000000000 0000000000000000
        0000000000000000 0000000000000000 0000000000000000 ffffffff80104c80
        0000000000000000 ffffffff80104c70 0000000000000000 0000000000000000
        0000000000000000 0000000000000000
Call Trace:
 [<ffffffff8015eccc>] cpu_up+0xfc/0x190
 [<ffffffff8015ecb8>] cpu_up+0xe8/0x190
 [<ffffffff80100e78>] init+0x9c8/0x9f0
 [<ffffffff80104c80>] kernel_thread_helper+0x10/0x18
 [<ffffffff80104c70>] kernel_thread_helper+0x0/0x18


Code: 0062182d  3c020333  34423333 <dc640000> 000214b8  3442cccd  00021478  6446995c  00c4001d 
Kernel panic - not syncing: Attempted to kill init!
 <0>Rebooting in 5 seconds..261.63 BogoMIPS (lpj=130816)
Passing control back to CFE...

-- 
Martin Michlmayr
http://www.cyrius.com/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [RFC] SMP initialization order fixes.
@ 2006-02-24 18:12 Mark E Mason
  0 siblings, 0 replies; 14+ messages in thread
From: Mark E Mason @ 2006-02-24 18:12 UTC (permalink / raw)
  To: Martin Michlmayr, linux-mips

Hello,

Odd.  The GIT tip is working for me on the 1480, *but* I'm compiling
with gcc 3.4.3, not gcc 4.*:

Transferring control to the kernel.
Kernel entry point is at 0x80424000
Broadcom SiByte BCM1480 A2 (pass1) @ 700 MHz (SB-1A rev 0)
Board type: SiByte BCM91x80A/B (BigSur)
[CPU1]
[cpu1]
[CPU2]
[cpu2]
[CPU3]
[cpu3]
[4294667.296000] Linux version 2.6.16-rc4-g762ac03f-dirty
(mason@hawaii.localdomain) (gcc version 3.4.3) #3 SMP Thu Feb 23
15:07:00 PST 2006
[4294667.296000] CPU revision is: 01041100
[4294667.296000] FPU revision is: 000f0103
[4294667.296000] swarm setup: M41T81 RTC detected.
[4294667.296000] This kernel optimized for board runs with CFE
[4294667.296000] Determined physical RAM map: 

/Mark 

> -----Original Message-----
> From: linux-mips-bounce@linux-mips.org 
> [mailto:linux-mips-bounce@linux-mips.org] On Behalf Of Martin 
> Michlmayr
> Sent: Thursday, February 23, 2006 5:42 PM
> To: linux-mips@linux-mips.org
> Subject: Re: [RFC] SMP initialization order fixes.
> 
> * Stuart Anderson <anderson@netsweng.com> [2006-02-22 16:41]:
> > >RM9000 & BCM1250 users for testing.  This really needs to be fixed 
> > >for 2.6.16.
> > I'm not sure if this is the specific fix or not, but I can 
> report that 
> > git as of today (approx 2pm est) is working better than is 
> has since 
> > 2.6.14 for me on a bcm1480.
> 
> Strange, I get the following oops during boot with latest git:
> 
> 
> CFE> ifconfig eth0 -auto; boot -elf 192.168.1.1:/srv/tftp/mips/bigsur
> Device eth0:  hwaddr 00-02-4C-F5-2C-3C, ipaddr 192.168.1.146, 
> mask 255.255.255.0
>         gateway 192.168.1.1, nameserver 131.111.8.42, domain 
> cyrius.com Loader:elf Filesys:tftp Dev:eth0 
> File:192.168.1.1:/srv/tftp/mips/bigsur Options:(null)
> Loading: 0xffffffff80100000/3248680
> eth0: Link speed: 100BaseT FDX^[[J
> 0xffffffff80419228/286248 Entry at 0x803e9000 Closing network.
> Starting program at 0x803e9000
> [RUN!]
> Broadcom SiByte BCM1480 A3 (pass1) @ 700 MHz (SB-1A rev 0) 
> Board type: SiByte BCM91x80A/B (BigSur) [CPU1] [cpu1] [CPU2] 
> [cpu2] [CPU3] [cpu3] Linux version 2.6.16-rc4-Helix64-smp 
> (tbm@deprecation) (gcc version 4.0.3 20051201 (prerelease) 
> (Debian 4.0.2-5)) #2 SMP Fri Feb 24 01:36:35 GMT 2006 CPU 
> revision is: 01041100 FPU revision is: 000f0103 swarm setup: 
> M41T81 RTC detected.
> This kernel optimized for board runs with CFE Determined 
> physical RAM map:
>  memory: 000000000fe7ae00 @ 0000000000000000 (usable)
>  memory: 000000001ffffe00 @ 0000000080000000 (usable)
>  memory: 000000000ffffe00 @ 00000000c0000000 (usable)
>  memory: 000000003ffffe00 @ 0000000140000000 (usable) 
> Detected 3 available secondary CPU(s) Built 1 zonelists 
> Kernel command line: root=/dev/hda2 Primary instruction cache 
> 32kB, 4-way, linesize 32 bytes.
> Primary data cache 32kB, 4-way, linesize 32 bytes.
> Synthesized TLB refill handler (35 instructions).
> Synthesized TLB load handler fastpath (49 instructions).
> Synthesized TLB store handler fastpath (49 instructions).
> Synthesized TLB modify handler fastpath (48 instructions).
> PID hash table entries: 4096 (order: 12, 131072 bytes) Dentry 
> cache hash table entries: 1048576 (order: 11, 8388608 bytes) 
> Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)
> Memory: 1991780k/2095580k available (2437k kernel code, 
> 103096k reserved, 539k data, 196k init, 0k highmem) 
> Mount-cache hash table entries: 256 Checking for 'wait' 
> instruction...  unavailable.
> Checking for the multiply/shift bug... no.
> Checking for the daddi bug... no.
> Checking for the daddiu bug... no.
> CPU revision is: 03041100
> FPU revision is: 000f0103
> CPU 0 Unable to handle kernel paging request at virtual 
> address 0000008b3cb03e00, epc == ffffffff8010b37c, ra == 
> ffffffff8010b2fc Primary instruction cache 32kB, 4-way, 
> linesize 32 bytes.
> Primary data cache 32kB, 4-way, linesize 32 bytes.
> Synthesized TLB refill handler (35 instructions).
> Oops[#1]:
> Cpu 0
> $ 0   : 0000000000000000 0000000000000001 0000000003333333 
> 0000008b3cb03e00
> $ 4   : ffffffff8041be00 0000000000000001 0000000000000000 
> ffffffff8041c588
> $ 8   : 0000000014001fe1 ffffffff8fefcae0 ffffffff803e9108 
> 0000000000000000
> $12   : ffffffffffffffff ffffffff8026f7e8 ffffffff80420000 
> ffffffff80420000
> $16   : 0000000000000001 0000000000000001 0000000000000001 
> ffffffff8041c5c0
> $20   : ffffffff80430000 0000000000000000 0000000000000000 
> 0000000000000000
> $24   : ffffffff80430000 ffffffff8fe7dfd4                     
>              
> $28   : ffffffff9fff8000 a80000009fffbf30 0000000000000000 
> ffffffff8010b2fc
> Hi    : 0000000000000000
> Lo    : 0000000000000024
> epc   : ffffffff8010b37c __cpu_up+0xb4/0x168     Not tainted
> ra    : ffffffff8010b2fc __cpu_up+0x34/0x168
> Status: 14001fe3    KX SX UX KERNEL EXL IE 
> Cause : 00808008
> BadVA : 0000008b3cb03e00
> PrId  : 01041100
> Process swapper (pid: 1, threadinfo=a80000009fff8000, 
> task=a80000000fe79848) Stack : 0000000000000001 
> 0000000000000001 ffffffff8015eccc ffffffff8015ecb8
>         0000000000000001 ffffffff80420000 ffffffff80420000 
> ffffffff803a0000
>         0000000000000000 ffffffff80100e78 0000000000000000 
> 0000000000000000
>         0000000000000000 0000000000000000 0000000000000000 
> 0000000000000000
>         0000000000000000 0000000000000000 0000000000000000 
> ffffffff80104c80
>         0000000000000000 ffffffff80104c70 0000000000000000 
> 0000000000000000
>         0000000000000000 0000000000000000 Call Trace:
>  [<ffffffff8015eccc>] cpu_up+0xfc/0x190
>  [<ffffffff8015ecb8>] cpu_up+0xe8/0x190
>  [<ffffffff80100e78>] init+0x9c8/0x9f0
>  [<ffffffff80104c80>] kernel_thread_helper+0x10/0x18  
> [<ffffffff80104c70>] kernel_thread_helper+0x0/0x18
> 
> 
> Code: 0062182d  3c020333  34423333 <dc640000> 000214b8  
> 3442cccd  00021478  6446995c  00c4001d 
> Kernel panic - not syncing: Attempted to kill init!
>  <0>Rebooting in 5 seconds..261.63 BogoMIPS (lpj=130816)
> Passing control back to CFE...
> 
> -- 
> Martin Michlmayr
> http://www.cyrius.com/
> 
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [RFC] SMP initialization order fixes.
@ 2006-02-24 19:43 Mark E Mason
  2006-02-24 20:02 ` Stuart Anderson
  0 siblings, 1 reply; 14+ messages in thread
From: Mark E Mason @ 2006-02-24 19:43 UTC (permalink / raw)
  To: Stuart Anderson, Ralf Baechle; +Cc: linux-mips

Hello Stuart,

Um - define "hung"...  There are some networking issues in 2.6.14 and
later kernels that only seem to show up in non-NAPI GigE drivers.  Do
you simply lose your NFS server & it never comes back, or does the
console stop responding to echo as well?

Thx,
Mark
 

> -----Original Message-----
> From: linux-mips-bounce@linux-mips.org 
> [mailto:linux-mips-bounce@linux-mips.org] On Behalf Of Stuart Anderson
> Sent: Thursday, February 23, 2006 4:57 PM
> To: Ralf Baechle
> Cc: linux-mips@linux-mips.org
> Subject: Re: [RFC] SMP initialization order fixes.
> 
> On Thu, 23 Feb 2006, Ralf Baechle wrote:
> 
> >> I'm not sure if this is the specific fix or not, but I can report 
> >> that git as of today (approx 2pm est) is working better 
> than is has 
> >> since 2.6.14 for me on a bcm1480. I had tried git a couple 
> of weeks 
> >> ago, and it still hung when I stressed it.
> >
> > Seems unrelated then.  This fix should make the difference between 
> > working perfectly or not at all.  There have been numerous 
> other fixes 
> > since 2.6.14 so hard to say what made the difference.
> 
> You're right, it is unrelated. Shortly after this message 
> wnet out & came back, it hung up again like it had been doing 
> 8-(. I should have just kept my mouth shut and then it would 
> still be working.
> 
> It really did run much longer that one time, but I haven't 
> been able to reproduce a run that lasted that long again. Sigh....
> 
> 
>                                  Stuart
> 
> Stuart R. Anderson                               anderson@netsweng.com
> Network & Software Engineering                   
> http://www.netsweng.com/
> 1024D/37A79149:                                  0791 D3B8 
> 9A4C 2CDC A31F
>                                                   BD03 0A62 
> E534 37A7 9149
> 
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [RFC] SMP initialization order fixes.
  2006-02-24 19:43 Mark E Mason
@ 2006-02-24 20:02 ` Stuart Anderson
  0 siblings, 0 replies; 14+ messages in thread
From: Stuart Anderson @ 2006-02-24 20:02 UTC (permalink / raw)
  To: Mark E Mason; +Cc: linux-mips

On Fri, 24 Feb 2006, Mark E Mason wrote:

> Hello Stuart,
>
> Um - define "hung"...

Networking stops happening. At this point, processes are still active.
Because I am using NFS root, any process that touches the filesystem
will then hang. It doesn't take too long for enough processes to touch
the FS for the system to be useless. As a test, I put a tmpfs on /tmp,
and ran sash from there. That shell would remain responsive after the
rest of the system was hung waiting on NFS.

                                  Stuart

Stuart R. Anderson                               anderson@netsweng.com
Network & Software Engineering                   http://www.netsweng.com/
1024D/37A79149:                                  0791 D3B8 9A4C 2CDC A31F
                                                   BD03 0A62 E534 37A7 9149

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [RFC] SMP initialization order fixes.
@ 2006-02-24 20:44 Mark E Mason
  2006-02-25 15:28 ` Stuart Anderson
  0 siblings, 1 reply; 14+ messages in thread
From: Mark E Mason @ 2006-02-24 20:44 UTC (permalink / raw)
  To: Stuart Anderson; +Cc: linux-mips

Hello,

Yep, that's the problem we've run into as well.  We're working on a NAPI
patch for the sb1250-mac.c driver which fixes this - but that's not
quite ready for release yet.

In the meantime, adding the following line manually to net/core/dev.c
(in netif_rx(), right after the enqueue label) appears to suppress the
problem with no ill effects...  Note: this is *NOT* a fix, it's a hack.
Please let me know if it works for you.

Thx,
Mark

        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
                if (queue->input_pkt_queue.qlen) {
enqueue:
+                       netif_rx_schedule(&queue->backlog_dev);
                        dev_hold(skb->dev);
                        __skb_queue_tail(&queue->input_pkt_queue, skb);
                        local_irq_restore(flags);
                        return NET_RX_SUCCESS;
                }

                netif_rx_schedule(&queue->backlog_dev);
                goto enqueue;
        }


> -----Original Message-----
> From: Stuart Anderson [mailto:anderson@netsweng.com] 
> Sent: Friday, February 24, 2006 12:03 PM
> To: Mark E Mason
> Cc: linux-mips@linux-mips.org
> Subject: RE: [RFC] SMP initialization order fixes.
> 
> On Fri, 24 Feb 2006, Mark E Mason wrote:
> 
> > Hello Stuart,
> >
> > Um - define "hung"...
> 
> Networking stops happening. At this point, processes are still active.
> Because I am using NFS root, any process that touches the 
> filesystem will then hang. It doesn't take too long for 
> enough processes to touch the FS for the system to be 
> useless. As a test, I put a tmpfs on /tmp, and ran sash from 
> there. That shell would remain responsive after the rest of 
> the system was hung waiting on NFS.
> 
>                                   Stuart
> 
> Stuart R. Anderson                               anderson@netsweng.com
> Network & Software Engineering                   
> http://www.netsweng.com/
> 1024D/37A79149:                                  0791 D3B8 
> 9A4C 2CDC A31F
>                                                    BD03 0A62 
> E534 37A7 9149
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [RFC] SMP initialization order fixes.
  2006-02-24 20:44 Mark E Mason
@ 2006-02-25 15:28 ` Stuart Anderson
  0 siblings, 0 replies; 14+ messages in thread
From: Stuart Anderson @ 2006-02-25 15:28 UTC (permalink / raw)
  To: Mark E Mason; +Cc: linux-mips

On Fri, 24 Feb 2006, Mark E Mason wrote:

> In the meantime, adding the following line manually to net/core/dev.c
> (in netif_rx(), right after the enqueue label) appears to suppress the
> problem with no ill effects...  Note: this is *NOT* a fix, it's a hack.
> Please let me know if it works for you.

It certainly improved things. I let it run "make clean;make -j 4" in a
loop. It ran for at least 3 hours, maybe longer (I left it running over
night), but it did finally end in an Oops:

    .
    .
    .
   LD      .tmp_vmlinux2
   KSYM    .tmp_kallsyms2.S
   AS      .tmp_kallsyms2.o
   LD      vmlinux
eth0: Link speed: 100BaseT FDX
CPU 0 Unable to handle kernel paging request at virtual address
0000000000000088, epc == ffffffff80274ad0, ra == ffffffff80274a64
Oops[#1]:
Cpu 0
$ 0   : 0000000000000000 0000000014001fe0 00000000014e8ad8 0000000000000000
$ 4   : 000000017534a510 0000000000000000 ffffffff8036be20 0000001000000000
$ 8   : 0000000000000000 0000000000000002 ffffffffffffffc0 ffffffff803e33a8
$12   : 0000000000000024 ffffffff803e92d8 0000000000000024 9000000000000000
$16   : a80000000055c3a0 a80000000fcddd80 0000000000050000 ffffffff803e6d00
$20   : a80000000fcddc00 000000ffffffffff a80000000fcddc08 ffffffff803e8008
$24   : 0000000000000002 ffffffff80410000 
$28   : ffffffff80368000 ffffffff8036bd30 0000000014001fe1 ffffffff80274a64
Hi    : 0000000000000000
Lo    : 0000000000000000
epc   : ffffffff80274ad0 sbmac_intr+0x3b0/0x5e8     Not tainted
ra    : ffffffff80274a64 sbmac_intr+0x344/0x5e8
Status: 14001fe2    KX SX UX KERNEL EXL 
Cause : 00809008
BadVA : 0000000000000088
PrId  : 01041100
Process swapper (pid: 0, threadinfo=ffffffff80368000, task=ffffffff8036c000)
Stack : a80000000055c3a0 0000000000000000 0000000000000024 ffffffff8036be20
         0000000000000000 0000000000000001 ffffffff80400000 fffffffffffffffb
         ffffffff8ffff2b0 ffffffff801612e8 0000001000000000 ffffffff803b4ba0
         0000000000000024 a80000000055c3a0 ffffffff803b4bc8 ffffffff8036be20
         ffffffff80161494 ffffffff8016142c 0000001000000000 000000000000001b
         0000000000000000 0000000000000040 0000001000000001 0000000000005188
         ffffffff803e0000 0000000000000004 ffffffff8010465c ffffffff801084bc
         ffffffff8010248c ffffffff80102414 0000000000000000 0000000014001fe1
         0000000000000000 0000000000000004 a8000000cbea6060 ffffffff803e8008
         0000000014001fe0 ffffffff803f0000 0000000000000001 ffffffff803f0000
         ...
Call Trace:
  [<ffffffff801612e8>] handle_IRQ_event+0x80/0xf0
  [<ffffffff80161494>] __do_IRQ+0x13c/0x1f8
  [<ffffffff8016142c>] __do_IRQ+0xd4/0x1f8
  [<ffffffff8010465c>] do_IRQ+0x1c/0x38
  [<ffffffff801084bc>] ll_local_timer_interrupt+0x8c/0x98
  [<ffffffff8010248c>] bcm1480_irq_handler+0x18c/0x1a0
  [<ffffffff80102414>] bcm1480_irq_handler+0x114/0x1a0
  [<ffffffff80299850>] dev_queue_xmit+0x0/0x338
  [<ffffffff80104fa0>] cpu_idle+0x70/0x78
  [<ffffffff80104f60>] cpu_idle+0x30/0x78
  [<ffffffff803b7bf4>] start_kernel+0x39c/0x3e0


Code: de840058  de820048  64420001 <9ca30088> 0083202d  fe820048
fe840058  c0a300ac  2462ffff 
Kernel panic - not syncing: Aiee, killing interrupt handler!
  <0>Rebooting in 5 seconds..Passing control back to CFE...



The "Link speed" message right before it crashed makes me wonder if this may
be a different issue from the one the patch, er, hack, was working around.

Still, this is progress in the right direction.



                                 Stuart

Stuart R. Anderson                               anderson@netsweng.com
Network & Software Engineering                   http://www.netsweng.com/
1024D/37A79149:                                  0791 D3B8 9A4C 2CDC A31F
                                                  BD03 0A62 E534 37A7 9149

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] SMP initialization order fixes.
  2006-02-24  1:42     ` Martin Michlmayr
  (?)
@ 2006-02-28 20:26     ` Martin Michlmayr
  -1 siblings, 0 replies; 14+ messages in thread
From: Martin Michlmayr @ 2006-02-28 20:26 UTC (permalink / raw)
  To: linux-mips

* Martin Michlmayr <tbm@cyrius.com> [2006-02-24 01:42]:
> > I'm not sure if this is the specific fix or not, but I can report that git
> > as of today (approx 2pm est) is working better than it has since 2.6.14 for
> > me on a bcm1480.
> 
> Strange, I get the following oops during boot with latest git:

I upgraded CFE (to 1.2.5) and now the kernel boots.

Mark, it seems the CFE you ship 1480 boards with cannot boot current
kernels.  Maybe you want to check this out.
-- 
Martin Michlmayr
http://www.cyrius.com/

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2006-02-28 20:19 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-22 19:09 [RFC] SMP initialization order fixes Ralf Baechle
2006-02-22 21:41 ` Stuart Anderson
2006-02-23 11:31   ` Ralf Baechle
2006-02-24  0:56     ` Stuart Anderson
2006-02-24  1:42   ` Martin Michlmayr
2006-02-24  1:42     ` Martin Michlmayr
2006-02-28 20:26     ` Martin Michlmayr
2006-02-23  9:52 ` Rojhalat Ibrahim
2006-02-23 11:55   ` Ralf Baechle
  -- strict thread matches above, loose matches on Subject: below --
2006-02-24 18:12 Mark E Mason
2006-02-24 19:43 Mark E Mason
2006-02-24 20:02 ` Stuart Anderson
2006-02-24 20:44 Mark E Mason
2006-02-25 15:28 ` Stuart Anderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.