All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] SMP initialization order fixes.
@ 2006-02-22 19:09 Ralf Baechle
  2006-02-22 21:41 ` Stuart Anderson
  2006-02-23  9:52 ` Rojhalat Ibrahim
  0 siblings, 2 replies; 14+ messages in thread
From: Ralf Baechle @ 2006-02-22 19:09 UTC (permalink / raw)
  To: linux-mips

This one should hopefully fix the SMP problems of recent times.  It
works on Malta with 34K, it seems to work on IP27 (the kernel is
presumably failing due to other issues), so now I'd ask especially
RM9000 & BCM1250 users for testing.  This really needs to be fixed for
2.6.16.

  Ralf

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index d86affa..d9293c5 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -540,6 +540,9 @@ void __init setup_arch(char **cmdline_p)
 	sparse_init();
 	paging_init();
 	resource_init();
+#ifdef CONFIG_SMP
+	plat_smp_setup();
+#endif
 }
 
 int __init fpu_disable(char *s)
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 5e18986..06ed907 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -236,7 +236,7 @@ void __init smp_prepare_cpus(unsigned in
 	init_new_context(current, &init_mm);
 	current_thread_info()->cpu = 0;
 	smp_tune_scheduling();
-	prom_prepare_cpus(max_cpus);
+	plat_prepare_cpus(max_cpus);
 }
 
 /* preload SMP state for boot cpu */
diff --git a/arch/mips/kernel/smp_mt.c b/arch/mips/kernel/smp_mt.c
index c930364..993b8bf 100644
--- a/arch/mips/kernel/smp_mt.c
+++ b/arch/mips/kernel/smp_mt.c
@@ -143,7 +143,7 @@ static struct irqaction irq_call = {
  * Make sure all CPU's are in a sensible state before we boot any of the
  * secondarys
  */
-void prom_prepare_cpus(unsigned int max_cpus)
+void plat_smp_setup(void)
 {
 	unsigned long val;
 	int i, num;
@@ -179,11 +179,9 @@ void prom_prepare_cpus(unsigned int max_
 				write_vpe_c0_vpeconf0(tmp);
 
 				/* Record this as available CPU */
-				if (i < max_cpus) {
-					cpu_set(i, phys_cpu_present_map);
-					__cpu_number_map[i]	= ++num;
-					__cpu_logical_map[num]	= i;
-				}
+				cpu_set(i, phys_cpu_present_map);
+				__cpu_number_map[i]	= ++num;
+				__cpu_logical_map[num]	= i;
 			}
 
 			/* disable multi-threading with TC's */
@@ -241,7 +239,10 @@ void prom_prepare_cpus(unsigned int max_
 		set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
 		set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
 	}
+}
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 	cpu_ipi_resched_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
 	cpu_ipi_call_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ;
 
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 7f8fda9..0e903cc 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -50,37 +50,25 @@ void __init prom_grab_secondary(void)
  * We don't want to start the secondary CPU yet nor do we have a nice probing
  * feature in PMON so we just assume presence of the secondary core.
  */
-static char maxcpus_string[] __initdata =
-	KERN_WARNING "max_cpus set to 0; using 1 instead\n";
-
-void __init prom_prepare_cpus(unsigned int max_cpus)
+void __init plat_smp_setup(void)
 {
-	int enabled = 0, i;
-
-	if (max_cpus == 0) {
-		printk(maxcpus_string);
-		max_cpus = 1;
-	}
+	int i;
 
 	cpus_clear(phys_cpu_present_map);
 
 	for (i = 0; i < 2; i++) {
-		if (i == max_cpus)
-			break;
-
-		/*
-		 * The boot CPU
-		 */
-		cpu_set(i, phys_cpu_present_map);
+		cpu_set(i, cpu_present_map);
 		__cpu_number_map[i]	= i;
 		__cpu_logical_map[i]	= i;
-		enabled++;
 	}
+}
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 	/*
 	 * Be paranoid.  Enable the IPI only if we're really about to go SMP.
 	 */
-	if (enabled > 1)
+	if (cpus_weight(cpu_possible_map))
 		set_c0_status(STATUSF_IP5);
 }
 
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index dbef3f6..09fa7f5 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -140,7 +140,7 @@ static __init void intr_clear_all(nasid_
 		REMOTE_HUB_CLR_INTR(nasid, i);
 }
 
-void __init prom_prepare_cpus(unsigned int max_cpus)
+void __init plat_smp_setup(void)
 {
 	cnodeid_t	cnode;
 
@@ -161,6 +161,11 @@ void __init prom_prepare_cpus(unsigned i
 	alloc_cpupda(0, 0);
 }
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
+	/* We already did everything necessary earlier */
+}
+
 /*
  * Launch a slave into smp_bootstrap().  It doesn't take an argument, and we
  * set sp to the kernel stack of the newly created idle process, gp to the proc
diff --git a/arch/mips/sibyte/cfe/smp.c b/arch/mips/sibyte/cfe/smp.c
index 4477af3..eab20e2 100644
--- a/arch/mips/sibyte/cfe/smp.c
+++ b/arch/mips/sibyte/cfe/smp.c
@@ -31,7 +31,7 @@
  *
  * Common setup before any secondaries are started
  */
-void __init prom_prepare_cpus(unsigned int max_cpus)
+void __init plat_smp_setup(void)
 {
 	int i, num;
 
@@ -40,14 +40,18 @@ void __init prom_prepare_cpus(unsigned i
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
-	for (i=1, num=0; i<NR_CPUS; i++) {
+	for (i = 1, num = 0; i < NR_CPUS; i++) {
 		if (cfe_cpu_stop(i) == 0) {
 			cpu_set(i, phys_cpu_present_map);
 			__cpu_number_map[i] = ++num;
 			__cpu_logical_map[num] = i;
 		}
 	}
-	printk("Detected %i available secondary CPU(s)\n", num);
+	printk(KERN_INFO "Detected %i available secondary CPU(s)\n", num);
+}
+
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 }
 
 /*
diff --git a/include/asm-mips/smp.h b/include/asm-mips/smp.h
index 5618f1e..75c6fe7 100644
--- a/include/asm-mips/smp.h
+++ b/include/asm-mips/smp.h
@@ -58,7 +58,9 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-/* These are defined by the board-specific code. */
+/*
+ * These are defined by the board-specific code.
+ */
 
 /*
  * Cause the function described by call_data to be executed on the passed
@@ -79,7 +81,12 @@ extern void prom_boot_secondary(int cpu,
 extern void prom_init_secondary(void);
 
 /*
- * Detect available CPUs, populate phys_cpu_present_map before smp_init
+ * Populate cpu_possible_map before smp_init, called from setup_arch.
+ */
+extern void plat_smp_setup(void);
+
+/*
+ * Called after init_IRQ but before __cpu_up.
  */
 extern void prom_prepare_cpus(unsigned int max_cpus);
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread
* RE: [RFC] SMP initialization order fixes.
@ 2006-02-24 18:12 Mark E Mason
  0 siblings, 0 replies; 14+ messages in thread
From: Mark E Mason @ 2006-02-24 18:12 UTC (permalink / raw)
  To: Martin Michlmayr, linux-mips

Hello,

Odd.  The GIT tip is working for me on the 1480, *but* I'm compiling
with gcc 3.4.3, not gcc 4.*:

Transferring control to the kernel.
Kernel entry point is at 0x80424000
Broadcom SiByte BCM1480 A2 (pass1) @ 700 MHz (SB-1A rev 0)
Board type: SiByte BCM91x80A/B (BigSur)
[CPU1]
[cpu1]
[CPU2]
[cpu2]
[CPU3]
[cpu3]
[4294667.296000] Linux version 2.6.16-rc4-g762ac03f-dirty
(mason@hawaii.localdomain) (gcc version 3.4.3) #3 SMP Thu Feb 23
15:07:00 PST 2006
[4294667.296000] CPU revision is: 01041100
[4294667.296000] FPU revision is: 000f0103
[4294667.296000] swarm setup: M41T81 RTC detected.
[4294667.296000] This kernel optimized for board runs with CFE
[4294667.296000] Determined physical RAM map: 

/Mark 

> -----Original Message-----
> From: linux-mips-bounce@linux-mips.org 
> [mailto:linux-mips-bounce@linux-mips.org] On Behalf Of Martin 
> Michlmayr
> Sent: Thursday, February 23, 2006 5:42 PM
> To: linux-mips@linux-mips.org
> Subject: Re: [RFC] SMP initialization order fixes.
> 
> * Stuart Anderson <anderson@netsweng.com> [2006-02-22 16:41]:
> > >RM9000 & BCM1250 users for testing.  This really needs to be fixed 
> > >for 2.6.16.
> > I'm not sure if this is the specific fix or not, but I can 
> report that 
> > git as of today (approx 2pm est) is working better than is 
> has since 
> > 2.6.14 for me on a bcm1480.
> 
> Strange, I get the following oops during boot with latest git:
> 
> 
> CFE> ifconfig eth0 -auto; boot -elf 192.168.1.1:/srv/tftp/mips/bigsur
> Device eth0:  hwaddr 00-02-4C-F5-2C-3C, ipaddr 192.168.1.146, 
> mask 255.255.255.0
>         gateway 192.168.1.1, nameserver 131.111.8.42, domain 
> cyrius.com Loader:elf Filesys:tftp Dev:eth0 
> File:192.168.1.1:/srv/tftp/mips/bigsur Options:(null)
> Loading: 0xffffffff80100000/3248680
> eth0: Link speed: 100BaseT FDX^[[J
> 0xffffffff80419228/286248 Entry at 0x803e9000 Closing network.
> Starting program at 0x803e9000
> [RUN!]
> Broadcom SiByte BCM1480 A3 (pass1) @ 700 MHz (SB-1A rev 0) 
> Board type: SiByte BCM91x80A/B (BigSur) [CPU1] [cpu1] [CPU2] 
> [cpu2] [CPU3] [cpu3] Linux version 2.6.16-rc4-Helix64-smp 
> (tbm@deprecation) (gcc version 4.0.3 20051201 (prerelease) 
> (Debian 4.0.2-5)) #2 SMP Fri Feb 24 01:36:35 GMT 2006 CPU 
> revision is: 01041100 FPU revision is: 000f0103 swarm setup: 
> M41T81 RTC detected.
> This kernel optimized for board runs with CFE Determined 
> physical RAM map:
>  memory: 000000000fe7ae00 @ 0000000000000000 (usable)
>  memory: 000000001ffffe00 @ 0000000080000000 (usable)
>  memory: 000000000ffffe00 @ 00000000c0000000 (usable)
>  memory: 000000003ffffe00 @ 0000000140000000 (usable) 
> Detected 3 available secondary CPU(s) Built 1 zonelists 
> Kernel command line: root=/dev/hda2 Primary instruction cache 
> 32kB, 4-way, linesize 32 bytes.
> Primary data cache 32kB, 4-way, linesize 32 bytes.
> Synthesized TLB refill handler (35 instructions).
> Synthesized TLB load handler fastpath (49 instructions).
> Synthesized TLB store handler fastpath (49 instructions).
> Synthesized TLB modify handler fastpath (48 instructions).
> PID hash table entries: 4096 (order: 12, 131072 bytes) Dentry 
> cache hash table entries: 1048576 (order: 11, 8388608 bytes) 
> Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)
> Memory: 1991780k/2095580k available (2437k kernel code, 
> 103096k reserved, 539k data, 196k init, 0k highmem) 
> Mount-cache hash table entries: 256 Checking for 'wait' 
> instruction...  unavailable.
> Checking for the multiply/shift bug... no.
> Checking for the daddi bug... no.
> Checking for the daddiu bug... no.
> CPU revision is: 03041100
> FPU revision is: 000f0103
> CPU 0 Unable to handle kernel paging request at virtual 
> address 0000008b3cb03e00, epc == ffffffff8010b37c, ra == 
> ffffffff8010b2fc Primary instruction cache 32kB, 4-way, 
> linesize 32 bytes.
> Primary data cache 32kB, 4-way, linesize 32 bytes.
> Synthesized TLB refill handler (35 instructions).
> Oops[#1]:
> Cpu 0
> $ 0   : 0000000000000000 0000000000000001 0000000003333333 
> 0000008b3cb03e00
> $ 4   : ffffffff8041be00 0000000000000001 0000000000000000 
> ffffffff8041c588
> $ 8   : 0000000014001fe1 ffffffff8fefcae0 ffffffff803e9108 
> 0000000000000000
> $12   : ffffffffffffffff ffffffff8026f7e8 ffffffff80420000 
> ffffffff80420000
> $16   : 0000000000000001 0000000000000001 0000000000000001 
> ffffffff8041c5c0
> $20   : ffffffff80430000 0000000000000000 0000000000000000 
> 0000000000000000
> $24   : ffffffff80430000 ffffffff8fe7dfd4                     
>              
> $28   : ffffffff9fff8000 a80000009fffbf30 0000000000000000 
> ffffffff8010b2fc
> Hi    : 0000000000000000
> Lo    : 0000000000000024
> epc   : ffffffff8010b37c __cpu_up+0xb4/0x168     Not tainted
> ra    : ffffffff8010b2fc __cpu_up+0x34/0x168
> Status: 14001fe3    KX SX UX KERNEL EXL IE 
> Cause : 00808008
> BadVA : 0000008b3cb03e00
> PrId  : 01041100
> Process swapper (pid: 1, threadinfo=a80000009fff8000, 
> task=a80000000fe79848) Stack : 0000000000000001 
> 0000000000000001 ffffffff8015eccc ffffffff8015ecb8
>         0000000000000001 ffffffff80420000 ffffffff80420000 
> ffffffff803a0000
>         0000000000000000 ffffffff80100e78 0000000000000000 
> 0000000000000000
>         0000000000000000 0000000000000000 0000000000000000 
> 0000000000000000
>         0000000000000000 0000000000000000 0000000000000000 
> ffffffff80104c80
>         0000000000000000 ffffffff80104c70 0000000000000000 
> 0000000000000000
>         0000000000000000 0000000000000000 Call Trace:
>  [<ffffffff8015eccc>] cpu_up+0xfc/0x190
>  [<ffffffff8015ecb8>] cpu_up+0xe8/0x190
>  [<ffffffff80100e78>] init+0x9c8/0x9f0
>  [<ffffffff80104c80>] kernel_thread_helper+0x10/0x18  
> [<ffffffff80104c70>] kernel_thread_helper+0x0/0x18
> 
> 
> Code: 0062182d  3c020333  34423333 <dc640000> 000214b8  
> 3442cccd  00021478  6446995c  00c4001d 
> Kernel panic - not syncing: Attempted to kill init!
>  <0>Rebooting in 5 seconds..261.63 BogoMIPS (lpj=130816)
> Passing control back to CFE...
> 
> -- 
> Martin Michlmayr
> http://www.cyrius.com/
> 
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread
* RE: [RFC] SMP initialization order fixes.
@ 2006-02-24 19:43 Mark E Mason
  2006-02-24 20:02 ` Stuart Anderson
  0 siblings, 1 reply; 14+ messages in thread
From: Mark E Mason @ 2006-02-24 19:43 UTC (permalink / raw)
  To: Stuart Anderson, Ralf Baechle; +Cc: linux-mips

Hello Stuart,

Um - define "hung"...  There are some networking issues in 2.6.14 and
later kernels that only seem to show up in non-NAPI GigE drivers.  Do
you simply lose your NFS server & it never comes back, or does the
console stop responding to echo as well?

Thx,
Mark
 

> -----Original Message-----
> From: linux-mips-bounce@linux-mips.org 
> [mailto:linux-mips-bounce@linux-mips.org] On Behalf Of Stuart Anderson
> Sent: Thursday, February 23, 2006 4:57 PM
> To: Ralf Baechle
> Cc: linux-mips@linux-mips.org
> Subject: Re: [RFC] SMP initialization order fixes.
> 
> On Thu, 23 Feb 2006, Ralf Baechle wrote:
> 
> >> I'm not sure if this is the specific fix or not, but I can report 
> >> that git as of today (approx 2pm est) is working better 
> than is has 
> >> since 2.6.14 for me on a bcm1480. I had tried git a couple 
> of weeks 
> >> ago, and it still hung when I stressed it.
> >
> > Seems unrelated then.  This fix should make the difference between 
> > working perfectly or not at all.  There have been numerous 
> other fixes 
> > since 2.6.14 so hard to say what made the difference.
> 
> You're right, it is unrelated. Shortly after this message 
> went out & came back, it hung up again like it had been doing 
> 8-(. I should have just kept my mouth shut and then it would 
> still be working.
> 
> It really did run much longer that one time, but I haven't 
> been able to reproduce a run that lasted that long again. Sigh....
> 
> 
>                                  Stuart
> 
> Stuart R. Anderson                               anderson@netsweng.com
> Network & Software Engineering                   
> http://www.netsweng.com/
> 1024D/37A79149:                                  0791 D3B8 
> 9A4C 2CDC A31F
>                                                   BD03 0A62 
> E534 37A7 9149
> 
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread
* RE: [RFC] SMP initialization order fixes.
@ 2006-02-24 20:44 Mark E Mason
  2006-02-25 15:28 ` Stuart Anderson
  0 siblings, 1 reply; 14+ messages in thread
From: Mark E Mason @ 2006-02-24 20:44 UTC (permalink / raw)
  To: Stuart Anderson; +Cc: linux-mips

Hello,

Yep, that's the problem we've run into as well.  We're working on a NAPI
patch for the sb1250-mac.c driver which fixes this - but that's not
quite ready for release yet.

In the meantime, adding the following line manually to net/core/dev.c
(in netif_rx(), right after the enqueue label) appears to suppress the
problem with no ill effects...  Note: this is *NOT* a fix, it's a hack.
Please let me know if it works for you.

Thx,
Mark

        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
                if (queue->input_pkt_queue.qlen) {
enqueue:
+                       netif_rx_schedule(&queue->backlog_dev);
                        dev_hold(skb->dev);
                        __skb_queue_tail(&queue->input_pkt_queue, skb);
                        local_irq_restore(flags);
                        return NET_RX_SUCCESS;
                }

                netif_rx_schedule(&queue->backlog_dev);
                goto enqueue;
        }


> -----Original Message-----
> From: Stuart Anderson [mailto:anderson@netsweng.com] 
> Sent: Friday, February 24, 2006 12:03 PM
> To: Mark E Mason
> Cc: linux-mips@linux-mips.org
> Subject: RE: [RFC] SMP initialization order fixes.
> 
> On Fri, 24 Feb 2006, Mark E Mason wrote:
> 
> > Hello Stuart,
> >
> > Um - define "hung"...
> 
> Networking stops happening. At this point, processes are still active.
> Because I am using NFS root, any process that touches the 
> filesystem will then hang. It doesn't take too long for 
> enough processes to touch the FS for the system to be 
> useless. As a test, I put a tmpfs on /tmp, and ran sash from 
> there. That shell would remain responsive after the rest of 
> the system was hung waiting on NFS.
> 
>                                   Stuart
> 
> Stuart R. Anderson                               anderson@netsweng.com
> Network & Software Engineering                   
> http://www.netsweng.com/
> 1024D/37A79149:                                  0791 D3B8 
> 9A4C 2CDC A31F
>                                                    BD03 0A62 
> E534 37A7 9149
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2006-02-28 20:19 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-22 19:09 [RFC] SMP initialization order fixes Ralf Baechle
2006-02-22 21:41 ` Stuart Anderson
2006-02-23 11:31   ` Ralf Baechle
2006-02-24  0:56     ` Stuart Anderson
2006-02-24  1:42   ` Martin Michlmayr
2006-02-24  1:42     ` Martin Michlmayr
2006-02-28 20:26     ` Martin Michlmayr
2006-02-23  9:52 ` Rojhalat Ibrahim
2006-02-23 11:55   ` Ralf Baechle
  -- strict thread matches above, loose matches on Subject: below --
2006-02-24 18:12 Mark E Mason
2006-02-24 19:43 Mark E Mason
2006-02-24 20:02 ` Stuart Anderson
2006-02-24 20:44 Mark E Mason
2006-02-25 15:28 ` Stuart Anderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.