LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 3/4] powerpc/85xx: Minor fixups for kexec on 85xx
From: Matthew McClintock @ 2010-09-16 22:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Matthew McClintock, kumar.gala
In-Reply-To: <1284677906-23787-1-git-send-email-msm@freescale.com>

Make kexec_down_cpus atmoic since it will be incremented by all
cores as they are coming down

Remove duplicate calls to mpc85xx_smp_kexec_down, now it's called
by the crash and normal kexec pathway only once

Increase the timeout to wait for other cores to shutdown

Signed-off-by: Matthew McClintock <msm@freescale.com>
---
 arch/powerpc/platforms/85xx/smp.c |   24 +++++++++++-------------
 1 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index cb8ad3b..29416a9 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -114,17 +114,15 @@ struct smp_ops_t smp_85xx_ops = {
 };
 
 #ifdef CONFIG_KEXEC
-static int kexec_down_cpus = 0;
+atomic_t kexec_down_cpus = ATOMIC_INIT(0);
 
 void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
 {
-	/* When crashing, this gets called on all CPU's we only
-	 * take down the non-boot cpus */
-	if (smp_processor_id() != boot_cpuid)
-	{
-		local_irq_disable();
-		kexec_down_cpus++;
+	local_irq_disable();
 
+	if (secondary) {
+		atomic_inc(&kexec_down_cpus);
+		/* loop forever */
 		while (1);
 	}
 }
@@ -137,14 +135,14 @@ static void mpc85xx_smp_kexec_down(void *arg)
 
 static void mpc85xx_smp_machine_kexec(struct kimage *image)
 {
-	int timeout = 2000;
-	int i;
+	int timeout = INT_MAX;
+	int i, num_cpus = num_present_cpus();
 
-	set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
 
-	smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
 
-	while ( (kexec_down_cpus != (num_online_cpus() - 1)) &&
+	while ( (atomic_read(&kexec_down_cpus) != (num_cpus - 1)) &&
 		( timeout > 0 ) )
 	{
 		timeout--;
@@ -153,7 +151,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
 	if ( !timeout )
 		printk(KERN_ERR "Unable to bring down secondary cpu(s)");
 
-	for (i = 0; i < num_present_cpus(); i++)
+	for (i = 0; i < num_cpus; i++)
 	{
 		if ( i == smp_processor_id() ) continue;
 		mpic_reset_core(i);
-- 
1.6.6.1

^ permalink raw reply related

* [PATCH 4/4] powerpc/85xx: flush dcache before resetting cores
From: Matthew McClintock @ 2010-09-16 22:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Matthew McClintock, kumar.gala
In-Reply-To: <1284677906-23787-1-git-send-email-msm@freescale.com>

When we do an mpic_reset_core we need to make sure the dcache
is flushed

Signed-off-by: Matthew McClintock <msm@freescale.com>
---
 arch/powerpc/platforms/85xx/smp.c |   50 +++++++++++++++++++++++++++++++++++++
 1 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 29416a9..c89a370 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/kexec.h>
+#include <linux/highmem.h>
 
 #include <asm/machdep.h>
 #include <asm/pgtable.h>
@@ -133,11 +134,60 @@ static void mpc85xx_smp_kexec_down(void *arg)
 		ppc_md.kexec_cpu_down(0,1);
 }
 
+static void map_and_flush(unsigned long paddr)
+{
+	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+	unsigned long kaddr  = (unsigned long)kmap(page);
+
+	flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
+	kunmap(page);
+}
+
+/**
+ * Before we reset the other cores, we need to flush relevant cache
+ * out to memory so we don't get anything corrupted, some of these flushes
+ * are performed out of an overabundance of caution as interrupts are not
+ * disabled yet and we can switch cores
+ */
+static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image)
+{
+	kimage_entry_t *ptr, entry;
+	unsigned long paddr;
+	int i;
+
+	if (image->type == KEXEC_TYPE_DEFAULT) {
+		/* normal kexec images are stored in temporary pages */
+		for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
+		     ptr = (entry & IND_INDIRECTION) ?
+				phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
+			if (!(entry & IND_DESTINATION)) {
+				map_and_flush(entry);
+			}
+		}
+		/* flush out last IND_DONE page */
+		map_and_flush(entry);
+	} else {
+		/* crash type kexec images are copied to the crash region */
+		for (i = 0; i < image->nr_segments; i++) {
+			struct kexec_segment *seg = &image->segment[i];
+			for (paddr = seg->mem; paddr < seg->mem + seg->memsz;
+			     paddr += PAGE_SIZE) {
+				map_and_flush(paddr);
+			}
+		}
+	}
+
+	/* also flush the kimage struct to be passed in as well */
+	flush_dcache_range((unsigned long)image,
+			   (unsigned long)image + sizeof(*image));
+}
+
 static void mpc85xx_smp_machine_kexec(struct kimage *image)
 {
 	int timeout = INT_MAX;
 	int i, num_cpus = num_present_cpus();
 
+	mpc85xx_smp_flush_dcache_kexec(image);
 
 	if (image->type == KEXEC_TYPE_DEFAULT)
 		smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
-- 
1.6.6.1

^ permalink raw reply related

* Re: [PATCH 01/15] ppc: fix return type of BUID_{HI,LO} macros
From: Scott Wood @ 2010-09-16 23:04 UTC (permalink / raw)
  To: linasvepstas
  Cc: Nishanth Aravamudan, linuxppc-dev, Breno Leitao, Paul Mackerras,
	Milton Miller
In-Reply-To: <AANLkTikHxiUQR2+Gao0sNn2mzVN-M=Z=+3Z3=7H7GJdK@mail.gmail.com>

On Thu, 16 Sep 2010 17:54:46 -0500
Linas Vepstas <linasvepstas@gmail.com> wrote:

> Acked-by: Linas Vepstas <linasvepstas@gmail.com>
> 
> I'm guessing this worked up til now because the rtas_call function prototype
> was telling compiler to cast these to 32-bit before passing them as args.
> (and since these would still get passed as one arg per 64-bit reg, it
> still wouldn't go wrong.)
> 
> What I'm wondering about is why there was no compiler warning about an
> implicit cast of a 64-bit int to a 32-bit int?  Surely, this is something that
> should be warned about!

-Wconversion enables this.

There'd be a lot of false positives to squash with that enabled --
and it might not be worth the resulting explosion in casts.

-Scott

^ permalink raw reply

* Re: [PATCH 01/15] ppc: fix return type of BUID_{HI,LO} macros
From: Linas Vepstas @ 2010-09-16 22:54 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: linuxppc-dev, Paul Mackerras, Breno Leitao, Milton Miller
In-Reply-To: <20100915181319.GB3683@us.ibm.com>

Acked-by: Linas Vepstas <linasvepstas@gmail.com>

I'm guessing this worked up til now because the rtas_call function prototyp=
e
was telling compiler to cast these to 32-bit before passing them as args.
(and since these would still get passed as one arg per 64-bit reg, it
still wouldn't go wrong.)

What I'm wondering about is why there was no compiler warning about an
implicit cast of a 64-bit int to a 32-bit int?  Surely, this is something t=
hat
should be warned about!

-- Linas

On 15 September 2010 13:13, Nishanth Aravamudan <nacc@us.ibm.com> wrote:
> BUID_HI and BUID_LO are used to pass data to call_rtas, which expects
> ints or u32s. But the macro doesn't cast the return, so the result is
> still u64. Use the upper_32_bits and lower_32_bits macros that have been
> added to kernel.h.
>
> Found by getting printf format errors trying to debug print the args, no
> actual code change for 64 bit kernels where the macros are actually
> used.
>
> Signed-off-by: Milton Miller <miltonm@bga.com>
> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
> ---
> =C2=A0arch/powerpc/include/asm/ppc-pci.h | =C2=A0 =C2=A04 ++--
> =C2=A01 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/as=
m/ppc-pci.h
> index 42fdff0..43268f1 100644
> --- a/arch/powerpc/include/asm/ppc-pci.h
> +++ b/arch/powerpc/include/asm/ppc-pci.h
> @@ -28,8 +28,8 @@ extern void find_and_init_phbs(void);
> =C2=A0extern struct pci_dev *isa_bridge_pcidev; =C2=A0 =C2=A0 =C2=A0/* ma=
y be NULL if no ISA bus */
>
> =C2=A0/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID *=
/
> -#define BUID_HI(buid) ((buid) >> 32)
> -#define BUID_LO(buid) ((buid) & 0xffffffff)
> +#define BUID_HI(buid) upper_32_bits(buid)
> +#define BUID_LO(buid) lower_32_bits(buid)
>
> =C2=A0/* PCI device_node operations */
> =C2=A0struct device_node;
> --
> 1.7.0.4
>
>

^ permalink raw reply

* [PATCH 1/4] powerpc/kexec: make masking/disabling interrupts generic
From: Matthew McClintock @ 2010-09-16 22:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Matthew McClintock, kumar.gala

Right now just the kexec crash pathway turns turns off the
interrupts. Pull that out and make a generic version for
use elsewhere

Signed-off-by: Matthew McClintock <msm@freescale.com>
---
 arch/powerpc/include/asm/kexec.h       |    1 +
 arch/powerpc/kernel/crash.c            |   13 +------------
 arch/powerpc/kernel/machine_kexec.c    |   24 ++++++++++++++++++++++++
 arch/powerpc/kernel/machine_kexec_32.c |    4 ++++
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 076327f..f54408d 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -91,6 +91,7 @@ extern void machine_kexec_simple(struct kimage *image);
 extern void crash_kexec_secondary(struct pt_regs *regs);
 extern int overlaps_crashkernel(unsigned long start, unsigned long size);
 extern void reserve_crashkernel(void);
+extern void machine_kexec_mask_interrupts(void);
 
 #else /* !CONFIG_KEXEC */
 static inline int kexec_sr_activated(int cpu) { return 0; }
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 4457382..832c8c4 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -414,18 +414,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crash_kexec_wait_realmode(crashing_cpu);
 #endif
 
-	for_each_irq(i) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		if (!desc || !desc->chip || !desc->chip->eoi)
-			continue;
-
-		if (desc->status & IRQ_INPROGRESS)
-			desc->chip->eoi(i);
-
-		if (!(desc->status & IRQ_DISABLED))
-			desc->chip->shutdown(i);
-	}
+	machine_kexec_mask_interrupts();
 
 	/*
 	 * Call registered shutdown routines savely.  Swap out
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index dd6c141..df7e20c 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -14,10 +14,34 @@
 #include <linux/threads.h>
 #include <linux/memblock.h>
 #include <linux/of.h>
+#include <linux/irq.h>
+
 #include <asm/machdep.h>
 #include <asm/prom.h>
 #include <asm/sections.h>
 
+void machine_kexec_mask_interrupts(void) {
+	unsigned int i;
+
+	for_each_irq(i) {
+		struct irq_desc *desc = irq_to_desc(i);
+
+		if (!desc || !desc->chip)
+			continue;
+
+		if (desc->chip->eoi &&
+		    desc->status & IRQ_INPROGRESS)
+			desc->chip->eoi(i);
+
+		if (desc->chip->mask)
+			desc->chip->mask(i);
+
+		if (desc->chip->disable &&
+		    !(desc->status & IRQ_DISABLED))
+			desc->chip->disable(i);
+	}
+}
+
 void machine_crash_shutdown(struct pt_regs *regs)
 {
 	if (ppc_md.machine_crash_shutdown)
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index ae63a96..e63f2e7 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -39,6 +39,10 @@ void default_machine_kexec(struct kimage *image)
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	/* mask each interrupt so we are in a more sane state for the
+	 * kexec kernel */
+	machine_kexec_mask_interrupts();
+
 	page_list = image->head;
 
 	/* we need both effective and real address here */
-- 
1.6.6.1

^ permalink raw reply related

* [PATCH 2/4] powerpc/85xx: Remove call to mpic_teardown_this_cpu in kexec
From: Matthew McClintock @ 2010-09-16 22:58 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Matthew McClintock, kumar.gala
In-Reply-To: <1284677906-23787-1-git-send-email-msm@freescale.com>

We no longer need to call this explicitly as a generic version is
called by default

Signed-off-by: Matthew McClintock <msm@freescale.com>
---
 arch/powerpc/platforms/85xx/smp.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index a6b1065..cb8ad3b 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -118,8 +118,6 @@ static int kexec_down_cpus = 0;
 
 void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
 {
-	mpic_teardown_this_cpu(1);
-
 	/* When crashing, this gets called on all CPU's we only
 	 * take down the non-boot cpus */
 	if (smp_processor_id() != boot_cpuid)
-- 
1.6.6.1

^ permalink raw reply related

* Re: linux support for freescale e5500 core?
From: Chris Friesen @ 2010-09-16 22:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Chris Friesen, Scott Wood, paulus, timur, linuxppc-dev
In-Reply-To: <1284674628.30449.98.camel@pasglop>

On 09/16/2010 04:03 PM, Benjamin Herrenschmidt wrote:
> On Thu, 2010-09-16 at 15:44 -0600, Chris Friesen wrote:

>> Right.  We currently use a 970-series cpu and have implemented a
>> per-process flag to indicate whether 32-byte mode is needed or not.
>> We'd have to do something similar with the new cpu.
> 
> Sounds like a candidate for upstreaming the patch :-)

As I recall we proposed upstreaming it a while back but there wasn't a
lot of interest since it's most useful in supporting poorly-written
legacy apps. :)

Chris

-- 
The author works for GENBAND Corporation (GENBAND) who is solely
responsible for this email and its contents. All enquiries regarding
this email should be addressed to GENBAND. Nortel has provided the use
of the nortel.com domain to GENBAND in connection with this email solely
for the purpose of connectivity and Nortel Networks Inc. has no
liability for the email or its contents. GENBAND's web site is
http://www.genband.com

^ permalink raw reply

* [PATCH 4/7] wire up sys_time_change_notify() on powerpc
From: Alexander Shishkin @ 2010-09-16 22:10 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jesper Nilsson, Greg KH, Chris Friesen, John Stultz,
	Andreas Schwab, Kay Sievers, linuxppc-dev, Alexander Shishkin,
	Paul Mackerras, H. Peter Anvin, Kirill A. Shutemov, Andrew Morton,
	Linus Torvalds, Christoph Hellwig
In-Reply-To: <1284675049-23479-1-git-send-email-virtuoso@slind.org>

Signed-off-by: Alexander Shishkin <virtuoso@slind.org>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Andreas Schwab <schwab@linux-m68k.org>
CC: Alexander Shishkin <virtuoso@slind.org>
CC: Christoph Hellwig <hch@lst.de>
CC: Jesper Nilsson <jesper.nilsson@axis.com>
CC: linuxppc-dev@lists.ozlabs.org
CC: linux-kernel@vger.kernel.org
---
 arch/powerpc/include/asm/systbl.h |    1 +
 arch/powerpc/include/asm/unistd.h |    3 ++-
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 3d21266..124b610 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -329,3 +329,4 @@ COMPAT_SYS(rt_tgsigqueueinfo)
 SYSCALL(fanotify_init)
 COMPAT_SYS(fanotify_mark)
 SYSCALL_SPU(prlimit64)
+SYSCALL(time_change_notify)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 597e6f9..6ab64da 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -348,10 +348,11 @@
 #define __NR_fanotify_init	323
 #define __NR_fanotify_mark	324
 #define __NR_prlimit64		325
+#define __NR_time_change_notify	326
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		326
+#define __NR_syscalls		327
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
-- 
1.7.2.1.45.gb66c2

^ permalink raw reply related

* Re: linux support for freescale e5500 core?
From: Benjamin Herrenschmidt @ 2010-09-16 22:03 UTC (permalink / raw)
  To: Chris Friesen; +Cc: Scott Wood, linuxppc-dev, paulus, timur
In-Reply-To: <4C928FC5.9050700@genband.com>

On Thu, 2010-09-16 at 15:44 -0600, Chris Friesen wrote:
> On 09/16/2010 03:39 PM, Scott Wood wrote:
> > On Thu, 16 Sep 2010 14:06:37 -0600
> > Chris Friesen <chris.friesen@genband.com> wrote:
> > 
> >> We're looking at maybe doing some work with an e5500-based system.  Is
> >> there any support existing/planned for this core?
> > 
> > Check with whoever you'd be getting the hardware from about a BSP.
> > 
> > And yes, it should be supported upstream at some point.
> 
> We haven't settled on a vendor yet, so I was just wondering in general
> what the story was around support.

Well, the "core" support for 64-bit BookE is upstream (and has been for
a little while) so +/- specific tweaks FSL may have done and the usual
SoC/board support, it shouldn't be too far off.

> Right.  We currently use a 970-series cpu and have implemented a
> per-process flag to indicate whether 32-byte mode is needed or not.
> We'd have to do something similar with the new cpu.

Sounds like a candidate for upstreaming the patch :-)

> One last question--can you comment on the speed of an e5500 relative to
> a 970 for integer operations?

Cheers,
Ben.

^ permalink raw reply

* Re: Reserved pages in PowerPC
From: Benjamin Herrenschmidt @ 2010-09-16 21:52 UTC (permalink / raw)
  To: Ankita Garg; +Cc: linuxppc-dev, linux-mm
In-Reply-To: <20100916120806.GJ2332@in.ibm.com>

On Thu, 2010-09-16 at 17:38 +0530, Ankita Garg wrote:
> Thanks Ben for taking a look at this. So I checked the rtas messages
> on
> the serial console and see the following:
> 
> instantiating rtas at 0x000000000f632000... done
> 
> Which does not correspond to the higher addresses that I see as
> reserved
> (observation on a 16G machine). 

Well, I'd suggest you audit prom_init.c which builds the reserve map,
and the various memblock_reserve() calls in prom.c

Cheers,
Ben.

^ permalink raw reply

* Re: linux support for freescale e5500 core?
From: Chris Friesen @ 2010-09-16 21:44 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev, paulus, timur
In-Reply-To: <20100916163911.6255d359@schlenkerla.am.freescale.net>

On 09/16/2010 03:39 PM, Scott Wood wrote:
> On Thu, 16 Sep 2010 14:06:37 -0600
> Chris Friesen <chris.friesen@genband.com> wrote:
> 
>> We're looking at maybe doing some work with an e5500-based system.  Is
>> there any support existing/planned for this core?
> 
> Check with whoever you'd be getting the hardware from about a BSP.
> 
> And yes, it should be supported upstream at some point.

We haven't settled on a vendor yet, so I was just wondering in general
what the story was around support.

>> Also, do we know what the cache line size is--we have some legacy apps
>> that assume 32-byte.
> 
> The cache line is 64 bytes.  As with e500mc, there is a "dcbz32" mode
> for compatibility, though you probably lose much of the performance
> benefit of dcbz, and it might upset other software that properly checks
> for the cache line size but doesn't use dcbzl.

Right.  We currently use a 970-series cpu and have implemented a
per-process flag to indicate whether 32-byte mode is needed or not.
We'd have to do something similar with the new cpu.

One last question--can you comment on the speed of an e5500 relative to
a 970 for integer operations?

Thanks,

Chris

-- 
Chris Friesen
Software Developer
GENBAND
chris.friesen@genband.com
www.genband.com

^ permalink raw reply

* Re: linux support for freescale e5500 core?
From: Scott Wood @ 2010-09-16 21:39 UTC (permalink / raw)
  To: Chris Friesen; +Cc: linuxppc-dev, paulus, timur
In-Reply-To: <4C9278CD.10607@genband.com>

On Thu, 16 Sep 2010 14:06:37 -0600
Chris Friesen <chris.friesen@genband.com> wrote:

> We're looking at maybe doing some work with an e5500-based system.  Is
> there any support existing/planned for this core?

Check with whoever you'd be getting the hardware from about a BSP.

And yes, it should be supported upstream at some point.

> Also, do we know what the cache line size is--we have some legacy apps
> that assume 32-byte.

The cache line is 64 bytes.  As with e500mc, there is a "dcbz32" mode
for compatibility, though you probably lose much of the performance
benefit of dcbz, and it might upset other software that properly checks
for the cache line size but doesn't use dcbzl.

-Scott

^ permalink raw reply

* Can't find Ethernet PHY on MDIO bus
From: Juliano Maia @ 2010-09-16 20:39 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 1972 bytes --]

Hi.

I have 2 Ethernet PHYs connected to Powerpc MPC8313 TSEC1 and TSEC2
interfaces. The PHY I'm having trouble with is a Marvell 88E3015, and its
connected to TSEC2. I have mapped them in DTS file as follows:

mdio@24520 {
#address-cells = <1>;
#size-cells = <0>;
compatible = "fsl,gianfar-mdio";
reg = <0x24520 0x20>;
phy3: ethernet-phy@3 {
reg = <0x3>;
device_type = "ethernet-phy";
};
phy1: ethernet-phy@1 {
interrupt-parent = <&ipic>;
interrupts = <24 0x8>;
reg = <0x1>;
device_type = "ethernet-phy";
};
};

enet0: ethernet@24000 {
cell-index = <0>;
device_type = "network";
compatible = "gianfar";
reg = <0x24000 0x1000>;
model = "eTSEC";
local-mac-address = [ 00 00 00 00 00 00 ];
interrupts = <32 0x8 33 0x8 34 0x8>;
interrupt-parent = <&ipic>;
fixed-link = <3 1 100 1 0>;
linux,network-index = <0>;
};

enet1: ethernet@25000 {
#address-cells = <1>;
#size-cells = <1>;

cell-index = <1>;
device_type = "network";
model = "eTSEC";
compatible = "gianfar";
reg = <0x25000 0x1000>;
local-mac-address = [ 00 a0 3e a8 a6 6E ];
interrupts = <35 0x8 36 0x8 37 0x8>;
interrupt-parent = <&ipic>;
phy-handle = < &phy1 >;
};

The problem is I'm not being able to set *ETH1* up. When i try, say "*ifconfig
eth1 up*" i got:
*vmunix: Trying to connect phy mdio@e0024520:01
vmunix: eth1: Could not attach to PHY*

I've tried to track this error down and found that the it raises from *
bus_find_device_by_name* function at *bus.c*. For some reason ETH1 is never
added do *devices_kset* for MDIO bus. It only contains an entry for ETH0,
which is 00:3.

What could be wrong? In a hardware point of view, this PHY seems to be OK,
since I've made some tests from U-boot command line trying some MDIO
commands. I've also checked MDIO pins with oscilloscope and everything seems
to be fine. But during Linux initialization I could not see any activity in
MDIO pins. Is that normal? Is there any other files I should change (other
than DTS) to make PHYs work?

Thanks

-- 
Juliano Maia

[-- Attachment #2: Type: text/html, Size: 2907 bytes --]

^ permalink raw reply

* Re: [PATCH] spi_mpc8xxx: fix buffer overrun when sending only/receiving only more than PAGE_SIZE bytes
From: Grant Likely @ 2010-09-16 20:15 UTC (permalink / raw)
  To: christophe leroy
  Cc: spi-devel-general, David Brownell, linuxppc-dev, linux-kernel
In-Reply-To: <20100916070525.4882CC7391@messagerie.si.c-s.fr>

On Thu, Sep 16, 2010 at 09:05:25AM +0200, christophe leroy wrote:
> This patch applies to 2.6.34.7 and 2.6.35.4
> It fixes an issue when sending only or receiving only more than PAGE_SIZE bytes
> 
> Signed-off-by: christophe leroy <christophe.leroy@c-s.fr>

applied to merge-spi branch, thanks.

g.

> 
> diff -urN c/drivers/spi/spi_mpc8xxx.c d/drivers/spi/spi_mpc8xxx.c
> --- c/drivers/spi/spi_mpc8xxx.c	2010-09-08 16:44:03.000000000 +0200
> +++ d/drivers/spi/spi_mpc8xxx.c	2010-09-08 16:44:14.000000000 +0200
> @@ -393,11 +393,17 @@
>  
>  	xfer_ofs = mspi->xfer_in_progress->len - mspi->count;
>  
> -	out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs);
> +	if (mspi->rx_dma == mspi->dma_dummy_rx)
> +		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma);
> +	else
> +		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs);
>  	out_be16(&rx_bd->cbd_datlen, 0);
>  	out_be16(&rx_bd->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP);
>  
> -	out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs);
> +	if (mspi->tx_dma == mspi->dma_dummy_tx)
> +		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma);
> +	else
> +		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs);
>  	out_be16(&tx_bd->cbd_datlen, xfer_len);
>  	out_be16(&tx_bd->cbd_sc, BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP |
>  				 BD_SC_LAST);

^ permalink raw reply

* Help with finding memory read performance problem
From: Ayman El-Khashab @ 2010-09-16 20:12 UTC (permalink / raw)
  To: linuxppc-dev

For our code we needed a fast memory compare of 5 buffers.  I've implemented
said routine in asm and it works fine and is very fast in the test bench. 
However when integrated with the app it is much less performant and we 
are trying to figure out why.

The app in question gets the 5 4MB buffers in the kernel via kmalloc and
then uses them for DMA.  No other methods are being called for the memory
besides kmalloc.  This is an embedded system on the 460EX, so there is no
drive, only RAM.  Within the user code mmap is called on these buffers
physical address and they are given to the compare routine.  The result 
is slow.  If I allocate buffers in user space then the performance is
excellent.  

Next I implemented my compare routine within a kernel module so that it
was using the kernel virtual addresses for each of the buffers. I did 
not see any change between this and the mmap approach.  

For comparison sake, using the kernel memory is about 19s whereas user
memory is about 11s for the same size / configuration of buffer.  In the
test bench the algorithm is about 8s.  The processor is not doing any
other intensive tasks during these tests and the times are repeatable.

Is something happening to mmap'd memory that causes the access to it to
be slow?  Is there a way to speed that up?  Why are the kernel memory
access slower than user memory?  

What is the best overall approach?  Is it to DMA into user memory and
then run the routines there?  Is kmalloc not the best approach for 
kernel DMA memory?

This is on linux 2.6.31.5 on 460EX

thanks
ayman

^ permalink raw reply

* linux support for freescale e5500 core?
From: Chris Friesen @ 2010-09-16 20:06 UTC (permalink / raw)
  To: linuxppc-dev, timur, Benjamin Herrenschmidt, paulus

Hi,

We're looking at maybe doing some work with an e5500-based system.  Is
there any support existing/planned for this core?

Also, do we know what the cache line size is--we have some legacy apps
that assume 32-byte.

Thanks,
Chris

-- 
Chris Friesen
Software Developer
GENBAND
chris.friesen@genband.com
www.genband.com

^ permalink raw reply

* Re: [PATCH] spi_mpc8xxx: fix writing to adress 0
From: Grant Likely @ 2010-09-16 20:11 UTC (permalink / raw)
  To: Joakim Tjernlund
  Cc: christophe leroy, spi-devel-general, David Brownell, linuxppc-dev,
	linux-kernel
In-Reply-To: <OFE491C952.64B1BE24-ONC12577A0.0029DACE-C12577A0.002A0993@transmode.se>

On Thu, Sep 16, 2010 at 09:39:09AM +0200, Joakim Tjernlund wrote:
> > From: christophe leroy <christophe.leroy@c-s.fr>
> > To: David Brownell <dbrownell@users.sourceforge.net>, Grant Likely
> > <grant.likely@secretlab.ca>, spi-devel-general@lists.sourceforge.net, linux-
> > kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
> > Date: 2010/09/16 09:06
> > Subject: [PATCH] spi_mpc8xxx: fix writing to adress 0
> > Sent by: linuxppc-dev-bounces+joakim.tjernlund=transmode.se@lists.ozlabs.org
> >
> > This patch applies to 2.6.34.7 (already included in 2.6.35.4)
> > It fixes an issue when sending only or receiving only (mspi->tx-dma was reset
> > as when no tx_buf is defined, tx_dma is 0)
> >
> > Signed-off-by: christophe leroy <christophe.leroy@c-s.fr>
> 
> Acked-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> 

You need to send this to the linux-stable list and include the sha1
commit id that fixes it in mainline.

Acked-by: Grant Likely <grant.likely@secretlab.ca>

g.

^ permalink raw reply

* Re: 2.6.35-stable/ppc64/p7: suspicious rcu_dereference_check() usage detected during 2.6.35-stable boot
From: Subrata Modak @ 2010-09-16 18:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: sachinp, Valdis.Kletnieks, Li Zefan, linux-kernel, Linuxppc-dev,
	paulmck, DIVYA PRAKASH
In-Reply-To: <20100916161259.GF2462@linux.vnet.ibm.com>

On Thu, 2010-09-16 at 09:12 -0700, Paul E. McKenney wrote:
> On Thu, Sep 16, 2010 at 05:50:31PM +0200, Peter Zijlstra wrote:
> > On Mon, 2010-08-09 at 09:12 -0700, Paul E. McKenney wrote:
> > 
> > > > [    0.051203] CPU0: AMD QEMU Virtual CPU version 0.12.4 stepping 03
> > > > [    0.052999] lockdep: fixing up alternatives.
> > > > [    0.054105]
> > > > [    0.054106] ===================================================
> > > > [    0.054999] [ INFO: suspicious rcu_dereference_check() usage. ]
> > > > [    0.054999] ---------------------------------------------------
> > > > [    0.054999] kernel/sched.c:616 invoked rcu_dereference_check() without protection!
> > > > [    0.054999]
> > > > [    0.054999] other info that might help us debug this:
> > > > [    0.054999]
> > > > [    0.054999]
> > > > [    0.054999] rcu_scheduler_active = 1, debug_locks = 1
> > > > [    0.054999] 3 locks held by swapper/1:
> > > > [    0.054999]  #0:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff814be933>] cpu_up+0x42/0x6a
> > > > [    0.054999]  #1:  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff810400d8>] cpu_hotplug_begin+0x2a/0x51
> > > > [    0.054999]  #2:  (&rq->lock){-.-...}, at: [<ffffffff814be2f7>] init_idle+0x2f/0x113
> > > > [    0.054999]
> > > > [    0.054999] stack backtrace:
> > > > [    0.054999] Pid: 1, comm: swapper Not tainted 2.6.35 #1
> > > > [    0.054999] Call Trace:
> > > > [    0.054999]  [<ffffffff81068054>] lockdep_rcu_dereference+0x9b/0xa3
> > > > [    0.054999]  [<ffffffff810325c3>] task_group+0x7b/0x8a
> > > > [    0.054999]  [<ffffffff810325e5>] set_task_rq+0x13/0x40
> > > > [    0.054999]  [<ffffffff814be39a>] init_idle+0xd2/0x113
> > > > [    0.054999]  [<ffffffff814be78a>] fork_idle+0xb8/0xc7
> > > > [    0.054999]  [<ffffffff81068717>] ? mark_held_locks+0x4d/0x6b
> > > > [    0.054999]  [<ffffffff814bcebd>] do_fork_idle+0x17/0x2b
> > > > [    0.054999]  [<ffffffff814bc89b>] native_cpu_up+0x1c1/0x724
> > > > [    0.054999]  [<ffffffff814bcea6>] ? do_fork_idle+0x0/0x2b
> > > > [    0.054999]  [<ffffffff814be876>] _cpu_up+0xac/0x127
> > > > [    0.054999]  [<ffffffff814be946>] cpu_up+0x55/0x6a
> > > > [    0.054999]  [<ffffffff81ab562a>] kernel_init+0xe1/0x1ff
> > > > [    0.054999]  [<ffffffff81003854>] kernel_thread_helper+0x4/0x10
> > > > [    0.054999]  [<ffffffff814c353c>] ? restore_args+0x0/0x30
> > > > [    0.054999]  [<ffffffff81ab5549>] ? kernel_init+0x0/0x1ff
> > > > [    0.054999]  [<ffffffff81003850>] ? kernel_thread_helper+0x0/0x10
> > > > [    0.056074] Booting Node   0, Processors  #1lockdep: fixing up alternatives.
> > > > [    0.130045]  #2lockdep: fixing up alternatives.
> > > > [    0.203089]  #3 Ok.
> > > > [    0.275286] Brought up 4 CPUs
> > > > [    0.276005] Total of 4 processors activated (16017.17 BogoMIPS).
> > > 
> > > This does look like a new one, thank you for reporting it!
> > > 
> > > Here is my analysis, which should at least provide some humor value to
> > > those who understand the code better than I do.  ;-)
> > > 
> > > So the corresponding rcu_dereference_check() is in
> > > task_subsys_state_check(), and is fetching the cpu_cgroup_subsys_id
> > > element of the newly created task's task->cgroups->subsys[] array.
> > > The "git grep" command finds only three uses of cpu_cgroup_subsys_id,
> > > but no definition.
> > > 
> > > Now, fork_idle() invokes copy_process(), which invokes cgroup_fork(),
> > > which sets the child process's ->cgroups pointer to that of the parent,
> > > also invoking get_css_set(), which increments the corresponding reference
> > > count, doing both operations under task_lock() protection (->alloc_lock).
> > > Because fork_idle() does not specify any of CLONE_NEWNS, CLONE_NEWUTS,
> > > CLONE_NEWIPC, CLONE_NEWPID, or CLONE_NEWNET, copy_namespaces() should
> > > not create a new namespace, and so there should be no ns_cgroup_clone().
> > > We should thus retain the parent's ->cgroups pointer.  And copy_process()
> > > installs the new task in the various lists, so that the task is externally
> > > accessible upon return.
> > > 
> > > After a non-error return from copy_process(), fork_init() invokes
> > > init_idle_pid(), which does not appear to affect the task's cgroup
> > > state.  Next fork_init() invokes init_idle(), which in turn invokes
> > > __set_task_cpu(), which invokes set_task_rq(), which calls task_group()
> > > several times, which calls task_subsys_state_check(), which calls the
> > > rcu_dereference_check() that complained above.
> > > 
> > > However, the result returns by rcu_dereference_check() is stored into
> > > the task structure:
> > > 
> > > 	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
> > > 	p->se.parent = task_group(p)->se[cpu];
> > > 
> > > This means that the corresponding structure must have been tied down with
> > > a reference count or some such.  If such a reference has been taken, then
> > > this complaint is a false positive, and could be suppressed by putting
> > > rcu_read_lock() and rcu_read_unlock() around the call to init_idle()
> > > from fork_idle().  However, although, reference to the enclosing ->cgroups
> > > struct css_set is held, it is not clear to me that this reference applies
> > > to the structures pointed to by the ->subsys[] array, especially given
> > > that the cgroup_subsys_state structures referenced by this array have
> > > their own reference count, which does not appear to me to be acquired
> > > by this code path.
> > > 
> > > Or are the cgroup_subsys_state structures referenced by idle tasks
> > > never freed or some such?
> > 
> > I would hope so!, the idle tasks should be part of the root cgroup,
> > which is not removable.
> > 
> > The problem is that while we do in-fact hold rq->lock, the newly spawned
> > idle thread's cpu is not yet set to the correct cpu so the lockdep check
> > in task_group():
> > 
> >   lockdep_is_held(&task_rq(p)->lock)
> > 
> > will fail.
> > 
> > But of a chicken and egg problem. Setting the cpu needs to have the cpu
> > set ;-)
> 
> OK, makes sense to me.
> 
> > Ingo, why do we have rq->lock there at all? The CPU isn't up and running
> > yet, nothing should be touching it.
> 
> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

Thanks Peter for the fix.

Regards--
Subrata

> > ---
> >  kernel/sched.c |   12 ++++++++++++
> >  1 files changed, 12 insertions(+), 0 deletions(-)
> > 
> > diff --git a/kernel/sched.c b/kernel/sched.c
> > index bd8b487..6241049 100644
> > --- a/kernel/sched.c
> > +++ b/kernel/sched.c
> > @@ -5332,7 +5332,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
> >  	idle->se.exec_start = sched_clock();
> >  
> >  	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
> > +	/*
> > +	 * We're having a chicken and egg problem, even though we are
> > +	 * holding rq->lock, the cpu isn't yet set to this cpu so the
> > +	 * lockdep check in task_group() will fail.
> > +	 *
> > +	 * Similar case to sched_fork(). / Alternatively we could
> > +	 * use task_rq_lock() here and obtain the other rq->lock.
> > +	 *
> > +	 * Silence PROVE_RCU
> > +	 */
> > +	rcu_read_lock();
> >  	__set_task_cpu(idle, cpu);
> > +	rcu_read_unlock();
> >  
> >  	rq->curr = rq->idle = idle;
> >  #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
> > 

^ permalink raw reply

* Re: Generating elf kernel ?
From: Scott Wood @ 2010-09-16 17:09 UTC (permalink / raw)
  To: tiejun.chen; +Cc: linuxppc-dev, Guillaume Dargaud
In-Reply-To: <4C9182EC.5030900@windriver.com>

On Thu, 16 Sep 2010 10:37:32 +0800
"tiejun.chen" <tiejun.chen@windriver.com> wrote:

> 1> can you load the Linux vmlinux directly to the physical address '0' on
> current bootloader?

That depends on what bootloader we're talking about -- I don't know
what the original poster's custom loader can do.  Obviously the
bootloader itself would have to be executing from some other address
(e.g. U-Boot runs from the top of RAM).

> 2> additionally you have to find a way to pass dtb to the native vmlinux.

Yes, of course.  But that's a different issue. :-)

> I believe the hypervisor can boot vmlinux directly. But your so-called vmlinux
> should be guest OS. And the hypervisor will handle/assit TLB exception for the
> guest OS on MMU. Right?  So you can use the hypervisor to load vmlinux to any
> physical address as you expect. 

I was just using our hypervisor as an example, since it has an ELF
loader that can pass a device tree.

> But the guest OS should not be same as the native Linux.

The guest OS *is* the same as native Linux, as far as TLB handling is
concerned.

-Scott

^ permalink raw reply

* Re: [PATCH 2/3 v3] P4080/mtd: Only make elbc nand driver detect nand flash partitions
From: Scott Wood @ 2010-09-16 16:53 UTC (permalink / raw)
  To: Anton Vorontsov
  Cc: Wood Scott-B07421, dedekind1, Zang Roy-R61911, Lan Chunhe-B25806,
	linuxppc-dev, linux-mtd, akpm, dwmw2, Gala Kumar-B11780
In-Reply-To: <20100916164044.GA5669@oksana.dev.rtsoft.ru>

On Thu, 16 Sep 2010 20:40:44 +0400
Anton Vorontsov <cbouatmailru@gmail.com> wrote:

> On Thu, Sep 16, 2010 at 11:14:48AM -0500, Scott Wood wrote:
> > > > DEFINE_MUTEX(fsl_elbc_mutex);
> > > 
> > > I'd place the mutex inside the fsl_lbc_ctrl_dev,
> > > i.e. fsl_lbc_ctrl_dev->nand_lock. This is to avoid more
> > > global variables.
> > 
> > I wouldn't.  If the lock is only meaningful to the NAND driver, it
> > should be declared in the NAND driver.
> > 
> > Besides, it's not any less of a global just because it's sitting inside
> > a singleton struct.
> > 
> > Perhaps it should be declared as a static local inside the probe
> > function, if it's just to guard against this one race.
> 
> OK, in that case better be persistent and not introduce
> fsl_lbc_ctrl_dev->nand at all, as it isn't used outside
> of the driver.

We could, though it would be a step further away from being able to
support multiple controllers if that ever does happen.  Whereas sharing
a mutex between multiple controllers isn't a problem (it's just init,
not a performance-sensitive path where fine-grained locking is
desireable).

> Having fsl_lbc_ctrl_dev->nand and its lock elsewhere in
> the code makes no sense.

That depends on whether you see it as that field's lock or as the init
code's lock, I guess.

It's not that big of a deal either way.

-Scott

^ permalink raw reply

* Re: [PATCH 2/3 v3] P4080/mtd: Only make elbc nand driver detect nand flash partitions
From: Anton Vorontsov @ 2010-09-16 16:40 UTC (permalink / raw)
  To: Scott Wood
  Cc: Wood Scott-B07421, dedekind1, Zang Roy-R61911, Lan Chunhe-B25806,
	linuxppc-dev, linux-mtd, akpm, dwmw2, Gala Kumar-B11780
In-Reply-To: <20100916111448.27ef7440@schlenkerla.am.freescale.net>

On Thu, Sep 16, 2010 at 11:14:48AM -0500, Scott Wood wrote:
> > > DEFINE_MUTEX(fsl_elbc_mutex);
> > 
> > I'd place the mutex inside the fsl_lbc_ctrl_dev,
> > i.e. fsl_lbc_ctrl_dev->nand_lock. This is to avoid more
> > global variables.
> 
> I wouldn't.  If the lock is only meaningful to the NAND driver, it
> should be declared in the NAND driver.
> 
> Besides, it's not any less of a global just because it's sitting inside
> a singleton struct.
> 
> Perhaps it should be declared as a static local inside the probe
> function, if it's just to guard against this one race.

OK, in that case better be persistent and not introduce
fsl_lbc_ctrl_dev->nand at all, as it isn't used outside
of the driver.

Having fsl_lbc_ctrl_dev->nand and its lock elsewhere in
the code makes no sense.

> > Btw, even before this patch, it seems that the driver had
> > all these bugs/races, i.e. ctrl->controller.lock was not
> > used at all. Ugh.
> 
> It is used, search nand_base.c for controller->lock.

OK, now I see, the driver implements its own chip->controller
(which is exactly what ctrl->controller is). Then we're fine.

Thanks,

-- 
Anton Vorontsov
email: cbouatmailru@gmail.com
irc://irc.freenode.net/bd2

^ permalink raw reply

* Re: [PATCH 2/3 v3] P4080/mtd: Only make elbc nand driver detect nand flash partitions
From: Scott Wood @ 2010-09-16 16:14 UTC (permalink / raw)
  To: Anton Vorontsov
  Cc: Wood Scott-B07421, dedekind1, Zang Roy-R61911, Lan Chunhe-B25806,
	linuxppc-dev, linux-mtd, akpm, dwmw2, Gala Kumar-B11780
In-Reply-To: <20100916112624.GA32074@oksana.dev.rtsoft.ru>

On Thu, 16 Sep 2010 15:26:24 +0400
Anton Vorontsov <cbouatmailru@gmail.com> wrote:

> On Thu, Sep 16, 2010 at 06:39:40PM +0800, Zang Roy-R61911 wrote:
> [...]
> > But my code has some assignment for "foo" instead of a simple
> > allocation, how about this way for my code:
> 
> This will surely work, and all the rest is just a matter of
> taste. So, I'm fine with it. But see below, I think I found
> some new, quite serious issues.
> 
> > DEFINE_MUTEX(fsl_elbc_mutex);
> 
> I'd place the mutex inside the fsl_lbc_ctrl_dev,
> i.e. fsl_lbc_ctrl_dev->nand_lock. This is to avoid more
> global variables.

I wouldn't.  If the lock is only meaningful to the NAND driver, it
should be declared in the NAND driver.

Besides, it's not any less of a global just because it's sitting inside
a singleton struct.

Perhaps it should be declared as a static local inside the probe
function, if it's just to guard against this one race.

> >                 elbc_fcm_ctrl->read_bytes = 0;
> >                 elbc_fcm_ctrl->index = 0;
> >                 elbc_fcm_ctrl->addr = NULL;
> 
> I guess these variables should be per chip select, as
> otherwise there will be tons of races when somebody try
> to access two or more NAND chips simultaneously.

The NAND layer has its own per-controller mutex that prevents this.

> So, I'd suggest to redo the whole thing this way: don't allocate
> elbc_fcm_ctrl in this driver, but make an array inside the
> fsl_lbc_ctrl_dev. I.e.
> fsl_lbc_ctrl_dev->nand_ctrl[MAX_CHIP_SELECTS]

NACK.

There is not a separate controller per chip select.

> Btw, even before this patch, it seems that the driver had
> all these bugs/races, i.e. ctrl->controller.lock was not
> used at all. Ugh.

It is used, search nand_base.c for controller->lock.

-Scott

^ permalink raw reply

* Re: 2.6.35-stable/ppc64/p7: suspicious rcu_dereference_check() usage detected during 2.6.35-stable boot
From: Paul E. McKenney @ 2010-09-16 16:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: sachinp, Valdis.Kletnieks, Li Zefan, linux-kernel, Linuxppc-dev,
	Subrata Modak, DIVYA PRAKASH
In-Reply-To: <1284652231.2275.569.camel@laptop>

On Thu, Sep 16, 2010 at 05:50:31PM +0200, Peter Zijlstra wrote:
> On Mon, 2010-08-09 at 09:12 -0700, Paul E. McKenney wrote:
> 
> > > [    0.051203] CPU0: AMD QEMU Virtual CPU version 0.12.4 stepping 03
> > > [    0.052999] lockdep: fixing up alternatives.
> > > [    0.054105]
> > > [    0.054106] ===================================================
> > > [    0.054999] [ INFO: suspicious rcu_dereference_check() usage. ]
> > > [    0.054999] ---------------------------------------------------
> > > [    0.054999] kernel/sched.c:616 invoked rcu_dereference_check() without protection!
> > > [    0.054999]
> > > [    0.054999] other info that might help us debug this:
> > > [    0.054999]
> > > [    0.054999]
> > > [    0.054999] rcu_scheduler_active = 1, debug_locks = 1
> > > [    0.054999] 3 locks held by swapper/1:
> > > [    0.054999]  #0:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff814be933>] cpu_up+0x42/0x6a
> > > [    0.054999]  #1:  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff810400d8>] cpu_hotplug_begin+0x2a/0x51
> > > [    0.054999]  #2:  (&rq->lock){-.-...}, at: [<ffffffff814be2f7>] init_idle+0x2f/0x113
> > > [    0.054999]
> > > [    0.054999] stack backtrace:
> > > [    0.054999] Pid: 1, comm: swapper Not tainted 2.6.35 #1
> > > [    0.054999] Call Trace:
> > > [    0.054999]  [<ffffffff81068054>] lockdep_rcu_dereference+0x9b/0xa3
> > > [    0.054999]  [<ffffffff810325c3>] task_group+0x7b/0x8a
> > > [    0.054999]  [<ffffffff810325e5>] set_task_rq+0x13/0x40
> > > [    0.054999]  [<ffffffff814be39a>] init_idle+0xd2/0x113
> > > [    0.054999]  [<ffffffff814be78a>] fork_idle+0xb8/0xc7
> > > [    0.054999]  [<ffffffff81068717>] ? mark_held_locks+0x4d/0x6b
> > > [    0.054999]  [<ffffffff814bcebd>] do_fork_idle+0x17/0x2b
> > > [    0.054999]  [<ffffffff814bc89b>] native_cpu_up+0x1c1/0x724
> > > [    0.054999]  [<ffffffff814bcea6>] ? do_fork_idle+0x0/0x2b
> > > [    0.054999]  [<ffffffff814be876>] _cpu_up+0xac/0x127
> > > [    0.054999]  [<ffffffff814be946>] cpu_up+0x55/0x6a
> > > [    0.054999]  [<ffffffff81ab562a>] kernel_init+0xe1/0x1ff
> > > [    0.054999]  [<ffffffff81003854>] kernel_thread_helper+0x4/0x10
> > > [    0.054999]  [<ffffffff814c353c>] ? restore_args+0x0/0x30
> > > [    0.054999]  [<ffffffff81ab5549>] ? kernel_init+0x0/0x1ff
> > > [    0.054999]  [<ffffffff81003850>] ? kernel_thread_helper+0x0/0x10
> > > [    0.056074] Booting Node   0, Processors  #1lockdep: fixing up alternatives.
> > > [    0.130045]  #2lockdep: fixing up alternatives.
> > > [    0.203089]  #3 Ok.
> > > [    0.275286] Brought up 4 CPUs
> > > [    0.276005] Total of 4 processors activated (16017.17 BogoMIPS).
> > 
> > This does look like a new one, thank you for reporting it!
> > 
> > Here is my analysis, which should at least provide some humor value to
> > those who understand the code better than I do.  ;-)
> > 
> > So the corresponding rcu_dereference_check() is in
> > task_subsys_state_check(), and is fetching the cpu_cgroup_subsys_id
> > element of the newly created task's task->cgroups->subsys[] array.
> > The "git grep" command finds only three uses of cpu_cgroup_subsys_id,
> > but no definition.
> > 
> > Now, fork_idle() invokes copy_process(), which invokes cgroup_fork(),
> > which sets the child process's ->cgroups pointer to that of the parent,
> > also invoking get_css_set(), which increments the corresponding reference
> > count, doing both operations under task_lock() protection (->alloc_lock).
> > Because fork_idle() does not specify any of CLONE_NEWNS, CLONE_NEWUTS,
> > CLONE_NEWIPC, CLONE_NEWPID, or CLONE_NEWNET, copy_namespaces() should
> > not create a new namespace, and so there should be no ns_cgroup_clone().
> > We should thus retain the parent's ->cgroups pointer.  And copy_process()
> > installs the new task in the various lists, so that the task is externally
> > accessible upon return.
> > 
> > After a non-error return from copy_process(), fork_init() invokes
> > init_idle_pid(), which does not appear to affect the task's cgroup
> > state.  Next fork_init() invokes init_idle(), which in turn invokes
> > __set_task_cpu(), which invokes set_task_rq(), which calls task_group()
> > several times, which calls task_subsys_state_check(), which calls the
> > rcu_dereference_check() that complained above.
> > 
> > However, the result returns by rcu_dereference_check() is stored into
> > the task structure:
> > 
> > 	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
> > 	p->se.parent = task_group(p)->se[cpu];
> > 
> > This means that the corresponding structure must have been tied down with
> > a reference count or some such.  If such a reference has been taken, then
> > this complaint is a false positive, and could be suppressed by putting
> > rcu_read_lock() and rcu_read_unlock() around the call to init_idle()
> > from fork_idle().  However, although, reference to the enclosing ->cgroups
> > struct css_set is held, it is not clear to me that this reference applies
> > to the structures pointed to by the ->subsys[] array, especially given
> > that the cgroup_subsys_state structures referenced by this array have
> > their own reference count, which does not appear to me to be acquired
> > by this code path.
> > 
> > Or are the cgroup_subsys_state structures referenced by idle tasks
> > never freed or some such?
> 
> I would hope so!, the idle tasks should be part of the root cgroup,
> which is not removable.
> 
> The problem is that while we do in-fact hold rq->lock, the newly spawned
> idle thread's cpu is not yet set to the correct cpu so the lockdep check
> in task_group():
> 
>   lockdep_is_held(&task_rq(p)->lock)
> 
> will fail.
> 
> But of a chicken and egg problem. Setting the cpu needs to have the cpu
> set ;-)

OK, makes sense to me.

> Ingo, why do we have rq->lock there at all? The CPU isn't up and running
> yet, nothing should be touching it.

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  kernel/sched.c |   12 ++++++++++++
>  1 files changed, 12 insertions(+), 0 deletions(-)
> 
> diff --git a/kernel/sched.c b/kernel/sched.c
> index bd8b487..6241049 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -5332,7 +5332,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
>  	idle->se.exec_start = sched_clock();
>  
>  	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
> +	/*
> +	 * We're having a chicken and egg problem, even though we are
> +	 * holding rq->lock, the cpu isn't yet set to this cpu so the
> +	 * lockdep check in task_group() will fail.
> +	 *
> +	 * Similar case to sched_fork(). / Alternatively we could
> +	 * use task_rq_lock() here and obtain the other rq->lock.
> +	 *
> +	 * Silence PROVE_RCU
> +	 */
> +	rcu_read_lock();
>  	__set_task_cpu(idle, cpu);
> +	rcu_read_unlock();
>  
>  	rq->curr = rq->idle = idle;
>  #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
> 

^ permalink raw reply

* Re: 2.6.35-stable/ppc64/p7: suspicious rcu_dereference_check() usage detected during 2.6.35-stable boot
From: Peter Zijlstra @ 2010-09-16 15:50 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: sachinp, Li Zefan, linux-kernel, Linuxppc-dev, paulmck,
	Subrata Modak, DIVYA PRAKASH
In-Reply-To: <10430.1284649951@localhost>

On Thu, 2010-09-16 at 11:12 -0400, Valdis.Kletnieks@vt.edu wrote:
>=20
> Ping?  I just hit it on 2.6.36-rc4-mmotm0915.  Just wanted to make sure t=
he
> issue hadn't been lost/forgotten.=20

lost,.. thanks!

^ permalink raw reply

* Re: 2.6.35-stable/ppc64/p7: suspicious rcu_dereference_check() usage detected during 2.6.35-stable boot
From: Peter Zijlstra @ 2010-09-16 15:50 UTC (permalink / raw)
  To: paulmck
  Cc: sachinp, Valdis.Kletnieks, Li Zefan, linux-kernel, Linuxppc-dev,
	Subrata Modak, DIVYA PRAKASH
In-Reply-To: <20100809161200.GC3026@linux.vnet.ibm.com>

On Mon, 2010-08-09 at 09:12 -0700, Paul E. McKenney wrote:

> > [    0.051203] CPU0: AMD QEMU Virtual CPU version 0.12.4 stepping 03
> > [    0.052999] lockdep: fixing up alternatives.
> > [    0.054105]
> > [    0.054106] =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D
> > [    0.054999] [ INFO: suspicious rcu_dereference_check() usage. ]
> > [    0.054999] ---------------------------------------------------
> > [    0.054999] kernel/sched.c:616 invoked rcu_dereference_check() witho=
ut protection!
> > [    0.054999]
> > [    0.054999] other info that might help us debug this:
> > [    0.054999]
> > [    0.054999]
> > [    0.054999] rcu_scheduler_active =3D 1, debug_locks =3D 1
> > [    0.054999] 3 locks held by swapper/1:
> > [    0.054999]  #0:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff814be=
933>] cpu_up+0x42/0x6a
> > [    0.054999]  #1:  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff810400d8=
>] cpu_hotplug_begin+0x2a/0x51
> > [    0.054999]  #2:  (&rq->lock){-.-...}, at: [<ffffffff814be2f7>] init=
_idle+0x2f/0x113
> > [    0.054999]
> > [    0.054999] stack backtrace:
> > [    0.054999] Pid: 1, comm: swapper Not tainted 2.6.35 #1
> > [    0.054999] Call Trace:
> > [    0.054999]  [<ffffffff81068054>] lockdep_rcu_dereference+0x9b/0xa3
> > [    0.054999]  [<ffffffff810325c3>] task_group+0x7b/0x8a
> > [    0.054999]  [<ffffffff810325e5>] set_task_rq+0x13/0x40
> > [    0.054999]  [<ffffffff814be39a>] init_idle+0xd2/0x113
> > [    0.054999]  [<ffffffff814be78a>] fork_idle+0xb8/0xc7
> > [    0.054999]  [<ffffffff81068717>] ? mark_held_locks+0x4d/0x6b
> > [    0.054999]  [<ffffffff814bcebd>] do_fork_idle+0x17/0x2b
> > [    0.054999]  [<ffffffff814bc89b>] native_cpu_up+0x1c1/0x724
> > [    0.054999]  [<ffffffff814bcea6>] ? do_fork_idle+0x0/0x2b
> > [    0.054999]  [<ffffffff814be876>] _cpu_up+0xac/0x127
> > [    0.054999]  [<ffffffff814be946>] cpu_up+0x55/0x6a
> > [    0.054999]  [<ffffffff81ab562a>] kernel_init+0xe1/0x1ff
> > [    0.054999]  [<ffffffff81003854>] kernel_thread_helper+0x4/0x10
> > [    0.054999]  [<ffffffff814c353c>] ? restore_args+0x0/0x30
> > [    0.054999]  [<ffffffff81ab5549>] ? kernel_init+0x0/0x1ff
> > [    0.054999]  [<ffffffff81003850>] ? kernel_thread_helper+0x0/0x10
> > [    0.056074] Booting Node   0, Processors  #1lockdep: fixing up alter=
natives.
> > [    0.130045]  #2lockdep: fixing up alternatives.
> > [    0.203089]  #3 Ok.
> > [    0.275286] Brought up 4 CPUs
> > [    0.276005] Total of 4 processors activated (16017.17 BogoMIPS).
>=20
> This does look like a new one, thank you for reporting it!
>=20
> Here is my analysis, which should at least provide some humor value to
> those who understand the code better than I do.  ;-)
>=20
> So the corresponding rcu_dereference_check() is in
> task_subsys_state_check(), and is fetching the cpu_cgroup_subsys_id
> element of the newly created task's task->cgroups->subsys[] array.
> The "git grep" command finds only three uses of cpu_cgroup_subsys_id,
> but no definition.
>=20
> Now, fork_idle() invokes copy_process(), which invokes cgroup_fork(),
> which sets the child process's ->cgroups pointer to that of the parent,
> also invoking get_css_set(), which increments the corresponding reference
> count, doing both operations under task_lock() protection (->alloc_lock).
> Because fork_idle() does not specify any of CLONE_NEWNS, CLONE_NEWUTS,
> CLONE_NEWIPC, CLONE_NEWPID, or CLONE_NEWNET, copy_namespaces() should
> not create a new namespace, and so there should be no ns_cgroup_clone().
> We should thus retain the parent's ->cgroups pointer.  And copy_process()
> installs the new task in the various lists, so that the task is externall=
y
> accessible upon return.
>=20
> After a non-error return from copy_process(), fork_init() invokes
> init_idle_pid(), which does not appear to affect the task's cgroup
> state.  Next fork_init() invokes init_idle(), which in turn invokes
> __set_task_cpu(), which invokes set_task_rq(), which calls task_group()
> several times, which calls task_subsys_state_check(), which calls the
> rcu_dereference_check() that complained above.
>=20
> However, the result returns by rcu_dereference_check() is stored into
> the task structure:
>=20
> 	p->se.cfs_rq =3D task_group(p)->cfs_rq[cpu];
> 	p->se.parent =3D task_group(p)->se[cpu];
>=20
> This means that the corresponding structure must have been tied down with
> a reference count or some such.  If such a reference has been taken, then
> this complaint is a false positive, and could be suppressed by putting
> rcu_read_lock() and rcu_read_unlock() around the call to init_idle()
> from fork_idle().  However, although, reference to the enclosing ->cgroup=
s
> struct css_set is held, it is not clear to me that this reference applies
> to the structures pointed to by the ->subsys[] array, especially given
> that the cgroup_subsys_state structures referenced by this array have
> their own reference count, which does not appear to me to be acquired
> by this code path.
>=20
> Or are the cgroup_subsys_state structures referenced by idle tasks
> never freed or some such?

I would hope so!, the idle tasks should be part of the root cgroup,
which is not removable.

The problem is that while we do in-fact hold rq->lock, the newly spawned
idle thread's cpu is not yet set to the correct cpu so the lockdep check
in task_group():

  lockdep_is_held(&task_rq(p)->lock)

will fail.

But of a chicken and egg problem. Setting the cpu needs to have the cpu
set ;-)

Ingo, why do we have rq->lock there at all? The CPU isn't up and running
yet, nothing should be touching it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index bd8b487..6241049 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5332,7 +5332,19 @@ void __cpuinit init_idle(struct task_struct *idle, i=
nt cpu)
 	idle->se.exec_start =3D sched_clock();
=20
 	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+	/*
+	 * We're having a chicken and egg problem, even though we are
+	 * holding rq->lock, the cpu isn't yet set to this cpu so the
+	 * lockdep check in task_group() will fail.
+	 *
+	 * Similar case to sched_fork(). / Alternatively we could
+	 * use task_rq_lock() here and obtain the other rq->lock.
+	 *
+	 * Silence PROVE_RCU
+	 */
+	rcu_read_lock();
 	__set_task_cpu(idle, cpu);
+	rcu_read_unlock();
=20
 	rq->curr =3D rq->idle =3D idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox