LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] Fake NUMA emulation for PowerPC
From: David Rientjes @ 2007-12-07 23:06 UTC (permalink / raw)
  To: Olof Johansson; +Cc: linuxppc-dev, LKML, Balbir Singh
In-Reply-To: <20071207212817.GA391@lixom.net>

On Fri, 7 Dec 2007, Olof Johansson wrote:

> > Comments are as always welcome!
> 
> Care to explain what this is useful for? (Not saying it's a stupid idea,
> just wondering what the reason for doing it is).
> 

Fake NUMA has always been useful for testing NUMA code without having to 
have a wide range of hardware available to you.  It's a clever tool on 
x86_64 intended for kernel developers that simply makes it easier to test 
code and adds an increased level of robustness to the kernel.  I think 
it's a valuable addition.

^ permalink raw reply

* [PATCH] Fake NUMA emulation for PowerPC (Take 2)
From: Balbir Singh @ 2007-12-07 22:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: LKML, Balbir Singh


Changelog

1. Get rid of the constant 5 (based on comments from
                                Geert.Uytterhoeven@sonycom.com)
2. Implement suggestions from Olof Johannson
3. Check if cmdline is NULL in fake_numa_create_new_node()

Tested with additional parameters from Olof

numa=debug,fake=
numa=foo,fake=bar


Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
NUMA nodes can be specified using the following command line option

numa=fake=<node range>

node range is of the format <range1>,<range2>,...<rangeN>

Each of the rangeX parameters is passed using memparse(). I find the patch
useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
on a non-numa box with the following arguments

numa=fake=1G
numa=fake=1G,2G
name=fake=1G,512M,2G
numa=fake=1500M,2800M mem=3500M
numa=fake=1G mem=512M
numa=fake=1G mem=1G

This patch applies on top of 2.6.24-rc4.

All though I've tried my best to handle some of the architecture specific
details of PowerPC, I might have overlooked something obvious, like the usage
of an API or some architecture tweaks. The patch depends on CONFIG_NUMA and
I decided against creating a separate config option for fake NUMA to keep
the code simple.

Comments are as always welcome!

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 arch/powerpc/mm/numa.c |   59 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 5 deletions(-)

diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
--- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
+++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 03:19:46.000000000 +0530
@@ -24,6 +24,8 @@
 
 static int numa_enabled = 1;
 
+static char *cmdline __initdata;
+
 static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
@@ -39,6 +41,43 @@ static bootmem_data_t __initdata plat_no
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
+static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+						unsigned int *nid)
+{
+	unsigned long long mem;
+	char *p = cmdline;
+	static unsigned int fake_nid = 0;
+	static unsigned long long curr_boundary = 0;
+
+	*nid = fake_nid;
+	if (!p)
+		return 0;
+
+	mem = memparse(p, &p);
+	if (!mem)
+		return 0;
+
+	if (mem < curr_boundary)
+		return 0;
+
+	curr_boundary = mem;
+
+	if ((end_pfn << PAGE_SHIFT) > mem) {
+		/*
+		 * Skip commas and spaces
+		 */
+		while (*p == ',' || *p == ' ' || *p == '\t')
+			p++;
+
+		cmdline = p;
+		fake_nid++;
+		*nid = fake_nid;
+		dbg("created new fake_node with id %d\n", fake_nid);
+		return 1;
+	}
+	return 0;
+}
+
 static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
@@ -344,12 +383,14 @@ static void __init parse_drconf_memory(s
 			if (nid == 0xffff || nid >= MAX_NUMNODES)
 				nid = default_nid;
 		}
-		node_set_online(nid);
 
 		size = numa_enforce_memory_limit(start, lmb_size);
 		if (!size)
 			continue;
 
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+		node_set_online(nid);
+
 		add_active_range(nid, start >> PAGE_SHIFT,
 				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
 	}
@@ -429,7 +470,6 @@ new_range:
 		nid = of_node_to_nid_single(memory);
 		if (nid < 0)
 			nid = default_nid;
-		node_set_online(nid);
 
 		if (!(size = numa_enforce_memory_limit(start, size))) {
 			if (--ranges)
@@ -438,6 +478,9 @@ new_range:
 				continue;
 		}
 
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+		node_set_online(nid);
+
 		add_active_range(nid, start >> PAGE_SHIFT,
 				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
 
@@ -461,7 +504,7 @@ static void __init setup_nonnuma(void)
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long total_ram = lmb_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
-	unsigned int i;
+	unsigned int i, nid = 0;
 
 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
@@ -471,9 +514,11 @@ static void __init setup_nonnuma(void)
 	for (i = 0; i < lmb.memory.cnt; ++i) {
 		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
 		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
-		add_active_range(0, start_pfn, end_pfn);
+
+		fake_numa_create_new_node(end_pfn, &nid);
+		add_active_range(nid, start_pfn, end_pfn);
+		node_set_online(nid);
 	}
-	node_set_online(0);
 }
 
 void __init dump_numa_cpu_topology(void)
@@ -702,6 +747,10 @@ static int __init early_numa(char *p)
 	if (strstr(p, "debug"))
 		numa_debug = 1;
 
+	p = strstr(p, "fake=");
+	if (p)
+		cmdline = p + strlen("fake=");
+
 	return 0;
 }
 early_param("numa", early_numa);
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 22:26 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: linuxppc-dev, LKML
In-Reply-To: <20071207221106.GH16824@localdomain>

Nathan Lynch wrote:
> Hi Balbir-
> 
> Balbir Singh wrote:
>>
>> Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
>> NUMA nodes can be specified using the following command line option
>>
>> numa=fake=<node range>
>>
>> node range is of the format <range1>,<range2>,...<rangeN>
>>
>> Each of the rangeX parameters is passed using memparse(). I find the patch
>> useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
>> on a non-numa box with the following arguments
>>
>> numa=fake=1G
>> numa=fake=1G,2G
>> name=fake=1G,512M,2G
>> numa=fake=1500M,2800M mem=3500M
>> numa=fake=1G mem=512M
>> numa=fake=1G mem=1G
> 
> So this doesn't appear to allow one to assign cpus to fake nodes?  Do
> all cpus just get assigned to node 0 with numa=fake?
> 

Yes, they all appear on node 0. We could have tweaks to distribute CPU's
as well.

> A different approach that occurs to me is to use kexec with a doctored
> device tree (i.e. with the ibm,associativity properties modified to
> reflect your desired topology).  Perhaps a little bit obscure, but it
> seems more flexible.
> 

That would be interesting, but it always means that we need to run
kexec, which might involve two boots.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 22:22 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: linuxppc-dev, LKML
In-Reply-To: <200712072301.38723.arnd@arndb.de>

Arnd Bergmann wrote:
> On Friday 07 December 2007, Balbir Singh wrote:
>> Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
>> NUMA nodes can be specified using the following command line option
>>
>> numa=fake=<node range>
>>
>> node range is of the format <range1>,<range2>,...<rangeN>
> 
> Excellent idea! I'd love to have this in RHEL5u1, because that would make
> that distro boot on certain machines that have more memory than is supported
> without an iommu driver. The problem we have is that when you simply
> say mem=1G but all of the first gigabyte is on the first node, you end
> up with a memoryless node, which is not supported.
> 
> Unfortunately, it comes too late for me now, as all new distros already boot
> on Cell machines that need an IOMMU.

Very interesting use case! I am sure there are others were fake NUMA
nodes can be applied. I just listed one other in another email, apart
from using it for playing around with NUMA like machines.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 22:18 UTC (permalink / raw)
  To: Kumar Gala; +Cc: Olof Johansson, linuxppc-dev, LKML
In-Reply-To: <9AEDD952-7F20-471C-9A82-B6F3254BC869@kernel.crashing.org>

Kumar Gala wrote:
> 
> On Dec 7, 2007, at 4:12 PM, Balbir Singh wrote:
> 
>> Kumar Gala wrote:
>>>
>>> On Dec 7, 2007, at 3:35 PM, Balbir Singh wrote:
>>>
>>>> Olof Johansson wrote:
>>>>> Hi,
>>>>>
>>>>> On Sat, Dec 08, 2007 at 02:44:25AM +0530, Balbir Singh wrote:
>>>>>
>>>>>> Comments are as always welcome!
>>>>>
>>>>> Care to explain what this is useful for? (Not saying it's a stupid
>>>>> idea,
>>>>> just wondering what the reason for doing it is).
>>>>>
>>>>
>>>> In my case, I use it to test parts of my memory controller patches
>>>> on an
>>>> emulated NUMA machine. I plan to use it to test out page migration
>>>> across nodes.
>>>
>>> Can you explain that further.  I'm still not clear on why this is
>>> useful.
>>>
>>> - k
>>
>> Sure. In my case I need to emulate NUMA nodes to do some NUMA specific
>> testing. The memory controller I've written has some interesting data
>> structures like per node, per zone LRU lists. To be able to test those
>> features on a non-numa box is a problem, since we get just the default
>> node.
> 
> Maybe I'm missing something, what do you mean by memory controller
> you've written?  (I'm use to the term 'memory controller' meaning the
> actual RAM control).
> 

Ah! that explains the disconnect. If you look at the latest -mm tree. We
have a memory controller under control groups, we use it to control how
much memory a group of process can access at a time.

>> To be able to test the memory controller under NUMA, I use fake NUMA
>> nodes. x86-64 has a similar feature, the code I have here is the
>> simplest I could come up with for PowerPC.
>>
>> I just thought of another very interesting use case, it can be used to
>> split up the zone's lru lock which is highly contended.
> 
> - k


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Kumar Gala @ 2007-12-07 22:15 UTC (permalink / raw)
  To: balbir; +Cc: Olof Johansson, linuxppc-dev, LKML
In-Reply-To: <4759C548.6030304@linux.vnet.ibm.com>


On Dec 7, 2007, at 4:12 PM, Balbir Singh wrote:

> Kumar Gala wrote:
>>
>> On Dec 7, 2007, at 3:35 PM, Balbir Singh wrote:
>>
>>> Olof Johansson wrote:
>>>> Hi,
>>>>
>>>> On Sat, Dec 08, 2007 at 02:44:25AM +0530, Balbir Singh wrote:
>>>>
>>>>> Comments are as always welcome!
>>>>
>>>> Care to explain what this is useful for? (Not saying it's a  
>>>> stupid idea,
>>>> just wondering what the reason for doing it is).
>>>>
>>>
>>> In my case, I use it to test parts of my memory controller patches  
>>> on an
>>> emulated NUMA machine. I plan to use it to test out page migration
>>> across nodes.
>>
>> Can you explain that further.  I'm still not clear on why this is  
>> useful.
>>
>> - k
>
> Sure. In my case I need to emulate NUMA nodes to do some NUMA specific
> testing. The memory controller I've written has some interesting data
> structures like per node, per zone LRU lists. To be able to test those
> features on a non-numa box is a problem, since we get just the  
> default node.

Maybe I'm missing something, what do you mean by memory controller  
you've written?  (I'm use to the term 'memory controller' meaning the  
actual RAM control).

> To be able to test the memory controller under NUMA, I use fake NUMA
> nodes. x86-64 has a similar feature, the code I have here is the
> simplest I could come up with for PowerPC.
>
> I just thought of another very interesting use case, it can be used to
> split up the zone's lru lock which is highly contended.

- k

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 22:12 UTC (permalink / raw)
  To: Kumar Gala; +Cc: Olof Johansson, linuxppc-dev, LKML
In-Reply-To: <975B5B2B-C1F3-4021-9AE2-8873FFE1BDEC@kernel.crashing.org>

Kumar Gala wrote:
> 
> On Dec 7, 2007, at 3:35 PM, Balbir Singh wrote:
> 
>> Olof Johansson wrote:
>>> Hi,
>>>
>>> On Sat, Dec 08, 2007 at 02:44:25AM +0530, Balbir Singh wrote:
>>>
>>>> Comments are as always welcome!
>>>
>>> Care to explain what this is useful for? (Not saying it's a stupid idea,
>>> just wondering what the reason for doing it is).
>>>
>>
>> In my case, I use it to test parts of my memory controller patches on an
>> emulated NUMA machine. I plan to use it to test out page migration
>> across nodes.
> 
> Can you explain that further.  I'm still not clear on why this is useful.
> 
> - k

Sure. In my case I need to emulate NUMA nodes to do some NUMA specific
testing. The memory controller I've written has some interesting data
structures like per node, per zone LRU lists. To be able to test those
features on a non-numa box is a problem, since we get just the default node.

To be able to test the memory controller under NUMA, I use fake NUMA
nodes. x86-64 has a similar feature, the code I have here is the
simplest I could come up with for PowerPC.

I just thought of another very interesting use case, it can be used to
split up the zone's lru lock which is highly contended.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 22:03 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Geert Uytterhoeven, linuxppc-dev, LKML
In-Reply-To: <200712072258.19331.arnd@arndb.de>

Arnd Bergmann wrote:
> On Friday 07 December 2007, Balbir Singh wrote:
>> Balbir Singh wrote:
>>> Geert Uytterhoeven wrote:
>>>> On Sat, 8 Dec 2007, Balbir Singh wrote:
>>>>> +   if (strstr(p, "fake="))
>>>>> +           cmdline = p + 5;        /* 5 is faster than strlen("fake=") */
>>>> Really? My gcc is smart enough to replace the `strlen("fake=")' by 5, even
>>>> without -O.
>>>>
>>> Thanks for pointing that out, but I am surprised that a compiler would
>>> interpret library routines like strlen.
>>>
>> I just tested it and it turns out that you are right. I'll go hunt to
>> see where gcc gets its magic powers from.
>>
> 
> Even if it wasn't: Why the heck would you want to optimize this? The function
> is run _once_ at boot time and the object code gets thrown away afterwards!
> 
> 	Arnd <><

Cause, I see no downside of doing it. The strlen of fake= is fixed.
But having said that, I am not a purist about the approach, I just want
cmdline to point after "fake="

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Arnd Bergmann @ 2007-12-07 22:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: LKML, Balbir Singh
In-Reply-To: <20071207211425.10223.91240.sendpatchset@balbir-laptop>

On Friday 07 December 2007, Balbir Singh wrote:
> Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
> NUMA nodes can be specified using the following command line option
> 
> numa=fake=<node range>
> 
> node range is of the format <range1>,<range2>,...<rangeN>

Excellent idea! I'd love to have this in RHEL5u1, because that would make
that distro boot on certain machines that have more memory than is supported
without an iommu driver. The problem we have is that when you simply
say mem=1G but all of the first gigabyte is on the first node, you end
up with a memoryless node, which is not supported.

Unfortunately, it comes too late for me now, as all new distros already boot
on Cell machines that need an IOMMU.

	Arnd <><

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Arnd Bergmann @ 2007-12-07 21:58 UTC (permalink / raw)
  To: linuxppc-dev, balbir; +Cc: Geert Uytterhoeven, LKML
In-Reply-To: <4759BE88.3020702@linux.vnet.ibm.com>

On Friday 07 December 2007, Balbir Singh wrote:
> Balbir Singh wrote:
> > Geert Uytterhoeven wrote:
> >> On Sat, 8 Dec 2007, Balbir Singh wrote:
> >>> +=A0=A0=A0if (strstr(p, "fake=3D"))
> >>> +=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0cmdline =3D p + 5;=A0=A0=A0=A0=A0=
=A0=A0=A0/* 5 is faster than strlen("fake=3D") */
> >> Really? My gcc is smart enough to replace the `strlen("fake=3D")' by 5=
, even
> >> without -O.
> >>
> >=20
> > Thanks for pointing that out, but I am surprised that a compiler would
> > interpret library routines like strlen.
> >=20
>=20
> I just tested it and it turns out that you are right. I'll go hunt to
> see where gcc gets its magic powers from.
>=20

Even if it wasn't: Why the heck would you want to optimize this? The functi=
on
is run _once_ at boot time and the object code gets thrown away afterwards!

	Arnd <><

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Kumar Gala @ 2007-12-07 21:55 UTC (permalink / raw)
  To: balbir; +Cc: Olof Johansson, linuxppc-dev, LKML
In-Reply-To: <4759BCA2.1020809@linux.vnet.ibm.com>


On Dec 7, 2007, at 3:35 PM, Balbir Singh wrote:

> Olof Johansson wrote:
>> Hi,
>>
>> On Sat, Dec 08, 2007 at 02:44:25AM +0530, Balbir Singh wrote:
>>
>>> Comments are as always welcome!
>>
>> Care to explain what this is useful for? (Not saying it's a stupid  
>> idea,
>> just wondering what the reason for doing it is).
>>
>
> In my case, I use it to test parts of my memory controller patches  
> on an
> emulated NUMA machine. I plan to use it to test out page migration
> across nodes.

Can you explain that further.  I'm still not clear on why this is  
useful.

- k

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 21:43 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: linuxppc-dev, LKML
In-Reply-To: <4759BCBA.7060800@linux.vnet.ibm.com>

Balbir Singh wrote:
> Geert Uytterhoeven wrote:
>> On Sat, 8 Dec 2007, Balbir Singh wrote:
>>> +	if (strstr(p, "fake="))
>>> +		cmdline = p + 5;	/* 5 is faster than strlen("fake=") */
>> Really? My gcc is smart enough to replace the `strlen("fake=")' by 5, even
>> without -O.
>>
> 
> Thanks for pointing that out, but I am surprised that a compiler would
> interpret library routines like strlen.
> 

I just tested it and it turns out that you are right. I'll go hunt to
see where gcc gets its magic powers from.

>> With kind regards,
>>
>> Geert Uytterhoeven
>> Software Architect
> 
> 

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 21:35 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: linuxppc-dev, LKML
In-Reply-To: <Pine.LNX.4.62.0712072229280.26862@pademelon.sonytel.be>

Geert Uytterhoeven wrote:
> On Sat, 8 Dec 2007, Balbir Singh wrote:
>> +	if (strstr(p, "fake="))
>> +		cmdline = p + 5;	/* 5 is faster than strlen("fake=") */
> 
> Really? My gcc is smart enough to replace the `strlen("fake=")' by 5, even
> without -O.
> 

Thanks for pointing that out, but I am surprised that a compiler would
interpret library routines like strlen.

> With kind regards,
> 
> Geert Uytterhoeven
> Software Architect


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 21:35 UTC (permalink / raw)
  To: Olof Johansson; +Cc: linuxppc-dev, LKML
In-Reply-To: <20071207212817.GA391@lixom.net>

Olof Johansson wrote:
> Hi,
> 
> On Sat, Dec 08, 2007 at 02:44:25AM +0530, Balbir Singh wrote:
> 
>> Comments are as always welcome!
> 
> Care to explain what this is useful for? (Not saying it's a stupid idea,
> just wondering what the reason for doing it is).
> 

In my case, I use it to test parts of my memory controller patches on an
emulated NUMA machine. I plan to use it to test out page migration
across nodes.

>> diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
>> --- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
>> +++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 02:36:02.000000000 +0530
>> @@ -24,6 +24,8 @@
>>  
>>  static int numa_enabled = 1;
>>  
>> +char *cmdline __initdata;
>> +
> 
> Looks like this should be static.
> 

Yes, good catch!

>> @@ -702,6 +744,9 @@ static int __init early_numa(char *p)
>>  	if (strstr(p, "debug"))
>>  		numa_debug = 1;
>>  
>> +	if (strstr(p, "fake="))
>> +		cmdline = p + 5;	/* 5 is faster than strlen("fake=") */
> 
> This doesn't look right.
> 
> You check if it contains fake=, not if it starts with it. So if someone
> did: "numa=foo,fake=bar", or even "numa=debug,fake=", things wouldn't
> work right.
> 

Yes, you are right. I merely followed the strstr convention already
present, which as you righly point out is wrong. I suspect I need to do
something like

p = strstr(p, "fake=")
if (p)
	cmdline = p + 5;

This would still allow us to do things like

numa=foo,fake=bar but the memparse() utility would fail at fake=bar
								^^^

or even

numa=debug,fake=1G

I suspect that this should be good enough for a command line option.

> 
> -Olof


-- 
	Thanks,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Geert Uytterhoeven @ 2007-12-07 21:30 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, LKML
In-Reply-To: <20071207211425.10223.91240.sendpatchset@balbir-laptop>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 819 bytes --]

On Sat, 8 Dec 2007, Balbir Singh wrote:
> +	if (strstr(p, "fake="))
> +		cmdline = p + 5;	/* 5 is faster than strlen("fake=") */

Really? My gcc is smart enough to replace the `strlen("fake=")' by 5, even
without -O.

With kind regards,
 
Geert Uytterhoeven
Software Architect

Sony Network and Software Technology Center Europe
The Corporate Village · Da Vincilaan 7-D1 · B-1935 Zaventem · Belgium
 
Phone:    +32 (0)2 700 8453	
Fax:      +32 (0)2 700 8622	
E-mail:   Geert.Uytterhoeven@sonycom.com	
Internet: http://www.sony-europe.com/
 	
Sony Network and Software Technology Center Europe	
A division of Sony Service Centre (Europe) N.V.	
Registered office: Technologielaan 7 · B-1840 Londerzeel · Belgium	
VAT BE 0413.825.160 · RPR Brussels	
Fortis Bank Zaventem · Swift GEBABEBB08A · IBAN BE39001382358619

^ permalink raw reply

* Re: [PATCH] Fake NUMA emulation for PowerPC
From: Olof Johansson @ 2007-12-07 21:28 UTC (permalink / raw)
  To: Balbir Singh; +Cc: linuxppc-dev, LKML
In-Reply-To: <20071207211425.10223.91240.sendpatchset@balbir-laptop>

Hi,

On Sat, Dec 08, 2007 at 02:44:25AM +0530, Balbir Singh wrote:

> Comments are as always welcome!

Care to explain what this is useful for? (Not saying it's a stupid idea,
just wondering what the reason for doing it is).

> diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
> --- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
> +++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 02:36:02.000000000 +0530
> @@ -24,6 +24,8 @@
>  
>  static int numa_enabled = 1;
>  
> +char *cmdline __initdata;
> +

Looks like this should be static.

> @@ -702,6 +744,9 @@ static int __init early_numa(char *p)
>  	if (strstr(p, "debug"))
>  		numa_debug = 1;
>  
> +	if (strstr(p, "fake="))
> +		cmdline = p + 5;	/* 5 is faster than strlen("fake=") */

This doesn't look right.

You check if it contains fake=, not if it starts with it. So if someone
did: "numa=foo,fake=bar", or even "numa=debug,fake=", things wouldn't
work right.


-Olof

^ permalink raw reply

* [PATCH] Fake NUMA emulation for PowerPC
From: Balbir Singh @ 2007-12-07 21:14 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: LKML, Balbir Singh



Here's a dumb simple implementation of fake NUMA nodes for PowerPC. Fake
NUMA nodes can be specified using the following command line option

numa=fake=<node range>

node range is of the format <range1>,<range2>,...<rangeN>

Each of the rangeX parameters is passed using memparse(). I find the patch
useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
on a non-numa box with the following arguments

numa=fake=1G
numa=fake=1G,2G
name=fake=1G,512M,2G
numa=fake=1500M,2800M mem=3500M
numa=fake=1G mem=512M
numa=fake=1G mem=1G

This patch applies on top of 2.6.24-rc4.

All though I've tried my best to handle some of the architecture specific
details of PowerPC, I might have overlooked something obvious, like the usage
of an API or some architecture tweaks. The patch depends on CONFIG_NUMA and
I decided against creating a separate config option for fake NUMA to keep
the code simple.

Comments are as always welcome!

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 arch/powerpc/mm/numa.c |   55 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 5 deletions(-)

diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
--- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy	2007-12-07 21:25:55.000000000 +0530
+++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c	2007-12-08 02:36:02.000000000 +0530
@@ -24,6 +24,8 @@
 
 static int numa_enabled = 1;
 
+char *cmdline __initdata;
+
 static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
@@ -39,6 +41,40 @@ static bootmem_data_t __initdata plat_no
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
+static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+						unsigned int *nid)
+{
+	unsigned long long mem;
+	char *p = cmdline;
+	static unsigned int fake_nid = 0;
+	static unsigned long long curr_boundary = 0;
+
+	*nid = fake_nid;
+	mem = memparse(p, &p);
+	if (!mem)
+		return 0;
+
+	if (mem < curr_boundary)
+		return 0;
+
+	curr_boundary = mem;
+
+	if ((end_pfn << PAGE_SHIFT) > mem) {
+		/*
+		 * Skip commas and spaces
+		 */
+		while (*p == ',' || *p == ' ' || *p == '\t')
+			p++;
+
+		cmdline = p;
+		fake_nid++;
+		*nid = fake_nid;
+		dbg("created new fake_node with id %d\n", fake_nid);
+		return 1;
+	}
+	return 0;
+}
+
 static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
@@ -344,12 +380,14 @@ static void __init parse_drconf_memory(s
 			if (nid == 0xffff || nid >= MAX_NUMNODES)
 				nid = default_nid;
 		}
-		node_set_online(nid);
 
 		size = numa_enforce_memory_limit(start, lmb_size);
 		if (!size)
 			continue;
 
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+		node_set_online(nid);
+
 		add_active_range(nid, start >> PAGE_SHIFT,
 				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
 	}
@@ -429,7 +467,6 @@ new_range:
 		nid = of_node_to_nid_single(memory);
 		if (nid < 0)
 			nid = default_nid;
-		node_set_online(nid);
 
 		if (!(size = numa_enforce_memory_limit(start, size))) {
 			if (--ranges)
@@ -438,6 +475,9 @@ new_range:
 				continue;
 		}
 
+		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+		node_set_online(nid);
+
 		add_active_range(nid, start >> PAGE_SHIFT,
 				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
 
@@ -461,7 +501,7 @@ static void __init setup_nonnuma(void)
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long total_ram = lmb_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
-	unsigned int i;
+	unsigned int i, nid = 0;
 
 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
@@ -471,9 +511,11 @@ static void __init setup_nonnuma(void)
 	for (i = 0; i < lmb.memory.cnt; ++i) {
 		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
 		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
-		add_active_range(0, start_pfn, end_pfn);
+
+		fake_numa_create_new_node(end_pfn, &nid);
+		add_active_range(nid, start_pfn, end_pfn);
+		node_set_online(nid);
 	}
-	node_set_online(0);
 }
 
 void __init dump_numa_cpu_topology(void)
@@ -702,6 +744,9 @@ static int __init early_numa(char *p)
 	if (strstr(p, "debug"))
 		numa_debug = 1;
 
+	if (strstr(p, "fake="))
+		cmdline = p + 5;	/* 5 is faster than strlen("fake=") */
+
 	return 0;
 }
 early_param("numa", early_numa);
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply

* Re: [PATCH 1/11] ibm_newemac: Add BCM5248 and Marvell 88E1111 PHY support
From: Jeff Garzik @ 2007-12-07 20:09 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: netdev, linuxppc-dev
In-Reply-To: <20071205001534.D681ADDF2D@ozlabs.org>

Benjamin Herrenschmidt wrote:
> From: Stefan Roese <sr@denx.de>
> 
> This patch adds BCM5248 and Marvell 88E1111 PHY support to NEW EMAC driver.
> These PHY chips are used on PowerPC 440EPx boards.
> The PHY code is based on the previous work by Stefan Roese <sr@denx.de>
> 
> Signed-off-by: Stefan Roese <sr@denx.de>
> Signed-off-by: Valentine Barshak <vbarshak@ru.mvista.com>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
> 
>  drivers/net/ibm_newemac/phy.c |   39 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)

applied 1-11 #upstream-fixes

^ permalink raw reply

* Re: Help with MPC5200 + Bestcomm + Local Plus Bus.... same problem [ifm ScanMail: oK]
From: roger blofeld @ 2007-12-07 18:41 UTC (permalink / raw)
  To: jan_baldauf, linuxppc-embedded

---- Original Message ----

From: "jan_baldauf@ifm-electronic.com" <jan_baldauf@ifm-electronic.com>

To: linuxppc-embedded@ozlabs.org

Sent: Friday, December 7, 2007 4:58:56 AM

Subject: Help with MPC5200 + Bestcomm + Local Plus Bus.... same problem [ifm ScanMail: oK] 



  

Hello Roger, 

 

i have read your message. I have now the same problem. I use a CPLD on the Local Plus Bus. The CPLD generates interrupts for the MPC5200B processor. Because of the quick signals, i want to use the Bestcomm unit, to push the Data ( 3 x 16 bit register) directly into the memory. I use the same Kernel. It would be nice, if you could help me, to program a linux driver for that. Or maybe you have any examples. So looking forward for you reply. Thanks a lot for your time. 

 

best regards Jan Baldauf


------




Jan,

 I never did get my FPGA driver working with the bestcomm unit. I ended up doing a half-baked version where I would program the lpc to transfer the data into the FIFO and generate an interrupt when it finished. I could then copy the data quickly from the FIFO to memory (The FIFO interface is wider and faster than my FPGA, so I do reduce processor loading somewhat over directly reading the FPGA) That works OK for small data transfers, but is not optimal.



 In the current mainline there is support for a "gen_bd" bestcomm task which I presume is to be used for the purpose you imply. Perhaps somebody has an example of using that?





good luck!

-rb









      ____________________________________________________________________________________
Looking for last minute shopping deals?  
Find them fast with Yahoo! Search.  http://tools.search.yahoo.com/newsearch/category.php?category=shopping

^ permalink raw reply

* [DTC][PATCH] Fix cross-compile building
From: Kumar Gala @ 2007-12-07 18:28 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev, stuarth

From: Stuart Hughes <stuarth@freescale.com>

This patch allows you to build the DTC source without making the
tests directory.  This is necessary when cross compiling as the
dumptest (and other) files cannot be run/used on the host system.
To use this use: 'make TESTS='

Signed-off-by: Stuart Hughes <stuarth@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 Makefile |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/Makefile b/Makefile
index bcb143f..9d926b5 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,9 @@ else
 DEPTARGETS = $(filter-out $(NODEPTARGETS),$(MAKECMDGOALS))
 endif

-all: dtc ftdump libfdt tests
+TESTS = tests
+
+all: dtc ftdump libfdt $(TESTS)

 install: all
 	@$(VECHO) INSTALL
-- 
1.5.3.4

^ permalink raw reply related

* [PATCH v2] ucc_uart: add support for Freescale QUICCEngine UART
From: Timur Tabi @ 2007-12-07 16:44 UTC (permalink / raw)
  To: galak, linuxppc-dev; +Cc: Timur Tabi

Add support for UART serial ports using a Freescale QUICC Engine
(found on some MPC83xx and MPC85xx SOCs).

Updated booting-without-of.txt to define new properties for a QE UART node,
and a new node definition that describes uploaded QE firmware.

Because of a silicon bug in some QE-enabled SOCs (e.g. 8323 and 8360), a new
microcode is required. This microcode implements UART via a work-around,
hence it's called "Soft-UART".  This driver can use the QE firmware upload
feature to upload the correct microcode to the QE.

Signed-off-by: Timur Tabi <timur@freescale.com>
---

Made selection of Soft-UART dynamic.  Updated to reflect changes in
prerequisite patches.  Added support for 'firmware' node in device tree.

This patch is for Kumar's for-2.6.25 branch, and it applies on top of my
previous patches, "qe: add ability to upload QE firmware" and
"qe: add function qe_clock_source()".

 Documentation/powerpc/booting-without-of.txt |    9 +-
 arch/powerpc/boot/dts/mpc832x_mds.dts        |   49 +
 arch/powerpc/sysdev/qe_lib/Kconfig           |    2 +-
 arch/powerpc/sysdev/qe_lib/ucc_slow.c        |   10 +-
 drivers/serial/Kconfig                       |   10 +
 drivers/serial/Makefile                      |    1 +
 drivers/serial/ucc_uart.c                    | 1498 ++++++++++++++++++++++++++
 7 files changed, 1576 insertions(+), 3 deletions(-)
 create mode 100644 drivers/serial/ucc_uart.c

diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 00fd333..9de0a97 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1616,7 +1616,7 @@ platforms are moved over to use the flattened-device-tree model.
 
    Required properties:
    - device_type : should be "network", "hldc", "uart", "transparent"
-    "bisync" or "atm".
+     "bisync", "atm", or "serial".
    - compatible : could be "ucc_geth" or "fsl_atm" and so on.
    - model : should be "UCC".
    - device-id : the ucc number(1-8), corresponding to UCCx in UM.
@@ -1629,6 +1629,13 @@ platforms are moved over to use the flattened-device-tree model.
    - interrupt-parent : the phandle for the interrupt controller that
      services interrupts for this device.
    - pio-handle : The phandle for the Parallel I/O port configuration.
+   - port-number : for UART drivers, the port number to use, between 0 and 3.
+     This usually corresponds to the /dev/ttyQE device, e.g. <0> = /dev/ttyQE0.
+     The port number is added to the minor number of the device.  Unlike the
+     CPM UART driver, the port-number is required for the QE UART driver.
+   - soft-uart : for UART drivers, if specified this means the QE UART device
+     driver should use "Soft-UART" mode, which is needed on some SOCs that have
+     broken UART hardware.  Soft-UART is provided via a microcode upload.
    - rx-clock-name: the UCC receive clock source
      "none": clock source is disabled
      "brg1" through "brg16": clock source is BRG1-BRG16, respectively
diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts
index fe54489..f6dd945 100644
--- a/arch/powerpc/boot/dts/mpc832x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc832x_mds.dts
@@ -7,6 +7,18 @@
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
+
+ * To enable external serial I/O on a Freescale MPC 8323 SYS/MDS board, do
+ * this:
+ *
+ * 1) On chip U61, lift (disconnect) pins 21 (TXD) and 22 (RXD) from the board.
+ * 2) Solder a wire from U61-21 to P19A-23.  P19 is a grid of pins on the board
+ *    next to the serial ports.
+ * 3) Solder a wire from U61-22 to P19K-22.
+ *
+ * Note that there's a typo in the schematic.  The board labels the last column
+ * of pins "P19K", but in the schematic, that column is called "P19J".  So if
+ * you're going by the schematic, the pin is called "P19J-K22".
  */
 
 / {
@@ -159,6 +171,23 @@
 					1 1e  1  0  1  0 	/* TX_EN */
 					1 1f  2  0  1  0>;/* CRS */
 			};
+			pio5: ucc_pin@05 {
+				pio-map = <
+				/*
+				 *    		      open       has
+				 *   port  pin  dir  drain  sel  irq
+				 */
+					2    0    1      0    2    0  /* TxD5 */
+					2    8    2      0    2    0  /* RxD5 */
+
+					2   1d    2      0    0    0  /* CTS5 */
+					2   1f    1      0    2    0  /* RTS5 */
+
+					2   18    2      0    0    0  /* CD */
+
+				>;
+			};
+
 		};
 	};
 
@@ -250,6 +279,26 @@
 			pio-handle = < &pio4 >;
 		};
 
+		ucc@2400 {
+			device_type = "serial";
+			compatible = "ucc_uart";
+			model = "UCC";
+			device-id = <5>;	/* The UCC number, 1-7*/
+			port-number = <0>;	/* Which ttyQEx device */
+			soft-uart;		/* We need Soft-UART */
+			reg = <2400 200>;
+			interrupts = <28>;	/* From Table 18-12 */
+			interrupt-parent = < &qeic >;
+			/*
+			 * For Soft-UART, we need to set TX to 1X, which
+			 * means specifying separate clock sources.
+			 */
+			rx-clock-name = "brg5";
+			tx-clock-name = "brg6";
+			pio-handle = < &pio5 >;
+		};
+
+
 		mdio@2320 {
 			#address-cells = <1>;
 			#size-cells = <0>;
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index f611d34..adc6621 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -4,7 +4,7 @@
 
 config UCC_SLOW
 	bool
-	default n
+	default y if SERIAL_QE
 	help
 	  This option provides qe_lib support to UCC slow
 	  protocols: UART, BISYNC, QMC
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
index 0174b3a..b2870b2 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
@@ -19,6 +19,7 @@
 #include <linux/stddef.h>
 #include <linux/interrupt.h>
 #include <linux/err.h>
+#include <linux/module.h>
 
 #include <asm/io.h>
 #include <asm/immap_qe.h>
@@ -41,6 +42,7 @@ u32 ucc_slow_get_qe_cr_subblock(int uccs_num)
 	default: return QE_CR_SUBBLOCK_INVALID;
 	}
 }
+EXPORT_SYMBOL(ucc_slow_get_qe_cr_subblock);
 
 void ucc_slow_poll_transmitter_now(struct ucc_slow_private * uccs)
 {
@@ -56,6 +58,7 @@ void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs)
 	qe_issue_cmd(QE_GRACEFUL_STOP_TX, id,
 			 QE_CR_PROTOCOL_UNSPECIFIED, 0);
 }
+EXPORT_SYMBOL(ucc_slow_graceful_stop_tx);
 
 void ucc_slow_stop_tx(struct ucc_slow_private * uccs)
 {
@@ -65,6 +68,7 @@ void ucc_slow_stop_tx(struct ucc_slow_private * uccs)
 	id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num);
 	qe_issue_cmd(QE_STOP_TX, id, QE_CR_PROTOCOL_UNSPECIFIED, 0);
 }
+EXPORT_SYMBOL(ucc_slow_stop_tx);
 
 void ucc_slow_restart_tx(struct ucc_slow_private * uccs)
 {
@@ -74,6 +78,7 @@ void ucc_slow_restart_tx(struct ucc_slow_private * uccs)
 	id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num);
 	qe_issue_cmd(QE_RESTART_TX, id, QE_CR_PROTOCOL_UNSPECIFIED, 0);
 }
+EXPORT_SYMBOL(ucc_slow_restart_tx);
 
 void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode)
 {
@@ -94,6 +99,7 @@ void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode)
 	}
 	out_be32(&us_regs->gumr_l, gumr_l);
 }
+EXPORT_SYMBOL(ucc_slow_enable);
 
 void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode)
 {
@@ -114,6 +120,7 @@ void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode)
 	}
 	out_be32(&us_regs->gumr_l, gumr_l);
 }
+EXPORT_SYMBOL(ucc_slow_disable);
 
 /* Initialize the UCC for Slow operations
  *
@@ -347,6 +354,7 @@ int ucc_slow_init(struct ucc_slow_info * us_info, struct ucc_slow_private ** ucc
 	*uccs_ret = uccs;
 	return 0;
 }
+EXPORT_SYMBOL(ucc_slow_init);
 
 void ucc_slow_free(struct ucc_slow_private * uccs)
 {
@@ -366,5 +374,5 @@ void ucc_slow_free(struct ucc_slow_private * uccs)
 
 	kfree(uccs);
 }
-
+EXPORT_SYMBOL(ucc_slow_free);
 
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index d7e1996..d962b74 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1284,4 +1284,14 @@ config SERIAL_OF_PLATFORM
 	  Currently, only 8250 compatible ports are supported, but
 	  others can easily be added.
 
+config SERIAL_QE
+	tristate "Freescale QUICC Engine serial port support"
+	depends on QUICC_ENGINE
+	select SERIAL_CORE
+	select FW_LOADER
+	default n
+	help
+	  This driver supports the QE serial ports on Freescale embedded
+	  PowerPC that contain a QUICC Engine.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index af6377d..7eb4553 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -64,3 +64,4 @@ obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
 obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
 obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
+obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
new file mode 100644
index 0000000..1b31994
--- /dev/null
+++ b/drivers/serial/ucc_uart.c
@@ -0,0 +1,1498 @@
+/*
+ * Freescale QUICC Engine UART device driver
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * Copyright 2007 Freescale Semiconductor, Inc.  This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ * This driver adds support for UART devices via Freescale's QUICC Engine
+ * found on some Freescale SOCs.
+ *
+ * If Soft-UART support is needed but not already present, then this driver
+ * will request and upload the "Soft-UART" microcode upon probe.  The
+ * filename of the microcode should be fsl_qe_ucode_uart_X_YZ.bin, where "X"
+ * is the name of the SOC (e.g. 8323), and YZ is the revision of the SOC,
+ * (e.g. "11" for 1.1).
+ */
+
+#include <linux/module.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/fs_uart_pd.h>
+#include <asm/ucc_slow.h>
+
+#include <linux/firmware.h>
+#include <asm/reg.h>
+
+/*
+ * The GUMR flag for Soft UART.  This would normally be defined in qe.h,
+ * but Soft-UART is a hack and we want to keep everything related to it in
+ * this file.
+ */
+#define UCC_SLOW_GUMR_H_SUART   	0x00004000      /* Soft-UART */
+
+/*
+ * soft_uart is 1 if we need to use Soft-UART mode
+ */
+static int soft_uart;
+/*
+ * firmware_loaded is 1 if the firmware has been loaded, 0 otherwise.
+ */
+static int firmware_loaded;
+
+/* Enable this macro to configure all serial ports in internal loopback
+   mode */
+/* #define LOOPBACK */
+
+/* The major and minor device numbers are defined in
+ * http://www.lanana.org/docs/device-list/devices-2.6+.txt.  For the QE
+ * UART, we have major number 204 and minor numbers 46 - 49, which are the
+ * same as for the CPM2.  This decision was made because no Freescale part
+ * has both a CPM and a QE.
+ */
+#define SERIAL_QE_MAJOR 204
+#define SERIAL_QE_MINOR 46
+
+/* Since we only have minor numbers 46 - 49, there is a hard limit of 4 ports */
+#define UCC_MAX_UART    4
+
+/* The number of buffer descriptors for receiving characters. */
+#define RX_NUM_FIFO     4
+
+/* The number of buffer descriptors for transmitting characters. */
+#define TX_NUM_FIFO     4
+
+/* The maximum size of the character buffer for a single RX BD. */
+#define RX_BUF_SIZE     32
+
+/* The maximum size of the character buffer for a single TX BD. */
+#define TX_BUF_SIZE     32
+
+#define UCC_WAIT_CLOSING 100
+
+struct ucc_uart_pram {
+	struct ucc_slow_pram common;
+	u8 res1[8];     	/* reserved */
+	__be16 maxidl;  	/* Maximum idle chars */
+	__be16 idlc;    	/* temp idle counter */
+	__be16 brkcr;   	/* Break count register */
+	__be16 parec;   	/* receive parity error counter */
+	__be16 frmec;   	/* receive framing error counter */
+	__be16 nosec;   	/* receive noise counter */
+	__be16 brkec;   	/* receive break condition counter */
+	__be16 brkln;   	/* last received break length */
+	__be16 uaddr[2];	/* UART address character 1 & 2 */
+	__be16 rtemp;   	/* Temp storage */
+	__be16 toseq;   	/* Transmit out of sequence char */
+	__be16 cchars[8];       /* control characters 1-8 */
+	__be16 rccm;    	/* receive control character mask */
+	__be16 rccr;    	/* receive control character register */
+	__be16 rlbc;    	/* receive last break character */
+	__be16 res2;    	/* reserved */
+	__be32 res3;    	/* reserved, should be cleared */
+	u8 res4;		/* reserved, should be cleared */
+	u8 res5[3];     	/* reserved, should be cleared */
+	__be32 res6;    	/* reserved, should be cleared */
+	__be32 res7;    	/* reserved, should be cleared */
+	__be32 res8;    	/* reserved, should be cleared */
+	__be32 res9;    	/* reserved, should be cleared */
+	__be32 res10;   	/* reserved, should be cleared */
+	__be32 res11;   	/* reserved, should be cleared */
+	__be32 res12;   	/* reserved, should be cleared */
+	__be32 res13;   	/* reserved, should be cleared */
+/* The rest is for Soft-UART only */
+	__be16 supsmr;  	/* 0x90, Shadow UPSMR */
+	__be16 res92;   	/* 0x92, reserved, initialize to 0 */
+	__be32 rx_state;	/* 0x94, RX state, initialize to 0 */
+	__be32 rx_cnt;  	/* 0x98, RX count, initialize to 0 */
+	u8 rx_length;   	/* 0x9C, Char length, set to 1+CL+PEN+1+SL */
+	u8 rx_bitmark;  	/* 0x9D, reserved, initialize to 0 */
+	u8 rx_temp_dlst_qe;     /* 0x9E, reserved, initialize to 0 */
+	u8 res14[0xBC - 0x9F];  /* reserved */
+	__be32 dump_ptr;	/* 0xBC, Dump pointer */
+	__be32 rx_frame_rem;    /* 0xC0, reserved, initialize to 0 */
+	u8 rx_frame_rem_size;   /* 0xC4, reserved, initialize to 0 */
+	u8 tx_mode;     	/* 0xC5, mode, 0=AHDLC, 1=UART */
+	u16 tx_state;   	/* 0xC6, TX state */
+	u8 res15[0xD0 - 0xC8];  /* reserved */
+	__be32 resD0;   	/* 0xD0, reserved, initialize to 0 */
+	u8 resD4;       	/* 0xD4, reserved, initialize to 0 */
+	__be16 resD5;   	/* 0xD5, reserved, initialize to 0 */
+} __attribute__ ((packed));
+
+/* SUPSMR definitions, for Soft-UART only */
+#define UCC_UART_SUPSMR_SL      	0x8000
+#define UCC_UART_SUPSMR_RPM_MASK	0x6000
+#define UCC_UART_SUPSMR_RPM_ODD 	0x0000
+#define UCC_UART_SUPSMR_RPM_LOW 	0x2000
+#define UCC_UART_SUPSMR_RPM_EVEN	0x4000
+#define UCC_UART_SUPSMR_RPM_HIGH	0x6000
+#define UCC_UART_SUPSMR_PEN     	0x1000
+#define UCC_UART_SUPSMR_TPM_MASK	0x0C00
+#define UCC_UART_SUPSMR_TPM_ODD 	0x0000
+#define UCC_UART_SUPSMR_TPM_LOW 	0x0400
+#define UCC_UART_SUPSMR_TPM_EVEN	0x0800
+#define UCC_UART_SUPSMR_TPM_HIGH	0x0C00
+#define UCC_UART_SUPSMR_FRZ     	0x0100
+#define UCC_UART_SUPSMR_UM_MASK 	0x00c0
+#define UCC_UART_SUPSMR_UM_NORMAL       0x0000
+#define UCC_UART_SUPSMR_UM_MAN_MULTI    0x0040
+#define UCC_UART_SUPSMR_UM_AUTO_MULTI   0x00c0
+#define UCC_UART_SUPSMR_CL_MASK 	0x0030
+#define UCC_UART_SUPSMR_CL_8    	0x0030
+#define UCC_UART_SUPSMR_CL_7    	0x0020
+#define UCC_UART_SUPSMR_CL_6    	0x0010
+#define UCC_UART_SUPSMR_CL_5    	0x0000
+
+#define UCC_UART_TX_STATE_AHDLC 	0x00
+#define UCC_UART_TX_STATE_UART  	0x01
+#define UCC_UART_TX_STATE_X1    	0x00
+#define UCC_UART_TX_STATE_X16   	0x80
+
+#define UCC_UART_PRAM_ALIGNMENT 0x100
+
+#define UCC_UART_SIZE_OF_BD     UCC_SLOW_SIZE_OF_BD
+#define NUM_CONTROL_CHARS       8
+
+/* Private per-port data structure */
+struct uart_qe_port {
+	struct uart_port port;
+	struct ucc_slow __iomem *uccp;
+	struct ucc_uart_pram __iomem *uccup;
+	struct ucc_slow_info us_info;
+	struct ucc_slow_private *us_private;
+	struct device_node *np;
+	unsigned int ucc_num;   /* First ucc is 0, not 1 */
+
+	u16 rx_nrfifos;
+	u16 rx_fifosize;
+	u16 tx_nrfifos;
+	u16 tx_fifosize;
+	int wait_closing;
+	u32 flags;
+	struct qe_bd *rx_bd_base;
+	struct qe_bd *rx_cur;
+	struct qe_bd *tx_bd_base;
+	struct qe_bd *tx_cur;
+	unsigned char *tx_buf;
+	unsigned char *rx_buf;
+	void *bd_virt;  	/* virtual address of the BD buffers */
+	dma_addr_t bd_dma_addr; /* bus address of the BD buffers */
+	unsigned int bd_size;   /* size of BD buffer space */
+};
+
+static struct uart_driver ucc_uart_driver = {
+	.owner  	= THIS_MODULE,
+	.driver_name    = "serial",
+	.dev_name       = "ttyQE",
+	.major  	= SERIAL_QE_MAJOR,
+	.minor  	= SERIAL_QE_MINOR,
+	.nr     	= UCC_MAX_UART,
+};
+
+/*
+ * Virtual to physical address translation.
+ *
+ * Given the virtual address for a character buffer, this function returns
+ * the physical (DMA) equivalent.
+ */
+static inline dma_addr_t cpu2qe_addr(void *addr, struct uart_qe_port *qe_port)
+{
+	if (likely((addr >= qe_port->bd_virt)) &&
+	    (addr < (qe_port->bd_virt + qe_port->bd_size)))
+		return qe_port->bd_dma_addr + (addr - qe_port->bd_virt);
+
+	/* something nasty happened */
+	printk(KERN_ERR "%s: addr=%p\n", __FUNCTION__, addr);
+	BUG();
+	return 0;
+}
+
+/*
+ * Physical to virtual address translation.
+ *
+ * Given the physical (DMA) address for a character buffer, this function
+ * returns the virtual equivalent.
+ */
+static inline void *qe2cpu_addr(dma_addr_t addr, struct uart_qe_port *qe_port)
+{
+	/* sanity check */
+	if (likely((addr >= qe_port->bd_dma_addr) &&
+		   (addr < (qe_port->bd_dma_addr + qe_port->bd_size))))
+		return qe_port->bd_virt + (addr - qe_port->bd_dma_addr);
+
+	/* something nasty happened */
+	printk(KERN_ERR "%s: addr=%x\n", __FUNCTION__, addr);
+	BUG();
+	return NULL;
+}
+
+/*
+ * Return 1 if the QE is done transmitting all buffers for this port
+ *
+ * This function scan each BD in sequence.  If we find a BD that is not
+ * ready (READY=1), then we return 0 indicating that the QE is still sending
+ * data.  If we reach the last BD (WRAP=1), then we know we've scanned
+ * the entire list, and all BDs are done.
+ */
+static unsigned int qe_uart_tx_empty(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+	struct qe_bd *bdp = qe_port->tx_bd_base;
+
+	while (1) {
+		if (in_be16(&bdp->status) & BD_SC_READY)
+			/* This BD is not done, so return "not done" */
+			return 0;
+
+		if (in_be16(&bdp->status) & BD_SC_WRAP)
+			/*
+			 * This BD is done and it's the last one, so return
+			 * "done"
+			 */
+			return 1;
+
+		bdp++;
+	};
+}
+
+/*
+ * Set the modem control lines
+ *
+ * We currently don't support setting modem control lines, but this function
+ * needs to exist, otherwise the kernel will panic.
+ */
+void qe_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+/*
+ * Get the current modem control line status
+ *
+ * We don't support changing the modem control line status, so we always
+ * return Carrier Detect, Data Set Ready, and Clear To Send.
+ */
+static unsigned int qe_uart_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+}
+
+/*
+ * Disable the transmit interrupt.
+ *
+ * Although this function is called "stop_tx", it does not actually stop
+ * transmission of data.  Instead, it tells the QE to not generate an
+ * interrupt when the UCC is finished sending characters.
+ */
+static void qe_uart_stop_tx(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+
+	clrbits16(&qe_port->uccp->uccm, UCC_UART_UCCE_TX);
+}
+
+/*
+ * Transmit as many characters to the HW as possible.
+ *
+ * This function will attempt to stuff of all the characters from the
+ * kernel's transmit buffer into TX BDs.
+ *
+ * A return value of non-zero indicates that it sucessfully stuffed all
+ * characters from the kernel buffer.
+ *
+ * A return value of zero indicates that there are still characters in the
+ * kernel's buffer that have not been transmitted, but there are no more BDs
+ * available.  This function should be called again after a BD has been made
+ * available.
+ */
+static int qe_uart_tx_pump(struct uart_qe_port *qe_port)
+{
+	struct qe_bd *bdp;
+	unsigned char *p;
+	unsigned int count;
+	struct uart_port *port = &qe_port->port;
+	struct circ_buf *xmit = &port->info->xmit;
+
+	bdp = qe_port->rx_cur;
+
+	/* Handle xon/xoff */
+	if (port->x_char) {
+		/* Pick next descriptor and fill from buffer */
+		bdp = qe_port->tx_cur;
+
+		p = qe2cpu_addr(bdp->buf, qe_port);
+
+		*p++ = port->x_char;
+		out_be16(&bdp->length, 1);
+		setbits16(&bdp->status, BD_SC_READY);
+		/* Get next BD. */
+		if (in_be16(&bdp->status) & BD_SC_WRAP)
+			bdp = qe_port->tx_bd_base;
+		else
+			bdp++;
+		qe_port->tx_cur = bdp;
+
+		port->icount.tx++;
+		port->x_char = 0;
+		return 1;
+	}
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
+		qe_uart_stop_tx(port);
+		return 0;
+	}
+
+	/* Pick next descriptor and fill from buffer */
+	bdp = qe_port->tx_cur;
+
+	while (!(in_be16(&bdp->status) & BD_SC_READY) &&
+	       (xmit->tail != xmit->head)) {
+		count = 0;
+		p = qe2cpu_addr(bdp->buf, qe_port);
+		while (count < qe_port->tx_fifosize) {
+			*p++ = xmit->buf[xmit->tail];
+			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+			port->icount.tx++;
+			count++;
+			if (xmit->head == xmit->tail)
+				break;
+		}
+
+		out_be16(&bdp->length, count);
+		setbits16(&bdp->status, BD_SC_READY);
+
+		/* Get next BD. */
+		if (in_be16(&bdp->status) & BD_SC_WRAP)
+			bdp = qe_port->tx_bd_base;
+		else
+			bdp++;
+	}
+	qe_port->tx_cur = bdp;
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_empty(xmit)) {
+		/* The kernel buffer is empty, so turn off TX interrupts.  We
+		   don't need to be told when the QE is finished transmitting
+		   the data. */
+		qe_uart_stop_tx(port);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Start transmitting data
+ *
+ * This function will start transmitting any available data, if the port
+ * isn't already transmitting data.
+ */
+static void qe_uart_start_tx(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+
+	/* If we currently are transmitting, then just return */
+	if (in_be16(&qe_port->uccp->uccm) & UCC_UART_UCCE_TX)
+		return;
+
+	/* Otherwise, pump the port and start transmission */
+	if (qe_uart_tx_pump(qe_port))
+		setbits16(&qe_port->uccp->uccm, UCC_UART_UCCE_TX);
+}
+
+/*
+ * Stop transmitting data
+ */
+static void qe_uart_stop_rx(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+
+	clrbits16(&qe_port->uccp->uccm, UCC_UART_UCCE_RX);
+}
+
+/*
+ * Enable status change interrupts
+ *
+ * We don't support status change interrupts, but we need to define this
+ * function otherwise the kernel will panic.
+ */
+static void qe_uart_enable_ms(struct uart_port *port)
+{
+}
+
+/* Start or stop sending  break signal
+ *
+ * This function controls the sending of a break signal.  If break_state=1,
+ * then we start sending a break signal.  If break_state=0, then we stop
+ * sending the break signal.
+ */
+static void qe_uart_break_ctl(struct uart_port *port, int break_state)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+
+	if (break_state)
+		ucc_slow_stop_tx(qe_port->us_private);
+	else
+		ucc_slow_restart_tx(qe_port->us_private);
+}
+
+/* ISR helper function for receiving character.
+ *
+ * This function is called by the ISR to handling receiving characters
+ */
+static void qe_uart_int_rx(struct uart_qe_port *qe_port)
+{
+	int i;
+	unsigned char ch, *cp;
+	struct uart_port *port = &qe_port->port;
+	struct tty_struct *tty = port->info->tty;
+	struct qe_bd *bdp;
+	u16 status;
+	unsigned int flg;
+
+	/* Just loop through the closed BDs and copy the characters into
+	 * the buffer.
+	 */
+	bdp = qe_port->rx_cur;
+	while (1) {
+		status = in_be16(&bdp->status);
+
+		/* If this one is empty, then we assume we've read them all */
+		if (status & BD_SC_EMPTY)
+			break;
+
+		/* get number of characters, and check space in RX buffer */
+		i = in_be16(&bdp->length);
+
+		/* If we don't have enough room in RX buffer for the entire BD,
+		 * then we try later, which will be the next RX interrupt.
+		 */
+		if (tty_buffer_request_room(tty, i) < i) {
+			dev_dbg(port->dev, "ucc-uart: no room in RX buffer\n");
+			return;
+		}
+
+		/* get pointer */
+		cp = qe2cpu_addr(bdp->buf, qe_port);
+
+		/* loop through the buffer */
+		while (i-- > 0) {
+			ch = *cp++;
+			port->icount.rx++;
+			flg = TTY_NORMAL;
+
+			if (!i && status &
+			    (BD_SC_BR | BD_SC_FR | BD_SC_PR | BD_SC_OV))
+				goto handle_error;
+			if (uart_handle_sysrq_char(port, ch))
+				continue;
+
+error_return:
+			tty_insert_flip_char(tty, ch, flg);
+
+		}
+
+		/* This BD is ready to be used again. Clear status. get next */
+		clrsetbits_be16(&bdp->status, BD_SC_BR | BD_SC_FR | BD_SC_PR |
+			BD_SC_OV | BD_SC_ID, BD_SC_EMPTY);
+		if (in_be16(&bdp->status) & BD_SC_WRAP)
+			bdp = qe_port->rx_bd_base;
+		else
+			bdp++;
+
+	}
+
+	/* Write back buffer pointer */
+	qe_port->rx_cur = bdp;
+
+	/* Activate BH processing */
+	tty_flip_buffer_push(tty);
+
+	return;
+
+	/* Error processing */
+
+handle_error:
+	/* Statistics */
+	if (status & BD_SC_BR)
+		port->icount.brk++;
+	if (status & BD_SC_PR)
+		port->icount.parity++;
+	if (status & BD_SC_FR)
+		port->icount.frame++;
+	if (status & BD_SC_OV)
+		port->icount.overrun++;
+
+	/* Mask out ignored conditions */
+	status &= port->read_status_mask;
+
+	/* Handle the remaining ones */
+	if (status & BD_SC_BR)
+		flg = TTY_BREAK;
+	else if (status & BD_SC_PR)
+		flg = TTY_PARITY;
+	else if (status & BD_SC_FR)
+		flg = TTY_FRAME;
+
+	/* Overrun does not affect the current character ! */
+	if (status & BD_SC_OV)
+		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+#ifdef SUPPORT_SYSRQ
+	port->sysrq = 0;
+#endif
+	goto error_return;
+}
+
+/* Interrupt handler
+ *
+ * This interrupt handler is called after a BD is processed.
+ */
+static irqreturn_t qe_uart_int(int irq, void *data)
+{
+	struct uart_qe_port *qe_port = (struct uart_qe_port *) data;
+	struct ucc_slow __iomem *uccp = qe_port->uccp;
+	u16 events;
+
+	/* Clear the interrupts */
+	events = in_be16(&uccp->ucce);
+	out_be16(&uccp->ucce, events);
+
+	if (events & UCC_UART_UCCE_BRKE)
+		uart_handle_break(&qe_port->port);
+
+	if (events & UCC_UART_UCCE_RX)
+		qe_uart_int_rx(qe_port);
+
+	if (events & UCC_UART_UCCE_TX)
+		qe_uart_tx_pump(qe_port);
+
+	return events ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/* Initialize buffer descriptors
+ *
+ * This function initializes all of the RX and TX buffer descriptors.
+ */
+static void qe_uart_initbd(struct uart_qe_port *qe_port)
+{
+	int i;
+	void *bd_virt;
+	struct qe_bd *bdp;
+
+	/* Set the physical address of the host memory buffers in the buffer
+	 * descriptors, and the virtual address for us to work with.
+	 */
+	bd_virt = qe_port->bd_virt;
+	bdp = qe_port->rx_bd_base;
+	qe_port->rx_cur = qe_port->rx_bd_base;
+	for (i = 0; i < (qe_port->rx_nrfifos - 1); i++) {
+		out_be16(&bdp->status, BD_SC_EMPTY | BD_SC_INTRPT);
+		out_be32(&bdp->buf, cpu2qe_addr(bd_virt, qe_port));
+		out_be16(&bdp->length, 0);
+		bd_virt += qe_port->rx_fifosize;
+		bdp++;
+	}
+
+	/* */
+	out_be16(&bdp->status, BD_SC_WRAP | BD_SC_EMPTY | BD_SC_INTRPT);
+	out_be32(&bdp->buf, cpu2qe_addr(bd_virt, qe_port));
+	out_be16(&bdp->length, 0);
+
+	/* Set the physical address of the host memory
+	 * buffers in the buffer descriptors, and the
+	 * virtual address for us to work with.
+	 */
+	bd_virt = qe_port->bd_virt +
+		L1_CACHE_ALIGN(qe_port->rx_nrfifos * qe_port->rx_fifosize);
+	qe_port->tx_cur = qe_port->tx_bd_base;
+	bdp = qe_port->tx_bd_base;
+	for (i = 0; i < (qe_port->tx_nrfifos - 1); i++) {
+		out_be16(&bdp->status, BD_SC_INTRPT);
+		out_be32(&bdp->buf, cpu2qe_addr(bd_virt, qe_port));
+		out_be16(&bdp->length, 0);
+		bd_virt += qe_port->tx_fifosize;
+		bdp++;
+	}
+
+	/* Loopback requires the preamble bit to be set on the first TX BD */
+#ifdef LOOPBACK
+	setbits16(&qe_port->tx_cur->status, BD_SC_P);
+#endif
+
+	out_be16(&bdp->status, BD_SC_WRAP | BD_SC_INTRPT);
+	out_be32(&bdp->buf, cpu2qe_addr(bd_virt, qe_port));
+	out_be16(&bdp->length, 0);
+}
+
+/*
+ * Initialize a UCC for UART.
+ *
+ * This function configures a given UCC to be used as a UART device. Basic
+ * UCC initialization is handled in qe_uart_request_port().  This function
+ * does all the UART-specific stuff.
+ */
+static void qe_uart_init_ucc(struct uart_qe_port *qe_port)
+{
+	u32 cecr_subblock;
+	struct ucc_slow __iomem *uccp = qe_port->uccp;
+	struct ucc_uart_pram *uccup = qe_port->uccup;
+
+	unsigned int i;
+
+	/* First, disable TX and RX in the UCC */
+	ucc_slow_disable(qe_port->us_private, COMM_DIR_RX_AND_TX);
+
+	/* Program the UCC UART parameter RAM */
+	out_8(&uccup->common.rbmr, UCC_BMR_GBL | UCC_BMR_BO_BE);
+	out_8(&uccup->common.tbmr, UCC_BMR_GBL | UCC_BMR_BO_BE);
+	out_be16(&uccup->common.mrblr, qe_port->rx_fifosize);
+	out_be16(&uccup->maxidl, 0x10);
+	out_be16(&uccup->brkcr, 1);
+	out_be16(&uccup->parec, 0);
+	out_be16(&uccup->frmec, 0);
+	out_be16(&uccup->nosec, 0);
+	out_be16(&uccup->brkec, 0);
+	out_be16(&uccup->uaddr[0], 0);
+	out_be16(&uccup->uaddr[1], 0);
+	out_be16(&uccup->toseq, 0);
+	for (i = 0; i < 8; i++)
+		out_be16(&uccup->cchars[i], 0xC000);
+	out_be16(&uccup->rccm, 0xc0ff);
+
+	/* Configure the GUMR registers for UART */
+	if (soft_uart)
+		/* Soft-UART requires a 1X multiplier for TX */
+		clrsetbits_be32(&uccp->gumr_l,
+			UCC_SLOW_GUMR_L_MODE_MASK | UCC_SLOW_GUMR_L_TDCR_MASK |
+			UCC_SLOW_GUMR_L_RDCR_MASK,
+			UCC_SLOW_GUMR_L_MODE_UART | UCC_SLOW_GUMR_L_TDCR_1 |
+			UCC_SLOW_GUMR_L_RDCR_16);
+	else
+		clrsetbits_be32(&uccp->gumr_l,
+			UCC_SLOW_GUMR_L_MODE_MASK | UCC_SLOW_GUMR_L_TDCR_MASK |
+			UCC_SLOW_GUMR_L_RDCR_MASK,
+			UCC_SLOW_GUMR_L_MODE_UART | UCC_SLOW_GUMR_L_TDCR_16 |
+			UCC_SLOW_GUMR_L_RDCR_16);
+
+	clrsetbits_be32(&uccp->gumr_h, UCC_SLOW_GUMR_H_RFW,
+		UCC_SLOW_GUMR_H_TRX | UCC_SLOW_GUMR_H_TTX);
+
+#ifdef LOOPBACK
+	clrsetbits_be32(&uccp->gumr_l, UCC_SLOW_GUMR_L_DIAG_MASK,
+		UCC_SLOW_GUMR_L_DIAG_LOOP);
+	clrsetbits_be32(&uccp->gumr_h,
+		UCC_SLOW_GUMR_H_CTSP | UCC_SLOW_GUMR_H_RSYN,
+		UCC_SLOW_GUMR_H_CDS);
+#endif
+
+	/* Enable rx interrupts  and clear all pending events.  */
+	out_be16(&uccp->uccm, 0);
+	out_be16(&uccp->ucce, 0xffff);
+	out_be16(&uccp->udsr, 0x7e7e);
+
+	/* Initialize UPSMR */
+	out_be16(&uccp->upsmr, 0);
+
+	if (soft_uart) {
+		out_be16(&uccup->supsmr, 0x30);
+		out_be16(&uccup->res92, 0);
+		out_be32(&uccup->rx_state, 0);
+		out_be32(&uccup->rx_cnt, 0);
+		out_8(&uccup->rx_bitmark, 0);
+		out_8(&uccup->rx_length, 10);
+		out_be32(&uccup->dump_ptr, 0x4000);
+		out_8(&uccup->rx_temp_dlst_qe, 0);
+		out_be32(&uccup->rx_frame_rem, 0);
+		out_8(&uccup->rx_frame_rem_size, 0);
+		/* Soft-UART requires TX to be 1X */
+		out_8(&uccup->tx_mode,
+			UCC_UART_TX_STATE_UART | UCC_UART_TX_STATE_X1);
+		out_be16(&uccup->tx_state, 0);
+		out_8(&uccup->resD4, 0);
+		out_be16(&uccup->resD5, 0);
+
+		/* Set UART mode.
+		 * Enable receive and transmit.
+		 */
+
+		/* From the microcode errata:
+		 * 1.GUMR_L register, set mode=0010 (QMC).
+		 * 2.Set GUMR_H[17] bit. (UART/AHDLC mode).
+		 * 3.Set GUMR_H[19:20] (Transparent mode)
+		 * 4.Clear GUMR_H[26] (RFW)
+		 * ...
+		 * 6.Receiver must use 16x over sampling
+		 */
+		clrsetbits_be32(&uccp->gumr_l,
+			UCC_SLOW_GUMR_L_MODE_MASK | UCC_SLOW_GUMR_L_TDCR_MASK |
+			UCC_SLOW_GUMR_L_RDCR_MASK,
+			UCC_SLOW_GUMR_L_MODE_QMC | UCC_SLOW_GUMR_L_TDCR_16 |
+			UCC_SLOW_GUMR_L_RDCR_16);
+
+		clrsetbits_be32(&uccp->gumr_h,
+			UCC_SLOW_GUMR_H_RFW | UCC_SLOW_GUMR_H_RSYN,
+			UCC_SLOW_GUMR_H_SUART | UCC_SLOW_GUMR_H_TRX |
+			UCC_SLOW_GUMR_H_TTX | UCC_SLOW_GUMR_H_TFL);
+
+#ifdef LOOPBACK
+		clrsetbits_be32(&uccp->gumr_l, UCC_SLOW_GUMR_L_DIAG_MASK,
+				UCC_SLOW_GUMR_L_DIAG_LOOP);
+		clrbits32(&uccp->gumr_h, UCC_SLOW_GUMR_H_CTSP |
+			  UCC_SLOW_GUMR_H_CDS);
+#endif
+
+		cecr_subblock = ucc_slow_get_qe_cr_subblock(qe_port->ucc_num);
+		qe_issue_cmd(QE_INIT_TX_RX, cecr_subblock,
+			QE_CR_PROTOCOL_UNSPECIFIED, 0);
+	}
+}
+
+/*
+ * Initialize the port.
+ */
+static int qe_uart_startup(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+	int ret;
+
+	/*
+	 * If we're using Soft-UART mode, then we need to make sure the
+	 * firmware has been uploaded first.
+	 */
+	if (soft_uart && !firmware_loaded) {
+		dev_err(port->dev, "Soft-UART firmware not uploaded\n");
+		return -ENODEV;
+	}
+
+	qe_uart_initbd(qe_port);
+	qe_uart_init_ucc(qe_port);
+
+	/* Install interrupt handler. */
+	ret = request_irq(port->irq, qe_uart_int, IRQF_SHARED, "ucc-uart",
+		qe_port);
+	if (ret) {
+		dev_err(port->dev, "could not claim IRQ %u\n", port->irq);
+		return ret;
+	}
+
+	/* Startup rx-int */
+	setbits16(&qe_port->uccp->uccm, UCC_UART_UCCE_RX);
+	ucc_slow_enable(qe_port->us_private, COMM_DIR_RX_AND_TX);
+
+	return 0;
+}
+
+/*
+ * Shutdown the port.
+ */
+static void qe_uart_shutdown(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+	struct ucc_slow __iomem *uccp = qe_port->uccp;
+	unsigned int timeout = 20;
+
+	/* Disable RX and TX */
+
+	/* Wait for all the BDs marked sent */
+	while (!qe_uart_tx_empty(port)) {
+		if (!--timeout) {
+			dev_warn(port->dev, "shutdown timeout\n");
+			break;
+		}
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(2);
+	}
+
+	if (qe_port->wait_closing) {
+		/* Wait a bit longer */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(qe_port->wait_closing);
+	}
+
+	/* Stop uarts */
+	ucc_slow_disable(qe_port->us_private, COMM_DIR_RX_AND_TX);
+	clrbits16(&uccp->uccm, UCC_UART_UCCE_TX | UCC_UART_UCCE_RX);
+
+	/* Shut them really down and reinit buffer descriptors */
+	ucc_slow_graceful_stop_tx(qe_port->us_private);
+	qe_uart_initbd(qe_port);
+
+	free_irq(port->irq, qe_port);
+}
+
+/*
+ * Set the serial port parameters.
+ */
+static void qe_uart_set_termios(struct uart_port *port,
+				struct ktermios *termios, struct ktermios *old)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+	struct ucc_slow __iomem *uccp = qe_port->uccp;
+	unsigned int baud;
+	unsigned long flags;
+	u16 upsmr = in_be16(&uccp->upsmr);
+	struct ucc_uart_pram __iomem *uccup = qe_port->uccup;
+	u16 supsmr = in_be16(&uccup->supsmr);
+	u8 char_length = 2; /* 1 + CL + PEN + 1 + SL */
+
+	/* Character length programmed into the mode register is the
+	 * sum of: 1 start bit, number of data bits, 0 or 1 parity bit,
+	 * 1 or 2 stop bits, minus 1.
+	 * The value 'bits' counts this for us.
+	 */
+
+	/* byte size */
+	upsmr &= UCC_UART_UPSMR_CL_MASK;
+	supsmr &= UCC_UART_SUPSMR_CL_MASK;
+
+	switch (termios->c_cflag & CSIZE) {
+	case CS5:
+		upsmr |= UCC_UART_UPSMR_CL_5;
+		supsmr |= UCC_UART_SUPSMR_CL_5;
+		char_length += 5;
+		break;
+	case CS6:
+		upsmr |= UCC_UART_UPSMR_CL_6;
+		supsmr |= UCC_UART_SUPSMR_CL_6;
+		char_length += 6;
+		break;
+	case CS7:
+		upsmr |= UCC_UART_UPSMR_CL_7;
+		supsmr |= UCC_UART_SUPSMR_CL_7;
+		char_length += 7;
+		break;
+	default:	/* case CS8 */
+		upsmr |= UCC_UART_UPSMR_CL_8;
+		supsmr |= UCC_UART_SUPSMR_CL_8;
+		char_length += 8;
+		break;
+	}
+
+	/* If CSTOPB is set, we want two stop bits */
+	if (termios->c_cflag & CSTOPB) {
+		upsmr |= UCC_UART_UPSMR_SL;
+		supsmr |= UCC_UART_SUPSMR_SL;
+		char_length++;  /* + SL */
+	}
+
+	if (termios->c_cflag & PARENB) {
+		upsmr |= UCC_UART_UPSMR_PEN;
+		supsmr |= UCC_UART_SUPSMR_PEN;
+		char_length++;  /* + PEN */
+
+		if (!(termios->c_cflag & PARODD)) {
+			upsmr &= ~(UCC_UART_UPSMR_RPM_MASK |
+				   UCC_UART_UPSMR_TPM_MASK);
+			upsmr |= UCC_UART_UPSMR_RPM_EVEN |
+				UCC_UART_UPSMR_TPM_EVEN;
+			supsmr &= ~(UCC_UART_SUPSMR_RPM_MASK |
+				    UCC_UART_SUPSMR_TPM_MASK);
+			supsmr |= UCC_UART_SUPSMR_RPM_EVEN |
+				UCC_UART_SUPSMR_TPM_EVEN;
+		}
+	}
+
+	/*
+	 * Set up parity check flag
+	 */
+	port->read_status_mask = BD_SC_EMPTY | BD_SC_OV;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= BD_SC_FR | BD_SC_PR;
+	if (termios->c_iflag & (BRKINT | PARMRK))
+		port->read_status_mask |= BD_SC_BR;
+
+	/*
+	 * Characters to ignore
+	 */
+	port->ignore_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->ignore_status_mask |= BD_SC_PR | BD_SC_FR;
+	if (termios->c_iflag & IGNBRK) {
+		port->ignore_status_mask |= BD_SC_BR;
+		/*
+		 * If we're ignore parity and break indicators, ignore
+		 * overruns too.  (For real raw support).
+		 */
+		if (termios->c_iflag & IGNPAR)
+			port->ignore_status_mask |= BD_SC_OV;
+	}
+	/*
+	 * !!! ignore all characters if CREAD is not set
+	 */
+	if ((termios->c_cflag & CREAD) == 0)
+		port->read_status_mask &= ~BD_SC_EMPTY;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, 115200);
+
+	/* Do we really need a spinlock here? */
+	spin_lock_irqsave(&port->lock, flags);
+
+	out_be16(&uccp->upsmr, upsmr);
+	if (soft_uart) {
+		out_be16(&uccup->supsmr, supsmr);
+		out_8(&uccup->rx_length, char_length);
+
+		/* Soft-UART requires a 1X multiplier for TX */
+		qe_setbrg(qe_port->us_info.rx_clock, baud, 16);
+		qe_setbrg(qe_port->us_info.tx_clock, baud, 1);
+	} else {
+		qe_setbrg(qe_port->us_info.rx_clock, baud, 16);
+		qe_setbrg(qe_port->us_info.tx_clock, baud, 16);
+	}
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/*
+ * Return a pointer to a string that describes what kind of port this is.
+ */
+static const char *qe_uart_type(struct uart_port *port)
+{
+	return "QE";
+}
+
+/*
+ * Allocate any memory and I/O resources required by the port.
+ */
+static int qe_uart_request_port(struct uart_port *port)
+{
+	int ret;
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+	struct ucc_slow_info *us_info = &qe_port->us_info;
+	struct ucc_slow_private *uccs;
+	unsigned int rx_size, tx_size;
+	void *bd_virt;
+	dma_addr_t bd_dma_addr = 0;
+
+	ret = ucc_slow_init(us_info, &uccs);
+	if (ret) {
+		dev_err(port->dev, "could not initialize UCC%u\n",
+		       qe_port->ucc_num);
+		return ret;
+	}
+
+	qe_port->us_private = uccs;
+	qe_port->uccp = uccs->us_regs;
+	qe_port->uccup = (struct ucc_uart_pram *) uccs->us_pram;
+	qe_port->rx_bd_base = uccs->rx_bd;
+	qe_port->tx_bd_base = uccs->tx_bd;
+
+	/*
+	 * Allocate the transmit and receive data buffers.
+	 */
+
+	rx_size = L1_CACHE_ALIGN(qe_port->rx_nrfifos * qe_port->rx_fifosize);
+	tx_size = L1_CACHE_ALIGN(qe_port->tx_nrfifos * qe_port->tx_fifosize);
+
+	bd_virt = dma_alloc_coherent(NULL, rx_size + tx_size, &bd_dma_addr,
+		GFP_KERNEL);
+	if (!bd_virt) {
+		dev_err(port->dev, "could not allocate buffer descriptors\n");
+		return -ENOMEM;
+	}
+
+	qe_port->bd_virt = bd_virt;
+	qe_port->bd_dma_addr = bd_dma_addr;
+	qe_port->bd_size = rx_size + tx_size;
+
+	qe_port->rx_buf = bd_virt;
+	qe_port->tx_buf = qe_port->rx_buf + rx_size;
+
+	return 0;
+}
+
+/*
+ * Configure the port.
+ *
+ * We say we're a CPM-type port because that's mostly true.  Once the device
+ * is configured, this driver operates almost identically to the CPM serial
+ * driver.
+ */
+static void qe_uart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_CPM;
+		qe_uart_request_port(port);
+	}
+}
+
+/*
+ * Release any memory and I/O resources that were allocated in
+ * qe_uart_request_port().
+ */
+static void qe_uart_release_port(struct uart_port *port)
+{
+	struct uart_qe_port *qe_port =
+		container_of(port, struct uart_qe_port, port);
+	struct ucc_slow_private *uccs = qe_port->us_private;
+
+	dma_free_coherent(NULL, qe_port->bd_size, qe_port->bd_virt,
+			  qe_port->bd_dma_addr);
+
+	ucc_slow_free(uccs);
+}
+
+/*
+ * Verify that the data in serial_struct is suitable for this device.
+ */
+static int qe_uart_verify_port(struct uart_port *port,
+			       struct serial_struct *ser)
+{
+	if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
+		return -EINVAL;
+
+	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+		return -EINVAL;
+
+	if (ser->baud_base < 9600)
+		return -EINVAL;
+
+	return 0;
+}
+/* UART operations
+ *
+ * Details on these functions can be found in Documentation/serial/driver
+ */
+static struct uart_ops qe_uart_pops = {
+	.tx_empty       = qe_uart_tx_empty,
+	.set_mctrl      = qe_uart_set_mctrl,
+	.get_mctrl      = qe_uart_get_mctrl,
+	.stop_tx	= qe_uart_stop_tx,
+	.start_tx       = qe_uart_start_tx,
+	.stop_rx	= qe_uart_stop_rx,
+	.enable_ms      = qe_uart_enable_ms,
+	.break_ctl      = qe_uart_break_ctl,
+	.startup	= qe_uart_startup,
+	.shutdown       = qe_uart_shutdown,
+	.set_termios    = qe_uart_set_termios,
+	.type   	= qe_uart_type,
+	.release_port   = qe_uart_release_port,
+	.request_port   = qe_uart_request_port,
+	.config_port    = qe_uart_config_port,
+	.verify_port    = qe_uart_verify_port,
+};
+
+/*
+ * Obtain the SOC model number and revision level
+ *
+ * This function parses the device tree to obtain the SOC model.  It then
+ * reads the SVR register to the revision.
+ *
+ * The device tree stores the SOC model two different ways.
+ *
+ * The new way is:
+ *
+ *      	cpu@0 {
+ *      		compatible = "PowerPC,8323";
+ *      		device_type = "cpu";
+ *      		...
+ *
+ *
+ * The old way is:
+ *      	 PowerPC,8323@0 {
+ *      		device_type = "cpu";
+ *      		...
+ *
+ * This code first checks the new way, and then the old way.
+ */
+static unsigned int soc_info(unsigned int *rev_h, unsigned int *rev_l)
+{
+	struct device_node *np;
+	const char *soc_string;
+	unsigned int svr;
+	unsigned int soc;
+
+	/* Find the CPU node */
+	np = of_find_node_by_type(NULL, "cpu");
+	if (!np)
+		return 0;
+	/* Find the compatible property */
+	soc_string = of_get_property(np, "compatible", NULL);
+	if (!soc_string)
+		/* No compatible property, so try the name. */
+		soc_string = np->name;
+
+	/* Extract the SOC number from the "PowerPC," string */
+	if ((sscanf(soc_string, "PowerPC,%u", &soc) != 1) || !soc)
+		return 0;
+
+	/* Get the revision from the SVR */
+	svr = mfspr(SPRN_SVR);
+	*rev_h = (svr >> 4) & 0xf;
+	*rev_l = svr & 0xf;
+
+	return soc;
+}
+
+/*
+ * requst_firmware_nowait() callback function
+ *
+ * This function is called by the kernel when a firmware is made available,
+ * or if it times out waiting for the firmware.
+ */
+static void uart_firmware_cont(const struct firmware *fw, void *context)
+{
+	struct qe_firmware *firmware;
+	struct device *dev = context;
+	int ret;
+
+	if (!fw) {
+		dev_err(dev, "firmware not found\n");
+		return;
+	}
+
+	firmware = (struct qe_firmware *) fw->data;
+
+	if (firmware->header.length != fw->size) {
+		dev_err(dev, "invalid firmware\n");
+		return;
+	}
+
+	ret = qe_upload_firmware(firmware);
+	if (ret) {
+		dev_err(dev, "could not load firmware\n");
+		return;
+	}
+
+	firmware_loaded = 1;
+}
+
+static int ucc_uart_probe(struct of_device *ofdev,
+	const struct of_device_id *match)
+{
+	struct device_node *np = ofdev->node;
+	const unsigned int *iprop;      /* Integer OF properties */
+	const char *sprop;      /* String OF properties */
+	struct uart_qe_port *qe_port = NULL;
+	struct resource res;
+	int ret;
+
+	/*
+	 * Determine if we need Soft-UART mode
+	 */
+	if (of_find_property(np, "soft-uart", NULL)) {
+		dev_dbg(&ofdev->dev, "using Soft-UART mode\n");
+		soft_uart = 1;
+	}
+
+	/*
+	 * If we are using Soft-UART, determine if we need to upload the
+	 * firmware, too.
+	 */
+	if (soft_uart) {
+		struct qe_firmware_info *qe_fw_info;
+
+		qe_fw_info = qe_get_firmware_info();
+
+		/* Check if the firmware has been uploaded. */
+		if (strstr(qe_fw_info->id, "Soft-UART")) {
+			firmware_loaded = 1;
+		} else {
+			char filename[32];
+			unsigned int soc;
+			unsigned int rev_h;
+			unsigned int rev_l;
+
+			soc = soc_info(&rev_h, &rev_l);
+			if (!soc) {
+				dev_err(&ofdev->dev, "unknown CPU model\n");
+				return -ENXIO;
+			}
+			sprintf(filename, "fsl_qe_ucode_uart_%u_%u%u.bin",
+				soc, rev_h, rev_l);
+
+			dev_info(&ofdev->dev, "waiting for firmware %s\n",
+				filename);
+
+			/*
+			 * We call request_firmware_nowait instead of
+			 * request_firmware so that the driver can load and
+			 * initialize the ports without holding up the rest of
+			 * the kernel.  If hotplug support is enabled in the
+			 * kernel, then we use it.
+			 */
+			ret = request_firmware_nowait(THIS_MODULE,
+				FW_ACTION_HOTPLUG, filename, &ofdev->dev,
+				&ofdev->dev, uart_firmware_cont);
+			if (ret) {
+				dev_err(&ofdev->dev,
+					"could not load firmware %s\n",
+					filename);
+				return ret;
+			}
+		}
+	}
+
+	qe_port = kzalloc(sizeof(struct uart_qe_port), GFP_KERNEL);
+	if (!qe_port) {
+		dev_err(&ofdev->dev, "can't allocate QE port structure\n");
+		return -ENOMEM;
+	}
+
+	/* Search for IRQ and mapbase */
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret) {
+		dev_err(&ofdev->dev, "missing 'reg' property in device tree\n");
+		kfree(qe_port);
+		return ret;
+	}
+	if (!res.start) {
+		dev_err(&ofdev->dev, "invalid 'reg' property in device tree\n");
+		kfree(qe_port);
+		return -EINVAL;
+	}
+	qe_port->port.mapbase = res.start;
+
+	/* Get the UCC number (device ID) */
+	/* UCCs are numbered 1-7 */
+	iprop = of_get_property(np, "device-id", NULL);
+	if (!iprop || (*iprop < 1) || (*iprop > UCC_MAX_NUM)) {
+		dev_err(&ofdev->dev,
+			"missing or invalid UCC specified in device tree\n");
+		kfree(qe_port);
+		return -ENODEV;
+	}
+	qe_port->ucc_num = *iprop - 1;
+
+	/* In the future, we should not require the BRG to be specified in the
+	   device tree.  If no clock-source is specified, then just pick a BRG
+	   to use.  This requires a new QE library function that manages BRG
+	   assignments. */
+
+	sprop = of_get_property(np, "rx-clock-name", NULL);
+	if (!sprop) {
+		dev_err(&ofdev->dev, "missing rx-clock-name in device tree\n");
+		kfree(qe_port);
+		return -ENODEV;
+	}
+
+	qe_port->us_info.rx_clock = qe_clock_source(sprop);
+	if ((qe_port->us_info.rx_clock < QE_BRG1) ||
+	    (qe_port->us_info.rx_clock > QE_BRG16)) {
+		dev_err(&ofdev->dev, "rx-clock-name must be a BRG for UART\n");
+		kfree(qe_port);
+		return -ENODEV;
+	}
+
+#ifdef LOOPBACK
+	/* In internal loopback mode, TX and RX must use the same clock */
+	qe_port->us_info.tx_clock = qe_port->us_info.rx_clock;
+#else
+	sprop = of_get_property(np, "tx-clock-name", NULL);
+	if (!sprop) {
+		dev_err(&ofdev->dev, "missing tx-clock-name in device tree\n");
+		kfree(qe_port);
+		return -ENODEV;
+	}
+	qe_port->us_info.tx_clock = qe_clock_source(sprop);
+#endif
+	if ((qe_port->us_info.tx_clock < QE_BRG1) ||
+	    (qe_port->us_info.tx_clock > QE_BRG16)) {
+		dev_err(&ofdev->dev, "tx-clock-name must be a BRG for UART\n");
+		kfree(qe_port);
+		return -ENODEV;
+	}
+
+	/* Get the port number, numbered 0-3 */
+	iprop = of_get_property(np, "port-number", NULL);
+	if (!iprop) {
+		dev_err(&ofdev->dev, "missing port-number in device tree\n");
+		kfree(qe_port);
+		return -EINVAL;
+	}
+	qe_port->port.line = *iprop;
+	if (qe_port->port.line >= UCC_MAX_UART) {
+		dev_err(&ofdev->dev, "port-number must be 0-%u\n",
+			UCC_MAX_UART - 1);
+		kfree(qe_port);
+		return -EINVAL;
+	}
+
+	qe_port->port.irq = irq_of_parse_and_map(np, 0);
+	if (qe_port->port.irq == NO_IRQ) {
+		dev_err(&ofdev->dev, "could not map IRQ for UCC%u\n",
+		       qe_port->ucc_num + 1);
+		kfree(qe_port);
+		return -EINVAL;
+	}
+
+	np = of_find_node_by_type(NULL, "qe");
+	if (!np) {
+		dev_err(&ofdev->dev, "could not find parent 'qe' node\n");
+		kfree(qe_port);
+		return -EINVAL;
+	}
+
+	iprop = of_get_property(np, "brg-frequency", NULL);
+	if (!iprop) {
+		dev_err(&ofdev->dev,
+		       "missing brg-frequency in device tree\n");
+		kfree(qe_port);
+		return -EINVAL;
+	}
+
+	if (*iprop)
+		qe_port->port.uartclk = *iprop;
+	else {
+		/*
+		 * Older versions of U-Boot do not initialize the brg-frequency
+		 * property, so in this case we assume the BRG frequency is
+		 * half the QE bus frequency.
+		 */
+		iprop = of_get_property(np, "bus-frequency", NULL);
+		if (!iprop) {
+			dev_err(&ofdev->dev,
+				"missing QE bus-frequency in device tree\n");
+			kfree(qe_port);
+			return -EINVAL;
+		}
+		if (*iprop)
+			qe_port->port.uartclk = *iprop / 2;
+		else {
+			dev_err(&ofdev->dev,
+				"invalid QE bus-frequency in device tree\n");
+			kfree(qe_port);
+			return -EINVAL;
+		}
+	}
+
+	spin_lock_init(&qe_port->port.lock);
+	qe_port->np = np;
+	qe_port->port.dev = &ofdev->dev;
+	qe_port->port.ops = &qe_uart_pops;
+	qe_port->port.iotype = UPIO_MEM;
+
+	qe_port->tx_nrfifos = TX_NUM_FIFO;
+	qe_port->tx_fifosize = TX_BUF_SIZE;
+	qe_port->rx_nrfifos = RX_NUM_FIFO;
+	qe_port->rx_fifosize = RX_BUF_SIZE;
+
+	qe_port->wait_closing = UCC_WAIT_CLOSING;
+	qe_port->port.fifosize = 512;
+	qe_port->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
+
+	qe_port->us_info.ucc_num = qe_port->ucc_num;
+	qe_port->us_info.regs = (phys_addr_t) res.start;
+	qe_port->us_info.irq = qe_port->port.irq;
+
+	qe_port->us_info.rx_bd_ring_len = qe_port->rx_nrfifos;
+	qe_port->us_info.tx_bd_ring_len = qe_port->tx_nrfifos;
+
+	/* Make sure ucc_slow_init() initializes both TX and RX */
+	qe_port->us_info.init_tx = 1;
+	qe_port->us_info.init_rx = 1;
+
+	/* Add the port to the uart sub-system.  This will cause
+	 * qe_uart_config_port() to be called, so the us_info structure must
+	 * be initialized.
+	 */
+	ret = uart_add_one_port(&ucc_uart_driver, &qe_port->port);
+	if (ret) {
+		dev_err(&ofdev->dev, "could not add /dev/ttyQE%u\n",
+		       qe_port->port.line);
+		kfree(qe_port);
+		return ret;
+	}
+
+	dev_set_drvdata(&ofdev->dev, qe_port);
+
+	dev_info(&ofdev->dev, "UCC%u assigned to /dev/ttyQE%u\n",
+		qe_port->ucc_num + 1, qe_port->port.line);
+
+	/* Display the mknod command for this device */
+	dev_dbg(&ofdev->dev, "mknod command is 'mknod /dev/ttyQE%u c %u %u'\n",
+	       qe_port->port.line, SERIAL_QE_MAJOR,
+	       SERIAL_QE_MINOR + qe_port->port.line);
+
+	return 0;
+}
+
+static int ucc_uart_remove(struct of_device *ofdev)
+{
+	struct uart_qe_port *qe_port = dev_get_drvdata(&ofdev->dev);
+
+	dev_info(&ofdev->dev, "removing /dev/ttyQE%u\n", qe_port->port.line);
+
+	uart_remove_one_port(&ucc_uart_driver, &qe_port->port);
+
+	dev_set_drvdata(&ofdev->dev, NULL);
+	kfree(qe_port);
+
+	return 0;
+}
+
+static struct of_device_id ucc_uart_match[] = {
+	{
+		.type = "serial",
+		.compatible = "ucc_uart",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, ucc_uart_match);
+
+static struct of_platform_driver ucc_uart_of_driver = {
+	.owner  	= THIS_MODULE,
+	.name   	= "ucc_uart",
+	.match_table    = ucc_uart_match,
+	.probe  	= ucc_uart_probe,
+	.remove 	= ucc_uart_remove,
+};
+
+static int __init ucc_uart_init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "Freescale QUICC Engine UART device driver\n");
+#ifdef LOOPBACK
+	printk(KERN_INFO "ucc-uart: Using loopback mode\n");
+#endif
+
+	ret = uart_register_driver(&ucc_uart_driver);
+	if (ret) {
+		printk(KERN_ERR "ucc-uart: could not register UART driver\n");
+		return ret;
+	}
+
+	ret = of_register_platform_driver(&ucc_uart_of_driver);
+	if (ret)
+		printk(KERN_ERR
+		       "ucc-uart: could not register platform driver\n");
+
+	return ret;
+}
+
+static void __exit ucc_uart_exit(void)
+{
+	printk(KERN_INFO
+	       "Freescale QUICC Engine UART device driver unloading\n");
+
+	of_unregister_platform_driver(&ucc_uart_of_driver);
+	uart_unregister_driver(&ucc_uart_driver);
+}
+
+module_init(ucc_uart_init);
+module_exit(ucc_uart_exit);
+
+MODULE_DESCRIPTION("Freescale QUICC Engine (QE) UART");
+MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CHARDEV_MAJOR(SERIAL_QE_MAJOR);
+
-- 
1.5.2.4

^ permalink raw reply related

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
From: Joachim Fenkes @ 2007-12-07 16:25 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Arnd Bergmann, OF-EWG, LKML, linuxppc-dev, Christoph Raisch,
	Marcus Eder, OF-General, Stefan Roscher
In-Reply-To: <ada7ijrd6gy.fsf@cisco.com>

Roland Dreier <rdreier@cisco.com> wrote on 06.12.2007 19:27:09:

>  > > +               ehca_lock_hcalls = 
!(cur_cpu_spec->cpu_user_features
>  > > +                                    & PPC_FEATURE_ARCH_2_05);
> 
>  > We already talked about this yesterday, but I still feel that 
checking the
>  > instruction set of the CPU should not be used to determine whether a
>  > specific device driver implementation is used int hypervisor.
> 
> I had the same reaction... is testing cpu_user_features really the
> best way to detect this issue?

I concur it's not nice, but it was the only feasible method we could find 
without adding a "bug fixed" feature flag to the partition<->firmware 
interface. The firmware version reported in the OFDT is not a reliable 
enough source, and even if it were, it would require a lot of string 
parsing and matching against tables.

We're taking this to the firmware architects at the moment, but they're 
not very fond of the idea of reporting the absence of bugs through 
capability flags, as this could quickly lead to the exhaustion of flag 
bits. We'll let the discussion stew for a bit, but if we don't get this 
flag, we'll have to resort to the CPU features.
 
> I'll hold off applying this for a few days so you guys can decide the
> best thing to do.  We'll definitely get some fix into 2.6.24 but we
> have time to make a good decision.

Right.
 
>  > Regarding the performance problem, have you checked whether 
converting all
>  > your spin_lock_irqsave to spin_lock/spin_lock_irq improves your 
performance
>  > on the older machines? Maybe it's already fast enough that way.
> 
> It does seem that the only places that the hcall_lock is taken also
> use msleep, so they must always be in process context.  So you can
> safely just use spin_lock(), right?

As Arnd said, there are hCalls that will never return H_LONG_BUSY_*, such 
as H_QUERY_PORT and chums, so they will never sleep. The surrounding 
functions, though, are not prepared to be called from interrupt context 
(GFP_KERNEL comes to mind), so I agree that a simple spin_lock() will 
suffice. Thanks, Arnd, for pointing this out.

We'll keep you guys posted on the feature flag discussion. Until then, 
have a nice weekend!

Joachim

^ permalink raw reply

* [PATCH v5] qe: add ability to upload QE firmware
From: Timur Tabi @ 2007-12-07 15:43 UTC (permalink / raw)
  To: galak, linuxppc-dev; +Cc: Timur Tabi

Define the layout of a binary blob that contains a QE firmware and instructions
on how to upload it.  Add function qe_upload_firmware() to parse the blob
and perform the actual upload.  Fully define 'struct rsp' in immap_qe.h to
include the actual RISC Special Registers.  Added description of a new
QE firmware node to booting-without-of.txt.

Signed-off-by: Timur Tabi <timur@freescale.com>
---

Argh, another booting-without-of.txt fix.  There are 8 virtual traps, not 16.

This patch is for Kumar's for-2.6.25 branch.  This code is necessary for
my QE UART driver.

 Documentation/powerpc/00-INDEX               |    3 +
 Documentation/powerpc/booting-without-of.txt |   33 +++-
 Documentation/powerpc/qe_firmware.txt        |  295 ++++++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig               |    1 +
 arch/powerpc/sysdev/qe_lib/qe.c              |  240 +++++++++++++++++++++
 include/asm-powerpc/immap_qe.h               |   34 +++-
 include/asm-powerpc/qe.h                     |   61 ++++++
 7 files changed, 663 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/powerpc/qe_firmware.txt

diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX
index 94a3c57..3be84aa 100644
--- a/Documentation/powerpc/00-INDEX
+++ b/Documentation/powerpc/00-INDEX
@@ -28,3 +28,6 @@ sound.txt
 	- info on sound support under Linux/PPC
 zImage_layout.txt
 	- info on the kernel images for Linux/PPC
+qe_firmware.txt
+	- describes the layout of firmware binaries for the Freescale QUICC
+	  Engine and the code that parses and uploads the microcode therein.
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index e9a3cb1..8b27711 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -52,7 +52,10 @@ Table of Contents
       i) Freescale QUICC Engine module (QE)
       j) CFI or JEDEC memory-mapped NOR flash
       k) Global Utilities Block
-      l) Xilinx IP cores
+      l) Freescale Communications Processor Module
+      m) Chipselect/Local Bus
+      n) 4xx/Axon EMAC ethernet nodes
+      o) Xilinx IP cores
 
   VII - Specifying interrupt information for devices
     1) interrupts property
@@ -1772,6 +1775,32 @@ platforms are moved over to use the flattened-device-tree model.
 		};
 	};
 
+   viii) Uploaded QE firmware
+
+	 If a new firwmare has been uploaded to the QE (usually by the
+	 boot loader), then a 'firmware' child node should be added to the QE
+	 node.  This node provides information on the uploaded firmware that
+	 device drivers may need.
+
+	 Required properties:
+	 - id: The string name of the firmware.  This is taken from the 'id'
+	       member of the qe_firmware structure of the uploaded firmware.
+	       Device drivers can search this string to determine if the
+	       firmware they want is already present.
+	 - extended_modes: The Extended Modes bitfield, taken from the
+			   firmware binary.  It is a 64-bit number represented
+			   as an array of two 32-bit numbers.
+	 - virtual_traps: The virtual traps, taken from the firmware binary.
+			  It is an array of 8 32-bit numbers.
+
+	 Example:
+
+		firmware {
+			id = "Soft-UART";
+			extended_modes = <0 0>;
+			virtual_traps = <0 0 0 0 0 0 0 0>;
+		}
+
    j) CFI or JEDEC memory-mapped NOR flash
 
     Flash chips (Memory Technology Devices) are often used for solid state
@@ -2254,7 +2283,7 @@ platforms are moved over to use the flattened-device-tree model.
 			   available.
 			   For Axon: 0x0000012a
 
-   l) Xilinx IP cores
+   o) Xilinx IP cores
 
    The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
    in Xilinx Spartan and Virtex FPGAs.  The devices cover the whole range
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
new file mode 100644
index 0000000..8962664
--- /dev/null
+++ b/Documentation/powerpc/qe_firmware.txt
@@ -0,0 +1,295 @@
+	   Freescale QUICC Engine Firmware Uploading
+	   -----------------------------------------
+
+(c) 2007 Timur Tabi <timur at freescale.com>,
+    Freescale Semiconductor
+
+Table of Contents
+=================
+
+  I - Software License for Firmware
+
+  II - Microcode Availability
+
+  III - Description and Terminology
+
+  IV - Microcode Programming Details
+
+  V - Firmware Structure Layout
+
+  VI - Sample Code for Creating Firmware Files
+
+Revision Information
+====================
+
+November 30, 2007: Rev 1.0 - Initial version
+
+I - Software License for Firmware
+=================================
+
+Each firmware file comes with its own software license.  For information on
+the particular license, please see the license text that is distributed with
+the firmware.
+
+II - Microcode Availability
+===========================
+
+Firmware files are distributed through various channels.  Some are available on
+http://opensource.freescale.com.  For other firmware files, please contact
+your Freescale representative or your operating system vendor.
+
+III - Description and Terminology
+================================
+
+In this document, the term 'microcode' refers to the sequence of 32-bit
+integers that compose the actual QE microcode.
+
+The term 'firmware' refers to a binary blob that contains the microcode as
+well as other data that
+
+	1) describes the microcode's purpose
+	2) describes how and where to upload the microcode
+	3) specifies the values of various registers
+	4) includes additional data for use by specific device drivers
+
+Firmware files are binary files that contain only a firmware.
+
+IV - Microcode Programming Details
+===================================
+
+The QE architecture allows for only one microcode present in I-RAM for each
+RISC processor.  To replace any current microcode, a full QE reset (which
+disables the microcode) must be performed first.
+
+QE microcode is uploaded using the following procedure:
+
+1) The microcode is placed into I-RAM at a specific location, using the
+   IRAM.IADD and IRAM.IDATA registers.
+
+2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
+   needs split I-RAM.  Split I-RAM is only meaningful for SOCs that have
+   QEs with multiple RISC processors, such as the 8360.  Splitting the I-RAM
+   allows each processor to run a different microcode, effectively creating an
+   asymmetric multiprocessing (AMP) system.
+
+3) The TIBCR trap registers are loaded with the addresses of the trap handlers
+   in the microcode.
+
+4) The RSP.ECCR register is programmed with the value provided.
+
+5) If necessary, device drivers that need the virtual traps and extended mode
+   data will use them.
+
+Virtual Microcode Traps
+
+These virtual traps are conditional branches in the microcode.  These are
+"soft" provisional introduced in the ROMcode in order to enable higher
+flexibility and save h/w traps If new features are activated or an issue is
+being fixed in the RAM package utilizing they should be activated.  This data
+structure signals the microcode which of these virtual traps is active.
+
+This structure contains 6 words that the application should copy to some
+specific been defined.  This table describes the structure.
+
+	---------------------------------------------------------------
+	| Offset in |                  | Destination Offset | Size of |
+	|   array   |     Protocol     |   within PRAM      | Operand |
+	--------------------------------------------------------------|
+	|     0     | Ethernet         |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     4     | ATM              |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     8     | PPP              |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     12    | Ethernet RX      |      0x22          | 1 byte  |
+	|           | Distributor Page |                    |         |
+	---------------------------------------------------------------
+	|     16    | ATM Globtal      |      0x28          | 1 byte  |
+	|           | Params Table     |                    |         |
+	---------------------------------------------------------------
+	|     20    | Insert Frame     |      0xF8          | 4 bytes |
+	---------------------------------------------------------------
+
+
+Extended Modes
+
+This is a double word bit array (64 bits) that defines special functionality
+which has an impact on the softwarew drivers.  Each bit has its own impact
+and has special instructions for the s/w associated with it.  This structure is
+described in this table:
+
+	-----------------------------------------------------------------------
+	| Bit #  |     Name     |   Description                               |
+	-----------------------------------------------------------------------
+	|   0    | General      | Indicates that prior to each host command   |
+	|        | push command | given by the application, the software must |
+	|        |              | assert a special host command (push command)|
+	|        |              | CECDR = 0x00800000.                         |
+	|        |              | CECR = 0x01c1000f.                          |
+	-----------------------------------------------------------------------
+	|   1    | UCC ATM      | Indicates that after issuing ATM RX INIT    |
+	|        | RX INIT      | command, the host must issue another special|
+	|        | push command | command (push command) and immediately      |
+	|        |              | following that re-issue the ATM RX INIT     |
+	|        |              | command. (This makes the sequence of        |
+	|        |              | initializing the ATM receiver a sequence of |
+	|        |              | three host commands)                        |
+	|        |              | CECDR = 0x00800000.                         |
+	|        |              | CECR = 0x01c1000f.                          |
+	-----------------------------------------------------------------------
+	|   2    | Add/remove   | Indicates that following the specific host  |
+	|        | command      | command: "Add/Remove entry in Hash Lookup   |
+	|        | validation   | Table" used in Interworking setup, the user |
+	|        |              | must issue another command.                 |
+	|        |              | CECDR = 0xce000003.                         |
+	|        |              | CECR = 0x01c10f58.                          |
+	-----------------------------------------------------------------------
+	|   3    | General push | Indicates that the s/w has to initialize    |
+	|        | command      | some pointers in the Ethernet thread pages  |
+	|        |              | which are used when Header Compression is   |
+	|        |              | activated.  The full details of these       |
+	|        |              | pointers is located in the software drivers.|
+	-----------------------------------------------------------------------
+	|   4    | General push | Indicates that after issuing Ethernet TX    |
+	|        | command      | INIT command, user must issue this command  |
+	|        |              | for each SNUM of Ethernet TX thread.        |
+	|        |              | CECDR = 0x00800003.                         |
+	|        |              | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM},  |
+	|        |              |        1'b{1}, 12'b{0}, 4'b{1}              |
+	-----------------------------------------------------------------------
+	| 5 - 31 |     N/A      | Reserved, set to zero.                      |
+	-----------------------------------------------------------------------
+
+V - Firmware Structure Layout
+==============================
+
+QE microcode from Freescale is typically provided as a header file.  This
+header file contains macros that define the microcode binary itself as well as
+some other data used in uploading that microcode.  The format of these files
+do not lend themselves to simple inclusion into other code.  Hence,
+the need for a more portable format.  This section defines that format.
+
+Instead of distributing a header file, the microcode and related data are
+embedded into a binary blob.  This blob is passed to the qe_upload_firmware()
+function, which parses the blob and performs everything necessary to upload
+the microcode.
+
+All integers are big-endian.  See the comments for function
+qe_upload_firmware() for up-to-date implementation information.
+
+This structure supports versioning, where the version of the structure is
+embedded into the structure itself.  To ensure forward and backwards
+compatibility, all versions of the structure must use the same 'qe_header'
+structure at the beginning.
+
+'header' (type: struct qe_header):
+	The 'length' field is the size, in bytes, of the entire structure,
+	including all the microcode embedded in it, as well as the CRC (if
+	present).
+
+	The 'magic' field is an array of three bytes that contains the letters
+	'Q', 'E', and 'F'.  This is an identifier that indicates that this
+	structure is a QE Firmware structure.
+
+	The 'version' field is a single byte that indicates the version of this
+	structure.  If the layout of the structure should ever need to be
+	changed to add support for additional types of microcode, then the
+	version number should also be changed.
+
+The 'id' field is a null-terminated string(suitable for printing) that
+identifies the firmware.
+
+The 'count' field indicates the number of 'microcode' structures.  There
+must be one and only one 'microcode' structure for each RISC processor.
+Therefore, this field also represents the number of RISC processors for this
+SOC.
+
+The 'soc' structure contains the SOC numbers and revisions used to match
+the microcode to the SOC itself.  Normally, the microcode loader should
+check the data in this structure with the SOC number and revisions, and
+only upload the microcode if there's a match.  However, this check is not
+made on all platforms.
+
+Although it is not recommended, you can specify '0' in the soc.model
+field to skip matching SOCs altogether.
+
+The 'model' field is a 16-bit number that matches the actual SOC. The
+'major' and 'minor' fields are the major and minor revision numbrs,
+respectively, of the SOC.
+
+For example, to match the 8323, revision 1.0:
+     soc.model = 8323
+     soc.major = 1
+     soc.minor = 0
+
+'padding' is neccessary for structure alignment.  This field ensures that the
+'extended_modes' field is aligned on a 64-bit boundary.
+
+'extended_modes' is a bitfield that defines special functionality which has an
+impact on the device drivers.  Each bit has its own impact and has special
+instructions for the driver associated with it.  This field is stored in
+the QE library and available to any driver that calles qe_get_firmware_info().
+
+'vtraps' is an array of 8 words that contain virtual trap values for each
+virtual traps.  As with 'extended_modes', this field is stored in the QE
+library and available to any driver that calles qe_get_firmware_info().
+
+'microcode' (type: struct qe_microcode):
+	For each RISC processor there is one 'microcode' structure.  The first
+	'microcode' structure is for the first RISC, and so on.
+
+	The 'id' field is a null-terminated string suitable for printing that
+	identifies this particular microcode.
+
+	'traps' is an array of 16 words that contain hardware trap values
+	for each of the 16 traps.  If trap[i] is 0, then this particular
+	trap is to be ignored (i.e. not written to TIBCR[i]).  The entire value
+	is written as-is to the TIBCR[i] register, so be sure to set the EN
+	and T_IBP bits if necessary.
+
+	'eccr' is the value to program into the ECCR register.
+
+	'iram_offset' is the offset into IRAM to start writing the
+	microcode.
+
+	'count' is the number of 32-bit words in the microcode.
+
+	'code_offset' is the offset, in bytes, from the beginning of this
+	structure where the microcode itself can be found.  The first
+	microcode binary should be located immediately after the 'microcode'
+	array.
+
+	'major', 'minor', and 'revision' are the major, minor, and revision
+	version numbers, respectively, of the microcode.  If all values are 0,
+	then these fields are ignored.
+
+	'reserved' is necessary for structure alignment.  Since 'microcode'
+	is an array, the 64-bit 'extended_modes' field needs to be aligned
+	on a 64-bit boundary, and this can only happen if the size of
+	'microcode' is a multiple of 8 bytes.  To ensure that, we add
+	'reserved'.
+
+After the last microcode is a 32-bit CRC.  It can be calculated using
+this algorithm:
+
+u32 crc32(const u8 *p, unsigned int len)
+{
+	unsigned int i;
+	u32 crc = 0;
+
+	while (len--) {
+	   crc ^= *p++;
+	   for (i = 0; i < 8; i++)
+		   crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+	}
+	return crc;
+}
+
+VI - Sample Code for Creating Firmware Files
+============================================
+
+A Python program that creates firmware binaries from the header files normally
+distributed by Freescale can be found on http://opensource.freescale.com.
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index ea22cad..18f101b 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -265,6 +265,7 @@ config TAU_AVERAGE
 config QUICC_ENGINE
 	bool
 	select PPC_LIB_RHEAP
+	select CRC32
 	help
 	  The QUICC Engine (QE) is a new generation of communications
 	  coprocessors on Freescale embedded CPUs (akin to CPM in older chips).
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index 1df3b4a..497eb88 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
+#include <linux/crc32.h>
 #include <asm/irq.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -362,3 +363,242 @@ void *qe_muram_addr(unsigned long offset)
 	return (void *)&qe_immr->muram[offset];
 }
 EXPORT_SYMBOL(qe_muram_addr);
+
+/* The maximum number of RISCs we support */
+#define MAX_QE_RISC     2
+
+/* Firmware information stored here for qe_get_firmware_info() */
+static struct qe_firmware_info qe_firmware_info;
+
+/*
+ * Set to 1 if QE firmware has been uploaded, and therefore
+ * qe_firmware_info contains valid data.
+ */
+static int qe_firmware_uploaded;
+
+/*
+ * Upload a QE microcode
+ *
+ * This function is a worker function for qe_upload_firmware().  It does
+ * the actual uploading of the microcode.
+ */
+static void qe_upload_microcode(const void *base,
+	const struct qe_microcode *ucode)
+{
+	const __be32 *code = base + be32_to_cpu(ucode->code_offset);
+	unsigned int i;
+
+	if (ucode->major || ucode->minor || ucode->revision)
+		printk(KERN_INFO "qe-firmware: "
+			"uploading microcode '%s' version %u.%u.%u\n",
+			ucode->id, ucode->major, ucode->minor, ucode->revision);
+	else
+		printk(KERN_INFO "qe-firmware: "
+			"uploading microcode '%s'\n", ucode->id);
+
+	/* Use auto-increment */
+	out_be32(&qe_immr->iram.iadd, be32_to_cpu(ucode->iram_offset) |
+		QE_IRAM_IADD_AIE | QE_IRAM_IADD_BADDR);
+
+	for (i = 0; i < be32_to_cpu(ucode->count); i++)
+		out_be32(&qe_immr->iram.idata, be32_to_cpu(code[i]));
+}
+
+/*
+ * Upload a microcode to the I-RAM at a specific address.
+ *
+ * See Documentation/powerpc/qe-firmware.txt for information on QE microcode
+ * uploading.
+ *
+ * Currently, only version 1 is supported, so the 'version' field must be
+ * set to 1.
+ *
+ * The SOC model and revision are not validated, they are only displayed for
+ * informational purposes.
+ *
+ * 'calc_size' is the calculated size, in bytes, of the firmware structure and
+ * all of the microcode structures, minus the CRC.
+ *
+ * 'length' is the size that the structure says it is, including the CRC.
+ */
+int qe_upload_firmware(const struct qe_firmware *firmware)
+{
+	unsigned int i;
+	unsigned int j;
+	u32 crc;
+	size_t calc_size = sizeof(struct qe_firmware);
+	size_t length;
+	const struct qe_header *hdr;
+
+	if (!firmware) {
+		printk(KERN_ERR "qe-firmware: invalid pointer\n");
+		return -EINVAL;
+	}
+
+	hdr = &firmware->header;
+	length = be32_to_cpu(hdr->length);
+
+	/* Check the magic */
+	if ((hdr->magic[0] != 'Q') || (hdr->magic[1] != 'E') ||
+	    (hdr->magic[2] != 'F')) {
+		printk(KERN_ERR "qe-firmware: not a microcode\n");
+		return -EPERM;
+	}
+
+	/* Check the version */
+	if (hdr->version != 1) {
+		printk(KERN_ERR "qe-firmware: unsupported version\n");
+		return -EPERM;
+	}
+
+	/* Validate some of the fields */
+	if ((firmware->count < 1) || (firmware->count >= MAX_QE_RISC)) {
+		printk(KERN_ERR "qe-firmware: invalid data\n");
+		return -EINVAL;
+	}
+
+	/* Validate the length and check if there's a CRC */
+	calc_size += (firmware->count - 1) * sizeof(struct qe_microcode);
+
+	for (i = 0; i < firmware->count; i++)
+		/*
+		 * For situations where the second RISC uses the same microcode
+		 * as the first, the 'code_offset' and 'count' fields will be
+		 * zero, so it's okay to add those.
+		 */
+		calc_size += sizeof(__be32) *
+			be32_to_cpu(firmware->microcode[i].count);
+
+	/* Validate the length */
+	if (length != calc_size + sizeof(__be32)) {
+		printk(KERN_ERR "qe-firmware: invalid length\n");
+		return -EPERM;
+	}
+
+	/* Validate the CRC */
+	crc = be32_to_cpu(*(__be32 *)((void *)firmware + calc_size));
+	if (crc != crc32(0, firmware, calc_size)) {
+		printk(KERN_ERR "qe-firmware: firmware CRC is invalid\n");
+		return -EIO;
+	}
+
+	/*
+	 * If the microcode calls for it, split the I-RAM.
+	 */
+	if (!firmware->split)
+		setbits16(&qe_immr->cp.cercr, QE_CP_CERCR_CIR);
+
+	if (firmware->soc.model)
+		printk(KERN_INFO
+			"qe-firmware: firmware '%s' for %u V%u.%u\n",
+			firmware->id, be16_to_cpu(firmware->soc.model),
+			firmware->soc.major, firmware->soc.minor);
+	else
+		printk(KERN_INFO "qe-firmware: firmware '%s'\n",
+			firmware->id);
+
+	/*
+	 * The QE only supports one microcode per RISC, so clear out all the
+	 * saved microcode information and put in the new.
+	 */
+	memset(&qe_firmware_info, 0, sizeof(qe_firmware_info));
+	strcpy(qe_firmware_info.id, firmware->id);
+	qe_firmware_info.extended_modes = firmware->extended_modes;
+	memcpy(qe_firmware_info.vtraps, firmware->vtraps,
+		sizeof(firmware->vtraps));
+
+	/* Loop through each microcode. */
+	for (i = 0; i < firmware->count; i++) {
+		const struct qe_microcode *ucode = &firmware->microcode[i];
+
+		/* Upload a microcode if it's present */
+		if (ucode->code_offset)
+			qe_upload_microcode(firmware, ucode);
+
+		/* Program the traps for this processor */
+		for (j = 0; j < 16; j++) {
+			u32 trap = be32_to_cpu(ucode->traps[j]);
+
+			if (trap)
+				out_be32(&qe_immr->rsp[i].tibcr[j], trap);
+		}
+
+		/* Enable traps */
+		out_be32(&qe_immr->rsp[i].eccr, be32_to_cpu(ucode->eccr));
+	}
+
+	qe_firmware_uploaded = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(qe_upload_firmware);
+
+/*
+ * Get info on the currently-loaded firmware
+ *
+ * This function also checks the device tree to see if the boot loader has
+ * uploaded a firmware already.
+ */
+struct qe_firmware_info *qe_get_firmware_info(void)
+{
+	static int initialized;
+
+	/*
+	 * If we haven't checked yet, and a driver hasn't uploaded a firmware
+	 * yet, then check the device tree for information.
+	 */
+	do {
+		struct device_node *qe;
+		struct device_node *fw = NULL;
+		const char *sprop;
+		const u32 *iprop;
+
+		if (initialized || qe_firmware_uploaded)
+			break;
+
+		initialized = 1;
+
+		qe = of_find_node_by_type(NULL, "qe");
+		if (!qe)
+			break;
+
+		/* Find the 'firmware' child node */
+		while ((fw = of_get_next_child(qe, fw)))
+			if (strcmp(fw->name, "firmware") == 0)
+				break;
+
+		/* Did we find the 'firmware' node? */
+		if (!fw) {
+			of_node_put(qe);
+			break;
+		}
+
+		qe_firmware_uploaded = 1;
+
+		/* Copy the data into qe_firmware_info*/
+		sprop = of_get_property(fw, "id", NULL);
+		if (sprop)
+			strncpy(qe_firmware_info.id, sprop,
+				sizeof(qe_firmware_info.id) - 1);
+
+		iprop = of_get_property(fw, "extended_modes", NULL);
+		if (iprop)
+			qe_firmware_info.extended_modes =
+				(u64) iprop[0] << 32 | iprop[1];
+
+		iprop = of_get_property(fw, "virtual_traps", NULL);
+		if (iprop) {
+			unsigned int i = 0;
+
+			for (; i < ARRAY_SIZE(qe_firmware_info.vtraps); i++)
+				qe_firmware_info.vtraps[i] = iprop[i];
+		}
+
+		of_node_put(fw);
+		of_node_put(qe);
+	} while (0);
+
+	return qe_firmware_uploaded ? &qe_firmware_info : NULL;
+}
+EXPORT_SYMBOL(qe_get_firmware_info);
+
diff --git a/include/asm-powerpc/immap_qe.h b/include/asm-powerpc/immap_qe.h
index aba9806..82a4526 100644
--- a/include/asm-powerpc/immap_qe.h
+++ b/include/asm-powerpc/immap_qe.h
@@ -393,9 +393,39 @@ struct dbg {
 	u8	res2[0x48];
 } __attribute__ ((packed));
 
-/* RISC Special Registers (Trap and Breakpoint) */
+/*
+ * RISC Special Registers (Trap and Breakpoint).  These are described in
+ * the QE Developer's Handbook.
+ */
 struct rsp {
-	u32	reg[0x40];	/* 64 32-bit registers */
+	__be32 tibcr[16];	/* Trap/instruction breakpoint control regs */
+	u8 res0[64];
+	__be32 ibcr0;
+	__be32 ibs0;
+	__be32 ibcnr0;
+	u8 res1[4];
+	__be32 ibcr1;
+	__be32 ibs1;
+	__be32 ibcnr1;
+	__be32 npcr;
+	__be32 dbcr;
+	__be32 dbar;
+	__be32 dbamr;
+	__be32 dbsr;
+	__be32 dbcnr;
+	u8 res2[12];
+	__be32 dbdr_h;
+	__be32 dbdr_l;
+	__be32 dbdmr_h;
+	__be32 dbdmr_l;
+	__be32 bsr;
+	__be32 bor;
+	__be32 bior;
+	u8 res3[4];
+	__be32 iatr[4];
+	__be32 eccr;		/* Exception control configuration register */
+	__be32 eicr;
+	u8 res4[0x100-0xf8];
 } __attribute__ ((packed));
 
 struct qe_immap {
diff --git a/include/asm-powerpc/qe.h b/include/asm-powerpc/qe.h
index bcf60be..35c7b8d 100644
--- a/include/asm-powerpc/qe.h
+++ b/include/asm-powerpc/qe.h
@@ -93,6 +93,58 @@ unsigned long qe_muram_alloc_fixed(unsigned long offset, int size);
 void qe_muram_dump(void);
 void *qe_muram_addr(unsigned long offset);
 
+/* Structure that defines QE firmware binary files.
+ *
+ * See Documentation/powerpc/qe-firmware.txt for a description of these
+ * fields.
+ */
+struct qe_firmware {
+	struct qe_header {
+		__be32 length;  /* Length of the entire structure, in bytes */
+		u8 magic[3];    /* Set to { 'Q', 'E', 'F' } */
+		u8 version;     /* Version of this layout. First ver is '1' */
+	} header;
+	u8 id[62];      /* Null-terminated identifier string */
+	u8 split;	/* 0 = shared I-RAM, 1 = split I-RAM */
+	u8 count;       /* Number of microcode[] structures */
+	struct {
+		__be16 model;   	/* The SOC model  */
+		u8 major;       	/* The SOC revision major */
+		u8 minor;       	/* The SOC revision minor */
+	} __attribute__ ((packed)) soc;
+	u8 padding[4];			/* Reserved, for alignment */
+	__be64 extended_modes;		/* Extended modes */
+	__be32 vtraps[8];		/* Virtual trap addresses */
+	u8 reserved[4];			/* Reserved, for future expansion */
+	struct qe_microcode {
+		u8 id[32];      	/* Null-terminated identifier */
+		__be32 traps[16];       /* Trap addresses, 0 == ignore */
+		__be32 eccr;    	/* The value for the ECCR register */
+		__be32 iram_offset;     /* Offset into I-RAM for the code */
+		__be32 count;   	/* Number of 32-bit words of the code */
+		__be32 code_offset;     /* Offset of the actual microcode */
+		u8 major;       	/* The microcode version major */
+		u8 minor;       	/* The microcode version minor */
+		u8 revision;		/* The microcode version revision */
+		u8 padding;		/* Reserved, for alignment */
+		u8 reserved[4];		/* Reserved, for future expansion */
+	} __attribute__ ((packed)) microcode[1];
+	/* All microcode binaries should be located here */
+	/* CRC32 should be located here, after the microcode binaries */
+} __attribute__ ((packed));
+
+struct qe_firmware_info {
+	char id[64];		/* Firmware name */
+	u32 vtraps[8];		/* Virtual trap addresses */
+	u64 extended_modes;	/* Extended modes */
+};
+
+/* Upload a firmware to the QE */
+int qe_upload_firmware(const struct qe_firmware *firmware);
+
+/* Obtain information on the uploaded firmware */
+struct qe_firmware_info *qe_get_firmware_info(void);
+
 /* Buffer descriptors */
 struct qe_bd {
 	__be16 status;
@@ -328,6 +380,15 @@ enum comm_dir {
 
 #define QE_SDEBCR_BA_MASK	0x01FFFFFF
 
+/* Communication Processor */
+#define QE_CP_CERCR_MEE		0x8000	/* Multi-user RAM ECC enable */
+#define QE_CP_CERCR_IEE		0x4000	/* Instruction RAM ECC enable */
+#define QE_CP_CERCR_CIR		0x0800	/* Common instruction RAM */
+
+/* I-RAM */
+#define QE_IRAM_IADD_AIE	0x80000000	/* Auto Increment Enable */
+#define QE_IRAM_IADD_BADDR	0x00080000	/* Base Address */
+
 /* UPC */
 #define UPGCR_PROTOCOL	0x80000000	/* protocol ul2 or pl2 */
 #define UPGCR_TMS	0x40000000	/* Transmit master/slave mode */
-- 
1.5.2.4

^ permalink raw reply related

* [PATCH v4] qe: add ability to upload QE firmware
From: Timur Tabi @ 2007-12-07 15:32 UTC (permalink / raw)
  To: galak, linuxppc-dev; +Cc: Timur Tabi

Define the layout of a binary blob that contains a QE firmware and instructions
on how to upload it.  Add function qe_upload_firmware() to parse the blob
and perform the actual upload.  Fully define 'struct rsp' in immap_qe.h to
include the actual RISC Special Registers.  Added description of a new
QE firmware node to booting-without-of.txt.

Signed-off-by: Timur Tabi <timur@freescale.com>
---

Fixed mismatch between code and documentation in booting-without-of.txt.

This patch is for Kumar's for-2.6.25 branch.  This code is necessary for
my QE UART driver.

 Documentation/powerpc/00-INDEX               |    3 +
 Documentation/powerpc/booting-without-of.txt |   33 +++-
 Documentation/powerpc/qe_firmware.txt        |  295 ++++++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig               |    1 +
 arch/powerpc/sysdev/qe_lib/qe.c              |  240 +++++++++++++++++++++
 include/asm-powerpc/immap_qe.h               |   34 +++-
 include/asm-powerpc/qe.h                     |   61 ++++++
 7 files changed, 663 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/powerpc/qe_firmware.txt

diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX
index 94a3c57..3be84aa 100644
--- a/Documentation/powerpc/00-INDEX
+++ b/Documentation/powerpc/00-INDEX
@@ -28,3 +28,6 @@ sound.txt
 	- info on sound support under Linux/PPC
 zImage_layout.txt
 	- info on the kernel images for Linux/PPC
+qe_firmware.txt
+	- describes the layout of firmware binaries for the Freescale QUICC
+	  Engine and the code that parses and uploads the microcode therein.
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index e9a3cb1..6546b81 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -52,7 +52,10 @@ Table of Contents
       i) Freescale QUICC Engine module (QE)
       j) CFI or JEDEC memory-mapped NOR flash
       k) Global Utilities Block
-      l) Xilinx IP cores
+      l) Freescale Communications Processor Module
+      m) Chipselect/Local Bus
+      n) 4xx/Axon EMAC ethernet nodes
+      o) Xilinx IP cores
 
   VII - Specifying interrupt information for devices
     1) interrupts property
@@ -1772,6 +1775,32 @@ platforms are moved over to use the flattened-device-tree model.
 		};
 	};
 
+   viii) Uploaded QE firmware
+
+	 If a new firwmare has been uploaded to the QE (usually by the
+	 boot loader), then a 'firmware' child node should be added to the QE
+	 node.  This node provides information on the uploaded firmware that
+	 device drivers may need.
+
+	 Required properties:
+	 - id: The string name of the firmware.  This is taken from the 'id'
+	       member of the qe_firmware structure of the uploaded firmware.
+	       Device drivers can search this string to determine if the
+	       firmware they want is already present.
+	 - extended_modes: The Extended Modes bitfield, taken from the
+			   firmware binary.  It is a 64-bit number represented
+			   as an array of two 32-bit numbers.
+	 - virtual_traps: The virtual traps, taken from the firmware binary.
+			  It is an array of 16 32-bit numbers.
+
+	 Example:
+
+		firmware {
+			id = "Soft-UART";
+			extended_modes = <0 0>;
+			virtual_traps = <0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0>;
+		}
+
    j) CFI or JEDEC memory-mapped NOR flash
 
     Flash chips (Memory Technology Devices) are often used for solid state
@@ -2254,7 +2283,7 @@ platforms are moved over to use the flattened-device-tree model.
 			   available.
 			   For Axon: 0x0000012a
 
-   l) Xilinx IP cores
+   o) Xilinx IP cores
 
    The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
    in Xilinx Spartan and Virtex FPGAs.  The devices cover the whole range
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
new file mode 100644
index 0000000..8962664
--- /dev/null
+++ b/Documentation/powerpc/qe_firmware.txt
@@ -0,0 +1,295 @@
+	   Freescale QUICC Engine Firmware Uploading
+	   -----------------------------------------
+
+(c) 2007 Timur Tabi <timur at freescale.com>,
+    Freescale Semiconductor
+
+Table of Contents
+=================
+
+  I - Software License for Firmware
+
+  II - Microcode Availability
+
+  III - Description and Terminology
+
+  IV - Microcode Programming Details
+
+  V - Firmware Structure Layout
+
+  VI - Sample Code for Creating Firmware Files
+
+Revision Information
+====================
+
+November 30, 2007: Rev 1.0 - Initial version
+
+I - Software License for Firmware
+=================================
+
+Each firmware file comes with its own software license.  For information on
+the particular license, please see the license text that is distributed with
+the firmware.
+
+II - Microcode Availability
+===========================
+
+Firmware files are distributed through various channels.  Some are available on
+http://opensource.freescale.com.  For other firmware files, please contact
+your Freescale representative or your operating system vendor.
+
+III - Description and Terminology
+================================
+
+In this document, the term 'microcode' refers to the sequence of 32-bit
+integers that compose the actual QE microcode.
+
+The term 'firmware' refers to a binary blob that contains the microcode as
+well as other data that
+
+	1) describes the microcode's purpose
+	2) describes how and where to upload the microcode
+	3) specifies the values of various registers
+	4) includes additional data for use by specific device drivers
+
+Firmware files are binary files that contain only a firmware.
+
+IV - Microcode Programming Details
+===================================
+
+The QE architecture allows for only one microcode present in I-RAM for each
+RISC processor.  To replace any current microcode, a full QE reset (which
+disables the microcode) must be performed first.
+
+QE microcode is uploaded using the following procedure:
+
+1) The microcode is placed into I-RAM at a specific location, using the
+   IRAM.IADD and IRAM.IDATA registers.
+
+2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
+   needs split I-RAM.  Split I-RAM is only meaningful for SOCs that have
+   QEs with multiple RISC processors, such as the 8360.  Splitting the I-RAM
+   allows each processor to run a different microcode, effectively creating an
+   asymmetric multiprocessing (AMP) system.
+
+3) The TIBCR trap registers are loaded with the addresses of the trap handlers
+   in the microcode.
+
+4) The RSP.ECCR register is programmed with the value provided.
+
+5) If necessary, device drivers that need the virtual traps and extended mode
+   data will use them.
+
+Virtual Microcode Traps
+
+These virtual traps are conditional branches in the microcode.  These are
+"soft" provisional introduced in the ROMcode in order to enable higher
+flexibility and save h/w traps If new features are activated or an issue is
+being fixed in the RAM package utilizing they should be activated.  This data
+structure signals the microcode which of these virtual traps is active.
+
+This structure contains 6 words that the application should copy to some
+specific been defined.  This table describes the structure.
+
+	---------------------------------------------------------------
+	| Offset in |                  | Destination Offset | Size of |
+	|   array   |     Protocol     |   within PRAM      | Operand |
+	--------------------------------------------------------------|
+	|     0     | Ethernet         |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     4     | ATM              |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     8     | PPP              |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     12    | Ethernet RX      |      0x22          | 1 byte  |
+	|           | Distributor Page |                    |         |
+	---------------------------------------------------------------
+	|     16    | ATM Globtal      |      0x28          | 1 byte  |
+	|           | Params Table     |                    |         |
+	---------------------------------------------------------------
+	|     20    | Insert Frame     |      0xF8          | 4 bytes |
+	---------------------------------------------------------------
+
+
+Extended Modes
+
+This is a double word bit array (64 bits) that defines special functionality
+which has an impact on the softwarew drivers.  Each bit has its own impact
+and has special instructions for the s/w associated with it.  This structure is
+described in this table:
+
+	-----------------------------------------------------------------------
+	| Bit #  |     Name     |   Description                               |
+	-----------------------------------------------------------------------
+	|   0    | General      | Indicates that prior to each host command   |
+	|        | push command | given by the application, the software must |
+	|        |              | assert a special host command (push command)|
+	|        |              | CECDR = 0x00800000.                         |
+	|        |              | CECR = 0x01c1000f.                          |
+	-----------------------------------------------------------------------
+	|   1    | UCC ATM      | Indicates that after issuing ATM RX INIT    |
+	|        | RX INIT      | command, the host must issue another special|
+	|        | push command | command (push command) and immediately      |
+	|        |              | following that re-issue the ATM RX INIT     |
+	|        |              | command. (This makes the sequence of        |
+	|        |              | initializing the ATM receiver a sequence of |
+	|        |              | three host commands)                        |
+	|        |              | CECDR = 0x00800000.                         |
+	|        |              | CECR = 0x01c1000f.                          |
+	-----------------------------------------------------------------------
+	|   2    | Add/remove   | Indicates that following the specific host  |
+	|        | command      | command: "Add/Remove entry in Hash Lookup   |
+	|        | validation   | Table" used in Interworking setup, the user |
+	|        |              | must issue another command.                 |
+	|        |              | CECDR = 0xce000003.                         |
+	|        |              | CECR = 0x01c10f58.                          |
+	-----------------------------------------------------------------------
+	|   3    | General push | Indicates that the s/w has to initialize    |
+	|        | command      | some pointers in the Ethernet thread pages  |
+	|        |              | which are used when Header Compression is   |
+	|        |              | activated.  The full details of these       |
+	|        |              | pointers is located in the software drivers.|
+	-----------------------------------------------------------------------
+	|   4    | General push | Indicates that after issuing Ethernet TX    |
+	|        | command      | INIT command, user must issue this command  |
+	|        |              | for each SNUM of Ethernet TX thread.        |
+	|        |              | CECDR = 0x00800003.                         |
+	|        |              | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM},  |
+	|        |              |        1'b{1}, 12'b{0}, 4'b{1}              |
+	-----------------------------------------------------------------------
+	| 5 - 31 |     N/A      | Reserved, set to zero.                      |
+	-----------------------------------------------------------------------
+
+V - Firmware Structure Layout
+==============================
+
+QE microcode from Freescale is typically provided as a header file.  This
+header file contains macros that define the microcode binary itself as well as
+some other data used in uploading that microcode.  The format of these files
+do not lend themselves to simple inclusion into other code.  Hence,
+the need for a more portable format.  This section defines that format.
+
+Instead of distributing a header file, the microcode and related data are
+embedded into a binary blob.  This blob is passed to the qe_upload_firmware()
+function, which parses the blob and performs everything necessary to upload
+the microcode.
+
+All integers are big-endian.  See the comments for function
+qe_upload_firmware() for up-to-date implementation information.
+
+This structure supports versioning, where the version of the structure is
+embedded into the structure itself.  To ensure forward and backwards
+compatibility, all versions of the structure must use the same 'qe_header'
+structure at the beginning.
+
+'header' (type: struct qe_header):
+	The 'length' field is the size, in bytes, of the entire structure,
+	including all the microcode embedded in it, as well as the CRC (if
+	present).
+
+	The 'magic' field is an array of three bytes that contains the letters
+	'Q', 'E', and 'F'.  This is an identifier that indicates that this
+	structure is a QE Firmware structure.
+
+	The 'version' field is a single byte that indicates the version of this
+	structure.  If the layout of the structure should ever need to be
+	changed to add support for additional types of microcode, then the
+	version number should also be changed.
+
+The 'id' field is a null-terminated string(suitable for printing) that
+identifies the firmware.
+
+The 'count' field indicates the number of 'microcode' structures.  There
+must be one and only one 'microcode' structure for each RISC processor.
+Therefore, this field also represents the number of RISC processors for this
+SOC.
+
+The 'soc' structure contains the SOC numbers and revisions used to match
+the microcode to the SOC itself.  Normally, the microcode loader should
+check the data in this structure with the SOC number and revisions, and
+only upload the microcode if there's a match.  However, this check is not
+made on all platforms.
+
+Although it is not recommended, you can specify '0' in the soc.model
+field to skip matching SOCs altogether.
+
+The 'model' field is a 16-bit number that matches the actual SOC. The
+'major' and 'minor' fields are the major and minor revision numbrs,
+respectively, of the SOC.
+
+For example, to match the 8323, revision 1.0:
+     soc.model = 8323
+     soc.major = 1
+     soc.minor = 0
+
+'padding' is neccessary for structure alignment.  This field ensures that the
+'extended_modes' field is aligned on a 64-bit boundary.
+
+'extended_modes' is a bitfield that defines special functionality which has an
+impact on the device drivers.  Each bit has its own impact and has special
+instructions for the driver associated with it.  This field is stored in
+the QE library and available to any driver that calles qe_get_firmware_info().
+
+'vtraps' is an array of 8 words that contain virtual trap values for each
+virtual traps.  As with 'extended_modes', this field is stored in the QE
+library and available to any driver that calles qe_get_firmware_info().
+
+'microcode' (type: struct qe_microcode):
+	For each RISC processor there is one 'microcode' structure.  The first
+	'microcode' structure is for the first RISC, and so on.
+
+	The 'id' field is a null-terminated string suitable for printing that
+	identifies this particular microcode.
+
+	'traps' is an array of 16 words that contain hardware trap values
+	for each of the 16 traps.  If trap[i] is 0, then this particular
+	trap is to be ignored (i.e. not written to TIBCR[i]).  The entire value
+	is written as-is to the TIBCR[i] register, so be sure to set the EN
+	and T_IBP bits if necessary.
+
+	'eccr' is the value to program into the ECCR register.
+
+	'iram_offset' is the offset into IRAM to start writing the
+	microcode.
+
+	'count' is the number of 32-bit words in the microcode.
+
+	'code_offset' is the offset, in bytes, from the beginning of this
+	structure where the microcode itself can be found.  The first
+	microcode binary should be located immediately after the 'microcode'
+	array.
+
+	'major', 'minor', and 'revision' are the major, minor, and revision
+	version numbers, respectively, of the microcode.  If all values are 0,
+	then these fields are ignored.
+
+	'reserved' is necessary for structure alignment.  Since 'microcode'
+	is an array, the 64-bit 'extended_modes' field needs to be aligned
+	on a 64-bit boundary, and this can only happen if the size of
+	'microcode' is a multiple of 8 bytes.  To ensure that, we add
+	'reserved'.
+
+After the last microcode is a 32-bit CRC.  It can be calculated using
+this algorithm:
+
+u32 crc32(const u8 *p, unsigned int len)
+{
+	unsigned int i;
+	u32 crc = 0;
+
+	while (len--) {
+	   crc ^= *p++;
+	   for (i = 0; i < 8; i++)
+		   crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+	}
+	return crc;
+}
+
+VI - Sample Code for Creating Firmware Files
+============================================
+
+A Python program that creates firmware binaries from the header files normally
+distributed by Freescale can be found on http://opensource.freescale.com.
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index ea22cad..18f101b 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -265,6 +265,7 @@ config TAU_AVERAGE
 config QUICC_ENGINE
 	bool
 	select PPC_LIB_RHEAP
+	select CRC32
 	help
 	  The QUICC Engine (QE) is a new generation of communications
 	  coprocessors on Freescale embedded CPUs (akin to CPM in older chips).
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index 1df3b4a..497eb88 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
+#include <linux/crc32.h>
 #include <asm/irq.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -362,3 +363,242 @@ void *qe_muram_addr(unsigned long offset)
 	return (void *)&qe_immr->muram[offset];
 }
 EXPORT_SYMBOL(qe_muram_addr);
+
+/* The maximum number of RISCs we support */
+#define MAX_QE_RISC     2
+
+/* Firmware information stored here for qe_get_firmware_info() */
+static struct qe_firmware_info qe_firmware_info;
+
+/*
+ * Set to 1 if QE firmware has been uploaded, and therefore
+ * qe_firmware_info contains valid data.
+ */
+static int qe_firmware_uploaded;
+
+/*
+ * Upload a QE microcode
+ *
+ * This function is a worker function for qe_upload_firmware().  It does
+ * the actual uploading of the microcode.
+ */
+static void qe_upload_microcode(const void *base,
+	const struct qe_microcode *ucode)
+{
+	const __be32 *code = base + be32_to_cpu(ucode->code_offset);
+	unsigned int i;
+
+	if (ucode->major || ucode->minor || ucode->revision)
+		printk(KERN_INFO "qe-firmware: "
+			"uploading microcode '%s' version %u.%u.%u\n",
+			ucode->id, ucode->major, ucode->minor, ucode->revision);
+	else
+		printk(KERN_INFO "qe-firmware: "
+			"uploading microcode '%s'\n", ucode->id);
+
+	/* Use auto-increment */
+	out_be32(&qe_immr->iram.iadd, be32_to_cpu(ucode->iram_offset) |
+		QE_IRAM_IADD_AIE | QE_IRAM_IADD_BADDR);
+
+	for (i = 0; i < be32_to_cpu(ucode->count); i++)
+		out_be32(&qe_immr->iram.idata, be32_to_cpu(code[i]));
+}
+
+/*
+ * Upload a microcode to the I-RAM at a specific address.
+ *
+ * See Documentation/powerpc/qe-firmware.txt for information on QE microcode
+ * uploading.
+ *
+ * Currently, only version 1 is supported, so the 'version' field must be
+ * set to 1.
+ *
+ * The SOC model and revision are not validated, they are only displayed for
+ * informational purposes.
+ *
+ * 'calc_size' is the calculated size, in bytes, of the firmware structure and
+ * all of the microcode structures, minus the CRC.
+ *
+ * 'length' is the size that the structure says it is, including the CRC.
+ */
+int qe_upload_firmware(const struct qe_firmware *firmware)
+{
+	unsigned int i;
+	unsigned int j;
+	u32 crc;
+	size_t calc_size = sizeof(struct qe_firmware);
+	size_t length;
+	const struct qe_header *hdr;
+
+	if (!firmware) {
+		printk(KERN_ERR "qe-firmware: invalid pointer\n");
+		return -EINVAL;
+	}
+
+	hdr = &firmware->header;
+	length = be32_to_cpu(hdr->length);
+
+	/* Check the magic */
+	if ((hdr->magic[0] != 'Q') || (hdr->magic[1] != 'E') ||
+	    (hdr->magic[2] != 'F')) {
+		printk(KERN_ERR "qe-firmware: not a microcode\n");
+		return -EPERM;
+	}
+
+	/* Check the version */
+	if (hdr->version != 1) {
+		printk(KERN_ERR "qe-firmware: unsupported version\n");
+		return -EPERM;
+	}
+
+	/* Validate some of the fields */
+	if ((firmware->count < 1) || (firmware->count >= MAX_QE_RISC)) {
+		printk(KERN_ERR "qe-firmware: invalid data\n");
+		return -EINVAL;
+	}
+
+	/* Validate the length and check if there's a CRC */
+	calc_size += (firmware->count - 1) * sizeof(struct qe_microcode);
+
+	for (i = 0; i < firmware->count; i++)
+		/*
+		 * For situations where the second RISC uses the same microcode
+		 * as the first, the 'code_offset' and 'count' fields will be
+		 * zero, so it's okay to add those.
+		 */
+		calc_size += sizeof(__be32) *
+			be32_to_cpu(firmware->microcode[i].count);
+
+	/* Validate the length */
+	if (length != calc_size + sizeof(__be32)) {
+		printk(KERN_ERR "qe-firmware: invalid length\n");
+		return -EPERM;
+	}
+
+	/* Validate the CRC */
+	crc = be32_to_cpu(*(__be32 *)((void *)firmware + calc_size));
+	if (crc != crc32(0, firmware, calc_size)) {
+		printk(KERN_ERR "qe-firmware: firmware CRC is invalid\n");
+		return -EIO;
+	}
+
+	/*
+	 * If the microcode calls for it, split the I-RAM.
+	 */
+	if (!firmware->split)
+		setbits16(&qe_immr->cp.cercr, QE_CP_CERCR_CIR);
+
+	if (firmware->soc.model)
+		printk(KERN_INFO
+			"qe-firmware: firmware '%s' for %u V%u.%u\n",
+			firmware->id, be16_to_cpu(firmware->soc.model),
+			firmware->soc.major, firmware->soc.minor);
+	else
+		printk(KERN_INFO "qe-firmware: firmware '%s'\n",
+			firmware->id);
+
+	/*
+	 * The QE only supports one microcode per RISC, so clear out all the
+	 * saved microcode information and put in the new.
+	 */
+	memset(&qe_firmware_info, 0, sizeof(qe_firmware_info));
+	strcpy(qe_firmware_info.id, firmware->id);
+	qe_firmware_info.extended_modes = firmware->extended_modes;
+	memcpy(qe_firmware_info.vtraps, firmware->vtraps,
+		sizeof(firmware->vtraps));
+
+	/* Loop through each microcode. */
+	for (i = 0; i < firmware->count; i++) {
+		const struct qe_microcode *ucode = &firmware->microcode[i];
+
+		/* Upload a microcode if it's present */
+		if (ucode->code_offset)
+			qe_upload_microcode(firmware, ucode);
+
+		/* Program the traps for this processor */
+		for (j = 0; j < 16; j++) {
+			u32 trap = be32_to_cpu(ucode->traps[j]);
+
+			if (trap)
+				out_be32(&qe_immr->rsp[i].tibcr[j], trap);
+		}
+
+		/* Enable traps */
+		out_be32(&qe_immr->rsp[i].eccr, be32_to_cpu(ucode->eccr));
+	}
+
+	qe_firmware_uploaded = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(qe_upload_firmware);
+
+/*
+ * Get info on the currently-loaded firmware
+ *
+ * This function also checks the device tree to see if the boot loader has
+ * uploaded a firmware already.
+ */
+struct qe_firmware_info *qe_get_firmware_info(void)
+{
+	static int initialized;
+
+	/*
+	 * If we haven't checked yet, and a driver hasn't uploaded a firmware
+	 * yet, then check the device tree for information.
+	 */
+	do {
+		struct device_node *qe;
+		struct device_node *fw = NULL;
+		const char *sprop;
+		const u32 *iprop;
+
+		if (initialized || qe_firmware_uploaded)
+			break;
+
+		initialized = 1;
+
+		qe = of_find_node_by_type(NULL, "qe");
+		if (!qe)
+			break;
+
+		/* Find the 'firmware' child node */
+		while ((fw = of_get_next_child(qe, fw)))
+			if (strcmp(fw->name, "firmware") == 0)
+				break;
+
+		/* Did we find the 'firmware' node? */
+		if (!fw) {
+			of_node_put(qe);
+			break;
+		}
+
+		qe_firmware_uploaded = 1;
+
+		/* Copy the data into qe_firmware_info*/
+		sprop = of_get_property(fw, "id", NULL);
+		if (sprop)
+			strncpy(qe_firmware_info.id, sprop,
+				sizeof(qe_firmware_info.id) - 1);
+
+		iprop = of_get_property(fw, "extended_modes", NULL);
+		if (iprop)
+			qe_firmware_info.extended_modes =
+				(u64) iprop[0] << 32 | iprop[1];
+
+		iprop = of_get_property(fw, "virtual_traps", NULL);
+		if (iprop) {
+			unsigned int i = 0;
+
+			for (; i < ARRAY_SIZE(qe_firmware_info.vtraps); i++)
+				qe_firmware_info.vtraps[i] = iprop[i];
+		}
+
+		of_node_put(fw);
+		of_node_put(qe);
+	} while (0);
+
+	return qe_firmware_uploaded ? &qe_firmware_info : NULL;
+}
+EXPORT_SYMBOL(qe_get_firmware_info);
+
diff --git a/include/asm-powerpc/immap_qe.h b/include/asm-powerpc/immap_qe.h
index aba9806..82a4526 100644
--- a/include/asm-powerpc/immap_qe.h
+++ b/include/asm-powerpc/immap_qe.h
@@ -393,9 +393,39 @@ struct dbg {
 	u8	res2[0x48];
 } __attribute__ ((packed));
 
-/* RISC Special Registers (Trap and Breakpoint) */
+/*
+ * RISC Special Registers (Trap and Breakpoint).  These are described in
+ * the QE Developer's Handbook.
+ */
 struct rsp {
-	u32	reg[0x40];	/* 64 32-bit registers */
+	__be32 tibcr[16];	/* Trap/instruction breakpoint control regs */
+	u8 res0[64];
+	__be32 ibcr0;
+	__be32 ibs0;
+	__be32 ibcnr0;
+	u8 res1[4];
+	__be32 ibcr1;
+	__be32 ibs1;
+	__be32 ibcnr1;
+	__be32 npcr;
+	__be32 dbcr;
+	__be32 dbar;
+	__be32 dbamr;
+	__be32 dbsr;
+	__be32 dbcnr;
+	u8 res2[12];
+	__be32 dbdr_h;
+	__be32 dbdr_l;
+	__be32 dbdmr_h;
+	__be32 dbdmr_l;
+	__be32 bsr;
+	__be32 bor;
+	__be32 bior;
+	u8 res3[4];
+	__be32 iatr[4];
+	__be32 eccr;		/* Exception control configuration register */
+	__be32 eicr;
+	u8 res4[0x100-0xf8];
 } __attribute__ ((packed));
 
 struct qe_immap {
diff --git a/include/asm-powerpc/qe.h b/include/asm-powerpc/qe.h
index bcf60be..35c7b8d 100644
--- a/include/asm-powerpc/qe.h
+++ b/include/asm-powerpc/qe.h
@@ -93,6 +93,58 @@ unsigned long qe_muram_alloc_fixed(unsigned long offset, int size);
 void qe_muram_dump(void);
 void *qe_muram_addr(unsigned long offset);
 
+/* Structure that defines QE firmware binary files.
+ *
+ * See Documentation/powerpc/qe-firmware.txt for a description of these
+ * fields.
+ */
+struct qe_firmware {
+	struct qe_header {
+		__be32 length;  /* Length of the entire structure, in bytes */
+		u8 magic[3];    /* Set to { 'Q', 'E', 'F' } */
+		u8 version;     /* Version of this layout. First ver is '1' */
+	} header;
+	u8 id[62];      /* Null-terminated identifier string */
+	u8 split;	/* 0 = shared I-RAM, 1 = split I-RAM */
+	u8 count;       /* Number of microcode[] structures */
+	struct {
+		__be16 model;   	/* The SOC model  */
+		u8 major;       	/* The SOC revision major */
+		u8 minor;       	/* The SOC revision minor */
+	} __attribute__ ((packed)) soc;
+	u8 padding[4];			/* Reserved, for alignment */
+	__be64 extended_modes;		/* Extended modes */
+	__be32 vtraps[8];		/* Virtual trap addresses */
+	u8 reserved[4];			/* Reserved, for future expansion */
+	struct qe_microcode {
+		u8 id[32];      	/* Null-terminated identifier */
+		__be32 traps[16];       /* Trap addresses, 0 == ignore */
+		__be32 eccr;    	/* The value for the ECCR register */
+		__be32 iram_offset;     /* Offset into I-RAM for the code */
+		__be32 count;   	/* Number of 32-bit words of the code */
+		__be32 code_offset;     /* Offset of the actual microcode */
+		u8 major;       	/* The microcode version major */
+		u8 minor;       	/* The microcode version minor */
+		u8 revision;		/* The microcode version revision */
+		u8 padding;		/* Reserved, for alignment */
+		u8 reserved[4];		/* Reserved, for future expansion */
+	} __attribute__ ((packed)) microcode[1];
+	/* All microcode binaries should be located here */
+	/* CRC32 should be located here, after the microcode binaries */
+} __attribute__ ((packed));
+
+struct qe_firmware_info {
+	char id[64];		/* Firmware name */
+	u32 vtraps[8];		/* Virtual trap addresses */
+	u64 extended_modes;	/* Extended modes */
+};
+
+/* Upload a firmware to the QE */
+int qe_upload_firmware(const struct qe_firmware *firmware);
+
+/* Obtain information on the uploaded firmware */
+struct qe_firmware_info *qe_get_firmware_info(void);
+
 /* Buffer descriptors */
 struct qe_bd {
 	__be16 status;
@@ -328,6 +380,15 @@ enum comm_dir {
 
 #define QE_SDEBCR_BA_MASK	0x01FFFFFF
 
+/* Communication Processor */
+#define QE_CP_CERCR_MEE		0x8000	/* Multi-user RAM ECC enable */
+#define QE_CP_CERCR_IEE		0x4000	/* Instruction RAM ECC enable */
+#define QE_CP_CERCR_CIR		0x0800	/* Common instruction RAM */
+
+/* I-RAM */
+#define QE_IRAM_IADD_AIE	0x80000000	/* Auto Increment Enable */
+#define QE_IRAM_IADD_BADDR	0x00080000	/* Base Address */
+
 /* UPC */
 #define UPGCR_PROTOCOL	0x80000000	/* protocol ul2 or pl2 */
 #define UPGCR_TMS	0x40000000	/* Transmit master/slave mode */
-- 
1.5.2.4

^ permalink raw reply related

* Re: NAND JFFS2 wbuf non-contiguous write problem in linux-2.4.20
From: Santanu Sen @ 2007-12-07 12:56 UTC (permalink / raw)
  To: linuxppc-embedded

[-- Attachment #1: Type: text/plain, Size: 297 bytes --]

Here is the current version of the wbuf.c we have .

Regards.


      ____________________________________________________________________________________
Be a better friend, newshound, and 
know-it-all with Yahoo! Mobile.  Try it now.  http://mobile.yahoo.com/;_ylt=Ahu06i62sR8HDtDypao8Wcj9tAcJ 

[-- Attachment #2: 590649757-wbuf.c --]
[-- Type: application/octet-stream, Size: 24390 bytes --]

/*
 * JFFS2 -- Journalling Flash File System, Version 2.
 *
 * Copyright (C) 2001, 2002 Red Hat, Inc.
 *
 * Created by David Woodhouse <dwmw2@cambridge.redhat.com>
 *
 * For licensing information, see the file 'LICENCE' in this directory.
 *
 * $Id: wbuf.c,v 1.1 2003/06/26 13:43:20 anil Exp $
 *
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mtd/mtd.h>
#include <linux/crc32.h>
#include <linux/mtd/nand.h>
#include "nodelist.h"

/* max. erase failures before we mark a block bad */
#define MAX_ERASE_FAILURES 	5

/* two seconds timeout for timed wbuf-flushing */
#define WBUF_FLUSH_TIMEOUT	2 * HZ

#define JFFS2_OOB_ECCPOS0		0
#define JFFS2_OOB_ECCPOS1		1
#define JFFS2_OOB_ECCPOS2		2
#define JFFS2_OOB_ECCPOS3		3
#define JFFS2_OOB_ECCPOS4		6
#define JFFS2_OOB_ECCPOS5		7

#define NAND_JFFS2_OOB8_FSDAPOS		6
#define NAND_JFFS2_OOB16_FSDAPOS	8
#define NAND_JFFS2_OOB8_FSDALEN		2
#define NAND_JFFS2_OOB16_FSDALEN	8

struct nand_oobinfo jffs2_oobinfo = {
	useecc: 1,
	eccpos: {JFFS2_OOB_ECCPOS0, JFFS2_OOB_ECCPOS1, JFFS2_OOB_ECCPOS2, JFFS2_OOB_ECCPOS3, JFFS2_OOB_ECCPOS4, JFFS2_OOB_ECCPOS5}
};

static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
{
	struct list_head *this, *next;
	static int n;

	if (list_empty(&c->erasable_pending_wbuf_list))
		return;

	list_for_each_safe(this, next, &c->erasable_pending_wbuf_list) {
		struct jffs2_eraseblock *jeb = list_entry(this, struct jffs2_eraseblock, list);

		D1(printk(KERN_DEBUG "Removing eraseblock at 0x%08x from erasable_pending_wbuf_list...\n", jeb->offset));
		list_del(this);
		if ((jiffies + (n++)) & 127) {
			/* Most of the time, we just erase it immediately. Otherwise we
			   spend ages scanning it on mount, etc. */
			D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
			list_add_tail(&jeb->list, &c->erase_pending_list);
			c->nr_erasing_blocks++;
			jffs2_erase_pending_trigger(c);
		} else {
			/* Sometimes, however, we leave it elsewhere so it doesn't get
			   immediately reused, and we spread the load a bit. */
			D1(printk(KERN_DEBUG "...and adding to erasable_list\n"));
			list_add_tail(&jeb->list, &c->erasable_list);
		}
	}
}

/* 
*	Timed flushing of wbuf. If we have no consecutive write to wbuf, within	
*	the specified time, we flush the contents with padding !
*/
void jffs2_wbuf_timeout (unsigned long data)
{
	struct jffs2_sb_info *c = (struct jffs2_sb_info *) data;
	/* 
	* Wake up the flush process, we need process context to have the right 
	* to sleep on flash write
	*/
	D1(printk(KERN_DEBUG "jffs2_wbuf_timeout(): timer expired\n"));
	schedule_work(&c->wbuf_task);
}

/*
*	Process for timed wbuf flush
*
*	FIXME What happens, if we have a write failure there ????
*/
void jffs2_wbuf_process (void *data)
{
	struct jffs2_sb_info *c = (struct jffs2_sb_info *) data;	
	
	D1(printk(KERN_DEBUG "jffs2_wbuf_process() entered\n"));
	
	/* Check, if the timer is active again */
	if (timer_pending (&c->wbuf_timer)) {
		D1(printk (KERN_DEBUG "Nothing to do, timer is active again\n"));
		return;
	}

	if (down_trylock(&c->alloc_sem)) {
		/* If someone else has the alloc_sem, they're about to
		   write anyway. So no need to waste space by
		   padding */
		D1(printk (KERN_DEBUG "jffs2_wbuf_process() alloc_sem already occupied\n"));
		return;
	}	

	D1(printk (KERN_DEBUG "jffs2_wbuf_process() alloc_sem got\n"));

	if (!c->nextblock) {
		D1(printk(KERN_DEBUG "jffs2_wbuf_process(): nextblock NULL, nothing to do\n"));
		if (c->wbuf_len) {
			printk(KERN_WARNING "jffs2_wbuf_process(): c->wbuf_len is 0x%03x but nextblock is NULL!\n", c->wbuf_len);
			up(&c->alloc_sem);
			BUG();
		}
		return;
	}
	
	
	/* if !c->nextblock then the tail will have got flushed from
	   jffs2_do_reserve_space() anyway. */
	if(c->nextblock)
		jffs2_flush_wbuf(c, 2); /* pad and adjust nextblock */

	up(&c->alloc_sem);
}


/* Meaning of pad argument:
   0: Do not pad. Probably pointless - we only ever use this when we can't pad anyway.
   1: Pad, do not adjust nextblock free_size
   2: Pad, adjust nextblock free_size
*/
int jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
{
	int ret;
	size_t retlen;

	/* Nothing to do if not NAND flash. In particular, we shouldn't
	   del_timer() the timer we never initialised. */
	if (jffs2_can_mark_obsolete(c))
		return 0;

	if (!down_trylock(&c->alloc_sem)) {
		up(&c->alloc_sem);
		printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n");
		BUG();
	}

	/* delete a eventually started timed wbuf flush */
	del_timer_sync(&c->wbuf_timer);

	if(!c->wbuf || !c->wbuf_len)
		return 0;

	/* claim remaining space on the page
	   this happens, if we have a change to a new block,
	   or if fsync forces us to flush the writebuffer.
	   if we have a switch to next page, we will not have
	   enough remaining space for this. 
	*/
	if (pad) {
		c->wbuf_len = PAD(c->wbuf_len);
		
		if ( c->wbuf_len + sizeof(struct jffs2_unknown_node) < c->wbuf_pagesize) {
			struct jffs2_unknown_node *padnode = (void *)(c->wbuf + c->wbuf_len);
			padnode->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
			padnode->nodetype = cpu_to_je16(JFFS2_NODETYPE_PADDING);
			padnode->totlen = cpu_to_je32(c->wbuf_pagesize - c->wbuf_len);
			padnode->hdr_crc = cpu_to_je32(crc32(0, padnode, sizeof(*padnode)-4));
		}
	}
	/* else jffs2_flash_writev has actually filled in the rest of the
	   buffer for us, and will deal with the node refs etc. later. */
	
	ret = c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf, NULL, &jffs2_oobinfo);
	
	if (ret || retlen != c->wbuf_pagesize) {
		if (ret)
			printk(KERN_CRIT "jffs2_flush_wbuf(): Write failed with %d\n",ret);
		else
			printk(KERN_CRIT "jffs2_flush_wbuf(): Write was short: %zd instead of %d\n",
				retlen, c->wbuf_pagesize);
			
		ret = -EIO;		
		/* CHECKME NAND 
		   So that the caller knows what happened. If
		   we were called from jffs2_flash_writev(), it'll
		   know to return failure and _its_ caller will
		   try again. writev gives back to jffs2_write_xxx 
		   in write.c. There are the real fixme's
		 */

		/*  FIXME NAND
		   If we were called from GC or fsync, there's no repair kit yet
		*/
		    
		return ret; 
	}

	/* Adjusting free size of next block only, if it's called from fsync ! */
	if (pad == 2) {
		D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of c->nextblock\n"));
		spin_lock(&c->erase_completion_lock);
		if (!c->nextblock)
			BUG();
		/* wbuf_pagesize - wbuf_len is the amount of space that's to be 
		   padded. If there is less free space in the block than that,
		   something screwed up */
		if (c->nextblock->free_size < (c->wbuf_pagesize - c->wbuf_len)) {
			printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n",
			       c->wbuf_ofs, c->wbuf_len, c->wbuf_pagesize-c->wbuf_len);
			printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n",
			       c->nextblock->offset, c->nextblock->free_size);
			BUG();
		}
		c->nextblock->free_size -= (c->wbuf_pagesize - c->wbuf_len);
		c->free_size -= (c->wbuf_pagesize - c->wbuf_len);
		c->nextblock->wasted_size += (c->wbuf_pagesize - c->wbuf_len);
		c->wasted_size += (c->wbuf_pagesize - c->wbuf_len);
		spin_unlock(&c->erase_completion_lock);
	}

	/* Stick any now-obsoleted blocks on the erase_pending_list */
	spin_lock(&c->erase_completion_lock);
	jffs2_refile_wbuf_blocks(c);
	spin_unlock(&c->erase_completion_lock);

	memset(c->wbuf,0xff,c->wbuf_pagesize);
	/* adjust write buffer offset, else we get a non contigous write bug */
	c->wbuf_ofs+= c->wbuf_pagesize;
	c->wbuf_len = 0;
	return 0;
}

#define PAGE_DIV(x) ( (x) & (~(c->wbuf_pagesize - 1)) )
#define PAGE_MOD(x) ( (x) & (c->wbuf_pagesize - 1) )
int jffs2_flash_writev(struct jffs2_sb_info *c, const struct iovec *invecs, unsigned long count, loff_t to, size_t *retlen)
{
	struct iovec outvecs[3];
	uint32_t totlen = 0;
	uint32_t split_ofs = 0;
	uint32_t old_totlen;
	int ret, splitvec = -1;
	int invec, outvec;
	size_t wbuf_retlen;
	unsigned char *wbuf_ptr;
	size_t donelen = 0;
	uint32_t outvec_to = to;

	/* If not NAND flash, don't bother */
	if (!c->wbuf)
		return jffs2_flash_direct_writev(c, invecs, count, to, retlen);
	
	/* If wbuf_ofs is not initialized, set it to target address */
	if (c->wbuf_ofs == 0xFFFFFFFF) {
		c->wbuf_ofs = PAGE_DIV(to);
		c->wbuf_len = PAGE_MOD(to);			
		memset(c->wbuf,0xff,c->wbuf_pagesize);
	}

	/* Sanity checks on target address. 
	   It's permitted to write at PAD(c->wbuf_len+c->wbuf_ofs), 
	   and it's permitted to write at the beginning of a new 
	   erase block. Anything else, and you die.
	   New block starts at xxx000c (0-b = block header)
	*/
	if ( (to & ~(c->sector_size-1)) != (c->wbuf_ofs & ~(c->sector_size-1)) ) {
		/* It's a write to a new block */
		if (c->wbuf_len) {
			D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx causes flush of wbuf at 0x%08x\n", (unsigned long)to, c->wbuf_ofs));
			ret = jffs2_flush_wbuf(c, 1);
			if (ret) {
				/* the underlying layer has to check wbuf_len to do the cleanup */
				D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret));
				*retlen = 0;
				return ret;
			}
		}
		/* set pointer to new block */
		c->wbuf_ofs = PAGE_DIV(to);
		c->wbuf_len = PAGE_MOD(to);			
	} 

	if (to != PAD(c->wbuf_ofs + c->wbuf_len)) {
		/* We're not writing immediately after the writebuffer. Bad. */
		printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write to %08lx\n", (unsigned long)to);
		if (c->wbuf_len)
			printk(KERN_CRIT "wbuf was previously %08x-%08x\n",
					  c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len);
		BUG();
	}

	/* Note outvecs[3] above. We know count is never greater than 2 */
	if (count > 2) {
		printk(KERN_CRIT "jffs2_flash_writev(): count is %ld\n", count);
		BUG();
	}

	invec = 0;
	outvec = 0;


	/* Fill writebuffer first, if already in use */	
	if (c->wbuf_len) {
		uint32_t invec_ofs = 0;

		/* adjust alignment offset */ 
		if (c->wbuf_len != PAGE_MOD(to)) {
			c->wbuf_len = PAGE_MOD(to);
			/* take care of alignment to next page */
			if (!c->wbuf_len)
				c->wbuf_len = c->wbuf_pagesize;
		}
		
		while(c->wbuf_len < c->wbuf_pagesize) {
			uint32_t thislen;
			
			if (invec == count)
				goto alldone;

			thislen = c->wbuf_pagesize - c->wbuf_len;

			if (thislen >= invecs[invec].iov_len)
				thislen = invecs[invec].iov_len;
	
			invec_ofs = thislen;

			memcpy(c->wbuf + c->wbuf_len, invecs[invec].iov_base, thislen);
			c->wbuf_len += thislen;
			donelen += thislen;
			/* Get next invec, if actual did not fill the buffer */
			if (c->wbuf_len < c->wbuf_pagesize) 
				invec++;
		}			
		
		/* write buffer is full, flush buffer */
		ret = jffs2_flush_wbuf(c, 0);
		if (ret) {
			/* the underlying layer has to check wbuf_len to do the cleanup */
			D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret));
			*retlen = 0;
			return ret;
		}
		outvec_to += donelen;
		c->wbuf_ofs = outvec_to;
		
		/* All invecs done ? */
		if (invec == count)
			goto alldone;

		/* Set up the first outvec, containing the remainder of the
		   invec we partially used */
		if (invecs[invec].iov_len > invec_ofs) {
			outvecs[0].iov_base = invecs[invec].iov_base+invec_ofs;
			totlen = outvecs[0].iov_len = invecs[invec].iov_len-invec_ofs;
			if (totlen > c->wbuf_pagesize) {
				splitvec = outvec;
				split_ofs = outvecs[0].iov_len - PAGE_MOD(totlen);
			}
			outvec++;
		}
		invec++;
	}

	/* OK, now we've flushed the wbuf and the start of the bits
	   we have been asked to write, now to write the rest.... */

	/* totlen holds the amount of data still to be written */
	old_totlen = totlen;
	for ( ; invec < count; invec++,outvec++ ) {
		outvecs[outvec].iov_base = invecs[invec].iov_base;
		totlen += outvecs[outvec].iov_len = invecs[invec].iov_len;
		if (PAGE_DIV(totlen) != PAGE_DIV(old_totlen)) {
			splitvec = outvec;
			split_ofs = outvecs[outvec].iov_len - PAGE_MOD(totlen);
			old_totlen = totlen;
		}
	}

	/* Now the outvecs array holds all the remaining data to write */
	/* Up to splitvec,split_ofs is to be written immediately. The rest
	   goes into the (now-empty) wbuf */

	if (splitvec != -1) {
		uint32_t remainder;
		int ret;

		remainder = outvecs[splitvec].iov_len - split_ofs;
		outvecs[splitvec].iov_len = split_ofs;

		/* We did cross a page boundary, so we write some now */
		ret = c->mtd->writev_ecc(c->mtd, outvecs, splitvec+1, outvec_to, &wbuf_retlen, NULL, &jffs2_oobinfo); 
		if (ret < 0 || wbuf_retlen != PAGE_DIV(totlen)) {
			/* At this point we have no problem,
			   c->wbuf is empty. 
			*/
			*retlen = donelen;
			return ret;
		}
		
		donelen += wbuf_retlen;
		c->wbuf_ofs = PAGE_DIV(outvec_to) + PAGE_DIV(totlen);

		if (remainder) {
			outvecs[splitvec].iov_base += split_ofs;
			outvecs[splitvec].iov_len = remainder;
		} else {
			splitvec++;
		}

	} else {
		splitvec = 0;
	}

	/* Now splitvec points to the start of the bits we have to copy
	   into the wbuf */
	wbuf_ptr = c->wbuf;

	for ( ; splitvec < outvec; splitvec++) {
		/* Don't copy the wbuf into itself */
		if (outvecs[splitvec].iov_base == c->wbuf)
			continue;
		memcpy(wbuf_ptr, outvecs[splitvec].iov_base, outvecs[splitvec].iov_len);
		wbuf_ptr += outvecs[splitvec].iov_len;
		donelen += outvecs[splitvec].iov_len;
	}
	c->wbuf_len = wbuf_ptr - c->wbuf;

alldone:	
	*retlen = donelen;
	/* Setup timed wbuf flush, if buffer len != 0 */
	if (c->wbuf_len) {
		D1(printk (KERN_DEBUG "jffs2_flash_writev: mod wbuf_timer\n"));	
		mod_timer(&c->wbuf_timer, jiffies + WBUF_FLUSH_TIMEOUT);
	}
	return 0;
}

/*
 *	This is the entry for flash write.
 *	Check, if we work on NAND FLASH, if so build an iovec and write it via vritev
*/
int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, const u_char *buf)
{
	struct iovec vecs[1];

	if (jffs2_can_mark_obsolete(c))
		return c->mtd->write(c->mtd, ofs, len, retlen, buf);

	vecs[0].iov_base = (unsigned char *) buf;
	vecs[0].iov_len = len;
	return jffs2_flash_writev(c, vecs, 1, ofs, retlen);
}

/*
	Handle readback from writebuffer and ECC failure return
*/
int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, u_char *buf)
{
	loff_t	orbf = 0, owbf = 0, lwbf = 0;
	int	ret;

	/* Read flash */
	if (!jffs2_can_mark_obsolete(c)) {
		ret = c->mtd->read_ecc(c->mtd, ofs, len, retlen, buf, NULL, &jffs2_oobinfo);

		if ( (ret == -EIO) && (*retlen == len) ) {
			printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n",
			       len, ofs);
			/* 
			 * We have the raw data without ECC correction in the buffer, maybe 
			 * we are lucky and all data or parts are correct. We check the node.
			 * If data are corrupted node check will sort it out.
			 * We keep this block, it will fail on write or erase and the we
			 * mark it bad. Or should we do that now? But we should give him a chance.
			 * Maybe we had a system crash or power loss before the ecc write or  
			 * a erase was completed.
			 * So we return success. :)
			 */
		 	ret = 0;
		 }	
	} else
		return c->mtd->read(c->mtd, ofs, len, retlen, buf);

	/* if no writebuffer available or write buffer empty, return */
	if (!c->wbuf_pagesize || !c->wbuf_len)
		return ret;

	/* if we read in a different block, return */
	if ( (ofs & ~(c->sector_size-1)) != (c->wbuf_ofs & ~(c->sector_size-1)) ) 
		return ret;	

	if (ofs >= c->wbuf_ofs) {
		owbf = (ofs - c->wbuf_ofs);	/* offset in write buffer */
		if (owbf > c->wbuf_len)		/* is read beyond write buffer ? */
			return ret;
		lwbf = c->wbuf_len - owbf;	/* number of bytes to copy */
		if (lwbf > len)	
			lwbf = len;
	} else {	
		orbf = (c->wbuf_ofs - ofs);	/* offset in read buffer */
		if (orbf > len)			/* is write beyond write buffer ? */
			return ret;
		lwbf = len - orbf; 		/* number of bytes to copy */
		if (lwbf > c->wbuf_len)	
			lwbf = c->wbuf_len;
	}	
	if (lwbf > 0)
		memcpy(buf+orbf,c->wbuf+owbf,lwbf);

	return ret;
}

/*
 *	Check, if the out of band area is empty
 */
int jffs2_check_oob_empty( struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int mode)
{
	unsigned char *buf;
	int 	ret = 0;
	int	i,len,cnt,page;
	size_t  retlen;
	int	fsdata_pos,badblock_pos,oob_size;

	oob_size = c->mtd->oobsize;

	switch(c->mtd->ecctype) {
	case MTD_ECC_SW:		
		fsdata_pos = (c->wbuf_pagesize == 256) ? NAND_JFFS2_OOB8_FSDAPOS : NAND_JFFS2_OOB16_FSDAPOS;
		badblock_pos = NAND_BADBLOCK_POS;
		break;
	default:
		D1(printk(KERN_WARNING "jffs2_write_oob_empty(): Invalid ECC type\n"));
		return -EINVAL;
	}	

	/* allocate a buffer for all oob data in this sector */
	len = 4 * oob_size;
	buf = kmalloc(len, GFP_KERNEL);
	if (!buf) {
		printk(KERN_NOTICE "jffs2_check_oob_empty(): allocation of temporary data buffer for oob check failed\n");
		return -ENOMEM;
	}
	/* 
	 * if mode = 0, we scan for a total empty oob area, else we have
	 * to take care of the cleanmarker in the first page of the block
	*/
	ret = jffs2_flash_read_oob(c, jeb->offset, len , &retlen, buf);
	if (ret) {
		D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB failed %d for block at %08x\n", ret, jeb->offset));
		goto out;
	}
	
	if (retlen < len) {
		D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB return short read "
			  "(%zd bytes not %d) for block at %08x\n", retlen, len, jeb->offset));
		ret = -EIO;
		goto out;
	}
	
	/* Special check for first two pages */
	for (page = 0; page < 2 * oob_size; page += oob_size) {
		/* Check for bad block marker */
		if (buf[page+badblock_pos] != 0xff) {
			D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Bad or failed block at %08x\n",jeb->offset));
			/* Return 2 for bad and 3 for failed block 
			   bad goes to list_bad and failed to list_erase */
			ret = (!page) ? 2 : 3;
			goto out;
		}
		cnt = oob_size;
		if (mode)
			cnt -= fsdata_pos;
		for(i = 0; i < cnt ; i+=sizeof(unsigned short)) {
			unsigned short dat = *(unsigned short *)(&buf[page+i]);
			if(dat != 0xffff) {
				ret = 1; 
				goto out;
			}
		}
		/* only the first page can contain a cleanmarker !*/
		mode = 0;
	}	

	/* we know, we are aligned :) */	
	for (; page < len; page += sizeof(long)) {
		unsigned long dat = *(unsigned long *)(&buf[page]);
		if(dat != -1) {
			ret = 1; 
			goto out;
		}
	}

out:
	kfree(buf);	
	
	return ret;
}

/*
*	Scan for a valid cleanmarker and for bad blocks
*	For virtual blocks (concatenated physical blocks) check the cleanmarker
*	only in the first page of the first physical block, but scan for bad blocks in all
*	physical blocks
*/
int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
{
	struct jffs2_unknown_node n;
	unsigned char buf[32];
	unsigned char *p;
	int ret, i, cnt, retval = 0;
	size_t retlen, offset;
	int fsdata_pos, fsdata_len, oob_size, badblock_pos;

	offset = jeb->offset;
	oob_size = c->mtd->oobsize;

	switch (c->mtd->ecctype) {
	case MTD_ECC_SW:
		fsdata_pos = (c->wbuf_pagesize == 256) ? NAND_JFFS2_OOB8_FSDAPOS : NAND_JFFS2_OOB16_FSDAPOS;
		fsdata_len = (c->wbuf_pagesize == 256) ? NAND_JFFS2_OOB8_FSDALEN : NAND_JFFS2_OOB16_FSDALEN;
		badblock_pos = NAND_BADBLOCK_POS;
		break;
	default:
		D1 (printk (KERN_WARNING "jffs2_write_nand_cleanmarker(): Invalid ECC type\n"));
		return -EINVAL;
	}


	/* Loop through the physical blocks */
	for (cnt = 0; cnt < (c->sector_size / c->mtd->erasesize); cnt++) {
		/*
		   *    We read oob data from page 0 and 1 of the block.
		   *    page 0 contains cleanmarker and badblock info
		   *    page 1 contains failure count of this block
		 */
		ret = c->mtd->read_oob (c->mtd, offset, oob_size << 1, &retlen, buf);

		if (ret) {
			D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB failed %d for block at %08x\n", ret, jeb->offset));
			return ret;
		}
		if (retlen < (oob_size << 1)) {
			D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB return short read (%zd bytes not %d) for block at %08x\n", retlen, oob_size << 1, jeb->offset));
			return -EIO;
		}

		/* Check for bad block marker */
		if (buf[badblock_pos] != 0xff) {
			D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Bad block at %08x\n", jeb->offset));
			return 2;
		}

		/* Check for failure counter in the second page */
		if (buf[badblock_pos + oob_size] != 0xff) {
			D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Block marked as failed at %08x, fail count:%d\n", jeb->offset, buf[badblock_pos + oob_size]));
			return 3;
		}

		/* Check cleanmarker only on the first physical block */
		if (!cnt) {
			n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK);
			n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER);
			n.totlen = cpu_to_je32 (8);
			p = (unsigned char *) &n;

			for (i = 0; i < fsdata_len; i++) {
				if (buf[fsdata_pos + i] != p[i]) {
					D2 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Cleanmarker node not detected in block at %08x\n", jeb->offset));
					retval = 1;
				}
			}
		}
		offset += c->mtd->erasesize;
	}
	return retval;
}

int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
{
	struct 	jffs2_unknown_node n;
	int 	ret;
	int	fsdata_pos,fsdata_len;
	size_t 	retlen;

	switch(c->mtd->ecctype) {
	case MTD_ECC_SW:	
		fsdata_pos = (c->wbuf_pagesize == 256) ? NAND_JFFS2_OOB8_FSDAPOS : NAND_JFFS2_OOB16_FSDAPOS;
		fsdata_len = (c->wbuf_pagesize == 256) ? NAND_JFFS2_OOB8_FSDALEN : NAND_JFFS2_OOB16_FSDALEN;
		break;
	default:
		D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Invalid ECC type\n"));
		return -EINVAL;
	}	
	
	n.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
	n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER);
	n.totlen = cpu_to_je32(8);

	ret = jffs2_flash_write_oob(c, jeb->offset + fsdata_pos, fsdata_len, &retlen, (unsigned char *)&n);
	
	if (ret) {
		D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
		return ret;
	}
	if (retlen != fsdata_len) {
		D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Short write for block at %08x: %zd not %d\n", jeb->offset, retlen, fsdata_len));
		return ret;
	}
	return 0;
}

/* 
 * We try to get the failure count of this block.
 */
int jffs2_nand_read_failcnt(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) {

	unsigned char buf[16];
	int	ret;
	size_t 	retlen;
	int	oob_size, badblock_pos;

	oob_size = c->mtd->oobsize;

	switch(c->mtd->ecctype) {
	case MTD_ECC_SW:	
		badblock_pos = NAND_BADBLOCK_POS;
		break;
	default:
		D1(printk(KERN_WARNING "jffs2_nand_read_failcnt(): Invalid ECC type\n"));
		return -EINVAL;
	}	
	
	ret = c->mtd->read_oob(c->mtd, jeb->offset + c->mtd->oobblock, oob_size , &retlen, buf);
	
	if (ret) {
		D1(printk(KERN_WARNING "jffs2_nand_read_failcnt(): Read OOB failed %d for block at %08x\n", ret, jeb->offset));
		return ret;
	}

	if (retlen < oob_size) {
		D1(printk(KERN_WARNING "jffs2_nand_read_failcnt(): Read OOB return short read (%zd bytes not %d) for block at %08x\n", retlen, oob_size, jeb->offset));
		return -EIO;
	}

	jeb->bad_count =  buf[badblock_pos];	
	return 0;
}

/* 
 * On NAND we try to mark this block bad. We try to write how often
 * the block was erased and mark it finaly bad, if the count
 * is > MAX_ERASE_FAILURES. We read this information on mount !
 * jeb->bad_count contains the count before this erase.
 * Don't care about failures. This block remains on the erase-pending
 * or badblock list as long as nobody manipulates the flash with
 * a bootloader or something like that.
 */

int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
{
	unsigned char buf = 0x0;
	int 	ret,pos;
	size_t 	retlen;

	switch(c->mtd->ecctype) {
	case MTD_ECC_SW:	
		pos = NAND_BADBLOCK_POS;
		break;
	default:
		D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Invalid ECC type\n"));
		return -EINVAL;
	}	

	/* if the count is < max, we try to write the counter to the 2nd page oob area */
	if( ++jeb->bad_count < MAX_ERASE_FAILURES) {
		buf = (unsigned char)jeb->bad_count;
		pos += c->mtd->oobblock;
	}
	
	ret = jffs2_flash_write_oob(c, jeb->offset + pos, 1, &retlen, &buf);
	
	if (ret) {
		D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
		return ret;
	}
	if (retlen != 1) {
		D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Short write for block at %08x: %zd not 1\n", jeb->offset, retlen));
		return ret;
	}
	return 0;
}

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox