LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 0/8] v2 De-Couple sysfs memory directories from memory sections
From: Nathan Fontenot @ 2010-09-30 15:17 UTC (permalink / raw)
  To: Robin Holt
  Cc: linuxppc-dev, Greg KH, linux-kernel, Dave Hansen, linux-mm,
	KAMEZAWA Hiroyuki
In-Reply-To: <20100929192830.GK14068@sgi.com>

On 09/29/2010 02:28 PM, Robin Holt wrote:
> On Tue, Sep 28, 2010 at 01:17:33PM -0500, Nathan Fontenot wrote:
>> On 09/28/2010 07:38 AM, Robin Holt wrote:
>>> I was tasked with looking at a slowdown in similar sized SGI machines
>>> booting x86_64.  Jack Steiner had already looked into the memory_dev_init.
>>> I was looking at link_mem_sections().
>>>
>>> I made a dramatic improvement on a 16TB machine in that function by
>>> merely caching the most recent memory section and checking to see if
>>> the next memory section happens to be the subsequent in the linked list
>>> of kobjects.
>>>
>>> That simple cache reduced the time for link_mem_sections from 1 hour 27
>>> minutes down to 46 seconds.
>>
>> Nice!
>>
>>>
>>> I would like to propose we implement something along those lines also,
>>> but I am currently swamped.  I can probably get you a patch tomorrow
>>> afternoon that applies at the end of this set.
>>
>> Should this be done as a separate patch?  This patch set concentrates on
>> updates to the memory code with the node updates only being done due to the
>> memory changes.
>>
>> I think its a good idea to do the caching and have no problem adding on to
>> this patchset if no one else has any objections.
> 
> I am sorry.  I had meant to include you on the Cc: list.  I just posted a
> set of patches (3 small patches) which implement the cache most recent bit
> I aluded to above.  Search for a subject of "Speed up link_mem_sections
> during boot" and you will find them.  I did add you to the Cc: list for
> the next time I end up sending the set.
> 
> My next task is to implement a x86_64 SGI UV specific chunk of code
> to memory_block_size_bytes().  Would you consider adding that to your
> patch set?  I expect to have that either later today or early tomorrow.
> 

No problem. I'm putting together a new patch set with updates from all of
the comments now so go ahead and send it to me when you have it ready.

-Nathan

^ permalink raw reply

* Re: [PATCH 0/8] v2 De-Couple sysfs memory directories from memory sections
From: Robin Holt @ 2010-09-30 16:39 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: linux-mm, Greg KH, linux-kernel, Dave Hansen, linuxppc-dev,
	Robin Holt, H. Peter Anvin, Ingo Molnar, Thomas Gleixner,
	KAMEZAWA Hiroyuki
In-Reply-To: <20100929192830.GK14068@sgi.com>

On Wed, Sep 29, 2010 at 02:28:30PM -0500, Robin Holt wrote:
> On Tue, Sep 28, 2010 at 01:17:33PM -0500, Nathan Fontenot wrote:
...
> My next task is to implement a x86_64 SGI UV specific chunk of code
> to memory_block_size_bytes().  Would you consider adding that to your
> patch set?  I expect to have that either later today or early tomorrow.

The patch is below.

I left things at a u32, but I would really like it if you changed to an
unsigned long and adjusted my patch for me.

Thanks,
Robin

------------------------------------------------------------------------
Subject: [Patch] Implement memory_block_size_bytes for x86_64 when CONFIG_X86_UV


Nathan Fontenot has implemented a patch set for large memory configuration
systems which will combine drivers/base/memory.c memory sections
together into memory blocks with the default behavior being
unchanged from the current behavior.

In his patch set, he implements a memory_block_size_bytes() function
for PPC.  This is the equivalent patch for x86_64 when it has
CONFIG_X86_UV set.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Jack Steiner <steiner@sgi.com>
To: Nathan Fontenot <nfont@austin.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: lkml <linux-kernel@vger.kernel.org>

---

 arch/x86/mm/init_64.c |   15 +++++++++++++++
 1 file changed, 15 insertions(+)

Index: memory_block/arch/x86/mm/init_64.c
===================================================================
--- memory_block.orig/arch/x86/mm/init_64.c	2010-09-29 14:46:50.711824616 -0500
+++ memory_block/arch/x86/mm/init_64.c	2010-09-29 14:46:55.683997672 -0500
@@ -50,6 +50,7 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 #include <asm/init.h>
+#include <asm/uv/uv.h>
 #include <linux/bootmem.h>
 
 static unsigned long dma_reserve __initdata;
@@ -928,6 +929,20 @@ const char *arch_vma_name(struct vm_area
 	return NULL;
 }
 
+#ifdef CONFIG_X86_UV
+#define MIN_MEMORY_BLOCK_SIZE   (1 << SECTION_SIZE_BITS)
+
+u32 memory_block_size_bytes(void)
+{
+	if (is_uv_system()) {
+		printk("UV: memory block size 2GB\n");
+		return 2UL * 1024 * 1024 * 1024;
+	}
+	return MIN_MEMORY_BLOCK_SIZE;
+}
+#endif
+
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.

^ permalink raw reply

* [PATCH] PPC4xx: ADMA separating SoC specific functions
From: tmarri @ 2010-09-30 16:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: tmarri, yur, linux-raid, linux-crypto, dan.j.williams

From: Tirumala Marri <tmarri@apm.com>

This patch separates the SoC specific functions and moved
to different files.

The reason for ppc440spe-adma.h is to define in-line functions which
are called by both adma.c and ppc440spe-adma.c . 

Where as ppc440spe-adma.c is to define functions are completely
completely dependent on 440spe, also which are too big to define
as in-line functions.

Signed-off-by: Tirumala R Marri <tmarri@apm.com>
Acked-by: Yuri Tikhonov <yur@emcraft.com>
CC:  Dan Williams <dan.j.williams@intel.com>
CC:  Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
 drivers/dma/ppc4xx/Makefile         |    2 +-
 drivers/dma/ppc4xx/adma.c           | 3913 +----------------------------------
 drivers/dma/ppc4xx/ppc440spe-adma.c | 1658 +++++++++++++++
 drivers/dma/ppc4xx/ppc440spe-adma.h | 2391 +++++++++++++++++++++
 4 files changed, 4123 insertions(+), 3841 deletions(-)
 create mode 100644 drivers/dma/ppc4xx/ppc440spe-adma.c
 create mode 100644 drivers/dma/ppc4xx/ppc440spe-adma.h

diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile
index b3d259b..ad9265a 100644
--- a/drivers/dma/ppc4xx/Makefile
+++ b/drivers/dma/ppc4xx/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc440spe-adma.o adma.o
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index 0d58a4a..a1053cb 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -46,149 +46,40 @@
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 #include "adma.h"
-
-enum ppc_adma_init_code {
-	PPC_ADMA_INIT_OK = 0,
-	PPC_ADMA_INIT_MEMRES,
-	PPC_ADMA_INIT_MEMREG,
-	PPC_ADMA_INIT_ALLOC,
-	PPC_ADMA_INIT_COHERENT,
-	PPC_ADMA_INIT_CHANNEL,
-	PPC_ADMA_INIT_IRQ1,
-	PPC_ADMA_INIT_IRQ2,
-	PPC_ADMA_INIT_REGISTER
-};
-
-static char *ppc_adma_errors[] = {
-	[PPC_ADMA_INIT_OK] = "ok",
-	[PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
-	[PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
-	[PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
-				"structure",
-	[PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
-				   "hardware descriptors",
-	[PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
-	[PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
-	[PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
-	[PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
-};
-
-static enum ppc_adma_init_code
-ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
-
-struct ppc_dma_chan_ref {
-	struct dma_chan *chan;
-	struct list_head node;
-};
-
-/* The list of channels exported by ppc440spe ADMA */
-struct list_head
-ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list);
-
-/* This flag is set when want to refetch the xor chain in the interrupt
- * handler
- */
-static u32 do_xor_refetch;
-
-/* Pointer to DMA0, DMA1 CP/CS FIFO */
-static void *ppc440spe_dma_fifo_buf;
-
-/* Pointers to last submitted to DMA0, DMA1 CDBs */
-static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
-static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
-
-/* Pointer to last linked and submitted xor CB */
-static struct ppc440spe_adma_desc_slot *xor_last_linked;
-static struct ppc440spe_adma_desc_slot *xor_last_submit;
-
-/* This array is used in data-check operations for storing a pattern */
-static char ppc440spe_qword[16];
-
-static atomic_t ppc440spe_adma_err_irq_ref;
-static dcr_host_t ppc440spe_mq_dcr_host;
-static unsigned int ppc440spe_mq_dcr_len;
-
-/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up
- * the block size in transactions, then we do not allow to activate more than
- * only one RXOR transactions simultaneously. So use this var to store
- * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is
- * set) or not (PPC440SPE_RXOR_RUN is clear).
- */
-static unsigned long ppc440spe_rxor_state;
-
-/* These are used in enable & check routines
- */
-static u32 ppc440spe_r6_enabled;
-static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
-static struct completion ppc440spe_r6_test_comp;
-
-static int ppc440spe_adma_dma2rxor_prep_src(
-		struct ppc440spe_adma_desc_slot *desc,
-		struct ppc440spe_rxor *cursor, int index,
-		int src_cnt, u32 addr);
-static void ppc440spe_adma_dma2rxor_set_src(
-		struct ppc440spe_adma_desc_slot *desc,
-		int index, dma_addr_t addr);
-static void ppc440spe_adma_dma2rxor_set_mult(
-		struct ppc440spe_adma_desc_slot *desc,
-		int index, u8 mult);
-
-#ifdef ADMA_LL_DEBUG
-#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
-#else
-#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
-#endif
-
-static void print_cb(struct ppc440spe_adma_chan *chan, void *block)
-{
-	struct dma_cdb *cdb;
-	struct xor_cb *cb;
-	int i;
-
-	switch (chan->device->id) {
-	case 0:
-	case 1:
-		cdb = block;
-
-		pr_debug("CDB at %p [%d]:\n"
-			"\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
-			"\t sg1u 0x%08x sg1l 0x%08x\n"
-			"\t sg2u 0x%08x sg2l 0x%08x\n"
-			"\t sg3u 0x%08x sg3l 0x%08x\n",
-			cdb, chan->device->id,
-			cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
-			le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
-			le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
-			le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
-		);
-		break;
-	case 2:
-		cb = block;
-
-		pr_debug("CB at %p [%d]:\n"
-			"\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
-			"\t cbtah 0x%08x cbtal 0x%08x\n"
-			"\t cblah 0x%08x cblal 0x%08x\n",
-			cb, chan->device->id,
-			cb->cbc, cb->cbbc, cb->cbs,
-			cb->cbtah, cb->cbtal,
-			cb->cblah, cb->cblal);
-		for (i = 0; i < 16; i++) {
-			if (i && !cb->ops[i].h && !cb->ops[i].l)
-				continue;
-			pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
-				i, cb->ops[i].h, cb->ops[i].l);
-		}
-		break;
-	}
-}
-
-static void print_cb_list(struct ppc440spe_adma_chan *chan,
-			  struct ppc440spe_adma_desc_slot *iter)
-{
-	for (; iter; iter = iter->hw_next)
-		print_cb(chan, iter->hw_desc);
-}
+#include "ppc440spe-adma.h"
+
+struct dma_async_tx_descriptor
+*ppc440spe_adma_prep_dma_pq(struct dma_chan *chan,
+			       dma_addr_t * dst,
+			       dma_addr_t * src,
+			       unsigned int src_cnt,
+			       const unsigned char *scf,
+			       size_t len,
+			       unsigned long flags);
+struct dma_async_tx_descriptor
+*ppc440spe_adma_prep_dma_pqzero_sum(struct dma_chan *chan,
+				       dma_addr_t * pq,
+				       dma_addr_t * src,
+				       unsigned int src_cnt,
+				       const unsigned char *scf,
+				       size_t len,
+				       enum sum_check_flags *pqres,
+				       unsigned long flags);
+struct dma_async_tx_descriptor
+*ppc440spe_adma_prep_dma_xor_zero_sum(struct dma_chan *chan,
+					dma_addr_t * src,
+					unsigned int src_cnt,
+					size_t len,
+					enum sum_check_flags *result,
+					unsigned long flags);
+void ppc440spe_adma_set_capabilities(struct ppc440spe_adma_device *adev);
+int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
+				  struct ppc440spe_adma_chan *chan, int *initcode);
+void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
+				     struct ppc440spe_adma_chan *chan);
+int __devexit ppc440spe_adma_remove(struct platform_device *ofdev);
+void __exit ppc440spe_adma_exit(void);
+__init int ppc440spe_adma_hw_init(void);
 
 static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
 			     unsigned int src_cnt)
@@ -200,7 +91,7 @@ static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
 		pr_debug("\t0x%016llx ", src[i]);
 	pr_debug("dst:\n\t0x%016llx\n", dst);
 }
-
+#if 0
 static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
 			    unsigned int src_cnt)
 {
@@ -213,8 +104,9 @@ static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
 	for (i = 0; i < 2; i++)
 		pr_debug("\t0x%016llx ", dst[i]);
 }
+#endif
 
-static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
+void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
 				    unsigned int src_cnt,
 				    const unsigned char *scf)
 {
@@ -237,782 +129,11 @@ static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
 /******************************************************************************
  * Command (Descriptor) Blocks low-level routines
  ******************************************************************************/
-/**
- * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
- * pseudo operation
- */
-static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc,
-					  struct ppc440spe_adma_chan *chan)
-{
-	struct xor_cb *p;
-
-	switch (chan->device->id) {
-	case PPC440SPE_XOR_ID:
-		p = desc->hw_desc;
-		memset(desc->hw_desc, 0, sizeof(struct xor_cb));
-		/* NOP with Command Block Complete Enable */
-		p->cbc = XOR_CBCR_CBCE_BIT;
-		break;
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
-		/* NOP with interrupt */
-		set_bit(PPC440SPE_DESC_INT, &desc->flags);
-		break;
-	default:
-		printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
-				__func__);
-		break;
-	}
-}
-
-/**
- * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
- * pseudo operation
- */
-static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
-{
-	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
-	desc->hw_next = NULL;
-	desc->src_cnt = 0;
-	desc->dst_cnt = 1;
-}
-
-/**
- * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
- */
-static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
-					 int src_cnt, unsigned long flags)
-{
-	struct xor_cb *hw_desc = desc->hw_desc;
-
-	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
-	desc->hw_next = NULL;
-	desc->src_cnt = src_cnt;
-	desc->dst_cnt = 1;
-
-	hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
-	if (flags & DMA_PREP_INTERRUPT)
-		/* Enable interrupt on completion */
-		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
-}
-
-/**
- * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
- * operation in DMA2 controller
- */
-static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
-		int dst_cnt, int src_cnt, unsigned long flags)
-{
-	struct xor_cb *hw_desc = desc->hw_desc;
-
-	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
-	desc->hw_next = NULL;
-	desc->src_cnt = src_cnt;
-	desc->dst_cnt = dst_cnt;
-	memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
-	desc->descs_per_op = 0;
-
-	hw_desc->cbc = XOR_CBCR_TGT_BIT;
-	if (flags & DMA_PREP_INTERRUPT)
-		/* Enable interrupt on completion */
-		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
-}
-
 #define DMA_CTRL_FLAGS_LAST	DMA_PREP_FENCE
 #define DMA_PREP_ZERO_P		(DMA_CTRL_FLAGS_LAST << 1)
 #define DMA_PREP_ZERO_Q		(DMA_PREP_ZERO_P << 1)
 
 /**
- * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
- * with DMA0/1
- */
-static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
-				int dst_cnt, int src_cnt, unsigned long flags,
-				unsigned long op)
-{
-	struct dma_cdb *hw_desc;
-	struct ppc440spe_adma_desc_slot *iter;
-	u8 dopc;
-
-	/* Common initialization of a PQ descriptors chain */
-	set_bits(op, &desc->flags);
-	desc->src_cnt = src_cnt;
-	desc->dst_cnt = dst_cnt;
-
-	/* WXOR MULTICAST if both P and Q are being computed
-	 * MV_SG1_SG2 if Q only
-	 */
-	dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
-		DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
-
-	list_for_each_entry(iter, &desc->group_list, chain_node) {
-		hw_desc = iter->hw_desc;
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-
-		if (likely(!list_is_last(&iter->chain_node,
-				&desc->group_list))) {
-			/* set 'next' pointer */
-			iter->hw_next = list_entry(iter->chain_node.next,
-				struct ppc440spe_adma_desc_slot, chain_node);
-			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-		} else {
-			/* this is the last descriptor.
-			 * this slot will be pasted from ADMA level
-			 * each time it wants to configure parameters
-			 * of the transaction (src, dst, ...)
-			 */
-			iter->hw_next = NULL;
-			if (flags & DMA_PREP_INTERRUPT)
-				set_bit(PPC440SPE_DESC_INT, &iter->flags);
-			else
-				clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-		}
-	}
-
-	/* Set OPS depending on WXOR/RXOR type of operation */
-	if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
-		/* This is a WXOR only chain:
-		 * - first descriptors are for zeroing destinations
-		 *   if PPC440SPE_ZERO_P/Q set;
-		 * - descriptors remained are for GF-XOR operations.
-		 */
-		iter = list_first_entry(&desc->group_list,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-
-		if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
-			hw_desc = iter->hw_desc;
-			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-			iter = list_first_entry(&iter->chain_node,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		}
-
-		if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
-			hw_desc = iter->hw_desc;
-			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-			iter = list_first_entry(&iter->chain_node,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		}
-
-		list_for_each_entry_from(iter, &desc->group_list, chain_node) {
-			hw_desc = iter->hw_desc;
-			hw_desc->opc = dopc;
-		}
-	} else {
-		/* This is either RXOR-only or mixed RXOR/WXOR */
-
-		/* The first 1 or 2 slots in chain are always RXOR,
-		 * if need to calculate P & Q, then there are two
-		 * RXOR slots; if only P or only Q, then there is one
-		 */
-		iter = list_first_entry(&desc->group_list,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		hw_desc = iter->hw_desc;
-		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-
-		if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
-			iter = list_first_entry(&iter->chain_node,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			hw_desc = iter->hw_desc;
-			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-		}
-
-		/* The remaining descs (if any) are WXORs */
-		if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
-			iter = list_first_entry(&iter->chain_node,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			list_for_each_entry_from(iter, &desc->group_list,
-						chain_node) {
-				hw_desc = iter->hw_desc;
-				hw_desc->opc = dopc;
-			}
-		}
-	}
-}
-
-/**
- * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
- * for PQ_ZERO_SUM operation
- */
-static void ppc440spe_desc_init_dma01pqzero_sum(
-				struct ppc440spe_adma_desc_slot *desc,
-				int dst_cnt, int src_cnt)
-{
-	struct dma_cdb *hw_desc;
-	struct ppc440spe_adma_desc_slot *iter;
-	int i = 0;
-	u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
-				   DMA_CDB_OPC_MV_SG1_SG2;
-	/*
-	 * Initialize starting from 2nd or 3rd descriptor dependent
-	 * on dst_cnt. First one or two slots are for cloning P
-	 * and/or Q to chan->pdest and/or chan->qdest as we have
-	 * to preserve original P/Q.
-	 */
-	iter = list_first_entry(&desc->group_list,
-				struct ppc440spe_adma_desc_slot, chain_node);
-	iter = list_entry(iter->chain_node.next,
-			  struct ppc440spe_adma_desc_slot, chain_node);
-
-	if (dst_cnt > 1) {
-		iter = list_entry(iter->chain_node.next,
-				  struct ppc440spe_adma_desc_slot, chain_node);
-	}
-	/* initialize each source descriptor in chain */
-	list_for_each_entry_from(iter, &desc->group_list, chain_node) {
-		hw_desc = iter->hw_desc;
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-		iter->src_cnt = 0;
-		iter->dst_cnt = 0;
-
-		/* This is a ZERO_SUM operation:
-		 * - <src_cnt> descriptors starting from 2nd or 3rd
-		 *   descriptor are for GF-XOR operations;
-		 * - remaining <dst_cnt> descriptors are for checking the result
-		 */
-		if (i++ < src_cnt)
-			/* MV_SG1_SG2 if only Q is being verified
-			 * MULTICAST if both P and Q are being verified
-			 */
-			hw_desc->opc = dopc;
-		else
-			/* DMA_CDB_OPC_DCHECK128 operation */
-			hw_desc->opc = DMA_CDB_OPC_DCHECK128;
-
-		if (likely(!list_is_last(&iter->chain_node,
-					 &desc->group_list))) {
-			/* set 'next' pointer */
-			iter->hw_next = list_entry(iter->chain_node.next,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-		} else {
-			/* this is the last descriptor.
-			 * this slot will be pasted from ADMA level
-			 * each time it wants to configure parameters
-			 * of the transaction (src, dst, ...)
-			 */
-			iter->hw_next = NULL;
-			/* always enable interrupt generation since we get
-			 * the status of pqzero from the handler
-			 */
-			set_bit(PPC440SPE_DESC_INT, &iter->flags);
-		}
-	}
-	desc->src_cnt = src_cnt;
-	desc->dst_cnt = dst_cnt;
-}
-
-/**
- * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
- */
-static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
-					unsigned long flags)
-{
-	struct dma_cdb *hw_desc = desc->hw_desc;
-
-	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
-	desc->hw_next = NULL;
-	desc->src_cnt = 1;
-	desc->dst_cnt = 1;
-
-	if (flags & DMA_PREP_INTERRUPT)
-		set_bit(PPC440SPE_DESC_INT, &desc->flags);
-	else
-		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
-
-	hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-}
-
-/**
- * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation
- */
-static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc,
-					int value, unsigned long flags)
-{
-	struct dma_cdb *hw_desc = desc->hw_desc;
-
-	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
-	desc->hw_next = NULL;
-	desc->src_cnt = 1;
-	desc->dst_cnt = 1;
-
-	if (flags & DMA_PREP_INTERRUPT)
-		set_bit(PPC440SPE_DESC_INT, &desc->flags);
-	else
-		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
-
-	hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value);
-	hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value);
-	hw_desc->opc = DMA_CDB_OPC_DFILL128;
-}
-
-/**
- * ppc440spe_desc_set_src_addr - set source address into the descriptor
- */
-static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
-					struct ppc440spe_adma_chan *chan,
-					int src_idx, dma_addr_t addrh,
-					dma_addr_t addrl)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-	phys_addr_t addr64, tmplow, tmphi;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		if (!addrh) {
-			addr64 = addrl;
-			tmphi = (addr64 >> 32);
-			tmplow = (addr64 & 0xFFFFFFFF);
-		} else {
-			tmphi = addrh;
-			tmplow = addrl;
-		}
-		dma_hw_desc = desc->hw_desc;
-		dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow);
-		dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi);
-		break;
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		xor_hw_desc->ops[src_idx].l = addrl;
-		xor_hw_desc->ops[src_idx].h |= addrh;
-		break;
-	}
-}
-
-/**
- * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
- */
-static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
-			struct ppc440spe_adma_chan *chan, u32 mult_index,
-			int sg_index, unsigned char mult_value)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-	u32 *psgu;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		switch (sg_index) {
-		/* for RXOR operations set multiplier
-		 * into source cued address
-		 */
-		case DMA_CDB_SG_SRC:
-			psgu = &dma_hw_desc->sg1u;
-			break;
-		/* for WXOR operations set multiplier
-		 * into destination cued address(es)
-		 */
-		case DMA_CDB_SG_DST1:
-			psgu = &dma_hw_desc->sg2u;
-			break;
-		case DMA_CDB_SG_DST2:
-			psgu = &dma_hw_desc->sg3u;
-			break;
-		default:
-			BUG();
-		}
-
-		*psgu |= cpu_to_le32(mult_value << mult_index);
-		break;
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		break;
-	default:
-		BUG();
-	}
-}
-
-/**
- * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
- */
-static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan,
-				dma_addr_t addrh, dma_addr_t addrl,
-				u32 dst_idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-	phys_addr_t addr64, tmphi, tmplow;
-	u32 *psgu, *psgl;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		if (!addrh) {
-			addr64 = addrl;
-			tmphi = (addr64 >> 32);
-			tmplow = (addr64 & 0xFFFFFFFF);
-		} else {
-			tmphi = addrh;
-			tmplow = addrl;
-		}
-		dma_hw_desc = desc->hw_desc;
-
-		psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
-		psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
-
-		*psgl = cpu_to_le32((u32)tmplow);
-		*psgu |= cpu_to_le32((u32)tmphi);
-		break;
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		xor_hw_desc->cbtal = addrl;
-		xor_hw_desc->cbtah |= addrh;
-		break;
-	}
-}
-
-/**
- * ppc440spe_desc_set_byte_count - set number of data bytes involved
- * into the operation
- */
-static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan,
-				u32 byte_count)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-		dma_hw_desc->cnt = cpu_to_le32(byte_count);
-		break;
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		xor_hw_desc->cbbc = byte_count;
-		break;
-	}
-}
-
-/**
- * ppc440spe_desc_set_rxor_block_size - set RXOR block size
- */
-static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
-{
-	/* assume that byte_count is aligned on the 512-boundary;
-	 * thus write it directly to the register (bits 23:31 are
-	 * reserved there).
-	 */
-	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
-}
-
-/**
- * ppc440spe_desc_set_dcheck - set CHECK pattern
- */
-static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, u8 *qword)
-{
-	struct dma_cdb *dma_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-		iowrite32(qword[0], &dma_hw_desc->sg3l);
-		iowrite32(qword[4], &dma_hw_desc->sg3u);
-		iowrite32(qword[8], &dma_hw_desc->sg2l);
-		iowrite32(qword[12], &dma_hw_desc->sg2u);
-		break;
-	default:
-		BUG();
-	}
-}
-
-/**
- * ppc440spe_xor_set_link - set link address in xor CB
- */
-static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
-				struct ppc440spe_adma_desc_slot *next_desc)
-{
-	struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
-
-	if (unlikely(!next_desc || !(next_desc->phys))) {
-		printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
-			__func__, next_desc,
-			next_desc ? next_desc->phys : 0);
-		BUG();
-	}
-
-	xor_hw_desc->cbs = 0;
-	xor_hw_desc->cblal = next_desc->phys;
-	xor_hw_desc->cblah = 0;
-	xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
-}
-
-/**
- * ppc440spe_desc_set_link - set the address of descriptor following this
- * descriptor in chain
- */
-static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
-				struct ppc440spe_adma_desc_slot *prev_desc,
-				struct ppc440spe_adma_desc_slot *next_desc)
-{
-	unsigned long flags;
-	struct ppc440spe_adma_desc_slot *tail = next_desc;
-
-	if (unlikely(!prev_desc || !next_desc ||
-		(prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
-		/* If previous next is overwritten something is wrong.
-		 * though we may refetch from append to initiate list
-		 * processing; in this case - it's ok.
-		 */
-		printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
-			"prev->hw_next=0x%p\n", __func__, prev_desc,
-			next_desc, prev_desc ? prev_desc->hw_next : 0);
-		BUG();
-	}
-
-	local_irq_save(flags);
-
-	/* do s/w chaining both for DMA and XOR descriptors */
-	prev_desc->hw_next = next_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		break;
-	case PPC440SPE_XOR_ID:
-		/* bind descriptor to the chain */
-		while (tail->hw_next)
-			tail = tail->hw_next;
-		xor_last_linked = tail;
-
-		if (prev_desc == xor_last_submit)
-			/* do not link to the last submitted CB */
-			break;
-		ppc440spe_xor_set_link(prev_desc, next_desc);
-		break;
-	}
-
-	local_irq_restore(flags);
-}
-
-/**
- * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
- */
-static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, int src_idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-		/* May have 0, 1, 2, or 3 sources */
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DFILL128:
-			return 0;
-		case DMA_CDB_OPC_DCHECK128:
-			if (unlikely(src_idx)) {
-				printk(KERN_ERR "%s: try to get %d source for"
-				    " DCHECK128\n", __func__, src_idx);
-				BUG();
-			}
-			return le32_to_cpu(dma_hw_desc->sg1l);
-		case DMA_CDB_OPC_MULTICAST:
-		case DMA_CDB_OPC_MV_SG1_SG2:
-			if (unlikely(src_idx > 2)) {
-				printk(KERN_ERR "%s: try to get %d source from"
-				    " DMA descr\n", __func__, src_idx);
-				BUG();
-			}
-			if (src_idx) {
-				if (le32_to_cpu(dma_hw_desc->sg1u) &
-				    DMA_CUED_XOR_WIN_MSK) {
-					u8 region;
-
-					if (src_idx == 1)
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							desc->unmap_len;
-
-					region = (le32_to_cpu(
-					    dma_hw_desc->sg1u)) >>
-						DMA_CUED_REGION_OFF;
-
-					region &= DMA_CUED_REGION_MSK;
-					switch (region) {
-					case DMA_RXOR123:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len << 1);
-					case DMA_RXOR124:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len * 3);
-					case DMA_RXOR125:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len << 2);
-					default:
-						printk(KERN_ERR
-						    "%s: try to"
-						    " get src3 for region %02x"
-						    "PPC440SPE_DESC_RXOR12?\n",
-						    __func__, region);
-						BUG();
-					}
-				} else {
-					printk(KERN_ERR
-						"%s: try to get %d"
-						" source for non-cued descr\n",
-						__func__, src_idx);
-					BUG();
-				}
-			}
-			return le32_to_cpu(dma_hw_desc->sg1l);
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-		return le32_to_cpu(dma_hw_desc->sg1l);
-	case PPC440SPE_XOR_ID:
-		/* May have up to 16 sources */
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->ops[src_idx].l;
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_dest_addr - extract the destination address from the
- * descriptor
- */
-static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, int idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		if (likely(!idx))
-			return le32_to_cpu(dma_hw_desc->sg2l);
-		return le32_to_cpu(dma_hw_desc->sg3l);
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->cbtal;
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_src_num - extract the number of source addresses from
- * the descriptor
- */
-static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DFILL128:
-			return 0;
-		case DMA_CDB_OPC_DCHECK128:
-			return 1;
-		case DMA_CDB_OPC_MV_SG1_SG2:
-		case DMA_CDB_OPC_MULTICAST:
-			/*
-			 * Only for RXOR operations we have more than
-			 * one source
-			 */
-			if (le32_to_cpu(dma_hw_desc->sg1u) &
-			    DMA_CUED_XOR_WIN_MSK) {
-				/* RXOR op, there are 2 or 3 sources */
-				if (((le32_to_cpu(dma_hw_desc->sg1u) >>
-				    DMA_CUED_REGION_OFF) &
-				      DMA_CUED_REGION_MSK) == DMA_RXOR12) {
-					/* RXOR 1-2 */
-					return 2;
-				} else {
-					/* RXOR 1-2-3/1-2-4/1-2-5 */
-					return 3;
-				}
-			}
-			return 1;
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-	case PPC440SPE_XOR_ID:
-		/* up to 16 sources */
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_dst_num - get the number of destination addresses in
- * this descriptor
- */
-static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan)
-{
-	struct dma_cdb *dma_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* May be 1 or 2 destinations */
-		dma_hw_desc = desc->hw_desc;
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DCHECK128:
-			return 0;
-		case DMA_CDB_OPC_MV_SG1_SG2:
-		case DMA_CDB_OPC_DFILL128:
-			return 1;
-		case DMA_CDB_OPC_MULTICAST:
-			if (desc->dst_cnt == 2)
-				return 2;
-			else
-				return 1;
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-	case PPC440SPE_XOR_ID:
-		/* Always only 1 destination */
-		return 1;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/**
  * ppc440spe_desc_get_link - get the address of the descriptor that
  * follows this one
  */
@@ -1055,643 +176,23 @@ static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
 	return slot_cnt;
 }
 
-/**
- * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
- * DMA2 PQ operation
- */
-static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs,
-		int src_cnt, size_t len)
-{
-	signed long long order = 0;
-	int state = 0;
-	int addr_count = 0;
-	int i;
-	for (i = 1; i < src_cnt; i++) {
-		dma_addr_t cur_addr = srcs[i];
-		dma_addr_t old_addr = srcs[i-1];
-		switch (state) {
-		case 0:
-			if (cur_addr == old_addr + len) {
-				/* direct RXOR */
-				order = 1;
-				state = 1;
-				if (i == src_cnt-1)
-					addr_count++;
-			} else if (old_addr == cur_addr + len) {
-				/* reverse RXOR */
-				order = -1;
-				state = 1;
-				if (i == src_cnt-1)
-					addr_count++;
-			} else {
-				state = 3;
-			}
-			break;
-		case 1:
-			if (i == src_cnt-2 || (order == -1
-				&& cur_addr != old_addr - len)) {
-				order = 0;
-				state = 0;
-				addr_count++;
-			} else if (cur_addr == old_addr + len*order) {
-				state = 2;
-				if (i == src_cnt-1)
-					addr_count++;
-			} else if (cur_addr == old_addr + 2*len) {
-				state = 2;
-				if (i == src_cnt-1)
-					addr_count++;
-			} else if (cur_addr == old_addr + 3*len) {
-				state = 2;
-				if (i == src_cnt-1)
-					addr_count++;
-			} else {
-				order = 0;
-				state = 0;
-				addr_count++;
-			}
-			break;
-		case 2:
-			order = 0;
-			state = 0;
-			addr_count++;
-				break;
-		}
-		if (state == 3)
-			break;
-	}
-	if (src_cnt <= 1 || (state != 1 && state != 2)) {
-		pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
-			__func__, src_cnt, state, addr_count, order);
-		for (i = 0; i < src_cnt; i++)
-			pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
-		BUG();
-	}
-
-	return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
-}
-
-
 /******************************************************************************
  * ADMA channel low-level routines
  ******************************************************************************/
 
-static u32
-ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan);
-static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan);
-
-/**
- * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
- */
-static void ppc440spe_adma_device_clear_eot_status(
-					struct ppc440spe_adma_chan *chan)
-{
-	struct dma_regs *dma_reg;
-	struct xor_regs *xor_reg;
-	u8 *p = chan->device->dma_desc_pool_virt;
-	struct dma_cdb *cdb;
-	u32 rv, i;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* read FIFO to ack */
-		dma_reg = chan->device->dma_reg;
-		while ((rv = ioread32(&dma_reg->csfpl))) {
-			i = rv & DMA_CDB_ADDR_MSK;
-			cdb = (struct dma_cdb *)&p[i -
-			    (u32)chan->device->dma_desc_pool];
-
-			/* Clear opcode to ack. This is necessary for
-			 * ZeroSum operations only
-			 */
-			cdb->opc = 0;
-
-			if (test_bit(PPC440SPE_RXOR_RUN,
-			    &ppc440spe_rxor_state)) {
-				/* probably this is a completed RXOR op,
-				 * get pointer to CDB using the fact that
-				 * physical and virtual addresses of CDB
-				 * in pools have the same offsets
-				 */
-				if (le32_to_cpu(cdb->sg1u) &
-				    DMA_CUED_XOR_BASE) {
-					/* this is a RXOR */
-					clear_bit(PPC440SPE_RXOR_RUN,
-						  &ppc440spe_rxor_state);
-				}
-			}
-
-			if (rv & DMA_CDB_STATUS_MSK) {
-				/* ZeroSum check failed
-				 */
-				struct ppc440spe_adma_desc_slot *iter;
-				dma_addr_t phys = rv & ~DMA_CDB_MSK;
-
-				/*
-				 * Update the status of corresponding
-				 * descriptor.
-				 */
-				list_for_each_entry(iter, &chan->chain,
-				    chain_node) {
-					if (iter->phys == phys)
-						break;
-				}
-				/*
-				 * if cannot find the corresponding
-				 * slot it's a bug
-				 */
-				BUG_ON(&iter->chain_node == &chan->chain);
-
-				if (iter->xor_check_result) {
-					if (test_bit(PPC440SPE_DESC_PCHECK,
-						     &iter->flags)) {
-						*iter->xor_check_result |=
-							SUM_CHECK_P_RESULT;
-					} else
-					if (test_bit(PPC440SPE_DESC_QCHECK,
-						     &iter->flags)) {
-						*iter->xor_check_result |=
-							SUM_CHECK_Q_RESULT;
-					} else
-						BUG();
-				}
-			}
-		}
-
-		rv = ioread32(&dma_reg->dsts);
-		if (rv) {
-			pr_err("DMA%d err status: 0x%x\n",
-			       chan->device->id, rv);
-			/* write back to clear */
-			iowrite32(rv, &dma_reg->dsts);
-		}
-		break;
-	case PPC440SPE_XOR_ID:
-		/* reset status bits to ack */
-		xor_reg = chan->device->xor_reg;
-		rv = ioread32be(&xor_reg->sr);
-		iowrite32be(rv, &xor_reg->sr);
-
-		if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) {
-			if (rv & XOR_IE_RPTIE_BIT) {
-				/* Read PLB Timeout Error.
-				 * Try to resubmit the CB
-				 */
-				u32 val = ioread32be(&xor_reg->ccbalr);
-
-				iowrite32be(val, &xor_reg->cblalr);
-
-				val = ioread32be(&xor_reg->crsr);
-				iowrite32be(val | XOR_CRSR_XAE_BIT,
-					    &xor_reg->crsr);
-			} else
-				pr_err("XOR ERR 0x%x status\n", rv);
-			break;
-		}
-
-		/*  if the XORcore is idle, but there are unprocessed CBs
-		 * then refetch the s/w chain here
-		 */
-		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
-		    do_xor_refetch)
-			ppc440spe_chan_append(chan);
-		break;
-	}
-}
-
-/**
- * ppc440spe_chan_is_busy - get the channel status
- */
-static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
-{
-	struct dma_regs *dma_reg;
-	struct xor_regs *xor_reg;
-	int busy = 0;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_reg = chan->device->dma_reg;
-		/*  if command FIFO's head and tail pointers are equal and
-		 * status tail is the same as command, then channel is free
-		 */
-		if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
-		    ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
-			busy = 1;
-		break;
-	case PPC440SPE_XOR_ID:
-		/* use the special status bit for the XORcore
-		 */
-		xor_reg = chan->device->xor_reg;
-		busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
-		break;
-	}
-
-	return busy;
-}
-
-/**
- * ppc440spe_chan_set_first_xor_descriptor -  init XORcore chain
- */
-static void ppc440spe_chan_set_first_xor_descriptor(
-				struct ppc440spe_adma_chan *chan,
-				struct ppc440spe_adma_desc_slot *next_desc)
-{
-	struct xor_regs *xor_reg = chan->device->xor_reg;
-
-	if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
-		printk(KERN_INFO "%s: Warn: XORcore is running "
-			"when try to set the first CDB!\n",
-			__func__);
-
-	xor_last_submit = xor_last_linked = next_desc;
-
-	iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
-
-	iowrite32be(next_desc->phys, &xor_reg->cblalr);
-	iowrite32be(0, &xor_reg->cblahr);
-	iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
-		    &xor_reg->cbcr);
-
-	chan->hw_chain_inited = 1;
-}
-
-/**
- * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
- * called with irqs disabled
- */
-static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
-		struct ppc440spe_adma_desc_slot *desc)
-{
-	u32 pcdb;
-	struct dma_regs *dma_reg = chan->device->dma_reg;
-
-	pcdb = desc->phys;
-	if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
-		pcdb |= DMA_CDB_NO_INT;
-
-	chan_last_sub[chan->device->id] = desc;
-
-	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
-
-	iowrite32(pcdb, &dma_reg->cpfpl);
-}
-
-/**
- * ppc440spe_chan_append - update the h/w chain in the channel
- */
-static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
-{
-	struct xor_regs *xor_reg;
-	struct ppc440spe_adma_desc_slot *iter;
-	struct xor_cb *xcb;
-	u32 cur_desc;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		cur_desc = ppc440spe_chan_get_current_descriptor(chan);
-
-		if (likely(cur_desc)) {
-			iter = chan_last_sub[chan->device->id];
-			BUG_ON(!iter);
-		} else {
-			/* first peer */
-			iter = chan_first_cdb[chan->device->id];
-			BUG_ON(!iter);
-			ppc440spe_dma_put_desc(chan, iter);
-			chan->hw_chain_inited = 1;
-		}
-
-		/* is there something new to append */
-		if (!iter->hw_next)
-			break;
-
-		/* flush descriptors from the s/w queue to fifo */
-		list_for_each_entry_continue(iter, &chan->chain, chain_node) {
-			ppc440spe_dma_put_desc(chan, iter);
-			if (!iter->hw_next)
-				break;
-		}
-		break;
-	case PPC440SPE_XOR_ID:
-		/* update h/w links and refetch */
-		if (!xor_last_submit->hw_next)
-			break;
-
-		xor_reg = chan->device->xor_reg;
-		/* the last linked CDB has to generate an interrupt
-		 * that we'd be able to append the next lists to h/w
-		 * regardless of the XOR engine state at the moment of
-		 * appending of these next lists
-		 */
-		xcb = xor_last_linked->hw_desc;
-		xcb->cbc |= XOR_CBCR_CBCE_BIT;
-
-		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
-			/* XORcore is idle. Refetch now */
-			do_xor_refetch = 0;
-			ppc440spe_xor_set_link(xor_last_submit,
-				xor_last_submit->hw_next);
-
-			ADMA_LL_DBG(print_cb_list(chan,
-				xor_last_submit->hw_next));
-
-			xor_last_submit = xor_last_linked;
-			iowrite32be(ioread32be(&xor_reg->crsr) |
-				    XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
-				    &xor_reg->crsr);
-		} else {
-			/* XORcore is running. Refetch later in the handler */
-			do_xor_refetch = 1;
-		}
-
-		break;
-	}
-
-	local_irq_restore(flags);
-}
-
-/**
- * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
- */
-static u32
-ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan)
-{
-	struct dma_regs *dma_reg;
-	struct xor_regs *xor_reg;
-
-	if (unlikely(!chan->hw_chain_inited))
-		/* h/w descriptor chain is not initialized yet */
-		return 0;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_reg = chan->device->dma_reg;
-		return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
-	case PPC440SPE_XOR_ID:
-		xor_reg = chan->device->xor_reg;
-		return ioread32be(&xor_reg->ccbalr);
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_chan_run - enable the channel
- */
-static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
-{
-	struct xor_regs *xor_reg;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* DMAs are always enabled, do nothing */
-		break;
-	case PPC440SPE_XOR_ID:
-		/* drain write buffer */
-		xor_reg = chan->device->xor_reg;
-
-		/* fetch descriptor pointed to in <link> */
-		iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
-			    &xor_reg->crsr);
-		break;
-	}
-}
 
 /******************************************************************************
  * ADMA device level
  ******************************************************************************/
 
-static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
 static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan);
 
-static dma_cookie_t
-ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
-
-static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx,
-				    dma_addr_t addr, int index);
-static void
-ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx,
-				  dma_addr_t addr, int index);
-
-static void
-ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx,
-			   dma_addr_t *paddr, unsigned long flags);
-static void
-ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx,
-			  dma_addr_t addr, int index);
-static void
-ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx,
-			       unsigned char mult, int index, int dst_pos);
-static void
-ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx,
-				   dma_addr_t paddr, dma_addr_t qaddr);
-
-static struct page *ppc440spe_rxor_srcs[32];
-
-/**
- * ppc440spe_can_rxor - check if the operands may be processed with RXOR
- */
-static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
-{
-	int i, order = 0, state = 0;
-	int idx = 0;
-
-	if (unlikely(!(src_cnt > 1)))
-		return 0;
-
-	BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
-
-	/* Skip holes in the source list before checking */
-	for (i = 0; i < src_cnt; i++) {
-		if (!srcs[i])
-			continue;
-		ppc440spe_rxor_srcs[idx++] = srcs[i];
-	}
-	src_cnt = idx;
-
-	for (i = 1; i < src_cnt; i++) {
-		char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
-		char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
-
-		switch (state) {
-		case 0:
-			if (cur_addr == old_addr + len) {
-				/* direct RXOR */
-				order = 1;
-				state = 1;
-			} else if (old_addr == cur_addr + len) {
-				/* reverse RXOR */
-				order = -1;
-				state = 1;
-			} else
-				goto out;
-			break;
-		case 1:
-			if ((i == src_cnt - 2) ||
-			    (order == -1 && cur_addr != old_addr - len)) {
-				order = 0;
-				state = 0;
-			} else if ((cur_addr == old_addr + len * order) ||
-				   (cur_addr == old_addr + 2 * len) ||
-				   (cur_addr == old_addr + 3 * len)) {
-				state = 2;
-			} else {
-				order = 0;
-				state = 0;
-			}
-			break;
-		case 2:
-			order = 0;
-			state = 0;
-			break;
-		}
-	}
-
-out:
-	if (state == 1 || state == 2)
-		return 1;
-
-	return 0;
-}
-
-/**
- * ppc440spe_adma_device_estimate - estimate the efficiency of processing
- *	the operation given on this channel. It's assumed that 'chan' is
- *	capable to process 'cap' type of operation.
- * @chan: channel to use
- * @cap: type of transaction
- * @dst_lst: array of destination pointers
- * @dst_cnt: number of destination operands
- * @src_lst: array of source pointers
- * @src_cnt: number of source operands
- * @src_sz: size of each source operand
- */
-static int ppc440spe_adma_estimate(struct dma_chan *chan,
-	enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
-	struct page **src_lst, int src_cnt, size_t src_sz)
-{
-	int ef = 1;
-
-	if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
-		/* If RAID-6 capabilities were not activated don't try
-		 * to use them
-		 */
-		if (unlikely(!ppc440spe_r6_enabled))
-			return -1;
-	}
-	/*  In the current implementation of ppc440spe ADMA driver it
-	 * makes sense to pick out only pq case, because it may be
-	 * processed:
-	 * (1) either using Biskup method on DMA2;
-	 * (2) or on DMA0/1.
-	 *  Thus we give a favour to (1) if the sources are suitable;
-	 * else let it be processed on one of the DMA0/1 engines.
-	 *  In the sum_product case where destination is also the
-	 * source process it on DMA0/1 only.
-	 */
-	if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
-
-		if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
-			ef = 0; /* sum_product case, process on DMA0/1 */
-		else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
-			ef = 3; /* override (DMA0/1 + idle) */
-		else
-			ef = 0; /* can't process on DMA2 if !rxor */
-	}
-
-	/* channel idleness increases the priority */
-	if (likely(ef) &&
-	    !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
-		ef++;
-
-	return ef;
-}
-
-struct dma_chan *
-ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
-	struct page **dst_lst, int dst_cnt, struct page **src_lst,
-	int src_cnt, size_t src_sz)
-{
-	struct dma_chan *best_chan = NULL;
-	struct ppc_dma_chan_ref *ref;
-	int best_rank = -1;
-
-	if (unlikely(!src_sz))
-		return NULL;
-	if (src_sz > PAGE_SIZE) {
-		/*
-		 * should a user of the api ever pass > PAGE_SIZE requests
-		 * we sort out cases where temporary page-sized buffers
-		 * are used.
-		 */
-		switch (cap) {
-		case DMA_PQ:
-			if (src_cnt == 1 && dst_lst[1] == src_lst[0])
-				return NULL;
-			if (src_cnt == 2 && dst_lst[1] == src_lst[1])
-				return NULL;
-			break;
-		case DMA_PQ_VAL:
-		case DMA_XOR_VAL:
-			return NULL;
-		default:
-			break;
-		}
-	}
-
-	list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
-		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
-			int rank;
-
-			rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
-					dst_cnt, src_lst, src_cnt, src_sz);
-			if (rank > best_rank) {
-				best_rank = rank;
-				best_chan = ref->chan;
-			}
-		}
-	}
-
-	return best_chan;
-}
-EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
-
-/**
- * ppc440spe_get_group_entry - get group entry with index idx
- * @tdesc: is the last allocated slot in the group.
- */
-static struct ppc440spe_adma_desc_slot *
-ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
-{
-	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
-	int i = 0;
-
-	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
-		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
-			__func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
-		BUG();
-	}
-
-	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
-		if (i++ == entry_idx)
-			break;
-	}
-	return iter;
-}
-
 /**
  * ppc440spe_adma_free_slots - flags descriptor slots for reuse
  * @slot: Slot to free
  * Caller must hold &ppc440spe_chan->lock while calling this function
  */
-static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
+void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
 				      struct ppc440spe_adma_chan *chan)
 {
 	int stride = slot->slots_per_op;
@@ -1793,48 +294,6 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
 }
 
 /**
- * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
- */
-static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
-		struct ppc440spe_adma_chan *chan)
-{
-	/* the client is allowed to attach dependent operations
-	 * until 'ack' is set
-	 */
-	if (!async_tx_test_ack(&desc->async_tx))
-		return 0;
-
-	/* leave the last descriptor in the chain
-	 * so we can append to it
-	 */
-	if (list_is_last(&desc->chain_node, &chan->chain) ||
-	    desc->phys == ppc440spe_chan_get_current_descriptor(chan))
-		return 1;
-
-	if (chan->device->id != PPC440SPE_XOR_ID) {
-		/* our DMA interrupt handler clears opc field of
-		 * each processed descriptor. For all types of
-		 * operations except for ZeroSum we do not actually
-		 * need ack from the interrupt handler. ZeroSum is a
-		 * special case since the result of this operation
-		 * is available from the handler only, so if we see
-		 * such type of descriptor (which is unprocessed yet)
-		 * then leave it in chain.
-		 */
-		struct dma_cdb *cdb = desc->hw_desc;
-		if (cdb->opc == DMA_CDB_OPC_DCHECK128)
-			return 1;
-	}
-
-	dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
-		desc->phys, desc->idx, desc->slots_per_op);
-
-	list_del(&desc->chain_node);
-	ppc440spe_adma_free_slots(desc, chan);
-	return 0;
-}
-
-/**
  * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine
  *	which runs through the channel CDBs list until reach the descriptor
  *	currently processed. When routine determines that all CDBs of group
@@ -1991,7 +450,7 @@ static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
 /**
  * ppc440spe_adma_alloc_slots - allocate free slots (if any)
  */
-static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
+struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
 		struct ppc440spe_adma_chan *chan, int num_slots,
 		int slots_per_op)
 {
@@ -2166,40 +625,6 @@ static dma_cookie_t ppc440spe_desc_assign_cookie(
 }
 
 /**
- * ppc440spe_rxor_set_region_data -
- */
-static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
-	u8 xor_arg_no, u32 mask)
-{
-	struct xor_cb *xcb = desc->hw_desc;
-
-	xcb->ops[xor_arg_no].h |= mask;
-}
-
-/**
- * ppc440spe_rxor_set_src -
- */
-static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
-	u8 xor_arg_no, dma_addr_t addr)
-{
-	struct xor_cb *xcb = desc->hw_desc;
-
-	xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
-	xcb->ops[xor_arg_no].l = addr;
-}
-
-/**
- * ppc440spe_rxor_set_mult -
- */
-static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
-	u8 xor_arg_no, u8 idx, u8 mult)
-{
-	struct xor_cb *xcb = desc->hw_desc;
-
-	xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
-}
-
-/**
  * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold
  *	has been achieved
  */
@@ -2219,7 +644,7 @@ static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan)
  *	(it's not necessary that descriptors will be submitted to the h/w
  *	chains too right now)
  */
-static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct ppc440spe_adma_desc_slot *sw_desc;
 	struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan);
@@ -2424,1478 +849,6 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor(
 	return sw_desc ? &sw_desc->async_tx : NULL;
 }
 
-static inline void
-ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc,
-				int src_cnt);
-static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor);
-
-/**
- * ppc440spe_adma_init_dma2rxor_slot -
- */
-static void ppc440spe_adma_init_dma2rxor_slot(
-		struct ppc440spe_adma_desc_slot *desc,
-		dma_addr_t *src, int src_cnt)
-{
-	int i;
-
-	/* initialize CDB */
-	for (i = 0; i < src_cnt; i++) {
-		ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
-						 desc->src_cnt, (u32)src[i]);
-	}
-}
-
-/**
- * ppc440spe_dma01_prep_mult -
- * for Q operation where destination is also the source
- */
-static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult(
-		struct ppc440spe_adma_chan *ppc440spe_chan,
-		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
-		const unsigned char *scf, size_t len, unsigned long flags)
-{
-	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
-	unsigned long op = 0;
-	int slot_cnt;
-
-	set_bit(PPC440SPE_DESC_WXOR, &op);
-	slot_cnt = 2;
-
-	spin_lock_bh(&ppc440spe_chan->lock);
-
-	/* use WXOR, each descriptor occupies one slot */
-	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
-	if (sw_desc) {
-		struct ppc440spe_adma_chan *chan;
-		struct ppc440spe_adma_desc_slot *iter;
-		struct dma_cdb *hw_desc;
-
-		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-		set_bits(op, &sw_desc->flags);
-		sw_desc->src_cnt = src_cnt;
-		sw_desc->dst_cnt = dst_cnt;
-		/* First descriptor, zero data in the destination and copy it
-		 * to q page using MULTICAST transfer.
-		 */
-		iter = list_first_entry(&sw_desc->group_list,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-		/* set 'next' pointer */
-		iter->hw_next = list_entry(iter->chain_node.next,
-					   struct ppc440spe_adma_desc_slot,
-					   chain_node);
-		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-		hw_desc = iter->hw_desc;
-		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
-
-		ppc440spe_desc_set_dest_addr(iter, chan,
-					     DMA_CUED_XOR_BASE, dst[0], 0);
-		ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
-		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
-					    src[0]);
-		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
-		iter->unmap_len = len;
-
-		/*
-		 * Second descriptor, multiply data from the q page
-		 * and store the result in real destination.
-		 */
-		iter = list_first_entry(&iter->chain_node,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-		iter->hw_next = NULL;
-		if (flags & DMA_PREP_INTERRUPT)
-			set_bit(PPC440SPE_DESC_INT, &iter->flags);
-		else
-			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-
-		hw_desc = iter->hw_desc;
-		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-		ppc440spe_desc_set_src_addr(iter, chan, 0,
-					    DMA_CUED_XOR_HB, dst[1]);
-		ppc440spe_desc_set_dest_addr(iter, chan,
-					     DMA_CUED_XOR_BASE, dst[0], 0);
-
-		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
-					    DMA_CDB_SG_DST1, scf[0]);
-		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
-		iter->unmap_len = len;
-		sw_desc->async_tx.flags = flags;
-	}
-
-	spin_unlock_bh(&ppc440spe_chan->lock);
-
-	return sw_desc;
-}
-
-/**
- * ppc440spe_dma01_prep_sum_product -
- * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also
- * the source.
- */
-static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product(
-		struct ppc440spe_adma_chan *ppc440spe_chan,
-		dma_addr_t *dst, dma_addr_t *src, int src_cnt,
-		const unsigned char *scf, size_t len, unsigned long flags)
-{
-	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
-	unsigned long op = 0;
-	int slot_cnt;
-
-	set_bit(PPC440SPE_DESC_WXOR, &op);
-	slot_cnt = 3;
-
-	spin_lock_bh(&ppc440spe_chan->lock);
-
-	/* WXOR, each descriptor occupies one slot */
-	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
-	if (sw_desc) {
-		struct ppc440spe_adma_chan *chan;
-		struct ppc440spe_adma_desc_slot *iter;
-		struct dma_cdb *hw_desc;
-
-		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-		set_bits(op, &sw_desc->flags);
-		sw_desc->src_cnt = src_cnt;
-		sw_desc->dst_cnt = 1;
-		/* 1st descriptor, src[1] data to q page and zero destination */
-		iter = list_first_entry(&sw_desc->group_list,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-		iter->hw_next = list_entry(iter->chain_node.next,
-					   struct ppc440spe_adma_desc_slot,
-					   chain_node);
-		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-		hw_desc = iter->hw_desc;
-		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
-
-		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
-					     *dst, 0);
-		ppc440spe_desc_set_dest_addr(iter, chan, 0,
-					     ppc440spe_chan->qdest, 1);
-		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
-					    src[1]);
-		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
-		iter->unmap_len = len;
-
-		/* 2nd descriptor, multiply src[1] data and store the
-		 * result in destination */
-		iter = list_first_entry(&iter->chain_node,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-		/* set 'next' pointer */
-		iter->hw_next = list_entry(iter->chain_node.next,
-					   struct ppc440spe_adma_desc_slot,
-					   chain_node);
-		if (flags & DMA_PREP_INTERRUPT)
-			set_bit(PPC440SPE_DESC_INT, &iter->flags);
-		else
-			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-
-		hw_desc = iter->hw_desc;
-		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
-					    ppc440spe_chan->qdest);
-		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
-					     *dst, 0);
-		ppc440spe_desc_set_src_mult(iter, chan,	DMA_CUED_MULT1_OFF,
-					    DMA_CDB_SG_DST1, scf[1]);
-		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
-		iter->unmap_len = len;
-
-		/*
-		 * 3rd descriptor, multiply src[0] data and xor it
-		 * with destination
-		 */
-		iter = list_first_entry(&iter->chain_node,
-					struct ppc440spe_adma_desc_slot,
-					chain_node);
-		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-		iter->hw_next = NULL;
-		if (flags & DMA_PREP_INTERRUPT)
-			set_bit(PPC440SPE_DESC_INT, &iter->flags);
-		else
-			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
-
-		hw_desc = iter->hw_desc;
-		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
-					    src[0]);
-		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
-					     *dst, 0);
-		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
-					    DMA_CDB_SG_DST1, scf[0]);
-		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
-		iter->unmap_len = len;
-		sw_desc->async_tx.flags = flags;
-	}
-
-	spin_unlock_bh(&ppc440spe_chan->lock);
-
-	return sw_desc;
-}
-
-static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(
-		struct ppc440spe_adma_chan *ppc440spe_chan,
-		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
-		const unsigned char *scf, size_t len, unsigned long flags)
-{
-	int slot_cnt;
-	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
-	unsigned long op = 0;
-	unsigned char mult = 1;
-
-	pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
-		 __func__, dst_cnt, src_cnt, len);
-	/*  select operations WXOR/RXOR depending on the
-	 * source addresses of operators and the number
-	 * of destinations (RXOR support only Q-parity calculations)
-	 */
-	set_bit(PPC440SPE_DESC_WXOR, &op);
-	if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
-		/* no active RXOR;
-		 * do RXOR if:
-		 * - there are more than 1 source,
-		 * - len is aligned on 512-byte boundary,
-		 * - source addresses fit to one of 4 possible regions.
-		 */
-		if (src_cnt > 1 &&
-		    !(len & MQ0_CF2H_RXOR_BS_MASK) &&
-		    (src[0] + len) == src[1]) {
-			/* may do RXOR R1 R2 */
-			set_bit(PPC440SPE_DESC_RXOR, &op);
-			if (src_cnt != 2) {
-				/* may try to enhance region of RXOR */
-				if ((src[1] + len) == src[2]) {
-					/* do RXOR R1 R2 R3 */
-					set_bit(PPC440SPE_DESC_RXOR123,
-						&op);
-				} else if ((src[1] + len * 2) == src[2]) {
-					/* do RXOR R1 R2 R4 */
-					set_bit(PPC440SPE_DESC_RXOR124, &op);
-				} else if ((src[1] + len * 3) == src[2]) {
-					/* do RXOR R1 R2 R5 */
-					set_bit(PPC440SPE_DESC_RXOR125,
-						&op);
-				} else {
-					/* do RXOR R1 R2 */
-					set_bit(PPC440SPE_DESC_RXOR12,
-						&op);
-				}
-			} else {
-				/* do RXOR R1 R2 */
-				set_bit(PPC440SPE_DESC_RXOR12, &op);
-			}
-		}
-
-		if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
-			/* can not do this operation with RXOR */
-			clear_bit(PPC440SPE_RXOR_RUN,
-				&ppc440spe_rxor_state);
-		} else {
-			/* can do; set block size right now */
-			ppc440spe_desc_set_rxor_block_size(len);
-		}
-	}
-
-	/* Number of necessary slots depends on operation type selected */
-	if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
-		/*  This is a WXOR only chain. Need descriptors for each
-		 * source to GF-XOR them with WXOR, and need descriptors
-		 * for each destination to zero them with WXOR
-		 */
-		slot_cnt = src_cnt;
-
-		if (flags & DMA_PREP_ZERO_P) {
-			slot_cnt++;
-			set_bit(PPC440SPE_ZERO_P, &op);
-		}
-		if (flags & DMA_PREP_ZERO_Q) {
-			slot_cnt++;
-			set_bit(PPC440SPE_ZERO_Q, &op);
-		}
-	} else {
-		/*  Need 1/2 descriptor for RXOR operation, and
-		 * need (src_cnt - (2 or 3)) for WXOR of sources
-		 * remained (if any)
-		 */
-		slot_cnt = dst_cnt;
-
-		if (flags & DMA_PREP_ZERO_P)
-			set_bit(PPC440SPE_ZERO_P, &op);
-		if (flags & DMA_PREP_ZERO_Q)
-			set_bit(PPC440SPE_ZERO_Q, &op);
-
-		if (test_bit(PPC440SPE_DESC_RXOR12, &op))
-			slot_cnt += src_cnt - 2;
-		else
-			slot_cnt += src_cnt - 3;
-
-		/*  Thus we have either RXOR only chain or
-		 * mixed RXOR/WXOR
-		 */
-		if (slot_cnt == dst_cnt)
-			/* RXOR only chain */
-			clear_bit(PPC440SPE_DESC_WXOR, &op);
-	}
-
-	spin_lock_bh(&ppc440spe_chan->lock);
-	/* for both RXOR/WXOR each descriptor occupies one slot */
-	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
-	if (sw_desc) {
-		ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
-				flags, op);
-
-		/* setup dst/src/mult */
-		pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
-			 __func__, dst[0], dst[1]);
-		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
-		while (src_cnt--) {
-			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
-						  src_cnt);
-
-			/* NOTE: "Multi = 0 is equivalent to = 1" as it
-			 * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf
-			 * doesn't work for RXOR with DMA0/1! Instead, multi=0
-			 * leads to zeroing source data after RXOR.
-			 * So, for P case set-up mult=1 explicitly.
-			 */
-			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-				mult = scf[src_cnt];
-			ppc440spe_adma_pq_set_src_mult(sw_desc,
-				mult, src_cnt,  dst_cnt - 1);
-		}
-
-		/* Setup byte count foreach slot just allocated */
-		sw_desc->async_tx.flags = flags;
-		list_for_each_entry(iter, &sw_desc->group_list,
-				chain_node) {
-			ppc440spe_desc_set_byte_count(iter,
-				ppc440spe_chan, len);
-			iter->unmap_len = len;
-		}
-	}
-	spin_unlock_bh(&ppc440spe_chan->lock);
-
-	return sw_desc;
-}
-
-static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq(
-		struct ppc440spe_adma_chan *ppc440spe_chan,
-		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
-		const unsigned char *scf, size_t len, unsigned long flags)
-{
-	int slot_cnt, descs_per_op;
-	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
-	unsigned long op = 0;
-	unsigned char mult = 1;
-
-	BUG_ON(!dst_cnt);
-	/*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
-		 __func__, dst_cnt, src_cnt, len);*/
-
-	spin_lock_bh(&ppc440spe_chan->lock);
-	descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
-	if (descs_per_op < 0) {
-		spin_unlock_bh(&ppc440spe_chan->lock);
-		return NULL;
-	}
-
-	/* depending on number of sources we have 1 or 2 RXOR chains */
-	slot_cnt = descs_per_op * dst_cnt;
-
-	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
-	if (sw_desc) {
-		op = slot_cnt;
-		sw_desc->async_tx.flags = flags;
-		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
-			ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
-				--op ? 0 : flags);
-			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
-				len);
-			iter->unmap_len = len;
-
-			ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
-			iter->rxor_cursor.len = len;
-			iter->descs_per_op = descs_per_op;
-		}
-		op = 0;
-		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
-			op++;
-			if (op % descs_per_op == 0)
-				ppc440spe_adma_init_dma2rxor_slot(iter, src,
-								  src_cnt);
-			if (likely(!list_is_last(&iter->chain_node,
-						 &sw_desc->group_list))) {
-				/* set 'next' pointer */
-				iter->hw_next =
-					list_entry(iter->chain_node.next,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-				ppc440spe_xor_set_link(iter, iter->hw_next);
-			} else {
-				/* this is the last descriptor. */
-				iter->hw_next = NULL;
-			}
-		}
-
-		/* fixup head descriptor */
-		sw_desc->dst_cnt = dst_cnt;
-		if (flags & DMA_PREP_ZERO_P)
-			set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
-		if (flags & DMA_PREP_ZERO_Q)
-			set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
-
-		/* setup dst/src/mult */
-		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
-
-		while (src_cnt--) {
-			/* handle descriptors (if dst_cnt == 2) inside
-			 * the ppc440spe_adma_pq_set_srcxxx() functions
-			 */
-			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
-						  src_cnt);
-			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-				mult = scf[src_cnt];
-			ppc440spe_adma_pq_set_src_mult(sw_desc,
-					mult, src_cnt, dst_cnt - 1);
-		}
-	}
-	spin_unlock_bh(&ppc440spe_chan->lock);
-	ppc440spe_desc_set_rxor_block_size(len);
-	return sw_desc;
-}
-
-/**
- * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
- */
-static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(
-		struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
-		unsigned int src_cnt, const unsigned char *scf,
-		size_t len, unsigned long flags)
-{
-	struct ppc440spe_adma_chan *ppc440spe_chan;
-	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
-	int dst_cnt = 0;
-
-	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
-
-	ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
-				    dst, src, src_cnt));
-	BUG_ON(!len);
-	BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT));
-	BUG_ON(!src_cnt);
-
-	if (src_cnt == 1 && dst[1] == src[0]) {
-		dma_addr_t dest[2];
-
-		/* dst[1] is real destination (Q) */
-		dest[0] = dst[1];
-		/* this is the page to multicast source data to */
-		dest[1] = ppc440spe_chan->qdest;
-		sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
-				dest, 2, src, src_cnt, scf, len, flags);
-		return sw_desc ? &sw_desc->async_tx : NULL;
-	}
-
-	if (src_cnt == 2 && dst[1] == src[1]) {
-		sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
-					&dst[1], src, 2, scf, len, flags);
-		return sw_desc ? &sw_desc->async_tx : NULL;
-	}
-
-	if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
-		BUG_ON(!dst[0]);
-		dst_cnt++;
-		flags |= DMA_PREP_ZERO_P;
-	}
-
-	if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
-		BUG_ON(!dst[1]);
-		dst_cnt++;
-		flags |= DMA_PREP_ZERO_Q;
-	}
-
-	BUG_ON(!dst_cnt);
-
-	dev_dbg(ppc440spe_chan->device->common.dev,
-		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
-		ppc440spe_chan->device->id, __func__, src_cnt, len,
-		flags & DMA_PREP_INTERRUPT ? 1 : 0);
-
-	switch (ppc440spe_chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
-				dst, dst_cnt, src, src_cnt, scf,
-				len, flags);
-		break;
-
-	case PPC440SPE_XOR_ID:
-		sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
-				dst, dst_cnt, src, src_cnt, scf,
-				len, flags);
-		break;
-	}
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-/**
- * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
- * a PQ_ZERO_SUM operation
- */
-static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum(
-		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
-		unsigned int src_cnt, const unsigned char *scf, size_t len,
-		enum sum_check_flags *pqres, unsigned long flags)
-{
-	struct ppc440spe_adma_chan *ppc440spe_chan;
-	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
-	dma_addr_t pdest, qdest;
-	int slot_cnt, slots_per_op, idst, dst_cnt;
-
-	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
-
-	if (flags & DMA_PREP_PQ_DISABLE_P)
-		pdest = 0;
-	else
-		pdest = pq[0];
-
-	if (flags & DMA_PREP_PQ_DISABLE_Q)
-		qdest = 0;
-	else
-		qdest = pq[1];
-
-	ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
-					    src, src_cnt, scf));
-
-	/* Always use WXOR for P/Q calculations (two destinations).
-	 * Need 1 or 2 extra slots to verify results are zero.
-	 */
-	idst = dst_cnt = (pdest && qdest) ? 2 : 1;
-
-	/* One additional slot per destination to clone P/Q
-	 * before calculation (we have to preserve destinations).
-	 */
-	slot_cnt = src_cnt + dst_cnt * 2;
-	slots_per_op = 1;
-
-	spin_lock_bh(&ppc440spe_chan->lock);
-	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
-					     slots_per_op);
-	if (sw_desc) {
-		ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
-
-		/* Setup byte count for each slot just allocated */
-		sw_desc->async_tx.flags = flags;
-		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
-			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
-						      len);
-			iter->unmap_len = len;
-		}
-
-		if (pdest) {
-			struct dma_cdb *hw_desc;
-			struct ppc440spe_adma_chan *chan;
-
-			iter = sw_desc->group_head;
-			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
-			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-			iter->hw_next = list_entry(iter->chain_node.next,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			hw_desc = iter->hw_desc;
-			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-			iter->src_cnt = 0;
-			iter->dst_cnt = 0;
-			ppc440spe_desc_set_dest_addr(iter, chan, 0,
-						     ppc440spe_chan->pdest, 0);
-			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
-			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
-						      len);
-			iter->unmap_len = 0;
-			/* override pdest to preserve original P */
-			pdest = ppc440spe_chan->pdest;
-		}
-		if (qdest) {
-			struct dma_cdb *hw_desc;
-			struct ppc440spe_adma_chan *chan;
-
-			iter = list_first_entry(&sw_desc->group_list,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
-
-			if (pdest) {
-				iter = list_entry(iter->chain_node.next,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			}
-
-			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
-			iter->hw_next = list_entry(iter->chain_node.next,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			hw_desc = iter->hw_desc;
-			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
-			iter->src_cnt = 0;
-			iter->dst_cnt = 0;
-			ppc440spe_desc_set_dest_addr(iter, chan, 0,
-						     ppc440spe_chan->qdest, 0);
-			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
-			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
-						      len);
-			iter->unmap_len = 0;
-			/* override qdest to preserve original Q */
-			qdest = ppc440spe_chan->qdest;
-		}
-
-		/* Setup destinations for P/Q ops */
-		ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
-
-		/* Setup zero QWORDs into DCHECK CDBs */
-		idst = dst_cnt;
-		list_for_each_entry_reverse(iter, &sw_desc->group_list,
-					    chain_node) {
-			/*
-			 * The last CDB corresponds to Q-parity check,
-			 * the one before last CDB corresponds
-			 * P-parity check
-			 */
-			if (idst == DMA_DEST_MAX_NUM) {
-				if (idst == dst_cnt) {
-					set_bit(PPC440SPE_DESC_QCHECK,
-						&iter->flags);
-				} else {
-					set_bit(PPC440SPE_DESC_PCHECK,
-						&iter->flags);
-				}
-			} else {
-				if (qdest) {
-					set_bit(PPC440SPE_DESC_QCHECK,
-						&iter->flags);
-				} else {
-					set_bit(PPC440SPE_DESC_PCHECK,
-						&iter->flags);
-				}
-			}
-			iter->xor_check_result = pqres;
-
-			/*
-			 * set it to zero, if check fail then result will
-			 * be updated
-			 */
-			*iter->xor_check_result = 0;
-			ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
-				ppc440spe_qword);
-
-			if (!(--dst_cnt))
-				break;
-		}
-
-		/* Setup sources and mults for P/Q ops */
-		list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
-						     chain_node) {
-			struct ppc440spe_adma_chan *chan;
-			u32 mult_dst;
-
-			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
-			ppc440spe_desc_set_src_addr(iter, chan, 0,
-						    DMA_CUED_XOR_HB,
-						    src[src_cnt - 1]);
-			if (qdest) {
-				mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
-							   DMA_CDB_SG_DST1;
-				ppc440spe_desc_set_src_mult(iter, chan,
-							    DMA_CUED_MULT1_OFF,
-							    mult_dst,
-							    scf[src_cnt - 1]);
-			}
-			if (!(--src_cnt))
-				break;
-		}
-	}
-	spin_unlock_bh(&ppc440spe_chan->lock);
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-/**
- * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
- * XOR ZERO_SUM operation
- */
-static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum(
-		struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
-		size_t len, enum sum_check_flags *result, unsigned long flags)
-{
-	struct dma_async_tx_descriptor *tx;
-	dma_addr_t pq[2];
-
-	/* validate P, disable Q */
-	pq[0] = src[0];
-	pq[1] = 0;
-	flags |= DMA_PREP_PQ_DISABLE_Q;
-
-	tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
-						src_cnt - 1, 0, len,
-						result, flags);
-	return tx;
-}
-
-/**
- * ppc440spe_adma_set_dest - set destination address into descriptor
- */
-static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
-		dma_addr_t addr, int index)
-{
-	struct ppc440spe_adma_chan *chan;
-
-	BUG_ON(index >= sw_desc->dst_cnt);
-
-	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* to do: support transfers lengths >
-		 * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT
-		 */
-		ppc440spe_desc_set_dest_addr(sw_desc->group_head,
-			chan, 0, addr, index);
-		break;
-	case PPC440SPE_XOR_ID:
-		sw_desc = ppc440spe_get_group_entry(sw_desc, index);
-		ppc440spe_desc_set_dest_addr(sw_desc,
-			chan, 0, addr, index);
-		break;
-	}
-}
-
-static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
-		struct ppc440spe_adma_chan *chan, dma_addr_t addr)
-{
-	/*  To clear destinations update the descriptor
-	 * (P or Q depending on index) as follows:
-	 * addr is destination (0 corresponds to SG2):
-	 */
-	ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
-
-	/* ... and the addr is source: */
-	ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
-
-	/* addr is always SG2 then the mult is always DST1 */
-	ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
-				    DMA_CDB_SG_DST1, 1);
-}
-
-/**
- * ppc440spe_adma_pq_set_dest - set destination address into descriptor
- * for the PQXOR operation
- */
-static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
-		dma_addr_t *addrs, unsigned long flags)
-{
-	struct ppc440spe_adma_desc_slot *iter;
-	struct ppc440spe_adma_chan *chan;
-	dma_addr_t paddr, qaddr;
-	dma_addr_t addr = 0, ppath, qpath;
-	int index = 0, i;
-
-	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-
-	if (flags & DMA_PREP_PQ_DISABLE_P)
-		paddr = 0;
-	else
-		paddr = addrs[0];
-
-	if (flags & DMA_PREP_PQ_DISABLE_Q)
-		qaddr = 0;
-	else
-		qaddr = addrs[1];
-
-	if (!paddr || !qaddr)
-		addr = paddr ? paddr : qaddr;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* walk through the WXOR source list and set P/Q-destinations
-		 * for each slot:
-		 */
-		if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
-			/* This is WXOR-only chain; may have 1/2 zero descs */
-			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
-				index++;
-			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
-				index++;
-
-			iter = ppc440spe_get_group_entry(sw_desc, index);
-			if (addr) {
-				/* one destination */
-				list_for_each_entry_from(iter,
-					&sw_desc->group_list, chain_node)
-					ppc440spe_desc_set_dest_addr(iter, chan,
-						DMA_CUED_XOR_BASE, addr, 0);
-			} else {
-				/* two destinations */
-				list_for_each_entry_from(iter,
-					&sw_desc->group_list, chain_node) {
-					ppc440spe_desc_set_dest_addr(iter, chan,
-						DMA_CUED_XOR_BASE, paddr, 0);
-					ppc440spe_desc_set_dest_addr(iter, chan,
-						DMA_CUED_XOR_BASE, qaddr, 1);
-				}
-			}
-
-			if (index) {
-				/*  To clear destinations update the descriptor
-				 * (1st,2nd, or both depending on flags)
-				 */
-				index = 0;
-				if (test_bit(PPC440SPE_ZERO_P,
-						&sw_desc->flags)) {
-					iter = ppc440spe_get_group_entry(
-							sw_desc, index++);
-					ppc440spe_adma_pq_zero_op(iter, chan,
-							paddr);
-				}
-
-				if (test_bit(PPC440SPE_ZERO_Q,
-						&sw_desc->flags)) {
-					iter = ppc440spe_get_group_entry(
-							sw_desc, index++);
-					ppc440spe_adma_pq_zero_op(iter, chan,
-							qaddr);
-				}
-
-				return;
-			}
-		} else {
-			/* This is RXOR-only or RXOR/WXOR mixed chain */
-
-			/* If we want to include destination into calculations,
-			 * then make dest addresses cued with mult=1 (XOR).
-			 */
-			ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
-					DMA_CUED_XOR_HB :
-					DMA_CUED_XOR_BASE |
-						(1 << DMA_CUED_MULT1_OFF);
-			qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
-					DMA_CUED_XOR_HB :
-					DMA_CUED_XOR_BASE |
-						(1 << DMA_CUED_MULT1_OFF);
-
-			/* Setup destination(s) in RXOR slot(s) */
-			iter = ppc440spe_get_group_entry(sw_desc, index++);
-			ppc440spe_desc_set_dest_addr(iter, chan,
-						paddr ? ppath : qpath,
-						paddr ? paddr : qaddr, 0);
-			if (!addr) {
-				/* two destinations */
-				iter = ppc440spe_get_group_entry(sw_desc,
-								 index++);
-				ppc440spe_desc_set_dest_addr(iter, chan,
-						qpath, qaddr, 0);
-			}
-
-			if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
-				/* Setup destination(s) in remaining WXOR
-				 * slots
-				 */
-				iter = ppc440spe_get_group_entry(sw_desc,
-								 index);
-				if (addr) {
-					/* one destination */
-					list_for_each_entry_from(iter,
-					    &sw_desc->group_list,
-					    chain_node)
-						ppc440spe_desc_set_dest_addr(
-							iter, chan,
-							DMA_CUED_XOR_BASE,
-							addr, 0);
-
-				} else {
-					/* two destinations */
-					list_for_each_entry_from(iter,
-					    &sw_desc->group_list,
-					    chain_node) {
-						ppc440spe_desc_set_dest_addr(
-							iter, chan,
-							DMA_CUED_XOR_BASE,
-							paddr, 0);
-						ppc440spe_desc_set_dest_addr(
-							iter, chan,
-							DMA_CUED_XOR_BASE,
-							qaddr, 1);
-					}
-				}
-			}
-
-		}
-		break;
-
-	case PPC440SPE_XOR_ID:
-		/* DMA2 descriptors have only 1 destination, so there are
-		 * two chains - one for each dest.
-		 * If we want to include destination into calculations,
-		 * then make dest addresses cued with mult=1 (XOR).
-		 */
-		ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
-				DMA_CUED_XOR_HB :
-				DMA_CUED_XOR_BASE |
-					(1 << DMA_CUED_MULT1_OFF);
-
-		qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
-				DMA_CUED_XOR_HB :
-				DMA_CUED_XOR_BASE |
-					(1 << DMA_CUED_MULT1_OFF);
-
-		iter = ppc440spe_get_group_entry(sw_desc, 0);
-		for (i = 0; i < sw_desc->descs_per_op; i++) {
-			ppc440spe_desc_set_dest_addr(iter, chan,
-				paddr ? ppath : qpath,
-				paddr ? paddr : qaddr, 0);
-			iter = list_entry(iter->chain_node.next,
-					  struct ppc440spe_adma_desc_slot,
-					  chain_node);
-		}
-
-		if (!addr) {
-			/* Two destinations; setup Q here */
-			iter = ppc440spe_get_group_entry(sw_desc,
-				sw_desc->descs_per_op);
-			for (i = 0; i < sw_desc->descs_per_op; i++) {
-				ppc440spe_desc_set_dest_addr(iter,
-					chan, qpath, qaddr, 0);
-				iter = list_entry(iter->chain_node.next,
-						struct ppc440spe_adma_desc_slot,
-						chain_node);
-			}
-		}
-
-		break;
-	}
-}
-
-/**
- * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor
- * for the PQ_ZERO_SUM operation
- */
-static void ppc440spe_adma_pqzero_sum_set_dest(
-		struct ppc440spe_adma_desc_slot *sw_desc,
-		dma_addr_t paddr, dma_addr_t qaddr)
-{
-	struct ppc440spe_adma_desc_slot *iter, *end;
-	struct ppc440spe_adma_chan *chan;
-	dma_addr_t addr = 0;
-	int idx;
-
-	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-
-	/* walk through the WXOR source list and set P/Q-destinations
-	 * for each slot
-	 */
-	idx = (paddr && qaddr) ? 2 : 1;
-	/* set end */
-	list_for_each_entry_reverse(end, &sw_desc->group_list,
-				    chain_node) {
-		if (!(--idx))
-			break;
-	}
-	/* set start */
-	idx = (paddr && qaddr) ? 2 : 1;
-	iter = ppc440spe_get_group_entry(sw_desc, idx);
-
-	if (paddr && qaddr) {
-		/* two destinations */
-		list_for_each_entry_from(iter, &sw_desc->group_list,
-					 chain_node) {
-			if (unlikely(iter == end))
-				break;
-			ppc440spe_desc_set_dest_addr(iter, chan,
-						DMA_CUED_XOR_BASE, paddr, 0);
-			ppc440spe_desc_set_dest_addr(iter, chan,
-						DMA_CUED_XOR_BASE, qaddr, 1);
-		}
-	} else {
-		/* one destination */
-		addr = paddr ? paddr : qaddr;
-		list_for_each_entry_from(iter, &sw_desc->group_list,
-					 chain_node) {
-			if (unlikely(iter == end))
-				break;
-			ppc440spe_desc_set_dest_addr(iter, chan,
-						DMA_CUED_XOR_BASE, addr, 0);
-		}
-	}
-
-	/*  The remaining descriptors are DATACHECK. These have no need in
-	 * destination. Actually, these destinations are used there
-	 * as sources for check operation. So, set addr as source.
-	 */
-	ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
-
-	if (!addr) {
-		end = list_entry(end->chain_node.next,
-				 struct ppc440spe_adma_desc_slot, chain_node);
-		ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
-	}
-}
-
-/**
- * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
- */
-static inline void ppc440spe_desc_set_xor_src_cnt(
-			struct ppc440spe_adma_desc_slot *desc,
-			int src_cnt)
-{
-	struct xor_cb *hw_desc = desc->hw_desc;
-
-	hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
-	hw_desc->cbc |= src_cnt;
-}
-
-/**
- * ppc440spe_adma_pq_set_src - set source address into descriptor
- */
-static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
-		dma_addr_t addr, int index)
-{
-	struct ppc440spe_adma_chan *chan;
-	dma_addr_t haddr = 0;
-	struct ppc440spe_adma_desc_slot *iter = NULL;
-
-	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
-		 */
-		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
-			/* RXOR-only or RXOR/WXOR operation */
-			int iskip = test_bit(PPC440SPE_DESC_RXOR12,
-				&sw_desc->flags) ?  2 : 3;
-
-			if (index == 0) {
-				/* 1st slot (RXOR) */
-				/* setup sources region (R1-2-3, R1-2-4,
-				 * or R1-2-5)
-				 */
-				if (test_bit(PPC440SPE_DESC_RXOR12,
-						&sw_desc->flags))
-					haddr = DMA_RXOR12 <<
-						DMA_CUED_REGION_OFF;
-				else if (test_bit(PPC440SPE_DESC_RXOR123,
-				    &sw_desc->flags))
-					haddr = DMA_RXOR123 <<
-						DMA_CUED_REGION_OFF;
-				else if (test_bit(PPC440SPE_DESC_RXOR124,
-				    &sw_desc->flags))
-					haddr = DMA_RXOR124 <<
-						DMA_CUED_REGION_OFF;
-				else if (test_bit(PPC440SPE_DESC_RXOR125,
-				    &sw_desc->flags))
-					haddr = DMA_RXOR125 <<
-						DMA_CUED_REGION_OFF;
-				else
-					BUG();
-				haddr |= DMA_CUED_XOR_BASE;
-				iter = ppc440spe_get_group_entry(sw_desc, 0);
-			} else if (index < iskip) {
-				/* 1st slot (RXOR)
-				 * shall actually set source address only once
-				 * instead of first <iskip>
-				 */
-				iter = NULL;
-			} else {
-				/* 2nd/3d and next slots (WXOR);
-				 * skip first slot with RXOR
-				 */
-				haddr = DMA_CUED_XOR_HB;
-				iter = ppc440spe_get_group_entry(sw_desc,
-				    index - iskip + sw_desc->dst_cnt);
-			}
-		} else {
-			int znum = 0;
-
-			/* WXOR-only operation; skip first slots with
-			 * zeroing destinations
-			 */
-			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
-				znum++;
-			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
-				znum++;
-
-			haddr = DMA_CUED_XOR_HB;
-			iter = ppc440spe_get_group_entry(sw_desc,
-					index + znum);
-		}
-
-		if (likely(iter)) {
-			ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
-
-			if (!index &&
-			    test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
-			    sw_desc->dst_cnt == 2) {
-				/* if we have two destinations for RXOR, then
-				 * setup source in the second descr too
-				 */
-				iter = ppc440spe_get_group_entry(sw_desc, 1);
-				ppc440spe_desc_set_src_addr(iter, chan, 0,
-					haddr, addr);
-			}
-		}
-		break;
-
-	case PPC440SPE_XOR_ID:
-		/* DMA2 may do Biskup */
-		iter = sw_desc->group_head;
-		if (iter->dst_cnt == 2) {
-			/* both P & Q calculations required; set P src here */
-			ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
-
-			/* this is for Q */
-			iter = ppc440spe_get_group_entry(sw_desc,
-				sw_desc->descs_per_op);
-		}
-		ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
-		break;
-	}
-}
-
-/**
- * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
- */
-static void ppc440spe_adma_memcpy_xor_set_src(
-		struct ppc440spe_adma_desc_slot *sw_desc,
-		dma_addr_t addr, int index)
-{
-	struct ppc440spe_adma_chan *chan;
-
-	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-	sw_desc = sw_desc->group_head;
-
-	if (likely(sw_desc))
-		ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
-}
-
-/**
- * ppc440spe_adma_dma2rxor_inc_addr  -
- */
-static void ppc440spe_adma_dma2rxor_inc_addr(
-		struct ppc440spe_adma_desc_slot *desc,
-		struct ppc440spe_rxor *cursor, int index, int src_cnt)
-{
-	cursor->addr_count++;
-	if (index == src_cnt - 1) {
-		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
-	} else if (cursor->addr_count == XOR_MAX_OPS) {
-		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
-		cursor->addr_count = 0;
-		cursor->desc_count++;
-	}
-}
-
-/**
- * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
- */
-static int ppc440spe_adma_dma2rxor_prep_src(
-		struct ppc440spe_adma_desc_slot *hdesc,
-		struct ppc440spe_rxor *cursor, int index,
-		int src_cnt, u32 addr)
-{
-	int rval = 0;
-	u32 sign;
-	struct ppc440spe_adma_desc_slot *desc = hdesc;
-	int i;
-
-	for (i = 0; i < cursor->desc_count; i++) {
-		desc = list_entry(hdesc->chain_node.next,
-				  struct ppc440spe_adma_desc_slot,
-				  chain_node);
-	}
-
-	switch (cursor->state) {
-	case 0:
-		if (addr == cursor->addrl + cursor->len) {
-			/* direct RXOR */
-			cursor->state = 1;
-			cursor->xor_count++;
-			if (index == src_cnt-1) {
-				ppc440spe_rxor_set_region(desc,
-					cursor->addr_count,
-					DMA_RXOR12 << DMA_CUED_REGION_OFF);
-				ppc440spe_adma_dma2rxor_inc_addr(
-					desc, cursor, index, src_cnt);
-			}
-		} else if (cursor->addrl == addr + cursor->len) {
-			/* reverse RXOR */
-			cursor->state = 1;
-			cursor->xor_count++;
-			set_bit(cursor->addr_count, &desc->reverse_flags[0]);
-			if (index == src_cnt-1) {
-				ppc440spe_rxor_set_region(desc,
-					cursor->addr_count,
-					DMA_RXOR12 << DMA_CUED_REGION_OFF);
-				ppc440spe_adma_dma2rxor_inc_addr(
-					desc, cursor, index, src_cnt);
-			}
-		} else {
-			printk(KERN_ERR "Cannot build "
-				"DMA2 RXOR command block.\n");
-			BUG();
-		}
-		break;
-	case 1:
-		sign = test_bit(cursor->addr_count,
-				desc->reverse_flags)
-			? -1 : 1;
-		if (index == src_cnt-2 || (sign == -1
-			&& addr != cursor->addrl - 2*cursor->len)) {
-			cursor->state = 0;
-			cursor->xor_count = 1;
-			cursor->addrl = addr;
-			ppc440spe_rxor_set_region(desc,
-				cursor->addr_count,
-				DMA_RXOR12 << DMA_CUED_REGION_OFF);
-			ppc440spe_adma_dma2rxor_inc_addr(
-				desc, cursor, index, src_cnt);
-		} else if (addr == cursor->addrl + 2*sign*cursor->len) {
-			cursor->state = 2;
-			cursor->xor_count = 0;
-			ppc440spe_rxor_set_region(desc,
-				cursor->addr_count,
-				DMA_RXOR123 << DMA_CUED_REGION_OFF);
-			if (index == src_cnt-1) {
-				ppc440spe_adma_dma2rxor_inc_addr(
-					desc, cursor, index, src_cnt);
-			}
-		} else if (addr == cursor->addrl + 3*cursor->len) {
-			cursor->state = 2;
-			cursor->xor_count = 0;
-			ppc440spe_rxor_set_region(desc,
-				cursor->addr_count,
-				DMA_RXOR124 << DMA_CUED_REGION_OFF);
-			if (index == src_cnt-1) {
-				ppc440spe_adma_dma2rxor_inc_addr(
-					desc, cursor, index, src_cnt);
-			}
-		} else if (addr == cursor->addrl + 4*cursor->len) {
-			cursor->state = 2;
-			cursor->xor_count = 0;
-			ppc440spe_rxor_set_region(desc,
-				cursor->addr_count,
-				DMA_RXOR125 << DMA_CUED_REGION_OFF);
-			if (index == src_cnt-1) {
-				ppc440spe_adma_dma2rxor_inc_addr(
-					desc, cursor, index, src_cnt);
-			}
-		} else {
-			cursor->state = 0;
-			cursor->xor_count = 1;
-			cursor->addrl = addr;
-			ppc440spe_rxor_set_region(desc,
-				cursor->addr_count,
-				DMA_RXOR12 << DMA_CUED_REGION_OFF);
-			ppc440spe_adma_dma2rxor_inc_addr(
-				desc, cursor, index, src_cnt);
-		}
-		break;
-	case 2:
-		cursor->state = 0;
-		cursor->addrl = addr;
-		cursor->xor_count++;
-		if (index) {
-			ppc440spe_adma_dma2rxor_inc_addr(
-				desc, cursor, index, src_cnt);
-		}
-		break;
-	}
-
-	return rval;
-}
-
-/**
- * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
- *	ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
- */
-static void ppc440spe_adma_dma2rxor_set_src(
-		struct ppc440spe_adma_desc_slot *desc,
-		int index, dma_addr_t addr)
-{
-	struct xor_cb *xcb = desc->hw_desc;
-	int k = 0, op = 0, lop = 0;
-
-	/* get the RXOR operand which corresponds to index addr */
-	while (op <= index) {
-		lop = op;
-		if (k == XOR_MAX_OPS) {
-			k = 0;
-			desc = list_entry(desc->chain_node.next,
-				struct ppc440spe_adma_desc_slot, chain_node);
-			xcb = desc->hw_desc;
-
-		}
-		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
-		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
-			op += 2;
-		else
-			op += 3;
-	}
-
-	BUG_ON(k < 1);
-
-	if (test_bit(k-1, desc->reverse_flags)) {
-		/* reverse operand order; put last op in RXOR group */
-		if (index == op - 1)
-			ppc440spe_rxor_set_src(desc, k - 1, addr);
-	} else {
-		/* direct operand order; put first op in RXOR group */
-		if (index == lop)
-			ppc440spe_rxor_set_src(desc, k - 1, addr);
-	}
-}
-
-/**
- * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
- *	ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
- */
-static void ppc440spe_adma_dma2rxor_set_mult(
-		struct ppc440spe_adma_desc_slot *desc,
-		int index, u8 mult)
-{
-	struct xor_cb *xcb = desc->hw_desc;
-	int k = 0, op = 0, lop = 0;
-
-	/* get the RXOR operand which corresponds to index mult */
-	while (op <= index) {
-		lop = op;
-		if (k == XOR_MAX_OPS) {
-			k = 0;
-			desc = list_entry(desc->chain_node.next,
-					  struct ppc440spe_adma_desc_slot,
-					  chain_node);
-			xcb = desc->hw_desc;
-
-		}
-		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
-		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
-			op += 2;
-		else
-			op += 3;
-	}
-
-	BUG_ON(k < 1);
-	if (test_bit(k-1, desc->reverse_flags)) {
-		/* reverse order */
-		ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
-	} else {
-		/* direct order */
-		ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
-	}
-}
-
-/**
- * ppc440spe_init_rxor_cursor -
- */
-static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
-{
-	memset(cursor, 0, sizeof(struct ppc440spe_rxor));
-	cursor->state = 2;
-}
-
-/**
- * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
- * descriptor for the PQXOR operation
- */
-static void ppc440spe_adma_pq_set_src_mult(
-		struct ppc440spe_adma_desc_slot *sw_desc,
-		unsigned char mult, int index, int dst_pos)
-{
-	struct ppc440spe_adma_chan *chan;
-	u32 mult_idx, mult_dst;
-	struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
-
-	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
-			int region = test_bit(PPC440SPE_DESC_RXOR12,
-					&sw_desc->flags) ? 2 : 3;
-
-			if (index < region) {
-				/* RXOR multipliers */
-				iter = ppc440spe_get_group_entry(sw_desc,
-					sw_desc->dst_cnt - 1);
-				if (sw_desc->dst_cnt == 2)
-					iter1 = ppc440spe_get_group_entry(
-							sw_desc, 0);
-
-				mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
-				mult_dst = DMA_CDB_SG_SRC;
-			} else {
-				/* WXOR multiplier */
-				iter = ppc440spe_get_group_entry(sw_desc,
-							index - region +
-							sw_desc->dst_cnt);
-				mult_idx = DMA_CUED_MULT1_OFF;
-				mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
-						     DMA_CDB_SG_DST1;
-			}
-		} else {
-			int znum = 0;
-
-			/* WXOR-only;
-			 * skip first slots with destinations (if ZERO_DST has
-			 * place)
-			 */
-			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
-				znum++;
-			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
-				znum++;
-
-			iter = ppc440spe_get_group_entry(sw_desc, index + znum);
-			mult_idx = DMA_CUED_MULT1_OFF;
-			mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
-		}
-
-		if (likely(iter)) {
-			ppc440spe_desc_set_src_mult(iter, chan,
-				mult_idx, mult_dst, mult);
-
-			if (unlikely(iter1)) {
-				/* if we have two destinations for RXOR, then
-				 * we've just set Q mult. Set-up P now.
-				 */
-				ppc440spe_desc_set_src_mult(iter1, chan,
-					mult_idx, mult_dst, 1);
-			}
-
-		}
-		break;
-
-	case PPC440SPE_XOR_ID:
-		iter = sw_desc->group_head;
-		if (sw_desc->dst_cnt == 2) {
-			/* both P & Q calculations required; set P mult here */
-			ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
-
-			/* and then set Q mult */
-			iter = ppc440spe_get_group_entry(sw_desc,
-			       sw_desc->descs_per_op);
-		}
-		ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
-		break;
-	}
-}
-
 /**
  * ppc440spe_adma_free_chan_resources - free the resources allocated
  */
@@ -3971,7 +924,7 @@ static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
 /**
  * ppc440spe_adma_eot_handler - end of transfer interrupt handler
  */
-static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
+irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
 {
 	struct ppc440spe_adma_chan *chan = data;
 
@@ -3988,7 +941,7 @@ static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
  * ppc440spe_adma_err_handler - DMA error interrupt handler;
  *	do the same things as a eot handler
  */
-static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
+irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
 {
 	struct ppc440spe_adma_chan *chan = data;
 
@@ -4002,17 +955,9 @@ static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
 }
 
 /**
- * ppc440spe_test_callback - called when test operation has been done
- */
-static void ppc440spe_test_callback(void *unused)
-{
-	complete(&ppc440spe_r6_test_comp);
-}
-
-/**
  * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
  */
-static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
+void ppc440spe_adma_issue_pending(struct dma_chan *chan)
 {
 	struct ppc440spe_adma_chan *ppc440spe_chan;
 
@@ -4032,7 +977,7 @@ static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
  *	use FIFOs (as opposite to chains used in XOR) so this is a XOR
  *	specific operation)
  */
-static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
+void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
 {
 	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
 	dma_cookie_t cookie;
@@ -4076,100 +1021,8 @@ static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
 	spin_unlock_bh(&chan->lock);
 }
 
-/**
- * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully.
- *	For this we just perform one WXOR operation with the same source
- *	and destination addresses, the GF-multiplier is 1; so if RAID-6
- *	capabilities are enabled then we'll get src/dst filled with zero.
- */
-static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
-{
-	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
-	struct page *pg;
-	char *a;
-	dma_addr_t dma_addr, addrs[2];
-	unsigned long op = 0;
-	int rval = 0;
-
-	set_bit(PPC440SPE_DESC_WXOR, &op);
-
-	pg = alloc_page(GFP_KERNEL);
-	if (!pg)
-		return -ENOMEM;
-
-	spin_lock_bh(&chan->lock);
-	sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
-	if (sw_desc) {
-		/* 1 src, 1 dsr, int_ena, WXOR */
-		ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
-		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
-			ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
-			iter->unmap_len = PAGE_SIZE;
-		}
-	} else {
-		rval = -EFAULT;
-		spin_unlock_bh(&chan->lock);
-		goto exit;
-	}
-	spin_unlock_bh(&chan->lock);
-
-	/* Fill the test page with ones */
-	memset(page_address(pg), 0xFF, PAGE_SIZE);
-	dma_addr = dma_map_page(chan->device->dev, pg, 0,
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
-
-	/* Setup addresses */
-	ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
-	ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
-	addrs[0] = dma_addr;
-	addrs[1] = 0;
-	ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
-
-	async_tx_ack(&sw_desc->async_tx);
-	sw_desc->async_tx.callback = ppc440spe_test_callback;
-	sw_desc->async_tx.callback_param = NULL;
-
-	init_completion(&ppc440spe_r6_test_comp);
-
-	ppc440spe_adma_tx_submit(&sw_desc->async_tx);
-	ppc440spe_adma_issue_pending(&chan->common);
-
-	wait_for_completion(&ppc440spe_r6_test_comp);
-
-	/* Now check if the test page is zeroed */
-	a = page_address(pg);
-	if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) {
-		/* page is zero - RAID-6 enabled */
-		rval = 0;
-	} else {
-		/* RAID-6 was not enabled */
-		rval = -EINVAL;
-	}
-exit:
-	__free_page(pg);
-	return rval;
-}
-
 static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
 {
-	switch (adev->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
-		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
-		dma_cap_set(DMA_MEMSET, adev->common.cap_mask);
-		dma_cap_set(DMA_PQ, adev->common.cap_mask);
-		dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
-		dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
-		break;
-	case PPC440SPE_XOR_ID:
-		dma_cap_set(DMA_XOR, adev->common.cap_mask);
-		dma_cap_set(DMA_PQ, adev->common.cap_mask);
-		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
-		adev->common.cap_mask = adev->common.cap_mask;
-		break;
-	}
-
 	/* Set base routines */
 	adev->common.device_alloc_chan_resources =
 				ppc440spe_adma_alloc_chan_resources;
@@ -4193,47 +1046,14 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
 			ppc440spe_adma_prep_dma_xor;
 	}
 	if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
-		switch (adev->id) {
-		case PPC440SPE_DMA0_ID:
-			dma_set_maxpq(&adev->common,
-				DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0);
-			break;
-		case PPC440SPE_DMA1_ID:
-			dma_set_maxpq(&adev->common,
-				DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0);
-			break;
-		case PPC440SPE_XOR_ID:
-			adev->common.max_pq = XOR_MAX_OPS * 3;
-			break;
-		}
 		adev->common.device_prep_dma_pq =
 			ppc440spe_adma_prep_dma_pq;
 	}
 	if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
-		switch (adev->id) {
-		case PPC440SPE_DMA0_ID:
-			adev->common.max_pq = DMA0_FIFO_SIZE /
-						sizeof(struct dma_cdb);
-			break;
-		case PPC440SPE_DMA1_ID:
-			adev->common.max_pq = DMA1_FIFO_SIZE /
-						sizeof(struct dma_cdb);
-			break;
-		}
 		adev->common.device_prep_dma_pq_val =
 			ppc440spe_adma_prep_dma_pqzero_sum;
 	}
 	if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
-		switch (adev->id) {
-		case PPC440SPE_DMA0_ID:
-			adev->common.max_xor = DMA0_FIFO_SIZE /
-						sizeof(struct dma_cdb);
-			break;
-		case PPC440SPE_DMA1_ID:
-			adev->common.max_xor = DMA1_FIFO_SIZE /
-						sizeof(struct dma_cdb);
-			break;
-		}
 		adev->common.device_prep_dma_xor_val =
 			ppc440spe_adma_prep_dma_xor_zero_sum;
 	}
@@ -4241,205 +1061,34 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
 		adev->common.device_prep_dma_interrupt =
 			ppc440spe_adma_prep_dma_interrupt;
 	}
-	pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
-	  "( %s%s%s%s%s%s%s)\n",
-	  dev_name(adev->dev),
-	  dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
-	  dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
-	  dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
-	  dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "",
-	  dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
-	  dma_has_cap(DMA_MEMSET, adev->common.cap_mask)  ? "memset " : "",
-	  dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : "");
-}
-
-static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
-				     struct ppc440spe_adma_chan *chan,
-				     int *initcode)
-{
-	struct platform_device *ofdev;
-	struct device_node *np;
-	int ret;
-
-	ofdev = container_of(adev->dev, struct platform_device, dev);
-	np = ofdev->dev.of_node;
-	if (adev->id != PPC440SPE_XOR_ID) {
-		adev->err_irq = irq_of_parse_and_map(np, 1);
-		if (adev->err_irq == NO_IRQ) {
-			dev_warn(adev->dev, "no err irq resource?\n");
-			*initcode = PPC_ADMA_INIT_IRQ2;
-			adev->err_irq = -ENXIO;
-		} else
-			atomic_inc(&ppc440spe_adma_err_irq_ref);
-	} else {
-		adev->err_irq = -ENXIO;
-	}
-
-	adev->irq = irq_of_parse_and_map(np, 0);
-	if (adev->irq == NO_IRQ) {
-		dev_err(adev->dev, "no irq resource\n");
-		*initcode = PPC_ADMA_INIT_IRQ1;
-		ret = -ENXIO;
-		goto err_irq_map;
-	}
-	dev_dbg(adev->dev, "irq %d, err irq %d\n",
-		adev->irq, adev->err_irq);
-
-	ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
-			  0, dev_driver_string(adev->dev), chan);
-	if (ret) {
-		dev_err(adev->dev, "can't request irq %d\n",
-			adev->irq);
-		*initcode = PPC_ADMA_INIT_IRQ1;
-		ret = -EIO;
-		goto err_req1;
-	}
-
-	/* only DMA engines have a separate error IRQ
-	 * so it's Ok if err_irq < 0 in XOR engine case.
-	 */
-	if (adev->err_irq > 0) {
-		/* both DMA engines share common error IRQ */
-		ret = request_irq(adev->err_irq,
-				  ppc440spe_adma_err_handler,
-				  IRQF_SHARED,
-				  dev_driver_string(adev->dev),
-				  chan);
-		if (ret) {
-			dev_err(adev->dev, "can't request irq %d\n",
-				adev->err_irq);
-			*initcode = PPC_ADMA_INIT_IRQ2;
-			ret = -EIO;
-			goto err_req2;
-		}
-	}
-
-	if (adev->id == PPC440SPE_XOR_ID) {
-		/* enable XOR engine interrupts */
-		iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
-			    XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
-			    &adev->xor_reg->ier);
-	} else {
-		u32 mask, enable;
-
-		np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
-		if (!np) {
-			pr_err("%s: can't find I2O device tree node\n",
-				__func__);
-			ret = -ENODEV;
-			goto err_req2;
-		}
-		adev->i2o_reg = of_iomap(np, 0);
-		if (!adev->i2o_reg) {
-			pr_err("%s: failed to map I2O registers\n", __func__);
-			of_node_put(np);
-			ret = -EINVAL;
-			goto err_req2;
-		}
-		of_node_put(np);
-		/* Unmask 'CS FIFO Attention' interrupts and
-		 * enable generating interrupts on errors
-		 */
-		enable = (adev->id == PPC440SPE_DMA0_ID) ?
-			 ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
-			 ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
-		mask = ioread32(&adev->i2o_reg->iopim) & enable;
-		iowrite32(mask, &adev->i2o_reg->iopim);
-	}
-	return 0;
-
-err_req2:
-	free_irq(adev->irq, chan);
-err_req1:
-	irq_dispose_mapping(adev->irq);
-err_irq_map:
-	if (adev->err_irq > 0) {
-		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
-			irq_dispose_mapping(adev->err_irq);
-	}
-	return ret;
-}
-
-static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
-					struct ppc440spe_adma_chan *chan)
-{
-	u32 mask, disable;
-
-	if (adev->id == PPC440SPE_XOR_ID) {
-		/* disable XOR engine interrupts */
-		mask = ioread32be(&adev->xor_reg->ier);
-		mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
-			  XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
-		iowrite32be(mask, &adev->xor_reg->ier);
-	} else {
-		/* disable DMAx engine interrupts */
-		disable = (adev->id == PPC440SPE_DMA0_ID) ?
-			  (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
-			  (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
-		mask = ioread32(&adev->i2o_reg->iopim) | disable;
-		iowrite32(mask, &adev->i2o_reg->iopim);
-	}
-	free_irq(adev->irq, chan);
-	irq_dispose_mapping(adev->irq);
-	if (adev->err_irq > 0) {
-		free_irq(adev->err_irq, chan);
-		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
-			irq_dispose_mapping(adev->err_irq);
-			iounmap(adev->i2o_reg);
-		}
-	}
 }
 
 /**
  * ppc440spe_adma_probe - probe the asynch device
  */
-static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev,
+int __devinit ppc440spe_adma_probe(struct platform_device *ofdev,
 					  const struct of_device_id *match)
 {
 	struct device_node *np = ofdev->dev.of_node;
 	struct resource res;
-	struct ppc440spe_adma_device *adev;
+	struct ppc440spe_adma_device *adev = NULL;
 	struct ppc440spe_adma_chan *chan;
 	struct ppc_dma_chan_ref *ref, *_ref;
 	int ret = 0, initcode = PPC_ADMA_INIT_OK;
-	const u32 *idx;
-	int len;
 	void *regs;
-	u32 id, pool_size;
-
-	if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
-		id = PPC440SPE_XOR_ID;
-		/* As far as the XOR engine is concerned, it does not
-		 * use FIFOs but uses linked list. So there is no dependency
-		 * between pool size to allocate and the engine configuration.
-		 */
-		pool_size = PAGE_SIZE << 1;
-	} else {
-		/* it is DMA0 or DMA1 */
-		idx = of_get_property(np, "cell-index", &len);
-		if (!idx || (len != sizeof(u32))) {
-			dev_err(&ofdev->dev, "Device node %s has missing "
-				"or invalid cell-index property\n",
-				np->full_name);
-			return -EINVAL;
-		}
-		id = *idx;
-		/* DMA0,1 engines use FIFO to maintain CDBs, so we
-		 * should allocate the pool accordingly to size of this
-		 * FIFO. Thus, the pool size depends on the FIFO depth:
-		 * how much CDBs pointers the FIFO may contain then so
-		 * much CDBs we should provide in the pool.
-		 * That is
-		 *   CDB size = 32B;
-		 *   CDBs number = (DMA0_FIFO_SIZE >> 3);
-		 *   Pool size = CDBs number * CDB size =
-		 *      = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
-		 */
-		pool_size = (id == PPC440SPE_DMA0_ID) ?
-			    DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
-		pool_size <<= 2;
-	}
+	u32 id = 0, pool_size;
 
+	/*
+	 *  get device ID
+	 */
+	adev->id = ppc440spe_adma_get_devid(ofdev, np);
+	/*
+	 * Get DMA pool size
+	 */
+	pool_size = ppc440spe_adma_get_pool_size(np, adev->id);
+	/*
+	 * Get resource info
+	 */
 	if (of_address_to_resource(np, 0, &res)) {
 		dev_err(&ofdev->dev, "failed to get memory resource\n");
 		initcode = PPC_ADMA_INIT_MEMRES;
@@ -4489,28 +1138,10 @@ static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev,
 		goto err_regs_alloc;
 	}
 
-	if (adev->id == PPC440SPE_XOR_ID) {
-		adev->xor_reg = regs;
-		/* Reset XOR */
-		iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
-		iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
-	} else {
-		size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
-				   DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
-		adev->dma_reg = regs;
-		/* DMAx_FIFO_SIZE is defined in bytes,
-		 * <fsiz> - is defined in number of CDB pointers (8byte).
-		 * DMA FIFO Length = CSlength + CPlength, where
-		 * CSlength = CPlength = (fsiz + 1) * 8.
-		 */
-		iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
-			  &adev->dma_reg->fsiz);
-		/* Configure DMA engine */
-		iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
-			  &adev->dma_reg->cfg);
-		/* Clear Status */
-		iowrite32(~0, &adev->dma_reg->dsts);
-	}
+	/*
+	 * reset DMA and config FIFO
+	 */
+	ppc440spe_adma_init_hw(adev, regs);
 
 	adev->dev = &ofdev->dev;
 	adev->common.dev = &ofdev->dev;
@@ -4535,26 +1166,11 @@ static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev,
 	tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
 		     (unsigned long)chan);
 
-	/* allocate and map helper pages for async validation or
-	 * async_mult/async_sum_product operations on DMA0/1.
+	/*
+	 * Create helper pages
 	 */
-	if (adev->id != PPC440SPE_XOR_ID) {
-		chan->pdest_page = alloc_page(GFP_KERNEL);
-		chan->qdest_page = alloc_page(GFP_KERNEL);
-		if (!chan->pdest_page ||
-		    !chan->qdest_page) {
-			if (chan->pdest_page)
-				__free_page(chan->pdest_page);
-			if (chan->qdest_page)
-				__free_page(chan->qdest_page);
-			ret = -ENOMEM;
+	if (ppc440spe_create_helper_pages(adev, ofdev, chan))
 			goto err_page_alloc;
-		}
-		chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
-					   PAGE_SIZE, DMA_BIDIRECTIONAL);
-		chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
-					   PAGE_SIZE, DMA_BIDIRECTIONAL);
-	}
 
 	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
 	if (ref) {
@@ -4571,6 +1187,7 @@ static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev,
 	if (ret)
 		goto err_irq;
 
+	ppc440spe_adma_set_capabilities(adev);
 	ppc440spe_adma_init_capabilities(adev);
 
 	ret = dma_async_device_register(&adev->common);
@@ -4592,21 +1209,11 @@ err_irq:
 		}
 	}
 err_ref_alloc:
-	if (adev->id != PPC440SPE_XOR_ID) {
-		dma_unmap_page(&ofdev->dev, chan->pdest,
-			       PAGE_SIZE, DMA_BIDIRECTIONAL);
-		dma_unmap_page(&ofdev->dev, chan->qdest,
-			       PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(chan->pdest_page);
-		__free_page(chan->qdest_page);
-	}
+	ppc440spe_free_ref(adev, ofdev, chan);
 err_page_alloc:
 	kfree(chan);
 err_chan_alloc:
-	if (adev->id == PPC440SPE_XOR_ID)
-		iounmap(adev->xor_reg);
-	else
-		iounmap(adev->dma_reg);
+	ppc440spe_free_reg(adev);
 err_regs_alloc:
 	dma_free_coherent(adev->dev, adev->pool_size,
 			  adev->dma_desc_pool_virt,
@@ -4622,327 +1229,6 @@ out:
 	return ret;
 }
 
-/**
- * ppc440spe_adma_remove - remove the asynch device
- */
-static int __devexit ppc440spe_adma_remove(struct platform_device *ofdev)
-{
-	struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev);
-	struct device_node *np = ofdev->dev.of_node;
-	struct resource res;
-	struct dma_chan *chan, *_chan;
-	struct ppc_dma_chan_ref *ref, *_ref;
-	struct ppc440spe_adma_chan *ppc440spe_chan;
-
-	dev_set_drvdata(&ofdev->dev, NULL);
-	if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
-		ppc440spe_adma_devices[adev->id] = -1;
-
-	dma_async_device_unregister(&adev->common);
-
-	list_for_each_entry_safe(chan, _chan, &adev->common.channels,
-				 device_node) {
-		ppc440spe_chan = to_ppc440spe_adma_chan(chan);
-		ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
-		tasklet_kill(&ppc440spe_chan->irq_tasklet);
-		if (adev->id != PPC440SPE_XOR_ID) {
-			dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
-					PAGE_SIZE, DMA_BIDIRECTIONAL);
-			dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
-					PAGE_SIZE, DMA_BIDIRECTIONAL);
-			__free_page(ppc440spe_chan->pdest_page);
-			__free_page(ppc440spe_chan->qdest_page);
-		}
-		list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
-					 node) {
-			if (ppc440spe_chan ==
-			    to_ppc440spe_adma_chan(ref->chan)) {
-				list_del(&ref->node);
-				kfree(ref);
-			}
-		}
-		list_del(&chan->device_node);
-		kfree(ppc440spe_chan);
-	}
-
-	dma_free_coherent(adev->dev, adev->pool_size,
-			  adev->dma_desc_pool_virt, adev->dma_desc_pool);
-	if (adev->id == PPC440SPE_XOR_ID)
-		iounmap(adev->xor_reg);
-	else
-		iounmap(adev->dma_reg);
-	of_address_to_resource(np, 0, &res);
-	release_mem_region(res.start, resource_size(&res));
-	kfree(adev);
-	return 0;
-}
-
-/*
- * /sys driver interface to enable h/w RAID-6 capabilities
- * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
- * directory are "devices", "enable" and "poly".
- * "devices" shows available engines.
- * "enable" is used to enable RAID-6 capabilities or to check
- * whether these has been activated.
- * "poly" allows setting/checking used polynomial (for PPC440SPe only).
- */
-
-static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
-{
-	ssize_t size = 0;
-	int i;
-
-	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
-		if (ppc440spe_adma_devices[i] == -1)
-			continue;
-		size += snprintf(buf + size, PAGE_SIZE - size,
-				 "PPC440SP(E)-ADMA.%d: %s\n", i,
-				 ppc_adma_errors[ppc440spe_adma_devices[i]]);
-	}
-	return size;
-}
-
-static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE,
-			"PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
-			ppc440spe_r6_enabled ? "EN" : "DIS");
-}
-
-static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
-					const char *buf, size_t count)
-{
-	unsigned long val;
-
-	if (!count || count > 11)
-		return -EINVAL;
-
-	if (!ppc440spe_r6_tchan)
-		return -EFAULT;
-
-	/* Write a key */
-	sscanf(buf, "%lx", &val);
-	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
-	isync();
-
-	/* Verify whether it really works now */
-	if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
-		pr_info("PPC440SP(e) RAID-6 has been activated "
-			"successfully\n");
-		ppc440spe_r6_enabled = 1;
-	} else {
-		pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
-			" Error key ?\n");
-		ppc440spe_r6_enabled = 0;
-	}
-	return count;
-}
-
-static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf)
-{
-	ssize_t size = 0;
-	u32 reg;
-
-#ifdef CONFIG_440SP
-	/* 440SP has fixed polynomial */
-	reg = 0x4d;
-#else
-	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
-	reg >>= MQ0_CFBHL_POLY;
-	reg &= 0xFF;
-#endif
-
-	size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
-			"uses 0x1%02x polynomial.\n", reg);
-	return size;
-}
-
-static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
-				      const char *buf, size_t count)
-{
-	unsigned long reg, val;
-
-#ifdef CONFIG_440SP
-	/* 440SP uses default 0x14D polynomial only */
-	return -EINVAL;
-#endif
-
-	if (!count || count > 6)
-		return -EINVAL;
-
-	/* e.g., 0x14D or 0x11D */
-	sscanf(buf, "%lx", &val);
-
-	if (val & ~0x1FF)
-		return -EINVAL;
-
-	val &= 0xFF;
-	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
-	reg &= ~(0xFF << MQ0_CFBHL_POLY);
-	reg |= val << MQ0_CFBHL_POLY;
-	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
-
-	return count;
-}
-
-static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL);
-static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable,
-		   store_ppc440spe_r6enable);
-static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly,
-		   store_ppc440spe_r6poly);
-
-/*
- * Common initialisation for RAID engines; allocate memory for
- * DMAx FIFOs, perform configuration common for all DMA engines.
- * Further DMA engine specific configuration is done at probe time.
- */
-static int ppc440spe_configure_raid_devices(void)
-{
-	struct device_node *np;
-	struct resource i2o_res;
-	struct i2o_regs __iomem *i2o_reg;
-	dcr_host_t i2o_dcr_host;
-	unsigned int dcr_base, dcr_len;
-	int i, ret;
-
-	np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
-	if (!np) {
-		pr_err("%s: can't find I2O device tree node\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	if (of_address_to_resource(np, 0, &i2o_res)) {
-		of_node_put(np);
-		return -EINVAL;
-	}
-
-	i2o_reg = of_iomap(np, 0);
-	if (!i2o_reg) {
-		pr_err("%s: failed to map I2O registers\n", __func__);
-		of_node_put(np);
-		return -EINVAL;
-	}
-
-	/* Get I2O DCRs base */
-	dcr_base = dcr_resource_start(np, 0);
-	dcr_len = dcr_resource_len(np, 0);
-	if (!dcr_base && !dcr_len) {
-		pr_err("%s: can't get DCR registers base/len!\n",
-			np->full_name);
-		of_node_put(np);
-		iounmap(i2o_reg);
-		return -ENODEV;
-	}
-
-	i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
-	if (!DCR_MAP_OK(i2o_dcr_host)) {
-		pr_err("%s: failed to map DCRs!\n", np->full_name);
-		of_node_put(np);
-		iounmap(i2o_reg);
-		return -ENODEV;
-	}
-	of_node_put(np);
-
-	/* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
-	 * the base address of FIFO memory space.
-	 * Actually we need twice more physical memory than programmed in the
-	 * <fsiz> register (because there are two FIFOs for each DMA: CP and CS)
-	 */
-	ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
-					 GFP_KERNEL);
-	if (!ppc440spe_dma_fifo_buf) {
-		pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
-		iounmap(i2o_reg);
-		dcr_unmap(i2o_dcr_host, dcr_len);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Configure h/w
-	 */
-	/* Reset I2O/DMA */
-	mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
-	mtdcri(SDR0, DCRN_SDR0_SRST, 0);
-
-	/* Setup the base address of mmaped registers */
-	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
-	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
-						I2O_REG_ENABLE);
-	dcr_unmap(i2o_dcr_host, dcr_len);
-
-	/* Setup FIFO memory space base address */
-	iowrite32(0, &i2o_reg->ifbah);
-	iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
-
-	/* set zero FIFO size for I2O, so the whole
-	 * ppc440spe_dma_fifo_buf is used by DMAs.
-	 * DMAx_FIFOs will be configured while probe.
-	 */
-	iowrite32(0, &i2o_reg->ifsiz);
-	iounmap(i2o_reg);
-
-	/* To prepare WXOR/RXOR functionality we need access to
-	 * Memory Queue Module DCRs (finally it will be enabled
-	 * via /sys interface of the ppc440spe ADMA driver).
-	 */
-	np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
-	if (!np) {
-		pr_err("%s: can't find MQ device tree node\n",
-			__func__);
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	/* Get MQ DCRs base */
-	dcr_base = dcr_resource_start(np, 0);
-	dcr_len = dcr_resource_len(np, 0);
-	if (!dcr_base && !dcr_len) {
-		pr_err("%s: can't get DCR registers base/len!\n",
-			np->full_name);
-		ret = -ENODEV;
-		goto out_mq;
-	}
-
-	ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
-	if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
-		pr_err("%s: failed to map DCRs!\n", np->full_name);
-		ret = -ENODEV;
-		goto out_mq;
-	}
-	of_node_put(np);
-	ppc440spe_mq_dcr_len = dcr_len;
-
-	/* Set HB alias */
-	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
-
-	/* Set:
-	 * - LL transaction passing limit to 1;
-	 * - Memory controller cycle limit to 1;
-	 * - Galois Polynomial to 0x14d (default)
-	 */
-	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
-		  (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
-		  (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
-
-	atomic_set(&ppc440spe_adma_err_irq_ref, 0);
-	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
-		ppc440spe_adma_devices[i] = -1;
-
-	return 0;
-
-out_mq:
-	of_node_put(np);
-out_free:
-	kfree(ppc440spe_dma_fifo_buf);
-	return ret;
-}
-
-static const struct of_device_id ppc440spe_adma_of_match[] __devinitconst = {
-	{ .compatible	= "ibm,dma-440spe", },
-	{ .compatible	= "amcc,xor-accelerator", },
-	{},
-};
 MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
 
 static struct of_platform_driver ppc440spe_adma_driver = {
@@ -4955,68 +1241,15 @@ static struct of_platform_driver ppc440spe_adma_driver = {
 	},
 };
 
-static __init int ppc440spe_adma_init(void)
+__init int ppc440spe_adma_init(void)
 {
-	int ret;
-
-	ret = ppc440spe_configure_raid_devices();
-	if (ret)
-		return ret;
-
-	ret = of_register_platform_driver(&ppc440spe_adma_driver);
-	if (ret) {
-		pr_err("%s: failed to register platform driver\n",
-			__func__);
-		goto out_reg;
-	}
-
-	/* Initialization status */
-	ret = driver_create_file(&ppc440spe_adma_driver.driver,
-				 &driver_attr_devices);
-	if (ret)
-		goto out_dev;
-
-	/* RAID-6 h/w enable entry */
-	ret = driver_create_file(&ppc440spe_adma_driver.driver,
-				 &driver_attr_enable);
-	if (ret)
-		goto out_en;
-
-	/* GF polynomial to use */
-	ret = driver_create_file(&ppc440spe_adma_driver.driver,
-				 &driver_attr_poly);
-	if (!ret)
-		return ret;
+	int ret = 0;
 
-	driver_remove_file(&ppc440spe_adma_driver.driver,
-			   &driver_attr_enable);
-out_en:
-	driver_remove_file(&ppc440spe_adma_driver.driver,
-			   &driver_attr_devices);
-out_dev:
-	/* User will not be able to enable h/w RAID-6 */
-	pr_err("%s: failed to create RAID-6 driver interface\n",
-		__func__);
-	of_unregister_platform_driver(&ppc440spe_adma_driver);
-out_reg:
-	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
-	kfree(ppc440spe_dma_fifo_buf);
+	ret = ppc440spe_adma_hw_init();
+	if(ret)
+		of_unregister_platform_driver(&ppc440spe_adma_driver);
 	return ret;
 }
-
-static void __exit ppc440spe_adma_exit(void)
-{
-	driver_remove_file(&ppc440spe_adma_driver.driver,
-			   &driver_attr_poly);
-	driver_remove_file(&ppc440spe_adma_driver.driver,
-			   &driver_attr_enable);
-	driver_remove_file(&ppc440spe_adma_driver.driver,
-			   &driver_attr_devices);
-	of_unregister_platform_driver(&ppc440spe_adma_driver);
-	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
-	kfree(ppc440spe_dma_fifo_buf);
-}
-
 arch_initcall(ppc440spe_adma_init);
 module_exit(ppc440spe_adma_exit);
 
diff --git a/drivers/dma/ppc4xx/ppc440spe-adma.c b/drivers/dma/ppc4xx/ppc440spe-adma.c
new file mode 100644
index 0000000..da467b4
--- /dev/null
+++ b/drivers/dma/ppc4xx/ppc440spe-adma.c
@@ -0,0 +1,1658 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * Further porting to arch/powerpc by
+ * 	Anatolij Gustschin <agust@denx.de>
+ * 	Tirumala R Marri <tmarri@apm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */
+
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <linux/async_tx.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include "adma.h"
+#include "ppc440spe-adma.h"
+
+/* This array is used in data-check operations for storing a pattern */
+static char ppc440spe_qword[16];
+static atomic_t ppc440spe_adma_err_irq_ref;
+static unsigned int ppc440spe_mq_dcr_len;
+
+/* These are used in enable & check routines
+ */
+static u32 ppc440spe_r6_enabled;
+static struct completion ppc440spe_r6_test_comp;
+
+static struct page *ppc440spe_rxor_srcs[32];
+
+static dcr_host_t ppc440spe_mq_dcr_host;
+/* Pointer to DMA0, DMA1 CP/CS FIFO */
+static void *ppc440spe_dma_fifo_buf;
+
+static char *ppc_adma_errors[] = {
+	[PPC_ADMA_INIT_OK] = "ok",
+	[PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
+	[PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
+	[PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
+	    "structure",
+	[PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
+	    "hardware descriptors",
+	[PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
+	[PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
+	[PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
+	[PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
+};
+
+static void ppc440spe_adma_dma2rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
+					  int index, u8 mult);
+static void print_cb_list(struct ppc440spe_adma_chan *chan,
+			  struct ppc440spe_adma_desc_slot *iter);
+/**
+ * ppc440spe_can_rxor - check if the operands may be processed with RXOR
+ */
+static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
+{
+	int i, order = 0, state = 0;
+	int idx = 0;
+
+	if (unlikely(!(src_cnt > 1)))
+		return 0;
+
+	BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
+
+	/* Skip holes in the source list before checking */
+	for (i = 0; i < src_cnt; i++) {
+		if (!srcs[i])
+			continue;
+		ppc440spe_rxor_srcs[idx++] = srcs[i];
+	}
+	src_cnt = idx;
+
+	for (i = 1; i < src_cnt; i++) {
+		char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
+		char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
+
+		switch (state) {
+		case 0:
+			if (cur_addr == old_addr + len) {
+				/* direct RXOR */
+				order = 1;
+				state = 1;
+			} else if (old_addr == cur_addr + len) {
+				/* reverse RXOR */
+				order = -1;
+				state = 1;
+			} else
+				goto out;
+			break;
+		case 1:
+			if ((i == src_cnt - 2) ||
+			    (order == -1 && cur_addr != old_addr - len)) {
+				order = 0;
+				state = 0;
+			} else if ((cur_addr == old_addr + len * order) ||
+				   (cur_addr == old_addr + 2 * len) ||
+				   (cur_addr == old_addr + 3 * len)) {
+				state = 2;
+			} else {
+				order = 0;
+				state = 0;
+			}
+			break;
+		case 2:
+			order = 0;
+			state = 0;
+			break;
+		}
+	}
+
+      out:
+	if (state == 1 || state == 2)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ppc440spe_init_rxor_cursor -
+ */
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
+{
+	memset(cursor, 0, sizeof(struct ppc440spe_rxor));
+	cursor->state = 2;
+}
+
+/**
+ * ppc440spe_adma_init_dma2rxor_slot -
+ */
+static  void ppc440spe_adma_init_dma2rxor_slot(struct ppc440spe_adma_desc_slot
+						  *desc, dma_addr_t * src,
+						  int src_cnt)
+{
+	int i;
+
+	/* initialize CDB */
+	for (i = 0; i < src_cnt; i++) {
+		ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
+					      desc->src_cnt, (u32) src[i]);
+	}
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_set_rxor_block_size - set RXOR block size
+ */
+static   void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
+{
+	/* assume that byte_count is aligned on the 512-boundary;
+	 * thus write it directly to the register (bits 23:31 are
+	 * reserved there).
+	 */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
+}
+
+
+static int ppc440spe_adma_estimate(struct dma_chan *chan,
+				enum dma_transaction_type cap,
+				struct page **dst_lst, int dst_cnt,
+				struct page **src_lst, int src_cnt,
+				size_t src_sz)
+{
+	int ef = 1;
+
+	if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
+		/* If RAID-6 capabilities were not activated don't try
+		 * to use them
+		 */
+		if (unlikely(!ppc440spe_r6_enabled))
+			return -1;
+	}
+	/*  In the current implementation of ppc440spe ADMA driver it
+
+
+
+	 * makes sense to pick out only pq case, because it may be
+	 * processed:
+	 * (1) either using Biskup method on DMA2;
+	 * (2) or on DMA0/1.
+	 *  Thus we give a favour to (1) if the sources are suitable;
+	 * else let it be processed on one of the DMA0/1 engines.
+	 *  In the sum_product case where destination is also the
+	 * source process it on DMA0/1 only.
+	 */
+	if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
+
+		if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
+			ef = 0;	/* sum_product case, process on DMA0/1 */
+		else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
+			ef = 3;	/* override (DMA0/1 + idle) */
+		else
+			ef = 0;	/* can't process on DMA2 if !rxor */
+	}
+
+	/* channel idleness increases the priority */
+	if (likely(ef) && !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
+		ef++;
+
+	return ef;
+}
+
+struct dma_chan *ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
+						      struct page **dst_lst,
+						      int dst_cnt,
+						      struct page **src_lst,
+						      int src_cnt,
+						      size_t src_sz)
+{
+	struct dma_chan *best_chan = NULL;
+	struct ppc_dma_chan_ref *ref;
+	int best_rank = -1;
+
+	if (unlikely(!src_sz))
+		return NULL;
+	if (src_sz > PAGE_SIZE) {
+		/*
+		 * should a user of the api ever pass > PAGE_SIZE requests
+		 * we sort out cases where temporary page-sized buffers
+		 * are used.
+		 */
+		switch (cap) {
+		case DMA_PQ:
+			if (src_cnt == 1 && dst_lst[1] == src_lst[0])
+				return NULL;
+			if (src_cnt == 2 && dst_lst[1] == src_lst[1])
+				return NULL;
+			break;
+		case DMA_PQ_VAL:
+		case DMA_XOR_VAL:
+			return NULL;
+		default:
+			break;
+		}
+	}
+
+	list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
+		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+			int rank;
+
+			rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
+						    dst_cnt, src_lst,
+						    src_cnt, src_sz);
+			if (rank > best_rank) {
+				best_rank = rank;
+				best_chan = ref->chan;
+			}
+		}
+	}
+
+	return best_chan;
+}
+
+EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
+
+/**
+ * ppc440spe_dma01_prep_sum_product -
+ * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also
+ * the source.
+ */
+static  struct ppc440spe_adma_desc_slot
+*ppc440spe_dma01_prep_sum_product(struct ppc440spe_adma_chan*ppc440spe_chan,
+				dma_addr_t * dst,
+				dma_addr_t * src,
+				int src_cnt,
+				const unsigned char *scf,
+				size_t len,
+				unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	unsigned long op = 0;
+	int slot_cnt;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	slot_cnt = 3;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	/* WXOR, each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		struct ppc440spe_adma_chan *chan;
+		struct ppc440spe_adma_desc_slot *iter;
+		struct dma_cdb *hw_desc;
+
+		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+		set_bits(op, &sw_desc->flags);
+		sw_desc->src_cnt = src_cnt;
+		sw_desc->dst_cnt = 1;
+		/* 1st descriptor, src[1] data to q page and zero destination */
+		iter = list_first_entry(&sw_desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					  *dst, 0);
+		ppc440spe_desc_set_dest_addr(iter, chan, 0, ppc440spe_chan->qdest, 1);
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					 src[1]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/* 2nd descriptor, multiply src[1] data and store the
+		 * result in destination */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		/* set 'next' pointer */
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					 ppc440spe_chan->qdest);
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					  *dst, 0);
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					 DMA_CDB_SG_DST1, scf[1]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/*
+		 * 3rd descriptor, multiply src[0] data and xor it
+		 * with destination
+		 */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = NULL;
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					 src[0]);
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					  *dst, 0);
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					 DMA_CDB_SG_DST1, scf[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+static
+struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(struct ppc440spe_adma_chan *ppc440spe_chan,
+							dma_addr_t *dst,
+							int dst_cnt,
+							dma_addr_t *src,
+							int src_cnt,
+							const unsigned char *scf,
+							size_t len,
+							unsigned long flags)
+{
+	int slot_cnt;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+	unsigned long op = 0;
+	unsigned char mult = 1;
+
+	pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+		 __func__, dst_cnt, src_cnt, len);
+	/*  select operations WXOR/RXOR depending on the
+	 * source addresses of operators and the number
+	 * of destinations (RXOR support only Q-parity calculations)
+	 */
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
+		/* no active RXOR;
+		 * do RXOR if:
+		 * - there are more than 1 source,
+		 * - len is aligned on 512-byte boundary,
+		 * - source addresses fit to one of 4 possible regions.
+		 */
+		if (src_cnt > 1 &&
+		    !(len & MQ0_CF2H_RXOR_BS_MASK) &&
+		    (src[0] + len) == src[1]) {
+			/* may do RXOR R1 R2 */
+			set_bit(PPC440SPE_DESC_RXOR, &op);
+			if (src_cnt != 2) {
+				/* may try to enhance region of RXOR */
+				if ((src[1] + len) == src[2]) {
+					/* do RXOR R1 R2 R3 */
+					set_bit(PPC440SPE_DESC_RXOR123, &op);
+				} else if ((src[1] + len * 2) == src[2]) {
+					/* do RXOR R1 R2 R4 */
+					set_bit(PPC440SPE_DESC_RXOR124, &op);
+				} else if ((src[1] + len * 3) == src[2]) {
+					/* do RXOR R1 R2 R5 */
+					set_bit(PPC440SPE_DESC_RXOR125, &op);
+				} else {
+					/* do RXOR R1 R2 */
+					set_bit(PPC440SPE_DESC_RXOR12, &op);
+				}
+			} else {
+				/* do RXOR R1 R2 */
+				set_bit(PPC440SPE_DESC_RXOR12, &op);
+			}
+		}
+
+		if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+			/* can not do this operation with RXOR */
+			clear_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state);
+		} else {
+			/* can do; set block size right now */
+			ppc440spe_desc_set_rxor_block_size(len);
+		}
+	}
+
+	/* Number of necessary slots depends on operation type selected */
+	if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+		/*  This is a WXOR only chain. Need descriptors for each
+		 * source to GF-XOR them with WXOR, and need descriptors
+		 * for each destination to zero them with WXOR
+		 */
+		slot_cnt = src_cnt;
+
+		if (flags & DMA_PREP_ZERO_P) {
+			slot_cnt++;
+			set_bit(PPC440SPE_ZERO_P, &op);
+		}
+		if (flags & DMA_PREP_ZERO_Q) {
+			slot_cnt++;
+			set_bit(PPC440SPE_ZERO_Q, &op);
+		}
+	} else {
+		/*  Need 1/2 descriptor for RXOR operation, and
+		 * need (src_cnt - (2 or 3)) for WXOR of sources
+		 * remained (if any)
+		 */
+		slot_cnt = dst_cnt;
+
+		if (flags & DMA_PREP_ZERO_P)
+			set_bit(PPC440SPE_ZERO_P, &op);
+		if (flags & DMA_PREP_ZERO_Q)
+			set_bit(PPC440SPE_ZERO_Q, &op);
+
+		if (test_bit(PPC440SPE_DESC_RXOR12, &op))
+			slot_cnt += src_cnt - 2;
+		else
+			slot_cnt += src_cnt - 3;
+
+		/*  Thus we have either RXOR only chain or
+		 * mixed RXOR/WXOR
+		 */
+		if (slot_cnt == dst_cnt)
+			/* RXOR only chain */
+			clear_bit(PPC440SPE_DESC_WXOR, &op);
+	}
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	/* for both RXOR/WXOR each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt, flags, op);
+
+		/* setup dst/src/mult */
+		pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
+			 __func__, dst[0], dst[1]);
+		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+		while (src_cnt--) {
+			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], src_cnt);
+
+			/* NOTE: "Multi = 0 is equivalent to = 1" as it
+			 * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf
+			 * doesn't work for RXOR with DMA0/1! Instead, multi=0
+			 * leads to zeroing source data after RXOR.
+			 * So, for P case set-up mult=1 explicitly.
+			 */
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				mult = scf[src_cnt];
+			ppc440spe_adma_pq_set_src_mult(sw_desc,
+						    mult, src_cnt, dst_cnt - 1);
+		}
+
+		/* Setup byte count foreach slot just allocated */
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+			iter->unmap_len = len;
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
+ * a PQ_ZERO_SUM operation
+ */
+struct dma_async_tx_descriptor
+*ppc440spe_adma_prep_dma_pqzero_sum(struct dma_chan *chan,
+					dma_addr_t * pq,
+					dma_addr_t * src,
+					unsigned int src_cnt,
+					const unsigned 	char *scf,
+					size_t len,
+					enum sum_check_flags *pqres,
+					unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+	dma_addr_t pdest, qdest;
+	int slot_cnt, slots_per_op, idst, dst_cnt;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		pdest = 0;
+	else
+		pdest = pq[0];
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		qdest = 0;
+	else
+		qdest = pq[1];
+
+	ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
+					    src, src_cnt, scf));
+
+	/* Always use WXOR for P/Q calculations (two destinations).
+	 * Need 1 or 2 extra slots to verify results are zero.
+	 */
+	idst = dst_cnt = (pdest && qdest) ? 2 : 1;
+
+	/* One additional slot per destination to clone P/Q
+	 * before calculation (we have to preserve destinations).
+	 */
+	slot_cnt = src_cnt + dst_cnt * 2;
+	slots_per_op = 1;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
+
+		/* Setup byte count for each slot just allocated */
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+			iter->unmap_len = len;
+		}
+
+		if (pdest) {
+			struct dma_cdb *hw_desc;
+			struct ppc440spe_adma_chan *chan;
+
+			iter = sw_desc->group_head;
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+			iter->hw_next = list_entry(iter->chain_node.next,
+						   struct ppc440spe_adma_desc_slot,
+						   chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter->src_cnt = 0;
+			iter->dst_cnt = 0;
+			ppc440spe_desc_set_dest_addr(iter, chan, 0,
+						  ppc440spe_chan->pdest, 0);
+			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+			iter->unmap_len = 0;
+			/* override pdest to preserve original P */
+			pdest = ppc440spe_chan->pdest;
+		}
+		if (qdest) {
+			struct dma_cdb *hw_desc;
+			struct ppc440spe_adma_chan *chan;
+
+			iter = list_first_entry(&sw_desc->group_list,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+
+			if (pdest) {
+				iter = list_entry(iter->chain_node.next,
+						  struct ppc440spe_adma_desc_slot,
+						  chain_node);
+			}
+
+			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+			iter->hw_next = list_entry(iter->chain_node.next,
+						   struct ppc440spe_adma_desc_slot,
+						   chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter->src_cnt = 0;
+			iter->dst_cnt = 0;
+			ppc440spe_desc_set_dest_addr(iter, chan, 0,
+						  ppc440spe_chan->qdest, 0);
+			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+			iter->unmap_len = 0;
+			/* override qdest to preserve original Q */
+			qdest = ppc440spe_chan->qdest;
+		}
+
+		/* Setup destinations for P/Q ops */
+		ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
+
+		/* Setup zero QWORDs into DCHECK CDBs */
+		idst = dst_cnt;
+		list_for_each_entry_reverse(iter, &sw_desc->group_list,
+					    chain_node) {
+			/*
+			 * The last CDB corresponds to Q-parity check,
+			 * the one before last CDB corresponds
+			 * P-parity check
+			 */
+			if (idst == DMA_DEST_MAX_NUM) {
+				if (idst == dst_cnt) {
+					set_bit(PPC440SPE_DESC_QCHECK,
+						&iter->flags);
+				} else {
+					set_bit(PPC440SPE_DESC_PCHECK,
+						&iter->flags);
+				}
+			} else {
+				if (qdest) {
+					set_bit(PPC440SPE_DESC_QCHECK,
+						&iter->flags);
+				} else {
+					set_bit(PPC440SPE_DESC_PCHECK,
+						&iter->flags);
+				}
+			}
+			iter->xor_check_result = pqres;
+
+			/*
+			 * set it to zero, if check fail then result will
+			 * be updated
+			 */
+			*iter->xor_check_result = 0;
+			ppc440spe_desc_set_dcheck(iter, ppc440spe_chan, ppc440spe_qword);
+
+			if (!(--dst_cnt))
+				break;
+		}
+
+		/* Setup sources and mults for P/Q ops */
+		list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
+						     chain_node) {
+			struct ppc440spe_adma_chan *chan;
+			u32 mult_dst;
+
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+			ppc440spe_desc_set_src_addr(iter, chan, 0,
+						 DMA_CUED_XOR_HB,
+						 src[src_cnt - 1]);
+			if (qdest) {
+				mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
+				    DMA_CDB_SG_DST1;
+				ppc440spe_desc_set_src_mult(iter, chan,
+							 DMA_CUED_MULT1_OFF,
+							 mult_dst,
+							 scf[src_cnt - 1]);
+			}
+			if (!(--src_cnt))
+				break;
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
+ * XOR ZERO_SUM operation
+ */
+struct dma_async_tx_descriptor
+*ppc440spe_adma_prep_dma_xor_zero_sum(struct dma_chan *chan,
+					dma_addr_t * src,
+					unsigned int src_cnt,
+					size_t len,
+					enum sum_check_flags *result,
+					unsigned long flags)
+{
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t pq[2];
+
+	/* validate P, disable Q */
+	pq[0] = src[0];
+	pq[1] = 0;
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+
+	tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
+					     src_cnt - 1, 0, len,
+					     result, flags);
+	return tx;
+}
+
+void ppc440spe_adma_set_capabilities(struct ppc440spe_adma_device *adev)
+{
+	switch (adev->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
+		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+		dma_cap_set(DMA_MEMSET, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
+		dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
+		break;
+	case PPC440SPE_XOR_ID:
+		dma_cap_set(DMA_XOR, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ, adev->common.cap_mask);
+		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+		adev->common.cap_mask = adev->common.cap_mask;
+		break;
+	}
+
+	if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			dma_set_maxpq(&adev->common,
+				      DMA0_FIFO_SIZE / sizeof(struct dma_cdb),
+				      0);
+			break;
+		case PPC440SPE_DMA1_ID:
+			dma_set_maxpq(&adev->common,
+				      DMA1_FIFO_SIZE / sizeof(struct dma_cdb),
+				      0);
+			break;
+		case PPC440SPE_XOR_ID:
+			adev->common.max_pq = XOR_MAX_OPS * 3;
+			break;
+		}
+	}
+	if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			adev->common.max_pq = DMA0_FIFO_SIZE /
+			    sizeof(struct dma_cdb);
+			break;
+		case PPC440SPE_DMA1_ID:
+			adev->common.max_pq = DMA1_FIFO_SIZE /
+			    sizeof(struct dma_cdb);
+			break;
+		}
+	}
+	if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			adev->common.max_xor = DMA0_FIFO_SIZE /
+			    sizeof(struct dma_cdb);
+			break;
+		case PPC440SPE_DMA1_ID:
+			adev->common.max_xor = DMA1_FIFO_SIZE /
+			    sizeof(struct dma_cdb);
+			break;
+		}
+	}
+	pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
+		"( %s%s%s%s%s%s%s)\n",
+		dev_name(adev->dev),
+		dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
+		dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
+		dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
+		dma_has_cap(DMA_XOR_VAL,
+			    adev->common.cap_mask) ? "xor_val " : "",
+		dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
+		dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "",
+		dma_has_cap(DMA_INTERRUPT,
+			    adev->common.cap_mask) ? "intr " : "");
+}
+static  struct ppc440spe_adma_desc_slot
+*ppc440spe_dma2_prep_pq(struct ppc440spe_adma_chan *ppc440spe_chan,
+			dma_addr_t * dst,
+			int dst_cnt,
+			dma_addr_t * src,
+			int src_cnt,
+			const unsigned char *scf,
+			size_t len,
+			unsigned long flags)
+{
+	int slot_cnt, descs_per_op;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+	unsigned long op = 0;
+	unsigned char mult = 1;
+
+	BUG_ON(!dst_cnt);
+	/*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+	   __func__, dst_cnt, src_cnt, len); */
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
+	if (descs_per_op < 0) {
+		spin_unlock_bh(&ppc440spe_chan->lock);
+		return NULL;
+	}
+
+	/* depending on number of sources we have 1 or 2 RXOR chains */
+	slot_cnt = descs_per_op * dst_cnt;
+
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		op = slot_cnt;
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
+						--op ? 0 : flags);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+			iter->unmap_len = len;
+
+			ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
+			iter->rxor_cursor.len = len;
+			iter->descs_per_op = descs_per_op;
+		}
+		op = 0;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			op++;
+			if (op % descs_per_op == 0)
+				ppc440spe_adma_init_dma2rxor_slot(iter, src,
+							       src_cnt);
+			if (likely(!list_is_last(&iter->chain_node,
+						 &sw_desc->group_list))) {
+				/* set 'next' pointer */
+				iter->hw_next =
+				    list_entry(iter->chain_node.next,
+					       struct ppc440spe_adma_desc_slot,
+					       chain_node);
+				ppc440spe_xor_set_link(iter, iter->hw_next);
+			} else {
+				/* this is the last descriptor. */
+				iter->hw_next = NULL;
+			}
+		}
+
+		/* fixup head descriptor */
+		sw_desc->dst_cnt = dst_cnt;
+		if (flags & DMA_PREP_ZERO_P)
+			set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
+		if (flags & DMA_PREP_ZERO_Q)
+			set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
+
+		/* setup dst/src/mult */
+		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+
+		while (src_cnt--) {
+			/* handle descriptors (if dst_cnt == 2) inside
+			 * the ppc440spe_adma_pq_set_srcxxx() functions
+			 */
+			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], src_cnt);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				mult = scf[src_cnt];
+			ppc440spe_adma_pq_set_src_mult(sw_desc,
+						    mult, src_cnt, dst_cnt - 1);
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+	ppc440spe_desc_set_rxor_block_size(len);
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_dma01_prep_mult -
+ * for Q operation where destination is also the source
+ */
+static  struct ppc440spe_adma_desc_slot
+*ppc440spe_dma01_prep_mult(struct ppc440spe_adma_chan *ppc440spe_chan,
+			dma_addr_t * dst,
+			int dst_cnt,
+			dma_addr_t * src,
+			int src_cnt,
+			const unsigned char *scf,
+			size_t len,
+			unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	unsigned long op = 0;
+	int slot_cnt;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	slot_cnt = 2;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	/* use WXOR, each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		struct ppc440spe_adma_chan *chan;
+		struct ppc440spe_adma_desc_slot *iter;
+		struct dma_cdb *hw_desc;
+
+		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+		set_bits(op, &sw_desc->flags);
+		sw_desc->src_cnt = src_cnt;
+		sw_desc->dst_cnt = dst_cnt;
+		/* First descriptor, zero data in the destination and copy it
+		 * to q page using MULTICAST transfer.
+		 */
+		iter = list_first_entry(&sw_desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		/* set 'next' pointer */
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+		ppc440spe_desc_set_dest_addr(iter, chan,
+					  DMA_CUED_XOR_BASE, dst[0], 0);
+		ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					 src[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/*
+		 * Second descriptor, multiply data from the q page
+		 * and store the result in real destination.
+		 */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = NULL;
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0,
+					 DMA_CUED_XOR_HB, dst[1]);
+		ppc440spe_desc_set_dest_addr(iter, chan,
+					  DMA_CUED_XOR_BASE, dst[0], 0);
+
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					 DMA_CDB_SG_DST1, scf[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
+ */
+struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(struct dma_chan
+							*chan,
+							dma_addr_t * dst,
+							dma_addr_t * src,
+							unsigned int
+							src_cnt, const unsigned
+							char *scf,
+							size_t len,
+							unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	int dst_cnt = 0;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	BUG_ON(!len);
+	BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT));
+	BUG_ON(!src_cnt);
+
+	if (src_cnt == 1 && dst[1] == src[0]) {
+		dma_addr_t dest[2];
+
+		/* dst[1] is real destination (Q) */
+		dest[0] = dst[1];
+		/* this is the page to multicast source data to */
+		dest[1] = ppc440spe_chan->qdest;
+		sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
+						 dest, 2, src, src_cnt, scf,
+						 len, flags);
+		return sw_desc ? &sw_desc->async_tx : NULL;
+	}
+
+	if (src_cnt == 2 && dst[1] == src[1]) {
+		sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
+							&dst[1], src, 2, scf,
+							len, flags);
+		return sw_desc ? &sw_desc->async_tx : NULL;
+	}
+
+	if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
+		BUG_ON(!dst[0]);
+		dst_cnt++;
+		flags |= DMA_PREP_ZERO_P;
+	}
+
+	if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
+		BUG_ON(!dst[1]);
+		dst_cnt++;
+		flags |= DMA_PREP_ZERO_Q;
+	}
+
+	BUG_ON(!dst_cnt);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+		ppc440spe_chan->device->id, __func__, src_cnt, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+	switch (ppc440spe_chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
+						  dst, dst_cnt, src, src_cnt,
+						  scf, len, flags);
+		break;
+
+	case PPC440SPE_XOR_ID:
+		sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
+						 dst, dst_cnt, src, src_cnt,
+						 scf, len, flags);
+		break;
+	}
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
+			   struct ppc440spe_adma_chan *chan, int *initcode)
+{
+	struct platform_device *ofdev;
+	struct device_node *np;
+	int ret;
+
+	ofdev = container_of(adev->dev, struct platform_device, dev);
+	np = ofdev->dev.of_node;
+	if (adev->id != PPC440SPE_XOR_ID) {
+		adev->err_irq = irq_of_parse_and_map(np, 1);
+		if (adev->err_irq == NO_IRQ) {
+			dev_warn(adev->dev, "no err irq resource?\n");
+			*initcode = PPC_ADMA_INIT_IRQ2;
+			adev->err_irq = -ENXIO;
+		} else
+			atomic_inc(&ppc440spe_adma_err_irq_ref);
+	} else {
+		adev->err_irq = -ENXIO;
+	}
+
+	adev->irq = irq_of_parse_and_map(np, 0);
+	if (adev->irq == NO_IRQ) {
+		dev_err(adev->dev, "no irq resource\n");
+		*initcode = PPC_ADMA_INIT_IRQ1;
+		ret = -ENXIO;
+		goto err_irq_map;
+	}
+	dev_dbg(adev->dev, "irq %d, err irq %d\n", adev->irq, adev->err_irq);
+
+	ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
+			  0, dev_driver_string(adev->dev), chan);
+	if (ret) {
+		dev_err(adev->dev, "can't request irq %d\n", adev->irq);
+		*initcode = PPC_ADMA_INIT_IRQ1;
+		ret = -EIO;
+		goto err_req1;
+	}
+
+	/* only DMA engines have a separate error IRQ
+	 * so it's Ok if err_irq < 0 in XOR engine case.
+	 */
+	if (adev->err_irq > 0) {
+		/* both DMA engines share common error IRQ */
+		ret = request_irq(adev->err_irq,
+				  ppc440spe_adma_err_handler,
+				  IRQF_SHARED,
+				  dev_driver_string(adev->dev), chan);
+		if (ret) {
+			dev_err(adev->dev, "can't request irq %d\n",
+				adev->err_irq);
+			*initcode = PPC_ADMA_INIT_IRQ2;
+			ret = -EIO;
+			goto err_req2;
+		}
+	}
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		/* enable XOR engine interrupts */
+		iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+			    XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
+			    &adev->xor_reg->ier);
+	} else {
+		u32 mask, enable;
+
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+		np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+#endif
+		if (!np) {
+			pr_err("%s: can't find I2O device tree node\n",
+			       __func__);
+			ret = -ENODEV;
+			goto err_req2;
+		}
+		adev->i2o_reg = of_iomap(np, 0);
+		if (!adev->i2o_reg) {
+			pr_err("%s: failed to map I2O registers\n", __func__);
+			of_node_put(np);
+			ret = -EINVAL;
+			goto err_req2;
+		}
+		of_node_put(np);
+		/* Unmask 'CS FIFO Attention' interrupts and
+		 * enable generating interrupts on errors
+		 */
+		enable = (adev->id == PPC440SPE_DMA0_ID) ?
+		    ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+		    ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+		mask = ioread32(&adev->i2o_reg->iopim) & enable;
+		iowrite32(mask, &adev->i2o_reg->iopim);
+	}
+	return 0;
+
+      err_req2:
+	free_irq(adev->irq, chan);
+      err_req1:
+	irq_dispose_mapping(adev->irq);
+      err_irq_map:
+	if (adev->err_irq > 0) {
+		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
+			irq_dispose_mapping(adev->err_irq);
+	}
+	return ret;
+}
+
+void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
+			      struct ppc440spe_adma_chan *chan)
+{
+	u32 mask, disable;
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		/* disable XOR engine interrupts */
+		mask = ioread32be(&adev->xor_reg->ier);
+		mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+			  XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
+		iowrite32be(mask, &adev->xor_reg->ier);
+	} else {
+		/* disable DMAx engine interrupts */
+		disable = (adev->id == PPC440SPE_DMA0_ID) ?
+		    (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+		    (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+		mask = ioread32(&adev->i2o_reg->iopim) | disable;
+		iowrite32(mask, &adev->i2o_reg->iopim);
+	}
+	free_irq(adev->irq, chan);
+	irq_dispose_mapping(adev->irq);
+	if (adev->err_irq > 0) {
+		free_irq(adev->err_irq, chan);
+		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
+			irq_dispose_mapping(adev->err_irq);
+			iounmap(adev->i2o_reg);
+		}
+	}
+}
+
+/*
+ * Common initialisation for RAID engines; allocate memory for
+ * DMAx FIFOs, perform configuration common for all DMA engines.
+ * Further DMA engine specific configuration is done at probe time.
+ */
+static int ppc440spe_configure_raid_devices(void)
+{
+	struct device_node *np;
+	struct resource i2o_res;
+	struct i2o_regs __iomem *i2o_reg;
+	dcr_host_t i2o_dcr_host;
+	unsigned int dcr_base, dcr_len;
+	int i, ret;
+
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+	np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+#endif
+	if (!np) {
+		pr_err("%s: can't find I2O device tree node\n", __func__);
+		return -ENODEV;
+	}
+
+	if (of_address_to_resource(np, 0, &i2o_res)) {
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	i2o_reg = of_iomap(np, 0);
+	if (!i2o_reg) {
+		pr_err("%s: failed to map I2O registers\n", __func__);
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	/* Get I2O DCRs base */
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+	if (!dcr_base && !dcr_len) {
+		pr_err("%s: can't get DCR registers base/len!\n",
+		       np->full_name);
+		of_node_put(np);
+		iounmap(i2o_reg);
+		return -ENODEV;
+	}
+
+	i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(i2o_dcr_host)) {
+		pr_err("%s: failed to map DCRs!\n", np->full_name);
+		of_node_put(np);
+		iounmap(i2o_reg);
+		return -ENODEV;
+	}
+	of_node_put(np);
+
+	/* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
+	 * the base address of FIFO memory space.
+	 * Actually we need twice more physical memory than programmed in the
+	 * <fsiz> register (because there are two FIFOs for each DMA: CP and CS)
+	 */
+	ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
+				      GFP_KERNEL);
+	if (!ppc440spe_dma_fifo_buf) {
+		pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
+		iounmap(i2o_reg);
+		dcr_unmap(i2o_dcr_host, dcr_len);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Configure h/w
+	 */
+	/* Reset I2O/DMA */
+	mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
+	mtdcri(SDR0, DCRN_SDR0_SRST, 0);
+
+	/* Setup the base address of mmaped registers */
+	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32) (i2o_res.start >> 32));
+	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32) (i2o_res.start) |
+		  I2O_REG_ENABLE);
+	dcr_unmap(i2o_dcr_host, dcr_len);
+
+	/* Setup FIFO memory space base address */
+	iowrite32(0, &i2o_reg->ifbah);
+	iowrite32(((u32) __pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
+
+	/* set zero FIFO size for I2O, so the whole
+	 * ppc440spe_dma_fifo_buf is used by DMAs.
+	 * DMAx_FIFOs will be configured while probe.
+	 */
+	iowrite32(0, &i2o_reg->ifsiz);
+	iounmap(i2o_reg);
+
+	/* To prepare WXOR/RXOR functionality we need access to
+	 * Memory Queue Module DCRs (finally it will be enabled
+	 * via /sys interface of the ppc440spe ADMA driver).
+	 */
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+	np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
+#endif
+	if (!np) {
+		pr_err("%s: can't find MQ device tree node\n", __func__);
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	/* Get MQ DCRs base */
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+	if (!dcr_base && !dcr_len) {
+		pr_err("%s: can't get DCR registers base/len!\n",
+		       np->full_name);
+		ret = -ENODEV;
+		goto out_mq;
+	}
+
+	ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
+		pr_err("%s: failed to map DCRs!\n", np->full_name);
+		ret = -ENODEV;
+		goto out_mq;
+	}
+	of_node_put(np);
+	ppc440spe_mq_dcr_len = dcr_len;
+
+	/* Set HB alias */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
+
+	/* Set:
+	 * - LL transaction passing limit to 1;
+	 * - Memory controller cycle limit to 1;
+	 * - Galois Polynomial to 0x14d (default)
+	 */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
+		  (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
+		  (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
+
+	atomic_set(&ppc440spe_adma_err_irq_ref, 0);
+	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
+		ppc440spe_adma_devices[i] = -1;
+
+	return 0;
+
+      out_mq:
+	of_node_put(np);
+      out_free:
+	kfree(ppc440spe_dma_fifo_buf);
+	return ret;
+}
+
+/**
+ * ppc440spe_test_callback - called when test operation has been done
+ */
+static void ppc440spe_test_callback(void *unused)
+{
+	complete(&ppc440spe_r6_test_comp);
+}
+
+/**
+ * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully.
+ *	For this we just perform one WXOR operation with the same source
+ *	and destination addresses, the GF-multiplier is 1; so if RAID-6
+ *	capabilities are enabled then we'll get src/dst filled with zero.
+ */
+static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+	struct page *pg;
+	char *a;
+	dma_addr_t dma_addr, addrs[2];
+	unsigned long op = 0;
+	int rval = 0;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+
+	pg = alloc_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+
+	spin_lock_bh(&chan->lock);
+	sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
+	if (sw_desc) {
+		/* 1 src, 1 dsr, int_ena, WXOR */
+		ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
+			iter->unmap_len = PAGE_SIZE;
+		}
+	} else {
+		rval = -EFAULT;
+		spin_unlock_bh(&chan->lock);
+		goto exit;
+	}
+	spin_unlock_bh(&chan->lock);
+
+	/* Fill the test page with ones */
+	memset(page_address(pg), 0xFF, PAGE_SIZE);
+	dma_addr = dma_map_page(chan->device->dev, pg, 0,
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* Setup addresses */
+	ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
+	ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
+	addrs[0] = dma_addr;
+	addrs[1] = 0;
+	ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
+
+	async_tx_ack(&sw_desc->async_tx);
+	sw_desc->async_tx.callback = ppc440spe_test_callback;
+	sw_desc->async_tx.callback_param = NULL;
+
+	init_completion(&ppc440spe_r6_test_comp);
+
+	ppc440spe_adma_tx_submit(&sw_desc->async_tx);
+	ppc440spe_adma_issue_pending(&chan->common);
+
+	wait_for_completion(&ppc440spe_r6_test_comp);
+
+	/* Now check if the test page is zeroed */
+	a = page_address(pg);
+	if ((*(u32 *) a) == 0 && memcmp(a, a + 4, PAGE_SIZE - 4) == 0) {
+		/* page is zero - RAID-6 enabled */
+		rval = 0;
+	} else {
+		/* RAID-6 was not enabled */
+		rval = -EINVAL;
+	}
+      exit:
+	__free_page(pg);
+	return rval;
+}
+
+/**
+ * ppc440spe_adma_remove - remove the asynch device
+ */
+int __devexit ppc440spe_adma_remove(struct platform_device *ofdev)
+{
+	struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev);
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	struct dma_chan *chan, *_chan;
+	struct ppc_dma_chan_ref *ref, *_ref;
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+
+	dev_set_drvdata(&ofdev->dev, NULL);
+	if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
+		ppc440spe_adma_devices[adev->id] = -1;
+
+	dma_async_device_unregister(&adev->common);
+
+	list_for_each_entry_safe(chan, _chan, &adev->common.channels,
+				 device_node) {
+		ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+		ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
+		tasklet_kill(&ppc440spe_chan->irq_tasklet);
+		if (adev->id != PPC440SPE_XOR_ID) {
+			dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
+				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
+				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__free_page(ppc440spe_chan->pdest_page);
+			__free_page(ppc440spe_chan->qdest_page);
+		}
+		list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
+					 node) {
+			if (ppc440spe_chan == to_ppc440spe_adma_chan(ref->chan)) {
+				list_del(&ref->node);
+				kfree(ref);
+			}
+		}
+		list_del(&chan->device_node);
+		kfree(ppc440spe_chan);
+	}
+
+	dma_free_coherent(adev->dev, adev->pool_size,
+			  adev->dma_desc_pool_virt, adev->dma_desc_pool);
+	if (adev->id == PPC440SPE_XOR_ID)
+		iounmap(adev->xor_reg);
+	else
+		iounmap(adev->dma_reg);
+	of_address_to_resource(np, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+	kfree(adev);
+	return 0;
+}
+
+/*
+ * /sys driver interface to enable h/w RAID-6 capabilities
+ * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
+ * directory are "devices", "enable" and "poly".
+ * "devices" shows available engines.
+ * "enable" is used to enable RAID-6 capabilities or to check
+ * whether these has been activated.
+ * "poly" allows setting/checking used polynomial (for PPC440spe only).
+ */
+
+static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
+{
+	ssize_t size = 0;
+	int i;
+
+	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
+		if (ppc440spe_adma_devices[i] == -1)
+			continue;
+		size += snprintf(buf + size, PAGE_SIZE - size,
+				 "PPC440SPe-ADMA.%d: %s\n", i,
+				 ppc_adma_errors[ppc440spe_adma_devices[i]]);
+	}
+	return size;
+}
+static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE,
+			"PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
+			ppc440spe_r6_enabled ? "EN" : "DIS");
+}
+
+static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
+					const char *buf, size_t count)
+{
+	unsigned long val;
+
+	if (!count || count > 11)
+		return -EINVAL;
+
+	if (!ppc440spe_r6_tchan)
+		return -EFAULT;
+
+	/* Write a key */
+	sscanf(buf, "%lx", &val);
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
+	isync();
+
+	/* Verify whether it really works now */
+	if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
+		pr_info("PPC440SP(e) RAID-6 has been activated "
+			"successfully\n");
+		ppc440spe_r6_enabled = 1;
+	} else {
+		pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
+			" Error key ?\n");
+		ppc440spe_r6_enabled = 0;
+	}
+	return count;
+}
+
+static ssize_t show_ppc4440spe_r6poly(struct device_driver *dev, char *buf)
+{
+	ssize_t size = 0;
+	u32 reg;
+
+#ifdef CONFIG_440SP
+	/* 440SP has fixed polynomial */
+	reg = 0x4d;
+#else
+	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+	reg >>= MQ0_CFBHL_POLY;
+	reg &= 0xFF;
+#endif
+
+	size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
+			"uses 0x1%02x polynomial.\n", reg);
+	return size;
+}
+
+static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
+				      const char *buf, size_t count)
+{
+	unsigned long reg, val;
+
+#ifdef CONFIG_440SP
+	/* 440SP uses default 0x14D polynomial only */
+	return -EINVAL;
+#endif
+
+	if (!count || count > 6)
+		return -EINVAL;
+
+	/* e.g., 0x14D or 0x11D */
+	sscanf(buf, "%lx", &val);
+
+	if (val & ~0x1FF)
+		return -EINVAL;
+
+	val &= 0xFF;
+	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+	reg &= ~(0xFF << MQ0_CFBHL_POLY);
+	reg |= val << MQ0_CFBHL_POLY;
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
+
+	return count;
+}
+
+static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL);
+static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable,
+		   store_ppc440spe_r6enable);
+static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc4440spe_r6poly,
+		   store_ppc440spe_r6poly);
+int ppc440spe_adma_hw_init(void)
+{
+	int ret;
+
+	ret = ppc440spe_configure_raid_devices();
+	if (ret)
+		return ret;
+
+	ret = of_register_platform_driver(&ppc440spe_adma_driver);
+	if (ret) {
+		pr_err("%s: failed to register platform driver\n", __func__);
+		goto out_reg;
+	}
+
+	/* Initialization status */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_devices);
+	if (ret)
+		goto out_dev;
+
+	/* RAID-6 h/w enable entry */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_enable);
+	if (ret)
+		goto out_en;
+
+	/* GF polynomial to use */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver, &driver_attr_poly);
+	if (!ret)
+		return ret;
+
+	driver_remove_file(&ppc440spe_adma_driver.driver, &driver_attr_enable);
+      out_en:
+	driver_remove_file(&ppc440spe_adma_driver.driver, &driver_attr_devices);
+      out_dev:
+	/* User will not be able to enable h/w RAID-6 */
+	pr_err("%s: failed to create RAID-6 driver interface\n", __func__);
+      out_reg:
+	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+	kfree(ppc440spe_dma_fifo_buf);
+	return ret;
+}
+
+static void __exit ppc440spe_adma_exit(void)
+{
+	driver_remove_file(&ppc440spe_adma_driver.driver, &driver_attr_poly);
+	driver_remove_file(&ppc440spe_adma_driver.driver, &driver_attr_enable);
+	driver_remove_file(&ppc440spe_adma_driver.driver, &driver_attr_devices);
+	of_unregister_platform_driver(&ppc440spe_adma_driver);
+	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+	kfree(ppc440spe_dma_fifo_buf);
+}
diff --git a/drivers/dma/ppc4xx/ppc440spe-adma.h b/drivers/dma/ppc4xx/ppc440spe-adma.h
new file mode 100644
index 0000000..81a1f46
--- /dev/null
+++ b/drivers/dma/ppc4xx/ppc440spe-adma.h
@@ -0,0 +1,2391 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * Further porting to arch/powerpc by
+ * 	Anatolij Gustschin <agust@denx.de>
+ * 	Tirumala R Marri <tmarri@apm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef __PPC440SPE_ADMA_H
+#define __PPC440SPE_ADMA_H
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include "adma.h"
+#include "dma.h"
+#include "xor.h"
+
+#ifdef ADMA_LL_DEBUG
+#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
+#else
+#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
+#endif
+
+enum ppc_adma_init_code {
+	PPC_ADMA_INIT_OK = 0,
+	PPC_ADMA_INIT_MEMRES,
+	PPC_ADMA_INIT_MEMREG,
+	PPC_ADMA_INIT_ALLOC,
+	PPC_ADMA_INIT_COHERENT,
+	PPC_ADMA_INIT_CHANNEL,
+	PPC_ADMA_INIT_IRQ1,
+	PPC_ADMA_INIT_IRQ2,
+	PPC_ADMA_INIT_REGISTER
+};
+
+struct ppc_dma_chan_ref {
+	struct dma_chan *chan;
+	struct list_head node;
+};
+
+/* This flag is set when want to refetch the xor chain in the interrupt
+ * handler
+ */
+static u32 do_xor_refetch;
+static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
+/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up
+ * the block size in transactions, then we do not allow to activate more than
+ * only one RXOR transactions simultaneously. So use this var to store
+ * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is
+ * set) or not (PPC440SPE_RXOR_RUN is clear).
+ */
+static unsigned long ppc440spe_rxor_state;
+
+/* Pointer to last linked and submitted xor CB */
+static struct ppc440spe_adma_desc_slot *xor_last_linked;
+static struct ppc440spe_adma_desc_slot *xor_last_submit;
+
+
+/* Pointers to last submitted to DMA0, DMA1 CDBs */
+static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
+static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
+
+/* The list of channels exported by ppc4xx ADMA */
+static struct list_head ppc440spe_adma_chan_list =
+LIST_HEAD_INIT(ppc440spe_adma_chan_list);
+
+static int ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
+static struct of_platform_driver ppc440spe_adma_driver;
+
+static const struct of_device_id ppc440spe_adma_of_match[] __devinitconst = {
+	{.compatible = "ibm,dma-440spe",},
+	{.compatible = "amcc,xor-accelerator",},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
+
+
+
+irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data);
+irqreturn_t ppc440spe_adma_err_handler(int irq, void *data);
+
+void ppc440spe_adma_issue_pending(struct dma_chan *chan);
+struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(struct
+						      ppc440spe_adma_chan
+						      *chan,
+						      int num_slots,
+						      int slots_per_op);
+void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
+			    struct ppc440spe_adma_chan *chan);
+dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
+void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
+void prep_dma_pqzero_sum_dbg(int id, dma_addr_t * src,
+			     unsigned int src_cnt, const unsigned char *scf);
+/*
+ * ppc440spe_get_group_entry - get group entry with index idx
+ * @tdesc: is the last allocated slot in the group.
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_get_group_entry(struct
+							    ppc440spe_adma_desc_slot
+							    *tdesc,
+							    u32 entry_idx)
+{
+	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
+	int i = 0;
+
+	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
+		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
+		       __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
+		BUG();
+	}
+
+	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
+		if (i++ == entry_idx)
+			break;
+	}
+	return iter;
+}
+
+static inline void print_cb(struct ppc440spe_adma_chan *chan, void *block)
+{
+	struct dma_cdb *cdb;
+	struct xor_cb *cb;
+	int i;
+
+	switch (chan->device->id) {
+	case 0:
+	case 1:
+		cdb = block;
+
+		pr_debug("CDB at %p [%d]:\n"
+			 "\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
+			 "\t sg1u 0x%08x sg1l 0x%08x\n"
+			 "\t sg2u 0x%08x sg2l 0x%08x\n"
+			 "\t sg3u 0x%08x sg3l 0x%08x\n",
+			 cdb, chan->device->id,
+			 cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
+			 le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
+			 le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
+			 le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
+		    );
+		break;
+	case 2:
+		cb = block;
+
+		pr_debug("CB at %p [%d]:\n"
+			 "\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
+			 "\t cbtah 0x%08x cbtal 0x%08x\n"
+			 "\t cblah 0x%08x cblal 0x%08x\n",
+			 cb, chan->device->id,
+			 cb->cbc, cb->cbbc, cb->cbs,
+			 cb->cbtah, cb->cbtal, cb->cblah, cb->cblal);
+		for (i = 0; i < 16; i++) {
+			if (i && !cb->ops[i].h && !cb->ops[i].l)
+				continue;
+			pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
+				 i, cb->ops[i].h, cb->ops[i].l);
+		}
+		break;
+	}
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
+ * pseudo operation
+ */
+static inline void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot
+					      *desc,
+					      struct ppc440spe_adma_chan *chan)
+{
+	struct xor_cb *p;
+
+	switch (chan->device->id) {
+	case PPC440SPE_XOR_ID:
+		p = desc->hw_desc;
+		memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+		/* NOP with Command Block Complete Enable */
+		p->cbc = XOR_CBCR_CBCE_BIT;
+		break;
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+		/* NOP with interrupt */
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+		break;
+	default:
+		printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
+		       __func__);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
+ * pseudo operation
+ */
+static inline void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
+{
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 0;
+	desc->dst_cnt = 1;
+}
+
+/**
+ * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
+ */
+static inline void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
+					int src_cnt, unsigned long flags)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = 1;
+
+	hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Enable interrupt on completion */
+		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+/**
+ * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
+ * operation in DMA2 controller
+ */
+static inline void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
+					   int dst_cnt, int src_cnt,
+					   unsigned long flags)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+	memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
+	desc->descs_per_op = 0;
+
+	hw_desc->cbc = XOR_CBCR_TGT_BIT;
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Enable interrupt on completion */
+		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+#define DMA_CTRL_FLAGS_LAST	DMA_PREP_FENCE
+#define DMA_PREP_ZERO_P		(DMA_CTRL_FLAGS_LAST << 1)
+#define DMA_PREP_ZERO_Q		(DMA_PREP_ZERO_P << 1)
+
+/**
+ * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
+ * with DMA0/1
+ */
+static inline void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
+					    int dst_cnt, int src_cnt,
+					    unsigned long flags,
+					    unsigned long op)
+{
+	struct dma_cdb *hw_desc;
+	struct ppc440spe_adma_desc_slot *iter;
+	u8 dopc;
+
+	/* Common initialization of a PQ descriptors chain */
+	set_bits(op, &desc->flags);
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+
+	/* WXOR MULTICAST if both P and Q are being computed
+	 * MV_SG1_SG2 if Q only
+	 */
+	dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
+	    DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
+
+	list_for_each_entry(iter, &desc->group_list, chain_node) {
+		hw_desc = iter->hw_desc;
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+
+		if (likely(!list_is_last(&iter->chain_node, &desc->group_list))) {
+			/* set 'next' pointer */
+			iter->hw_next = list_entry(iter->chain_node.next,
+						   struct ppc440spe_adma_desc_slot,
+						   chain_node);
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		} else {
+			/* this is the last descriptor.
+			 * this slot will be pasted from ADMA level
+			 * each time it wants to configure parameters
+			 * of the transaction (src, dst, ...)
+			 */
+			iter->hw_next = NULL;
+			if (flags & DMA_PREP_INTERRUPT)
+				set_bit(PPC440SPE_DESC_INT, &iter->flags);
+			else
+				clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		}
+	}
+
+	/* Set OPS depending on WXOR/RXOR type of operation */
+	if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
+		/* This is a WXOR only chain:
+		 * - first descriptors are for zeroing destinations
+		 *   if PPC440SPE_ZERO_P/Q set;
+		 * - descriptors remained are for GF-XOR operations.
+		 */
+		iter = list_first_entry(&desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+
+		if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+		}
+
+		if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+		}
+
+		list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = dopc;
+		}
+	} else {
+		/* This is either RXOR-only or mixed RXOR/WXOR */
+
+		/* The first 1 or 2 slots in chain are always RXOR,
+		 * if need to calculate P & Q, then there are two
+		 * RXOR slots; if only P or only Q, then there is one
+		 */
+		iter = list_first_entry(&desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+
+		if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		}
+
+		/* The remaining descs (if any) are WXORs */
+		if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			list_for_each_entry_from(iter, &desc->group_list,
+						 chain_node) {
+				hw_desc = iter->hw_desc;
+				hw_desc->opc = dopc;
+			}
+		}
+	}
+}
+
+/**
+ * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
+ * for PQ_ZERO_SUM operation
+ */
+static inline void ppc440spe_desc_init_dma01pqzero_sum(struct ppc440spe_adma_desc_slot
+						    *desc, int dst_cnt,
+						    int src_cnt)
+{
+	struct dma_cdb *hw_desc;
+	struct ppc440spe_adma_desc_slot *iter;
+	int i = 0;
+	u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
+	    DMA_CDB_OPC_MV_SG1_SG2;
+	/*
+	 * Initialize starting from 2nd or 3rd descriptor dependent
+	 * on dst_cnt. First one or two slots are for cloning P
+	 * and/or Q to chan->pdest and/or chan->qdest as we have
+	 * to preserve original P/Q.
+	 */
+	iter = list_first_entry(&desc->group_list,
+				struct ppc440spe_adma_desc_slot, chain_node);
+	iter = list_entry(iter->chain_node.next,
+			  struct ppc440spe_adma_desc_slot, chain_node);
+
+	if (dst_cnt > 1) {
+		iter = list_entry(iter->chain_node.next,
+				  struct ppc440spe_adma_desc_slot, chain_node);
+	}
+	/* initialize each source descriptor in chain */
+	list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+		hw_desc = iter->hw_desc;
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->src_cnt = 0;
+		iter->dst_cnt = 0;
+
+		/* This is a ZERO_SUM operation:
+		 * - <src_cnt> descriptors starting from 2nd or 3rd
+		 *   descriptor are for GF-XOR operations;
+		 * - remaining <dst_cnt> descriptors are for checking the result
+		 */
+		if (i++ < src_cnt)
+			/* MV_SG1_SG2 if only Q is being verified
+			 * MULTICAST if both P and Q are being verified
+			 */
+			hw_desc->opc = dopc;
+		else
+			/* DMA_CDB_OPC_DCHECK128 operation */
+			hw_desc->opc = DMA_CDB_OPC_DCHECK128;
+
+		if (likely(!list_is_last(&iter->chain_node, &desc->group_list))) {
+			/* set 'next' pointer */
+			iter->hw_next = list_entry(iter->chain_node.next,
+						   struct ppc440spe_adma_desc_slot,
+						   chain_node);
+		} else {
+			/* this is the last descriptor.
+			 * this slot will be pasted from ADMA level
+			 * each time it wants to configure parameters
+			 * of the transaction (src, dst, ...)
+			 */
+			iter->hw_next = NULL;
+			/* always enable interrupt generation since we get
+			 * the status of pqzero from the handler
+			 */
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		}
+	}
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+}
+
+/**
+ * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
+ */
+static inline void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
+					   unsigned long flags)
+{
+	struct dma_cdb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 1;
+	desc->dst_cnt = 1;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+	else
+		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+	hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+}
+
+/**
+ * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation
+ */
+static inline void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc,
+					   int value, unsigned long flags)
+{
+	struct dma_cdb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 1;
+	desc->dst_cnt = 1;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+	else
+		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+	hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32) value);
+	hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32) value);
+	hw_desc->opc = DMA_CDB_OPC_DFILL128;
+}
+
+/**
+ * ppc440spe_desc_set_byte_count - set number of data bytes involved
+ * into the operation
+ */
+static inline void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot
+					      *desc,
+					      struct ppc440spe_adma_chan *chan,
+					      u32 byte_count)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		dma_hw_desc->cnt = cpu_to_le32(byte_count);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->cbbc = byte_count;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_dcheck - set CHECK pattern
+ */
+static inline void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
+					  struct ppc440spe_adma_chan *chan,
+					  u8 * qword)
+{
+	struct dma_cdb *dma_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		iowrite32(qword[0], &dma_hw_desc->sg3l);
+		iowrite32(qword[4], &dma_hw_desc->sg3u);
+		iowrite32(qword[8], &dma_hw_desc->sg2l);
+		iowrite32(qword[12], &dma_hw_desc->sg2u);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
+ */
+static inline u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
+					   struct ppc440spe_adma_chan *chan,
+					   int src_idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		/* May have 0, 1, 2, or 3 sources */
+		switch (dma_hw_desc->opc) {
+		case DMA_CDB_OPC_NO_OP:
+		case DMA_CDB_OPC_DFILL128:
+			return 0;
+		case DMA_CDB_OPC_DCHECK128:
+			if (unlikely(src_idx)) {
+				printk(KERN_ERR "%s: try to get %d source for"
+				       " DCHECK128\n", __func__, src_idx);
+				BUG();
+			}
+			return le32_to_cpu(dma_hw_desc->sg1l);
+		case DMA_CDB_OPC_MULTICAST:
+		case DMA_CDB_OPC_MV_SG1_SG2:
+			if (unlikely(src_idx > 2)) {
+				printk(KERN_ERR "%s: try to get %d source from"
+				       " DMA descr\n", __func__, src_idx);
+				BUG();
+			}
+			if (src_idx) {
+				if (le32_to_cpu(dma_hw_desc->sg1u) &
+				    DMA_CUED_XOR_WIN_MSK) {
+					u8 region;
+
+					if (src_idx == 1)
+						return le32_to_cpu(dma_hw_desc->
+								   sg1l) +
+						    desc->unmap_len;
+
+					region =
+					    (le32_to_cpu(dma_hw_desc->sg1u)) >>
+					    DMA_CUED_REGION_OFF;
+
+					region &= DMA_CUED_REGION_MSK;
+					switch (region) {
+					case DMA_RXOR123:
+						return le32_to_cpu(dma_hw_desc->
+								   sg1l) +
+						    (desc->unmap_len << 1);
+					case DMA_RXOR124:
+						return le32_to_cpu(dma_hw_desc->
+								   sg1l) +
+						    (desc->unmap_len * 3);
+					case DMA_RXOR125:
+						return le32_to_cpu(dma_hw_desc->
+								   sg1l) +
+						    (desc->unmap_len << 2);
+					default:
+						printk(KERN_ERR
+						       "%s: try to"
+						       " get src3 for region %02x"
+						       "PPC440SPE_DESC_RXOR12?\n",
+						       __func__, region);
+						BUG();
+					}
+				} else {
+					printk(KERN_ERR
+					       "%s: try to get %d"
+					       " source for non-cued descr\n",
+					       __func__, src_idx);
+					BUG();
+				}
+			}
+			return le32_to_cpu(dma_hw_desc->sg1l);
+		default:
+			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+			       __func__, dma_hw_desc->opc);
+			BUG();
+		}
+		return le32_to_cpu(dma_hw_desc->sg1l);
+	case PPC440SPE_XOR_ID:
+		/* May have up to 16 sources */
+		xor_hw_desc = desc->hw_desc;
+		return xor_hw_desc->ops[src_idx].l;
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_dest_addr - extract the destination address from the
+ * descriptor
+ */
+static inline u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+					    struct ppc440spe_adma_chan *chan,
+					    int idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		if (likely(!idx))
+			return le32_to_cpu(dma_hw_desc->sg2l);
+		return le32_to_cpu(dma_hw_desc->sg3l);
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		return xor_hw_desc->cbtal;
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_src_num - extract the number of source addresses from
+ * the descriptor
+ */
+static inline u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
+					  struct ppc440spe_adma_chan *chan)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		switch (dma_hw_desc->opc) {
+		case DMA_CDB_OPC_NO_OP:
+		case DMA_CDB_OPC_DFILL128:
+			return 0;
+		case DMA_CDB_OPC_DCHECK128:
+			return 1;
+		case DMA_CDB_OPC_MV_SG1_SG2:
+		case DMA_CDB_OPC_MULTICAST:
+			/*
+			 * Only for RXOR operations we have more than
+			 * one source
+			 */
+			if (le32_to_cpu(dma_hw_desc->sg1u) &
+			    DMA_CUED_XOR_WIN_MSK) {
+				/* RXOR op, there are 2 or 3 sources */
+				if (((le32_to_cpu(dma_hw_desc->sg1u) >>
+				      DMA_CUED_REGION_OFF) &
+				     DMA_CUED_REGION_MSK) == DMA_RXOR12) {
+					/* RXOR 1-2 */
+					return 2;
+				} else {
+					/* RXOR 1-2-3/1-2-4/1-2-5 */
+					return 3;
+				}
+			}
+			return 1;
+		default:
+			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+			       __func__, dma_hw_desc->opc);
+			BUG();
+		}
+	case PPC440SPE_XOR_ID:
+		/* up to 16 sources */
+		xor_hw_desc = desc->hw_desc;
+		return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_dst_num - get the number of destination addresses in
+ * this descriptor
+ */
+static inline u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
+					  struct ppc440spe_adma_chan *chan)
+{
+	struct dma_cdb *dma_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* May be 1 or 2 destinations */
+		dma_hw_desc = desc->hw_desc;
+		switch (dma_hw_desc->opc) {
+		case DMA_CDB_OPC_NO_OP:
+		case DMA_CDB_OPC_DCHECK128:
+			return 0;
+		case DMA_CDB_OPC_MV_SG1_SG2:
+		case DMA_CDB_OPC_DFILL128:
+			return 1;
+		case DMA_CDB_OPC_MULTICAST:
+			if (desc->dst_cnt == 2)
+				return 2;
+			else
+				return 1;
+		default:
+			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+			       __func__, dma_hw_desc->opc);
+			BUG();
+		}
+	case PPC440SPE_XOR_ID:
+		/* Always only 1 destination */
+		return 1;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+/******************************************************************************
+ * ADMA channel low-level routines
+ ******************************************************************************/
+
+static inline u32 ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan
+						     *chan);
+static inline void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
+				       struct ppc440spe_adma_desc_slot *desc);
+static inline void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
+				       struct ppc440spe_adma_desc_slot *next_desc);
+static inline void print_cb_list(struct ppc440spe_adma_chan *chan,
+				 struct ppc440spe_adma_desc_slot *iter);
+/**
+ * ppc440spe_chan_append - update the h/w chain in the channel
+ */
+static inline void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
+{
+	struct xor_regs *xor_reg;
+	struct ppc440spe_adma_desc_slot *iter;
+	struct xor_cb *xcb;
+	u32 cur_desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		cur_desc = ppc440spe_chan_get_current_descriptor(chan);
+
+		if (likely(cur_desc)) {
+			iter = chan_last_sub[chan->device->id];
+			BUG_ON(!iter);
+		} else {
+			/* first peer */
+			iter = chan_first_cdb[chan->device->id];
+			BUG_ON(!iter);
+			ppc440spe_dma_put_desc(chan, iter);
+			chan->hw_chain_inited = 1;
+		}
+
+		/* is there something new to append */
+		if (!iter->hw_next)
+			break;
+
+		/* flush descriptors from the s/w queue to fifo */
+		list_for_each_entry_continue(iter, &chan->chain, chain_node) {
+			ppc440spe_dma_put_desc(chan, iter);
+			if (!iter->hw_next)
+				break;
+		}
+		break;
+	case PPC440SPE_XOR_ID:
+		/* update h/w links and refetch */
+		if (!xor_last_submit->hw_next)
+			break;
+
+		xor_reg = chan->device->xor_reg;
+		/* the last linked CDB has to generate an interrupt
+		 * that we'd be able to append the next lists to h/w
+		 * regardless of the XOR engine state at the moment of
+		 * appending of these next lists
+		 */
+		xcb = xor_last_linked->hw_desc;
+		xcb->cbc |= XOR_CBCR_CBCE_BIT;
+
+		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
+			/* XORcore is idle. Refetch now */
+			do_xor_refetch = 0;
+			ppc440spe_xor_set_link(xor_last_submit,
+					    xor_last_submit->hw_next);
+
+			ADMA_LL_DBG(print_cb_list(chan,
+						  xor_last_submit->hw_next));
+
+			xor_last_submit = xor_last_linked;
+			iowrite32be(ioread32be(&xor_reg->crsr) |
+				    XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
+				    &xor_reg->crsr);
+		} else {
+			/* XORcore is running. Refetch later in the handler */
+			do_xor_refetch = 1;
+		}
+
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
+ */
+static inline void ppc440spe_adma_device_clear_eot_status(struct ppc440spe_adma_chan
+						       *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+	u8 *p = chan->device->dma_desc_pool_virt;
+	struct dma_cdb *cdb;
+	u32 rv, i;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* read FIFO to ack */
+		dma_reg = chan->device->dma_reg;
+		while ((rv = ioread32(&dma_reg->csfpl))) {
+			i = rv & DMA_CDB_ADDR_MSK;
+			cdb = (struct dma_cdb *)&p[i -
+						   (u32) chan->device->
+						   dma_desc_pool];
+
+			/* Clear opcode to ack. This is necessary for
+			 * ZeroSum operations only
+			 */
+			cdb->opc = 0;
+
+			if (test_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
+				/* probably this is a completed RXOR op,
+				 * get pointer to CDB using the fact that
+				 * physical and virtual addresses of CDB
+				 * in pools have the same offsets
+				 */
+				if (le32_to_cpu(cdb->sg1u) & DMA_CUED_XOR_BASE) {
+					/* this is a RXOR */
+					clear_bit(PPC440SPE_RXOR_RUN,
+						  &ppc440spe_rxor_state);
+				}
+			}
+
+			if (rv & DMA_CDB_STATUS_MSK) {
+				/* ZeroSum check failed
+				 */
+				struct ppc440spe_adma_desc_slot *iter;
+				dma_addr_t phys = rv & ~DMA_CDB_MSK;
+
+				/*
+				 * Update the status of corresponding
+				 * descriptor.
+				 */
+				list_for_each_entry(iter, &chan->chain,
+						    chain_node) {
+					if (iter->phys == phys)
+						break;
+				}
+				/*
+				 * if cannot find the corresponding
+				 * slot it's a bug
+				 */
+				BUG_ON(&iter->chain_node == &chan->chain);
+
+				if (iter->xor_check_result) {
+					if (test_bit(PPC440SPE_DESC_PCHECK,
+						     &iter->flags)) {
+						*iter->xor_check_result |=
+						    SUM_CHECK_P_RESULT;
+					} else
+					    if (test_bit(PPC440SPE_DESC_QCHECK,
+							 &iter->flags)) {
+						*iter->xor_check_result |=
+						    SUM_CHECK_Q_RESULT;
+					} else
+						BUG();
+				}
+			}
+		}
+
+		rv = ioread32(&dma_reg->dsts);
+		if (rv) {
+			pr_err("DMA%d err status: 0x%x\n",
+			       chan->device->id, rv);
+			/* write back to clear */
+			iowrite32(rv, &dma_reg->dsts);
+		}
+		break;
+	case PPC440SPE_XOR_ID:
+		/* reset status bits to ack */
+		xor_reg = chan->device->xor_reg;
+		rv = ioread32be(&xor_reg->sr);
+		iowrite32be(rv, &xor_reg->sr);
+
+		if (rv &
+		    (XOR_IE_ICBIE_BIT | XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT)) {
+			if (rv & XOR_IE_RPTIE_BIT) {
+				/* Read PLB Timeout Error.
+				 * Try to resubmit the CB
+				 */
+				u32 val = ioread32be(&xor_reg->ccbalr);
+
+				iowrite32be(val, &xor_reg->cblalr);
+
+				val = ioread32be(&xor_reg->crsr);
+				iowrite32be(val | XOR_CRSR_XAE_BIT,
+					    &xor_reg->crsr);
+			} else
+				pr_err("XOR ERR 0x%x status\n", rv);
+			break;
+		}
+
+		/*  if the XORcore is idle, but there are unprocessed CBs
+		 * then refetch the s/w chain here
+		 */
+		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
+		    do_xor_refetch)
+			ppc440spe_chan_append(chan);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_chan_is_busy - get the channel status
+ */
+static inline int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+	int busy = 0;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_reg = chan->device->dma_reg;
+		/*  if command FIFO's head and tail pointers are equal and
+		 * status tail is the same as command, then channel is free
+		 */
+		if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
+		    ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
+			busy = 1;
+		break;
+	case PPC440SPE_XOR_ID:
+		/* use the special status bit for the XORcore
+		 */
+		xor_reg = chan->device->xor_reg;
+		busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
+		break;
+	}
+
+	return busy;
+}
+
+/**
+ * ppc440spe_chan_set_first_xor_descriptor -  init XORcore chain
+ */
+static inline void ppc440spe_chan_set_first_xor_descriptor(struct ppc440spe_adma_chan
+							*chan,
+							struct
+							ppc440spe_adma_desc_slot
+							*next_desc)
+{
+	struct xor_regs *xor_reg = chan->device->xor_reg;
+
+	if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
+		printk(KERN_INFO "%s: Warn: XORcore is running "
+		       "when try to set the first CDB!\n", __func__);
+
+	xor_last_submit = xor_last_linked = next_desc;
+
+	iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
+
+	iowrite32be(next_desc->phys, &xor_reg->cblalr);
+	iowrite32be(0, &xor_reg->cblahr);
+	iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
+		    &xor_reg->cbcr);
+
+	chan->hw_chain_inited = 1;
+}
+
+/**
+ * ppc440spe_chan_run - enable the channel
+ */
+static inline void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
+{
+	struct xor_regs *xor_reg;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* DMAs are always enabled, do nothing */
+		break;
+	case PPC440SPE_XOR_ID:
+		/* drain write buffer */
+		xor_reg = chan->device->xor_reg;
+
+		/* fetch descriptor pointed to in <link> */
+		iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
+			    &xor_reg->crsr);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_device_estimate - estimate the efficiency of processing
+ *	the operation given on this channel. It's assumed that 'chan' is
+ *	capable to process 'cap' type of operation.
+ * @chan: channel to use
+ * @cap: type of transaction
+ * @dst_lst: array of destination pointers
+ * @dst_cnt: number of destination operands
+ * @src_lst: array of source pointers
+ * @src_cnt: number of source operands
+ * @src_sz: size of each source operand
+ */
+
+#define DMA_CTRL_FLAGS_LAST	DMA_PREP_FENCE
+#define DMA_PREP_ZERO_P		(DMA_CTRL_FLAGS_LAST << 1)
+#define DMA_PREP_ZERO_Q		(DMA_PREP_ZERO_P << 1)
+
+static inline void print_cb_list(struct ppc440spe_adma_chan *chan,
+				 struct ppc440spe_adma_desc_slot *iter)
+{
+	for (; iter; iter = iter->hw_next)
+		print_cb(chan, iter->hw_desc);
+}
+
+/**
+ * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
+ * called with irqs disabled
+ */
+static inline void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
+				       struct ppc440spe_adma_desc_slot *desc)
+{
+	u32 pcdb;
+	struct dma_regs *dma_reg = chan->device->dma_reg;
+
+	pcdb = desc->phys;
+	if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
+		pcdb |= DMA_CDB_NO_INT;
+
+	chan_last_sub[chan->device->id] = desc;
+
+	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
+
+	iowrite32(pcdb, &dma_reg->cpfpl);
+}
+
+/**
+ * ppc440spe_xor_set_link - set link address in xor CB
+ */
+static inline void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
+				       struct ppc440spe_adma_desc_slot *next_desc)
+{
+	struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
+
+	if (unlikely(!next_desc || !(next_desc->phys))) {
+		printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
+		       __func__, next_desc, next_desc ? next_desc->phys : 0);
+		BUG();
+	}
+
+	xor_hw_desc->cbs = 0;
+	xor_hw_desc->cblal = next_desc->phys;
+	xor_hw_desc->cblah = 0;
+	xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
+}
+
+/**
+ * ppc440spe_desc_set_link - set the address of descriptor following this
+ * descriptor in chain
+ */
+static inline void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
+					struct ppc440spe_adma_desc_slot *prev_desc,
+					struct ppc440spe_adma_desc_slot *next_desc)
+{
+	unsigned long flags;
+	struct ppc440spe_adma_desc_slot *tail = next_desc;
+
+	if (unlikely(!prev_desc || !next_desc ||
+		     (prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
+		/* If previous next is overwritten something is wrong.
+		 * though we may refetch from append to initiate list
+		 * processing; in this case - it's ok.
+		 */
+		printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
+		       "prev->hw_next=0x%p\n", __func__, prev_desc,
+		       next_desc, prev_desc ? prev_desc->hw_next : 0);
+		BUG();
+	}
+
+	local_irq_save(flags);
+
+	/* do s/w chaining both for DMA and XOR descriptors */
+	prev_desc->hw_next = next_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		break;
+	case PPC440SPE_XOR_ID:
+		/* bind descriptor to the chain */
+		while (tail->hw_next)
+			tail = tail->hw_next;
+		xor_last_linked = tail;
+
+		if (prev_desc == xor_last_submit)
+			/* do not link to the last submitted CB */
+			break;
+		ppc440spe_xor_set_link(prev_desc, next_desc);
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/******************************************************************************
+ * CDB field manipulation routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
+ */
+static inline void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+					     struct ppc440spe_adma_chan *chan,
+					     dma_addr_t addrh, dma_addr_t addrl,
+					     u32 dst_idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	phys_addr_t addr64, tmphi, tmplow;
+	u32 *psgu, *psgl;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (!addrh) {
+			addr64 = addrl;
+			tmphi = (addr64 >> 32);
+			tmplow = (addr64 & 0xFFFFFFFF);
+		} else {
+			tmphi = addrh;
+			tmplow = addrl;
+		}
+		dma_hw_desc = desc->hw_desc;
+
+		psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
+		psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
+
+		*psgl = cpu_to_le32((u32) tmplow);
+		*psgu |= cpu_to_le32((u32) tmphi);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->cbtal = addrl;
+		xor_hw_desc->cbtah |= addrh;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_src_addr - set source address into the descriptor
+ */
+static inline void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
+					    struct ppc440spe_adma_chan *chan,
+					    int src_idx, dma_addr_t addrh,
+					    dma_addr_t addrl)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	phys_addr_t addr64, tmplow, tmphi;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (!addrh) {
+			addr64 = addrl;
+			tmphi = (addr64 >> 32);
+			tmplow = (addr64 & 0xFFFFFFFF);
+		} else {
+			tmphi = addrh;
+			tmplow = addrl;
+		}
+		dma_hw_desc = desc->hw_desc;
+		dma_hw_desc->sg1l = cpu_to_le32((u32) tmplow);
+		dma_hw_desc->sg1u |= cpu_to_le32((u32) tmphi);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->ops[src_idx].l = addrl;
+		xor_hw_desc->ops[src_idx].h |= addrh;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
+ */
+static inline void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
+					    struct ppc440spe_adma_chan *chan,
+					    u32 mult_index, int sg_index,
+					    unsigned char mult_value)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	u32 *psgu;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		switch (sg_index) {
+			/* for RXOR operations set multiplier
+			 * into source cued address
+			 */
+		case DMA_CDB_SG_SRC:
+			psgu = &dma_hw_desc->sg1u;
+			break;
+			/* for WXOR operations set multiplier
+			 * into destination cued address(es)
+			 */
+		case DMA_CDB_SG_DST1:
+			psgu = &dma_hw_desc->sg2u;
+			break;
+		case DMA_CDB_SG_DST2:
+			psgu = &dma_hw_desc->sg3u;
+			break;
+		default:
+			BUG();
+		}
+
+		*psgu |= cpu_to_le32(mult_value << mult_index);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/******************************************************************************
+ * ADMA channel low-level routines
+ ******************************************************************************/
+
+static void ppc440spe_adma_device_clear_eot_status(struct ppc440spe_adma_chan *chan);
+static inline int ppc440spe_adma_dma2rxor_prep_src(struct ppc440spe_adma_desc_slot
+						*hdesc,
+						struct ppc440spe_rxor *cursor,
+						int index, int src_cnt,
+						u32 addr);
+
+static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan);
+static void ppc440spe_chan_set_first_xor_descriptor(struct ppc440spe_adma_chan *chan, struct ppc440spe_adma_desc_slot
+						 *next_desc);
+/**
+ * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
+ */
+static inline u32 ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan
+						     *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+
+	if (unlikely(!chan->hw_chain_inited))
+		/* h/w descriptor chain is not initialized yet */
+		return 0;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_reg = chan->device->dma_reg;
+		return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
+	case PPC440SPE_XOR_ID:
+		xor_reg = chan->device->xor_reg;
+		return ioread32be(&xor_reg->ccbalr);
+	}
+	return 0;
+}
+
+static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan);
+/**
+ * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
+ */
+static inline int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
+					 struct ppc440spe_adma_chan *chan)
+{
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx))
+		return 0;
+
+	/* leave the last descriptor in the chain
+	 * so we can append to it
+	 */
+	if (list_is_last(&desc->chain_node, &chan->chain) ||
+	    desc->phys == ppc440spe_chan_get_current_descriptor(chan))
+		return 1;
+
+	if (chan->device->id != PPC440SPE_XOR_ID) {
+		/* our DMA interrupt handler clears opc field of
+		 * each processed descriptor. For all types of
+		 * operations except for ZeroSum we do not actually
+		 * need ack from the interrupt handler. ZeroSum is a
+		 * special case since the result of this operation
+		 * is available from the handler only, so if we see
+		 * such type of descriptor (which is unprocessed yet)
+		 * then leave it in chain.
+		 */
+		struct dma_cdb *cdb = desc->hw_desc;
+		if (cdb->opc == DMA_CDB_OPC_DCHECK128)
+			return 1;
+	}
+
+	dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
+		desc->phys, desc->idx, desc->slots_per_op);
+
+	list_del(&desc->chain_node);
+	ppc440spe_adma_free_slots(desc, chan);
+	return 0;
+}
+
+/**
+ * ppc440spe_rxor_set_region_data -
+ */
+static inline void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
+					  u8 xor_arg_no, u32 mask)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= mask;
+}
+
+/**
+ * ppc440spe_rxor_set_src -
+ */
+static inline void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
+				       u8 xor_arg_no, dma_addr_t addr)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
+	xcb->ops[xor_arg_no].l = addr;
+}
+
+/**
+ * ppc440spe_rxor_set_mult -
+ */
+static inline void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
+					u8 xor_arg_no, u8 idx, u8 mult)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
+ *	ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static inline void ppc440spe_adma_dma2rxor_set_src(struct ppc440spe_adma_desc_slot
+						*desc, int index,
+						dma_addr_t addr)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+	int k = 0, op = 0, lop = 0;
+
+	/* get the RXOR operand which corresponds to index addr */
+	while (op <= index) {
+		lop = op;
+		if (k == XOR_MAX_OPS) {
+			k = 0;
+			desc = list_entry(desc->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+			xcb = desc->hw_desc;
+
+		}
+		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+			op += 2;
+		else
+			op += 3;
+	}
+
+	BUG_ON(k < 1);
+
+	if (test_bit(k - 1, desc->reverse_flags)) {
+		/* reverse operand order; put last op in RXOR group */
+		if (index == op - 1)
+			ppc440spe_rxor_set_src(desc, k - 1, addr);
+	} else {
+		/* direct operand order; put first op in RXOR group */
+		if (index == lop)
+			ppc440spe_rxor_set_src(desc, k - 1, addr);
+	}
+}
+
+/**
+ * ppc440spe_adma_pq_set_src - set source address into descriptor
+ */
+static inline void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
+					  dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t haddr = 0;
+	struct ppc440spe_adma_desc_slot *iter = NULL;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
+		 */
+		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			/* RXOR-only or RXOR/WXOR operation */
+			int iskip = test_bit(PPC440SPE_DESC_RXOR12,
+					     &sw_desc->flags) ? 2 : 3;
+
+			if (index == 0) {
+				/* 1st slot (RXOR) */
+				/* setup sources region (R1-2-3, R1-2-4,
+				 * or R1-2-5)
+				 */
+				if (test_bit(PPC440SPE_DESC_RXOR12,
+					     &sw_desc->flags))
+					haddr = DMA_RXOR12 <<
+					    DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR123,
+						  &sw_desc->flags))
+					haddr = DMA_RXOR123 <<
+					    DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR124,
+						  &sw_desc->flags))
+					haddr = DMA_RXOR124 <<
+					    DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR125,
+						  &sw_desc->flags))
+					haddr = DMA_RXOR125 <<
+					    DMA_CUED_REGION_OFF;
+				else
+					BUG();
+				haddr |= DMA_CUED_XOR_BASE;
+				iter = ppc440spe_get_group_entry(sw_desc, 0);
+			} else if (index < iskip) {
+				/* 1st slot (RXOR)
+				 * shall actually set source address only once
+				 * instead of first <iskip>
+				 */
+				iter = NULL;
+			} else {
+				/* 2nd/3d and next slots (WXOR);
+				 * skip first slot with RXOR
+				 */
+				haddr = DMA_CUED_XOR_HB;
+				iter = ppc440spe_get_group_entry(sw_desc,
+							      index - iskip +
+							      sw_desc->dst_cnt);
+			}
+		} else {
+			int znum = 0;
+
+			/* WXOR-only operation; skip first slots with
+			 * zeroing destinations
+			 */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				znum++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				znum++;
+
+			haddr = DMA_CUED_XOR_HB;
+			iter = ppc440spe_get_group_entry(sw_desc, index + znum);
+		}
+
+		if (likely(iter)) {
+			ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
+
+			if (!index &&
+			    test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
+			    sw_desc->dst_cnt == 2) {
+				/* if we have two destinations for RXOR, then
+				 * setup source in the second descr too
+				 */
+				iter = ppc440spe_get_group_entry(sw_desc, 1);
+				ppc440spe_desc_set_src_addr(iter, chan, 0,
+							 haddr, addr);
+			}
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		/* DMA2 may do Biskup */
+		iter = sw_desc->group_head;
+		if (iter->dst_cnt == 2) {
+			/* both P & Q calculations required; set P src here */
+			ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+
+			/* this is for Q */
+			iter = ppc440spe_get_group_entry(sw_desc,
+						      sw_desc->descs_per_op);
+		}
+		ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
+ *	ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static inline void ppc440spe_adma_dma2rxor_set_mult(struct ppc440spe_adma_desc_slot
+						 *desc, int index, u8 mult)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+	int k = 0, op = 0, lop = 0;
+
+	/* get the RXOR operand which corresponds to index mult */
+	while (op <= index) {
+		lop = op;
+		if (k == XOR_MAX_OPS) {
+			k = 0;
+			desc = list_entry(desc->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+			xcb = desc->hw_desc;
+
+		}
+		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+			op += 2;
+		else
+			op += 3;
+	}
+
+	BUG_ON(k < 1);
+	if (test_bit(k - 1, desc->reverse_flags)) {
+		/* reverse order */
+		ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
+	} else {
+		/* direct order */
+		ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
+	}
+}
+
+/**
+ * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
+ * descriptor for the PQXOR operation
+ */
+static inline void ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot
+					       *sw_desc, unsigned char mult,
+					       int index, int dst_pos)
+{
+	struct ppc440spe_adma_chan *chan;
+	u32 mult_idx, mult_dst;
+	struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			int region = test_bit(PPC440SPE_DESC_RXOR12,
+					      &sw_desc->flags) ? 2 : 3;
+
+			if (index < region) {
+				/* RXOR multipliers */
+				iter = ppc440spe_get_group_entry(sw_desc,
+							      sw_desc->
+							      dst_cnt - 1);
+				if (sw_desc->dst_cnt == 2)
+					iter1 =
+					    ppc440spe_get_group_entry(sw_desc, 0);
+
+				mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
+				mult_dst = DMA_CDB_SG_SRC;
+			} else {
+				/* WXOR multiplier */
+				iter = ppc440spe_get_group_entry(sw_desc,
+							      index -
+							      region +
+							      sw_desc->dst_cnt);
+				mult_idx = DMA_CUED_MULT1_OFF;
+				mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
+				    DMA_CDB_SG_DST1;
+			}
+		} else {
+			int znum = 0;
+
+			/* WXOR-only;
+			 * skip first slots with destinations (if ZERO_DST has
+			 * place)
+			 */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				znum++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				znum++;
+
+			iter = ppc440spe_get_group_entry(sw_desc, index + znum);
+			mult_idx = DMA_CUED_MULT1_OFF;
+			mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
+		}
+
+		if (likely(iter)) {
+			ppc440spe_desc_set_src_mult(iter, chan,
+						 mult_idx, mult_dst, mult);
+
+			if (unlikely(iter1)) {
+				/* if we have two destinations for RXOR, then
+				 * we've just set Q mult. Set-up P now.
+				 */
+				ppc440spe_desc_set_src_mult(iter1, chan,
+							 mult_idx, mult_dst, 1);
+			}
+
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		iter = sw_desc->group_head;
+		if (sw_desc->dst_cnt == 2) {
+			/* both P & Q calculations required; set P mult here */
+			ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
+
+			/* and then set Q mult */
+			iter = ppc440spe_get_group_entry(sw_desc,
+						      sw_desc->descs_per_op);
+		}
+		ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor
+ * for the PQ_ZERO_SUM operation
+ */
+static inline void ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot
+						   *sw_desc, dma_addr_t paddr,
+						   dma_addr_t qaddr)
+{
+	struct ppc440spe_adma_desc_slot *iter, *end;
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t addr = 0;
+	int idx;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	/* walk through the WXOR source list and set P/Q-destinations
+	 * for each slot
+	 */
+	idx = (paddr && qaddr) ? 2 : 1;
+	/* set end */
+	list_for_each_entry_reverse(end, &sw_desc->group_list, chain_node) {
+		if (!(--idx))
+			break;
+	}
+	/* set start */
+	idx = (paddr && qaddr) ? 2 : 1;
+	iter = ppc440spe_get_group_entry(sw_desc, idx);
+
+	if (paddr && qaddr) {
+		/* two destinations */
+		list_for_each_entry_from(iter, &sw_desc->group_list, chain_node) {
+			if (unlikely(iter == end))
+				break;
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						  DMA_CUED_XOR_BASE, paddr, 0);
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						  DMA_CUED_XOR_BASE, qaddr, 1);
+		}
+	} else {
+		/* one destination */
+		addr = paddr ? paddr : qaddr;
+		list_for_each_entry_from(iter, &sw_desc->group_list, chain_node) {
+			if (unlikely(iter == end))
+				break;
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						  DMA_CUED_XOR_BASE, addr, 0);
+		}
+	}
+
+	/*  The remaining descriptors are DATACHECK. These have no need in
+	 * destination. Actually, these destinations are used there
+	 * as sources for check operation. So, set addr as source.
+	 */
+	ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
+
+	if (!addr) {
+		end = list_entry(end->chain_node.next,
+				 struct ppc440spe_adma_desc_slot, chain_node);
+		ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
+	}
+}
+
+static inline void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
+					  struct ppc440spe_adma_chan *chan,
+					  dma_addr_t addr)
+{
+	/*  To clear destinations update the descriptor
+	 * (P or Q depending on index) as follows:
+	 * addr is destination (0 corresponds to SG2):
+	 */
+	ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
+
+	/* ... and the addr is source: */
+	ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
+
+	/* addr is always SG2 then the mult is always DST1 */
+	ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+				 DMA_CDB_SG_DST1, 1);
+}
+
+/**
+ * ppc440spe_adma_pq_set_dest - set destination address into descriptor
+ * for the PQXOR operation
+ */
+static inline void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot
+					   *sw_desc, dma_addr_t * addrs,
+					   unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *iter;
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t paddr, qaddr;
+	dma_addr_t addr = 0, ppath, qpath;
+	int index = 0, i;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		paddr = 0;
+	else
+		paddr = addrs[0];
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		qaddr = 0;
+	else
+		qaddr = addrs[1];
+
+	if (!paddr || !qaddr)
+		addr = paddr ? paddr : qaddr;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* walk through the WXOR source list and set P/Q-destinations
+		 * for each slot:
+		 */
+		if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			/* This is WXOR-only chain; may have 1/2 zero descs */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				index++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				index++;
+
+			iter = ppc440spe_get_group_entry(sw_desc, index);
+			if (addr) {
+				/* one destination */
+				list_for_each_entry_from(iter,
+							 &sw_desc->group_list,
+							 chain_node)
+				    ppc440spe_desc_set_dest_addr(iter, chan,
+							      DMA_CUED_XOR_BASE,
+							      addr, 0);
+			} else {
+				/* two destinations */
+				list_for_each_entry_from(iter,
+							 &sw_desc->group_list,
+							 chain_node) {
+					ppc440spe_desc_set_dest_addr(iter, chan,
+								  DMA_CUED_XOR_BASE,
+								  paddr, 0);
+					ppc440spe_desc_set_dest_addr(iter, chan,
+								  DMA_CUED_XOR_BASE,
+								  qaddr, 1);
+				}
+			}
+
+			if (index) {
+				/*  To clear destinations update the descriptor
+				 * (1st,2nd, or both depending on flags)
+				 */
+				index = 0;
+				if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) {
+					iter =
+					    ppc440spe_get_group_entry(sw_desc,
+								   index++);
+					ppc440spe_adma_pq_zero_op(iter, chan,
+							       paddr);
+				}
+
+				if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) {
+					iter =
+					    ppc440spe_get_group_entry(sw_desc,
+								   index++);
+					ppc440spe_adma_pq_zero_op(iter, chan,
+							       qaddr);
+				}
+
+				return;
+			}
+		} else {
+			/* This is RXOR-only or RXOR/WXOR mixed chain */
+
+			/* If we want to include destination into calculations,
+			 * then make dest addresses cued with mult=1 (XOR).
+			 */
+			ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+			    DMA_CUED_XOR_HB :
+			    DMA_CUED_XOR_BASE | (1 << DMA_CUED_MULT1_OFF);
+			qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+			    DMA_CUED_XOR_HB :
+			    DMA_CUED_XOR_BASE | (1 << DMA_CUED_MULT1_OFF);
+
+			/* Setup destination(s) in RXOR slot(s) */
+			iter = ppc440spe_get_group_entry(sw_desc, index++);
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						  paddr ? ppath : qpath,
+						  paddr ? paddr : qaddr, 0);
+			if (!addr) {
+				/* two destinations */
+				iter = ppc440spe_get_group_entry(sw_desc, index++);
+				ppc440spe_desc_set_dest_addr(iter, chan,
+							  qpath, qaddr, 0);
+			}
+
+			if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
+				/* Setup destination(s) in remaining WXOR
+				 * slots
+				 */
+				iter = ppc440spe_get_group_entry(sw_desc, index);
+				if (addr) {
+					/* one destination */
+					list_for_each_entry_from(iter,
+								 &sw_desc->
+								 group_list,
+								 chain_node)
+					    ppc440spe_desc_set_dest_addr(iter,
+								      chan,
+								      DMA_CUED_XOR_BASE,
+								      addr, 0);
+
+				} else {
+					/* two destinations */
+					list_for_each_entry_from(iter,
+								 &sw_desc->
+								 group_list,
+								 chain_node) {
+						ppc440spe_desc_set_dest_addr
+						    (iter, chan,
+						     DMA_CUED_XOR_BASE, paddr,
+						     0);
+						ppc440spe_desc_set_dest_addr
+						    (iter, chan,
+						     DMA_CUED_XOR_BASE, qaddr,
+						     1);
+					}
+				}
+			}
+
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		/* DMA2 descriptors have only 1 destination, so there are
+		 * two chains - one for each dest.
+		 * If we want to include destination into calculations,
+		 * then make dest addresses cued with mult=1 (XOR).
+		 */
+		ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+		    DMA_CUED_XOR_HB :
+		    DMA_CUED_XOR_BASE | (1 << DMA_CUED_MULT1_OFF);
+
+		qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+		    DMA_CUED_XOR_HB :
+		    DMA_CUED_XOR_BASE | (1 << DMA_CUED_MULT1_OFF);
+
+		iter = ppc440spe_get_group_entry(sw_desc, 0);
+		for (i = 0; i < sw_desc->descs_per_op; i++) {
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						  paddr ? ppath : qpath,
+						  paddr ? paddr : qaddr, 0);
+			iter = list_entry(iter->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+		}
+
+		if (!addr) {
+			/* Two destinations; setup Q here */
+			iter = ppc440spe_get_group_entry(sw_desc,
+						      sw_desc->descs_per_op);
+			for (i = 0; i < sw_desc->descs_per_op; i++) {
+				ppc440spe_desc_set_dest_addr(iter,
+							  chan, qpath, qaddr,
+							  0);
+				iter =
+				    list_entry(iter->chain_node.next,
+					       struct ppc440spe_adma_desc_slot,
+					       chain_node);
+			}
+		}
+
+		break;
+	}
+}
+
+/**
+ * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
+ * DMA2 PQ operation
+ */
+static inline int ppc440spe_dma2_pq_slot_count(dma_addr_t * srcs, int src_cnt,
+					    size_t len)
+{
+	signed long long order = 0;
+	int state = 0;
+	int addr_count = 0;
+	int i;
+	for (i = 1; i < src_cnt; i++) {
+		dma_addr_t cur_addr = srcs[i];
+		dma_addr_t old_addr = srcs[i - 1];
+		switch (state) {
+		case 0:
+			if (cur_addr == old_addr + len) {
+				/* direct RXOR */
+				order = 1;
+				state = 1;
+				if (i == src_cnt - 1)
+					addr_count++;
+			} else if (old_addr == cur_addr + len) {
+				/* reverse RXOR */
+				order = -1;
+				state = 1;
+				if (i == src_cnt - 1)
+					addr_count++;
+			} else {
+				state = 3;
+			}
+			break;
+		case 1:
+			if (i == src_cnt - 2 || (order == -1
+						 && cur_addr !=
+						 old_addr - len)) {
+				order = 0;
+				state = 0;
+				addr_count++;
+			} else if (cur_addr == old_addr + len * order) {
+				state = 2;
+				if (i == src_cnt - 1)
+					addr_count++;
+			} else if (cur_addr == old_addr + 2 * len) {
+				state = 2;
+				if (i == src_cnt - 1)
+					addr_count++;
+			} else if (cur_addr == old_addr + 3 * len) {
+				state = 2;
+				if (i == src_cnt - 1)
+					addr_count++;
+			} else {
+				order = 0;
+				state = 0;
+				addr_count++;
+			}
+			break;
+		case 2:
+			order = 0;
+			state = 0;
+			addr_count++;
+			break;
+		}
+		if (state == 3)
+			break;
+	}
+	if (src_cnt <= 1 || (state != 1 && state != 2)) {
+		pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
+		       __func__, src_cnt, state, addr_count, order);
+		for (i = 0; i < src_cnt; i++)
+			pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
+		BUG();
+	}
+
+	return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
+}
+
+/**
+ * ppc440spe_adma_set_dest - set destination address into descriptor
+ */
+static inline void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+					dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+
+	BUG_ON(index >= sw_desc->dst_cnt);
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* to do: support transfers lengths >
+		 * ppc440spe_adma_DMA/XOR_MAX_BYTE_COUNT
+		 */
+		ppc440spe_desc_set_dest_addr(sw_desc->group_head,
+					  chan, 0, addr, index);
+		break;
+	case PPC440SPE_XOR_ID:
+		sw_desc = ppc440spe_get_group_entry(sw_desc, index);
+		ppc440spe_desc_set_dest_addr(sw_desc, chan, 0, addr, index);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
+ */
+static inline void ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot
+					       *desc, int src_cnt)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
+	hw_desc->cbc |= src_cnt;
+}
+
+/**
+ * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
+ */
+static inline void ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot
+						  *sw_desc, dma_addr_t addr,
+						  int index)
+{
+	struct ppc440spe_adma_chan *chan;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+	sw_desc = sw_desc->group_head;
+
+	if (likely(sw_desc))
+		ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_inc_addr  -
+ */
+static inline void ppc440spe_adma_dma2rxor_inc_addr(struct ppc440spe_adma_desc_slot
+						 *desc,
+						 struct ppc440spe_rxor *cursor,
+						 int index, int src_cnt)
+{
+	cursor->addr_count++;
+	if (index == src_cnt - 1) {
+		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+	} else if (cursor->addr_count == XOR_MAX_OPS) {
+		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+		cursor->addr_count = 0;
+		cursor->desc_count++;
+	}
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
+ */
+static inline int ppc440spe_adma_dma2rxor_prep_src(struct ppc440spe_adma_desc_slot
+						*hdesc,
+						struct ppc440spe_rxor *cursor,
+						int index, int src_cnt,
+						u32 addr)
+{
+	int rval = 0;
+	u32 sign;
+	struct ppc440spe_adma_desc_slot *desc = hdesc;
+	int i;
+
+	for (i = 0; i < cursor->desc_count; i++) {
+		desc = list_entry(hdesc->chain_node.next,
+				  struct ppc440spe_adma_desc_slot, chain_node);
+	}
+
+	switch (cursor->state) {
+	case 0:
+		if (addr == cursor->addrl + cursor->len) {
+			/* direct RXOR */
+			cursor->state = 1;
+			cursor->xor_count++;
+			if (index == src_cnt - 1) {
+				ppc440spe_rxor_set_region(desc,
+						       cursor->addr_count,
+						       DMA_RXOR12 <<
+						       DMA_CUED_REGION_OFF);
+				ppc440spe_adma_dma2rxor_inc_addr(desc, cursor,
+							      index, src_cnt);
+			}
+		} else if (cursor->addrl == addr + cursor->len) {
+			/* reverse RXOR */
+			cursor->state = 1;
+			cursor->xor_count++;
+			set_bit(cursor->addr_count, &desc->reverse_flags[0]);
+			if (index == src_cnt - 1) {
+				ppc440spe_rxor_set_region(desc,
+						       cursor->addr_count,
+						       DMA_RXOR12 <<
+						       DMA_CUED_REGION_OFF);
+				ppc440spe_adma_dma2rxor_inc_addr(desc, cursor,
+							      index, src_cnt);
+			}
+		} else {
+			printk(KERN_ERR "Cannot build "
+			       "DMA2 RXOR command block.\n");
+			BUG();
+		}
+		break;
+	case 1:
+		sign = test_bit(cursor->addr_count, desc->reverse_flags)
+		    ? -1 : 1;
+		if (index == src_cnt - 2 || (sign == -1
+					     && addr !=
+					     cursor->addrl - 2 * cursor->len)) {
+			cursor->state = 0;
+			cursor->xor_count = 1;
+			cursor->addrl = addr;
+			ppc440spe_rxor_set_region(desc,
+					       cursor->addr_count,
+					       DMA_RXOR12 <<
+					       DMA_CUED_REGION_OFF);
+			ppc440spe_adma_dma2rxor_inc_addr(desc, cursor, index,
+						      src_cnt);
+		} else if (addr == cursor->addrl + 2 * sign * cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+					       cursor->addr_count,
+					       DMA_RXOR123 <<
+					       DMA_CUED_REGION_OFF);
+			if (index == src_cnt - 1) {
+				ppc440spe_adma_dma2rxor_inc_addr(desc, cursor,
+							      index, src_cnt);
+			}
+		} else if (addr == cursor->addrl + 3 * cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+					       cursor->addr_count,
+					       DMA_RXOR124 <<
+					       DMA_CUED_REGION_OFF);
+			if (index == src_cnt - 1) {
+				ppc440spe_adma_dma2rxor_inc_addr(desc, cursor,
+							      index, src_cnt);
+			}
+		} else if (addr == cursor->addrl + 4 * cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+					       cursor->addr_count,
+					       DMA_RXOR125 <<
+					       DMA_CUED_REGION_OFF);
+			if (index == src_cnt - 1) {
+				ppc440spe_adma_dma2rxor_inc_addr(desc, cursor,
+							      index, src_cnt);
+			}
+		} else {
+			cursor->state = 0;
+			cursor->xor_count = 1;
+			cursor->addrl = addr;
+			ppc440spe_rxor_set_region(desc,
+					       cursor->addr_count,
+					       DMA_RXOR12 <<
+					       DMA_CUED_REGION_OFF);
+			ppc440spe_adma_dma2rxor_inc_addr(desc, cursor, index,
+						      src_cnt);
+		}
+		break;
+	case 2:
+		cursor->state = 0;
+		cursor->addrl = addr;
+		cursor->xor_count++;
+		if (index) {
+			ppc440spe_adma_dma2rxor_inc_addr(desc, cursor, index,
+						      src_cnt);
+		}
+		break;
+	}
+
+	return rval;
+}
+
+static inline void ppc440spe_free_ref(struct ppc440spe_adma_device *adev,
+				   struct platform_device *ofdev,
+				   struct ppc440spe_adma_chan *chan)
+{
+	if (adev->id != PPC440SPE_XOR_ID) {
+		dma_unmap_page(&ofdev->dev, chan->pdest,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_unmap_page(&ofdev->dev, chan->qdest,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(chan->pdest_page);
+		__free_page(chan->qdest_page);
+	}
+}
+static inline void ppc440spe_free_reg(struct ppc440spe_adma_device *adev)
+{
+	if (adev->id == PPC440SPE_XOR_ID)
+		iounmap(adev->xor_reg);
+	else
+		iounmap(adev->dma_reg);
+}
+static inline int ppc440spe_get_cdb_size(struct ppc440spe_adma_chan *ppc440spe_chan)
+{
+	int db_sz;
+	if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID)
+		db_sz = sizeof(struct dma_cdb);
+	else
+		db_sz = sizeof(struct xor_cb);
+	return db_sz;
+}
+
+/*
+ * initialize the channel and the chain with a null operation
+ */
+static inline void ppc440spe_init_chan_null_op(struct ppc440spe_adma_chan
+					    *ppc440spe_chan)
+{
+	switch (ppc440spe_chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		ppc440spe_chan->hw_chain_inited = 0;
+		/* Use WXOR for self-testing */
+		if (!ppc440spe_r6_tchan)
+			ppc440spe_r6_tchan = ppc440spe_chan;
+		break;
+	case PPC440SPE_XOR_ID:
+		ppc440spe_chan_start_null_xor(ppc440spe_chan);
+		break;
+	default:
+		BUG();
+	}
+}
+static inline int ppc440spe_adma_get_devid(struct platform_device *ofdev,
+					struct device_node *np)
+{
+	unsigned int id;
+	unsigned int len;
+	const unsigned int *idx;
+	if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
+		id = PPC440SPE_XOR_ID;
+	} else {
+		/* it is DMA0 or DMA1 */
+		idx = of_get_property(np, "cell-index", &len);
+		/* it is DMA0 or DMA1 */
+		if (!idx || (len != sizeof(u32))) {
+			dev_err(&ofdev->dev, "Device node %s has missing "
+				"or invalid cell-index property\n",
+				np->full_name);
+			return -EINVAL;
+		}
+		id = *idx;
+	}
+	return id;
+}
+static inline int ppc440spe_adma_get_pool_size(struct device_node *np, int id)
+{
+	unsigned int pool_size;
+	if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
+		/* As far as the XOR engine is concerned, it does not
+		 * use FIFOs but uses linked list. So there is no dependency
+		 * between pool size to allocate and the engine configuration.
+		 */
+		pool_size = PAGE_SIZE << 1;
+	} else {
+		/* DMA0,1 engines use FIFO to maintain CDBs, so we
+		 * should allocate the pool accordingly to size of this
+		 * FIFO. Thus, the pool size depends on the FIFO depth:
+		 * how much CDBs pointers the FIFO may contain then so
+		 * much CDBs we should provide in the pool.
+		 * That is
+		 * CDB size = 32B;
+		 * CDBs number = (DMA0_FIFO_SIZE >> 3);
+		 * Pool size = CDBs number * CDB size =
+		 * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
+		 */
+		pool_size = (id == PPC440SPE_DMA0_ID) ?
+		    DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+		pool_size <<= 2;
+	}
+	return pool_size;
+}
+static inline void ppc440spe_adma_init_hw(struct ppc440spe_adma_device *adev,
+				       void *regs)
+{
+	if (adev->id == PPC440SPE_XOR_ID) {
+		adev->xor_reg = regs;
+		/* Reset XOR */
+		iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
+		iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
+	} else {
+		size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
+		    DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+		adev->dma_reg = regs;
+		/* DMAx_FIFO_SIZE is defined in bytes,
+		 * <fsiz> - is defined in number of CDB pointers (8byte).
+		 * DMA FIFO Length = CSlength + CPlength, where
+		 * CSlength = CPlength = (fsiz + 1) * 8.
+		 */
+		iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
+			  &adev->dma_reg->fsiz);
+		/* Configure DMA engine */
+		iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
+			  &adev->dma_reg->cfg);
+		/* Clear Status */
+		iowrite32(~0, &adev->dma_reg->dsts);
+	}
+}
+static inline int ppc440spe_create_helper_pages(struct ppc440spe_adma_device *adev,
+					     struct platform_device *ofdev,
+					     struct ppc440spe_adma_chan *chan)
+{
+	int ret = 0;
+	/* allocate and map helper pages for async validation or
+	 * async_mult/async_sum_product operations on DMA0/1.
+	 */
+	if (adev->id != PPC440SPE_XOR_ID) {
+		chan->pdest_page = alloc_page(GFP_KERNEL);
+		chan->qdest_page = alloc_page(GFP_KERNEL);
+		if (!chan->pdest_page || !chan->qdest_page) {
+			if (chan->pdest_page)
+				__free_page(chan->pdest_page);
+			if (chan->qdest_page)
+				__free_page(chan->qdest_page);
+			ret = -ENOMEM;
+			goto err_page_alloc;
+		}
+		chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+		chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+      err_page_alloc:
+	return ret;
+}
+
+#endif /*__PPC440SPE_ADMA_H*/
-- 
1.6.1.rc3

^ permalink raw reply related

* Re: [PATCH] PPC4xx: ADMA separating SoC specific functions
From: Wolfgang Denk @ 2010-09-30 19:08 UTC (permalink / raw)
  To: tmarri; +Cc: linux-raid, dan.j.williams, linuxppc-dev, linux-crypto, yur
In-Reply-To: <1285865736-32074-1-git-send-email-tmarri@apm.com>

Dear tmarri@apm.com,

In message <1285865736-32074-1-git-send-email-tmarri@apm.com> you wrote:
> From: Tirumala Marri <tmarri@apm.com>
> 
> This patch separates the SoC specific functions and moved
> to different files.
> 
> The reason for ppc440spe-adma.h is to define in-line functions which
> are called by both adma.c and ppc440spe-adma.c . 
> 
> Where as ppc440spe-adma.c is to define functions are completely
> completely dependent on 440spe, also which are too big to define
> as in-line functions.

When reposting a patch, please always indicate that this is new
version by using something like "[PATCH v2]" in the Subject line.

> Signed-off-by: Tirumala R Marri <tmarri@apm.com>
> Acked-by: Yuri Tikhonov <yur@emcraft.com>
> CC:  Dan Williams <dan.j.williams@intel.com>
> CC:  Josh Boyer <jwboyer@linux.vnet.ibm.com>
> ---

Also, please include here (i. e. below the "---" line, i. e. in the
comments section, a description of what was changed compared to the
previous version of this patch.

As is, you enforce us to rescan the whole patch again and check
manually if you have reacted to any of the comments sent before, and
how.  As is, you make reviewing your poatches harder than necessary.


> diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
> index 0d58a4a..a1053cb 100644
> --- a/drivers/dma/ppc4xx/adma.c
> +++ b/drivers/dma/ppc4xx/adma.c
...
> +#include "ppc440spe-adma.h"
> +
> +struct dma_async_tx_descriptor
> +*ppc440spe_adma_prep_dma_pq(struct dma_chan *chan,
> +			       dma_addr_t * dst,
> +			       dma_addr_t * src,
> +			       unsigned int src_cnt,
> +			       const unsigned char *scf,
> +			       size_t len,
> +			       unsigned long flags);
> +struct dma_async_tx_descriptor
> +*ppc440spe_adma_prep_dma_pqzero_sum(struct dma_chan *chan,

Should such 440SPe specific code not be removed here and placed into
ppc440spe-adma.c instead?

> +#if 0
>  static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
>  			    unsigned int src_cnt)
>  {
> @@ -213,8 +104,9 @@ static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
>  	for (i = 0; i < 2; i++)
>  		pr_debug("\t0x%016llx ", dst[i]);
>  }
> +#endif

Please do not add dead code - remove the whole "#if 0" block.


>  /******************************************************************************
>   * ADMA channel low-level routines
>   ******************************************************************************/
>  
> -static u32
...
...
> -}
>  
>  /******************************************************************************
>   * ADMA device level
>   ******************************************************************************/

It seems youremove all code, but leave the (now empty) comment
headers? This makes little sense to me.

...
>  /**
>   * ppc440spe_adma_free_slots - flags descriptor slots for reuse
>   * @slot: Slot to free
>   * Caller must hold &ppc440spe_chan->lock while calling this function
>   */

Again, all this is pretty low-level 440SPe specific code. Why do you
keep this in the common drive rfile instead of moving it into the new
440SPe specific file?


> diff --git a/drivers/dma/ppc4xx/ppc440spe-adma.c b/drivers/dma/ppc4xx/ppc440spe-adma.c
> new file mode 100644
> index 0000000..da467b4
...
> +	/*  In the current implementation of ppc440spe ADMA driver it
> +
> +
> +
> +	 * makes sense to pick out only pq case, because it may be

Formatting problems?


> diff --git a/drivers/dma/ppc4xx/ppc440spe-adma.h b/drivers/dma/ppc4xx/ppc440spe-adma.h
> new file mode 100644
> index 0000000..81a1f46
> --- /dev/null
> +++ b/drivers/dma/ppc4xx/ppc440spe-adma.h
...
> +/*
> + * ppc440spe_get_group_entry - get group entry with index idx
> + * @tdesc: is the last allocated slot in the group.
> + */
> +static struct ppc440spe_adma_desc_slot *ppc440spe_get_group_entry(struct
> +							    ppc440spe_adma_desc_slot
> +							    *tdesc,
> +							    u32 entry_idx)
> +{
> +	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
> +	int i = 0;
> +
> +	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
> +		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
> +		       __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
> +		BUG();
> +	}
> +
> +	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
> +		if (i++ == entry_idx)
> +			break;
> +	}
> +	return iter;
> +}

This is a header file, yet you add here literally thousands of lines of
code.


Note that more or less similar questions have been asked for the
previous version of this patch, but I fail to find any good
justification in your replies.


Selecting the architecture at build time is bad as it prevents using a
sinlge kernel image across a wide range of boards.  You only replied
"We select the architecture at build time." without any explanation if
there is a pressing technical reason to do it this way, or if this was
just a arbitrary decision.

The same goes for putting so much source code in a header file - I
really see no technical need for this (especially not if you build for
a single architecture only).

Also I wonder why you still keep so many 440SPe specific code in the
common file, even though you just create new 440SPe specific header
and source files.


Please elucidate.


Thanks.

Wolfgang Denk

-- 
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
"Unix is simple, but it takes a genius to understand the simplicity."
					             - Dennis Ritchie

^ permalink raw reply

* Re: [PATCH v3 6/7] mtd: m25p80: add a read function to read page by page
From: Grant Likely @ 2010-09-30 20:57 UTC (permalink / raw)
  To: Anton Vorontsov
  Cc: kumar.gala, David Brownell, linuxppc-dev, linux-mtd,
	spi-devel-general, Mingkai Hu
In-Reply-To: <20100930150633.GA13741@oksana.dev.rtsoft.ru>

On Fri, Oct 1, 2010 at 12:06 AM, Anton Vorontsov <cbouatmailru@gmail.com> w=
rote:
> On Thu, Sep 30, 2010 at 11:41:40PM +0900, Grant Likely wrote:
>> On Thu, Sep 30, 2010 at 11:16 PM, Grant Likely
>> <grant.likely@secretlab.ca> wrote:
>> > On Thu, Sep 30, 2010 at 7:46 PM, David Brownell <david-b@pacbell.net> =
wrote:
>> >>
>> >> --- On Thu, 9/30/10, Mingkai Hu <Mingkai.hu@freescale.com> wrote:
>> >>
>> >>> From: Mingkai Hu <Mingkai.hu@freescale.com>
>> >>> Subject: [PATCH v3 6/7] mtd: m25p80: add a read function to read pag=
e by page
>> >>
>> >> NAK.
>> >>
>> >> We went over this before.
>> >
>> > Yes, I agree with David on this. =A0If large transfers don't work, the=
n
>> > it is the SPI master driver that is buggy.
>>
>> By the way, does this fix your problem?
>>
>> https://patchwork.kernel.org/patch/184752/
>
> It shouldn't. AFAIK, eSPI is PIO-only controller, and the overrun
> fix is for the DMA mode.
>
> Thanks,
>
> p.s. Btw, in patch 3/7, is_dma_mapped argument of fsl_espi_bufs()
> is unneeded.

Thanks Anton.  Please reply to that patch with this comment so that
patchwork records it and I don't forget about it.

Thanks,
g.

>
> --
> Anton Vorontsov
> email: cbouatmailru@gmail.com
> irc://irc.freenode.net/bd2
>



--=20
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

^ permalink raw reply

* Re: [PATCH (Option 1)] of/i2c: fix module load order issue caused by of_i2c.c
From: Grant Likely @ 2010-09-30 21:11 UTC (permalink / raw)
  To: Jean Delvare
  Cc: mikpe, linux-kernel, rdunlap, linux-i2c, linuxppc-dev, Ben Dooks
In-Reply-To: <20100929164251.0243ac7f@endymion.delvare>

On Wed, Sep 29, 2010 at 11:42 PM, Jean Delvare <khali@linux-fr.org> wrote:
> On Wed, 29 Sep 2010 00:20:54 +0100, Ben Dooks wrote:
>> On Fri, Sep 24, 2010 at 04:14:53PM -0600, Grant Likely wrote:
>> > Commit 959e85f7, "i2c: add OF-style registration and binding" caused a
>> > module dependency loop where of_i2c.c calls functions in i2c-core, and
>> > i2c-core calls of_i2c_register_devices() in of_i2c. =A0This means that
>> > when i2c support is built as a module when CONFIG_OF is set, then
>> > neither i2c_core nor of_i2c are able to be loaded.
>> >
>> > This patch fixes the problem by moving the of_i2c_register_devices()
>> > function into the body of i2c_core and renaming it to
>> > i2c_scan_of_devices (of_i2c_register_devices is analogous to the
>> > existing i2c_scan_static_board_info function and so should be named
>> > similarly). =A0This function isn't called by any code outside of
>> > i2c_core, and it must always be present when CONFIG_OF is selected, so
>> > it makes sense to locate it there. =A0When CONFIG_OF is not selected,
>> > of_i2c_register_devices() becomes a no-op.
>>
>> I sort of go with this one.
>
> Actually I would prefer option #2, even though I understand it won't
> make Grant too happy. Having a large chunk of OF-specific code in
> i2c-core, leaving of_i2c.c almost empty, doesn't seem right.

I'm fine with this.  In the grand scheme, it ends up being an unimportant p=
oint.

> I took a look at what other relevant subsystems do. SPI is boolean so it
> doesn't have the issue. MDIO is tristate, the registration function is
> in of_mdio.c and individual drivers call it. And there are a lot more
> of these (9) than i2c drivers (3).
>
> So I would let individual drivers call of_i2c_register_devices(), as it
> used to be until 2.6.35. 2 extra functions calls doesn't seem a high
> price to pay to keep the code logically separated. This also make
> things consistent, with all OF registration functions living under
> drivers/of.

This is actually historical and somewhat in transition.  Now that all
the OF core code is cleaned up and generalized, I'm looking at the bus
specific hooks and deciding what would be best to do about them.  I'm
likely to move the SPI support into drivers/spi, and I'll probably
post a patch to do the same for drivers/net/phy and for the platform
bus.  The reason being that the data extraction code is far more bus
specific than it is OF-specific.

I will however back off from putting the registration hook directly
into the shared bus registration functions for the time being.  It is
a minor issue, and it does make a certain amount of sense for the
individual drivers to control the bus population.  Proof is in the
patches anyway and we can debate it after I actually post something
concrete.

g.

^ permalink raw reply

* Re: [PATCH v3 5/7] mtd: m25p80: add support to parse the SPI flash's partitions
From: Grant Likely @ 2010-09-30 21:34 UTC (permalink / raw)
  To: Mingkai Hu; +Cc: linuxppc-dev, kumar.gala, linux-mtd, spi-devel-general
In-Reply-To: <1285833646-12006-6-git-send-email-Mingkai.hu@freescale.com>

On Thu, Sep 30, 2010 at 5:00 PM, Mingkai Hu <Mingkai.hu@freescale.com> wrot=
e:
> Signed-off-by: Mingkai Hu <Mingkai.hu@freescale.com>
> ---
> v3:
> =A0- Move the SPI flash partition code to the probe function.
>
> =A0drivers/mtd/devices/m25p80.c | =A0 39 +++++++++++++++++++++++++++-----=
-------
> =A01 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
> index 6f512b5..47d53c7 100644
> --- a/drivers/mtd/devices/m25p80.c
> +++ b/drivers/mtd/devices/m25p80.c
> @@ -772,7 +772,7 @@ static const struct spi_device_id *__devinit jedec_pr=
obe(struct spi_device *spi)
> =A0static int __devinit m25p_probe(struct spi_device *spi)
> =A0{
> =A0 =A0 =A0 =A0const struct spi_device_id =A0 =A0 =A0*id =3D spi_get_devi=
ce_id(spi);
> - =A0 =A0 =A0 struct flash_platform_data =A0 =A0 =A0*data;
> + =A0 =A0 =A0 struct flash_platform_data =A0 =A0 =A0data, *pdata;
> =A0 =A0 =A0 =A0struct m25p =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 *flash=
;
> =A0 =A0 =A0 =A0struct flash_info =A0 =A0 =A0 =A0 =A0 =A0 =A0 *info;
> =A0 =A0 =A0 =A0unsigned =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0i;
> @@ -782,13 +782,27 @@ static int __devinit m25p_probe(struct spi_device *=
spi)
> =A0 =A0 =A0 =A0 * a chip ID, try the JEDEC id commands; they'll work for =
most
> =A0 =A0 =A0 =A0 * newer chips, even if we don't recognize the particular =
chip.
> =A0 =A0 =A0 =A0 */
> - =A0 =A0 =A0 data =3D spi->dev.platform_data;
> - =A0 =A0 =A0 if (data && data->type) {
> + =A0 =A0 =A0 pdata =3D spi->dev.platform_data;
> + =A0 =A0 =A0 if (!pdata && spi->dev.of_node) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 int nr_parts;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct mtd_partition *parts;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct device_node *np =3D spi->dev.of_node=
;
> +
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 nr_parts =3D of_mtd_parse_partitions(&spi->=
dev, np, &parts);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (nr_parts) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pdata =3D &data;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 memset(pdata, 0, sizeof(*pd=
ata));
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pdata->parts =3D parts;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pdata->nr_parts =3D nr_part=
s;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
> + =A0 =A0 =A0 }

Yes, this is the correct way to go about adding the partitions.
However, this patch can be made simpler by not renaming 'data' to
'pdata' and by moving the above code down to just before the partition
information is actually used.  in the OF case, only the parts and the
nr_parts values written into data, and those values aren't used until
the last part of the probe function.

Regardless, in principle this patch is correct:

Acked-by: Grant Likely <grant.likely@secretlab.ca>

> +
> + =A0 =A0 =A0 if (pdata && pdata->type) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0const struct spi_device_id *plat_id;
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0for (i =3D 0; i < ARRAY_SIZE(m25p_ids) - 1=
; i++) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0plat_id =3D &m25p_ids[i];
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (strcmp(data->type, plat=
_id->name))
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (strcmp(pdata->type, pla=
t_id->name))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0continue;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
> @@ -796,7 +810,8 @@ static int __devinit m25p_probe(struct spi_device *sp=
i)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (i < ARRAY_SIZE(m25p_ids) - 1)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0id =3D plat_id;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0else
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dev_warn(&spi->dev, "unreco=
gnized id %s\n", data->type);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dev_warn(&spi->dev, "unreco=
gnized id %s\n",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
 =A0 pdata->type);
> =A0 =A0 =A0 =A0}
>
> =A0 =A0 =A0 =A0info =3D (void *)id->driver_data;
> @@ -847,8 +862,8 @@ static int __devinit m25p_probe(struct spi_device *sp=
i)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0write_sr(flash, 0);
> =A0 =A0 =A0 =A0}
>
> - =A0 =A0 =A0 if (data && data->name)
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 flash->mtd.name =3D data->name;
> + =A0 =A0 =A0 if (pdata && pdata->name)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 flash->mtd.name =3D pdata->name;
> =A0 =A0 =A0 =A0else
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0flash->mtd.name =3D dev_name(&spi->dev);
>
> @@ -919,9 +934,9 @@ static int __devinit m25p_probe(struct spi_device *sp=
i)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0part_probes, &parts, 0);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
>
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (nr_parts <=3D 0 && data && data->parts)=
 {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 parts =3D data->parts;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 nr_parts =3D data->nr_parts=
;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (nr_parts <=3D 0 && pdata && pdata->part=
s) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 parts =3D pdata->parts;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 nr_parts =3D pdata->nr_part=
s;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}

As per my comment earlier; since parts and nr_parts isn't needed
before this point, this block could simply be:

if (nr_parts <=3D 0 && data && data->parts) {
        parts =3D data->parts;
        nr_parts =3D data->nr_parts;
}
if (nr_parts <=3D 0 && spi->dev.of_node)
  =A0 =A0 =A0 nr_parts =3D of_mtd_parse_partitions(&spi->dev, np, &parts);

And most of the other changes to this file goes away.  Simpler, yes?

g.

^ permalink raw reply

* Re: [PATCH v3 6/7] mtd: m25p80: add a read function to read page by page
From: Grant Likely @ 2010-09-30 21:41 UTC (permalink / raw)
  To: Mingkai Hu; +Cc: linuxppc-dev, kumar.gala, linux-mtd, spi-devel-general
In-Reply-To: <1285833646-12006-7-git-send-email-Mingkai.hu@freescale.com>

Hmmm.... for some reason the previous replies didn't get picked up by
patchwork, so I'm replying with my comment again for the public
record.

In this case the eSPI controller driver is buggy and needs to be
fixed.  If the hardware can only support small transfers, then it is
the responsibilty of the driver to chain up smaller chunks into one
big transfer, and make sure that the CS line doesn't go low in the
middle of it.

g.

On Thu, Sep 30, 2010 at 5:00 PM, Mingkai Hu <Mingkai.hu@freescale.com> wrot=
e:
> For Freescale's eSPI controller, the max transaction length one time
> is limitted by the SPCOM[TRANSLEN] field which is 0xFFFF. When used
> mkfs.ext2 command to create ext2 filesystem on the flash, the read
> length will exceed the max value of the SPCOM[TRANSLEN] field, so
> change the read function to read page by page.
>
> For other SPI flash driver, also needed to supply the read function
> if used the eSPI controller.
>
> Signed-off-by: Mingkai Hu <Mingkai.hu@freescale.com>
> ---
> v3:
> =A0- Add a quirks member for the SPI master to handle the contrains of th=
e
> =A0 SPI controller. I can't think of other method. :-(
>
> =A0drivers/mtd/devices/m25p80.c | =A0 78 ++++++++++++++++++++++++++++++++=
++++++++++
> =A0drivers/spi/spi_fsl_lib.c =A0 =A0| =A0 =A04 ++
> =A0include/linux/spi/spi.h =A0 =A0 =A0| =A0 =A05 +++
> =A03 files changed, 87 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
> index 47d53c7..f65cca8 100644
> --- a/drivers/mtd/devices/m25p80.c
> +++ b/drivers/mtd/devices/m25p80.c
> @@ -377,6 +377,81 @@ static int m25p80_read(struct mtd_info *mtd, loff_t =
from, size_t len,
> =A0}
>
> =A0/*
> + * Read an address range from the flash chip page by page.
> + * Some controller has transaction length limitation such as the
> + * Freescale's eSPI controller can only trasmit 0xFFFF bytes one
> + * time, so we have to read page by page if the len is more than
> + * the limitation.
> + */
> +static int m25p80_page_read(struct mtd_info *mtd, loff_t from, size_t le=
n,
> + =A0 =A0 =A0 size_t *retlen, u_char *buf)
> +{
> + =A0 =A0 =A0 struct m25p *flash =3D mtd_to_m25p(mtd);
> + =A0 =A0 =A0 struct spi_transfer t[2];
> + =A0 =A0 =A0 struct spi_message m;
> + =A0 =A0 =A0 u32 i, page_size =3D 0;
> +
> + =A0 =A0 =A0 DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %zd\n",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dev_name(&flash->spi->dev),=
 __func__, "from",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 (u32)from, len);
> +
> + =A0 =A0 =A0 /* sanity checks */
> + =A0 =A0 =A0 if (!len)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (from + len > flash->mtd.size)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return -EINVAL;
> +
> + =A0 =A0 =A0 spi_message_init(&m);
> + =A0 =A0 =A0 memset(t, 0, (sizeof t));
> +
> + =A0 =A0 =A0 /* NOTE:
> + =A0 =A0 =A0 =A0* OPCODE_FAST_READ (if available) is faster.
> + =A0 =A0 =A0 =A0* Should add 1 byte DUMMY_BYTE.
> + =A0 =A0 =A0 =A0*/
> + =A0 =A0 =A0 t[0].tx_buf =3D flash->command;
> + =A0 =A0 =A0 t[0].len =3D m25p_cmdsz(flash) + FAST_READ_DUMMY_BYTE;
> + =A0 =A0 =A0 spi_message_add_tail(&t[0], &m);
> +
> + =A0 =A0 =A0 t[1].rx_buf =3D buf;
> + =A0 =A0 =A0 spi_message_add_tail(&t[1], &m);
> +
> + =A0 =A0 =A0 /* Byte count starts at zero. */
> + =A0 =A0 =A0 if (retlen)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 *retlen =3D 0;
> +
> + =A0 =A0 =A0 mutex_lock(&flash->lock);
> +
> + =A0 =A0 =A0 /* Wait till previous write/erase is done. */
> + =A0 =A0 =A0 if (wait_till_ready(flash)) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* REVISIT status return?? */
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 mutex_unlock(&flash->lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 1;
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 /* Set up the write data buffer. */
> + =A0 =A0 =A0 flash->command[0] =3D OPCODE_READ;
> +
> + =A0 =A0 =A0 for (i =3D page_size; i < len; i +=3D page_size) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 page_size =3D len - i;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (page_size > flash->page_size)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 page_size =3D flash->page_s=
ize;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 m25p_addr2cmd(flash, from + i, flash->comma=
nd);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 t[1].len =3D page_size;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 t[1].rx_buf =3D buf + i;
> +
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 spi_sync(flash->spi, &m);
> +
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 *retlen +=3D m.actual_length - m25p_cmdsz(f=
lash)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 - FAST_READ_DUMMY_BYTE;
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 mutex_unlock(&flash->lock);
> +
> + =A0 =A0 =A0 return 0;
> +}
> +
> +/*
> =A0* Write an address range to the flash chip. =A0Data must be written in
> =A0* FLASH_PAGESIZE chunks. =A0The address range may be any size provided
> =A0* it is within the physical boundaries.
> @@ -874,6 +949,9 @@ static int __devinit m25p_probe(struct spi_device *sp=
i)
> =A0 =A0 =A0 =A0flash->mtd.erase =3D m25p80_erase;
> =A0 =A0 =A0 =A0flash->mtd.read =3D m25p80_read;
>
> + =A0 =A0 =A0 if (spi->master->quirks & SPI_QUIRK_TRANS_LEN_LIMIT)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 flash->mtd.read =3D m25p80_page_read;
> +
> =A0 =A0 =A0 =A0/* sst flash chips use AAI word program */
> =A0 =A0 =A0 =A0if (info->jedec_id >> 16 =3D=3D 0xbf)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0flash->mtd.write =3D sst_write;
> diff --git a/drivers/spi/spi_fsl_lib.c b/drivers/spi/spi_fsl_lib.c
> index 5cd741f..c8d8c2d 100644
> --- a/drivers/spi/spi_fsl_lib.c
> +++ b/drivers/spi/spi_fsl_lib.c
> @@ -135,6 +135,10 @@ int mpc8xxx_spi_probe(struct device *dev, struct res=
ource *mem,
> =A0 =A0 =A0 =A0master->cleanup =3D mpc8xxx_spi_cleanup;
> =A0 =A0 =A0 =A0master->dev.of_node =3D dev->of_node;
>
> + =A0 =A0 =A0 if (of_get_property(dev->of_node,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "fsl,spi-qu=
irk-trans-len-limit", NULL))
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 master->quirks |=3D SPI_QUIRK_TRANS_LEN_LIM=
IT;
> +
> =A0 =A0 =A0 =A0mpc8xxx_spi =3D spi_master_get_devdata(master);
> =A0 =A0 =A0 =A0mpc8xxx_spi->dev =3D dev;
> =A0 =A0 =A0 =A0mpc8xxx_spi->get_rx =3D mpc8xxx_spi_rx_buf_u8;
> diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
> index 92e52a1..4234dfd 100644
> --- a/include/linux/spi/spi.h
> +++ b/include/linux/spi/spi.h
> @@ -304,6 +304,11 @@ struct spi_master {
>
> =A0 =A0 =A0 =A0/* called on release() to free memory provided by spi_mast=
er */
> =A0 =A0 =A0 =A0void =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(*cleanup)(str=
uct spi_device *spi);
> +
> + =A0 =A0 =A0 /* some constraints of the controller */
> + =A0 =A0 =A0 u16 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 quirks;
> +#define SPI_QUIRK_TRANS_LEN_LIMIT =A0 =A0 =A0BIT(0) =A0/* have trans len=
gth limit */
> +
> =A0};
>
> =A0static inline void *spi_master_get_devdata(struct spi_master *master)
> --
> 1.6.4
>
>
>



--=20
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

^ permalink raw reply

* [PATCH v4 2/4] fsldma: implement support for scatterlist to scatterlist copy
From: Ira W. Snyder @ 2010-09-30 21:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Dan Williams, linuxppc-dev
In-Reply-To: <1285883207-13761-1-git-send-email-iws@ovro.caltech.edu>

Now that the DMAEngine API has support for scatterlist to scatterlist
copy, implement support for the Freescale DMA controller.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
---
 drivers/dma/fsldma.c |  128 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index cea08be..1ed29d1 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -38,6 +38,8 @@
 #include <asm/fsldma.h>
 #include "fsldma.h"
 
+static const char msg_ld_oom[] = "No free memory for link descriptor\n";
+
 static void dma_init(struct fsldma_chan *chan)
 {
 	/* Reset the channel */
@@ -499,7 +501,7 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
 
 	new = fsl_dma_alloc_descriptor(chan);
 	if (!new) {
-		dev_err(chan->dev, "No free memory for link descriptor\n");
+		dev_err(chan->dev, msg_ld_oom);
 		return NULL;
 	}
 
@@ -536,8 +538,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 		/* Allocate the link descriptor from DMA pool */
 		new = fsl_dma_alloc_descriptor(chan);
 		if (!new) {
-			dev_err(chan->dev,
-					"No free memory for link descriptor\n");
+			dev_err(chan->dev, msg_ld_oom);
 			goto fail;
 		}
 #ifdef FSL_DMA_LD_DEBUG
@@ -583,6 +584,125 @@ fail:
 	return NULL;
 }
 
+static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan,
+	struct scatterlist *dst_sg, unsigned int dst_nents,
+	struct scatterlist *src_sg, unsigned int src_nents,
+	unsigned long flags)
+{
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	size_t dst_avail, src_avail;
+	dma_addr_t dst, src;
+	size_t len;
+
+	/* basic sanity checks */
+	if (dst_nents == 0 || src_nents == 0)
+		return NULL;
+
+	if (dst_sg == NULL || src_sg == NULL)
+		return NULL;
+
+	/*
+	 * TODO: should we check that both scatterlists have the same
+	 * TODO: number of bytes in total? Is that really an error?
+	 */
+
+	/* get prepared for the loop */
+	dst_avail = sg_dma_len(dst_sg);
+	src_avail = sg_dma_len(src_sg);
+
+	/* run until we are out of scatterlist entries */
+	while (true) {
+
+		/* create the largest transaction possible */
+		len = min_t(size_t, src_avail, dst_avail);
+		len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT);
+		if (len == 0)
+			goto fetch;
+
+		dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;
+		src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
+
+		/* allocate and populate the descriptor */
+		new = fsl_dma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, msg_ld_oom);
+			goto fail;
+		}
+#ifdef FSL_DMA_LD_DEBUG
+		dev_dbg(chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+		set_desc_cnt(chan, &new->hw, len);
+		set_desc_src(chan, &new->hw, src);
+		set_desc_dst(chan, &new->hw, dst);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+		prev = new;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+
+		/* update metadata */
+		dst_avail -= len;
+		src_avail -= len;
+
+fetch:
+		/* fetch the next dst scatterlist entry */
+		if (dst_avail == 0) {
+
+			/* no more entries: we're done */
+			if (dst_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			dst_sg = sg_next(dst_sg);
+			if (dst_sg == NULL)
+				break;
+
+			dst_nents--;
+			dst_avail = sg_dma_len(dst_sg);
+		}
+
+		/* fetch the next src scatterlist entry */
+		if (src_avail == 0) {
+
+			/* no more entries: we're done */
+			if (src_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			src_sg = sg_next(src_sg);
+			if (src_sg == NULL)
+				break;
+
+			src_nents--;
+			src_avail = sg_dma_len(src_sg);
+		}
+	}
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
+	return NULL;
+}
+
 /**
  * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
  * @chan: DMA channel
@@ -1327,11 +1447,13 @@ static int __devinit fsldma_of_probe(struct platform_device *op,
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+	dma_cap_set(DMA_SG, fdev->common.cap_mask);
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
 	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
 	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
 	fdev->common.device_tx_status = fsl_tx_status;
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
-- 
1.7.1

^ permalink raw reply related

* [PATCH v4 1/4] dma: add support for scatterlist to scatterlist copy
From: Ira W. Snyder @ 2010-09-30 21:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Dan Williams, linuxppc-dev
In-Reply-To: <1285883207-13761-1-git-send-email-iws@ovro.caltech.edu>

This adds support for scatterlist to scatterlist DMA transfers. A
similar interface is exposed by the fsldma driver (through the DMA_SLAVE
API) and by the ste_dma40 driver (through an exported function).

This patch paves the way for making this type of copy operation a part
of the generic DMAEngine API. Futher patches will add support in
individual drivers.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
---
 drivers/dma/dmaengine.c   |    2 ++
 include/linux/dmaengine.h |    6 ++++++
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9d31d5e..db403b8 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -690,6 +690,8 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
 		!device->device_prep_dma_interrupt);
+	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
+		!device->device_prep_dma_sg);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_prep_slave_sg);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c61d4ca..7c44620 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -64,6 +64,7 @@ enum dma_transaction_type {
 	DMA_PQ_VAL,
 	DMA_MEMSET,
 	DMA_INTERRUPT,
+	DMA_SG,
 	DMA_PRIVATE,
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
@@ -473,6 +474,11 @@ struct dma_device {
 		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_sg)(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags);
 
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
-- 
1.7.1

^ permalink raw reply related

* [PATCH v4 0/4] dma: add support for scatterlist to scatterlist copy
From: Ira W. Snyder @ 2010-09-30 21:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Dan Williams, linuxppc-dev

This series adds support for scatterlist to scatterlist copies to the
generic DMAEngine API. Both the fsldma and ste_dma40 drivers currently
implement a similar API using different, non-generic methods. This series
converts both of them to the new, standardized API.

By doing this as part of the core DMAEngine API, the individual drivers
have control over how to chain their descriptors together. In addition,
this makes graceful failure much easier to support.

The fsldma implementation has been tested on real hardware, using slightly
modified versions of the patches posted a few weeks ago, titled:
[PATCH RFCv2 0/5] CARMA Board Support

These patches will be re-submitted with the appropriate changes after we're
happy with this series.

Thanks to all that provided input!
Ira

Changes since v3:
- ste40_dma's implementation now returns NULL (thanks Dan!)
- fsldma now uses a custom command for external control
- fsldma now uses the standard struct dma_slave_config
- tested on real hardware

Ira W. Snyder (4):
  dma: add support for scatterlist to scatterlist copy
  fsldma: implement support for scatterlist to scatterlist copy
  fsldma: improved DMA_SLAVE support
  ste_dma40: implement support for scatterlist to scatterlist copy

 arch/powerpc/include/asm/fsldma.h |  137 ---------------
 drivers/dma/dmaengine.c           |    2 +
 drivers/dma/fsldma.c              |  328 ++++++++++++++++++-------------------
 drivers/dma/ste_dma40.c           |   17 ++
 include/linux/dmaengine.h         |    9 +
 5 files changed, 184 insertions(+), 309 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/fsldma.h

^ permalink raw reply

* [PATCH v4 4/4] ste_dma40: implement support for scatterlist to scatterlist copy
From: Ira W. Snyder @ 2010-09-30 21:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Per Fridén, Linus Walleij, Dan Williams, linuxppc-dev
In-Reply-To: <1285883207-13761-1-git-send-email-iws@ovro.caltech.edu>

Now that the DMAEngine API has support for scatterlist to scatterlist
copy, implement support for the STE DMA40 DMA controller.

Cc: Linus Walleij <linus.ml.walleij@gmail.com>
Cc: Per Fridén <per.friden@stericsson.com>
Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
---
 drivers/dma/ste_dma40.c |   17 +++++++++++++++++
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 17e2600..d5fd098 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -1857,6 +1857,18 @@ err:
 	return NULL;
 }
 
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *chan,
+	    struct scatterlist *dst_sg, unsigned int dst_nents,
+	    struct scatterlist *src_sg, unsigned int src_nents,
+	    unsigned long dma_flags)
+{
+	if (dst_nents != src_nents)
+		return NULL;
+
+	return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags);
+}
+
 static int d40_prep_slave_sg_log(struct d40_desc *d40d,
 				 struct d40_chan *d40c,
 				 struct scatterlist *sgl,
@@ -2281,6 +2293,7 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 	base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_slave.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_slave.device_tx_status = d40_tx_status;
 	base->dma_slave.device_issue_pending = d40_issue_pending;
@@ -2301,10 +2314,12 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 
 	dma_cap_zero(base->dma_memcpy.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
 
 	base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_memcpy.device_tx_status = d40_tx_status;
 	base->dma_memcpy.device_issue_pending = d40_issue_pending;
@@ -2331,10 +2346,12 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 	dma_cap_zero(base->dma_both.cap_mask);
 	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
 
 	base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_both.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_both.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_both.device_tx_status = d40_tx_status;
 	base->dma_both.device_issue_pending = d40_issue_pending;
-- 
1.7.1

^ permalink raw reply related

* [PATCH v4 3/4] fsldma: improved DMA_SLAVE support
From: Ira W. Snyder @ 2010-09-30 21:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Dan Williams, linuxppc-dev
In-Reply-To: <1285883207-13761-1-git-send-email-iws@ovro.caltech.edu>

Now that the generic DMAEngine API has support for scatterlist to
scatterlist copying, the device_prep_slave_sg() portion of the
DMA_SLAVE API is no longer necessary and has been removed.

However, the device_control() portion of the DMA_SLAVE API is still
useful to control device specific parameters, such as externally
controlled DMA transfers and maximum burst length.

A special dma_ctrl_cmd has been added to enable externally controlled
DMA transfers. This is currently specific to the Freescale DMA
controller, but can easily be made generic when another user is found.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
---
 arch/powerpc/include/asm/fsldma.h |  137 ----------------------
 drivers/dma/fsldma.c              |  226 +++++++-----------------------------
 include/linux/dmaengine.h         |    3 +
 3 files changed, 47 insertions(+), 319 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/fsldma.h

diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
deleted file mode 100644
index debc5ed..0000000
--- a/arch/powerpc/include/asm/fsldma.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Freescale MPC83XX / MPC85XX DMA Controller
- *
- * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
-#define __ARCH_POWERPC_ASM_FSLDMA_H__
-
-#include <linux/slab.h>
-#include <linux/dmaengine.h>
-
-/*
- * Definitions for the Freescale DMA controller's DMA_SLAVE implemention
- *
- * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
- * transfers. An example usage would be an accelerated copy between two
- * scatterlists. Another example use would be an accelerated copy from
- * multiple non-contiguous device buffers into a single scatterlist.
- *
- * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
- * structure contains a list of hardware addresses that should be copied
- * to/from the scatterlist passed into device_prep_slave_sg(). The structure
- * also has some fields to enable hardware-specific features.
- */
-
-/**
- * struct fsl_dma_hw_addr
- * @entry: linked list entry
- * @address: the hardware address
- * @length: length to transfer
- *
- * Holds a single physical hardware address / length pair for use
- * with the DMAEngine DMA_SLAVE API.
- */
-struct fsl_dma_hw_addr {
-	struct list_head entry;
-
-	dma_addr_t address;
-	size_t length;
-};
-
-/**
- * struct fsl_dma_slave
- * @addresses: a linked list of struct fsl_dma_hw_addr structures
- * @request_count: value for DMA request count
- * @src_loop_size: setup and enable constant source-address DMA transfers
- * @dst_loop_size: setup and enable constant destination address DMA transfers
- * @external_start: enable externally started DMA transfers
- * @external_pause: enable externally paused DMA transfers
- *
- * Holds a list of address / length pairs for use with the DMAEngine
- * DMA_SLAVE API implementation for the Freescale DMA controller.
- */
-struct fsl_dma_slave {
-
-	/* List of hardware address/length pairs */
-	struct list_head addresses;
-
-	/* Support for extra controller features */
-	unsigned int request_count;
-	unsigned int src_loop_size;
-	unsigned int dst_loop_size;
-	bool external_start;
-	bool external_pause;
-};
-
-/**
- * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
- * @slave: the &struct fsl_dma_slave to add to
- * @address: the hardware address to add
- * @length: the length of bytes to transfer from @address
- *
- * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
- * success, -ERRNO otherwise.
- */
-static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
-				       dma_addr_t address, size_t length)
-{
-	struct fsl_dma_hw_addr *addr;
-
-	addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
-	if (!addr)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&addr->entry);
-	addr->address = address;
-	addr->length = length;
-
-	list_add_tail(&addr->entry, &slave->addresses);
-	return 0;
-}
-
-/**
- * fsl_dma_slave_free - free a struct fsl_dma_slave
- * @slave: the struct fsl_dma_slave to free
- *
- * Free a struct fsl_dma_slave and all associated address/length pairs
- */
-static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
-{
-	struct fsl_dma_hw_addr *addr, *tmp;
-
-	if (slave) {
-		list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
-			list_del(&addr->entry);
-			kfree(addr);
-		}
-
-		kfree(slave);
-	}
-}
-
-/**
- * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
- * @gfp: the flags to pass to kmalloc when allocating this structure
- *
- * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
- * struct fsl_dma_slave on success, or NULL on failure.
- */
-static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
-{
-	struct fsl_dma_slave *slave;
-
-	slave = kzalloc(sizeof(*slave), gfp);
-	if (!slave)
-		return NULL;
-
-	INIT_LIST_HEAD(&slave->addresses);
-	return slave;
-}
-
-#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 1ed29d1..286c3ac 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -35,7 +35,6 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
-#include <asm/fsldma.h>
 #include "fsldma.h"
 
 static const char msg_ld_oom[] = "No free memory for link descriptor\n";
@@ -719,207 +718,70 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
 	enum dma_data_direction direction, unsigned long flags)
 {
-	struct fsldma_chan *chan;
-	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
-	struct fsl_dma_slave *slave;
-	size_t copy;
-
-	int i;
-	struct scatterlist *sg;
-	size_t sg_used;
-	size_t hw_used;
-	struct fsl_dma_hw_addr *hw;
-	dma_addr_t dma_dst, dma_src;
-
-	if (!dchan)
-		return NULL;
-
-	if (!dchan->private)
-		return NULL;
-
-	chan = to_fsl_chan(dchan);
-	slave = dchan->private;
-
-	if (list_empty(&slave->addresses))
-		return NULL;
-
-	hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
-	hw_used = 0;
-
 	/*
-	 * Build the hardware transaction to copy from the scatterlist to
-	 * the hardware, or from the hardware to the scatterlist
+	 * This operation is not supported on the Freescale DMA controller
 	 *
-	 * If you are copying from the hardware to the scatterlist and it
-	 * takes two hardware entries to fill an entire page, then both
-	 * hardware entries will be coalesced into the same page
-	 *
-	 * If you are copying from the scatterlist to the hardware and a
-	 * single page can fill two hardware entries, then the data will
-	 * be read out of the page into the first hardware entry, and so on
+	 * However, we need to provide the function pointer to allow the
+	 * device_control() method to work.
 	 */
-	for_each_sg(sgl, sg, sg_len, i) {
-		sg_used = 0;
-
-		/* Loop until the entire scatterlist entry is used */
-		while (sg_used < sg_dma_len(sg)) {
-
-			/*
-			 * If we've used up the current hardware address/length
-			 * pair, we need to load a new one
-			 *
-			 * This is done in a while loop so that descriptors with
-			 * length == 0 will be skipped
-			 */
-			while (hw_used >= hw->length) {
-
-				/*
-				 * If the current hardware entry is the last
-				 * entry in the list, we're finished
-				 */
-				if (list_is_last(&hw->entry, &slave->addresses))
-					goto finished;
-
-				/* Get the next hardware address/length pair */
-				hw = list_entry(hw->entry.next,
-						struct fsl_dma_hw_addr, entry);
-				hw_used = 0;
-			}
-
-			/* Allocate the link descriptor from DMA pool */
-			new = fsl_dma_alloc_descriptor(chan);
-			if (!new) {
-				dev_err(chan->dev, "No free memory for "
-						       "link descriptor\n");
-				goto fail;
-			}
-#ifdef FSL_DMA_LD_DEBUG
-			dev_dbg(chan->dev, "new link desc alloc %p\n", new);
-#endif
-
-			/*
-			 * Calculate the maximum number of bytes to transfer,
-			 * making sure it is less than the DMA controller limit
-			 */
-			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
-					     hw->length - hw_used);
-			copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
-
-			/*
-			 * DMA_FROM_DEVICE
-			 * from the hardware to the scatterlist
-			 *
-			 * DMA_TO_DEVICE
-			 * from the scatterlist to the hardware
-			 */
-			if (direction == DMA_FROM_DEVICE) {
-				dma_src = hw->address + hw_used;
-				dma_dst = sg_dma_address(sg) + sg_used;
-			} else {
-				dma_src = sg_dma_address(sg) + sg_used;
-				dma_dst = hw->address + hw_used;
-			}
-
-			/* Fill in the descriptor */
-			set_desc_cnt(chan, &new->hw, copy);
-			set_desc_src(chan, &new->hw, dma_src);
-			set_desc_dst(chan, &new->hw, dma_dst);
-
-			/*
-			 * If this is not the first descriptor, chain the
-			 * current descriptor after the previous descriptor
-			 */
-			if (!first) {
-				first = new;
-			} else {
-				set_desc_next(chan, &prev->hw,
-					      new->async_tx.phys);
-			}
-
-			new->async_tx.cookie = 0;
-			async_tx_ack(&new->async_tx);
-
-			prev = new;
-			sg_used += copy;
-			hw_used += copy;
-
-			/* Insert the link descriptor into the LD ring */
-			list_add_tail(&new->node, &first->tx_list);
-		}
-	}
-
-finished:
-
-	/* All of the hardware address/length pairs had length == 0 */
-	if (!first || !new)
-		return NULL;
-
-	new->async_tx.flags = flags;
-	new->async_tx.cookie = -EBUSY;
-
-	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(chan, new);
-
-	/* Enable extra controller features */
-	if (chan->set_src_loop_size)
-		chan->set_src_loop_size(chan, slave->src_loop_size);
-
-	if (chan->set_dst_loop_size)
-		chan->set_dst_loop_size(chan, slave->dst_loop_size);
-
-	if (chan->toggle_ext_start)
-		chan->toggle_ext_start(chan, slave->external_start);
-
-	if (chan->toggle_ext_pause)
-		chan->toggle_ext_pause(chan, slave->external_pause);
-
-	if (chan->set_request_count)
-		chan->set_request_count(chan, slave->request_count);
-
-	return &first->async_tx;
-
-fail:
-	/* If first was not set, then we failed to allocate the very first
-	 * descriptor, and we're done */
-	if (!first)
-		return NULL;
-
-	/*
-	 * First is set, so all of the descriptors we allocated have been added
-	 * to first->tx_list, INCLUDING "first" itself. Therefore we
-	 * must traverse the list backwards freeing each descriptor in turn
-	 *
-	 * We're re-using variables for the loop, oh well
-	 */
-	fsldma_free_desc_list_reverse(chan, &first->tx_list);
 	return NULL;
 }
 
 static int fsl_dma_device_control(struct dma_chan *dchan,
 				  enum dma_ctrl_cmd cmd, unsigned long arg)
 {
+	struct dma_slave_config *config;
 	struct fsldma_chan *chan;
 	unsigned long flags;
-
-	/* Only supports DMA_TERMINATE_ALL */
-	if (cmd != DMA_TERMINATE_ALL)
-		return -ENXIO;
+	int size;
 
 	if (!dchan)
 		return -EINVAL;
 
 	chan = to_fsl_chan(dchan);
 
-	/* Halt the DMA engine */
-	dma_halt(chan);
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* Halt the DMA engine */
+		dma_halt(chan);
 
-	spin_lock_irqsave(&chan->desc_lock, flags);
+		spin_lock_irqsave(&chan->desc_lock, flags);
 
-	/* Remove and free all of the descriptors in the LD queue */
-	fsldma_free_desc_list(chan, &chan->ld_pending);
-	fsldma_free_desc_list(chan, &chan->ld_running);
+		/* Remove and free all of the descriptors in the LD queue */
+		fsldma_free_desc_list(chan, &chan->ld_pending);
+		fsldma_free_desc_list(chan, &chan->ld_running);
 
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		return 0;
+
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+
+		/* make sure the channel supports setting burst size */
+		if (!chan->set_request_count)
+			return -ENXIO;
+
+		/* we set the controller burst size depending on direction */
+		if (config->direction == DMA_TO_DEVICE)
+			size = config->dst_addr_width * config->dst_maxburst;
+		else
+			size = config->src_addr_width * config->src_maxburst;
+
+		chan->set_request_count(chan, size);
+		return 0;
+
+	case FSLDMA_EXTERNAL_START:
+
+		/* make sure the channel supports external start */
+		if (!chan->toggle_ext_start)
+			return -ENXIO;
+
+		chan->toggle_ext_start(chan, arg);
+		return 0;
+
+	default:
+		return -ENXIO;
+	}
 
 	return 0;
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 7c44620..f92c139 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -120,12 +120,15 @@ enum dma_ctrl_flags {
  * configuration data in statically from the platform). An additional
  * argument of struct dma_slave_config must be passed in with this
  * command.
+ * @FSLDMA_EXTERNAL_START: this command will put the Freescale DMA controller
+ * into external start mode.
  */
 enum dma_ctrl_cmd {
 	DMA_TERMINATE_ALL,
 	DMA_PAUSE,
 	DMA_RESUME,
 	DMA_SLAVE_CONFIG,
+	FSLDMA_EXTERNAL_START,
 };
 
 /**
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH] PPC4xx: ADMA separating SoC specific functions
From: Dan Williams @ 2010-09-30 22:52 UTC (permalink / raw)
  To: Wolfgang Denk; +Cc: tmarri, linux-raid, linuxppc-dev, linux-crypto, yur
In-Reply-To: <20100930190814.52268D2B48C@gemini.denx.de>

On Thu, Sep 30, 2010 at 12:08 PM, Wolfgang Denk <wd@denx.de> wrote:
[snip other valid review comments]
>
> This is a header file, yet you add here literally thousands of lines of
> code.

Yes, these functions are entirely too large to be inlined.  It looks
like you are trying to borrow too heavily from the iop-adma model.
The differences between iop13xx and iop33x from a adma perspective are
just in descriptor format and channel capabilities.  If you look at
the routines implemented in:
arch/arm/include/asm/hardware/iop3xx-adma.h
arch/arm/mach-iop13xx/include/mach/adma.h
...they are just simple helpers that abstract the descriptor details.
For example:

iop_adma_prep_dma_xor()
{
[snip]
        spin_lock_bh(&iop_chan->lock);
        slot_cnt =3D iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
        sw_desc =3D iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
        if (sw_desc) {
                grp_start =3D sw_desc->group_head;
                iop_desc_init_xor(grp_start, src_cnt, flags);
                iop_desc_set_byte_count(grp_start, iop_chan, len);
                iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
                sw_desc->unmap_src_cnt =3D src_cnt;
                sw_desc->unmap_len =3D len;
                sw_desc->async_tx.flags =3D flags;
                while (src_cnt--)
                        iop_desc_set_xor_src_addr(grp_start, src_cnt,
                                                  dma_src[src_cnt]);
        }
        spin_unlock_bh(&iop_chan->lock);

        return sw_desc ? &sw_desc->async_tx : NULL;
}

Where  iop_adma_alloc_slots() is implemented differently between
iop13xx and iop3xx.  In this case why does ppc440spe-adma.h exist?  If
it has code specific to ppe440spe it should just live in the ppe440spe
C file.  If it is truly generic it should move to the base adma.c
implementation.  If you want to reuse a ppe440spe routine just link to
it.

> Selecting the architecture at build time is bad as it prevents using a
> sinlge kernel image across a wide range of boards. =A0You only replied
> "We select the architecture at build time." without any explanation if
> there is a pressing technical reason to do it this way, or if this was
> just a arbitrary decision.

I agree I have yet to see any indication that this driver needs to
have an architecture selected at build time.

A potential compromise a first step would be to have a common C file
that is shared between two driver modules until such point that they
can be unified into a common module.

--
Dan

^ permalink raw reply

* RE: [PATCH] PPC4xx: ADMA separating SoC specific functions
From: Tirumala Marri @ 2010-10-01  0:03 UTC (permalink / raw)
  To: Wolfgang Denk; +Cc: linux-raid, dan.j.williams, linuxppc-dev, linux-crypto, yur
In-Reply-To: <20100930190814.52268D2B48C@gemini.denx.de>

>
> When reposting a patch, please always indicate that this is new
> version by using something like "[PATCH v2]" in the Subject line.
[Marri] I know, but this patch is not modification of previous patch.
It is complete brand new from scratch again. In that case isn't this
 will be first version ?


> > ---
>
> Also, please include here (i. e. below the "---" line, i. e. in the
> comments section, a description of what was changed compared to the
> previous version of this patch.
>
> As is, you enforce us to rescan the whole patch again and check
> manually if you have reacted to any of the comments sent before, and
> how.  As is, you make reviewing your poatches harder than necessary.

[Marri} I will include comments in the further versions of this patch.

>
>
> > diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
> > index 0d58a4a..a1053cb 100644
> > --- a/drivers/dma/ppc4xx/adma.c
> > +++ b/drivers/dma/ppc4xx/adma.c
> ...
> > +#include "ppc440spe-adma.h"
> > +
> > +struct dma_async_tx_descriptor
> > +*ppc440spe_adma_prep_dma_pq(struct dma_chan *chan,
> > +			       dma_addr_t * dst,
> > +			       dma_addr_t * src,
> > +			       unsigned int src_cnt,
> > +			       const unsigned char *scf,
> > +			       size_t len,
> > +			       unsigned long flags);
> > +struct dma_async_tx_descriptor
> > +*ppc440spe_adma_prep_dma_pqzero_sum(struct dma_chan *chan,
>
> Should such 440SPe specific code not be removed here and placed into
> ppc440spe-adma.c instead?
[Marri] It is 440SPe specific. Definition is moved ppc440spe-adma.c


>
> > +#if 0
> >  static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t
> *src,
> >  			    unsigned int src_cnt)
> >  {
> > @@ -213,8 +104,9 @@ static void prep_dma_pq_dbg(int id, dma_addr_t
> *dst, dma_addr_t *src,
> >  	for (i = 0; i < 2; i++)
> >  		pr_debug("\t0x%016llx ", dst[i]);
> >  }
> > +#endif
>
> Please do not add dead code - remove the whole "#if 0" block.
[Marri] Will remove it.


> *******/
>
> It seems youremove all code, but leave the (now empty) comment
> headers? This makes little sense to me.
>
[Marri] Will clean up in the next version.
> ...
> >  /**
> >   * ppc440spe_adma_free_slots - flags descriptor slots for reuse
> >   * @slot: Slot to free
> >   * Caller must hold &ppc440spe_chan->lock while calling this
> function
> >   */
>
> Again, all this is pretty low-level 440SPe specific code. Why do you
> keep this in the common drive rfile instead of moving it into the new
> 440SPe specific file?
[Marri]. With name change it can be used With any SoC ADMA driver.


>
>
> > diff --git a/drivers/dma/ppc4xx/ppc440spe-adma.c
> b/drivers/dma/ppc4xx/ppc440spe-adma.c
> > new file mode 100644
> > index 0000000..da467b4
> ...
> > +	/*  In the current implementation of ppc440spe ADMA driver it
> > +
> > +
> > +
> > +	 * makes sense to pick out only pq case, because it may be
>
> Formatting problems?
[Marri] Will fix in next version.

>
>
> > diff --git a/drivers/dma/ppc4xx/ppc440spe-adma.h
> b/drivers/dma/ppc4xx/ppc440spe-adma.h
> > new file mode 100644
> > index 0000000..81a1f46
> > --- /dev/null
> > +++ b/drivers/dma/ppc4xx/ppc440spe-adma.h
> ...
> > +/*
> > + * ppc440spe_get_group_entry - get group entry with index idx
> > + * @tdesc: is the last allocated slot in the group.
> > + */
> > +static struct ppc440spe_adma_desc_slot
> *ppc440spe_get_group_entry(struct
> > +
ppc440spe_adma_desc_slot
> > +							    *tdesc,
> > +							    u32 entry_idx)
> > +{
> > +	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
> > +	int i = 0;
> > +
> > +	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc-
> >dst_cnt)) {
> > +		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
> > +		       __func__, entry_idx, tdesc->src_cnt, tdesc-
> >dst_cnt);
> > +		BUG();
> > +	}
> > +
> > +	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
> > +		if (i++ == entry_idx)
> > +			break;
> > +	}
> > +	return iter;
> > +}
>
> This is a header file, yet you add here literally thousands of lines of
> code.
>
>
> Note that more or less similar questions have been asked for the
> previous version of this patch, but I fail to find any good
> justification in your replies.
[Marri] Reason for some functions in lined is 1) They are small enough
To be in lined 2) If keep them in ppc440spe-adma.c I will have to make
them
Non static to avoid "Used but not defined warnings". With too many
functions
Non static might cause name space pollution in the kernel ?

>
>
> Selecting the architecture at build time is bad as it prevents using a
> sinlge kernel image across a wide range of boards.  You only replied
> "We select the architecture at build time." without any explanation if
> there is a pressing technical reason to do it this way, or if this was
> just a arbitrary decision.
[Marri] Build time separation is only for entirely different SoC DMA
engine.
For example 440spe and 460sx has engierely different DMA architecture.
Whereas 440spe and 460ex can be determined at run time.I am planning to do
the run time selection for similar SoCs. I discussed This with Yuri as
well.

>
> The same goes for putting so much source code in a header file - I
> really see no technical need for this (especially not if you build for
> a single architecture only).
[Marri] I explained this above.

>
> Also I wonder why you still keep so many 440SPe specific code in the
> common file, even though you just create new 440SPe specific header
> and source files.
>
>
> Please elucidate.
>
>
[Marri] With Dan's suggestion first I am working on separating SoC
specific functions
To avoid too many  changes. Next step is to re-name the functions in the
Adma.c to common name line ppc4xx_xxx().

Regards,
Marri

^ permalink raw reply

* RE: [PATCH] PPC4xx: ADMA separating SoC specific functions
From: Tirumala Marri @ 2010-10-01  0:16 UTC (permalink / raw)
  To: Dan Williams, Wolfgang Denk; +Cc: linux-raid, linuxppc-dev, linux-crypto, yur
In-Reply-To: <AANLkTi=CJOsayM1YnwE3a22D2S35aM+BTV6_YfXjcjiO@mail.gmail.com>

> On Thu, Sep 30, 2010 at 12:08 PM, Wolfgang Denk <wd@denx.de> wrote:
> [snip other valid review comments]
> >
> > This is a header file, yet you add here literally thousands of lines
> of
> > code.
>
> Yes, these functions are entirely too large to be inlined.  It looks
> like you are trying to borrow too heavily from the iop-adma model.
> The differences between iop13xx and iop33x from a adma perspective are
> just in descriptor format and channel capabilities.  If you look at
> the routines implemented in:
> arch/arm/include/asm/hardware/iop3xx-adma.h
> arch/arm/mach-iop13xx/include/mach/adma.h
> ...they are just simple helpers that abstract the descriptor details.
> For example:
>
> iop_adma_prep_dma_xor()
> {
> [snip]
>         spin_lock_bh(&iop_chan->lock);
>         slot_cnt =3D iop_chan_xor_slot_count(len, src_cnt,
> &slots_per_op);
>         sw_desc =3D iop_adma_alloc_slots(iop_chan, slot_cnt,
> slots_per_op);
>         if (sw_desc) {
>                 grp_start =3D sw_desc->group_head;
>                 iop_desc_init_xor(grp_start, src_cnt, flags);
>                 iop_desc_set_byte_count(grp_start, iop_chan, len);
>                 iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
>                 sw_desc->unmap_src_cnt =3D src_cnt;
>                 sw_desc->unmap_len =3D len;
>                 sw_desc->async_tx.flags =3D flags;
>                 while (src_cnt--)
>                         iop_desc_set_xor_src_addr(grp_start, src_cnt,
>                                                   dma_src[src_cnt]);
>         }
>         spin_unlock_bh(&iop_chan->lock);
>
>         return sw_desc ? &sw_desc->async_tx : NULL;
> }
>
> Where  iop_adma_alloc_slots() is implemented differently between
> iop13xx and iop3xx.  In this case why does ppc440spe-adma.h exist?  If
> it has code specific to ppe440spe it should just live in the ppe440spe
> C file.  If it is truly generic it should move to the base adma.c
> implementation.  If you want to reuse a ppe440spe routine just link to
> it.
[Marri]That is how I started changing the code. And I see tons of warnings
Saying "Used but not defined" or "Defined but not used". How should I
suppress
Some functions from adma.c are used in ppc440spe-adma.c and some from
ppc440spe-adma.c
Are used in adma.c. So I created intermediate file ppc440spe-adma.h with
inlined
Functions. In future this will be converted into ppc4xx_adma.h and move
existing
SoC specific stuff to ppc440spe-adma.c file.

>
> > Selecting the architecture at build time is bad as it prevents using
> a
> > sinlge kernel image across a wide range of boards. =A0You only replied
> > "We select the architecture at build time." without any explanation
> if
> > there is a pressing technical reason to do it this way, or if this
> was
> > just a arbitrary decision.
>
> I agree I have yet to see any indication that this driver needs to
> have an architecture selected at build time.
>
> A potential compromise a first step would be to have a common C file
> that is shared between two driver modules until such point that they
> can be unified into a common module.

As I responded to Mr. Wolfgang in previous email, similar SoC DMA engines
will
Be resolved at run time. Whereas completely different architectures will
be
Resolved at build time.

Regards,
Marri

^ permalink raw reply

* Re: [PATCH] PPC4xx: ADMA separating SoC specific functions
From: Dan Williams @ 2010-10-01  0:57 UTC (permalink / raw)
  To: Tirumala Marri
  Cc: Wolfgang Denk, Greg KH, yur, linux-raid, linux-crypto,
	linuxppc-dev
In-Reply-To: <dc9b5d064d80ac2af2ccf932e94b2bb9@mail.gmail.com>

[ adding Greg ]

On Thu, Sep 30, 2010 at 5:16 PM, Tirumala Marri <tmarri@apm.com> wrote:
>> Where =A0iop_adma_alloc_slots() is implemented differently between
>> iop13xx and iop3xx. =A0In this case why does ppc440spe-adma.h exist? =A0=
If
>> it has code specific to ppe440spe it should just live in the ppe440spe
>> C file. =A0If it is truly generic it should move to the base adma.c
>> implementation. =A0If you want to reuse a ppe440spe routine just link to
>> it.
> [Marri]That is how I started changing the code. And I see tons of warning=
s
> Saying "Used but not defined" or "Defined but not used". How should I
> suppress
> Some functions from adma.c are used in ppc440spe-adma.c and some from
> ppc440spe-adma.c
> Are used in adma.c.

This is part of defining a common interface.  Maybe look at the
linkages of how the common ioat_probe() routine is used to support all
three versions of its dma hardware.

> So I created intermediate file ppc440spe-adma.h with
> inlined
> Functions. In future this will be converted into ppc4xx_adma.h and move
> existing
> SoC specific stuff to ppc440spe-adma.c file.

You definitely need to be able to resolve "used but not defined" and
"defined but not used" warnings before tackling a driver conversion
like this.  In light of this comment I wonder if it would be
appropriate to submit your original driver, that just duplicated
routines from the ppc440spe driver, to the -staging tree.  Then it
would be available for someone familiar with driver conversions to
take a shot at unifying.

Greg, is this an appropriate use of -staging?

--
Dan

^ permalink raw reply

* [PATCH 09/18] powerpc: Support device tree regardless of CPU endianness
From: Ian Munsie @ 2010-10-01  7:06 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh
  Cc: Michal Simek, devicetree-discuss, paulus, Ian Munsie, Jeremy Kerr
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

On PowerPC the device tree is always big endian, but the CPU could be
either, so add be32_to_cpu where appropriate and change the types of
device tree data to __be32 etc to allow sparse to locate endian issues.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 arch/powerpc/kernel/prom.c |   60 ++++++++++++++++++++++----------------------
 1 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fed9bf6..9b9ebb2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -188,16 +188,16 @@ static void __init check_cpu_pa_features(unsigned long node)
 #ifdef CONFIG_PPC_STD_MMU_64
 static void __init check_cpu_slb_size(unsigned long node)
 {
-	u32 *slb_size_ptr;
+	__be32 *slb_size_ptr;
 
 	slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
 	if (slb_size_ptr != NULL) {
-		mmu_slb_size = *slb_size_ptr;
+		mmu_slb_size = be32_to_cpup(slb_size_ptr);
 		return;
 	}
 	slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
 	if (slb_size_ptr != NULL) {
-		mmu_slb_size = *slb_size_ptr;
+		mmu_slb_size = be32_to_cpup(slb_size_ptr);
 	}
 }
 #else
@@ -252,11 +252,11 @@ static void __init check_cpu_feature_properties(unsigned long node)
 {
 	unsigned long i;
 	struct feature_property *fp = feature_properties;
-	const u32 *prop;
+	const __be32 *prop;
 
 	for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
 		prop = of_get_flat_dt_prop(node, fp->name, NULL);
-		if (prop && *prop >= fp->min_value) {
+		if (prop && be32_to_cpup(prop) >= fp->min_value) {
 			cur_cpu_spec->cpu_features |= fp->cpu_feature;
 			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr;
 		}
@@ -269,8 +269,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 {
 	static int logical_cpuid = 0;
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	const u32 *prop;
-	const u32 *intserv;
+	const __be32 *prop;
+	const __be32 *intserv;
 	int i, nthreads;
 	unsigned long len;
 	int found = 0;
@@ -297,9 +297,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		 * version 2 of the kexec param format adds the phys cpuid of
 		 * booted proc.
 		 */
-		if (initial_boot_params && initial_boot_params->version >= 2) {
-			if (intserv[i] ==
-					initial_boot_params->boot_cpuid_phys) {
+		if (initial_boot_params && be32_to_cpu(initial_boot_params->version) >= 2) {
+			if (be32_to_cpu(intserv[i]) ==
+					be32_to_cpu(initial_boot_params->boot_cpuid_phys)) {
 				found = 1;
 				break;
 			}
@@ -324,9 +324,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 
 	if (found) {
 		DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
-			intserv[i]);
+			be32_to_cpu(intserv[i]));
 		boot_cpuid = logical_cpuid;
-		set_hard_smp_processor_id(boot_cpuid, intserv[i]);
+		set_hard_smp_processor_id(boot_cpuid, be32_to_cpu(intserv[i]));
 
 		/*
 		 * PAPR defines "logical" PVR values for cpus that
@@ -343,8 +343,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		 * it uses 0x0f000001.
 		 */
 		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
-		if (prop && (*prop & 0xff000000) == 0x0f000000)
-			identify_cpu(0, *prop);
+		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+			identify_cpu(0, be32_to_cpup(prop));
 
 		identical_pvr_fixup(node);
 	}
@@ -365,7 +365,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 
 void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
-	unsigned long *lprop;
+	unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
 
 #ifdef CONFIG_PPC64
 	/* check if iommu is forced on or off */
@@ -524,16 +524,16 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start,
 static void __init early_reserve_mem(void)
 {
 	u64 base, size;
-	u64 *reserve_map;
+	__be64 *reserve_map;
 	unsigned long self_base;
 	unsigned long self_size;
 
-	reserve_map = (u64 *)(((unsigned long)initial_boot_params) +
-					initial_boot_params->off_mem_rsvmap);
+	reserve_map = (__be64 *)(((unsigned long)initial_boot_params) +
+			be32_to_cpu(initial_boot_params->off_mem_rsvmap));
 
 	/* before we do anything, lets reserve the dt blob */
 	self_base = __pa((unsigned long)initial_boot_params);
-	self_size = initial_boot_params->totalsize;
+	self_size = be32_to_cpu(initial_boot_params->totalsize);
 	memblock_reserve(self_base, self_size);
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -547,13 +547,13 @@ static void __init early_reserve_mem(void)
 	 * Handle the case where we might be booting from an old kexec
 	 * image that setup the mem_rsvmap as pairs of 32-bit values
 	 */
-	if (*reserve_map > 0xffffffffull) {
+	if (be64_to_cpup(reserve_map) > 0xffffffffull) {
 		u32 base_32, size_32;
-		u32 *reserve_map_32 = (u32 *)reserve_map;
+		__be32 *reserve_map_32 = (__be32 *)reserve_map;
 
 		while (1) {
-			base_32 = *(reserve_map_32++);
-			size_32 = *(reserve_map_32++);
+			base_32 = be32_to_cpup(reserve_map_32++);
+			size_32 = be32_to_cpup(reserve_map_32++);
 			if (size_32 == 0)
 				break;
 			/* skip if the reservation is for the blob */
@@ -566,8 +566,8 @@ static void __init early_reserve_mem(void)
 	}
 #endif
 	while (1) {
-		base = *(reserve_map++);
-		size = *(reserve_map++);
+		base = be64_to_cpup(reserve_map++);
+		size = be64_to_cpup(reserve_map++);
 		if (size == 0)
 			break;
 		DBG("reserving: %llx -> %llx\n", base, size);
@@ -860,7 +860,7 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
 	hardid = get_hard_smp_processor_id(cpu);
 
 	for_each_node_by_type(np, "cpu") {
-		const u32 *intserv;
+		const __be32 *intserv;
 		unsigned int plen, t;
 
 		/* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
@@ -869,10 +869,10 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
 		intserv = of_get_property(np, "ibm,ppc-interrupt-server#s",
 				&plen);
 		if (intserv == NULL) {
-			const u32 *reg = of_get_property(np, "reg", NULL);
+			const __be32 *reg = of_get_property(np, "reg", NULL);
 			if (reg == NULL)
 				continue;
-			if (*reg == hardid) {
+			if (be32_to_cpup(reg) == hardid) {
 				if (thread)
 					*thread = 0;
 				return np;
@@ -880,7 +880,7 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
 		} else {
 			plen /= sizeof(u32);
 			for (t = 0; t < plen; t++) {
-				if (hardid == intserv[t]) {
+				if (hardid == be32_to_cpu(intserv[t])) {
 					if (thread)
 						*thread = t;
 					return np;
@@ -900,7 +900,7 @@ static int __init export_flat_device_tree(void)
 	struct dentry *d;
 
 	flat_dt_blob.data = initial_boot_params;
-	flat_dt_blob.size = initial_boot_params->totalsize;
+	flat_dt_blob.size = be32_to_cpu(initial_boot_params->totalsize);
 
 	d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR,
 				powerpc_debugfs_root, &flat_dt_blob);
-- 
1.7.1

^ permalink raw reply related

* [PATCH 13/18] powerpc 44x: Make DCR endianness agnostic
From: Ian Munsie @ 2010-10-01  7:06 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh
  Cc: Peter Zijlstra, devicetree-discuss, Ian Munsie, paulus,
	Thomas Gleixner, Stefan Roese, Anton Blanchard
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

The Device Control Register accesses parse the device tree and therefore
need to handle the possible differences of endianness between the CPU
and device tree.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 arch/powerpc/sysdev/dcr.c        |   18 +++++++++---------
 arch/powerpc/sysdev/ppc4xx_soc.c |   16 ++++++++--------
 arch/powerpc/sysdev/uic.c        |    6 +++---
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
index bb44aa9..7f91e8a 100644
--- a/arch/powerpc/sysdev/dcr.c
+++ b/arch/powerpc/sysdev/dcr.c
@@ -27,7 +27,7 @@
 static struct device_node *find_dcr_parent(struct device_node *node)
 {
 	struct device_node *par, *tmp;
-	const u32 *p;
+	const __be32 *p;
 
 	for (par = of_node_get(node); par;) {
 		if (of_get_property(par, "dcr-controller", NULL))
@@ -37,7 +37,7 @@ static struct device_node *find_dcr_parent(struct device_node *node)
 		if (p == NULL)
 			par = of_get_parent(par);
 		else
-			par = of_find_node_by_phandle(*p);
+			par = of_find_node_by_phandle(be32_to_cpup(p));
 		of_node_put(tmp);
 	}
 	return par;
@@ -128,24 +128,24 @@ unsigned int dcr_resource_start(const struct device_node *np,
 				unsigned int index)
 {
 	unsigned int ds;
-	const u32 *dr = of_get_property(np, "dcr-reg", &ds);
+	const __be32 *dr = of_get_property(np, "dcr-reg", &ds);
 
 	if (dr == NULL || ds & 1 || index >= (ds / 8))
 		return 0;
 
-	return dr[index * 2];
+	return be32_to_cpu(dr[index * 2]);
 }
 EXPORT_SYMBOL_GPL(dcr_resource_start);
 
 unsigned int dcr_resource_len(const struct device_node *np, unsigned int index)
 {
 	unsigned int ds;
-	const u32 *dr = of_get_property(np, "dcr-reg", &ds);
+	const __be32 *dr = of_get_property(np, "dcr-reg", &ds);
 
 	if (dr == NULL || ds & 1 || index >= (ds / 8))
 		return 0;
 
-	return dr[index * 2 + 1];
+	return be32_to_cpu(dr[index * 2 + 1]);
 }
 EXPORT_SYMBOL_GPL(dcr_resource_len);
 
@@ -156,7 +156,7 @@ u64 of_translate_dcr_address(struct device_node *dev,
 			     unsigned int *out_stride)
 {
 	struct device_node *dp;
-	const u32 *p;
+	const __be32 *p;
 	unsigned int stride;
 	u64 ret = OF_BAD_ADDR;
 
@@ -166,7 +166,7 @@ u64 of_translate_dcr_address(struct device_node *dev,
 
 	/* Stride is not properly defined yet, default to 0x10 for Axon */
 	p = of_get_property(dp, "dcr-mmio-stride", NULL);
-	stride = (p == NULL) ? 0x10 : *p;
+	stride = (p == NULL) ? 0x10 : be32_to_cpup(p);
 
 	/* XXX FIXME: Which property name is to use of the 2 following ? */
 	p = of_get_property(dp, "dcr-mmio-range", NULL);
@@ -176,7 +176,7 @@ u64 of_translate_dcr_address(struct device_node *dev,
 		goto done;
 
 	/* Maybe could do some better range checking here */
-	ret = of_translate_address(dp, p);
+	ret = of_translate_address(dp, be32_to_cpup(p));
 	if (ret != OF_BAD_ADDR)
 		ret += (u64)(stride) * (u64)dcr_n;
 	if (out_stride)
diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/sysdev/ppc4xx_soc.c
index d3d6ce3..e5a7554 100644
--- a/arch/powerpc/sysdev/ppc4xx_soc.c
+++ b/arch/powerpc/sysdev/ppc4xx_soc.c
@@ -76,10 +76,10 @@ static int __init ppc4xx_l2c_probe(void)
 	u32 r;
 	unsigned long flags;
 	int irq;
-	const u32 *dcrreg;
+	const __be32 *dcrreg;
 	u32 dcrbase_isram;
 	int len;
-	const u32 *prop;
+	const __be32 *prop;
 	u32 l2_size;
 
 	np = of_find_compatible_node(NULL, NULL, "ibm,l2-cache");
@@ -93,7 +93,7 @@ static int __init ppc4xx_l2c_probe(void)
 		of_node_put(np);
 		return -ENODEV;
 	}
-	l2_size = prop[0];
+	l2_size = be32_to_cpu(prop[0]);
 
 	/* Map DCRs */
 	dcrreg = of_get_property(np, "dcr-reg", &len);
@@ -103,8 +103,8 @@ static int __init ppc4xx_l2c_probe(void)
 		of_node_put(np);
 		return -ENODEV;
 	}
-	dcrbase_isram = dcrreg[0];
-	dcrbase_l2c = dcrreg[2];
+	dcrbase_isram = be32_to_cpu(dcrreg[0]);
+	dcrbase_l2c = be32_to_cpu(dcrreg[2]);
 
 	/* Get and map irq number from device tree */
 	irq = irq_of_parse_and_map(np, 0);
@@ -198,7 +198,7 @@ void ppc4xx_reset_system(char *cmd)
 {
 	struct device_node *np;
 	u32 reset_type = DBCR0_RST_SYSTEM;
-	const u32 *prop;
+	const __be32 *prop;
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np) {
@@ -210,8 +210,8 @@ void ppc4xx_reset_system(char *cmd)
 		 * 2 - PPC4xx chip reset
 		 * 3 - PPC4xx system reset (default)
 		 */
-		if ((prop) && ((prop[0] >= 1) && (prop[0] <= 3)))
-			reset_type = prop[0] << 28;
+		if ((prop) && ((be32_to_cpu(prop[0]) >= 1) && (be32_to_cpu(prop[0]) <= 3)))
+			reset_type = be32_to_cpu(prop[0]) << 28;
 	}
 
 	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | reset_type);
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 0038fb7..e2f7de8 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -253,7 +253,7 @@ uic_irq_ret:
 static struct uic * __init uic_init_one(struct device_node *node)
 {
 	struct uic *uic;
-	const u32 *indexp, *dcrreg;
+	const __be32 *indexp, *dcrreg;
 	int len;
 
 	BUG_ON(! of_device_is_compatible(node, "ibm,uic"));
@@ -269,7 +269,7 @@ static struct uic * __init uic_init_one(struct device_node *node)
 		       "cell-index property\n", node->full_name);
 		return NULL;
 	}
-	uic->index = *indexp;
+	uic->index = be32_to_cpup(indexp);
 
 	dcrreg = of_get_property(node, "dcr-reg", &len);
 	if (!dcrreg || (len != 2*sizeof(u32))) {
@@ -277,7 +277,7 @@ static struct uic * __init uic_init_one(struct device_node *node)
 		       "dcr-reg property\n", node->full_name);
 		return NULL;
 	}
-	uic->dcrbase = *dcrreg;
+	uic->dcrbase = be32_to_cpup(dcrreg);
 
 	uic->irqhost = irq_alloc_host(node, IRQ_HOST_MAP_LINEAR,
 				      NR_UIC_INTS, &uic_host_ops, -1);
-- 
1.7.1

^ permalink raw reply related

* [PATCH 14/18] powerpc, of_serial: Endianness issues setting up the serial ports
From: Ian Munsie @ 2010-10-01  7:06 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh
  Cc: Michal Simek, Jiri Kosina, devicetree-discuss, Greg Kroah-Hartman,
	Stefan Weil, paulus, Ian Munsie, Sean MacLennan, David S. Miller
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

The speed and clock of the serial ports is retrieved from the device
tree in both the PowerPC legacy serial code and the Open Firmware serial
driver, therefore they need to handle the fact that the device tree is
always big endian, while the CPU may not be.

Also fix other device tree references in the legacy serial code.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 arch/powerpc/kernel/legacy_serial.c |   22 +++++++++++-----------
 drivers/serial/of_serial.c          |   12 ++++++------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index c1fd0f9..c834757 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -52,14 +52,14 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 				  phys_addr_t taddr, unsigned long irq,
 				  upf_t flags, int irq_check_parent)
 {
-	const u32 *clk, *spd;
+	const __be32 *clk, *spd;
 	u32 clock = BASE_BAUD * 16;
 	int index;
 
 	/* get clock freq. if present */
 	clk = of_get_property(np, "clock-frequency", NULL);
 	if (clk && *clk)
-		clock = *clk;
+		clock = be32_to_cpup(clk);
 
 	/* get default speed if present */
 	spd = of_get_property(np, "current-speed", NULL);
@@ -109,7 +109,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 	legacy_serial_infos[index].taddr = taddr;
 	legacy_serial_infos[index].np = of_node_get(np);
 	legacy_serial_infos[index].clock = clock;
-	legacy_serial_infos[index].speed = spd ? *spd : 0;
+	legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0;
 	legacy_serial_infos[index].irq_check_parent = irq_check_parent;
 
 	printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
@@ -168,7 +168,7 @@ static int __init add_legacy_soc_port(struct device_node *np,
 static int __init add_legacy_isa_port(struct device_node *np,
 				      struct device_node *isa_brg)
 {
-	const u32 *reg;
+	const __be32 *reg;
 	const char *typep;
 	int index = -1;
 	u64 taddr;
@@ -181,7 +181,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 		return -1;
 
 	/* Verify it's an IO port, we don't support anything else */
-	if (!(reg[0] & 0x00000001))
+	if (!(be32_to_cpu(reg[0]) & 0x00000001))
 		return -1;
 
 	/* Now look for an "ibm,aix-loc" property that gives us ordering
@@ -202,7 +202,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 		taddr = 0;
 
 	/* Add port, irq will be dealt with later */
-	return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr,
+	return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr,
 			       NO_IRQ, UPF_BOOT_AUTOCONF, 0);
 
 }
@@ -251,9 +251,9 @@ static int __init add_legacy_pci_port(struct device_node *np,
 	 * we get to their "reg" property
 	 */
 	if (np != pci_dev) {
-		const u32 *reg = of_get_property(np, "reg", NULL);
-		if (reg && (*reg < 4))
-			index = lindex = *reg;
+		const __be32 *reg = of_get_property(np, "reg", NULL);
+		if (reg && (be32_to_cpup(reg) < 4))
+			index = lindex = be32_to_cpup(reg);
 	}
 
 	/* Local index means it's the Nth port in the PCI chip. Unfortunately
@@ -507,7 +507,7 @@ static int __init check_legacy_serial_console(void)
 	struct device_node *prom_stdout = NULL;
 	int i, speed = 0, offset = 0;
 	const char *name;
-	const u32 *spd;
+	const __be32 *spd;
 
 	DBG(" -> check_legacy_serial_console()\n");
 
@@ -547,7 +547,7 @@ static int __init check_legacy_serial_console(void)
 	}
 	spd = of_get_property(prom_stdout, "current-speed", NULL);
 	if (spd)
-		speed = *spd;
+		speed = be32_to_cpup(spd);
 
 	if (strcmp(name, "serial") != 0)
 		goto not_found;
diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c
index 2af8fd1..17849dc 100644
--- a/drivers/serial/of_serial.c
+++ b/drivers/serial/of_serial.c
@@ -31,8 +31,8 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev,
 {
 	struct resource resource;
 	struct device_node *np = ofdev->dev.of_node;
-	const unsigned int *clk, *spd;
-	const u32 *prop;
+	const __be32 *clk, *spd;
+	const __be32 *prop;
 	int ret, prop_size;
 
 	memset(port, 0, sizeof *port);
@@ -55,23 +55,23 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev,
 	/* Check for shifted address mapping */
 	prop = of_get_property(np, "reg-offset", &prop_size);
 	if (prop && (prop_size == sizeof(u32)))
-		port->mapbase += *prop;
+		port->mapbase += be32_to_cpup(prop);
 
 	/* Check for registers offset within the devices address range */
 	prop = of_get_property(np, "reg-shift", &prop_size);
 	if (prop && (prop_size == sizeof(u32)))
-		port->regshift = *prop;
+		port->regshift = be32_to_cpup(prop);
 
 	port->irq = irq_of_parse_and_map(np, 0);
 	port->iotype = UPIO_MEM;
 	port->type = type;
-	port->uartclk = *clk;
+	port->uartclk = be32_to_cpup(clk);
 	port->flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_IOREMAP
 		| UPF_FIXED_PORT | UPF_FIXED_TYPE;
 	port->dev = &ofdev->dev;
 	/* If current-speed was set, then try not to change it. */
 	if (spd)
-		port->custom_divisor = *clk / (16 * (*spd));
+		port->custom_divisor = be32_to_cpup(clk) / (16 * (be32_to_cpup(spd)));
 
 	return 0;
 }
-- 
1.7.1

^ permalink raw reply related

* [PATCH 17/18] net: Fix endianess issues in IBM newemac driver
From: Ian Munsie @ 2010-10-01  7:06 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh
  Cc: Jiri Pirko, netdev, devicetree-discuss, paulus, Ian Munsie,
	Sean MacLennan, Tejun Heo, David S. Miller
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

This patch fixes all the device tree and ring buffer accesses in the IBM
newemac driver.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 drivers/net/ibm_newemac/core.c |   68 ++++++++++++++++++++--------------------
 drivers/net/ibm_newemac/mal.c  |    6 ++--
 drivers/net/ibm_newemac/mal.h  |    6 ++--
 3 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 3506fd6..67238b8 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -981,12 +981,12 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
 	 * to simplify error recovery in the case of allocation failure later.
 	 */
 	for (i = 0; i < NUM_RX_BUFF; ++i) {
-		if (dev->rx_desc[i].ctrl & MAL_RX_CTRL_FIRST)
+		if (dev->rx_desc[i].ctrl & cpu_to_be16(MAL_RX_CTRL_FIRST))
 			++dev->estats.rx_dropped_resize;
 
 		dev->rx_desc[i].data_len = 0;
-		dev->rx_desc[i].ctrl = MAL_RX_CTRL_EMPTY |
-		    (i == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0);
+		dev->rx_desc[i].ctrl = cpu_to_be16(MAL_RX_CTRL_EMPTY |
+		    (i == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0));
 	}
 
 	/* Reallocate RX ring only if bigger skb buffers are required */
@@ -1005,9 +1005,9 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
 		dev_kfree_skb(dev->rx_skb[i]);
 
 		skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2);
-		dev->rx_desc[i].data_ptr =
+		dev->rx_desc[i].data_ptr = cpu_to_be32(
 		    dma_map_single(&dev->ofdev->dev, skb->data - 2, rx_sync_size,
-				   DMA_FROM_DEVICE) + 2;
+				   DMA_FROM_DEVICE) + 2);
 		dev->rx_skb[i] = skb;
 	}
  skip:
@@ -1067,7 +1067,7 @@ static void emac_clean_tx_ring(struct emac_instance *dev)
 		if (dev->tx_skb[i]) {
 			dev_kfree_skb(dev->tx_skb[i]);
 			dev->tx_skb[i] = NULL;
-			if (dev->tx_desc[i].ctrl & MAL_TX_CTRL_READY)
+			if (dev->tx_desc[i].ctrl & cpu_to_be16(MAL_TX_CTRL_READY))
 				++dev->estats.tx_dropped;
 		}
 		dev->tx_desc[i].ctrl = 0;
@@ -1104,12 +1104,12 @@ static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot,
 	dev->rx_desc[slot].data_len = 0;
 
 	skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2);
-	dev->rx_desc[slot].data_ptr =
+	dev->rx_desc[slot].data_ptr = cpu_to_be32(
 	    dma_map_single(&dev->ofdev->dev, skb->data - 2, dev->rx_sync_size,
-			   DMA_FROM_DEVICE) + 2;
+			   DMA_FROM_DEVICE) + 2);
 	wmb();
-	dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY |
-	    (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0);
+	dev->rx_desc[slot].ctrl = cpu_to_be16(MAL_RX_CTRL_EMPTY |
+	    (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0));
 
 	return 0;
 }
@@ -1373,12 +1373,12 @@ static int emac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	DBG2(dev, "xmit(%u) %d" NL, len, slot);
 
 	dev->tx_skb[slot] = skb;
-	dev->tx_desc[slot].data_ptr = dma_map_single(&dev->ofdev->dev,
+	dev->tx_desc[slot].data_ptr = cpu_to_be32(dma_map_single(&dev->ofdev->dev,
 						     skb->data, len,
-						     DMA_TO_DEVICE);
-	dev->tx_desc[slot].data_len = (u16) len;
+						     DMA_TO_DEVICE));
+	dev->tx_desc[slot].data_len = cpu_to_be16(len);
 	wmb();
-	dev->tx_desc[slot].ctrl = ctrl;
+	dev->tx_desc[slot].ctrl = cpu_to_be16(ctrl);
 
 	return emac_xmit_finish(dev, len);
 }
@@ -1399,9 +1399,9 @@ static inline int emac_xmit_split(struct emac_instance *dev, int slot,
 			ctrl |= MAL_TX_CTRL_WRAP;
 
 		dev->tx_skb[slot] = NULL;
-		dev->tx_desc[slot].data_ptr = pd;
-		dev->tx_desc[slot].data_len = (u16) chunk;
-		dev->tx_desc[slot].ctrl = ctrl;
+		dev->tx_desc[slot].data_ptr = cpu_to_be32(pd);
+		dev->tx_desc[slot].data_len = cpu_to_be16(chunk);
+		dev->tx_desc[slot].ctrl = cpu_to_be16(ctrl);
 		++dev->tx_cnt;
 
 		if (!len)
@@ -1442,9 +1442,9 @@ static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev)
 	/* skb data */
 	dev->tx_skb[slot] = NULL;
 	chunk = min(len, MAL_MAX_TX_SIZE);
-	dev->tx_desc[slot].data_ptr = pd =
-	    dma_map_single(&dev->ofdev->dev, skb->data, len, DMA_TO_DEVICE);
-	dev->tx_desc[slot].data_len = (u16) chunk;
+	dev->tx_desc[slot].data_ptr = cpu_to_be32(pd =
+	    dma_map_single(&dev->ofdev->dev, skb->data, len, DMA_TO_DEVICE));
+	dev->tx_desc[slot].data_len = cpu_to_be16(chunk);
 	len -= chunk;
 	if (unlikely(len))
 		slot = emac_xmit_split(dev, slot, pd + chunk, len, !nr_frags,
@@ -1473,7 +1473,7 @@ static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev)
 	if (dev->tx_slot == NUM_TX_BUFF - 1)
 		ctrl |= MAL_TX_CTRL_WRAP;
 	wmb();
-	dev->tx_desc[dev->tx_slot].ctrl = ctrl;
+	dev->tx_desc[dev->tx_slot].ctrl = cpu_to_be16(ctrl);
 	dev->tx_slot = (slot + 1) % NUM_TX_BUFF;
 
 	return emac_xmit_finish(dev, skb->len);
@@ -1541,7 +1541,7 @@ static void emac_poll_tx(void *param)
 		u16 ctrl;
 		int slot = dev->ack_slot, n = 0;
 	again:
-		ctrl = dev->tx_desc[slot].ctrl;
+		ctrl = be16_to_cpu(dev->tx_desc[slot].ctrl);
 		if (!(ctrl & MAL_TX_CTRL_READY)) {
 			struct sk_buff *skb = dev->tx_skb[slot];
 			++n;
@@ -1583,8 +1583,8 @@ static inline void emac_recycle_rx_skb(struct emac_instance *dev, int slot,
 
 	dev->rx_desc[slot].data_len = 0;
 	wmb();
-	dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY |
-	    (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0);
+	dev->rx_desc[slot].ctrl = cpu_to_be16(MAL_RX_CTRL_EMPTY |
+	    (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0));
 }
 
 static void emac_parse_rx_error(struct emac_instance *dev, u16 ctrl)
@@ -1628,7 +1628,7 @@ static inline void emac_rx_csum(struct emac_instance *dev,
 static inline int emac_rx_sg_append(struct emac_instance *dev, int slot)
 {
 	if (likely(dev->rx_sg_skb != NULL)) {
-		int len = dev->rx_desc[slot].data_len;
+		int len = be16_to_cpu(dev->rx_desc[slot].data_len);
 		int tot_len = dev->rx_sg_skb->len + len;
 
 		if (unlikely(tot_len + 2 > dev->rx_skb_size)) {
@@ -1659,14 +1659,14 @@ static int emac_poll_rx(void *param, int budget)
 	while (budget > 0) {
 		int len;
 		struct sk_buff *skb;
-		u16 ctrl = dev->rx_desc[slot].ctrl;
+		u16 ctrl = be16_to_cpu(dev->rx_desc[slot].ctrl);
 
 		if (ctrl & MAL_RX_CTRL_EMPTY)
 			break;
 
 		skb = dev->rx_skb[slot];
 		mb();
-		len = dev->rx_desc[slot].data_len;
+		len = be16_to_cpu(dev->rx_desc[slot].data_len);
 
 		if (unlikely(!MAL_IS_SINGLE_RX(ctrl)))
 			goto sg;
@@ -1757,7 +1757,7 @@ static int emac_poll_rx(void *param, int budget)
 
 	if (unlikely(budget && test_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags))) {
 		mb();
-		if (!(dev->rx_desc[slot].ctrl & MAL_RX_CTRL_EMPTY)) {
+		if (!(dev->rx_desc[slot].ctrl & cpu_to_be16(MAL_RX_CTRL_EMPTY))) {
 			DBG2(dev, "rx restart" NL);
 			received = 0;
 			goto again;
@@ -1783,7 +1783,7 @@ static int emac_peek_rx(void *param)
 {
 	struct emac_instance *dev = param;
 
-	return !(dev->rx_desc[dev->rx_slot].ctrl & MAL_RX_CTRL_EMPTY);
+	return !(dev->rx_desc[dev->rx_slot].ctrl & cpu_to_be16(MAL_RX_CTRL_EMPTY));
 }
 
 /* NAPI poll context */
@@ -1793,7 +1793,7 @@ static int emac_peek_rx_sg(void *param)
 
 	int slot = dev->rx_slot;
 	while (1) {
-		u16 ctrl = dev->rx_desc[slot].ctrl;
+		u16 ctrl = be16_to_cpu(dev->rx_desc[slot].ctrl);
 		if (ctrl & MAL_RX_CTRL_EMPTY)
 			return 0;
 		else if (ctrl & MAL_RX_CTRL_LAST)
@@ -2367,14 +2367,14 @@ static int __devinit emac_read_uint_prop(struct device_node *np, const char *nam
 					 u32 *val, int fatal)
 {
 	int len;
-	const u32 *prop = of_get_property(np, name, &len);
+	const __be32 *prop = of_get_property(np, name, &len);
 	if (prop == NULL || len < sizeof(u32)) {
 		if (fatal)
 			printk(KERN_ERR "%s: missing %s property\n",
 			       np->full_name, name);
 		return -ENODEV;
 	}
-	*val = *prop;
+	*val = be32_to_cpup(prop);
 	return 0;
 }
 
@@ -3013,7 +3013,7 @@ static void __init emac_make_bootlist(void)
 
 	/* Collect EMACs */
 	while((np = of_find_all_nodes(np)) != NULL) {
-		const u32 *idx;
+		const __be32 *idx;
 
 		if (of_match_node(emac_match, np) == NULL)
 			continue;
@@ -3022,7 +3022,7 @@ static void __init emac_make_bootlist(void)
 		idx = of_get_property(np, "cell-index", NULL);
 		if (idx == NULL)
 			continue;
-		cell_indices[i] = *idx;
+		cell_indices[i] = be32_to_cpup(idx);
 		emac_boot_list[i++] = of_node_get(np);
 		if (i >= EMAC_BOOT_LIST_SIZE) {
 			of_node_put(np);
diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c
index d5717e2..9e4939e 100644
--- a/drivers/net/ibm_newemac/mal.c
+++ b/drivers/net/ibm_newemac/mal.c
@@ -524,7 +524,7 @@ static int __devinit mal_probe(struct platform_device *ofdev,
 	int err = 0, i, bd_size;
 	int index = mal_count++;
 	unsigned int dcr_base;
-	const u32 *prop;
+	const __be32 *prop;
 	u32 cfg;
 	unsigned long irqflags;
 	irq_handler_t hdlr_serr, hdlr_txde, hdlr_rxde;
@@ -550,7 +550,7 @@ static int __devinit mal_probe(struct platform_device *ofdev,
 		err = -ENODEV;
 		goto fail;
 	}
-	mal->num_tx_chans = prop[0];
+	mal->num_tx_chans = be32_to_cpu(prop[0]);
 
 	prop = of_get_property(ofdev->dev.of_node, "num-rx-chans", NULL);
 	if (prop == NULL) {
@@ -560,7 +560,7 @@ static int __devinit mal_probe(struct platform_device *ofdev,
 		err = -ENODEV;
 		goto fail;
 	}
-	mal->num_rx_chans = prop[0];
+	mal->num_rx_chans = be32_to_cpu(prop[0]);
 
 	dcr_base = dcr_resource_start(ofdev->dev.of_node, 0);
 	if (dcr_base == 0) {
diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h
index 6608421..b8ee413 100644
--- a/drivers/net/ibm_newemac/mal.h
+++ b/drivers/net/ibm_newemac/mal.h
@@ -147,9 +147,9 @@ static inline int mal_tx_chunks(int len)
 
 /* MAL Buffer Descriptor structure */
 struct mal_descriptor {
-	u16 ctrl;		/* MAL / Commac status control bits */
-	u16 data_len;		/* Max length is 4K-1 (12 bits)     */
-	u32 data_ptr;		/* pointer to actual data buffer    */
+	__be16 ctrl;		/* MAL / Commac status control bits */
+	__be16 data_len;	/* Max length is 4K-1 (12 bits)     */
+	__be32 data_ptr;	/* pointer to actual data buffer    */
 };
 
 /* the following defines are for the MadMAL status and control registers. */
-- 
1.7.1

^ permalink raw reply related

* Introduce support for little endian PowerPC
From: Ian Munsie @ 2010-10-01  7:05 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh; +Cc: paulus

Some PowerPC processors can be run in either big or little endian modes, some
others can map selected pages of memory as little endian, which allows the same
thing. Until now we have only supported the default big endian mode in Linux.
This patch set introduces little endian support for the 44x family of PowerPC
processors.

This patch set in combination with a patched GCC, binutils, uClibc and
buildroot has allowed for a full proof of concept little endian environment on
a 440 Taishan board, which was able to successfully run busybox, OpenSSH and a
handful of other userspace programs without problems.

I am not currently in a position to release my patches for the toolchain, but
in the meantime I thought I would send my patches on the kernel side for review
and feedback.

While my toolchain patches are necessary to support the powerpcle-linux target,
the kernel can still be built with an unpatched toolchain using the
powerpcle-elf target instead. Userspace programs, however, do require the
patched toolchain to build.

Since the processor still starts as big endian, the boot wrapper must still be
compiled with a big endian 32bit toolchain. This can be passed into the build
as CROSSBE_COMPILE in the same manner as the CROSS_COMPILE variable. When the
boot wrapper is finished it runs some platform specific assembly to switch to
little endian and execute the kernel.

This is not yet complete support for little endian PowerPC, some outstanding
issues that I am aware of are:
 * We only support 32bit PowerPC for now (and indeed, only 44x)
 * The vdso has not been fixed to be endian agnostic - any userspace program
   accessing it will get an unexpected result.
 * I have not touched PCI at all
 * Remaining device tree accesses still need to be examined to ensure they are
   correctly handling the endianess of the device tree.
 * Any other driver that uses the device tree is likely be broken for the same reason.
 * I've included a patch for the alignment handler, however it is as yet
   completely untested due to a property of the hardware I've been using for
   testing.

^ permalink raw reply

* [PATCH 01/18] powerpc: Add ability to build little endian kernels
From: Ian Munsie @ 2010-10-01  7:05 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh
  Cc: Michal Marek, Albert Herranz, paulus, Ian Munsie, Andreas Schwab,
	Andrew Morton, Sam Ravnborg, Torez Smith
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

This patch allows the kbuild system to successfully compile a kernel for
the little endian PowerPC architecture.

To build such a kernel a supported platform must be used and
CONFIG_CPU_LITTLE_ENDIAN must be set. CROSS_COMPILE must be set to a
suitable toolchain prefix (compiled for the powerpcle-elf target, or
patched to allow the powerpcle-linux target).

Since the system will always start in big endian mode, the zImage
wrapper must still be compiled with a big endian toolchain, which can be
specified via CROSS32_COMPILE.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 arch/powerpc/Makefile                   |   22 +++++++++++++++++++---
 arch/powerpc/boot/Makefile              |    3 ++-
 arch/powerpc/kernel/vdso32/vdso32.lds.S |    4 ++++
 arch/powerpc/platforms/Kconfig.cputype  |   16 ++++++++++++++++
 4 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index b7212b6..3eed685 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -56,11 +56,27 @@ endif
 
 UTS_MACHINE := $(OLDARCH)
 
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+override CC	+= -mlittle-endian
+override AS	+= -mlittle-endian
+override LD	+= -EL
+LDEMULATION	:= lppc
+GNUTARGET	:= powerpcle
+MULTIPLEWORD	:= -mno-multiple
+else
+override CC	+= -mbig-endian
+override AS	+= -mbig-endian
+override LD	+= -EB
+LDEMULATION	:= ppc
+GNUTARGET	:= powerpc
+MULTIPLEWORD	:= -mmultiple
+endif
+
 ifeq ($(HAS_BIARCH),y)
 override AS	+= -a$(CONFIG_WORD_SIZE)
-override LD	+= -m elf$(CONFIG_WORD_SIZE)ppc
+override LD	+= -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
 override CC	+= -m$(CONFIG_WORD_SIZE)
-override AR	:= GNUTARGET=elf$(CONFIG_WORD_SIZE)-powerpc $(AR)
+override AR	:= GNUTARGET=elf$(CONFIG_WORD_SIZE)-$(GNUTARGET) $(AR)
 endif
 
 LDFLAGS_vmlinux-yy := -Bstatic
@@ -68,7 +84,7 @@ LDFLAGS_vmlinux-$(CONFIG_PPC64)$(CONFIG_RELOCATABLE) := -pie
 LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-yy)
 
 CFLAGS-$(CONFIG_PPC64)	:= -mminimal-toc -mtraceback=none  -mcall-aixdesc
-CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 -mmultiple
+CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 $(MULTIPLEWORD)
 KBUILD_CPPFLAGS	+= -Iarch/$(ARCH)
 KBUILD_AFLAGS	+= -Iarch/$(ARCH)
 KBUILD_CFLAGS	+= -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y)
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index fae8192..39f10a4 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -22,7 +22,8 @@ all: $(obj)/zImage
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-		 -isystem $(shell $(CROSS32CC) -print-file-name=include)
+		 -isystem $(shell $(CROSS32CC) -print-file-name=include) \
+		 -mbig-endian
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
 ifdef CONFIG_DEBUG_INFO
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 0546bcd..f4c6676 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -4,7 +4,11 @@
  */
 #include <asm/vdso.h>
 
+#ifdef __LITTLE_ENDIAN__
+OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
+#else
 OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+#endif
 OUTPUT_ARCH(powerpc:common)
 ENTRY(_start)
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index d361f81..074ff12 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -329,3 +329,19 @@ config CHECK_CACHE_COHERENCY
 	bool
 
 endmenu
+
+config ARCH_SUPPORTS_LITTLE_ENDIAN
+	bool
+
+config CPU_LITTLE_ENDIAN
+	bool "Build little endian kernel"
+	depends on ARCH_SUPPORTS_LITTLE_ENDIAN && EXPERIMENTAL
+	default n
+	help
+	  This option selects whether a big endian or little endian kernel will
+	  be built.
+
+	  Note that if building a little endian kernel, CROSS_COMPILE must
+	  point to a toolchain capable of targetting little endian powerpc,
+	  while the toolchain specified by CROSS32_COMPILE must be capable of
+	  targetting *BIG* endian PowerPC.
-- 
1.7.1

^ permalink raw reply related

* [PATCH 02/18] powerpc: Add CROSSBE_COMPILE to build big endian boot wrapper
From: Ian Munsie @ 2010-10-01  7:05 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh
  Cc: Michal Marek, Albert Herranz, paulus, Ian Munsie, Andrew Morton,
	Sam Ravnborg
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

Since the boot wrapper must be built by a big endian 32bit toolchain
regardless of what the rest of the kernel is using introduce a new
parameter to specify that toolchain - CROSSBE_COMPILE.

We already have CROSS32_COMPILE which is already used for the boot
wrapper, but it is also used to build the 32bit vdso which should be
build in the same endianness as the rest of the kernel, so it is
necessary to be able to specify the toolchain to build the boot wrapper
separately from that used to build the vdso and again separately from
that used to build the main kernel.

CROSSBE_COMPILE should be pointed to a toolchain capable of targeting
32bit big endian powerpc, either specifically targetted at 32bit or
bi-arch 64 and 32bit. If CROSSBE_COMPILE is not specified it will fall
back to CROSS32_COMPILE to maintain compatibility with big endian
targets.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 arch/powerpc/Makefile      |   19 +++++++++++++++++--
 arch/powerpc/boot/Makefile |   25 +++++++++++++++----------
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 3eed685..ab69b0e 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -14,7 +14,7 @@
 
 HAS_BIARCH	:= $(call cc-option-yn, -m32)
 
-# Set default 32 bits cross compilers for vdso and boot wrapper
+# Set default 32 bits cross compilers for vdso
 CROSS32_COMPILE ?=
 
 CROSS32CC		:= $(CROSS32_COMPILE)gcc
@@ -27,7 +27,22 @@ CROSS32AR	:= GNUTARGET=elf32-powerpc $(AR)
 endif
 endif
 
-export CROSS32CC CROSS32AR
+# Set default big endian 32 bits cross compiler for boot wrapper
+ifeq ($(CROSSBE_COMPILE),)
+CROSSBECC	:= $(CROSS32CC)
+CROSSBEAR	:= $(CROSS32AR)
+else
+CROSSBECC	:= $(CROSSBE_COMPILE)gcc
+CROSSBEAR	:= $(CROSSBE_COMPILE)ar
+endif
+
+ifeq ($(call try-run,\
+	$(CROSSBECC) -m32 -c -xc /dev/null -o "$$TMP",y,n),y)
+CROSSBECC	:= $(CROSSBECC) -m32
+CROSSBEAR	:= GNUTARGET=elf32-powerpc $(CROSSBEAR)
+endif
+
+export CROSS32CC CROSS32AR CROSSBECC CROSSBEAR
 
 ifeq ($(CROSS_COMPILE),)
 KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 39f10a4..79d7e69 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -6,23 +6,24 @@
 # Based on coffboot by Paul Mackerras
 # Simplified for ppc64 by Todd Inglett
 #
-# NOTE:	this code is built for 32 bit in ELF32 format even though
-#	it packages a 64 bit kernel.  We do this to simplify the
+# NOTE:	this code is built for 32 bit big endian in ELF32 format even
+#	though it packages a 64 bit kernel.  We do this to simplify the
 #	bootloader and increase compatibility with OpenFirmware.
 #
-#	To this end we need to define BOOTCC, etc, as the tools
-#	needed to build the 32 bit image.  That's normally the same
+#	To this end we need to define BOOTCC, etc, as the tools needed
+#	to build the 32 bit big endian image.  That's normally the same
 #	compiler for the rest of the kernel, with the -m32 flag added.
 #	To make it easier to setup a cross compiler,
-#	CROSS32_COMPILE is setup as a prefix just like CROSS_COMPILE
-#	in the toplevel makefile.
+#	CROSSBE_COMPILE is setup as a prefix just like CROSS_COMPILE
+#	in the toplevel makefile. If building a big endian kernel,
+#	CROSS32_COMPILE may be used in it's place.
 
 all: $(obj)/zImage
 
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-		 -isystem $(shell $(CROSS32CC) -print-file-name=include) \
+		 -isystem $(shell $(CROSSBECC) -print-file-name=include) \
 		 -mbig-endian
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
@@ -122,13 +123,13 @@ clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \
 		empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
 
 quiet_cmd_bootcc = BOOTCC  $@
-      cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
+      cmd_bootcc = $(CROSSBECC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
 
 quiet_cmd_bootas = BOOTAS  $@
-      cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
+      cmd_bootas = $(CROSSBECC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR  $@
-      cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+      cmd_bootar = $(CROSSBEAR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
 
 $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
 	$(call if_changed_dep,bootcc)
@@ -157,6 +158,9 @@ wrapperbits	:= $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \
 #############
 # Bits for building various flavours of zImage
 
+ifneq ($(CROSSBE_COMPILE),)
+CROSSWRAP := -C "$(CROSSBE_COMPILE)"
+else
 ifneq ($(CROSS32_COMPILE),)
 CROSSWRAP := -C "$(CROSS32_COMPILE)"
 else
@@ -164,6 +168,7 @@ ifneq ($(CROSS_COMPILE),)
 CROSSWRAP := -C "$(CROSS_COMPILE)"
 endif
 endif
+endif
 
 # args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd
 quiet_cmd_wrap	= WRAP    $@
-- 
1.7.1

^ permalink raw reply related

* [PATCH 03/18] powerpc: Support parsing a little endian kernel from zImage wrapper
From: Ian Munsie @ 2010-10-01  7:05 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, benh; +Cc: paulus, Ian Munsie
In-Reply-To: <1285916771-18033-1-git-send-email-imunsie@au1.ibm.com>

From: Ian Munsie <imunsie@au1.ibm.com>

This patch adds support to the PowerPC zImage wrapper (which always runs
in big endian mode) to detect if the zImage is little endian and parse
it's ELF header to enable it's successful extraction.

It also provides some infrastructure for executing a little endian
kernel - PowerPC platforms that support little endian should fill
platform_ops.le_kentry with a function pointer to the routine
responsible for switching the CPU to little endian and executing the
kernel. This routing takes the same arguments in the same positions as
kentry to allow them to easily be passed onto the kernel, with the
kentry pointer itself tacked on as argument 4.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
---
 arch/powerpc/boot/elf.h       |    2 +-
 arch/powerpc/boot/elf_util.c  |   48 +++++++++++++++++++++++++++++++++++++---
 arch/powerpc/boot/main.c      |   26 +++++++++++++++++-----
 arch/powerpc/boot/ops.h       |    3 ++
 arch/powerpc/boot/prpmc2800.c |    5 +++-
 arch/powerpc/boot/swab.h      |   26 ++++++++++++++++++++++
 6 files changed, 98 insertions(+), 12 deletions(-)
 create mode 100644 arch/powerpc/boot/swab.h

diff --git a/arch/powerpc/boot/elf.h b/arch/powerpc/boot/elf.h
index 1941bc5..9de8105 100644
--- a/arch/powerpc/boot/elf.h
+++ b/arch/powerpc/boot/elf.h
@@ -152,6 +152,6 @@ struct elf_info {
 	unsigned long elfoffset;
 };
 int parse_elf64(void *hdr, struct elf_info *info);
-int parse_elf32(void *hdr, struct elf_info *info);
+int parse_elf32(void *hdr, struct elf_info *info, int *little_endian);
 
 #endif				/* _PPC_BOOT_ELF_H_ */
diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
index 1567a0c..67bba80 100644
--- a/arch/powerpc/boot/elf_util.c
+++ b/arch/powerpc/boot/elf_util.c
@@ -14,6 +14,7 @@
 #include "page.h"
 #include "string.h"
 #include "stdio.h"
+#include "swab.h"
 
 int parse_elf64(void *hdr, struct elf_info *info)
 {
@@ -47,7 +48,35 @@ int parse_elf64(void *hdr, struct elf_info *info)
 	return 1;
 }
 
-int parse_elf32(void *hdr, struct elf_info *info)
+void byteswap_elf32(Elf32_Ehdr *elf32) {
+	Elf32_Phdr *elf32ph;
+
+	swab16s(&elf32->e_type);
+	swab16s(&elf32->e_machine);
+	swab32s(&elf32->e_version);
+	swab32s(&elf32->e_entry);
+	swab32s(&elf32->e_phoff);
+	swab32s(&elf32->e_shoff);
+	swab32s(&elf32->e_flags);
+	swab16s(&elf32->e_ehsize);
+	swab16s(&elf32->e_phentsize);
+	swab16s(&elf32->e_phnum);
+	swab16s(&elf32->e_shentsize);
+	swab16s(&elf32->e_shnum);
+	swab16s(&elf32->e_shstrndx);
+
+	elf32ph = (Elf32_Phdr *) ((unsigned long)elf32 + elf32->e_phoff);
+	swab32s(&elf32ph->p_type);
+	swab32s(&elf32ph->p_offset);
+	swab32s(&elf32ph->p_vaddr);
+	swab32s(&elf32ph->p_paddr);
+	swab32s(&elf32ph->p_filesz);
+	swab32s(&elf32ph->p_memsz);
+	swab32s(&elf32ph->p_flags);
+	swab32s(&elf32ph->p_align);
+}
+
+int parse_elf32(void *hdr, struct elf_info *info, int *little_endian)
 {
 	Elf32_Ehdr *elf32 = hdr;
 	Elf32_Phdr *elf32ph;
@@ -57,9 +86,20 @@ int parse_elf32(void *hdr, struct elf_info *info)
 	      elf32->e_ident[EI_MAG1]  == ELFMAG1	&&
 	      elf32->e_ident[EI_MAG2]  == ELFMAG2	&&
 	      elf32->e_ident[EI_MAG3]  == ELFMAG3	&&
-	      elf32->e_ident[EI_CLASS] == ELFCLASS32	&&
-	      elf32->e_ident[EI_DATA]  == ELFDATA2MSB	&&
-	      (elf32->e_type            == ET_EXEC ||
+	      elf32->e_ident[EI_CLASS] == ELFCLASS32))
+		return 0;
+	switch(elf32->e_ident[EI_DATA]) {
+		case ELFDATA2MSB:
+			*little_endian = 0;
+			break;
+		case ELFDATA2LSB:
+			*little_endian = 1;
+			byteswap_elf32(elf32);
+			break;
+		default:
+			return 0;
+	}
+	if (!((elf32->e_type            == ET_EXEC ||
 	       elf32->e_type            == ET_DYN)      &&
 	      elf32->e_machine         == EM_PPC))
 		return 0;
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index a28f021..36dd2b6 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -27,7 +27,7 @@ struct addr_range {
 
 #undef DEBUG
 
-static struct addr_range prep_kernel(void)
+static struct addr_range prep_kernel(int *little_endian)
 {
 	char elfheader[256];
 	void *vmlinuz_addr = _vmlinux_start;
@@ -40,8 +40,10 @@ static struct addr_range prep_kernel(void)
 	gunzip_start(&gzstate, vmlinuz_addr, vmlinuz_size);
 	gunzip_exactly(&gzstate, elfheader, sizeof(elfheader));
 
-	if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
+	if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei, little_endian))
 		fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
+	if (*little_endian && !platform_ops.le_kentry)
+		fatal("Little Endian kernel unsupported on this platform!");
 
 	if (platform_ops.image_hdr)
 		platform_ops.image_hdr(elfheader);
@@ -166,8 +168,10 @@ void start(void)
 {
 	struct addr_range vmlinux, initrd;
 	kernel_entry_t kentry;
+	le_kernel_entry_t le_kentry;
 	unsigned long ft_addr = 0;
 	void *chosen;
+	int little_endian = 0;
 
 	/* Do this first, because malloc() could clobber the loader's
 	 * command line.  Only use the loader command line if a
@@ -189,7 +193,7 @@ void start(void)
 	if (!chosen)
 		chosen = create_node(NULL, "chosen");
 
-	vmlinux = prep_kernel();
+	vmlinux = prep_kernel(&little_endian);
 	initrd = prep_initrd(vmlinux, chosen,
 			     loader_info.initrd_addr, loader_info.initrd_size);
 	prep_cmdline(chosen);
@@ -206,11 +210,21 @@ void start(void)
 		console_ops.close();
 
 	kentry = (kernel_entry_t) vmlinux.addr;
+	le_kentry = (le_kernel_entry_t)platform_ops.le_kentry;
 	if (ft_addr)
-		kentry(ft_addr, 0, NULL);
+		if (little_endian)
+			le_kentry(ft_addr, 0, NULL, kentry);
+		else
+			kentry(ft_addr, 0, NULL);
 	else
-		kentry((unsigned long)initrd.addr, initrd.size,
-		       loader_info.promptr);
+		if (little_endian)
+			le_kentry((unsigned long)initrd.addr, initrd.size,
+			       loader_info.promptr, kentry);
+		else
+			kentry((unsigned long)initrd.addr, initrd.size,
+			       loader_info.promptr);
+
+	kentry(ft_addr, 0, NULL);
 
 	/* console closed so printf in fatal below may not work */
 	fatal("Error: Linux kernel returned to zImage boot wrapper!\n\r");
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index b3218ce..cd6c7bf 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -20,6 +20,8 @@
 #define	MAX_PROP_LEN		256 /* What should this be? */
 
 typedef void (*kernel_entry_t)(unsigned long r3, unsigned long r4, void *r5);
+typedef void (*le_kernel_entry_t)(unsigned long r3, unsigned long r4, void *r5,
+				  kernel_entry_t kentry);
 
 /* Platform specific operations */
 struct platform_ops {
@@ -30,6 +32,7 @@ struct platform_ops {
 	void *	(*realloc)(void *ptr, unsigned long size);
 	void	(*exit)(void);
 	void *	(*vmlinux_alloc)(unsigned long size);
+	le_kernel_entry_t le_kentry;
 };
 extern struct platform_ops platform_ops;
 
diff --git a/arch/powerpc/boot/prpmc2800.c b/arch/powerpc/boot/prpmc2800.c
index da31d60..6bad899 100644
--- a/arch/powerpc/boot/prpmc2800.c
+++ b/arch/powerpc/boot/prpmc2800.c
@@ -519,6 +519,7 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
 	void *vmlinuz_addr = _vmlinux_start;
 	unsigned long vmlinuz_size = _vmlinux_end - _vmlinux_start;
 	char elfheader[256];
+	int little_endian;
 
 	if (dt_size <= 0) /* No fdt */
 		exit();
@@ -533,7 +534,9 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
 	gunzip_start(&gzstate, vmlinuz_addr, vmlinuz_size);
 	gunzip_exactly(&gzstate, elfheader, sizeof(elfheader));
 
-	if (!parse_elf32(elfheader, &ei))
+	if (!parse_elf32(elfheader, &ei, &little_endian))
+		exit();
+	if (little_endian)
 		exit();
 
 	heap_start = (char *)(ei.memsize + ei.elfoffset); /* end of kernel*/
diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h
new file mode 100644
index 0000000..b122c2d
--- /dev/null
+++ b/arch/powerpc/boot/swab.h
@@ -0,0 +1,26 @@
+#ifndef _SWAB_H_
+#define _SWAB_H_
+
+#include "types.h"
+
+static __inline__ void st_le16(volatile u16 *addr, const u16 val)
+{
+	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void swab16s(u16 *addr)
+{
+	st_le16(addr, *addr);
+}
+
+static __inline__ void st_le32(volatile u32 *addr, const u32 val)
+{
+	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void swab32s(u32 *addr)
+{
+	st_le32(addr, *addr);
+}
+
+#endif
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox