* [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
@ 2007-03-15 23:29 Wolfgang Denk
2007-03-16 5:27 ` Paul Mackerras
` (2 more replies)
0 siblings, 3 replies; 25+ messages in thread
From: Wolfgang Denk @ 2007-03-15 23:29 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-raid
This is a driver for the XOR and DMA1,2 engines on the PPC 440SPe
processors. It includes support for using the engines asynchronously
to perform such operations as copy, xor calculations, xor_zero_check
operations and some other. The driver is adapted for use inside the
ADMA sub-system.
This patch is based on and requires a set of patches posted to the
linux-raid mailing list by Dan Williams on 2007-01-23:
[PATCH 2.6.20-rc5 01/12] dmaengine: add base support for the async_tx api
http://marc.theaimsgroup.com/?l=linux-kernel&m=116957843221563&q=raw
[PATCH 02/12] dmaengine: add the async_tx api
http://marc.theaimsgroup.com/?l=linux-raid&m=116952392528235&q=raw
[PATCH 03/12] md: add raid5_run_ops and support routines
http://marc.theaimsgroup.com/?l=linux-kernel&m=116952392407474&q=raw
[PATCH 04/12] md: use raid5_run_ops for stripe cache operations
http://marc.theaimsgroup.com/?l=linux-kernel&m=116952392416825&q=raw
[PATCH 05/12] md: move write operations to raid5_run_ops
http://marc.theaimsgroup.com/?l=linux-raid&m=116952392615357&q=raw
[PATCH 06/12] md: move raid5 compute block operations to raid5_run_ops
http://marc.theaimsgroup.com/?l=linux-raid&m=116952392509989&q=raw
[PATCH 07/12] md: move raid5 parity checks to raid5_run_ops
http://marc.theaimsgroup.com/?l=linux-raid&m=116952306910263&q=raw
[PATCH 08/12] md: satisfy raid5 read requests via raid5_run_ops
http://marc.theaimsgroup.com/?l=linux-kernel&m=116952392304938&q=raw
[PATCH 09/12] md: use async_tx and raid5_run_ops for raid5 expansion operations
http://marc.theaimsgroup.com/?l=linux-raid&m=116952392405885&q=raw
[PATCH 10/12] md: move raid5 io requests to raid5_run_ops
http://marc.theaimsgroup.com/?l=linux-raid&m=116952392409725&q=raw
[PATCH 11/12] md: remove raid5 compute_block and compute_parity5
http://marc.theaimsgroup.com/?l=linux-kernel&m=116952392323697&q=raw
[PATCH 12/12] dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines
http://marc.theaimsgroup.com/?l=linux-kernel&m=116952307012911&q=raw
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Wolfgang Denk <wd@denx.de>
---
arch/ppc/platforms/4xx/ppc440spe.c | 209 +++++++
arch/ppc/syslib/ppc440spe_pcie.h | 2 +
drivers/dma/Kconfig | 7 +
drivers/dma/Makefile | 1 +
drivers/dma/spe-adma.c | 1071 ++++++++++++++++++++++++++++++++++++
include/asm-ppc/adma.h | 715 ++++++++++++++++++++++++
include/asm-ppc/ppc440spe_dma.h | 214 +++++++
include/asm-ppc/ppc440spe_xor.h | 131 +++++
8 files changed, 2350 insertions(+), 0 deletions(-)
create mode 100644 drivers/dma/spe-adma.c
create mode 100644 include/asm-ppc/adma.h
create mode 100644 include/asm-ppc/ppc440spe_dma.h
create mode 100644 include/asm-ppc/ppc440spe_xor.h
diff --git a/arch/ppc/platforms/4xx/ppc440spe.c b/arch/ppc/platforms/4xx/ppc440spe.c
index 1be5d1c..6bdfb47 100644
--- a/arch/ppc/platforms/4xx/ppc440spe.c
+++ b/arch/ppc/platforms/4xx/ppc440spe.c
@@ -22,6 +22,13 @@
#include <asm/ocp.h>
#include <asm/ppc4xx_pic.h>
+#if defined(CONFIG_AMCC_SPE_ADMA)
+#include <syslib/ppc440spe_pcie.h>
+#include <linux/async_tx.h>
+#include <linux/platform_device.h>
+#include <asm/adma.h>
+#endif
+
static struct ocp_func_emac_data ppc440spe_emac0_def = {
.rgmii_idx = -1, /* No RGMII */
.rgmii_mux = -1, /* No RGMII */
@@ -144,3 +151,205 @@ struct ppc4xx_uic_settings ppc4xx_core_uic_cfg[] __initdata = {
.ext_irq_mask = 0x00000000,
},
};
+
+#if defined(CONFIG_AMCC_SPE_ADMA)
+
+static u64 ppc440spe_adma_dmamask = DMA_32BIT_MASK;
+
+/* DMA and XOR platform devices' resources */
+static struct resource ppc440spe_dma_0_resources[] = {
+ {
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = DMA0_CS_FIFO_NEED_SERVICE,
+ .end = DMA0_CS_FIFO_NEED_SERVICE,
+ .flags = IORESOURCE_IRQ
+ }
+};
+
+static struct resource ppc440spe_dma_1_resources[] = {
+ {
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = DMA1_CS_FIFO_NEED_SERVICE,
+ .end = DMA1_CS_FIFO_NEED_SERVICE,
+ .flags = IORESOURCE_IRQ
+ }
+};
+
+static struct resource ppc440spe_xor_resources[] = {
+ {
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = XOR_INTERRUPT,
+ .end = XOR_INTERRUPT,
+ .flags = IORESOURCE_IRQ
+ }
+};
+
+/* DMA and XOR platform devices' data */
+static struct spe_adma_platform_data ppc440spe_dma_0_data = {
+ .hw_id = PPC440SPE_DMA0_ID,
+ .capabilities = DMA_CAP_MEMCPY | DMA_CAP_INTERRUPT,
+ .pool_size = PAGE_SIZE,
+};
+
+static struct spe_adma_platform_data ppc440spe_dma_1_data = {
+ .hw_id = PPC440SPE_DMA1_ID,
+ .capabilities = DMA_CAP_MEMCPY | DMA_CAP_INTERRUPT,
+ .pool_size = PAGE_SIZE,
+};
+
+static struct spe_adma_platform_data ppc440spe_xor_data = {
+ .hw_id = PPC440SPE_XOR_ID,
+ .capabilities = DMA_CAP_XOR | DMA_CAP_INTERRUPT,
+ .pool_size = PAGE_SIZE,
+};
+
+/* DMA and XOR platform devices definitions */
+static struct platform_device ppc440spe_dma_0_channel = {
+ .name = "SPE-ADMA",
+ .id = PPC440SPE_DMA0_ID,
+ .num_resources = ARRAY_SIZE(ppc440spe_dma_0_resources),
+ .resource = ppc440spe_dma_0_resources,
+ .dev = {
+ .dma_mask = &ppc440spe_adma_dmamask,
+ .coherent_dma_mask = DMA_64BIT_MASK,
+ .platform_data = (void *) &ppc440spe_dma_0_data,
+ },
+};
+
+static struct platform_device ppc440spe_dma_1_channel = {
+ .name = "SPE-ADMA",
+ .id = PPC440SPE_DMA1_ID,
+ .num_resources = ARRAY_SIZE(ppc440spe_dma_1_resources),
+ .resource = ppc440spe_dma_1_resources,
+ .dev = {
+ .dma_mask = &ppc440spe_adma_dmamask,
+ .coherent_dma_mask = DMA_64BIT_MASK,
+ .platform_data = (void *) &ppc440spe_dma_1_data,
+ },
+};
+
+static struct platform_device ppc440spe_xor_channel = {
+ .name = "SPE-ADMA",
+ .id = PPC440SPE_XOR_ID,
+ .num_resources = ARRAY_SIZE(ppc440spe_xor_resources),
+ .resource = ppc440spe_xor_resources,
+ .dev = {
+ .dma_mask = &ppc440spe_adma_dmamask,
+ .coherent_dma_mask = DMA_64BIT_MASK,
+ .platform_data = (void *) &ppc440spe_xor_data,
+ },
+};
+
+/*
+ * Init DMA0/1 and XOR engines; allocate memory for DMAx FIFOs; set platform_device
+ * memory resources addresses
+ */
+static void ppc440spe_configure_raid_devices(void)
+{
+ void *fifo_buf;
+ i2o_regs_t *i2o_reg;
+ dma_regs_t *dma_reg0, *dma_reg1;
+ xor_regs_t *xor_reg;
+ u32 mask;
+
+ printk ("%s\n", __FUNCTION__);
+
+ /*
+ * Map registers
+ */
+ i2o_reg = (i2o_regs_t *)ioremap64(I2O_MMAP_BASE, I2O_MMAP_SIZE);
+ dma_reg0 = (dma_regs_t *)ioremap64(DMA0_MMAP_BASE, DMA_MMAP_SIZE);
+ dma_reg1 = (dma_regs_t *)ioremap64(DMA1_MMAP_BASE, DMA_MMAP_SIZE);
+ xor_reg = (xor_regs_t *)ioremap64(XOR_MMAP_BASE,XOR_MMAP_SIZE);
+
+ /*
+ * Configure h/w
+ */
+
+ /* Reset I2O/DMA */
+ mtdcr(DCRN_SDR0_CFGADDR, 0x200);
+ mtdcr(DCRN_SDR0_CFGDATA, 0x10000);
+ mtdcr(DCRN_SDR0_CFGADDR, 0x200);
+ mtdcr(DCRN_SDR0_CFGDATA, 0x0);
+
+ /* Reset XOR */
+ out_be32(&xor_reg->crsr, XOR_CRSR_XASR_BIT);
+ out_be32(&xor_reg->crrr, XOR_CRSR_64BA_BIT);
+
+ /* Setup the base address of mmaped registers */
+ mtdcr(DCRN_I2O0_IBAH, 0x00000004);
+ mtdcr(DCRN_I2O0_IBAL, 0x00100001);
+
+ /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
+ * the base address of FIFO memory space
+ */
+ fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE)<<1, GFP_KERNEL | __GFP_DMA);
+
+ /* SetUp FIFO memory space base address */
+ out_le32(&i2o_reg->ifbah, 0);
+ out_le32(&i2o_reg->ifbal, ((u32)__pa(fifo_buf)));
+
+ /* zero FIFO size for I2O, DMAs; 0x1000 to enable DMA */
+ out_le32(&i2o_reg->ifsiz, 0);
+ out_le32(&dma_reg0->fsiz, 0x1000 | ((DMA0_FIFO_SIZE>>3) - 1));
+ out_le32(&dma_reg1->fsiz, 0x1000 | ((DMA1_FIFO_SIZE>>3) - 1));
+
+ /* Configure DMA engine */
+ out_le32(&dma_reg0->cfg, 0x0D880000);
+ out_le32(&dma_reg1->cfg, 0x0D880000);
+
+ /* Clear Status */
+ out_le32(&dma_reg0->dsts, ~0);
+ out_le32(&dma_reg1->dsts, ~0);
+
+ /* Unmask 'CS FIFO Attention' interrupts */
+ mask = in_le32(&i2o_reg->iopim) & ~0x48;
+ out_le32(&i2o_reg->iopim, mask);
+
+ /* enable XOR engine interrupt */
+ out_be32(&xor_reg->ier, XOR_IE_CBLCI_BIT | XOR_IE_CBCIE_BIT | 0x34000);
+
+ /*
+ * Unmap I2O registers
+ */
+ iounmap(i2o_reg);
+
+ /*
+ * Set resource addresses
+ */
+ ppc440spe_dma_0_channel.resource[0].start = (resource_size_t)(dma_reg0);
+ ppc440spe_dma_0_channel.resource[0].end =
+ ppc440spe_dma_0_channel.resource[0].start+DMA_MMAP_SIZE;
+
+ ppc440spe_dma_1_channel.resource[0].start = (resource_size_t)(dma_reg1);
+ ppc440spe_dma_1_channel.resource[0].end =
+ ppc440spe_dma_1_channel.resource[0].start+DMA_MMAP_SIZE;
+
+ ppc440spe_xor_channel.resource[0].start = (resource_size_t)(xor_reg);
+ ppc440spe_xor_channel.resource[0].end =
+ ppc440spe_xor_channel.resource[0].start+XOR_MMAP_SIZE;
+}
+
+static struct platform_device *ppc440spe_devs[] __initdata = {
+ &ppc440spe_dma_0_channel,
+ &ppc440spe_dma_1_channel,
+ &ppc440spe_xor_channel,
+};
+
+static int __init ppc440spe_register_raid_devices(void)
+{
+ ppc440spe_configure_raid_devices();
+ platform_add_devices(ppc440spe_devs, ARRAY_SIZE(ppc440spe_devs));
+
+ return 0;
+}
+
+arch_initcall(ppc440spe_register_raid_devices);
+#endif /* CONFIG_AMCC_SPE_ADMA */
+
diff --git a/arch/ppc/syslib/ppc440spe_pcie.h b/arch/ppc/syslib/ppc440spe_pcie.h
index 902ef23..e7099a3 100644
--- a/arch/ppc/syslib/ppc440spe_pcie.h
+++ b/arch/ppc/syslib/ppc440spe_pcie.h
@@ -13,6 +13,8 @@
#define DCRN_SDR0_CFGADDR 0x00e
#define DCRN_SDR0_CFGDATA 0x00f
+#define DCRN_I2O0_IBAL 0x066
+#define DCRN_I2O0_IBAH 0x067
#define DCRN_PCIE0_BASE 0x100
#define DCRN_PCIE1_BASE 0x120
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d61e3e5..46a6e69 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -49,4 +49,11 @@ config INTEL_IOP_ADMA
---help---
Enable support for the Intel(R) IOP Series RAID engines.
+config AMCC_SPE_ADMA
+ tristate "AMCC SPE ADMA support"
+ depends on DMA_ENGINE && 440SPE
+ default y
+ ---help---
+ Enable support for the AMCC 440SPe RAID engines.
+
endmenu
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 8ebf10d..8568e31 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_NET_DMA) += iovlock.o
obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_ASYNC_TX_DMA) += async_tx.o xor.o
+obj-$(CONFIG_AMCC_SPE_ADMA) += spe-adma.o
diff --git a/drivers/dma/spe-adma.c b/drivers/dma/spe-adma.c
new file mode 100644
index 0000000..5b1ada0
--- /dev/null
+++ b/drivers/dma/spe-adma.c
@@ -0,0 +1,1071 @@
+/*
+ * Copyright(c) 2006 DENX Engineering. All rights reserved.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (SPE 32x, 33x, 134x)
+ * ADMA driver.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <asm/adma.h>
+
+#define to_spe_adma_chan(chan) container_of(chan, struct spe_adma_chan, common)
+#define to_spe_adma_device(dev) container_of(dev, struct spe_adma_device, common)
+#define to_spe_adma_slot(lh) container_of(lh, struct spe_adma_desc_slot, slot_node)
+#define tx_to_spe_adma_slot(tx) container_of(tx, struct spe_adma_desc_slot, async_tx)
+
+#define SPE_ADMA_MAX_BYTE_COUNT 0xFFFFFF
+
+#define SPE_ADMA_DEBUG 0
+#define PRINTK(x...) ((void)(SPE_ADMA_DEBUG && printk(x)))
+
+/**
+ * spe_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &spe_chan->lock while calling this function
+ */
+static inline void spe_adma_free_slots(struct spe_adma_desc_slot *slot)
+{
+ int stride = slot->stride;
+
+ while (stride--) {
+ slot->stride = 0;
+ slot = list_entry(slot->slot_node.next,
+ struct spe_adma_desc_slot,
+ slot_node);
+ }
+}
+
+static inline dma_cookie_t
+spe_adma_run_tx_complete_actions(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *spe_chan, dma_cookie_t cookie)
+{
+
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+ desc->async_tx.cookie = 0;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->async_tx.type != DMA_INTERRUPT) {
+ struct spe_adma_desc_slot *unmap = desc->group_head;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t addr = spe_desc_get_dest_addr(unmap,
+ spe_chan);
+
+ dma_unmap_page(&spe_chan->device->pdev->dev, addr,
+ unmap->unmap_len, DMA_FROM_DEVICE);
+ while(src_cnt--) {
+ addr = spe_desc_get_src_addr(unmap,
+ spe_chan,
+ src_cnt);
+ dma_unmap_page(&spe_chan->device->pdev->dev, addr,
+ unmap->unmap_len, DMA_TO_DEVICE);
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx, &spe_chan->common);
+
+ return cookie;
+}
+
+static inline int
+spe_adma_clean_slot(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *spe_chan)
+{
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!desc->async_tx.ack)
+ return 0;
+
+ /* leave the last descriptor in the chain
+ * so we can append to it
+ */
+ if (desc->chain_node.next == &spe_chan->chain ||
+ desc->phys == spe_chan_get_current_descriptor(spe_chan))
+ return 1;
+
+ PRINTK("\tfree slot %x: %d stride: %d\n", desc->phys, desc->idx, desc->stride);
+
+ list_del(&desc->chain_node);
+
+ spe_adma_free_slots(desc);
+
+ return 0;
+}
+
+int spe_check_stride (struct dma_async_tx_descriptor *tx)
+{
+ struct spe_adma_desc_slot *p = tx_to_spe_adma_slot(tx);
+
+ return p->stride;
+}
+
+static void __spe_adma_slot_cleanup(struct spe_adma_chan *spe_chan)
+{
+ struct spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
+ dma_cookie_t cookie = 0;
+ u32 current_desc = spe_chan_get_current_descriptor(spe_chan);
+ int busy = spe_chan_is_busy(spe_chan);
+ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+ PRINTK ("spe adma%d: %s\n", spe_chan->device->id, __FUNCTION__);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+ list_for_each_entry_safe(iter, _iter, &spe_chan->chain,
+ chain_node) {
+ PRINTK ("\tcookie: %d slot: %d busy: %d "
+ "this_desc: %#x next_desc: %#x cur: %#x ack: %d\n",
+ iter->async_tx.cookie, iter->idx, busy, iter->phys,
+ spe_desc_get_next_desc(iter, spe_chan),
+ current_desc,
+ iter->async_tx.ack);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in process
+ * or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy, or if it appears that the current descriptor
+ * needs to be re-read (i.e. has been appended to)
+ */
+ if (iter->phys == current_desc) {
+ BUG_ON(seen_current++);
+ if (busy || spe_desc_get_next_desc(iter, spe_chan)) {
+ spe_adma_run_tx_complete_actions(iter, spe_chan, cookie);
+ break;
+ }
+ }
+
+ /* detect the start of a group transaction */
+ if (!slot_cnt && !slots_per_op) {
+ slot_cnt = iter->slot_cnt;
+ slots_per_op = iter->slots_per_op;
+ if (slot_cnt <= slots_per_op) {
+ slot_cnt = 0;
+ slots_per_op = 0;
+ }
+ }
+
+ if (slot_cnt) {
+ PRINTK("\tgroup++\n");
+ if (!group_start)
+ group_start = iter;
+ slot_cnt -= slots_per_op;
+ }
+
+ /* all the members of a group are complete */
+ if (slots_per_op != 0 && slot_cnt == 0) {
+ struct spe_adma_desc_slot *grp_iter, *_grp_iter;
+ int end_of_chain = 0;
+ PRINTK("\tgroup end\n");
+
+ /* collect the total results */
+ if (group_start->xor_check_result) {
+ u32 zero_sum_result = 0;
+ slot_cnt = group_start->slot_cnt;
+ grp_iter = group_start;
+
+ list_for_each_entry_from(grp_iter,
+ &spe_chan->chain, chain_node) {
+ PRINTK("\titer%d result: %d\n", grp_iter->idx,
+ zero_sum_result);
+ slot_cnt -= slots_per_op;
+ if (slot_cnt == 0)
+ break;
+ }
+ PRINTK("\tgroup_start->xor_check_result: %p\n",
+ group_start->xor_check_result);
+ *group_start->xor_check_result = zero_sum_result;
+ }
+
+ /* clean up the group */
+ slot_cnt = group_start->slot_cnt;
+ grp_iter = group_start;
+ list_for_each_entry_safe_from(grp_iter, _grp_iter,
+ &spe_chan->chain, chain_node) {
+
+ cookie = spe_adma_run_tx_complete_actions(
+ grp_iter, spe_chan, cookie);
+
+ slot_cnt -= slots_per_op;
+ end_of_chain = spe_adma_clean_slot(grp_iter,
+ spe_chan);
+
+ if (slot_cnt == 0 || end_of_chain)
+ break;
+ }
+
+ /* the group should be complete at this point */
+ BUG_ON(slot_cnt);
+
+ slots_per_op = 0;
+ group_start = NULL;
+ if (end_of_chain)
+ break;
+ else
+ continue;
+ } else if (slots_per_op) /* wait for group completion */
+ continue;
+
+ cookie = spe_adma_run_tx_complete_actions(iter, spe_chan, cookie);
+
+ if (spe_adma_clean_slot(iter, spe_chan))
+ break;
+ }
+
+ if (!seen_current) {
+ BUG();
+ }
+
+ if (cookie > 0) {
+ spe_chan->completed_cookie = cookie;
+ PRINTK("\tcompleted cookie %d\n", cookie);
+ }
+}
+
+static inline void
+spe_adma_slot_cleanup(struct spe_adma_chan *spe_chan)
+{
+ spin_lock_bh(&spe_chan->lock);
+ __spe_adma_slot_cleanup(spe_chan);
+ spin_unlock_bh(&spe_chan->lock);
+}
+
+static struct spe_adma_chan *spe_adma_chan_array[3];
+static void spe_adma0_task(unsigned long data)
+{
+ __spe_adma_slot_cleanup(spe_adma_chan_array[0]);
+}
+
+static void spe_adma1_task(unsigned long data)
+{
+ __spe_adma_slot_cleanup(spe_adma_chan_array[1]);
+}
+
+static void spe_adma2_task(unsigned long data)
+{
+ __spe_adma_slot_cleanup(spe_adma_chan_array[2]);
+}
+
+DECLARE_TASKLET(spe_adma0_tasklet, spe_adma0_task, 0);
+DECLARE_TASKLET(spe_adma1_tasklet, spe_adma1_task, 0);
+DECLARE_TASKLET(spe_adma2_tasklet, spe_adma2_task, 0);
+struct tasklet_struct *spe_adma_tasklet[] = {
+ &spe_adma0_tasklet,
+ &spe_adma1_tasklet,
+ &spe_adma2_tasklet,
+};
+
+static struct spe_adma_desc_slot *
+__spe_adma_alloc_slots(struct spe_adma_chan *spe_chan, int num_slots,
+ int slots_per_op, int recurse)
+{
+ struct spe_adma_desc_slot *iter = NULL, *alloc_start = NULL;
+ struct spe_adma_desc_slot *last_used = NULL, *last_op_head = NULL;
+ struct list_head chain = LIST_HEAD_INIT(chain);
+ int i;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+
+ for (i = 0; i < 2; i++) {
+ int slots_found = 0;
+ if (i == 0)
+ iter = spe_chan->last_used;
+ else {
+ iter = list_entry(&spe_chan->all_slots,
+ struct spe_adma_desc_slot,
+ slot_node);
+ }
+
+ list_for_each_entry_continue(iter, &spe_chan->all_slots, slot_node) {
+ if (iter->stride) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (i == 1)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++) {
+ if (spe_desc_is_aligned(iter, slots_per_op))
+ alloc_start = iter;
+ else {
+ slots_found = 0;
+ continue;
+ }
+ }
+
+ if (slots_found == num_slots) {
+ iter = alloc_start;
+ i = 0;
+ while (num_slots) {
+
+ /* pre-ack all but the last descriptor */
+ if (num_slots != slots_per_op)
+ iter->async_tx.ack = 1;
+ else
+ iter->async_tx.ack = 0;
+
+ PRINTK ("spe adma%d: allocated slot: %d "
+ "(desc %p phys: %#x) stride %d"
+ ",ack = %d\n",
+ spe_chan->device->id,
+ iter->idx, iter->hw_desc, iter->phys,
+ slots_per_op, iter->async_tx.ack);
+
+ list_add_tail(&iter->chain_node, &chain);
+ last_op_head = iter;
+ iter->async_tx.cookie = 0;
+ iter->hw_next = NULL;
+ iter->flags = 0;
+ iter->slot_cnt = num_slots;
+ iter->slots_per_op = slots_per_op;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->stride = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct spe_adma_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ last_op_head->group_head = alloc_start;
+ last_op_head->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &last_op_head->group_list);
+ spe_chan->last_used = last_used;
+ return last_op_head;
+ }
+ }
+ }
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(spe_adma_tasklet[spe_chan->device->id]);
+ return NULL;
+}
+
+static struct spe_adma_desc_slot *
+spe_adma_alloc_slots(struct spe_adma_chan *spe_chan,
+ int num_slots,
+ int slots_per_op)
+{
+ return __spe_adma_alloc_slots(spe_chan, num_slots, slots_per_op, 1);
+}
+
+static void spe_chan_start_null_xor(struct spe_adma_chan *spe_chan);
+
+/* returns the actual number of allocated descriptors */
+static int spe_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ struct spe_adma_desc_slot *slot = NULL;
+ char *hw_desc;
+ int i, db_sz;
+ int init = spe_chan->slots_allocated ? 0 : 1;
+ struct spe_adma_platform_data *plat_data;
+
+ chan->chan_id = spe_chan->device->id;
+ plat_data = spe_chan->device->pdev->dev.platform_data;
+
+ spin_lock_bh(&spe_chan->lock);
+ /* Allocate descriptor slots */
+ i = spe_chan->slots_allocated;
+ if (spe_chan->device->id != PPC440SPE_XOR_ID)
+ db_sz = sizeof (dma_cdb_t);
+ else
+ db_sz = sizeof (xor_cb_t);
+
+ for (; i < (plat_data->pool_size/db_sz); i++) {
+ slot = kzalloc(sizeof(struct spe_adma_desc_slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "SPE ADMA Channel only initialized"
+ " %d descriptor slots", i--);
+ break;
+ }
+
+ hw_desc = (char *) spe_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[i * db_sz];
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->group_list);
+ hw_desc = (char *) spe_chan->device->dma_desc_pool;
+ slot->phys = (dma_addr_t) &hw_desc[i * db_sz];
+ slot->idx = i;
+ list_add_tail(&slot->slot_node, &spe_chan->all_slots);
+ }
+
+ if (i && !spe_chan->last_used)
+ spe_chan->last_used = list_entry(spe_chan->all_slots.next,
+ struct spe_adma_desc_slot,
+ slot_node);
+
+ spe_chan->slots_allocated = i;
+ PRINTK("spe adma%d: allocated %d descriptor slots last_used: %p\n",
+ spe_chan->device->id, i, spe_chan->last_used);
+ spin_unlock_bh(&spe_chan->lock);
+
+ /* initialize the channel and the chain with a null operation */
+ if (init) {
+ if (test_bit(DMA_XOR,
+ &spe_chan->device->common.capabilities))
+ spe_chan_start_null_xor(spe_chan);
+ }
+
+ return (i > 0) ? i : -ENOMEM;
+}
+
+static inline dma_cookie_t
+spe_desc_assign_cookie(struct spe_adma_chan *spe_chan,
+ struct spe_adma_desc_slot *desc)
+{
+ dma_cookie_t cookie = spe_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ spe_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+static inline void spe_adma_check_threshold(struct spe_adma_chan *spe_chan)
+{
+ PRINTK("spe adma%d: pending: %d\n", spe_chan->device->id,
+ spe_chan->pending);
+
+ if (spe_chan->pending >= SPE_ADMA_THRESHOLD) {
+ spe_chan->pending = 0;
+ spe_chan_append(spe_chan);
+ }
+}
+
+
+static dma_cookie_t
+spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct spe_adma_desc_slot *sw_desc = tx_to_spe_adma_slot(tx);
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(tx->chan);
+ struct spe_adma_desc_slot *group_start, *old_chain_tail;
+ int slot_cnt;
+ int slots_per_op;
+ dma_cookie_t cookie;
+
+ group_start = sw_desc->group_head;
+ slot_cnt = group_start->slot_cnt;
+ slots_per_op = group_start->slots_per_op;
+
+ spin_lock_bh(&spe_chan->lock);
+
+ cookie = spe_desc_assign_cookie(spe_chan, sw_desc);
+
+ old_chain_tail = list_entry(spe_chan->chain.prev,
+ struct spe_adma_desc_slot, chain_node);
+ list_splice_init(&sw_desc->group_list, &old_chain_tail->chain_node);
+
+ /* fix up the hardware chain */
+ spe_desc_set_next_desc(old_chain_tail, spe_chan, group_start);
+
+ /* increment the pending count by the number of operations */
+ spe_chan->pending += slot_cnt / slots_per_op;
+ spe_adma_check_threshold(spe_chan);
+ spin_unlock_bh(&spe_chan->lock);
+
+ PRINTK("spe adma%d: %s cookie: %d slot: %d tx %p\n", spe_chan->device->id,
+ __FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx, sw_desc);
+
+ return cookie;
+}
+
+struct dma_async_tx_descriptor *
+spe_adma_prep_dma_interrupt(struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ struct spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op = 0;
+
+ PRINTK("*** spe adma%d: %s\n", spe_chan->device->id, __FUNCTION__);
+ spin_lock_bh(&spe_chan->lock);
+ slot_cnt = spe_chan_interrupt_slot_count(&slots_per_op, spe_chan);
+ sw_desc = spe_adma_alloc_slots(spe_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ spe_desc_init_interrupt(group_start, spe_chan);
+ sw_desc->async_tx.type = DMA_INTERRUPT;
+ }
+ spin_unlock_bh(&spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+struct dma_async_tx_descriptor *
+spe_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ struct spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > SPE_ADMA_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&spe_chan->lock);
+
+ PRINTK("spe adma%d: %s len: %u int_en %d\n",
+ spe_chan->device->id, __FUNCTION__, len, int_en);
+
+ slot_cnt = spe_chan_memcpy_slot_count(len, &slots_per_op);
+ sw_desc = spe_adma_alloc_slots(spe_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ spe_desc_init_memcpy(group_start, int_en);
+ spe_desc_set_byte_count(group_start, spe_chan, len);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.type = DMA_MEMCPY;
+ }
+ spin_unlock_bh(&spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+struct dma_async_tx_descriptor *
+spe_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
+ int int_en)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ struct spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > SPE_ADMA_XOR_MAX_BYTE_COUNT));
+
+ PRINTK("spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+ spe_chan->device->id, __FUNCTION__, src_cnt, len, int_en);
+
+ spin_lock_bh(&spe_chan->lock);
+ slot_cnt = spe_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = spe_adma_alloc_slots(spe_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ spe_desc_init_xor(group_start, src_cnt, int_en);
+ spe_desc_set_byte_count(group_start, spe_chan, len);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.type = DMA_XOR;
+ }
+ spin_unlock_bh(&spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void
+spe_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
+ int index)
+{
+ struct spe_adma_desc_slot *sw_desc = tx_to_spe_adma_slot(tx);
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(tx->chan);
+
+ /* to do: support transfers lengths > SPE_ADMA_MAX_BYTE_COUNT */
+ spe_desc_set_dest_addr(sw_desc->group_head, spe_chan, addr);
+}
+
+static void
+spe_adma_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
+ int index)
+{
+ struct spe_adma_desc_slot *sw_desc = tx_to_spe_adma_slot(tx);
+ struct spe_adma_desc_slot *group_start = sw_desc->group_head;
+
+ switch (tx->type) {
+ case DMA_MEMCPY:
+ spe_desc_set_memcpy_src_addr(
+ group_start,
+ addr,
+ group_start->slot_cnt,
+ group_start->slots_per_op);
+ break;
+ case DMA_XOR:
+ spe_desc_set_xor_src_addr(
+ group_start,
+ index,
+ addr,
+ group_start->slot_cnt,
+ group_start->slots_per_op);
+ break;
+ /* todo: case DMA_ZERO_SUM: */
+ /* todo: case DMA_PQ_XOR: */
+ /* todo: case DMA_DUAL_XOR: */
+ /* todo: case DMA_PQ_UPDATE: */
+ /* todo: case DMA_PQ_ZERO_SUM: */
+ /* todo: case DMA_MEMCPY_CRC32C: */
+ case DMA_MEMSET:
+ default:
+ do {
+ struct spe_adma_chan *spe_chan =
+ to_spe_adma_chan(tx->chan);
+ printk(KERN_ERR "spe adma%d: unsupport tx_type: %d\n",
+ spe_chan->device->id, tx->type);
+ BUG();
+ } while (0);
+ }
+}
+
+static inline void spe_adma_schedule_cleanup(unsigned long id)
+{
+ tasklet_schedule(spe_adma_tasklet[id]);
+}
+
+static void spe_adma_dependency_added(struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+
+ spe_adma_schedule_cleanup(spe_chan->device->id);
+}
+
+static void spe_adma_free_chan_resources(struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ struct spe_adma_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ spe_adma_slot_cleanup(spe_chan);
+
+ spin_lock_bh(&spe_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &spe_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe_reverse(iter, _iter, &spe_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ spe_chan->slots_allocated--;
+ }
+ spe_chan->last_used = NULL;
+
+ PRINTK("spe adma%d %s slots_allocated %d\n", spe_chan->device->id,
+ __FUNCTION__, spe_chan->slots_allocated);
+ spin_unlock_bh(&spe_chan->lock);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+}
+
+/**
+ * spe_adma_is_complete - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ */
+static enum dma_status spe_adma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = spe_chan->completed_cookie;
+
+ if (done)
+ *done= last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS)
+ return ret;
+
+ spe_adma_slot_cleanup(spe_chan);
+
+ last_used = chan->cookie;
+ last_complete = spe_chan->completed_cookie;
+
+ if (done)
+ *done= last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/*
+ * End of transfer interrupt
+ */
+static irqreturn_t spe_adma_eot_handler(int irq, void *data)
+{
+ int id = *(int *) data;
+
+ PRINTK("spe adma%d: %s\n", id, __FUNCTION__);
+
+ tasklet_schedule(spe_adma_tasklet[id]);
+ spe_adma_device_clear_eot_status(spe_adma_chan_array[id]);
+
+ return IRQ_HANDLED;
+}
+
+static void spe_adma_issue_pending(struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+
+ PRINTK("spe adma%d: %s %d \n", spe_chan->device->id, __FUNCTION__,
+ spe_chan->pending);
+
+ if (spe_chan->pending) {
+ spe_chan->pending = 0;
+ spe_chan_append(spe_chan);
+ }
+}
+
+void spe_block_ch (struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+
+ spin_lock_bh(&spe_chan->lock);
+}
+
+void spe_unblock_ch (struct dma_chan *chan)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+
+ spin_unlock_bh(&spe_chan->lock);
+}
+
+static dma_addr_t spe_adma_map_page(struct dma_chan *chan, struct page *page,
+ unsigned long offset, size_t size,
+ int direction)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ return dma_map_page(&spe_chan->device->pdev->dev, page, offset, size,
+ direction);
+}
+
+static dma_addr_t spe_adma_map_single(struct dma_chan *chan, void *cpu_addr,
+ size_t size, int direction)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ return dma_map_single(&spe_chan->device->pdev->dev, cpu_addr, size,
+ direction);
+}
+
+static void spe_adma_unmap_page(struct dma_chan *chan, dma_addr_t handle,
+ size_t size, int direction)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ dma_unmap_page(&spe_chan->device->pdev->dev, handle, size, direction);
+}
+
+static void spe_adma_unmap_single(struct dma_chan *chan, dma_addr_t handle,
+ size_t size, int direction)
+{
+ struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
+ dma_unmap_single(&spe_chan->device->pdev->dev, handle, size, direction);
+}
+
+static int __devexit spe_adma_remove(struct platform_device *dev)
+{
+ struct spe_adma_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct spe_adma_chan *spe_chan;
+ int i;
+ struct spe_adma_platform_data *plat_data = dev->dev.platform_data;
+
+ PRINTK("%s\n", __FUNCTION__);
+
+ dma_async_device_unregister(&device->common);
+
+ for (i = 0; i < 3; i++) {
+ unsigned int irq;
+ irq = platform_get_irq(dev, i);
+ free_irq(irq, device);
+ }
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ do {
+ struct resource *res;
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ release_mem_region(res->start, res->end - res->start);
+ } while (0);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ spe_chan = to_spe_adma_chan(chan);
+ list_del(&chan->device_node);
+ kfree(spe_chan);
+ }
+ kfree(device);
+
+ return 0;
+}
+
+static int __devinit spe_adma_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int ret=0, irq_eot=0, irq;
+ struct spe_adma_device *adev;
+ struct spe_adma_chan *spe_chan;
+ struct spe_adma_platform_data *plat_data = pdev->dev.platform_data;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ if (!request_mem_region(res->start, res->end - res->start, pdev->name))
+ return -EBUSY;
+
+ if ((adev = kzalloc(sizeof(*adev), GFP_KERNEL)) == NULL) {
+ ret = -ENOMEM;
+ goto err_adev_alloc;
+ }
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly drain the write buffer
+ */
+ if ((adev->dma_desc_pool_virt = dma_alloc_coherent(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL)) == NULL) {
+ ret = -ENOMEM;
+ goto err_dma_alloc;
+ }
+
+ PRINTK("%s: allocted descriptor pool virt %p phys %p\n",
+ __FUNCTION__, adev->dma_desc_pool_virt, (void *) adev->dma_desc_pool);
+
+ adev->id = plat_data->hw_id;
+ adev->common.capabilities = plat_data->capabilities;
+
+ /* clear errors before enabling interrupts */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = -ENXIO;
+ } else {
+ irq_eot = irq;
+ ret = request_irq(irq, spe_adma_eot_handler,
+ 0, pdev->name, &adev->id);
+ if (ret) {
+ ret = -EIO;
+ goto err_irq0;
+ }
+ }
+
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ INIT_LIST_HEAD(&adev->common.channels);
+
+ /* set base routines */
+ adev->common.device_tx_submit = spe_adma_tx_submit;
+ adev->common.device_set_dest = spe_adma_set_dest;
+ adev->common.device_set_src = spe_adma_set_src;
+ adev->common.device_alloc_chan_resources = spe_adma_alloc_chan_resources;
+ adev->common.device_free_chan_resources = spe_adma_free_chan_resources;
+ adev->common.device_is_tx_complete = spe_adma_is_complete;
+ adev->common.device_issue_pending = spe_adma_issue_pending;
+ adev->common.device_dependency_added = spe_adma_dependency_added;
+
+ adev->common.map_page = spe_adma_map_page;
+ adev->common.map_single = spe_adma_map_single;
+ adev->common.unmap_page = spe_adma_unmap_page;
+ adev->common.unmap_single = spe_adma_unmap_single;
+
+ /* set prep routines based on capability */
+ if (test_bit(DMA_MEMCPY, &adev->common.capabilities))
+ adev->common.device_prep_dma_memcpy = spe_adma_prep_dma_memcpy;
+ if (test_bit(DMA_XOR, &adev->common.capabilities)) {
+ adev->common.max_xor = spe_adma_get_max_xor();
+ adev->common.device_prep_dma_xor = spe_adma_prep_dma_xor;
+ }
+ if (test_bit(DMA_INTERRUPT, &adev->common.capabilities))
+ adev->common.device_prep_dma_interrupt =
+ spe_adma_prep_dma_interrupt;
+
+ if ((spe_chan = kzalloc(sizeof(struct spe_adma_chan), GFP_KERNEL)) == NULL) {
+ ret = -ENOMEM;
+ goto err_chan_alloc;
+ }
+
+ spe_adma_chan_array[adev->id] = spe_chan;
+
+ spe_chan->device = adev;
+ spin_lock_init(&spe_chan->lock);
+ init_timer(&spe_chan->cleanup_watchdog);
+ spe_chan->cleanup_watchdog.data = adev->id;
+ spe_chan->cleanup_watchdog.function = spe_adma_schedule_cleanup;
+ INIT_LIST_HEAD(&spe_chan->chain);
+ INIT_LIST_HEAD(&spe_chan->all_slots);
+ INIT_RCU_HEAD(&spe_chan->common.rcu);
+ spe_chan->common.device = &adev->common;
+ list_add_tail(&spe_chan->common.device_node, &adev->common.channels);
+
+ printk(KERN_INFO "Intel(R) SPE ADMA Engine found [%d]: "
+ "( %s%s%s%s%s%s%s%s%s%s)\n",
+ adev->id,
+ test_bit(DMA_PQ_XOR, &adev->common.capabilities) ? "pq_xor " : "",
+ test_bit(DMA_PQ_UPDATE, &adev->common.capabilities) ? "pq_update " : "",
+ test_bit(DMA_PQ_ZERO_SUM, &adev->common.capabilities) ? "pq_zero_sum " : "",
+ test_bit(DMA_XOR, &adev->common.capabilities) ? "xor " : "",
+ test_bit(DMA_DUAL_XOR, &adev->common.capabilities) ? "dual_xor " : "",
+ test_bit(DMA_ZERO_SUM, &adev->common.capabilities) ? "xor_zero_sum " : "",
+ test_bit(DMA_MEMSET, &adev->common.capabilities) ? "memset " : "",
+ test_bit(DMA_MEMCPY_CRC32C, &adev->common.capabilities) ? "memcpy+crc " : "",
+ test_bit(DMA_MEMCPY, &adev->common.capabilities) ? "memcpy " : "",
+ test_bit(DMA_INTERRUPT, &adev->common.capabilities) ? "int " : "");
+
+ dma_async_device_register(&adev->common);
+ goto out;
+
+err_chan_alloc:
+err_irq0:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+err_dma_alloc:
+ kfree(adev);
+err_adev_alloc:
+ release_mem_region(res->start, res->end - res->start);
+out:
+ return ret;
+}
+
+static char src1[16], src2[16], dst[16];
+
+static void spe_chan_start_null_xor(struct spe_adma_chan *spe_chan)
+{
+ struct spe_adma_desc_slot *sw_desc, *group_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ PRINTK("spe adma%d: %s\n", spe_chan->device->id, __FUNCTION__);
+
+ spin_lock_bh(&spe_chan->lock);
+ slot_cnt = spe_chan_xor_slot_count(0, 2, &slots_per_op);
+ sw_desc = spe_adma_alloc_slots(spe_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ list_splice_init(&sw_desc->group_list, &spe_chan->chain);
+ sw_desc->async_tx.ack = 1;
+ spe_desc_init_null_xor(group_start, 2, 0);
+ spe_desc_set_byte_count(group_start, spe_chan, 16);
+ spe_desc_set_dest_addr(group_start, spe_chan, __pa(dst));
+ spe_desc_set_xor_src_addr(group_start, 0, __pa(src1), 1, 1);
+ spe_desc_set_xor_src_addr(group_start, 1, __pa(src2), 1, 1);
+
+ cookie = spe_chan->common.cookie;
+ cookie++;
+ if (cookie <= 1)
+ cookie = 2;
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ spe_chan->completed_cookie = cookie - 1;
+ spe_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+ /* channel should not be busy */
+ BUG_ON(spe_chan_is_busy(spe_chan));
+
+ /* disable operation */
+ spe_chan_disable(spe_chan);
+
+ /* set the descriptor address */
+ spe_chan_set_next_descriptor(spe_chan, sw_desc);
+
+ /* run the descriptor */
+ spe_chan_enable(spe_chan);
+ } else
+ printk(KERN_ERR "spe adma%d failed to allocate null descriptor\n",
+ spe_chan->device->id);
+ spin_unlock_bh(&spe_chan->lock);
+}
+
+static struct platform_driver spe_adma_driver = {
+ .probe = spe_adma_probe,
+ .remove = spe_adma_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "SPE-ADMA",
+ },
+};
+
+static int __init spe_adma_init (void)
+{
+ /* it's currently unsafe to unload this module */
+ /* if forced, worst case is that rmmod hangs */
+ __unsafe(THIS_MODULE);
+
+ return platform_driver_register(&spe_adma_driver);
+}
+
+static void __exit spe_adma_exit (void)
+{
+ platform_driver_unregister(&spe_adma_driver);
+ return;
+}
+
+module_init(spe_adma_init);
+module_exit(spe_adma_exit);
+
+MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>");
+MODULE_DESCRIPTION("SPE ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/asm-ppc/adma.h b/include/asm-ppc/adma.h
new file mode 100644
index 0000000..0be88f1
--- /dev/null
+++ b/include/asm-ppc/adma.h
@@ -0,0 +1,715 @@
+/*
+ * include/asm/ppc440spe_adma.h
+ *
+ * 2006 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#ifndef PPC440SPE_ADMA_H
+#define PPC440SPE_ADMA_H
+
+#include <linux/types.h>
+#include <asm/ppc440spe_dma.h>
+#include <asm/ppc440spe_xor.h>
+
+#define SPE_ADMA_SLOT_SIZE sizeof(struct spe_adma_desc_slot)
+#define SPE_ADMA_THRESHOLD 5
+
+#define PPC440SPE_DMA0_ID 0
+#define PPC440SPE_DMA1_ID 1
+#define PPC440SPE_XOR_ID 2
+
+#define SPE_DESC_INT (1<<1)
+#define SPE_DESC_PROCESSED (1<<2)
+
+#define SPE_ADMA_XOR_MAX_BYTE_COUNT (1 << 31) /* this is the XOR_CBBCR width */
+#define SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT SPE_ADMA_XOR_MAX_BYTE_COUNT
+
+#undef ADMA_LL_DEBUG
+
+/**
+ * struct spe_adma_device - internal representation of an ADMA device
+ * @pdev: Platform device
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct spe_adma_device {
+ struct platform_device *pdev;
+ void *dma_desc_pool_virt;
+
+ int id;
+ dma_addr_t dma_desc_pool;
+ struct dma_device common;
+};
+
+/**
+ * struct spe_adma_device - internal representation of an ADMA device
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @device: parent device
+ * @chain: device chain view of the descriptors
+ * @common: common dmaengine channel object members
+ * @all_slots: complete domain of slots usable by the channel
+ * @pending: allows batching of hardware operations
+ * @result_accumulator: allows zero result sums of buffers > the hw maximum
+ * @zero_sum_group: flag to the clean up routine to collect zero sum results
+ * @completed_cookie: identifier for the most recently completed operation
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ */
+struct spe_adma_chan {
+ spinlock_t lock;
+ struct spe_adma_device *device;
+ struct timer_list cleanup_watchdog;
+ struct list_head chain;
+ struct dma_chan common;
+ struct list_head all_slots;
+ struct spe_adma_desc_slot *last_used;
+ int pending;
+ u8 result_accumulator;
+ u8 zero_sum_group;
+ dma_cookie_t completed_cookie;
+ int slots_allocated;
+};
+
+struct spe_adma_desc_slot {
+ dma_addr_t phys;
+ struct spe_adma_desc_slot *group_head, *hw_next;
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head slot_node;
+ struct list_head chain_node; /* node in channel ops list */
+ struct list_head group_list; /* list */
+ unsigned int unmap_len;
+ unsigned int unmap_src_cnt;
+ dma_cookie_t cookie;
+ void *hw_desc;
+ u16 stride;
+ u16 idx;
+ u16 slot_cnt;
+ u8 src_cnt;
+ u8 slots_per_op;
+ unsigned long flags;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+};
+
+struct spe_adma_platform_data {
+ int hw_id;
+ unsigned long capabilities;
+ size_t pool_size;
+};
+
+static u32 xor_refetch = 0;
+static struct spe_adma_desc_slot *last_sub[2] = { NULL, NULL };
+
+#ifdef ADMA_LL_DEBUG
+static void print_dma_desc (struct spe_adma_desc_slot *desc)
+{
+ dma_cdb_t *p = desc->hw_desc;
+
+ printk( "**************************\n"
+ "%s: CDB at %p (phys %x)\n"
+ "DMA OpCode=0x%x\n"
+ "Upper Half of SG1 Address=0x%x\n"
+ "Lower Half of SG1 Address=0x%x\n"
+ "SG (Scatter/Gather) Count=%x\n"
+ "Upper Half of SG2 Address=0x%x\n"
+ "Lower Half of SG2 Address=0x%x\n"
+ "Upper Half of SG3 Address=0x%x\n"
+ "Lower Half of SG3 Address=0x%x\n",
+ __FUNCTION__, p, desc->phys,
+ cpu_to_le32(p->opc),
+ cpu_to_le32(p->sg1u), cpu_to_le32(p->sg1l),
+ cpu_to_le32(p->cnt),
+ cpu_to_le32(p->sg2u), cpu_to_le32(p->sg2l),
+ cpu_to_le32(p->sg3u), cpu_to_le32(p->sg3l)
+ );
+}
+
+
+static void print_xor_desc (struct spe_adma_desc_slot *desc)
+{
+ xor_cb_t *p = desc->hw_desc;
+ int i;
+
+ printk( "**************************\n"
+ "%s(%p) [phys %x]\n"
+ "XOR0_CBCR=%x; XOR0_CBBCR=%x; XOR0_CBSR=%x;\n"
+ "XOR0_CBTAH=%x; XOR0_CBTAL=%x; XOR0_CBLAL=%x;\n",
+ __FUNCTION__, p, (u32)(desc->phys),
+ p->cbc, p->cbbc, p->cbs,
+ p->cbtah, p->cbtal, p->cblal
+ );
+ for (i=0; i < 16; i++) {
+ printk("Operand[%d]=%x; ", i, p->ops[i]);
+ if (i && !(i%3))
+ printk("\n");
+ }
+}
+
+static void print_xor_chain (xor_cb_t *p)
+{
+ int i;
+
+ do {
+ printk( "####### \n"
+ "%s(%p) [phys %x]\n"
+ "XOR0_CBCR=%x; XOR0_CBBCR=%x; XOR0_CBSR=%x;\n"
+ "XOR0_CBTAH=%x; XOR0_CBTAL=%x; XOR0_CBLAL=%x;\n",
+ __FUNCTION__, p, (u32)__pa(p),
+ p->cbc, p->cbbc, p->cbs,
+ p->cbtah, p->cbtal, p->cblal
+ );
+ for (i=0; i < 16; i++) {
+ printk("Operand[%d]=%x; ", i, p->ops[i]);
+ if (i && !(i%3))
+ printk("\n");
+ }
+
+ if (!p->cblal)
+ break;
+ p = __va(p->cblal);
+ } while (p);
+}
+
+static void print_xor_regs (struct spe_adma_chan *spe_chan)
+{
+ volatile xor_regs_t *p = (xor_regs_t *)spe_chan->device->pdev->resource[0].start;
+
+ printk("------ regs -------- \n");
+ printk( "\tcbcr=%x; cbbcr=%x; cbsr=%x;\n"
+ "\tcblalr=%x;crsr=%x;crrr=%x;\n"
+ "\tccbalr=%x;ier=%x;sr=%x\n"
+ "\tplbr=%x;cbtalr=%x\n"
+ "\top1=%x;op2=%x;op3=%x\n",
+ in_be32(&p->cbcr), in_be32(&p->cbbcr),in_be32(&p->cbsr),
+ in_be32(&p->cblalr),in_be32(&p->crsr),in_be32(&p->crrr),
+ in_be32(&p->ccbalr),in_be32(&p->ier),in_be32(&p->sr),
+ in_be32(&p->plbr),in_be32(&p->cbtalr),
+ p->op_ar[0][1], p->op_ar[1][1], p->op_ar[2][1]);
+}
+#endif
+
+static inline int spe_chan_interrupt_slot_count (int *slots_per_op, struct spe_adma_chan *chan)
+{
+ *slots_per_op = 1;
+ return *slots_per_op;
+}
+
+static inline void spe_desc_init_interrupt (struct spe_adma_desc_slot *desc, struct spe_adma_chan *chan)
+{
+ xor_cb_t *p;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ printk("%s is not supported for chan %d\n", __FUNCTION__,
+ chan->device->id);
+ break;
+ case PPC440SPE_XOR_ID:
+ p = desc->hw_desc;
+ memset (desc->hw_desc, 0, sizeof(xor_cb_t));
+ p->cbc = XOR_CBCR_CBCE_BIT; /* NOP */
+ break;
+ }
+}
+
+static inline void spe_adma_device_clear_eot_status (struct spe_adma_chan *chan)
+{
+ volatile dma_regs_t *dma_reg;
+ volatile xor_regs_t *xor_reg;
+ u32 rv;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* read FIFO to ack */
+ dma_reg = (dma_regs_t *)chan->device->pdev->resource[0].start;
+ rv = le32_to_cpu(dma_reg->csfpl);
+ if (!rv) {
+ printk ("%s: CSFPL is NULL\n", __FUNCTION__);
+ }
+ break;
+ case PPC440SPE_XOR_ID:
+ /* reset status bit to ack*/
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+ rv = in_be32(&xor_reg->sr);
+ /* clear status */
+ out_be32(&xor_reg->sr, rv);
+
+ if (!(xor_reg->sr & XOR_SR_XCP_BIT) && xor_refetch) {
+ xor_reg->crsr = XOR_CRSR_RCBE_BIT;
+ xor_refetch = 0;
+ }
+
+ break;
+ }
+}
+
+static inline u32 spe_adma_get_max_xor (void)
+{
+ return 16;
+}
+
+static inline u32 spe_chan_get_current_descriptor(struct spe_adma_chan *chan)
+{
+ int id = chan->device->id;
+ volatile dma_regs_t *dma_reg;
+ volatile xor_regs_t *xor_reg;
+
+ switch (id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = (dma_regs_t *)chan->device->pdev->resource[0].start;
+ return (le32_to_cpu(dma_reg->acpl)) & (~0xF);
+ case PPC440SPE_XOR_ID:
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+ return xor_reg->ccbalr;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static inline void spe_desc_init_null_xor(struct spe_adma_desc_slot *desc,
+ int src_cnt, int unknown_param)
+{
+ xor_cb_t *hw_desc = desc->hw_desc;
+
+ desc->src_cnt = 0;
+ hw_desc->cbc = src_cnt; /* NOP ? */
+ hw_desc->cblal = 0;
+}
+
+static inline void spe_chan_set_next_descriptor(struct spe_adma_chan *chan,
+ struct spe_adma_desc_slot *next_desc)
+{
+ int id = chan->device->id;
+ volatile xor_regs_t *xor_reg;
+ unsigned long flags;
+
+ switch (id) {
+ case PPC440SPE_XOR_ID:
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+
+ /* Set Link Address and mark that it's valid */
+ local_irq_save(flags);
+ while (xor_reg->sr & XOR_SR_XCP_BIT);
+ xor_reg->cblalr = next_desc->phys;
+ local_irq_restore(flags);
+ break;
+ }
+}
+
+static inline int spe_chan_is_busy(struct spe_adma_chan *chan)
+{
+ int id = chan->device->id, busy;
+ volatile xor_regs_t *xor_reg;
+ volatile dma_regs_t *dma_reg;
+
+ switch (id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = (dma_regs_t *)chan->device->pdev->resource[0].start;
+ /* if command FIFO's head and tail pointers are equal -
+ * channel is free
+ */
+ busy = (dma_reg->cpfhp != dma_reg->cpftp) ? 1 : 0;
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+ busy = (xor_reg->sr & XOR_SR_XCP_BIT) ? 1 : 0;
+ break;
+ default:
+ busy = 0;
+ BUG();
+ }
+
+ return busy;
+}
+
+static inline int spe_desc_is_aligned(struct spe_adma_desc_slot *desc,
+ int num_slots)
+{
+ return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/* to do: support large (i.e. > hw max) buffer sizes */
+static inline int spe_chan_memcpy_slot_count(size_t len, int *slots_per_op)
+{
+ *slots_per_op = 1;
+ return 1;
+}
+
+static inline int ppc440spe_xor_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ /* Each XOR descriptor provides up to 16 source operands */
+ *slots_per_op = (src_cnt + 15)/16;
+ return *slots_per_op;
+}
+
+static inline int spe_chan_xor_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ /* Number of slots depends on
+ * - the number of operators
+ * - the operator width (len)
+ * the maximum <len> may be 4K since the StripeHead size is PAGE_SIZE, so
+ * if we'll use this driver for RAID purposes only we'll assume this maximum
+ */
+ int slot_cnt = ppc440spe_xor_slot_count(len, src_cnt, slots_per_op);
+
+ if (likely(len <= SPE_ADMA_XOR_MAX_BYTE_COUNT))
+ return slot_cnt;
+
+ printk("%s: len %d > max %d !!\n", __FUNCTION__, len, SPE_ADMA_XOR_MAX_BYTE_COUNT);
+ BUG();
+ return slot_cnt;
+}
+
+static inline u32 spe_desc_get_dest_addr(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *chan)
+{
+ dma_cdb_t *dma_hw_desc;
+ xor_cb_t *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ return le32_to_cpu(dma_hw_desc->sg2l);
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->cbtal;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static inline u32 spe_desc_get_byte_count(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *chan)
+{
+ dma_cdb_t *dma_hw_desc;
+ xor_cb_t *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ return le32_to_cpu(dma_hw_desc->cnt);
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->cbbc;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static inline u32 spe_desc_get_src_addr(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *chan,
+ int src_idx)
+{
+ dma_cdb_t *dma_hw_desc;
+ xor_cb_t *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ return le32_to_cpu(dma_hw_desc->sg1l);
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->ops[src_idx];
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static inline void spe_xor_desc_set_src_addr(xor_cb_t *hw_desc,
+ int src_idx, dma_addr_t addr)
+{
+ out_be32(&hw_desc->ops[src_idx], addr);
+}
+
+static inline void spe_desc_init_memcpy(struct spe_adma_desc_slot *desc,
+ int int_en)
+{
+ dma_cdb_t *hw_desc = desc->hw_desc;
+
+ memset (desc->hw_desc, 0, sizeof(dma_cdb_t));
+
+ if (int_en)
+ desc->flags |= SPE_DESC_INT;
+ else
+ desc->flags &= ~SPE_DESC_INT;
+
+ desc->src_cnt = 1;
+ hw_desc->opc = cpu_to_le32(1<<24);
+}
+
+static inline void spe_desc_init_xor(struct spe_adma_desc_slot *desc,
+ int src_cnt,
+ int int_en)
+{
+ xor_cb_t *hw_desc;
+
+ memset (desc->hw_desc, 0, sizeof(xor_cb_t));
+
+ desc->src_cnt = src_cnt;
+ hw_desc = desc->hw_desc;
+ hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
+ if (int_en)
+ hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+static inline void spe_desc_set_byte_count(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *chan,
+ u32 byte_count)
+{
+ dma_cdb_t *dma_hw_desc;
+ xor_cb_t *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ dma_hw_desc->cnt = cpu_to_le32(byte_count);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->cbbc = byte_count;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void spe_desc_set_dest_addr(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *chan,
+ dma_addr_t addr)
+{
+ dma_cdb_t *dma_hw_descr;
+ xor_cb_t *xor_hw_descr;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_descr = desc->hw_desc;
+ dma_hw_descr->sg2l = cpu_to_le32(addr);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_descr = desc->hw_desc;
+ xor_hw_descr->cbtal = addr;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void spe_desc_set_memcpy_src_addr(struct spe_adma_desc_slot *desc,
+ dma_addr_t addr, int slot_cnt,
+ int slots_per_op)
+{
+ dma_cdb_t *hw_desc = desc->hw_desc;
+ hw_desc->sg1l = cpu_to_le32(addr);
+}
+
+static inline void spe_desc_set_xor_src_addr(struct spe_adma_desc_slot *desc,
+ int src_idx, dma_addr_t addr, int slot_cnt,
+ int slots_per_op)
+{
+ xor_cb_t *hw_desc = desc->hw_desc;
+
+ if (unlikely(slot_cnt != 1)) {
+ printk("%s: slot cnt = %d !!! \n", __FUNCTION__, slot_cnt);
+ BUG();
+ }
+
+ hw_desc->ops[src_idx] = addr;
+}
+
+static inline void spe_desc_set_next_desc(struct spe_adma_desc_slot *prev_desc,
+ struct spe_adma_chan *chan,
+ struct spe_adma_desc_slot *next_desc)
+{
+ volatile xor_cb_t *xor_hw_desc;
+ volatile xor_regs_t *xor_reg;
+ unsigned long flags;
+
+ if (!prev_desc)
+ return;
+
+ prev_desc->hw_next = next_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ break;
+ case PPC440SPE_XOR_ID:
+
+ next_desc->flags |= (1<<16);
+ next_desc->flags &= ~(1<<17);
+
+ /* bind descriptor to the chain */
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+
+ /* modify link fields */
+ local_irq_save(flags);
+
+ xor_hw_desc = next_desc->hw_desc;
+ xor_hw_desc->cblal = 0;
+ xor_hw_desc->cbc &= ~XOR_CBCR_LNK_BIT;
+
+ xor_hw_desc = prev_desc->hw_desc;
+ xor_hw_desc->cbs = 0;
+ xor_hw_desc->cblal = next_desc->phys;
+ xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
+
+ local_irq_restore(flags);
+
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline u32 spe_desc_get_next_desc(struct spe_adma_desc_slot *desc,
+ struct spe_adma_chan *chan)
+{
+ volatile xor_cb_t *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (desc->hw_next)
+ return desc->hw_next->phys;
+ return 0;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ return xor_hw_desc->cblal;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static inline void spe_chan_append(struct spe_adma_chan *chan)
+{
+ volatile dma_regs_t *dma_reg;
+ volatile xor_regs_t *xor_reg;
+ struct spe_adma_desc_slot *iter;
+ int id = chan->device->id;
+ u32 cur_desc;
+ unsigned long flags;
+
+ switch (id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = (dma_regs_t *)chan->device->pdev->resource[0].start;
+ cur_desc = spe_chan_get_current_descriptor(chan);
+ if (likely(cur_desc)) {
+ /* flush descriptors from queue to fifo */
+ iter = last_sub[chan->device->id];
+ if (!iter->hw_next)
+ return;
+
+ local_irq_save(flags);
+ list_for_each_entry_continue(iter, &chan->chain, chain_node) {
+ cur_desc = iter->phys;
+ if (!list_empty(&iter->async_tx.depend_list)) {
+ iter->flags |= SPE_DESC_INT;
+ }
+
+ out_le32 (&dma_reg->cpfpl, cur_desc);
+ if (!iter->hw_next)
+ break;
+ }
+ last_sub[chan->device->id] = iter;
+ local_irq_restore(flags);
+ } else {
+ /* first peer */
+ cur_desc = chan->last_used->phys;
+ last_sub[chan->device->id] = chan->last_used;
+ if (!(chan->last_used->flags & SPE_DESC_INT))
+ cur_desc |= 1 << 3;
+ out_le32 (&dma_reg->cpfpl, cur_desc);
+ }
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+ local_irq_save(flags);
+
+ /* update current descriptor and refetch link */
+ if (!(xor_reg->sr & XOR_SR_XCP_BIT)) {
+ xor_reg->crsr = XOR_CRSR_RCBE_BIT;
+ } else {
+ xor_refetch = 1;
+ }
+
+ local_irq_restore(flags);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void spe_chan_disable(struct spe_adma_chan *chan)
+{
+ int id = chan->device->id;
+ volatile xor_regs_t *xor_reg;
+
+ switch (id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+ xor_reg->crsr = XOR_CRSR_PAUS_BIT;
+
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void spe_chan_enable(struct spe_adma_chan *chan)
+{
+ int id = chan->device->id;
+ volatile xor_regs_t *xor_reg;
+ unsigned long flags;
+
+ switch (id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* always enable, do nothing */
+ break;
+ case PPC440SPE_XOR_ID:
+ /* drain write buffer */
+ xor_reg = (xor_regs_t *)chan->device->pdev->resource[0].start;
+
+ local_irq_save(flags);
+ xor_reg->crrr = XOR_CRSR_PAUS_BIT;
+ /* fetch descriptor pointed in <link> */
+ xor_reg->crrr = XOR_CRSR_64BA_BIT;
+ xor_reg->crsr = XOR_CRSR_XAE_BIT;
+ local_irq_restore(flags);
+
+ break;
+ default:
+ BUG();
+ }
+}
+
+#endif /* PPC440SPE_ADMA_H */
diff --git a/include/asm-ppc/ppc440spe_dma.h b/include/asm-ppc/ppc440spe_dma.h
new file mode 100644
index 0000000..e04c512
--- /dev/null
+++ b/include/asm-ppc/ppc440spe_dma.h
@@ -0,0 +1,214 @@
+/*
+ * include/asm/ppc440spe_dma.h
+ *
+ * 440SPe's DMA engines support header file
+ *
+ * 2006 (c) DENX Software Engineering
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the term of the GNU General Public License
+ * version 2. The program licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef PPC440SPE_DMA_H
+#define PPC440SPE_DMA_H
+
+#include <asm/types.h>
+
+/* Number of elements in the array with statical CDBs */
+#define MAX_STAT_DMA_CDBS 16
+/* Number of DMA engines available on the contoller */
+#define DMA_ENGINES_NUM 2
+
+/* FIFO's params */
+#define DMA0_FIFO_SIZE 0x1000
+#define DMA1_FIFO_SIZE 0x1000
+
+/* DMA Opcodes */
+#define DMA_NOP_OPC (u8)(0x00)
+#define DMA_MOVE_SG1_SF2_OPC (u8)(0x01)
+#define DMA_MULTICAST_OPC (u8)(0x05)
+
+/* I2O Memory Mapped Registers base address */
+#define I2O_MMAP_BASE 0x400100000ULL
+#define I2O_MMAP_SIZE 0xF4ULL
+
+/* DMA Memory Mapped Registers base address */
+#define DMA0_MMAP_BASE 0x400100100ULL
+#define DMA1_MMAP_BASE 0x400100200ULL
+#define DMA_MMAP_SIZE 0x80
+
+/* DMA Interrupt Sources, UIC0[20],[22] */
+#define DMA0_CP_FIFO_NEED_SERVICE 19
+#define DMA0_CS_FIFO_NEED_SERVICE 20
+#define DMA1_CP_FIFO_NEED_SERVICE 21
+#define DMA1_CS_FIFO_NEED_SERVICE 22
+
+/*UIC0:*/
+#define D0CPF_INT (1<<12)
+#define D0CSF_INT (1<<11)
+#define D1CPF_INT (1<<10)
+#define D1CSF_INT (1<<9)
+/*UIC1:*/
+#define DMAE_INT (1<<9)
+
+
+/*
+ * DMAx engines Command Descriptor Block Type
+ */
+typedef struct dma_cdb {
+ /*
+ * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf)
+ */
+ u32 opc; /* opcode */
+#if 0
+ u8 pad0[2]; /* reserved */
+ u8 attr; /* attributes */
+ u8 opc; /* opcode */
+#endif
+ u32 sg1u; /* upper SG1 address */
+ u32 sg1l; /* lower SG1 address */
+ u32 cnt; /* SG count, 3B used */
+ u32 sg2u; /* upper SG2 address */
+ u32 sg2l; /* lower SG2 address */
+ u32 sg3u; /* upper SG3 address */
+ u32 sg3l; /* lower SG3 address */
+} dma_cdb_t;
+
+/*
+ * Descriptor of allocated CDB
+ */
+typedef struct {
+ dma_cdb_t *vaddr; /* virtual address of CDB */
+ dma_addr_t paddr; /* physical address of CDB */
+ /*
+ * Additional fields
+ */
+ struct list_head link; /* link in processing list */
+ u32 status; /* status of the CDB */
+ /* status bits: */
+ #define DMA_CDB_DONE (1<<0) /* CDB processing competed */
+ #define DMA_CDB_CANCEL (1<<1) /* waiting thread was interrupted */
+#if 0
+ #define DMA_CDB_STALLOC (1<<2) /* CDB allocated dynamically */
+
+ /*
+ * Each CDB must be 16B-alligned, if we use static array we should
+ * take care of aligment for each array's element.
+ */
+ u8 pad1[1];
+#endif
+} dma_cdbd_t;
+
+/*
+ * DMAx hardware registers (p.515 in 440SPe UM 1.22)
+ */
+typedef struct {
+ u32 cpfpl;
+ u32 cpfph;
+ u32 csfpl;
+ u32 csfph;
+ u32 dsts;
+ u32 cfg;
+ u8 pad0[0x8];
+ u16 cpfhp;
+ u16 cpftp;
+ u16 csfhp;
+ u16 csftp;
+ u8 pad1[0x8];
+ u32 acpl;
+ u32 acph;
+ u32 s1bpl;
+ u32 s1bph;
+ u32 s2bpl;
+ u32 s2bph;
+ u32 s3bpl;
+ u32 s3bph;
+ u8 pad2[0x10];
+ u32 earl;
+ u32 earh;
+ u8 pad3[0x8];
+ u32 seat;
+ u32 sead;
+ u32 op;
+ u32 fsiz;
+} dma_regs_t;
+
+/*
+ * I2O hardware registers (p.528 in 440SPe UM 1.22)
+ */
+typedef struct {
+ u32 ists;
+ u32 iseat;
+ u32 isead;
+ u8 pad0[0x14];
+ u32 idbel;
+ u8 pad1[0xc];
+ u32 ihis;
+ u32 ihim;
+ u8 pad2[0x8];
+ u32 ihiq;
+ u32 ihoq;
+ u8 pad3[0x8];
+ u32 iopis;
+ u32 iopim;
+ u32 iopiq;
+ u8 iopoq;
+ u8 pad4[3];
+ u16 iiflh;
+ u16 iiflt;
+ u16 iiplh;
+ u16 iiplt;
+ u16 ioflh;
+ u16 ioflt;
+ u16 ioplh;
+ u16 ioplt;
+ u32 iidc;
+ u32 ictl;
+ u32 ifcpp;
+ u8 pad5[0x4];
+ u16 mfac0;
+ u16 mfac1;
+ u16 mfac2;
+ u16 mfac3;
+ u16 mfac4;
+ u16 mfac5;
+ u16 mfac6;
+ u16 mfac7;
+ u16 ifcfh;
+ u16 ifcht;
+ u8 pad6[0x4];
+ u32 iifmc;
+ u32 iodb;
+ u32 iodbc;
+ u32 ifbal;
+ u32 ifbah;
+ u32 ifsiz;
+ u32 ispd0;
+ u32 ispd1;
+ u32 ispd2;
+ u32 ispd3;
+ u32 ihipl;
+ u32 ihiph;
+ u32 ihopl;
+ u32 ihoph;
+ u32 iiipl;
+ u32 iiiph;
+ u32 iiopl;
+ u32 iioph;
+ u32 ifcpl;
+ u32 ifcph;
+ u8 pad7[0x8];
+ u32 iopt;
+} i2o_regs_t;
+
+/*
+ * Prototypes
+ */
+int dma_copy (char *dst,char *src, unsigned int data_sz);
+
+
+#endif /* PPC440SPE_DMA_H */
+
diff --git a/include/asm-ppc/ppc440spe_xor.h b/include/asm-ppc/ppc440spe_xor.h
new file mode 100644
index 0000000..fa135d7
--- /dev/null
+++ b/include/asm-ppc/ppc440spe_xor.h
@@ -0,0 +1,131 @@
+/*
+ * include/asm/ppc440spe_xor.h
+ *
+ * 440SPe's XOR engines support header file
+ *
+ * 2006 (c) DENX Software Engineering
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the term of the GNU General Public License
+ * version 2. The program licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef PPC440SPE_XOR_H
+#define PPC440SPE_XOR_H
+
+#include <asm/types.h>
+
+/* XOR Memory Mapped Registers base address */
+#define XOR_MMAP_BASE 0x400200000ULL
+#define XOR_MMAP_SIZE 0x224ULL
+
+/* XOR Interrupt Source, UIC1[31] */
+#define XOR_INTERRUPT 63
+
+/*
+ * XOR Command Block Control Register bits
+ */
+#define XOR_CBCR_LNK_BIT (1<<31) /* link present */
+#define XOR_CBCR_TGT_BIT (1<<30) /* target present */
+#define XOR_CBCR_CBCE_BIT (1<<29) /* command block compete enable */
+#define XOR_CBCR_RNZE_BIT (1<<28) /* result not zero enable */
+#define XOR_CBCR_XNOR_BIT (1<<15) /* XOR/XNOR */
+
+/*
+ * XORCore Status Register bits
+ */
+#define XOR_SR_XCP_BIT (1<<31) /* core processing */
+#define XOR_SR_ICB_BIT (1<<17) /* invalid CB */
+#define XOR_SR_IC_BIT (1<<16) /* invalid command */
+#define XOR_SR_IPE_BIT (1<<15) /* internal parity error */
+#define XOR_SR_RNZ_BIT (1<<2) /* result not Zero */
+#define XOR_SR_CBC_BIT (1<<1) /* CB complete */
+#define XOR_SR_CBLC_BIT (1<<0) /* CB list complete */
+
+/*
+ * XORCore Control Set and Reset Register bits
+ */
+#define XOR_CRSR_XASR_BIT (1<<31) /* soft reset */
+#define XOR_CRSR_XAE_BIT (1<<30) /* enable */
+#define XOR_CRSR_RCBE_BIT (1<<29) /* refetch CB enable */
+#define XOR_CRSR_PAUS_BIT (1<<28) /* pause */
+#define XOR_CRSR_64BA_BIT (1<<27) /* 64/32 CB format */
+#define XOR_CRSR_CLP_BIT (1<<25) /* continue list processing */
+
+/*
+ * XORCore Interrupt Enable Register
+ */
+#define XOR_IE_CBCIE_BIT (1<<1) /* CB complete interrupt enable */
+#define XOR_IE_CBLCI_BIT (1<<0) /* CB list complete interrupt enable */
+
+/*
+ * XOR Accelerator engine Command Block Type
+ */
+typedef struct {
+ /*
+ * Basic 32-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf)
+ */
+ u32 cbc; /* control */
+ u32 cbbc; /* byte count */
+ u32 cbs; /* status */
+ u8 pad0[4]; /* reserved */
+ u32 cbtah; /* target address high */
+ u32 cbtal; /* target address low */
+ u8 pad1[4]; /* reserved */
+ u32 cblal; /* link address low */
+ u32 ops[16]; /* operands addresses */
+} __attribute__ ((packed)) xor_cb_t;
+
+typedef struct {
+ xor_cb_t *vaddr;
+ dma_addr_t paddr;
+
+ /*
+ * Additional fields
+ */
+ struct list_head link; /* link to processing CBs */
+ u32 status; /* status of the CB */
+ /* status bits: */
+ #define XOR_CB_DONE (1<<0) /* CB processing competed */
+ #define XOR_CB_CANCEL (1<<1) /* waiting thread was interrupted */
+#if 0
+ #define XOR_CB_STALLOC (1<<2) /* CB allocated statically */
+#endif
+} xor_cbd_t;
+
+
+/*
+ * XOR hardware registers Table 19-3, UM 1.22
+ */
+typedef struct {
+ u32 op_ar[16][2]; /* operand address[0]-high,[1]-low registers */
+ u8 pad0[352]; /* reserved */
+ u32 cbcr; /* CB control register */
+ u32 cbbcr; /* CB byte count register */
+ u32 cbsr; /* CB status register */
+ u8 pad1[4]; /* reserved */
+ u32 cbtahr; /* operand target address high register */
+ u32 cbtalr; /* operand target address low register */
+ u32 cblahr; /* CB link address high register */
+ u32 cblalr; /* CB link address low register */
+ u32 crsr; /* control set register */
+ u32 crrr; /* control reset register */
+ u32 ccbahr; /* current CB address high register */
+ u32 ccbalr; /* current CB address low register */
+ u32 plbr; /* PLB configuration register */
+ u32 ier; /* interrupt enable register */
+ u32 pecr; /* parity error count register */
+ u32 sr; /* status register */
+ u32 revidr; /* revision ID register */
+} __attribute__ ((packed)) xor_regs_t;
+
+/*
+ * Prototypes
+ */
+int init_xor_eng(void);
+int spe440_xor_block (unsigned int ops_count, unsigned int op_len, void **ops);
+
+#endif /* PPC440SPE_XOR_H */
+
--
1.5.0.2
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-15 23:29 [PATCH] [PPC32] ADMA support for PPC 440SPe processors Wolfgang Denk
@ 2007-03-16 5:27 ` Paul Mackerras
2007-03-16 5:55 ` Dan Williams
2007-03-16 8:29 ` Benjamin Herrenschmidt
2007-03-16 18:00 ` Dan Williams
2 siblings, 1 reply; 25+ messages in thread
From: Paul Mackerras @ 2007-03-16 5:27 UTC (permalink / raw)
To: Wolfgang Denk; +Cc: linux-raid, linuxppc-dev
Wolfgang Denk writes:
> This patch is based on and requires a set of patches posted to the
> linux-raid mailing list by Dan Williams on 2007-01-23:
Those patches don't seem to be upstream in Linus' tree. Are they in
-mm, or is anyone pushing for them to be?
Paul.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 5:27 ` Paul Mackerras
@ 2007-03-16 5:55 ` Dan Williams
2007-03-16 10:16 ` Wolfgang Denk
0 siblings, 1 reply; 25+ messages in thread
From: Dan Williams @ 2007-03-16 5:55 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linux-raid, linuxppc-dev
On 3/15/07, Paul Mackerras <paulus@samba.org> wrote:
> Wolfgang Denk writes:
>
> > This patch is based on and requires a set of patches posted to the
> > linux-raid mailing list by Dan Williams on 2007-01-23:
>
> Those patches don't seem to be upstream in Linus' tree. Are they in
> -mm, or is anyone pushing for them to be?
>
They are in -mm (git-md-accel.patch). I'll review this driver and and
integrate it into my next push to Andrew, along with some further
cleanups.
> Paul.
Dan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-15 23:29 [PATCH] [PPC32] ADMA support for PPC 440SPe processors Wolfgang Denk
2007-03-16 5:27 ` Paul Mackerras
@ 2007-03-16 8:29 ` Benjamin Herrenschmidt
2007-03-16 10:23 ` Wolfgang Denk
` (2 more replies)
2007-03-16 18:00 ` Dan Williams
2 siblings, 3 replies; 25+ messages in thread
From: Benjamin Herrenschmidt @ 2007-03-16 8:29 UTC (permalink / raw)
To: Wolfgang Denk; +Cc: linux-raid, linuxppc-dev
Hi !
I'm short on time, so no really in-depth review right now, but a few
nits I've spotted while browsing the patch..
> static u64 ppc440spe_adma_dmamask = DMA_32BIT_MASK;
> +
> +/* DMA and XOR platform devices' resources */
> +static struct resource ppc440spe_dma_0_resources[] = {
> + {
> + .flags = IORESOURCE_MEM,
> + },
> + {
> + .start = DMA0_CS_FIFO_NEED_SERVICE,
> + .end = DMA0_CS_FIFO_NEED_SERVICE,
> + .flags = IORESOURCE_IRQ
> + }
> +};
.../...
This is all very ugly, hopefully, can be replaced by a proper
device-tree representation in arch/powerpc. What are your plans for
porting 440SP/SPe over ?
> +/*
> + * Init DMA0/1 and XOR engines; allocate memory for DMAx FIFOs; set platform_device
> + * memory resources addresses
> + */
> +static void ppc440spe_configure_raid_devices(void)
> +{
> + void *fifo_buf;
> + i2o_regs_t *i2o_reg;
> + dma_regs_t *dma_reg0, *dma_reg1;
> + xor_regs_t *xor_reg;
> + u32 mask;
> +
> + printk ("%s\n", __FUNCTION__);
The above should probably go...
> + /*
> + * Map registers
> + */
> + i2o_reg = (i2o_regs_t *)ioremap64(I2O_MMAP_BASE, I2O_MMAP_SIZE);
> + dma_reg0 = (dma_regs_t *)ioremap64(DMA0_MMAP_BASE, DMA_MMAP_SIZE);
> + dma_reg1 = (dma_regs_t *)ioremap64(DMA1_MMAP_BASE, DMA_MMAP_SIZE);
> + xor_reg = (xor_regs_t *)ioremap64(XOR_MMAP_BASE,XOR_MMAP_SIZE);
You should test the result of these. Also, the move to arch/powerpc here
as well will cleanup as ioremap will always take 64 bits resource_size_t
(can't you make that working on arch/ppc too and use normal ioremap
there as well ?).
In addition, the casting is ugly and your types lack __iomem
annotations.
> + /*
> + * Configure h/w
> + */
> +
> + /* Reset I2O/DMA */
> + mtdcr(DCRN_SDR0_CFGADDR, 0x200);
> + mtdcr(DCRN_SDR0_CFGDATA, 0x10000);
> + mtdcr(DCRN_SDR0_CFGADDR, 0x200);
> + mtdcr(DCRN_SDR0_CFGDATA, 0x0);
The above could use some symbolic constants... Is this the only piece of
code to access the SDR0 indirect config registers ? If not, then some
global locking is needed as well.
(See my old thread about providing a global lock/mutex for that sort of
system wide, low pressure, config registers accesses).
> + /* Reset XOR */
> + out_be32(&xor_reg->crsr, XOR_CRSR_XASR_BIT);
> + out_be32(&xor_reg->crrr, XOR_CRSR_64BA_BIT);
> +
> + /* Setup the base address of mmaped registers */
> + mtdcr(DCRN_I2O0_IBAH, 0x00000004);
> + mtdcr(DCRN_I2O0_IBAL, 0x00100001);
Some symbolic constants here too, also am I right to assume you are hard
coding an address here ? That need at least some bold comments as there
seem to be no resource management involved to make sure that address
hasn't been used elsewhere.
That's also things that should be handled via the device-tree
hopefully.
> + /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
> + * the base address of FIFO memory space
> + */
> + fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE)<<1, GFP_KERNEL | __GFP_DMA);
Error checking ? Also, what is the rationale for GFP_KERNEL | __GFP_DMA
here ? I don't think you need the later and probably not with
underscores if you do anyway.
> + /* SetUp FIFO memory space base address */
> + out_le32(&i2o_reg->ifbah, 0);
> + out_le32(&i2o_reg->ifbal, ((u32)__pa(fifo_buf)));
>
> + /* zero FIFO size for I2O, DMAs; 0x1000 to enable DMA */
> + out_le32(&i2o_reg->ifsiz, 0);
> + out_le32(&dma_reg0->fsiz, 0x1000 | ((DMA0_FIFO_SIZE>>3) - 1));
> + out_le32(&dma_reg1->fsiz, 0x1000 | ((DMA1_FIFO_SIZE>>3) - 1));
Symbolicm constants ?
> + /* Configure DMA engine */
> + out_le32(&dma_reg0->cfg, 0x0D880000);
> + out_le32(&dma_reg1->cfg, 0x0D880000);
Same ?
> + /* Clear Status */
> + out_le32(&dma_reg0->dsts, ~0);
> + out_le32(&dma_reg1->dsts, ~0);
> +
> + /* Unmask 'CS FIFO Attention' interrupts */
> + mask = in_le32(&i2o_reg->iopim) & ~0x48;
> + out_le32(&i2o_reg->iopim, mask);
Same ?
> + /* enable XOR engine interrupt */
> + out_be32(&xor_reg->ier, XOR_IE_CBLCI_BIT | XOR_IE_CBCIE_BIT | 0x34000);
Same ?
> + PRINTK("\tfree slot %x: %d stride: %d\n", desc->phys, desc->idx, desc->stride);
Why don't you use the kernel existing debugging facilitie, like
pr_debug, or dev_dbg if you have a proper struct device (which you
should have with an arch/powerpc port hopefully using
of_platform_device).
> + spin_lock_bh(&spe_chan->lock);
> + /* Allocate descriptor slots */
> + i = spe_chan->slots_allocated;
> + if (spe_chan->device->id != PPC440SPE_XOR_ID)
> + db_sz = sizeof (dma_cdb_t);
> + else
> + db_sz = sizeof (xor_cb_t);
> +
> + for (; i < (plat_data->pool_size/db_sz); i++) {
> + slot = kzalloc(sizeof(struct spe_adma_desc_slot), GFP_KERNEL);
GFP_KERNEL within spin_lock_bh is no good...
> diff --git a/include/asm-ppc/adma.h b/include/asm-ppc/adma.h
> new file mode 100644
> index 0000000..0be88f1
> --- /dev/null
> +++ b/include/asm-ppc/adma.h
There's way too many code in this .h file, too big inline functions. It
should mostly be moved to a .c file
Also, it's a bit rude to have a file called asm-ppc/adma.h that contains
ppc440SPe specific code without any guard. I don't care much about
asm-ppc for now but once that's moving to asm-powerpc, you might end up
with more than one platform doing ADMA differently and several of them
buildable in a single kernel, so keep that in mind.
> @@ -0,0 +1,715 @@
> +/*
> + * include/asm/ppc440spe_adma.h
> +
Comment doesn't match file name. Just remove the comment anyway.
Cheers,
Ben.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 5:55 ` Dan Williams
@ 2007-03-16 10:16 ` Wolfgang Denk
2007-03-16 16:33 ` Dan Williams
0 siblings, 1 reply; 25+ messages in thread
From: Wolfgang Denk @ 2007-03-16 10:16 UTC (permalink / raw)
To: Dan Williams; +Cc: linux-raid, linuxppc-dev, Paul Mackerras
In message <e9c3a7c20703152255q16563803q33a5893c210634d1@mail.gmail.com> you wrote:
>
> They are in -mm (git-md-accel.patch). I'll review this driver and and
> integrate it into my next push to Andrew, along with some further
> cleanups.
Thanks.
We're doing some cleanup now based on the feedback we receive.
What is easier for you to handle - a complete new patch, or an
incrementan one on top of what we submitted now? (I'd prefer
incremental, but will do whatever works better for you).
Best regards,
Wolfgang Denk
--
DENX Software Engineering GmbH, HRB 165235 Munich, CEO: Wolfgang Denk
Office: Kirchenstr. 5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
You may call me by my name, Wirth, or by my value, Worth.
- Nicklaus Wirth
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 8:29 ` Benjamin Herrenschmidt
@ 2007-03-16 10:23 ` Wolfgang Denk
2007-03-16 12:44 ` Stefan Roese
2007-03-16 16:57 ` Dan Williams
2 siblings, 0 replies; 25+ messages in thread
From: Wolfgang Denk @ 2007-03-16 10:23 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linux-raid, linuxppc-dev
Dear Ben,
in message <1174033758.6861.41.camel@localhost.localdomain> you wrote:
>
> This is all very ugly, hopefully, can be replaced by a proper
> device-tree representation in arch/powerpc. What are your plans for
> porting 440SP/SPe over ?
We will start working on it as soon as we can lay fingers on an
arch/powerpc port for the 440SP/SPe...
> > +/*
> > + * Init DMA0/1 and XOR engines; allocate memory for DMAx FIFOs; set platform_device
> > + * memory resources addresses
> > + */
> > +static void ppc440spe_configure_raid_devices(void)
> > +{
> > + void *fifo_buf;
> > + i2o_regs_t *i2o_reg;
> > + dma_regs_t *dma_reg0, *dma_reg1;
> > + xor_regs_t *xor_reg;
> > + u32 mask;
> > +
> > + printk ("%s\n", __FUNCTION__);
>
> The above should probably go...
Agreed, sorry.
> > + i2o_reg = (i2o_regs_t *)ioremap64(I2O_MMAP_BASE, I2O_MMAP_SIZE);
> > + dma_reg0 = (dma_regs_t *)ioremap64(DMA0_MMAP_BASE, DMA_MMAP_SIZE);
> > + dma_reg1 = (dma_regs_t *)ioremap64(DMA1_MMAP_BASE, DMA_MMAP_SIZE);
> > + xor_reg = (xor_regs_t *)ioremap64(XOR_MMAP_BASE,XOR_MMAP_SIZE);
>
> You should test the result of these. Also, the move to arch/powerpc here
> as well will cleanup as ioremap will always take 64 bits resource_size_t
> (can't you make that working on arch/ppc too and use normal ioremap
> there as well ?).
>
> In addition, the casting is ugly and your types lack __iomem
> annotations.
Will fix. Thanks for pointing out.
> > + mtdcr(DCRN_SDR0_CFGADDR, 0x200);
> > + mtdcr(DCRN_SDR0_CFGDATA, 0x10000);
> > + mtdcr(DCRN_SDR0_CFGADDR, 0x200);
> > + mtdcr(DCRN_SDR0_CFGDATA, 0x0);
>
> The above could use some symbolic constants... Is this the only piece of
> code to access the SDR0 indirect config registers ? If not, then some
> global locking is needed as well.
>
> (See my old thread about providing a global lock/mutex for that sort of
> system wide, low pressure, config registers accesses).
Will check.
> > + /* Setup the base address of mmaped registers */
> > + mtdcr(DCRN_I2O0_IBAH, 0x00000004);
> > + mtdcr(DCRN_I2O0_IBAL, 0x00100001);
>
> Some symbolic constants here too, also am I right to assume you are hard
> coding an address here ? That need at least some bold comments as there
> seem to be no resource management involved to make sure that address
> hasn't been used elsewhere.
Will check.
> That's also things that should be handled via the device-tree
> hopefully.
Agreed.
...
> > + PRINTK("\tfree slot %x: %d stride: %d\n", desc->phys, desc->idx, desc->stride);
>
> Why don't you use the kernel existing debugging facilitie, like
> pr_debug, or dev_dbg if you have a proper struct device (which you
> should have with an arch/powerpc port hopefully using
> of_platform_device).
You are right once more..
> > + for (; i < (plat_data->pool_size/db_sz); i++) {
> > + slot = kzalloc(sizeof(struct spe_adma_desc_slot), GFP_KERNEL);
>
> GFP_KERNEL within spin_lock_bh is no good...
Thanks for poointing out
> > diff --git a/include/asm-ppc/adma.h b/include/asm-ppc/adma.h
> > new file mode 100644
> > index 0000000..0be88f1
> > --- /dev/null
> > +++ b/include/asm-ppc/adma.h
>
> There's way too many code in this .h file, too big inline functions. It
> should mostly be moved to a .c file
>
> Also, it's a bit rude to have a file called asm-ppc/adma.h that contains
> ppc440SPe specific code without any guard. I don't care much about
Agreed. Will rename.
> > + * include/asm/ppc440spe_adma.h
>
> Comment doesn't match file name. Just remove the comment anyway.
...and change the file name.
Thanks for your valuable input. Will try to clean up.
Best regards,
Wolfgang Denk
--
DENX Software Engineering GmbH, HRB 165235 Munich, CEO: Wolfgang Denk
Office: Kirchenstr. 5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
Microsoft Multimedia:
You have nice graphics, sound and animations when the system crashes.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 8:29 ` Benjamin Herrenschmidt
2007-03-16 10:23 ` Wolfgang Denk
@ 2007-03-16 12:44 ` Stefan Roese
2007-03-16 16:57 ` Dan Williams
2 siblings, 0 replies; 25+ messages in thread
From: Stefan Roese @ 2007-03-16 12:44 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-raid
On Friday 16 March 2007 09:29, Benjamin Herrenschmidt wrote:
> > + /*
> > + * Map registers
> > + */
> > + i2o_reg = (i2o_regs_t *)ioremap64(I2O_MMAP_BASE, I2O_MMAP_SIZE);
> > + dma_reg0 = (dma_regs_t *)ioremap64(DMA0_MMAP_BASE, DMA_MMAP_SIZE);
> > + dma_reg1 = (dma_regs_t *)ioremap64(DMA1_MMAP_BASE, DMA_MMAP_SIZE);
> > + xor_reg = (xor_regs_t *)ioremap64(XOR_MMAP_BASE,XOR_MMAP_SIZE);
>
> You should test the result of these. Also, the move to arch/powerpc here
> as well will cleanup as ioremap will always take 64 bits resource_size_t
> (can't you make that working on arch/ppc too and use normal ioremap
> there as well ?).
It's quite easy: Just configure CONFIG_RESOURCES_64BIT and you can use
the "normal" ioremap(). Wolfgang, please give it a try. It already works on
the Katmai and Taishan.
> > + /*
> > + * Configure h/w
> > + */
> > +
> > + /* Reset I2O/DMA */
> > + mtdcr(DCRN_SDR0_CFGADDR, 0x200);
> > + mtdcr(DCRN_SDR0_CFGDATA, 0x10000);
> > + mtdcr(DCRN_SDR0_CFGADDR, 0x200);
> > + mtdcr(DCRN_SDR0_CFGDATA, 0x0);
>
> The above could use some symbolic constants... Is this the only piece of
> code to access the SDR0 indirect config registers ? If not, then some
> global locking is needed as well.
Please add
#define DCRN_SDR0_SRST0 0x200
#define SDR0_SRST_I2ODMA (0x80000000 > 15)
to include/asm-ppc/ppc44x.h
and then use
SDR_WRITE(DCRN_SDR0_SRST0, SDR0_SRST_I2ODMA);
SDR_WRITE(DCRN_SDR0_SRST0, 0);
here.
Best regards,
Stefan
=====================================================================
DENX Software Engineering GmbH, HRB 165235 Munich, CEO: Wolfgang Denk
Office: Kirchenstr. 5, D-82194 Groebenzell, Germany
=====================================================================
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 10:16 ` Wolfgang Denk
@ 2007-03-16 16:33 ` Dan Williams
0 siblings, 0 replies; 25+ messages in thread
From: Dan Williams @ 2007-03-16 16:33 UTC (permalink / raw)
To: Wolfgang Denk; +Cc: linux-raid, linuxppc-dev, Paul Mackerras
On 3/16/07, Wolfgang Denk <wd@denx.de> wrote:
> In message <e9c3a7c20703152255q16563803q33a5893c210634d1@mail.gmail.com> you wrote:
> >
> > They are in -mm (git-md-accel.patch). I'll review this driver and and
> > integrate it into my next push to Andrew, along with some further
> > cleanups.
>
> Thanks.
>
> We're doing some cleanup now based on the feedback we receive.
>
> What is easier for you to handle - a complete new patch, or an
> incrementan one on top of what we submitted now? (I'd prefer
> incremental, but will do whatever works better for you).
>
I can handle incremental, but I will probably fold everything together
in the patch that goes to -mm.
Check out Stacked GIT (http://www.procode.org/stgit/) when you get a
chance, it handles this situation well.
> Best regards,
>
> Wolfgang Denk
>
Dan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 8:29 ` Benjamin Herrenschmidt
2007-03-16 10:23 ` Wolfgang Denk
2007-03-16 12:44 ` Stefan Roese
@ 2007-03-16 16:57 ` Dan Williams
2 siblings, 0 replies; 25+ messages in thread
From: Dan Williams @ 2007-03-16 16:57 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linux-raid, linuxppc-dev
On 3/16/07, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> > + PRINTK("\tfree slot %x: %d stride: %d\n", desc->phys, desc->idx, desc->stride);
>
> Why don't you use the kernel existing debugging facilitie, like
> pr_debug, or dev_dbg if you have a proper struct device (which you
> should have with an arch/powerpc port hopefully using
> of_platform_device).
>
This came from the the iop-adma driver. I blindly copied it from
drivers/md/raid5.c, but yes it should change to dev_dbg.
> > + spin_lock_bh(&spe_chan->lock);
> > + /* Allocate descriptor slots */
> > + i = spe_chan->slots_allocated;
> > + if (spe_chan->device->id != PPC440SPE_XOR_ID)
> > + db_sz = sizeof (dma_cdb_t);
> > + else
> > + db_sz = sizeof (xor_cb_t);
> > +
> > + for (; i < (plat_data->pool_size/db_sz); i++) {
> > + slot = kzalloc(sizeof(struct spe_adma_desc_slot), GFP_KERNEL);
>
> GFP_KERNEL within spin_lock_bh is no good...
>
This is an iop-adma wart... will fix.
> > diff --git a/include/asm-ppc/adma.h b/include/asm-ppc/adma.h
> > new file mode 100644
> > index 0000000..0be88f1
> > --- /dev/null
> > +++ b/include/asm-ppc/adma.h
>
> There's way too many code in this .h file, too big inline functions. It
> should mostly be moved to a .c file
>
The iop-adma driver uses separate .h files because the driver is
shared between iop3xx and iop13xx implementations and I did not want
the overhead of another indirect-branch layer. In this case the
hardware specific routines can be written inline since the driver is
only supporting one architecture... other suggestions?
> Cheers,
> Ben.
>
Regards,
Dan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-15 23:29 [PATCH] [PPC32] ADMA support for PPC 440SPe processors Wolfgang Denk
2007-03-16 5:27 ` Paul Mackerras
2007-03-16 8:29 ` Benjamin Herrenschmidt
@ 2007-03-16 18:00 ` Dan Williams
2007-03-17 8:09 ` Stefan Roese
2007-03-17 8:57 ` Yuri Tikhonov
2 siblings, 2 replies; 25+ messages in thread
From: Dan Williams @ 2007-03-16 18:00 UTC (permalink / raw)
To: Wolfgang Denk; +Cc: linux-raid, linuxppc-dev
Here are some additional comments/nits:
> +/*
> + * Init DMA0/1 and XOR engines; allocate memory for DMAx FIFOs; set platform_device
> + * memory resources addresses
> + */
> +static void ppc440spe_configure_raid_devices(void)
Any reason not to move most of this function into spe_adma_probe? The
"set resource address" section is the only piece that spe_adma_probe
should not handle.
> +++ b/drivers/dma/spe-adma.c
> @@ -0,0 +1,1071 @@
> +/*
> + * Copyright(c) 2006 DENX Engineering. All rights reserved.
> + *
> + * Author: Yuri Tikhonov <yur@emcraft.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the Free
> + * Software Foundation; either version 2 of the License, or (at your option)
> + * any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59
> + * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * The full GNU General Public License is included in this distribution in the
> + * file called COPYING.
> + */
> +
> +/*
> + * This driver supports the asynchrounous DMA copy and RAID engines available
> + * on the AMCC PPC440SPe Processors.
> + * Based on the Intel Xscale(R) family of I/O Processors (SPE 32x, 33x, 134x)
SPE should be IOP on this line.
../..
> +static inline void
> +spe_adma_slot_cleanup(struct spe_adma_chan *spe_chan)
> +{
> + spin_lock_bh(&spe_chan->lock);
> + __spe_adma_slot_cleanup(spe_chan);
> + spin_unlock_bh(&spe_chan->lock);
> +}
> +
> +static struct spe_adma_chan *spe_adma_chan_array[3];
> +static void spe_adma0_task(unsigned long data)
> +{
> + __spe_adma_slot_cleanup(spe_adma_chan_array[0]);
> +}
> +
> +static void spe_adma1_task(unsigned long data)
> +{
> + __spe_adma_slot_cleanup(spe_adma_chan_array[1]);
> +}
> +
> +static void spe_adma2_task(unsigned long data)
> +{
> + __spe_adma_slot_cleanup(spe_adma_chan_array[2]);
> +}
> +
> +DECLARE_TASKLET(spe_adma0_tasklet, spe_adma0_task, 0);
> +DECLARE_TASKLET(spe_adma1_tasklet, spe_adma1_task, 0);
> +DECLARE_TASKLET(spe_adma2_tasklet, spe_adma2_task, 0);
> +struct tasklet_struct *spe_adma_tasklet[] = {
> + &spe_adma0_tasklet,
> + &spe_adma1_tasklet,
> + &spe_adma2_tasklet,
> +};
> +
This is something I am cleaning up in iop-adma by adding a struct
tasklet * to each channel. I'll post an incremental diff of my
iop-adma changes so you can see what I have cleaned up since the
2.6.20-rc5 posting.
> +static dma_addr_t spe_adma_map_page(struct dma_chan *chan, struct page *page,
> + unsigned long offset, size_t size,
> + int direction)
> +{
> + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> + return dma_map_page(&spe_chan->device->pdev->dev, page, offset, size,
> + direction);
> +}
> +
> +static dma_addr_t spe_adma_map_single(struct dma_chan *chan, void *cpu_addr,
> + size_t size, int direction)
> +{
> + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> + return dma_map_single(&spe_chan->device->pdev->dev, cpu_addr, size,
> + direction);
> +}
> +
> +static void spe_adma_unmap_page(struct dma_chan *chan, dma_addr_t handle,
> + size_t size, int direction)
> +{
> + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> + dma_unmap_page(&spe_chan->device->pdev->dev, handle, size, direction);
> +}
> +
> +static void spe_adma_unmap_single(struct dma_chan *chan, dma_addr_t handle,
> + size_t size, int direction)
> +{
> + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> + dma_unmap_single(&spe_chan->device->pdev->dev, handle, size, direction);
> +}
> +
...these are gone as well in the latest code.
> +static int __devinit spe_adma_probe(struct platform_device *pdev)
../..
> + printk(KERN_INFO "Intel(R) SPE ADMA Engine found [%d]: "
Intel(R)? :-)
Regards,
Dan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 18:00 ` Dan Williams
@ 2007-03-17 8:09 ` Stefan Roese
2007-03-17 18:17 ` Dan Williams
2007-03-17 8:57 ` Yuri Tikhonov
1 sibling, 1 reply; 25+ messages in thread
From: Stefan Roese @ 2007-03-17 8:09 UTC (permalink / raw)
To: Dan Williams; +Cc: linux-raid, linuxppc-dev
Dan,
I just noticed that your patch "dmaengine: add the async_tx api":
@@ -22,6 +22,17 @@ config NET_DMA
Since this is the main user of the DMA engine, it should be enabled;
say Y here.
+config ASYNC_TX_DMA
+ tristate "Asynchronous Bulk Memory Transfers/Transforms API"
+ default y
+ ---help---
+ This enables the async_tx management layer for dma engines.
+ Subsystems coded to this API will use offload engines for bulk
+ memory operations where present. Software implementations are
+ called when a dma engine is not present or fails to allocate
+ memory to carry out the transaction.
+ Current subsystems ported to async_tx: MD_RAID4,5
+
adds ASYNC_TX_DMA unconditionally to _all_ platforms. You might what to bundle
this with something like DMA_ENGINE.
Best regards,
Stefan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-16 18:00 ` Dan Williams
2007-03-17 8:09 ` Stefan Roese
@ 2007-03-17 8:57 ` Yuri Tikhonov
1 sibling, 0 replies; 25+ messages in thread
From: Yuri Tikhonov @ 2007-03-17 8:57 UTC (permalink / raw)
To: Dan Williams, linuxppc-dev, linux-raid
Hi Dan,
On Friday 16 March 2007 21:00, you wrote:
> Here are some additional comments/nits:
> > +/*
> > + * Init DMA0/1 and XOR engines; allocate memory for DMAx FIFOs; set
> > platform_device + * memory resources addresses
> > + */
> > +static void ppc440spe_configure_raid_devices(void)
>
> Any reason not to move most of this function into spe_adma_probe?
> The "set resource address" section is the only piece that spe_adma_probe
> should not handle.
Two ppc440spe's DMA1 and DMA2 engines have to be configured together with one
I2O engine (all these units share the same pointer to the FIFO area in the
main memory but use different parts of this common area). So, setting-up of
this common FIFO area should be performed in the
ppc440spe_configure_raid_devices(). As regarding other code in this function
(except "set resource address"), it might be moved to the spe_adma_probe()
indeed.
> > +++ b/drivers/dma/spe-adma.c
...
> > + *
> > + * The full GNU General Public License is included in this distribution
> > in the + * file called COPYING.
> > + */
> > +
> > +/*
> > + * This driver supports the asynchrounous DMA copy and RAID engines
> > available + * on the AMCC PPC440SPe Processors.
> > + * Based on the Intel Xscale(R) family of I/O Processors (SPE 32x, 33x,
> > 134x)
>
> SPE should be IOP on this line.
Correct.
...
> > +
> > +DECLARE_TASKLET(spe_adma0_tasklet, spe_adma0_task, 0);
> > +DECLARE_TASKLET(spe_adma1_tasklet, spe_adma1_task, 0);
> > +DECLARE_TASKLET(spe_adma2_tasklet, spe_adma2_task, 0);
> > +struct tasklet_struct *spe_adma_tasklet[] = {
> > + &spe_adma0_tasklet,
> > + &spe_adma1_tasklet,
> > + &spe_adma2_tasklet,
> > +};
> > +
>
> This is something I am cleaning up in iop-adma by adding a struct
> tasklet * to each channel. I'll post an incremental diff of my
> iop-adma changes so you can see what I have cleaned up since the
> 2.6.20-rc5 posting.
Thanks.
> > +static dma_addr_t spe_adma_map_page(struct dma_chan *chan, struct page
> > *page, + unsigned long offset,
> > size_t size, + int direction)
> > +{
> > + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> > + return dma_map_page(&spe_chan->device->pdev->dev, page, offset,
> > size, + direction);
> > +}
> > +
> > +static dma_addr_t spe_adma_map_single(struct dma_chan *chan, void
> > *cpu_addr, + size_t size, int
> > direction) +{
> > + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> > + return dma_map_single(&spe_chan->device->pdev->dev, cpu_addr,
> > size, + direction);
> > +}
> > +
> > +static void spe_adma_unmap_page(struct dma_chan *chan, dma_addr_t
> > handle, + size_t size, int direction)
> > +{
> > + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> > + dma_unmap_page(&spe_chan->device->pdev->dev, handle, size,
> > direction); +}
> > +
> > +static void spe_adma_unmap_single(struct dma_chan *chan, dma_addr_t
> > handle, + size_t size, int direction)
> > +{
> > + struct spe_adma_chan *spe_chan = to_spe_adma_chan(chan);
> > + dma_unmap_single(&spe_chan->device->pdev->dev, handle, size,
> > direction); +}
> > +
>
> ....these are gone as well in the latest code.
>
> > +static int __devinit spe_adma_probe(struct platform_device *pdev)
>
> .../..
>
> > + printk(KERN_INFO "Intel(R) SPE ADMA Engine found [%d]: "
>
> Intel(R)? :-)
Right : )
Regards, Yuri.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-17 8:09 ` Stefan Roese
@ 2007-03-17 18:17 ` Dan Williams
2007-03-17 18:43 ` Stefan Roese
0 siblings, 1 reply; 25+ messages in thread
From: Dan Williams @ 2007-03-17 18:17 UTC (permalink / raw)
To: Stefan Roese; +Cc: linux-raid, linuxppc-dev
On 3/17/07, Stefan Roese <sr@denx.de> wrote:
> Dan,
>
> I just noticed that your patch "dmaengine: add the async_tx api":
>
> @@ -22,6 +22,17 @@ config NET_DMA
> Since this is the main user of the DMA engine, it should be enabled;
> say Y here.
>
> +config ASYNC_TX_DMA
> + tristate "Asynchronous Bulk Memory Transfers/Transforms API"
> + default y
> + ---help---
> + This enables the async_tx management layer for dma engines.
> + Subsystems coded to this API will use offload engines for bulk
> + memory operations where present. Software implementations are
> + called when a dma engine is not present or fails to allocate
> + memory to carry out the transaction.
> + Current subsystems ported to async_tx: MD_RAID4,5
> +
>
> adds ASYNC_TX_DMA unconditionally to _all_ platforms. You might what to bundle
> this with something like DMA_ENGINE.
>
Yes, defaulting to 'y' is not necessary, but ASYNC_TX_DMA=y &&
DMA_ENGINE=n is an explicit feature of the interface. When DMA_ENGINE
is not selected all the asynchronous paths in the API are compiled
out. This allows subsytems, like md-raid5, to be written in an
asynchronous fashion without regard for the architecture[1] or
availability of offload engines.
> Best regards,
> Stefan
Regards,
Dan
[1] The API implicitly handles channel switching depending on the
offload engine architecture. Where an iop13xx engine can handle a
copy+xor sequence on one channel, a 440sp or iop3xx platform will need
to switch between copy and xor capable engines. Resolving operation
dependencies and channel switching is handled behind the scenes.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-17 18:17 ` Dan Williams
@ 2007-03-17 18:43 ` Stefan Roese
2007-03-17 19:09 ` Dan Williams
0 siblings, 1 reply; 25+ messages in thread
From: Stefan Roese @ 2007-03-17 18:43 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-raid, Dan Williams
On Saturday 17 March 2007 19:17, Dan Williams wrote:
> Yes, defaulting to 'y' is not necessary, but ASYNC_TX_DMA=y &&
> DMA_ENGINE=n is an explicit feature of the interface. When DMA_ENGINE
> is not selected all the asynchronous paths in the API are compiled
> out. This allows subsytems, like md-raid5, to be written in an
> asynchronous fashion without regard for the architecture[1] or
> availability of offload engines.
The current implementation builds on my embedded PPC4xx system without any
disks the objects async_tx.o and xor.o into the kernel which I definitely
don't need and want. And I get something like:
async_tx: api initialized (sync-only)
xor: measuring software checksumming speed
8regs : 145.000 MB/sec
8regs_prefetch: 115.000 MB/sec
32regs : 176.000 MB/sec
32regs_prefetch: 135.000 MB/sec
xor: using function: 32regs (176.000 MB/sec)
upon bootup.
Best regards,
Stefan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-17 18:43 ` Stefan Roese
@ 2007-03-17 19:09 ` Dan Williams
2007-03-19 16:13 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 25+ messages in thread
From: Dan Williams @ 2007-03-17 19:09 UTC (permalink / raw)
To: Stefan Roese; +Cc: linux-raid, linuxppc-dev
> The current implementation builds on my embedded PPC4xx system without any
> disks the objects async_tx.o and xor.o into the kernel which I definitely
> don't need and want. And I get something like:
>
> async_tx: api initialized (sync-only)
> xor: measuring software checksumming speed
> 8regs : 145.000 MB/sec
> 8regs_prefetch: 115.000 MB/sec
> 32regs : 176.000 MB/sec
> 32regs_prefetch: 135.000 MB/sec
> xor: using function: 32regs (176.000 MB/sec)
>
> upon bootup.
Understood I'll change it so that xor.o and async_tx.o are off by default.
>
> Best regards,
> Stefan
Dan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-17 19:09 ` Dan Williams
@ 2007-03-19 16:13 ` Benjamin Herrenschmidt
2007-03-20 3:06 ` Michael Ellerman
2007-03-21 14:10 ` Segher Boessenkool
0 siblings, 2 replies; 25+ messages in thread
From: Benjamin Herrenschmidt @ 2007-03-19 16:13 UTC (permalink / raw)
To: Dan Williams; +Cc: linux-raid, linuxppc-dev, Stefan Roese
BTW folks. Would it be hard to change your spe_ prefixes to something
else ? There's already enough confusion between the freescale SPE unit
and the cell SPEs :-)
(such confusion is annoying when grepp'ing for code that might touch a
given functionality for example).
Cheers,
Ben.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-19 16:13 ` Benjamin Herrenschmidt
@ 2007-03-20 3:06 ` Michael Ellerman
2007-03-20 5:39 ` Stefan Roese
2007-03-21 14:10 ` Segher Boessenkool
1 sibling, 1 reply; 25+ messages in thread
From: Michael Ellerman @ 2007-03-20 3:06 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: Dan Williams, linux-raid, Stefan Roese, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 639 bytes --]
On Mon, 2007-03-19 at 17:13 +0100, Benjamin Herrenschmidt wrote:
> BTW folks. Would it be hard to change your spe_ prefixes to something
> else ? There's already enough confusion between the freescale SPE unit
> and the cell SPEs :-)
>
> (such confusion is annoying when grepp'ing for code that might touch a
> given functionality for example).
Please please please!
cheers
--
Michael Ellerman
OzLabs, IBM Australia Development Lab
wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)
We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-20 3:06 ` Michael Ellerman
@ 2007-03-20 5:39 ` Stefan Roese
0 siblings, 0 replies; 25+ messages in thread
From: Stefan Roese @ 2007-03-20 5:39 UTC (permalink / raw)
To: michael; +Cc: Dan Williams, linux-raid, linuxppc-dev
On Tuesday 20 March 2007 04:06, Michael Ellerman wrote:
> On Mon, 2007-03-19 at 17:13 +0100, Benjamin Herrenschmidt wrote:
> > BTW folks. Would it be hard to change your spe_ prefixes to something
> > else ? There's already enough confusion between the freescale SPE unit
> > and the cell SPEs :-)
> >
> > (such confusion is annoying when grepp'ing for code that might touch a
> > given functionality for example).
>
> Please please please!
OK. Who can resist so much pleading. ;-)
Best regards,
Stefan
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-19 16:13 ` Benjamin Herrenschmidt
2007-03-20 3:06 ` Michael Ellerman
@ 2007-03-21 14:10 ` Segher Boessenkool
2007-03-21 19:55 ` Benjamin Herrenschmidt
1 sibling, 1 reply; 25+ messages in thread
From: Segher Boessenkool @ 2007-03-21 14:10 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: linux-raid, Dan Williams, Stefan Roese, linuxppc-dev
> BTW folks. Would it be hard to change your spe_ prefixes to something
> else ? There's already enough confusion between the freescale SPE unit
> and the cell SPEs :-)
Will you change _your_ prefixes too? :-) :-)
Segher
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-21 14:10 ` Segher Boessenkool
@ 2007-03-21 19:55 ` Benjamin Herrenschmidt
2007-03-21 20:03 ` Segher Boessenkool
0 siblings, 1 reply; 25+ messages in thread
From: Benjamin Herrenschmidt @ 2007-03-21 19:55 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: linux-raid, Dan Williams, Stefan Roese, linuxppc-dev
On Wed, 2007-03-21 at 15:10 +0100, Segher Boessenkool wrote:
> > BTW folks. Would it be hard to change your spe_ prefixes to something
> > else ? There's already enough confusion between the freescale SPE unit
> > and the cell SPEs :-)
>
> Will you change _your_ prefixes too? :-) :-)
Which ones ? I'm not in charge of the fsl spe thingy nor the spe
scheduler code :-) Beside, that code is already in vs. new code getting
in. Thus I fail to see your point.
Ben.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-21 19:55 ` Benjamin Herrenschmidt
@ 2007-03-21 20:03 ` Segher Boessenkool
2007-03-22 11:38 ` Christoph Hellwig
0 siblings, 1 reply; 25+ messages in thread
From: Segher Boessenkool @ 2007-03-21 20:03 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: linux-raid, Dan Williams, Stefan Roese, linuxppc-dev
>>> BTW folks. Would it be hard to change your spe_ prefixes to something
>>> else ? There's already enough confusion between the freescale SPE
>>> unit
>>> and the cell SPEs :-)
>>
>> Will you change _your_ prefixes too? :-) :-)
>
> Which ones ? I'm not in charge of the fsl spe thingy nor the spe
> scheduler code :-)
I meant the Cell code of course.
> Beside, that code is already in vs. new code getting
> in. Thus I fail to see your point.
My point was that spe_ on the existing code is a bad
prefix, too. And there were a lot of smileys, I don't
actually expect anything to change.
Let's stop this now, there are too many innocents on CC:.
Sorry people.
Segher
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-21 20:03 ` Segher Boessenkool
@ 2007-03-22 11:38 ` Christoph Hellwig
2007-03-22 12:36 ` Segher Boessenkool
0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2007-03-22 11:38 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: linux-raid, Stefan Roese, Dan Williams, linuxppc-dev
On Wed, Mar 21, 2007 at 09:03:27PM +0100, Segher Boessenkool wrote:
> >>> BTW folks. Would it be hard to change your spe_ prefixes to something
> >>> else ? There's already enough confusion between the freescale SPE
> >>> unit
> >>> and the cell SPEs :-)
> >>
> >> Will you change _your_ prefixes too? :-) :-)
> >
> > Which ones ? I'm not in charge of the fsl spe thingy nor the spe
> > scheduler code :-)
>
> I meant the Cell code of course.
Did you ever take a look at the cell code? The only place 'spe'
is used in a prefix are a handfull of hardware datatstructures and the
sony crap hypervisor hvcalls. Everything else uses spu_ as a prefix.
But yeah, grep is hard and life is a bitch.. :)
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-22 11:38 ` Christoph Hellwig
@ 2007-03-22 12:36 ` Segher Boessenkool
2007-03-22 13:20 ` Geert Uytterhoeven
0 siblings, 1 reply; 25+ messages in thread
From: Segher Boessenkool @ 2007-03-22 12:36 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-raid, Dan Williams, Stefan Roese, linuxppc-dev
>>>>> BTW folks. Would it be hard to change your spe_ prefixes to
>>>>> something
>>>>> else ? There's already enough confusion between the freescale SPE
>>>>> unit
>>>>> and the cell SPEs :-)
>>>>
>>>> Will you change _your_ prefixes too? :-) :-)
>>>
>>> Which ones ? I'm not in charge of the fsl spe thingy nor the spe
>>> scheduler code :-)
>>
>> I meant the Cell code of course.
>
> Did you ever take a look at the cell code?
Not often, no. I'm perfectly happy that I don't have
to touch that.
> The only place 'spe'
> is used in a prefix are a handfull of hardware datatstructures and the
> sony crap hypervisor hvcalls. Everything else uses spu_ as a prefix.
That sounds nice. There are slightly fewer things called
SPU than there are called SPE I imagine? Or is it just a
historical misnomer.
> But yeah, grep is hard and life is a bitch.. :)
If I'm told there are all these other things in the kernel
called spe_ I don't feel obliged to grep for it to check
if this is really so. Maybe I shouldn't trust people that
much, dunno.
Segher
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-22 12:36 ` Segher Boessenkool
@ 2007-03-22 13:20 ` Geert Uytterhoeven
2007-03-22 13:38 ` Segher Boessenkool
0 siblings, 1 reply; 25+ messages in thread
From: Geert Uytterhoeven @ 2007-03-22 13:20 UTC (permalink / raw)
To: Segher Boessenkool
Cc: linux-raid, Dan Williams, Stefan Roese, Linux/PPC Development
On Thu, 22 Mar 2007, Segher Boessenkool wrote:
> >>>>> BTW folks. Would it be hard to change your spe_ prefixes to
> >>>>> something
> >>>>> else ? There's already enough confusion between the freescale SPE
> >>>>> unit
> >>>>> and the cell SPEs :-)
> >>>>
> >>>> Will you change _your_ prefixes too? :-) :-)
> >>>
> >>> Which ones ? I'm not in charge of the fsl spe thingy nor the spe
> >>> scheduler code :-)
> >>
> >> I meant the Cell code of course.
> >
> > Did you ever take a look at the cell code?
>
> Not often, no. I'm perfectly happy that I don't have
> to touch that.
>
> > The only place 'spe'
> > is used in a prefix are a handfull of hardware datatstructures and the
> > sony crap hypervisor hvcalls. Everything else uses spu_ as a prefix.
>
> That sounds nice. There are slightly fewer things called
> SPU than there are called SPE I imagine? Or is it just a
> historical misnomer.
AFAIK SPE is the preferred name, as the SPU is only a part of the SPE.
That's what I was told.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- Sony Network and Software Technology Center Europe (NSCE)
Geert.Uytterhoeven@sonycom.com ------- The Corporate Village, Da Vincilaan 7-D1
Voice +32-2-7008453 Fax +32-2-7008622 ---------------- B-1935 Zaventem, Belgium
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH] [PPC32] ADMA support for PPC 440SPe processors.
2007-03-22 13:20 ` Geert Uytterhoeven
@ 2007-03-22 13:38 ` Segher Boessenkool
0 siblings, 0 replies; 25+ messages in thread
From: Segher Boessenkool @ 2007-03-22 13:38 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: linux-raid, Linux/PPC Development, Stefan Roese, Dan Williams
>> That sounds nice. There are slightly fewer things called
>> SPU than there are called SPE I imagine? Or is it just a
>> historical misnomer.
>
> AFAIK SPE is the preferred name, as the SPU is only a part of the SPE.
> That's what I was told.
And you were told right. I was trying to be sarcastic
here, but you missed it. Don't worry, it wasn't aimed
at you anyway ;-)
Segher
^ permalink raw reply [flat|nested] 25+ messages in thread
end of thread, other threads:[~2007-03-22 13:38 UTC | newest]
Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-15 23:29 [PATCH] [PPC32] ADMA support for PPC 440SPe processors Wolfgang Denk
2007-03-16 5:27 ` Paul Mackerras
2007-03-16 5:55 ` Dan Williams
2007-03-16 10:16 ` Wolfgang Denk
2007-03-16 16:33 ` Dan Williams
2007-03-16 8:29 ` Benjamin Herrenschmidt
2007-03-16 10:23 ` Wolfgang Denk
2007-03-16 12:44 ` Stefan Roese
2007-03-16 16:57 ` Dan Williams
2007-03-16 18:00 ` Dan Williams
2007-03-17 8:09 ` Stefan Roese
2007-03-17 18:17 ` Dan Williams
2007-03-17 18:43 ` Stefan Roese
2007-03-17 19:09 ` Dan Williams
2007-03-19 16:13 ` Benjamin Herrenschmidt
2007-03-20 3:06 ` Michael Ellerman
2007-03-20 5:39 ` Stefan Roese
2007-03-21 14:10 ` Segher Boessenkool
2007-03-21 19:55 ` Benjamin Herrenschmidt
2007-03-21 20:03 ` Segher Boessenkool
2007-03-22 11:38 ` Christoph Hellwig
2007-03-22 12:36 ` Segher Boessenkool
2007-03-22 13:20 ` Geert Uytterhoeven
2007-03-22 13:38 ` Segher Boessenkool
2007-03-17 8:57 ` Yuri Tikhonov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).