public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
@ 2011-09-30 21:38 Alexandre Bounine
  2011-09-30 21:38 ` [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support Alexandre Bounine
  2011-10-01 18:01 ` [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Vinod Koul
  0 siblings, 2 replies; 21+ messages in thread
From: Alexandre Bounine @ 2011-09-30 21:38 UTC (permalink / raw)
  To: akpm, linux-kernel, linuxppc-dev
  Cc: Alexandre Bounine, Vinod Koul, Kumar Gala, Matt Porter, Li Yang

Adds DMA Engine framework support into RapidIO subsystem.
Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from remote
RapidIO target devices. Uses scatterlist to describe local data buffer and
dma_chan.private member to pass target-specific information. Supports a flat
data buffer only on the remote side.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
---
 drivers/dma/dmaengine.c   |    4 ++
 drivers/rapidio/Kconfig   |    6 +++
 drivers/rapidio/rio.c     |   79 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dmaengine.h |    1 +
 include/linux/rio.h       |   47 ++++++++++++++++++++++++++
 include/linux/rio_drv.h   |    9 +++++
 6 files changed, 146 insertions(+), 0 deletions(-)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b48967b..11fdc2c 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -699,6 +699,10 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_cyclic);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_control);
+	BUG_ON(dma_has_cap(DMA_RAPIDIO, device->cap_mask) &&
+		!device->device_prep_slave_sg);
+	BUG_ON(dma_has_cap(DMA_RAPIDIO, device->cap_mask) &&
+		!device->device_control);
 
 	BUG_ON(!device->device_alloc_chan_resources);
 	BUG_ON(!device->device_free_chan_resources);
diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index bc87192..c4aa279 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig
@@ -34,3 +34,9 @@ config RAPIDIO_DEBUG
 	  If you are unsure about this, say N here.
 
 source "drivers/rapidio/switches/Kconfig"
+
+# This option to be turned on by a device selection
+config RAPIDIO_DMA_ENGINE
+	bool
+	select DMADEVICES
+	select DMA_ENGINE
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 86c9a09..e5905fc 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1121,6 +1121,85 @@ int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
 	return 0;
 }
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+#include <linux/dmaengine.h>
+
+static bool rio_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct rio_dev *rdev = arg;
+
+	/* Check that DMA device belongs to the right MPORT */
+	return (rdev->net->hport ==
+		container_of(chan->device, struct rio_mport, dma));
+}
+
+/**
+ * rio_request_dma - request RapidIO capable DMA channel that supports
+ *   specified target RapidIO device.
+ * @rdev: RIO device control structure
+ *
+ * Returns pointer to allocated DMA channel or NULL if failed.
+ */
+struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *dchan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_RAPIDIO, mask);
+	dchan = dma_request_channel(mask, rio_chan_filter, rdev);
+
+	return dchan;
+}
+EXPORT_SYMBOL_GPL(rio_request_dma);
+
+/**
+ * rio_release_dma - release specified DMA channel
+ * @dchan: DMA channel to release
+ */
+void rio_release_dma(struct dma_chan *dchan)
+{
+	dma_release_channel(dchan);
+}
+EXPORT_SYMBOL_GPL(rio_release_dma);
+
+/**
+ * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ *   for device_prep_slave_sg callback defined by DMAENGINE.
+ * @rdev: RIO device control structure
+ * @dchan: DMA channel to configure
+ * @data: RIO specific data descriptor
+ * @direction: DMA data transfer direction (TO or FROM the device)
+ * @flags: dmaengine defined flags
+ *
+ * Initializes RapidIO capable DMA channel for the specified data transfer.
+ * Uses DMA channel private extension to pass information related to remote
+ * target RIO device.
+ * Returns pointer to DMA transaction descriptor or NULL if failed.
+ */
+struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
+	struct dma_chan *dchan, struct rio_dma_data *data,
+	enum dma_data_direction direction, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *txd = NULL;
+	struct rio_dma_ext rio_ext;
+
+	rio_ext.destid = rdev->destid;
+	rio_ext.rio_addr_u = data->rio_addr_u;
+	rio_ext.rio_addr = data->rio_addr;
+	rio_ext.wr_type = data->wr_type;
+	dchan->private = &rio_ext;
+
+	txd = dchan->device->device_prep_slave_sg(dchan, data->sg, data->sg_len,
+						  direction, flags);
+
+	return txd;
+}
+EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
+
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
 static void rio_fixup_device(struct rio_dev *dev)
 {
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8fbf40e..867b685 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -70,6 +70,7 @@ enum dma_transaction_type {
 	DMA_PRIVATE,
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
+	DMA_RAPIDIO,
 	DMA_CYCLIC,
 };
 
diff --git a/include/linux/rio.h b/include/linux/rio.h
index 4d50611..a6d1054c 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -20,6 +20,9 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/rio_regs.h>
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+#include <linux/dmaengine.h>
+#endif
 
 #define RIO_NO_HOPCOUNT		-1
 #define RIO_INVALID_DESTID	0xffff
@@ -254,6 +257,9 @@ struct rio_mport {
 	u32 phys_efptr;
 	unsigned char name[40];
 	void *priv;		/* Master port private data */
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_device	dma;
+#endif
 };
 
 /**
@@ -395,6 +401,47 @@ union rio_pw_msg {
 	u32 raw[RIO_PW_MSG_SIZE/sizeof(u32)];
 };
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+/**
+ * enum rio_write_type - RIO write transaction types used in DMA transfers
+ *
+ * Note: RapidIO specification defines write (NWRITE) and
+ * write-with-response (NWRITE_R) data transfer operations.
+ * Existing DMA controllers that service RapidIO may use one of these operations
+ * for the entire data transfer, or a combination in which only the last data
+ * packet requires a response.
+ */
+enum rio_write_type {
+	RDW_DEFAULT,		/* default method used by DMA driver */
+	RDW_ALL_NWRITE,		/* all packets use NWRITE */
+	RDW_ALL_NWRITE_R,	/* all packets use NWRITE_R */
+	RDW_LAST_NWRITE_R,	/* last packet uses NWRITE_R, all others use NWRITE */
+};
+
+struct rio_dma_ext {
+	u16 destid;
+	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
+	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
+	enum rio_write_type wr_type; /* preferred RIO write operation type */
+};
+
+struct rio_dma_data {
+	/* Local data (as scatterlist) */
+	struct scatterlist	*sg;	/* I/O scatter list */
+	unsigned int		sg_len;	/* size of scatter list */
+	/* Remote device address (flat buffer) */
+	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
+	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
+	enum rio_write_type wr_type; /* preferred RIO write operation type */
+};
+
+static inline struct rio_mport *dma_to_mport(struct dma_device *ddev)
+{
+	return container_of(ddev, struct rio_mport, dma);
+}
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
 /* Architecture and hardware-specific functions */
 extern int rio_register_mport(struct rio_mport *);
 extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 229b3ca..d140996 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -378,6 +378,15 @@ void rio_unregister_driver(struct rio_driver *);
 struct rio_dev *rio_dev_get(struct rio_dev *);
 void rio_dev_put(struct rio_dev *);
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
+extern void rio_release_dma(struct dma_chan *dchan);
+extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
+		struct rio_dev *rdev, struct dma_chan *dchan,
+		struct rio_dma_data *data, enum dma_data_direction direction,
+		unsigned long flags);
+#endif
+
 /**
  * rio_name - Get the unique RIO device identifier
  * @rdev: RIO device
-- 
1.7.6


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-09-30 21:38 [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Alexandre Bounine
@ 2011-09-30 21:38 ` Alexandre Bounine
  2011-09-30 22:15   ` Andrew Morton
  2011-10-01 18:06   ` Vinod Koul
  2011-10-01 18:01 ` [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Vinod Koul
  1 sibling, 2 replies; 21+ messages in thread
From: Alexandre Bounine @ 2011-09-30 21:38 UTC (permalink / raw)
  To: akpm, linux-kernel, linuxppc-dev
  Cc: Alexandre Bounine, Vinod Koul, Kumar Gala, Matt Porter, Li Yang

Adds support for DMA Engine API.

Includes following changes:
- Modifies BDMA register offset definitions to support per-channel handling
- Separates BDMA channel reserved for RIO Maintenance requests
- Adds DMA Engine callback routines

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
---
 drivers/rapidio/devices/Kconfig      |    8 +
 drivers/rapidio/devices/Makefile     |    1 +
 drivers/rapidio/devices/tsi721.c     |  201 ++++++----
 drivers/rapidio/devices/tsi721.h     |  107 ++++-
 drivers/rapidio/devices/tsi721_dma.c |  802 ++++++++++++++++++++++++++++++++++
 5 files changed, 1029 insertions(+), 90 deletions(-)
 create mode 100644 drivers/rapidio/devices/tsi721_dma.c

diff --git a/drivers/rapidio/devices/Kconfig b/drivers/rapidio/devices/Kconfig
index 12a9d7f..3a2db3d 100644
--- a/drivers/rapidio/devices/Kconfig
+++ b/drivers/rapidio/devices/Kconfig
@@ -8,3 +8,11 @@ config RAPIDIO_TSI721
 	default "n"
 	---help---
 	  Include support for IDT Tsi721 PCI Express Serial RapidIO controller.
+
+config TSI721_DMA
+	bool "IDT Tsi721 RapidIO DMA support"
+	depends on RAPIDIO_TSI721
+	default "n"
+	select RAPIDIO_DMA_ENGINE
+	help
+	  Enable DMA support for IDT Tsi721 PCIe-to-SRIO controller.
diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile
index 3b7b4e2..8cbce45 100644
--- a/drivers/rapidio/devices/Makefile
+++ b/drivers/rapidio/devices/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_RAPIDIO_TSI721)	+= tsi721.o
+obj-$(CONFIG_TSI721_DMA) += tsi721_dma.o
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index 5225930..5e893a6 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -108,6 +108,7 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 			u16 destid, u8 hopcount, u32 offset, int len,
 			u32 *data, int do_wr)
 {
+	void __iomem *regs = priv->regs + TSI721_DMAC_BASE(priv->mdma.ch_id);
 	struct tsi721_dma_desc *bd_ptr;
 	u32 rd_count, swr_ptr, ch_stat;
 	int i, err = 0;
@@ -116,10 +117,9 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 	if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32)))
 		return -EINVAL;
 
-	bd_ptr = priv->bdma[TSI721_DMACH_MAINT].bd_base;
+	bd_ptr = priv->mdma.bd_base;
 
-	rd_count = ioread32(
-			priv->regs + TSI721_DMAC_DRDCNT(TSI721_DMACH_MAINT));
+	rd_count = ioread32(regs + TSI721_DMAC_DRDCNT);
 
 	/* Initialize DMA descriptor */
 	bd_ptr[0].type_id = cpu_to_le32((DTYPE2 << 29) | (op << 19) | destid);
@@ -134,19 +134,18 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 	mb();
 
 	/* Start DMA operation */
-	iowrite32(rd_count + 2,
-		priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
-	ioread32(priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+	iowrite32(rd_count + 2,	regs + TSI721_DMAC_DWRCNT);
+	ioread32(regs + TSI721_DMAC_DWRCNT);
 	i = 0;
 
 	/* Wait until DMA transfer is finished */
-	while ((ch_stat = ioread32(priv->regs +
-		TSI721_DMAC_STS(TSI721_DMACH_MAINT))) & TSI721_DMAC_STS_RUN) {
+	while ((ch_stat = ioread32(regs + TSI721_DMAC_STS))
+							& TSI721_DMAC_STS_RUN) {
 		udelay(1);
 		if (++i >= 5000000) {
 			dev_dbg(&priv->pdev->dev,
 				"%s : DMA[%d] read timeout ch_status=%x\n",
-				__func__, TSI721_DMACH_MAINT, ch_stat);
+				__func__, priv->mdma.ch_id, ch_stat);
 			if (!do_wr)
 				*data = 0xffffffff;
 			err = -EIO;
@@ -162,13 +161,10 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 			__func__, ch_stat);
 		dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
 			do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
-		iowrite32(TSI721_DMAC_INT_ALL,
-			priv->regs + TSI721_DMAC_INT(TSI721_DMACH_MAINT));
-		iowrite32(TSI721_DMAC_CTL_INIT,
-			priv->regs + TSI721_DMAC_CTL(TSI721_DMACH_MAINT));
+		iowrite32(TSI721_DMAC_INT_ALL, regs + TSI721_DMAC_INT);
+		iowrite32(TSI721_DMAC_CTL_INIT, regs + TSI721_DMAC_CTL);
 		udelay(10);
-		iowrite32(0, priv->regs +
-				TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+		iowrite32(0, regs + TSI721_DMAC_DWRCNT);
 		udelay(1);
 		if (!do_wr)
 			*data = 0xffffffff;
@@ -184,8 +180,8 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 	 * NOTE: Skipping check and clear FIFO entries because we are waiting
 	 * for transfer to be completed.
 	 */
-	swr_ptr = ioread32(priv->regs + TSI721_DMAC_DSWP(TSI721_DMACH_MAINT));
-	iowrite32(swr_ptr, priv->regs + TSI721_DMAC_DSRP(TSI721_DMACH_MAINT));
+	swr_ptr = ioread32(regs + TSI721_DMAC_DSWP);
+	iowrite32(swr_ptr, regs + TSI721_DMAC_DSRP);
 err_out:
 
 	return err;
@@ -540,6 +536,22 @@ static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
 			tsi721_pw_handler(mport);
 	}
 
+#ifdef CONFIG_TSI721_DMA
+	if (dev_int & TSI721_DEV_INT_BDMA_CH) {
+		int ch;
+
+		if (dev_ch_int & TSI721_INT_BDMA_CHAN_M) {
+			dev_dbg(&priv->pdev->dev,
+				"IRQ from DMA channel 0x%08x\n", dev_ch_int);
+
+			for (ch = 0; ch < TSI721_DMA_MAXCH; ch++) {
+				if (!(dev_ch_int & TSI721_INT_BDMA_CHAN(ch)))
+					continue;
+				tsi721_bdma_handler(&priv->bdma[ch]);
+			}
+		}
+	}
+#endif
 	return IRQ_HANDLED;
 }
 
@@ -552,18 +564,26 @@ static void tsi721_interrupts_init(struct tsi721_device *priv)
 		priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
 	iowrite32(TSI721_SR_CHINT_IDBQRCV,
 		priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
-	iowrite32(TSI721_INT_SR2PC_CHAN(IDB_QUEUE),
-		priv->regs + TSI721_DEV_CHAN_INTE);
 
 	/* Enable SRIO MAC interrupts */
 	iowrite32(TSI721_RIO_EM_DEV_INT_EN_INT,
 		priv->regs + TSI721_RIO_EM_DEV_INT_EN);
 
+	/* Enable interrupts from channels in use */
+#ifdef CONFIG_TSI721_DMA
+	intr = TSI721_INT_SR2PC_CHAN(IDB_QUEUE) |
+		(TSI721_INT_BDMA_CHAN_M &
+		 ~TSI721_INT_BDMA_CHAN(TSI721_DMACH_MAINT));
+#else
+	intr = TSI721_INT_SR2PC_CHAN(IDB_QUEUE);
+#endif
+	iowrite32(intr,	priv->regs + TSI721_DEV_CHAN_INTE);
+
 	if (priv->flags & TSI721_USING_MSIX)
 		intr = TSI721_DEV_INT_SRIO;
 	else
 		intr = TSI721_DEV_INT_SR2PC_CH | TSI721_DEV_INT_SRIO |
-			TSI721_DEV_INT_SMSG_CH;
+			TSI721_DEV_INT_SMSG_CH | TSI721_DEV_INT_BDMA_CH;
 
 	iowrite32(intr, priv->regs + TSI721_DEV_INTE);
 	ioread32(priv->regs + TSI721_DEV_INTE);
@@ -714,12 +734,29 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
 					TSI721_MSIX_OMSG_INT(i);
 	}
 
+#ifdef CONFIG_TSI721_DMA
+	/*
+	 * Initialize MSI-X entries for Block DMA Engine:
+	 * this driver supports up to TSI721_DMA_CHNUM DMA channels
+	 * (one is reserved for SRIO maintenance transactions)
+	 */
+	for (i = 0; i < TSI721_DMA_CHNUM; i++) {
+		entries[TSI721_VECT_DMA0_DONE + i].entry =
+					TSI721_MSIX_DMACH_DONE(i);
+		entries[TSI721_VECT_DMA0_INT + i].entry =
+					TSI721_MSIX_DMACH_INT(i);
+	}
+#endif /* CONFIG_TSI721_DMA */
+
 	err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries));
 	if (err) {
 		if (err > 0)
 			dev_info(&priv->pdev->dev,
 				 "Only %d MSI-X vectors available, "
 				 "not using MSI-X\n", err);
+		else
+			dev_err(&priv->pdev->dev,
+				"Failed to enable MSI-X (err=%d)\n", err);
 		return err;
 	}
 
@@ -759,6 +796,22 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
 			 i, pci_name(priv->pdev));
 	}
 
+#ifdef CONFIG_TSI721_DMA
+	for (i = 0; i < TSI721_DMA_CHNUM; i++) {
+		priv->msix[TSI721_VECT_DMA0_DONE + i].vector =
+				entries[TSI721_VECT_DMA0_DONE + i].vector;
+		snprintf(priv->msix[TSI721_VECT_DMA0_DONE + i].irq_name,
+			 IRQ_DEVICE_NAME_MAX, DRV_NAME "-dmad%d@pci:%s",
+			 i, pci_name(priv->pdev));
+
+		priv->msix[TSI721_VECT_DMA0_INT + i].vector =
+				entries[TSI721_VECT_DMA0_INT + i].vector;
+		snprintf(priv->msix[TSI721_VECT_DMA0_INT + i].irq_name,
+			 IRQ_DEVICE_NAME_MAX, DRV_NAME "-dmai%d@pci:%s",
+			 i, pci_name(priv->pdev));
+	}
+#endif /* CONFIG_TSI721_DMA */
+
 	return 0;
 }
 #endif /* CONFIG_PCI_MSI */
@@ -889,20 +942,34 @@ static void tsi721_doorbell_free(struct tsi721_device *priv)
 	priv->idb_base = NULL;
 }
 
-static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
+/**
+ * tsi721_bdma_maint_init - Initialize maintenance request BDMA channel.
+ * @priv: pointer to tsi721 private data
+ *
+ * Initialize BDMA channel allocated for RapidIO maintenance read/write
+ * request generation.
+ * Returns %0 on success or %-ENOMEM on failure.
+ */
+static int tsi721_bdma_maint_init(struct tsi721_device *priv)
 {
 	struct tsi721_dma_desc *bd_ptr;
 	u64		*sts_ptr;
 	dma_addr_t	bd_phys, sts_phys;
 	int		sts_size;
-	int		bd_num = priv->bdma[chnum].bd_num;
+	int		bd_num = 2;
+	void __iomem	*regs;
 
-	dev_dbg(&priv->pdev->dev, "Init Block DMA Engine, CH%d\n", chnum);
+	dev_dbg(&priv->pdev->dev,
+		"Init Block DMA Engine for Maintenance requests, CH%d\n",
+		TSI721_DMACH_MAINT);
 
 	/*
 	 * Initialize DMA channel for maintenance requests
 	 */
 
+	priv->mdma.ch_id = TSI721_DMACH_MAINT;
+	regs = priv->regs + TSI721_DMAC_BASE(TSI721_DMACH_MAINT);
+
 	/* Allocate space for DMA descriptors */
 	bd_ptr = dma_alloc_coherent(&priv->pdev->dev,
 					bd_num * sizeof(struct tsi721_dma_desc),
@@ -910,8 +977,9 @@ static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
 	if (!bd_ptr)
 		return -ENOMEM;
 
-	priv->bdma[chnum].bd_phys = bd_phys;
-	priv->bdma[chnum].bd_base = bd_ptr;
+	priv->mdma.bd_num = bd_num;
+	priv->mdma.bd_phys = bd_phys;
+	priv->mdma.bd_base = bd_ptr;
 
 	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
 
@@ -930,13 +998,13 @@ static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
 		dma_free_coherent(&priv->pdev->dev,
 				  bd_num * sizeof(struct tsi721_dma_desc),
 				  bd_ptr, bd_phys);
-		priv->bdma[chnum].bd_base = NULL;
+		priv->mdma.bd_base = NULL;
 		return -ENOMEM;
 	}
 
-	priv->bdma[chnum].sts_phys = sts_phys;
-	priv->bdma[chnum].sts_base = sts_ptr;
-	priv->bdma[chnum].sts_size = sts_size;
+	priv->mdma.sts_phys = sts_phys;
+	priv->mdma.sts_base = sts_ptr;
+	priv->mdma.sts_size = sts_size;
 
 	memset(sts_ptr, 0, sts_size);
 
@@ -951,83 +1019,61 @@ static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
 	bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
 
 	/* Setup DMA descriptor pointers */
-	iowrite32(((u64)bd_phys >> 32),
-		priv->regs + TSI721_DMAC_DPTRH(chnum));
+	iowrite32(((u64)bd_phys >> 32),	regs + TSI721_DMAC_DPTRH);
 	iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
-		priv->regs + TSI721_DMAC_DPTRL(chnum));
+		regs + TSI721_DMAC_DPTRL);
 
 	/* Setup descriptor status FIFO */
-	iowrite32(((u64)sts_phys >> 32),
-		priv->regs + TSI721_DMAC_DSBH(chnum));
+	iowrite32(((u64)sts_phys >> 32), regs + TSI721_DMAC_DSBH);
 	iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
-		priv->regs + TSI721_DMAC_DSBL(chnum));
+		regs + TSI721_DMAC_DSBL);
 	iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
-		priv->regs + TSI721_DMAC_DSSZ(chnum));
+		regs + TSI721_DMAC_DSSZ);
 
 	/* Clear interrupt bits */
-	iowrite32(TSI721_DMAC_INT_ALL,
-		priv->regs + TSI721_DMAC_INT(chnum));
+	iowrite32(TSI721_DMAC_INT_ALL, regs + TSI721_DMAC_INT);
 
-	ioread32(priv->regs + TSI721_DMAC_INT(chnum));
+	ioread32(regs + TSI721_DMAC_INT);
 
 	/* Toggle DMA channel initialization */
-	iowrite32(TSI721_DMAC_CTL_INIT,	priv->regs + TSI721_DMAC_CTL(chnum));
-	ioread32(priv->regs + TSI721_DMAC_CTL(chnum));
+	iowrite32(TSI721_DMAC_CTL_INIT,	regs + TSI721_DMAC_CTL);
+	ioread32(regs + TSI721_DMAC_CTL);
 	udelay(10);
 
 	return 0;
 }
 
-static int tsi721_bdma_ch_free(struct tsi721_device *priv, int chnum)
+static int tsi721_bdma_maint_free(struct tsi721_device *priv)
 {
 	u32 ch_stat;
+	struct tsi721_bdma_maint *mdma = &priv->mdma;
+	void __iomem *regs = priv->regs + TSI721_DMAC_BASE(mdma->ch_id);
 
-	if (priv->bdma[chnum].bd_base == NULL)
+	if (mdma->bd_base == NULL)
 		return 0;
 
 	/* Check if DMA channel still running */
-	ch_stat = ioread32(priv->regs +	TSI721_DMAC_STS(chnum));
+	ch_stat = ioread32(regs + TSI721_DMAC_STS);
 	if (ch_stat & TSI721_DMAC_STS_RUN)
 		return -EFAULT;
 
 	/* Put DMA channel into init state */
-	iowrite32(TSI721_DMAC_CTL_INIT,
-		priv->regs + TSI721_DMAC_CTL(chnum));
+	iowrite32(TSI721_DMAC_CTL_INIT,	regs + TSI721_DMAC_CTL);
 
 	/* Free space allocated for DMA descriptors */
 	dma_free_coherent(&priv->pdev->dev,
-		priv->bdma[chnum].bd_num * sizeof(struct tsi721_dma_desc),
-		priv->bdma[chnum].bd_base, priv->bdma[chnum].bd_phys);
-	priv->bdma[chnum].bd_base = NULL;
+		mdma->bd_num * sizeof(struct tsi721_dma_desc),
+		mdma->bd_base, mdma->bd_phys);
+	mdma->bd_base = NULL;
 
 	/* Free space allocated for status FIFO */
 	dma_free_coherent(&priv->pdev->dev,
-		priv->bdma[chnum].sts_size * sizeof(struct tsi721_dma_sts),
-		priv->bdma[chnum].sts_base, priv->bdma[chnum].sts_phys);
-	priv->bdma[chnum].sts_base = NULL;
-	return 0;
-}
-
-static int tsi721_bdma_init(struct tsi721_device *priv)
-{
-	/* Initialize BDMA channel allocated for RapidIO maintenance read/write
-	 * request generation
-	 */
-	priv->bdma[TSI721_DMACH_MAINT].bd_num = 2;
-	if (tsi721_bdma_ch_init(priv, TSI721_DMACH_MAINT)) {
-		dev_err(&priv->pdev->dev, "Unable to initialize maintenance DMA"
-			" channel %d, aborting\n", TSI721_DMACH_MAINT);
-		return -ENOMEM;
-	}
-
+		mdma->sts_size * sizeof(struct tsi721_dma_sts),
+		mdma->sts_base, mdma->sts_phys);
+	mdma->sts_base = NULL;
 	return 0;
 }
 
-static void tsi721_bdma_free(struct tsi721_device *priv)
-{
-	tsi721_bdma_ch_free(priv, TSI721_DMACH_MAINT);
-}
-
 /* Enable Inbound Messaging Interrupts */
 static void
 tsi721_imsg_interrupt_enable(struct tsi721_device *priv, int ch,
@@ -2043,7 +2089,8 @@ static void tsi721_disable_ints(struct tsi721_device *priv)
 
 	/* Disable all BDMA Channel interrupts */
 	for (ch = 0; ch < TSI721_DMA_MAXCH; ch++)
-		iowrite32(0, priv->regs + TSI721_DMAC_INTE(ch));
+		iowrite32(0,
+			priv->regs + TSI721_DMAC_BASE(ch) + TSI721_DMAC_INTE);
 
 	/* Disable all general BDMA interrupts */
 	iowrite32(0, priv->regs + TSI721_BDMA_INTE);
@@ -2292,7 +2339,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
 	tsi721_init_pc2sr_mapping(priv);
 	tsi721_init_sr2pc_mapping(priv);
 
-	if (tsi721_bdma_init(priv)) {
+	if (tsi721_bdma_maint_init(priv)) {
 		dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
 		err = -ENOMEM;
 		goto err_unmap_bars;
@@ -2312,12 +2359,16 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_free_consistent;
 
+#ifdef CONFIG_TSI721_DMA
+	tsi721_register_dma(priv);
+#endif
+
 	return 0;
 
 err_free_consistent:
 	tsi721_doorbell_free(priv);
 err_free_bdma:
-	tsi721_bdma_free(priv);
+	tsi721_bdma_maint_free(priv);
 err_unmap_bars:
 	if (priv->regs)
 		iounmap(priv->regs);
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 58be4de..2f756dc 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -21,6 +21,10 @@
 #ifndef __TSI721_H
 #define __TSI721_H
 
+#ifdef CONFIG_TSI721_DMA
+#include <linux/dmaengine.h>
+#endif
+
 #define DRV_NAME	"tsi721"
 
 #define DEFAULT_HOPCOUNT	0xff
@@ -165,6 +169,8 @@
 #define TSI721_DEV_INTE		0x29840
 #define TSI721_DEV_INT		0x29844
 #define TSI721_DEV_INTSET	0x29848
+#define TSI721_DEV_INT_BDMA_CH	0x00002000
+#define TSI721_DEV_INT_BDMA_NCH	0x00001000
 #define TSI721_DEV_INT_SMSG_CH	0x00000800
 #define TSI721_DEV_INT_SMSG_NCH	0x00000400
 #define TSI721_DEV_INT_SR2PC_CH	0x00000200
@@ -179,6 +185,8 @@
 #define TSI721_INT_IMSG_CHAN(x)	(1 << (16 + (x)))
 #define TSI721_INT_OMSG_CHAN_M	0x0000ff00
 #define TSI721_INT_OMSG_CHAN(x)	(1 << (8 + (x)))
+#define TSI721_INT_BDMA_CHAN_M	0x000000ff
+#define TSI721_INT_BDMA_CHAN(x)	(1 << (x))
 
 /*
  * PC2SR block registers
@@ -233,14 +241,16 @@
  *   x = 0..7
  */
 
-#define TSI721_DMAC_DWRCNT(x)	(0x51000 + (x) * 0x1000)
-#define TSI721_DMAC_DRDCNT(x)	(0x51004 + (x) * 0x1000)
+#define TSI721_DMAC_BASE(x)	(0x51000 + (x) * 0x1000)
+
+#define TSI721_DMAC_DWRCNT	0x000
+#define TSI721_DMAC_DRDCNT	0x004
 
-#define TSI721_DMAC_CTL(x)	(0x51008 + (x) * 0x1000)
+#define TSI721_DMAC_CTL		0x008
 #define TSI721_DMAC_CTL_SUSP	0x00000002
 #define TSI721_DMAC_CTL_INIT	0x00000001
 
-#define TSI721_DMAC_INT(x)	(0x5100c + (x) * 0x1000)
+#define TSI721_DMAC_INT		0x00c
 #define TSI721_DMAC_INT_STFULL	0x00000010
 #define TSI721_DMAC_INT_DONE	0x00000008
 #define TSI721_DMAC_INT_SUSP	0x00000004
@@ -248,34 +258,33 @@
 #define TSI721_DMAC_INT_IOFDONE	0x00000001
 #define TSI721_DMAC_INT_ALL	0x0000001f
 
-#define TSI721_DMAC_INTSET(x)	(0x51010 + (x) * 0x1000)
+#define TSI721_DMAC_INTSET	0x010
 
-#define TSI721_DMAC_STS(x)	(0x51014 + (x) * 0x1000)
+#define TSI721_DMAC_STS		0x014
 #define TSI721_DMAC_STS_ABORT	0x00400000
 #define TSI721_DMAC_STS_RUN	0x00200000
 #define TSI721_DMAC_STS_CS	0x001f0000
 
-#define TSI721_DMAC_INTE(x)	(0x51018 + (x) * 0x1000)
+#define TSI721_DMAC_INTE	0x018
 
-#define TSI721_DMAC_DPTRL(x)	(0x51024 + (x) * 0x1000)
+#define TSI721_DMAC_DPTRL	0x024
 #define TSI721_DMAC_DPTRL_MASK	0xffffffe0
 
-#define TSI721_DMAC_DPTRH(x)	(0x51028 + (x) * 0x1000)
+#define TSI721_DMAC_DPTRH	0x028
 
-#define TSI721_DMAC_DSBL(x)	(0x5102c + (x) * 0x1000)
+#define TSI721_DMAC_DSBL	0x02c
 #define TSI721_DMAC_DSBL_MASK	0xffffffc0
 
-#define TSI721_DMAC_DSBH(x)	(0x51030 + (x) * 0x1000)
+#define TSI721_DMAC_DSBH	0x030
 
-#define TSI721_DMAC_DSSZ(x)	(0x51034 + (x) * 0x1000)
+#define TSI721_DMAC_DSSZ	0x034
 #define TSI721_DMAC_DSSZ_SIZE_M	0x0000000f
 #define TSI721_DMAC_DSSZ_SIZE(size)	(__fls(size) - 4)
 
-
-#define TSI721_DMAC_DSRP(x)	(0x51038 + (x) * 0x1000)
+#define TSI721_DMAC_DSRP	0x038
 #define TSI721_DMAC_DSRP_MASK	0x0007ffff
 
-#define TSI721_DMAC_DSWP(x)	(0x5103c + (x) * 0x1000)
+#define TSI721_DMAC_DSWP	0x03c
 #define TSI721_DMAC_DSWP_MASK	0x0007ffff
 
 #define TSI721_BDMA_INTE	0x5f000
@@ -624,7 +633,48 @@ enum tsi721_smsg_int_flag {
 
 /* Structures */
 
+#ifdef CONFIG_TSI721_DMA
+
+struct tsi721_tx_desc {
+	struct dma_async_tx_descriptor	txd;
+	struct tsi721_dma_desc		*hw_desc;
+	u16				destid;
+	/* low 64-bits of 66-bit RIO address */
+	u64				rio_addr;
+	/* upper 2-bits of 66-bit RIO address */
+	u8				rio_addr_u;
+	bool				interrupt;
+	struct list_head		desc_node;
+	struct list_head		tx_list;
+};
+
 struct tsi721_bdma_chan {
+	int		id;
+	void __iomem	*regs;
+	int		bd_num;		/* number of buffer descriptors */
+	void		*bd_base;	/* start of DMA descriptors */
+	dma_addr_t	bd_phys;
+	void		*sts_base;	/* start of DMA BD status FIFO */
+	dma_addr_t	sts_phys;
+	int		sts_size;
+	u32		sts_rdptr;
+	u32		wr_count;
+	u32		wr_count_next;
+
+	struct dma_chan		dchan;
+	struct tsi721_tx_desc	*tx_desc;
+	spinlock_t		lock;
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	dma_cookie_t		completed_cookie;
+	struct tasklet_struct	tasklet;
+};
+
+#endif /* CONFIG_TSI721_DMA */
+
+struct tsi721_bdma_maint {
+	int		ch_id;		/* BDMA channel number */
 	int		bd_num;		/* number of buffer descriptors */
 	void		*bd_base;	/* start of DMA descriptors */
 	dma_addr_t	bd_phys;
@@ -719,6 +769,24 @@ enum tsi721_msix_vect {
 	TSI721_VECT_IMB1_INT,
 	TSI721_VECT_IMB2_INT,
 	TSI721_VECT_IMB3_INT,
+#ifdef CONFIG_TSI721_DMA
+	TSI721_VECT_DMA0_DONE,
+	TSI721_VECT_DMA1_DONE,
+	TSI721_VECT_DMA2_DONE,
+	TSI721_VECT_DMA3_DONE,
+	TSI721_VECT_DMA4_DONE,
+	TSI721_VECT_DMA5_DONE,
+	TSI721_VECT_DMA6_DONE,
+	TSI721_VECT_DMA7_DONE,
+	TSI721_VECT_DMA0_INT,
+	TSI721_VECT_DMA1_INT,
+	TSI721_VECT_DMA2_INT,
+	TSI721_VECT_DMA3_INT,
+	TSI721_VECT_DMA4_INT,
+	TSI721_VECT_DMA5_INT,
+	TSI721_VECT_DMA6_INT,
+	TSI721_VECT_DMA7_INT,
+#endif /* CONFIG_TSI721_DMA */
 	TSI721_VECT_MAX
 };
 
@@ -752,7 +820,11 @@ struct tsi721_device {
 	u32		pw_discard_count;
 
 	/* BDMA Engine */
+	struct tsi721_bdma_maint mdma; /* Maintenance rd/wr request channel */
+
+#ifdef CONFIG_TSI721_DMA
 	struct tsi721_bdma_chan bdma[TSI721_DMA_CHNUM];
+#endif
 
 	/* Inbound Messaging */
 	int		imsg_init[TSI721_IMSG_CHNUM];
@@ -763,4 +835,9 @@ struct tsi721_device {
 	struct tsi721_omsg_ring	omsg_ring[TSI721_OMSG_CHNUM];
 };
 
+#ifdef CONFIG_TSI721_DMA
+extern void tsi721_bdma_handler(struct tsi721_bdma_chan *chan);
+extern int __devinit tsi721_register_dma(struct tsi721_device *priv);
+#endif
+
 #endif
diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
new file mode 100644
index 0000000..75a27a0
--- /dev/null
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -0,0 +1,802 @@
+/*
+ * DMA Engine support for Tsi721 PCIExpress-to-SRIO bridge
+ *
+ * Copyright 2011 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/kfifo.h>
+#include <linux/delay.h>
+
+#include "tsi721.h"
+
+static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct tsi721_bdma_chan, dchan);
+}
+
+static inline struct tsi721_device *to_tsi721(struct dma_device *ddev)
+{
+	return container_of(ddev, struct rio_mport, dma)->priv;
+}
+
+static inline
+struct tsi721_tx_desc *to_tsi721_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct tsi721_tx_desc, txd);
+}
+
+static inline
+struct tsi721_tx_desc *tsi721_dma_first_active(struct tsi721_bdma_chan *chan)
+{
+	return list_first_entry(&chan->active_list,
+				struct tsi721_tx_desc, desc_node);
+}
+
+static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *chan)
+{
+	struct tsi721_dma_desc *bd_ptr;
+	struct device *dev = chan->dchan.device->dev;
+	u64		*sts_ptr;
+	dma_addr_t	bd_phys;
+	dma_addr_t	sts_phys;
+	int		sts_size;
+	int		bd_num = chan->bd_num;
+
+	dev_dbg(dev, "Init Block DMA Engine, CH%d\n", chan->id);
+
+	/* Allocate space for DMA descriptors */
+	bd_ptr = dma_alloc_coherent(dev,
+				bd_num * sizeof(struct tsi721_dma_desc),
+				&bd_phys, GFP_KERNEL);
+	if (!bd_ptr)
+		return -ENOMEM;
+
+	chan->bd_phys = bd_phys;
+	chan->bd_base = bd_ptr;
+
+	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
+
+	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
+		bd_ptr, (unsigned long long)bd_phys);
+
+	/* Allocate space for descriptor status FIFO */
+	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
+					bd_num : TSI721_DMA_MINSTSSZ;
+	sts_size = roundup_pow_of_two(sts_size);
+	sts_ptr = dma_alloc_coherent(dev,
+				     sts_size * sizeof(struct tsi721_dma_sts),
+				     &sts_phys, GFP_KERNEL);
+	if (!sts_ptr) {
+		/* Free space allocated for DMA descriptors */
+		dma_free_coherent(dev,
+				  bd_num * sizeof(struct tsi721_dma_desc),
+				  bd_ptr, bd_phys);
+		chan->bd_base = NULL;
+		return -ENOMEM;
+	}
+
+	chan->sts_phys = sts_phys;
+	chan->sts_base = sts_ptr;
+	chan->sts_size = sts_size;
+
+	memset(sts_ptr, 0, sts_size);
+
+	dev_dbg(dev,
+		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
+		sts_ptr, (unsigned long long)sts_phys, sts_size);
+
+	/* Initialize DMA descriptors ring */
+	bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
+	bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
+						 TSI721_DMAC_DPTRL_MASK);
+	bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
+
+	/* Setup DMA descriptor pointers */
+	iowrite32(((u64)bd_phys >> 32),
+		chan->regs + TSI721_DMAC_DPTRH);
+	iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
+		chan->regs + TSI721_DMAC_DPTRL);
+
+	/* Setup descriptor status FIFO */
+	iowrite32(((u64)sts_phys >> 32),
+		chan->regs + TSI721_DMAC_DSBH);
+	iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
+		chan->regs + TSI721_DMAC_DSBL);
+	iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
+		chan->regs + TSI721_DMAC_DSSZ);
+
+	/* Clear interrupt bits */
+	iowrite32(TSI721_DMAC_INT_ALL,
+		chan->regs + TSI721_DMAC_INT);
+
+	ioread32(chan->regs + TSI721_DMAC_INT);
+
+	/* Toggle DMA channel initialization */
+	iowrite32(TSI721_DMAC_CTL_INIT,	chan->regs + TSI721_DMAC_CTL);
+	ioread32(chan->regs + TSI721_DMAC_CTL);
+	chan->wr_count = chan->wr_count_next = 0;
+	chan->sts_rdptr = 0;
+	udelay(10);
+
+	return 0;
+}
+
+static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *chan)
+{
+	u32 ch_stat;
+
+	if (chan->bd_base == NULL)
+		return 0;
+
+	/* Check if DMA channel still running */
+	ch_stat = ioread32(chan->regs +	TSI721_DMAC_STS);
+	if (ch_stat & TSI721_DMAC_STS_RUN)
+		return -EFAULT;
+
+	/* Put DMA channel into init state */
+	iowrite32(TSI721_DMAC_CTL_INIT,	chan->regs + TSI721_DMAC_CTL);
+
+	/* Free space allocated for DMA descriptors */
+	dma_free_coherent(chan->dchan.device->dev,
+		chan->bd_num * sizeof(struct tsi721_dma_desc),
+		chan->bd_base, chan->bd_phys);
+	chan->bd_base = NULL;
+
+	/* Free space allocated for status FIFO */
+	dma_free_coherent(chan->dchan.device->dev,
+		chan->sts_size * sizeof(struct tsi721_dma_sts),
+		chan->sts_base, chan->sts_phys);
+	chan->sts_base = NULL;
+	return 0;
+}
+
+static
+void tsi721_bdma_interrupt_enable(struct tsi721_bdma_chan *chan, int enable)
+{
+	if (enable) {
+		/* Clear pending BDMA channel interrupts */
+		iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INT);
+		ioread32(chan->regs + TSI721_DMAC_INT);
+		/* Enable BDMA channel interrupts */
+		iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INTE);
+	} else {
+		/* Disable BDMA channel interrupts */
+		iowrite32(0, chan->regs + TSI721_DMAC_INTE);
+		/* Clear pending BDMA channel interrupts */
+		iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INT);
+	}
+
+}
+
+static bool tsi721_dma_is_idle(struct tsi721_bdma_chan *chan)
+{
+	u32 sts;
+
+	sts = ioread32(chan->regs + TSI721_DMAC_STS);
+	return ((sts & TSI721_DMAC_STS_RUN) == 0);
+}
+
+void tsi721_bdma_handler(struct tsi721_bdma_chan *chan)
+{
+	/* Disable BDMA channel interrupts */
+	iowrite32(0, chan->regs + TSI721_DMAC_INTE);
+
+	tasklet_schedule(&chan->tasklet);
+}
+
+#ifdef CONFIG_PCI_MSI
+/**
+ * tsi721_bdma_msix - MSI-X interrupt handler for BDMA channel
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (BDMA channel structure)
+ *
+ * Handles BDMA channel interrupts signaled using MSI-X.
+ */
+static irqreturn_t tsi721_bdma_msix(int irq, void *ptr)
+{
+	struct tsi721_bdma_chan *chan = ptr;
+
+	tsi721_bdma_handler(chan);
+	return IRQ_HANDLED;
+}
+#endif /* CONFIG_PCI_MSI */
+
+/* Must be called with the spinlock held */
+static void tsi721_start_dma(struct tsi721_bdma_chan *chan)
+{
+	if (!tsi721_dma_is_idle(chan)) {
+		dev_err(chan->dchan.device->dev,
+			"BUG: Attempt to start non-idle channel\n");
+		return;
+	}
+
+	if (chan->wr_count == chan->wr_count_next) {
+		dev_err(chan->dchan.device->dev,
+			"BUG: Attempt to start DMA with no BDs ready\n");
+		return;
+	}
+
+	dev_dbg(chan->dchan.device->dev,
+		"tx_chan: %p, chan: %d, regs: %p\n",
+		chan, chan->dchan.chan_id, chan->regs);
+
+	iowrite32(chan->wr_count_next, chan->regs + TSI721_DMAC_DWRCNT);
+	ioread32(chan->regs + TSI721_DMAC_DWRCNT);
+
+	chan->wr_count = chan->wr_count_next;
+}
+
+static void tsi721_desc_put(struct tsi721_bdma_chan *chan,
+			    struct tsi721_tx_desc *desc)
+{
+	dev_dbg(chan->dchan.device->dev, "Put desc: %p into free list\n", desc);
+
+	if (desc) {
+		spin_lock_bh(&chan->lock);
+		list_splice_init(&desc->tx_list, &chan->free_list);
+		list_add(&desc->desc_node, &chan->free_list);
+		chan->wr_count_next = chan->wr_count;
+		spin_unlock_bh(&chan->lock);
+	}
+}
+
+static struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *chan)
+{
+	struct tsi721_tx_desc *tx_desc, *_tx_desc;
+	struct tsi721_tx_desc *ret = NULL;
+	int i;
+
+	spin_lock_bh(&chan->lock);
+	list_for_each_entry_safe(tx_desc, _tx_desc,
+				 &chan->free_list, desc_node) {
+		if (async_tx_test_ack(&tx_desc->txd)) {
+			list_del(&tx_desc->desc_node);
+			ret = tx_desc;
+			break;
+		}
+		dev_dbg(chan->dchan.device->dev,
+			"desc %p not ACKed\n", tx_desc);
+	}
+
+	i = chan->wr_count_next % chan->bd_num;
+	if (i == chan->bd_num - 1) {
+		i = 0;
+		chan->wr_count_next++; /* skip link descriptor */
+	}
+
+	chan->wr_count_next++;
+	tx_desc->txd.phys = chan->bd_phys + i * sizeof(struct tsi721_dma_desc);
+	tx_desc->hw_desc = &((struct tsi721_dma_desc *)chan->bd_base)[i];
+
+	spin_unlock_bh(&chan->lock);
+
+	return ret;
+}
+
+static
+int tsi721_fill_desc(struct tsi721_bdma_chan *chan, struct tsi721_tx_desc *desc,
+	struct scatterlist *sg, enum dma_rtype rtype, u32 sys_size)
+{
+	struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
+	u64 rio_addr;
+
+	if (sg_dma_len(sg) > TSI721_DMAD_BCOUNT1 + 1) {
+		dev_err(chan->dchan.device->dev, "SG element is too large\n");
+		return -EINVAL;
+	}
+
+	dev_dbg(chan->dchan.device->dev,
+		"desc: 0x%llx, addr: 0x%llx len: 0x%x\n",
+		(u64)desc->txd.phys, (unsigned long long)sg_dma_address(sg),
+		sg_dma_len(sg));
+
+	dev_dbg(chan->dchan.device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
+		bd_ptr, desc->destid, desc->rio_addr);
+
+	/* Initialize DMA descriptor */
+	bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
+					(rtype << 19) | desc->destid);
+	if (desc->interrupt)
+		bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
+	bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
+					(sys_size << 26) | sg_dma_len(sg));
+	rio_addr = (desc->rio_addr >> 2) |
+				((u64)(desc->rio_addr_u & 0x3) << 62);
+	bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff);
+	bd_ptr->raddr_hi = cpu_to_le32(rio_addr >> 32);
+	bd_ptr->t1.bufptr_lo = cpu_to_le32(
+					(u64)sg_dma_address(sg) & 0xffffffff);
+	bd_ptr->t1.bufptr_hi = cpu_to_le32((u64)sg_dma_address(sg) >> 32);
+	bd_ptr->t1.s_dist = 0;
+	bd_ptr->t1.s_size = 0;
+
+	mb();
+
+	return 0;
+}
+
+static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *chan,
+				      struct tsi721_tx_desc *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->txd;
+	dma_async_tx_callback callback = txd->callback;
+	void *param = txd->callback_param;
+
+	list_splice_init(&desc->tx_list, &chan->free_list);
+	list_move(&desc->desc_node, &chan->free_list);
+	chan->completed_cookie = txd->cookie;
+
+	if (callback)
+		callback(param);
+}
+
+static void tsi721_dma_complete_all(struct tsi721_bdma_chan *chan)
+{
+	struct tsi721_tx_desc *desc, *_d;
+	LIST_HEAD(list);
+
+	BUG_ON(!tsi721_dma_is_idle(chan));
+
+	if (!list_empty(&chan->queue))
+		tsi721_start_dma(chan);
+
+	list_splice_init(&chan->active_list, &list);
+	list_splice_init(&chan->queue, &chan->active_list);
+
+	list_for_each_entry_safe(desc, _d, &list, desc_node)
+		tsi721_dma_chain_complete(chan, desc);
+}
+
+static void tsi721_clr_stat(struct tsi721_bdma_chan *chan)
+{
+	u32 srd_ptr;
+	u64 *sts_ptr;
+	int i, j;
+
+	/* Check and clear descriptor status FIFO entries */
+	srd_ptr = chan->sts_rdptr;
+	sts_ptr = chan->sts_base;
+	j = srd_ptr * 8;
+	while (sts_ptr[j]) {
+		for (i = 0; i < 8 && sts_ptr[j]; i++, j++)
+			sts_ptr[j] = 0;
+
+		++srd_ptr;
+		srd_ptr %= chan->sts_size;
+		j = srd_ptr * 8;
+	}
+
+	iowrite32(srd_ptr, chan->regs + TSI721_DMAC_DSRP);
+	chan->sts_rdptr = srd_ptr;
+}
+
+static void tsi721_advance_work(struct tsi721_bdma_chan *chan)
+{
+	if (list_empty(&chan->active_list) ||
+		list_is_singular(&chan->active_list)) {
+		dev_dbg(chan->dchan.device->dev,
+			"%s: Active_list empty\n", __func__);
+		tsi721_dma_complete_all(chan);
+	} else {
+		dev_dbg(chan->dchan.device->dev,
+			"%s: Active_list NOT empty\n", __func__);
+		tsi721_dma_chain_complete(chan, tsi721_dma_first_active(chan));
+		tsi721_start_dma(chan);
+	}
+}
+
+static void tsi721_dma_tasklet(unsigned long data)
+{
+	struct tsi721_bdma_chan *chan = (struct tsi721_bdma_chan *)data;
+	u32 dmac_int, dmac_sts;
+
+	dmac_int = ioread32(chan->regs + TSI721_DMAC_INT);
+	dev_dbg(chan->dchan.device->dev, "%s: DMAC%d_INT = 0x%x\n",
+		__func__, chan->id, dmac_int);
+	/* Clear channel interrupts */
+	iowrite32(dmac_int, chan->regs + TSI721_DMAC_INT);
+
+	if (dmac_int & TSI721_DMAC_INT_ERR) {
+		dmac_sts = ioread32(chan->regs + TSI721_DMAC_STS);
+		dev_err(chan->dchan.device->dev,
+			"%s: DMA ERROR - DMAC%d_STS = 0x%x\n",
+			__func__, chan->id, dmac_sts);
+	}
+
+	if (dmac_int & TSI721_DMAC_INT_STFULL) {
+		dev_err(chan->dchan.device->dev,
+			"%s: DMAC%d descriptor status FIFO is full\n",
+			__func__, chan->id);
+	}
+
+	if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
+		tsi721_clr_stat(chan);
+		spin_lock(&chan->lock);
+		tsi721_advance_work(chan);
+		spin_unlock(&chan->lock);
+	}
+
+	/* Re-Enable BDMA channel interrupts */
+	iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INTE);
+}
+
+static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct tsi721_tx_desc *desc = to_tsi721_desc(txd);
+	struct tsi721_bdma_chan *chan = to_tsi721_chan(txd->chan);
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&chan->lock);
+
+	cookie = txd->chan->cookie;
+	if (++cookie < 0)
+		cookie = 1;
+	txd->chan->cookie = cookie;
+	txd->cookie = cookie;
+
+	if (list_empty(&chan->active_list)) {
+		list_add_tail(&desc->desc_node, &chan->active_list);
+		tsi721_start_dma(chan);
+	} else {
+		list_add_tail(&desc->desc_node, &chan->queue);
+	}
+
+	spin_unlock_bh(&chan->lock);
+	return cookie;
+}
+
+static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
+	struct tsi721_device *priv = to_tsi721(dchan->device);
+	struct tsi721_tx_desc *desc = NULL;
+	LIST_HEAD(tmp_list);
+	int i;
+	int rc;
+
+	if (chan->bd_base)
+		return chan->bd_num - 1;
+
+	/* Initialize BDMA channel */
+	if (tsi721_bdma_ch_init(chan)) {
+		dev_err(dchan->device->dev, "Unable to initialize data DMA"
+			" channel %d, aborting\n", chan->id);
+		return -ENOMEM;
+	}
+
+	/* Allocate matching number of logical descriptors */
+	desc = kzalloc((chan->bd_num - 1) * sizeof(struct tsi721_tx_desc),
+			GFP_KERNEL);
+	if (!desc) {
+		dev_err(dchan->device->dev,
+			"Failed to allocate logical descriptors\n");
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	chan->tx_desc = desc;
+
+	for (i = 0; i < chan->bd_num - 1; i++) {
+		dma_async_tx_descriptor_init(&desc[i].txd, dchan);
+		desc[i].txd.tx_submit = tsi721_tx_submit;
+		desc[i].txd.flags = DMA_CTRL_ACK;
+		INIT_LIST_HEAD(&desc[i].tx_list);
+		list_add_tail(&desc[i].desc_node, &tmp_list);
+	}
+
+	spin_lock_bh(&chan->lock);
+	list_splice(&tmp_list, &chan->free_list);
+	chan->completed_cookie = dchan->cookie = 1;
+	spin_unlock_bh(&chan->lock);
+
+#ifdef CONFIG_PCI_MSI
+	if (priv->flags & TSI721_USING_MSIX) {
+		/* Request interrupt service if we are in MSI-X mode */
+		rc = request_irq(
+			priv->msix[TSI721_VECT_DMA0_DONE + chan->id].vector,
+			tsi721_bdma_msix, 0,
+			priv->msix[TSI721_VECT_DMA0_DONE + chan->id].irq_name,
+			(void *)chan);
+
+		if (rc) {
+			dev_dbg(dchan->device->dev,
+				"Unable to allocate MSI-X interrupt for "
+				"BDMA%d-DONE\n", chan->id);
+			goto err_out;
+		}
+
+		rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
+					    chan->id].vector,
+			tsi721_bdma_msix, 0,
+			priv->msix[TSI721_VECT_DMA0_INT + chan->id].irq_name,
+			(void *)chan);
+
+		if (rc)	{
+			dev_dbg(dchan->device->dev,
+				"Unable to allocate MSI-X interrupt for "
+				"BDMA%d-INT\n", chan->id);
+			free_irq(
+				priv->msix[TSI721_VECT_DMA0_DONE +
+					   chan->id].vector,
+				(void *)chan);
+			rc = -EIO;
+			goto err_out;
+		}
+	}
+#endif /* CONFIG_PCI_MSI */
+
+	tsi721_bdma_interrupt_enable(chan, 1);
+
+	return chan->bd_num - 1;
+
+err_out:
+	kfree(desc);
+	tsi721_bdma_ch_free(chan);
+	return rc;
+}
+
+static void tsi721_free_chan_resources(struct dma_chan *dchan)
+{
+	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
+	struct tsi721_device *priv = to_tsi721(dchan->device);
+	LIST_HEAD(list);
+
+	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+
+	BUG_ON(!list_empty(&chan->active_list));
+	BUG_ON(!list_empty(&chan->queue));
+
+	spin_lock_irq(&chan->lock);
+	list_splice_init(&chan->free_list, &list);
+	spin_unlock_irq(&chan->lock);
+
+	tsi721_bdma_interrupt_enable(chan, 0);
+
+#ifdef CONFIG_PCI_MSI
+	if (priv->flags & TSI721_USING_MSIX) {
+		free_irq(priv->msix[TSI721_VECT_DMA0_DONE + chan->id].vector,
+			 (void *)chan);
+		free_irq(priv->msix[TSI721_VECT_DMA0_INT + chan->id].vector,
+			 (void *)chan);
+	}
+#endif /* CONFIG_PCI_MSI */
+
+	tsi721_bdma_ch_free(chan);
+	kfree(chan->tx_desc);
+}
+
+static
+enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+				 struct dma_tx_state *txstate)
+{
+	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
+	dma_cookie_t		last_used;
+	dma_cookie_t		last_completed;
+	int			ret;
+
+	spin_lock_irq(&bdma_chan->lock);
+	last_completed = bdma_chan->completed_cookie;
+	last_used = dchan->cookie;
+	spin_unlock_irq(&bdma_chan->lock);
+
+	ret = dma_async_is_complete(cookie, last_completed, last_used);
+
+	dma_set_tx_state(txstate, last_completed, last_used, 0);
+
+	dev_dbg(dchan->device->dev,
+		"%s: exit, ret: %d, last_completed: %d, last_used: %d\n",
+		__func__, ret, last_completed, last_used);
+
+	return ret;
+}
+
+static void tsi721_issue_pending(struct dma_chan *dchan)
+{
+	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
+
+	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+
+	if (tsi721_dma_is_idle(chan)) {
+		spin_lock_bh(&chan->lock);
+		tsi721_advance_work(chan);
+		spin_unlock_bh(&chan->lock);
+	} else
+		dev_dbg(dchan->device->dev,
+			"%s: DMA channel still busy\n", __func__);
+}
+
+static
+struct dma_async_tx_descriptor *tsi721_prep_slave_sg(struct dma_chan *dchan,
+			struct scatterlist *sgl, unsigned int sg_len,
+			enum dma_data_direction direction, unsigned long flags)
+{
+	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
+	struct tsi721_tx_desc *desc = NULL;
+	struct tsi721_tx_desc *first = NULL;
+	struct scatterlist *sg;
+	struct rio_dma_ext *rext = dchan->private;
+	u64 rio_addr = rext->rio_addr; /* FIXME: assuming 64-bit rio_addr for now */
+	unsigned int i;
+	u32 sys_size = dma_to_mport(dchan->device)->sys_size;
+	enum dma_rtype rtype;
+
+	if (!sgl || !sg_len) {
+		dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
+		return NULL;
+	}
+
+	if (direction == DMA_FROM_DEVICE)
+		rtype = NREAD;
+	else if (direction == DMA_TO_DEVICE) {
+		switch (rext->wr_type) {
+		case RDW_ALL_NWRITE:
+			rtype = ALL_NWRITE;
+			break;
+		case RDW_ALL_NWRITE_R:
+			rtype = ALL_NWRITE_R;
+			break;
+		case RDW_LAST_NWRITE_R:
+		default:
+			rtype = LAST_NWRITE_R;
+			break;
+		}
+	} else {
+		dev_err(dchan->device->dev,
+			"%s: Unsupported DMA direction option\n", __func__);
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		int err;
+
+		dev_dbg(dchan->device->dev, "%s: sg #%d\n", __func__, i);
+		desc = tsi721_desc_get(bdma_chan);
+		if (!desc) {
+			dev_err(dchan->device->dev,
+				"Not enough descriptors available\n");
+			goto err_desc_get;
+		}
+
+		if (sg_is_last(sg))
+			desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
+		else
+			desc->interrupt = false;
+
+		desc->destid = rext->destid;
+		desc->rio_addr = rio_addr;
+		desc->rio_addr_u = 0;
+
+		err = tsi721_fill_desc(bdma_chan, desc, sg, rtype, sys_size);
+		if (err) {
+			dev_err(dchan->device->dev,
+				"Failed to build desc: %d\n", err);
+			goto err_desc_get;
+		}
+
+		rio_addr += sg_dma_len(sg);
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->desc_node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	desc->txd.flags = flags;
+
+	return &first->txd;
+
+err_desc_get:
+	tsi721_desc_put(bdma_chan, first);
+	return NULL;
+}
+
+static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
+			     unsigned long arg)
+{
+	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
+	struct tsi721_tx_desc *desc, *_d;
+	LIST_HEAD(list);
+
+	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	spin_lock_bh(&chan->lock);
+
+	/* make sure to stop the transfer */
+	iowrite32(TSI721_DMAC_CTL_SUSP, chan->regs + TSI721_DMAC_CTL);
+
+	list_splice_init(&chan->active_list, &list);
+	list_splice_init(&chan->queue, &list);
+
+	list_for_each_entry_safe(desc, _d, &list, desc_node)
+		tsi721_dma_chain_complete(chan, desc);
+
+	spin_unlock_bh(&chan->lock);
+
+	return 0;
+}
+
+int __devinit tsi721_register_dma(struct tsi721_device *priv)
+{
+	int i;
+	int nr_channels = TSI721_DMA_MAXCH;
+	int err;
+	struct rio_mport *mport = priv->mport;
+
+	mport->dma.dev = &priv->pdev->dev;
+	mport->dma.chancnt = nr_channels;
+
+	INIT_LIST_HEAD(&mport->dma.channels);
+
+	for (i = 0; i < nr_channels; i++) {
+		struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i];
+
+		if (i == TSI721_DMACH_MAINT)
+			continue;
+
+		bdma_chan->bd_num = 64;
+		bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
+
+		bdma_chan->dchan.device = &mport->dma;
+		bdma_chan->dchan.cookie = 1;
+		bdma_chan->dchan.chan_id = i;
+		bdma_chan->id = i;
+
+		spin_lock_init(&bdma_chan->lock);
+
+		INIT_LIST_HEAD(&bdma_chan->active_list);
+		INIT_LIST_HEAD(&bdma_chan->queue);
+		INIT_LIST_HEAD(&bdma_chan->free_list);
+
+		tasklet_init(&bdma_chan->tasklet, tsi721_dma_tasklet,
+			     (unsigned long)bdma_chan);
+		list_add_tail(&bdma_chan->dchan.device_node,
+			      &mport->dma.channels);
+	}
+
+	dma_cap_zero(mport->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask);
+	dma_cap_set(DMA_RAPIDIO, mport->dma.cap_mask);
+
+	mport->dma.device_alloc_chan_resources = tsi721_alloc_chan_resources;
+	mport->dma.device_free_chan_resources = tsi721_free_chan_resources;
+	mport->dma.device_tx_status = tsi721_tx_status;
+	mport->dma.device_issue_pending = tsi721_issue_pending;
+	mport->dma.device_prep_slave_sg = tsi721_prep_slave_sg;
+	mport->dma.device_control = tsi721_device_control;
+
+	err = dma_async_device_register(&mport->dma);
+	if (err)
+		dev_err(&priv->pdev->dev, "Failed to register DMA device\n");
+
+	return err;
+}
-- 
1.7.6


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-09-30 21:38 ` [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support Alexandre Bounine
@ 2011-09-30 22:15   ` Andrew Morton
  2011-10-03 17:53     ` Bounine, Alexandre
  2011-10-01 18:06   ` Vinod Koul
  1 sibling, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2011-09-30 22:15 UTC (permalink / raw)
  To: Alexandre Bounine
  Cc: linux-kernel, linuxppc-dev, Vinod Koul, Kumar Gala, Matt Porter,
	Li Yang

On Fri, 30 Sep 2011 17:38:35 -0400
Alexandre Bounine <alexandre.bounine@idt.com> wrote:

> Adds support for DMA Engine API.
> 
> Includes following changes:
> - Modifies BDMA register offset definitions to support per-channel handling
> - Separates BDMA channel reserved for RIO Maintenance requests
> - Adds DMA Engine callback routines
> 
> ...
>
>  5 files changed, 1029 insertions(+), 90 deletions(-)

hm, what a lot of code.

> +config TSI721_DMA
> +	bool "IDT Tsi721 RapidIO DMA support"
> +	depends on RAPIDIO_TSI721
> +	default "n"
> +	select RAPIDIO_DMA_ENGINE
> +	help
> +	  Enable DMA support for IDT Tsi721 PCIe-to-SRIO controller.

Do we really need to offer this decision to the user?  If possible it
would be better to always enable the feature where that makes sense. 
Better code coverage, less maintenance effort, more effective testing
effort, possibly cleaner code.

>
> ...
>
> +static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *chan)
> +{
> +	struct tsi721_dma_desc *bd_ptr;
> +	struct device *dev = chan->dchan.device->dev;
> +	u64		*sts_ptr;
> +	dma_addr_t	bd_phys;
> +	dma_addr_t	sts_phys;
> +	int		sts_size;
> +	int		bd_num = chan->bd_num;
> +
> +	dev_dbg(dev, "Init Block DMA Engine, CH%d\n", chan->id);
> +
> +	/* Allocate space for DMA descriptors */
> +	bd_ptr = dma_alloc_coherent(dev,
> +				bd_num * sizeof(struct tsi721_dma_desc),
> +				&bd_phys, GFP_KERNEL);
> +	if (!bd_ptr)
> +		return -ENOMEM;
> +
> +	chan->bd_phys = bd_phys;
> +	chan->bd_base = bd_ptr;
> +
> +	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
> +
> +	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
> +		bd_ptr, (unsigned long long)bd_phys);
> +
> +	/* Allocate space for descriptor status FIFO */
> +	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
> +					bd_num : TSI721_DMA_MINSTSSZ;
> +	sts_size = roundup_pow_of_two(sts_size);
> +	sts_ptr = dma_alloc_coherent(dev,
> +				     sts_size * sizeof(struct tsi721_dma_sts),
> +				     &sts_phys, GFP_KERNEL);
> +	if (!sts_ptr) {
> +		/* Free space allocated for DMA descriptors */
> +		dma_free_coherent(dev,
> +				  bd_num * sizeof(struct tsi721_dma_desc),
> +				  bd_ptr, bd_phys);
> +		chan->bd_base = NULL;
> +		return -ENOMEM;
> +	}
> +
> +	chan->sts_phys = sts_phys;
> +	chan->sts_base = sts_ptr;
> +	chan->sts_size = sts_size;
> +
> +	memset(sts_ptr, 0, sts_size);

You meant

--- a/drivers/rapidio/devices/tsi721.c~rapidio-tsi721-add-dma-engine-support-fix
+++ a/drivers/rapidio/devices/tsi721.c
@@ -1006,7 +1006,7 @@ static int tsi721_bdma_maint_init(struct
 	priv->mdma.sts_base = sts_ptr;
 	priv->mdma.sts_size = sts_size;
 
-	memset(sts_ptr, 0, sts_size);
+	memset(sts_ptr, 0, sts_size * sizeof(struct tsi721_dma_sts));
 
 	dev_dbg(&priv->pdev->dev,
 		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",

However that's at least two instances where you wanted a
dma_zalloc_coherent().  How's about we give ourselves one?


> +	dev_dbg(dev,
> +		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
> +		sts_ptr, (unsigned long long)sts_phys, sts_size);
> +
> +	/* Initialize DMA descriptors ring */
> +	bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
> +	bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
> +						 TSI721_DMAC_DPTRL_MASK);
> +	bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
> +
> +	/* Setup DMA descriptor pointers */
> +	iowrite32(((u64)bd_phys >> 32),
> +		chan->regs + TSI721_DMAC_DPTRH);
> +	iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
> +		chan->regs + TSI721_DMAC_DPTRL);
> +
> +	/* Setup descriptor status FIFO */
> +	iowrite32(((u64)sts_phys >> 32),
> +		chan->regs + TSI721_DMAC_DSBH);
> +	iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
> +		chan->regs + TSI721_DMAC_DSBL);
> +	iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
> +		chan->regs + TSI721_DMAC_DSSZ);
> +
> +	/* Clear interrupt bits */
> +	iowrite32(TSI721_DMAC_INT_ALL,
> +		chan->regs + TSI721_DMAC_INT);
> +
> +	ioread32(chan->regs + TSI721_DMAC_INT);
> +
> +	/* Toggle DMA channel initialization */
> +	iowrite32(TSI721_DMAC_CTL_INIT,	chan->regs + TSI721_DMAC_CTL);
> +	ioread32(chan->regs + TSI721_DMAC_CTL);
> +	chan->wr_count = chan->wr_count_next = 0;
> +	chan->sts_rdptr = 0;
> +	udelay(10);
> +
> +	return 0;
> +}
> +
>
> ...
>
> +{
> +	/* Disable BDMA channel interrupts */
> +	iowrite32(0, chan->regs + TSI721_DMAC_INTE);
> +
> +	tasklet_schedule(&chan->tasklet);

I'm not seeing any tasklet_disable()s on the shutdown/rmmod paths.  Is
there anything here which prevents shutdown races against a
still-pending tasklet?

> +}
> +
>
> ...
>
> +static
> +int tsi721_fill_desc(struct tsi721_bdma_chan *chan, struct tsi721_tx_desc *desc,
> +	struct scatterlist *sg, enum dma_rtype rtype, u32 sys_size)
> +{
> +	struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
> +	u64 rio_addr;
> +
> +	if (sg_dma_len(sg) > TSI721_DMAD_BCOUNT1 + 1) {
> +		dev_err(chan->dchan.device->dev, "SG element is too large\n");
> +		return -EINVAL;
> +	}
> +
> +	dev_dbg(chan->dchan.device->dev,
> +		"desc: 0x%llx, addr: 0x%llx len: 0x%x\n",
> +		(u64)desc->txd.phys, (unsigned long long)sg_dma_address(sg),
> +		sg_dma_len(sg));
> +
> +	dev_dbg(chan->dchan.device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
> +		bd_ptr, desc->destid, desc->rio_addr);
> +
> +	/* Initialize DMA descriptor */
> +	bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
> +					(rtype << 19) | desc->destid);
> +	if (desc->interrupt)
> +		bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
> +	bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
> +					(sys_size << 26) | sg_dma_len(sg));
> +	rio_addr = (desc->rio_addr >> 2) |
> +				((u64)(desc->rio_addr_u & 0x3) << 62);
> +	bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff);
> +	bd_ptr->raddr_hi = cpu_to_le32(rio_addr >> 32);
> +	bd_ptr->t1.bufptr_lo = cpu_to_le32(
> +					(u64)sg_dma_address(sg) & 0xffffffff);
> +	bd_ptr->t1.bufptr_hi = cpu_to_le32((u64)sg_dma_address(sg) >> 32);
> +	bd_ptr->t1.s_dist = 0;
> +	bd_ptr->t1.s_size = 0;
> +
> +	mb();

Mystery barrier needs a comment explaining why it's here, please.  This
is almost always the case with barriers.

> +	return 0;
> +}
> +
>
> ...
>
> +static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
> +{
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> +	struct tsi721_device *priv = to_tsi721(dchan->device);
> +	struct tsi721_tx_desc *desc = NULL;
> +	LIST_HEAD(tmp_list);
> +	int i;
> +	int rc;
> +
> +	if (chan->bd_base)
> +		return chan->bd_num - 1;
> +
> +	/* Initialize BDMA channel */
> +	if (tsi721_bdma_ch_init(chan)) {
> +		dev_err(dchan->device->dev, "Unable to initialize data DMA"
> +			" channel %d, aborting\n", chan->id);
> +		return -ENOMEM;
> +	}
> +
> +	/* Allocate matching number of logical descriptors */
> +	desc = kzalloc((chan->bd_num - 1) * sizeof(struct tsi721_tx_desc),
> +			GFP_KERNEL);

kcalloc() would be a better fit here.

> +	if (!desc) {
> +		dev_err(dchan->device->dev,
> +			"Failed to allocate logical descriptors\n");
> +		rc = -ENOMEM;
> +		goto err_out;
> +	}
> +
> +	chan->tx_desc = desc;
> +
> +	for (i = 0; i < chan->bd_num - 1; i++) {
> +		dma_async_tx_descriptor_init(&desc[i].txd, dchan);
> +		desc[i].txd.tx_submit = tsi721_tx_submit;
> +		desc[i].txd.flags = DMA_CTRL_ACK;
> +		INIT_LIST_HEAD(&desc[i].tx_list);
> +		list_add_tail(&desc[i].desc_node, &tmp_list);
> +	}
> +
> +	spin_lock_bh(&chan->lock);
> +	list_splice(&tmp_list, &chan->free_list);
> +	chan->completed_cookie = dchan->cookie = 1;
> +	spin_unlock_bh(&chan->lock);
> +
> +#ifdef CONFIG_PCI_MSI
> +	if (priv->flags & TSI721_USING_MSIX) {
> +		/* Request interrupt service if we are in MSI-X mode */
> +		rc = request_irq(
> +			priv->msix[TSI721_VECT_DMA0_DONE + chan->id].vector,
> +			tsi721_bdma_msix, 0,
> +			priv->msix[TSI721_VECT_DMA0_DONE + chan->id].irq_name,
> +			(void *)chan);
> +
> +		if (rc) {
> +			dev_dbg(dchan->device->dev,
> +				"Unable to allocate MSI-X interrupt for "
> +				"BDMA%d-DONE\n", chan->id);
> +			goto err_out;
> +		}
> +
> +		rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
> +					    chan->id].vector,
> +			tsi721_bdma_msix, 0,
> +			priv->msix[TSI721_VECT_DMA0_INT + chan->id].irq_name,
> +			(void *)chan);
> +
> +		if (rc)	{
> +			dev_dbg(dchan->device->dev,
> +				"Unable to allocate MSI-X interrupt for "
> +				"BDMA%d-INT\n", chan->id);
> +			free_irq(
> +				priv->msix[TSI721_VECT_DMA0_DONE +
> +					   chan->id].vector,
> +				(void *)chan);
> +			rc = -EIO;
> +			goto err_out;
> +		}
> +	}
> +#endif /* CONFIG_PCI_MSI */
> +
> +	tsi721_bdma_interrupt_enable(chan, 1);
> +
> +	return chan->bd_num - 1;
> +
> +err_out:
> +	kfree(desc);
> +	tsi721_bdma_ch_free(chan);
> +	return rc;
> +}
> +
>
> ...
>
> +static
> +enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
> +				 struct dma_tx_state *txstate)
> +{
> +	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
> +	dma_cookie_t		last_used;
> +	dma_cookie_t		last_completed;
> +	int			ret;
> +
> +	spin_lock_irq(&bdma_chan->lock);
> +	last_completed = bdma_chan->completed_cookie;
> +	last_used = dchan->cookie;
> +	spin_unlock_irq(&bdma_chan->lock);
> +
> +	ret = dma_async_is_complete(cookie, last_completed, last_used);
> +
> +	dma_set_tx_state(txstate, last_completed, last_used, 0);
> +
> +	dev_dbg(dchan->device->dev,
> +		"%s: exit, ret: %d, last_completed: %d, last_used: %d\n",
> +		__func__, ret, last_completed, last_used);
> +
> +	return ret;
> +}
> +
> +static void tsi721_issue_pending(struct dma_chan *dchan)
> +{
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> +
> +	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
> +
> +	if (tsi721_dma_is_idle(chan)) {
> +		spin_lock_bh(&chan->lock);
> +		tsi721_advance_work(chan);
> +		spin_unlock_bh(&chan->lock);
> +	} else
> +		dev_dbg(dchan->device->dev,
> +			"%s: DMA channel still busy\n", __func__);
> +}

I really don't like that a "struct tsi721_bdma_chan *" is called "chan"
in some places and "bdma_chan" in others.  "bdma_chan" is better.

The code takes that lock with spin_lock_bh() in some places and
spin_lock_irq() in others.  I trust there's some method to it all ;) Has
it been carefully tested with lockdep enabled?

>
> ...
>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-09-30 21:38 [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Alexandre Bounine
  2011-09-30 21:38 ` [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support Alexandre Bounine
@ 2011-10-01 18:01 ` Vinod Koul
  2011-10-03 16:52   ` Bounine, Alexandre
  1 sibling, 1 reply; 21+ messages in thread
From: Vinod Koul @ 2011-10-01 18:01 UTC (permalink / raw)
  To: Alexandre Bounine, Dan
  Cc: akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

On Fri, 2011-09-30 at 17:38 -0400, Alexandre Bounine wrote:
Please CC *maintainers* on your patches, get_maintainers.pl will tell
you who. Adding Dan here
> Adds DMA Engine framework support into RapidIO subsystem.
> Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from remote
> RapidIO target devices. Uses scatterlist to describe local data buffer and
> dma_chan.private member to pass target specific information. Supports flat
> data buffer only for a remote side.
The way dmaengine works today is that it doesn't know anything about the
client subsystem. But this brings subsystem details into dmaengine,
which I don't agree with yet.
Why can't we abstract this out??

After going through the patch, I do not believe that this is a case of
SLAVE transfers, Dan can you please take a look at this patch


> Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Kumar Gala <galak@kernel.crashing.org>
> Cc: Matt Porter <mporter@kernel.crashing.org>
> Cc: Li Yang <leoli@freescale.com>
> ---
>  drivers/dma/dmaengine.c   |    4 ++
>  drivers/rapidio/Kconfig   |    6 +++
>  drivers/rapidio/rio.c     |   79 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/dmaengine.h |    1 +
>  include/linux/rio.h       |   47 ++++++++++++++++++++++++++
>  include/linux/rio_drv.h   |    9 +++++
>  6 files changed, 146 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> index b48967b..11fdc2c 100644
> --- a/drivers/dma/dmaengine.c
> +++ b/drivers/dma/dmaengine.c
> @@ -699,6 +699,10 @@ int dma_async_device_register(struct dma_device *device)
>  		!device->device_prep_dma_cyclic);
>  	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
>  		!device->device_control);
> +	BUG_ON(dma_has_cap(DMA_RAPIDIO, device->cap_mask) &&
> +		!device->device_prep_slave_sg);
> +	BUG_ON(dma_has_cap(DMA_RAPIDIO, device->cap_mask) &&
> +		!device->device_control);
>  
>  	BUG_ON(!device->device_alloc_chan_resources);
>  	BUG_ON(!device->device_free_chan_resources);
> diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
> index bc87192..c4aa279 100644
> --- a/drivers/rapidio/Kconfig
> +++ b/drivers/rapidio/Kconfig
> @@ -34,3 +34,9 @@ config RAPIDIO_DEBUG
>  	  If you are unsure about this, say N here.
>  
>  source "drivers/rapidio/switches/Kconfig"
> +
> +# This option to be turned on by a device selection
> +config RAPIDIO_DMA_ENGINE
> +	bool
> +	select DMADEVICES
> +	select DMA_ENGINE
> diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
> index 86c9a09..e5905fc 100644
> --- a/drivers/rapidio/rio.c
> +++ b/drivers/rapidio/rio.c
> @@ -1121,6 +1121,85 @@ int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
> +
> +#include <linux/dmaengine.h>
> +
> +static bool rio_chan_filter(struct dma_chan *chan, void *arg)
> +{
> +	struct rio_dev *rdev = arg;
> +
> +	/* Check that DMA device belongs to the right MPORT */
> +	return (rdev->net->hport ==
> +		container_of(chan->device, struct rio_mport, dma));
> +}
> +
> +/**
> + * rio_request_dma - request RapidIO capable DMA channel that supports
> + *   specified target RapidIO device.
> + * @rdev: RIO device control structure
> + *
> + * Returns pointer to allocated DMA channel or NULL if failed.
> + */
> +struct dma_chan *rio_request_dma(struct rio_dev *rdev)
> +{
> +	dma_cap_mask_t mask;
> +	struct dma_chan *dchan;
> +
> +	dma_cap_zero(mask);
> +	dma_cap_set(DMA_RAPIDIO, mask);
> +	dchan = dma_request_channel(mask, rio_chan_filter, rdev);
> +
> +	return dchan;
> +}
> +EXPORT_SYMBOL_GPL(rio_request_dma);
> +
> +/**
> + * rio_release_dma - release specified DMA channel
> + * @dchan: DMA channel to release
> + */
> +void rio_release_dma(struct dma_chan *dchan)
> +{
> +	dma_release_channel(dchan);
> +}
> +EXPORT_SYMBOL_GPL(rio_release_dma);
> +
> +/**
> + * rio_dma_prep_slave_sg - RapidIO specific wrapper
> + *   for device_prep_slave_sg callback defined by DMAENGINE.
> + * @rdev: RIO device control structure
> + * @dchan: DMA channel to configure
> + * @data: RIO specific data descriptor
> + * @direction: DMA data transfer direction (TO or FROM the device)
> + * @flags: dmaengine defined flags
> + *
> + * Initializes RapidIO capable DMA channel for the specified data transfer.
> + * Uses DMA channel private extension to pass information related to remote
> + * target RIO device.
> + * Returns pointer to DMA transaction descriptor or NULL if failed.
> + */
> +struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
> +	struct dma_chan *dchan, struct rio_dma_data *data,
> +	enum dma_data_direction direction, unsigned long flags)
> +{
> +	struct dma_async_tx_descriptor *txd = NULL;
> +	struct rio_dma_ext rio_ext;
> +
> +	rio_ext.destid = rdev->destid;
> +	rio_ext.rio_addr_u = data->rio_addr_u;
> +	rio_ext.rio_addr = data->rio_addr;
> +	rio_ext.wr_type = data->wr_type;
> +	dchan->private = &rio_ext;
> +
> +	txd = dchan->device->device_prep_slave_sg(dchan, data->sg, data->sg_len,
> +						  direction, flags);
> +
> +	return txd;
> +}
> +EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
You should move the rdev and data to dma_slave_config, that way you
should be able to use the existing _prep_slave_sg function.

> +
> +#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
> +
>  static void rio_fixup_device(struct rio_dev *dev)
>  {
>  }
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 8fbf40e..867b685 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -70,6 +70,7 @@ enum dma_transaction_type {
>  	DMA_PRIVATE,
>  	DMA_ASYNC_TX,
>  	DMA_SLAVE,
> +	DMA_RAPIDIO,
>  	DMA_CYCLIC,
>  };
>  
> diff --git a/include/linux/rio.h b/include/linux/rio.h
> index 4d50611..a6d1054c 100644
> --- a/include/linux/rio.h
> +++ b/include/linux/rio.h
> @@ -20,6 +20,9 @@
>  #include <linux/errno.h>
>  #include <linux/device.h>
>  #include <linux/rio_regs.h>
> +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
> +#include <linux/dmaengine.h>
> +#endif
>  
>  #define RIO_NO_HOPCOUNT		-1
>  #define RIO_INVALID_DESTID	0xffff
> @@ -254,6 +257,9 @@ struct rio_mport {
>  	u32 phys_efptr;
>  	unsigned char name[40];
>  	void *priv;		/* Master port private data */
> +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
> +	struct dma_device	dma;
> +#endif
>  };
>  
>  /**
> @@ -395,6 +401,47 @@ union rio_pw_msg {
>  	u32 raw[RIO_PW_MSG_SIZE/sizeof(u32)];
>  };
>  
> +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
> +
> +/**
> + * enum rio_write_type - RIO write transaction types used in DMA transfers
> + *
> + * Note: RapidIO specification defines write (NWRITE) and
> + * write-with-response (NWRITE_R) data transfer operations.
> + * Existing DMA controllers that service RapidIO may use one of these operations
> + * for entire data transfer or their combination with only the last data packet
> + * requires response.
> + */
> +enum rio_write_type {
> +	RDW_DEFAULT,		/* default method used by DMA driver */
> +	RDW_ALL_NWRITE,		/* all packets use NWRITE */
> +	RDW_ALL_NWRITE_R,	/* all packets use NWRITE_R */
> +	RDW_LAST_NWRITE_R,	/* last packet uses NWRITE_R, all other - NWRITE */
> +};
Why not use the current mechanism of specifying callback or ACK flags if
you want a response or not.

> +
> +struct rio_dma_ext {
> +	u16 destid;
> +	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
> +	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
> +	enum rio_write_type wr_type; /* preferred RIO write operation type */
> +};
will this address be translated to a dma_addr_t or not?
> +
> +struct rio_dma_data {
> +	/* Local data (as scatterlist) */
> +	struct scatterlist	*sg;	/* I/O scatter list */
> +	unsigned int		sg_len;	/* size of scatter list */
> +	/* Remote device address (flat buffer) */
> +	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
> +	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
> +	enum rio_write_type wr_type; /* preferred RIO write operation type */
> +};
> +
> +static inline struct rio_mport *dma_to_mport(struct dma_device *ddev)
> +{
> +	return container_of(ddev, struct rio_mport, dma);
> +}
> +#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
> +
>  /* Architecture and hardware-specific functions */
>  extern int rio_register_mport(struct rio_mport *);
>  extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
> diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
> index 229b3ca..d140996 100644
> --- a/include/linux/rio_drv.h
> +++ b/include/linux/rio_drv.h
> @@ -378,6 +378,15 @@ void rio_unregister_driver(struct rio_driver *);
>  struct rio_dev *rio_dev_get(struct rio_dev *);
>  void rio_dev_put(struct rio_dev *);
>  
> +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
> +extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
> +extern void rio_release_dma(struct dma_chan *dchan);
> +extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
> +		struct rio_dev *rdev, struct dma_chan *dchan,
> +		struct rio_dma_data *data, enum dma_data_direction direction,
> +		unsigned long flags);
> +#endif
> +
>  /**
>   * rio_name - Get the unique RIO device identifier
>   * @rdev: RIO device


-- 
~Vinod


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-09-30 21:38 ` [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support Alexandre Bounine
  2011-09-30 22:15   ` Andrew Morton
@ 2011-10-01 18:06   ` Vinod Koul
  1 sibling, 0 replies; 21+ messages in thread
From: Vinod Koul @ 2011-10-01 18:06 UTC (permalink / raw)
  To: Alexandre Bounine, Dan
  Cc: akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

On Fri, 2011-09-30 at 17:38 -0400, Alexandre Bounine wrote:
> Adds support for DMA Engine API.
> 
> Includes following changes:
> - Modifies BDMA register offset definitions to support per-channel handling
> - Separates BDMA channel reserved for RIO Maintenance requests
> - Adds DMA Engine callback routines
Dan, please review this; I do not agree with the approach here
> 
> Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Kumar Gala <galak@kernel.crashing.org>
> Cc: Matt Porter <mporter@kernel.crashing.org>
> Cc: Li Yang <leoli@freescale.com>
> ---
>  drivers/rapidio/devices/Kconfig      |    8 +
>  drivers/rapidio/devices/Makefile     |    1 +
>  drivers/rapidio/devices/tsi721.c     |  201 ++++++----
>  drivers/rapidio/devices/tsi721.h     |  107 ++++-
>  drivers/rapidio/devices/tsi721_dma.c |  802 ++++++++++++++++++++++++++++++++++
>  5 files changed, 1029 insertions(+), 90 deletions(-)
>  create mode 100644 drivers/rapidio/devices/tsi721_dma.c
> 
> diff --git a/drivers/rapidio/devices/Kconfig b/drivers/rapidio/devices/Kconfig
> index 12a9d7f..3a2db3d 100644
> --- a/drivers/rapidio/devices/Kconfig
> +++ b/drivers/rapidio/devices/Kconfig
> @@ -8,3 +8,11 @@ config RAPIDIO_TSI721
>  	default "n"
>  	---help---
>  	  Include support for IDT Tsi721 PCI Express Serial RapidIO controller.
> +
> +config TSI721_DMA
> +	bool "IDT Tsi721 RapidIO DMA support"
> +	depends on RAPIDIO_TSI721
> +	default "n"
> +	select RAPIDIO_DMA_ENGINE
> +	help
> +	  Enable DMA support for IDT Tsi721 PCIe-to-SRIO controller.
> diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile
> index 3b7b4e2..8cbce45 100644
> --- a/drivers/rapidio/devices/Makefile
> +++ b/drivers/rapidio/devices/Makefile
> @@ -3,3 +3,4 @@
>  #
>  
>  obj-$(CONFIG_RAPIDIO_TSI721)	+= tsi721.o
> +obj-$(CONFIG_TSI721_DMA) += tsi721_dma.o
> diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
> index 5225930..5e893a6 100644
> --- a/drivers/rapidio/devices/tsi721.c
> +++ b/drivers/rapidio/devices/tsi721.c
> @@ -108,6 +108,7 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
>  			u16 destid, u8 hopcount, u32 offset, int len,
>  			u32 *data, int do_wr)
>  {
> +	void __iomem *regs = priv->regs + TSI721_DMAC_BASE(priv->mdma.ch_id);
>  	struct tsi721_dma_desc *bd_ptr;
>  	u32 rd_count, swr_ptr, ch_stat;
>  	int i, err = 0;
> @@ -116,10 +117,9 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
>  	if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32)))
>  		return -EINVAL;
>  
> -	bd_ptr = priv->bdma[TSI721_DMACH_MAINT].bd_base;
> +	bd_ptr = priv->mdma.bd_base;
>  
> -	rd_count = ioread32(
> -			priv->regs + TSI721_DMAC_DRDCNT(TSI721_DMACH_MAINT));
> +	rd_count = ioread32(regs + TSI721_DMAC_DRDCNT);
>  
>  	/* Initialize DMA descriptor */
>  	bd_ptr[0].type_id = cpu_to_le32((DTYPE2 << 29) | (op << 19) | destid);
> @@ -134,19 +134,18 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
>  	mb();
>  
>  	/* Start DMA operation */
> -	iowrite32(rd_count + 2,
> -		priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
> -	ioread32(priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
> +	iowrite32(rd_count + 2,	regs + TSI721_DMAC_DWRCNT);
> +	ioread32(regs + TSI721_DMAC_DWRCNT);
>  	i = 0;
>  
>  	/* Wait until DMA transfer is finished */
> -	while ((ch_stat = ioread32(priv->regs +
> -		TSI721_DMAC_STS(TSI721_DMACH_MAINT))) & TSI721_DMAC_STS_RUN) {
> +	while ((ch_stat = ioread32(regs + TSI721_DMAC_STS))
> +							& TSI721_DMAC_STS_RUN) {
>  		udelay(1);
>  		if (++i >= 5000000) {
>  			dev_dbg(&priv->pdev->dev,
>  				"%s : DMA[%d] read timeout ch_status=%x\n",
> -				__func__, TSI721_DMACH_MAINT, ch_stat);
> +				__func__, priv->mdma.ch_id, ch_stat);
>  			if (!do_wr)
>  				*data = 0xffffffff;
>  			err = -EIO;
> @@ -162,13 +161,10 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
>  			__func__, ch_stat);
>  		dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
>  			do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
> -		iowrite32(TSI721_DMAC_INT_ALL,
> -			priv->regs + TSI721_DMAC_INT(TSI721_DMACH_MAINT));
> -		iowrite32(TSI721_DMAC_CTL_INIT,
> -			priv->regs + TSI721_DMAC_CTL(TSI721_DMACH_MAINT));
> +		iowrite32(TSI721_DMAC_INT_ALL, regs + TSI721_DMAC_INT);
> +		iowrite32(TSI721_DMAC_CTL_INIT, regs + TSI721_DMAC_CTL);
>  		udelay(10);
> -		iowrite32(0, priv->regs +
> -				TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
> +		iowrite32(0, regs + TSI721_DMAC_DWRCNT);
>  		udelay(1);
>  		if (!do_wr)
>  			*data = 0xffffffff;
> @@ -184,8 +180,8 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
>  	 * NOTE: Skipping check and clear FIFO entries because we are waiting
>  	 * for transfer to be completed.
>  	 */
> -	swr_ptr = ioread32(priv->regs + TSI721_DMAC_DSWP(TSI721_DMACH_MAINT));
> -	iowrite32(swr_ptr, priv->regs + TSI721_DMAC_DSRP(TSI721_DMACH_MAINT));
> +	swr_ptr = ioread32(regs + TSI721_DMAC_DSWP);
> +	iowrite32(swr_ptr, regs + TSI721_DMAC_DSRP);
>  err_out:
>  
>  	return err;
> @@ -540,6 +536,22 @@ static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
>  			tsi721_pw_handler(mport);
>  	}
>  
> +#ifdef CONFIG_TSI721_DMA
> +	if (dev_int & TSI721_DEV_INT_BDMA_CH) {
> +		int ch;
> +
> +		if (dev_ch_int & TSI721_INT_BDMA_CHAN_M) {
> +			dev_dbg(&priv->pdev->dev,
> +				"IRQ from DMA channel 0x%08x\n", dev_ch_int);
> +
> +			for (ch = 0; ch < TSI721_DMA_MAXCH; ch++) {
> +				if (!(dev_ch_int & TSI721_INT_BDMA_CHAN(ch)))
> +					continue;
> +				tsi721_bdma_handler(&priv->bdma[ch]);
> +			}
> +		}
> +	}
> +#endif
>  	return IRQ_HANDLED;
>  }
>  
> @@ -552,18 +564,26 @@ static void tsi721_interrupts_init(struct tsi721_device *priv)
>  		priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
>  	iowrite32(TSI721_SR_CHINT_IDBQRCV,
>  		priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
> -	iowrite32(TSI721_INT_SR2PC_CHAN(IDB_QUEUE),
> -		priv->regs + TSI721_DEV_CHAN_INTE);
>  
>  	/* Enable SRIO MAC interrupts */
>  	iowrite32(TSI721_RIO_EM_DEV_INT_EN_INT,
>  		priv->regs + TSI721_RIO_EM_DEV_INT_EN);
>  
> +	/* Enable interrupts from channels in use */
> +#ifdef CONFIG_TSI721_DMA
> +	intr = TSI721_INT_SR2PC_CHAN(IDB_QUEUE) |
> +		(TSI721_INT_BDMA_CHAN_M &
> +		 ~TSI721_INT_BDMA_CHAN(TSI721_DMACH_MAINT));
> +#else
> +	intr = TSI721_INT_SR2PC_CHAN(IDB_QUEUE);
> +#endif
> +	iowrite32(intr,	priv->regs + TSI721_DEV_CHAN_INTE);
> +
>  	if (priv->flags & TSI721_USING_MSIX)
>  		intr = TSI721_DEV_INT_SRIO;
>  	else
>  		intr = TSI721_DEV_INT_SR2PC_CH | TSI721_DEV_INT_SRIO |
> -			TSI721_DEV_INT_SMSG_CH;
> +			TSI721_DEV_INT_SMSG_CH | TSI721_DEV_INT_BDMA_CH;
>  
>  	iowrite32(intr, priv->regs + TSI721_DEV_INTE);
>  	ioread32(priv->regs + TSI721_DEV_INTE);
> @@ -714,12 +734,29 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
>  					TSI721_MSIX_OMSG_INT(i);
>  	}
>  
> +#ifdef CONFIG_TSI721_DMA
> +	/*
> +	 * Initialize MSI-X entries for Block DMA Engine:
> +	 * this driver supports XXX DMA channels
> +	 * (one is reserved for SRIO maintenance transactions)
> +	 */
> +	for (i = 0; i < TSI721_DMA_CHNUM; i++) {
> +		entries[TSI721_VECT_DMA0_DONE + i].entry =
> +					TSI721_MSIX_DMACH_DONE(i);
> +		entries[TSI721_VECT_DMA0_INT + i].entry =
> +					TSI721_MSIX_DMACH_INT(i);
> +	}
> +#endif /* CONFIG_TSI721_DMA */
> +
>  	err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries));
>  	if (err) {
>  		if (err > 0)
>  			dev_info(&priv->pdev->dev,
>  				 "Only %d MSI-X vectors available, "
>  				 "not using MSI-X\n", err);
> +		else
> +			dev_err(&priv->pdev->dev,
> +				"Failed to enable MSI-X (err=%d)\n", err);
>  		return err;
>  	}
>  
> @@ -759,6 +796,22 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
>  			 i, pci_name(priv->pdev));
>  	}
>  
> +#ifdef CONFIG_TSI721_DMA
> +	for (i = 0; i < TSI721_DMA_CHNUM; i++) {
> +		priv->msix[TSI721_VECT_DMA0_DONE + i].vector =
> +				entries[TSI721_VECT_DMA0_DONE + i].vector;
> +		snprintf(priv->msix[TSI721_VECT_DMA0_DONE + i].irq_name,
> +			 IRQ_DEVICE_NAME_MAX, DRV_NAME "-dmad%d@pci:%s",
> +			 i, pci_name(priv->pdev));
> +
> +		priv->msix[TSI721_VECT_DMA0_INT + i].vector =
> +				entries[TSI721_VECT_DMA0_INT + i].vector;
> +		snprintf(priv->msix[TSI721_VECT_DMA0_INT + i].irq_name,
> +			 IRQ_DEVICE_NAME_MAX, DRV_NAME "-dmai%d@pci:%s",
> +			 i, pci_name(priv->pdev));
> +	}
> +#endif /* CONFIG_TSI721_DMA */
> +
>  	return 0;
>  }
>  #endif /* CONFIG_PCI_MSI */
> @@ -889,20 +942,34 @@ static void tsi721_doorbell_free(struct tsi721_device *priv)
>  	priv->idb_base = NULL;
>  }
>  
> -static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
> +/**
> + * tsi721_bdma_maint_init - Initialize maintenance request BDMA channel.
> + * @priv: pointer to tsi721 private data
> + *
> + * Initialize BDMA channel allocated for RapidIO maintenance read/write
> + * request generation
> + * Returns %0 on success or %-ENOMEM on failure.
> + */
> +static int tsi721_bdma_maint_init(struct tsi721_device *priv)
>  {
>  	struct tsi721_dma_desc *bd_ptr;
>  	u64		*sts_ptr;
>  	dma_addr_t	bd_phys, sts_phys;
>  	int		sts_size;
> -	int		bd_num = priv->bdma[chnum].bd_num;
> +	int		bd_num = 2;
> +	void __iomem	*regs;
>  
> -	dev_dbg(&priv->pdev->dev, "Init Block DMA Engine, CH%d\n", chnum);
> +	dev_dbg(&priv->pdev->dev,
> +		"Init Block DMA Engine for Maintenance requests, CH%d\n",
> +		TSI721_DMACH_MAINT);
>  
>  	/*
>  	 * Initialize DMA channel for maintenance requests
>  	 */
>  
> +	priv->mdma.ch_id = TSI721_DMACH_MAINT;
> +	regs = priv->regs + TSI721_DMAC_BASE(TSI721_DMACH_MAINT);
> +
>  	/* Allocate space for DMA descriptors */
>  	bd_ptr = dma_alloc_coherent(&priv->pdev->dev,
>  					bd_num * sizeof(struct tsi721_dma_desc),
> @@ -910,8 +977,9 @@ static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
>  	if (!bd_ptr)
>  		return -ENOMEM;
>  
> -	priv->bdma[chnum].bd_phys = bd_phys;
> -	priv->bdma[chnum].bd_base = bd_ptr;
> +	priv->mdma.bd_num = bd_num;
> +	priv->mdma.bd_phys = bd_phys;
> +	priv->mdma.bd_base = bd_ptr;
>  
>  	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
>  
> @@ -930,13 +998,13 @@ static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
>  		dma_free_coherent(&priv->pdev->dev,
>  				  bd_num * sizeof(struct tsi721_dma_desc),
>  				  bd_ptr, bd_phys);
> -		priv->bdma[chnum].bd_base = NULL;
> +		priv->mdma.bd_base = NULL;
>  		return -ENOMEM;
>  	}
>  
> -	priv->bdma[chnum].sts_phys = sts_phys;
> -	priv->bdma[chnum].sts_base = sts_ptr;
> -	priv->bdma[chnum].sts_size = sts_size;
> +	priv->mdma.sts_phys = sts_phys;
> +	priv->mdma.sts_base = sts_ptr;
> +	priv->mdma.sts_size = sts_size;
>  
>  	memset(sts_ptr, 0, sts_size);
>  
> @@ -951,83 +1019,61 @@ static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
>  	bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
>  
>  	/* Setup DMA descriptor pointers */
> -	iowrite32(((u64)bd_phys >> 32),
> -		priv->regs + TSI721_DMAC_DPTRH(chnum));
> +	iowrite32(((u64)bd_phys >> 32),	regs + TSI721_DMAC_DPTRH);
>  	iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
> -		priv->regs + TSI721_DMAC_DPTRL(chnum));
> +		regs + TSI721_DMAC_DPTRL);
>  
>  	/* Setup descriptor status FIFO */
> -	iowrite32(((u64)sts_phys >> 32),
> -		priv->regs + TSI721_DMAC_DSBH(chnum));
> +	iowrite32(((u64)sts_phys >> 32), regs + TSI721_DMAC_DSBH);
>  	iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
> -		priv->regs + TSI721_DMAC_DSBL(chnum));
> +		regs + TSI721_DMAC_DSBL);
>  	iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
> -		priv->regs + TSI721_DMAC_DSSZ(chnum));
> +		regs + TSI721_DMAC_DSSZ);
>  
>  	/* Clear interrupt bits */
> -	iowrite32(TSI721_DMAC_INT_ALL,
> -		priv->regs + TSI721_DMAC_INT(chnum));
> +	iowrite32(TSI721_DMAC_INT_ALL, regs + TSI721_DMAC_INT);
>  
> -	ioread32(priv->regs + TSI721_DMAC_INT(chnum));
> +	ioread32(regs + TSI721_DMAC_INT);
>  
>  	/* Toggle DMA channel initialization */
> -	iowrite32(TSI721_DMAC_CTL_INIT,	priv->regs + TSI721_DMAC_CTL(chnum));
> -	ioread32(priv->regs + TSI721_DMAC_CTL(chnum));
> +	iowrite32(TSI721_DMAC_CTL_INIT,	regs + TSI721_DMAC_CTL);
> +	ioread32(regs + TSI721_DMAC_CTL);
>  	udelay(10);
>  
>  	return 0;
>  }
>  
> -static int tsi721_bdma_ch_free(struct tsi721_device *priv, int chnum)
> +static int tsi721_bdma_maint_free(struct tsi721_device *priv)
>  {
>  	u32 ch_stat;
> +	struct tsi721_bdma_maint *mdma = &priv->mdma;
> +	void __iomem *regs = priv->regs + TSI721_DMAC_BASE(mdma->ch_id);
>  
> -	if (priv->bdma[chnum].bd_base == NULL)
> +	if (mdma->bd_base == NULL)
>  		return 0;
>  
>  	/* Check if DMA channel still running */
> -	ch_stat = ioread32(priv->regs +	TSI721_DMAC_STS(chnum));
> +	ch_stat = ioread32(regs + TSI721_DMAC_STS);
>  	if (ch_stat & TSI721_DMAC_STS_RUN)
>  		return -EFAULT;
>  
>  	/* Put DMA channel into init state */
> -	iowrite32(TSI721_DMAC_CTL_INIT,
> -		priv->regs + TSI721_DMAC_CTL(chnum));
> +	iowrite32(TSI721_DMAC_CTL_INIT,	regs + TSI721_DMAC_CTL);
>  
>  	/* Free space allocated for DMA descriptors */
>  	dma_free_coherent(&priv->pdev->dev,
> -		priv->bdma[chnum].bd_num * sizeof(struct tsi721_dma_desc),
> -		priv->bdma[chnum].bd_base, priv->bdma[chnum].bd_phys);
> -	priv->bdma[chnum].bd_base = NULL;
> +		mdma->bd_num * sizeof(struct tsi721_dma_desc),
> +		mdma->bd_base, mdma->bd_phys);
> +	mdma->bd_base = NULL;
>  
>  	/* Free space allocated for status FIFO */
>  	dma_free_coherent(&priv->pdev->dev,
> -		priv->bdma[chnum].sts_size * sizeof(struct tsi721_dma_sts),
> -		priv->bdma[chnum].sts_base, priv->bdma[chnum].sts_phys);
> -	priv->bdma[chnum].sts_base = NULL;
> -	return 0;
> -}
> -
> -static int tsi721_bdma_init(struct tsi721_device *priv)
> -{
> -	/* Initialize BDMA channel allocated for RapidIO maintenance read/write
> -	 * request generation
> -	 */
> -	priv->bdma[TSI721_DMACH_MAINT].bd_num = 2;
> -	if (tsi721_bdma_ch_init(priv, TSI721_DMACH_MAINT)) {
> -		dev_err(&priv->pdev->dev, "Unable to initialize maintenance DMA"
> -			" channel %d, aborting\n", TSI721_DMACH_MAINT);
> -		return -ENOMEM;
> -	}
> -
> +		mdma->sts_size * sizeof(struct tsi721_dma_sts),
> +		mdma->sts_base, mdma->sts_phys);
> +	mdma->sts_base = NULL;
>  	return 0;
>  }
>  
> -static void tsi721_bdma_free(struct tsi721_device *priv)
> -{
> -	tsi721_bdma_ch_free(priv, TSI721_DMACH_MAINT);
> -}
> -
>  /* Enable Inbound Messaging Interrupts */
>  static void
>  tsi721_imsg_interrupt_enable(struct tsi721_device *priv, int ch,
> @@ -2043,7 +2089,8 @@ static void tsi721_disable_ints(struct tsi721_device *priv)
>  
>  	/* Disable all BDMA Channel interrupts */
>  	for (ch = 0; ch < TSI721_DMA_MAXCH; ch++)
> -		iowrite32(0, priv->regs + TSI721_DMAC_INTE(ch));
> +		iowrite32(0,
> +			priv->regs + TSI721_DMAC_BASE(ch) + TSI721_DMAC_INTE);
>  
>  	/* Disable all general BDMA interrupts */
>  	iowrite32(0, priv->regs + TSI721_BDMA_INTE);
> @@ -2292,7 +2339,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
>  	tsi721_init_pc2sr_mapping(priv);
>  	tsi721_init_sr2pc_mapping(priv);
>  
> -	if (tsi721_bdma_init(priv)) {
> +	if (tsi721_bdma_maint_init(priv)) {
>  		dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
>  		err = -ENOMEM;
>  		goto err_unmap_bars;
> @@ -2312,12 +2359,16 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
>  	if (err)
>  		goto err_free_consistent;
>  
> +#ifdef CONFIG_TSI721_DMA
> +	tsi721_register_dma(priv);
> +#endif
> +
>  	return 0;
>  
>  err_free_consistent:
>  	tsi721_doorbell_free(priv);
>  err_free_bdma:
> -	tsi721_bdma_free(priv);
> +	tsi721_bdma_maint_free(priv);
>  err_unmap_bars:
>  	if (priv->regs)
>  		iounmap(priv->regs);
> diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
> index 58be4de..2f756dc 100644
> --- a/drivers/rapidio/devices/tsi721.h
> +++ b/drivers/rapidio/devices/tsi721.h
> @@ -21,6 +21,10 @@
>  #ifndef __TSI721_H
>  #define __TSI721_H
>  
> +#ifdef CONFIG_TSI721_DMA
> +#include <linux/dmaengine.h>
> +#endif
> +
>  #define DRV_NAME	"tsi721"
>  
>  #define DEFAULT_HOPCOUNT	0xff
> @@ -165,6 +169,8 @@
>  #define TSI721_DEV_INTE		0x29840
>  #define TSI721_DEV_INT		0x29844
>  #define TSI721_DEV_INTSET	0x29848
> +#define TSI721_DEV_INT_BDMA_CH	0x00002000
> +#define TSI721_DEV_INT_BDMA_NCH	0x00001000
>  #define TSI721_DEV_INT_SMSG_CH	0x00000800
>  #define TSI721_DEV_INT_SMSG_NCH	0x00000400
>  #define TSI721_DEV_INT_SR2PC_CH	0x00000200
> @@ -179,6 +185,8 @@
>  #define TSI721_INT_IMSG_CHAN(x)	(1 << (16 + (x)))
>  #define TSI721_INT_OMSG_CHAN_M	0x0000ff00
>  #define TSI721_INT_OMSG_CHAN(x)	(1 << (8 + (x)))
> +#define TSI721_INT_BDMA_CHAN_M	0x000000ff
> +#define TSI721_INT_BDMA_CHAN(x)	(1 << (x))
>  
>  /*
>   * PC2SR block registers
> @@ -233,14 +241,16 @@
>   *   x = 0..7
>   */
>  
> -#define TSI721_DMAC_DWRCNT(x)	(0x51000 + (x) * 0x1000)
> -#define TSI721_DMAC_DRDCNT(x)	(0x51004 + (x) * 0x1000)
> +#define TSI721_DMAC_BASE(x)	(0x51000 + (x) * 0x1000)
> +
> +#define TSI721_DMAC_DWRCNT	0x000
> +#define TSI721_DMAC_DRDCNT	0x004
>  
> -#define TSI721_DMAC_CTL(x)	(0x51008 + (x) * 0x1000)
> +#define TSI721_DMAC_CTL		0x008
>  #define TSI721_DMAC_CTL_SUSP	0x00000002
>  #define TSI721_DMAC_CTL_INIT	0x00000001
>  
> -#define TSI721_DMAC_INT(x)	(0x5100c + (x) * 0x1000)
> +#define TSI721_DMAC_INT		0x00c
>  #define TSI721_DMAC_INT_STFULL	0x00000010
>  #define TSI721_DMAC_INT_DONE	0x00000008
>  #define TSI721_DMAC_INT_SUSP	0x00000004
> @@ -248,34 +258,33 @@
>  #define TSI721_DMAC_INT_IOFDONE	0x00000001
>  #define TSI721_DMAC_INT_ALL	0x0000001f
>  
> -#define TSI721_DMAC_INTSET(x)	(0x51010 + (x) * 0x1000)
> +#define TSI721_DMAC_INTSET	0x010
>  
> -#define TSI721_DMAC_STS(x)	(0x51014 + (x) * 0x1000)
> +#define TSI721_DMAC_STS		0x014
>  #define TSI721_DMAC_STS_ABORT	0x00400000
>  #define TSI721_DMAC_STS_RUN	0x00200000
>  #define TSI721_DMAC_STS_CS	0x001f0000
>  
> -#define TSI721_DMAC_INTE(x)	(0x51018 + (x) * 0x1000)
> +#define TSI721_DMAC_INTE	0x018
>  
> -#define TSI721_DMAC_DPTRL(x)	(0x51024 + (x) * 0x1000)
> +#define TSI721_DMAC_DPTRL	0x024
>  #define TSI721_DMAC_DPTRL_MASK	0xffffffe0
>  
> -#define TSI721_DMAC_DPTRH(x)	(0x51028 + (x) * 0x1000)
> +#define TSI721_DMAC_DPTRH	0x028
>  
> -#define TSI721_DMAC_DSBL(x)	(0x5102c + (x) * 0x1000)
> +#define TSI721_DMAC_DSBL	0x02c
>  #define TSI721_DMAC_DSBL_MASK	0xffffffc0
>  
> -#define TSI721_DMAC_DSBH(x)	(0x51030 + (x) * 0x1000)
> +#define TSI721_DMAC_DSBH	0x030
>  
> -#define TSI721_DMAC_DSSZ(x)	(0x51034 + (x) * 0x1000)
> +#define TSI721_DMAC_DSSZ	0x034
>  #define TSI721_DMAC_DSSZ_SIZE_M	0x0000000f
>  #define TSI721_DMAC_DSSZ_SIZE(size)	(__fls(size) - 4)
>  
> -
> -#define TSI721_DMAC_DSRP(x)	(0x51038 + (x) * 0x1000)
> +#define TSI721_DMAC_DSRP	0x038
>  #define TSI721_DMAC_DSRP_MASK	0x0007ffff
>  
> -#define TSI721_DMAC_DSWP(x)	(0x5103c + (x) * 0x1000)
> +#define TSI721_DMAC_DSWP	0x03c
>  #define TSI721_DMAC_DSWP_MASK	0x0007ffff
>  
>  #define TSI721_BDMA_INTE	0x5f000
> @@ -624,7 +633,48 @@ enum tsi721_smsg_int_flag {
>  
>  /* Structures */
>  
> +#ifdef CONFIG_TSI721_DMA
> +
> +struct tsi721_tx_desc {
> +	struct dma_async_tx_descriptor	txd;
> +	struct tsi721_dma_desc		*hw_desc;
> +	u16				destid;
> +	/* low 64-bits of 66-bit RIO address */
> +	u64				rio_addr;
> +	/* upper 2-bits of 66-bit RIO address */
> +	u8				rio_addr_u;
> +	bool				interrupt;
> +	struct list_head		desc_node;
> +	struct list_head		tx_list;
> +};
> +
>  struct tsi721_bdma_chan {
> +	int		id;
> +	void __iomem	*regs;
> +	int		bd_num;		/* number of buffer descriptors */
> +	void		*bd_base;	/* start of DMA descriptors */
> +	dma_addr_t	bd_phys;
> +	void		*sts_base;	/* start of DMA BD status FIFO */
> +	dma_addr_t	sts_phys;
> +	int		sts_size;
> +	u32		sts_rdptr;
> +	u32		wr_count;
> +	u32		wr_count_next;
> +
> +	struct dma_chan		dchan;
> +	struct tsi721_tx_desc	*tx_desc;
> +	spinlock_t		lock;
> +	struct list_head	active_list;
> +	struct list_head	queue;
> +	struct list_head	free_list;
> +	dma_cookie_t		completed_cookie;
> +	struct tasklet_struct	tasklet;
> +};
> +
> +#endif /* CONFIG_TSI721_DMA */
> +
> +struct tsi721_bdma_maint {
> +	int		ch_id;		/* BDMA channel number */
>  	int		bd_num;		/* number of buffer descriptors */
>  	void		*bd_base;	/* start of DMA descriptors */
>  	dma_addr_t	bd_phys;
> @@ -719,6 +769,24 @@ enum tsi721_msix_vect {
>  	TSI721_VECT_IMB1_INT,
>  	TSI721_VECT_IMB2_INT,
>  	TSI721_VECT_IMB3_INT,
> +#ifdef CONFIG_TSI721_DMA
> +	TSI721_VECT_DMA0_DONE,
> +	TSI721_VECT_DMA1_DONE,
> +	TSI721_VECT_DMA2_DONE,
> +	TSI721_VECT_DMA3_DONE,
> +	TSI721_VECT_DMA4_DONE,
> +	TSI721_VECT_DMA5_DONE,
> +	TSI721_VECT_DMA6_DONE,
> +	TSI721_VECT_DMA7_DONE,
> +	TSI721_VECT_DMA0_INT,
> +	TSI721_VECT_DMA1_INT,
> +	TSI721_VECT_DMA2_INT,
> +	TSI721_VECT_DMA3_INT,
> +	TSI721_VECT_DMA4_INT,
> +	TSI721_VECT_DMA5_INT,
> +	TSI721_VECT_DMA6_INT,
> +	TSI721_VECT_DMA7_INT,
> +#endif /* CONFIG_TSI721_DMA */
>  	TSI721_VECT_MAX
>  };
>  
> @@ -752,7 +820,11 @@ struct tsi721_device {
>  	u32		pw_discard_count;
>  
>  	/* BDMA Engine */
> +	struct tsi721_bdma_maint mdma; /* Maintenance rd/wr request channel */
> +
> +#ifdef CONFIG_TSI721_DMA
>  	struct tsi721_bdma_chan bdma[TSI721_DMA_CHNUM];
> +#endif
>  
>  	/* Inbound Messaging */
>  	int		imsg_init[TSI721_IMSG_CHNUM];
> @@ -763,4 +835,9 @@ struct tsi721_device {
>  	struct tsi721_omsg_ring	omsg_ring[TSI721_OMSG_CHNUM];
>  };
>  
> +#ifdef CONFIG_TSI721_DMA
> +extern void tsi721_bdma_handler(struct tsi721_bdma_chan *chan);
> +extern int __devinit tsi721_register_dma(struct tsi721_device *priv);
> +#endif
> +
>  #endif
> diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
> new file mode 100644
> index 0000000..75a27a0
> --- /dev/null
> +++ b/drivers/rapidio/devices/tsi721_dma.c
> @@ -0,0 +1,802 @@
> +/*
> + * DMA Engine support for Tsi721 PCIExpress-to-SRIO bridge
> + *
> + * Copyright 2011 Integrated Device Technology, Inc.
> + * Alexandre Bounine <alexandre.bounine@idt.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the Free
> + * Software Foundation; either version 2 of the License, or (at your option)
> + * any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59
> + * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> + */
> +
> +#include <linux/io.h>
> +#include <linux/errno.h>
> +#include <linux/init.h>
> +#include <linux/ioport.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/rio.h>
> +#include <linux/rio_drv.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/interrupt.h>
> +#include <linux/kfifo.h>
> +#include <linux/delay.h>
> +
> +#include "tsi721.h"
> +
> +static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan)
> +{
> +	return container_of(chan, struct tsi721_bdma_chan, dchan);
> +}
> +
> +static inline struct tsi721_device *to_tsi721(struct dma_device *ddev)
> +{
> +	return container_of(ddev, struct rio_mport, dma)->priv;
> +}
> +
> +static inline
> +struct tsi721_tx_desc *to_tsi721_desc(struct dma_async_tx_descriptor *txd)
> +{
> +	return container_of(txd, struct tsi721_tx_desc, txd);
> +}
> +
> +static inline
> +struct tsi721_tx_desc *tsi721_dma_first_active(struct tsi721_bdma_chan *chan)
> +{
> +	return list_first_entry(&chan->active_list,
> +				struct tsi721_tx_desc, desc_node);
> +}
> +
> +static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *chan)
> +{
> +	struct tsi721_dma_desc *bd_ptr;
> +	struct device *dev = chan->dchan.device->dev;
> +	u64		*sts_ptr;
> +	dma_addr_t	bd_phys;
> +	dma_addr_t	sts_phys;
> +	int		sts_size;
> +	int		bd_num = chan->bd_num;
> +
> +	dev_dbg(dev, "Init Block DMA Engine, CH%d\n", chan->id);
> +
> +	/* Allocate space for DMA descriptors */
> +	bd_ptr = dma_alloc_coherent(dev,
> +				bd_num * sizeof(struct tsi721_dma_desc),
> +				&bd_phys, GFP_KERNEL);
> +	if (!bd_ptr)
> +		return -ENOMEM;
> +
> +	chan->bd_phys = bd_phys;
> +	chan->bd_base = bd_ptr;
> +
> +	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
> +
> +	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
> +		bd_ptr, (unsigned long long)bd_phys);
> +
> +	/* Allocate space for descriptor status FIFO */
> +	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
> +					bd_num : TSI721_DMA_MINSTSSZ;
> +	sts_size = roundup_pow_of_two(sts_size);
> +	sts_ptr = dma_alloc_coherent(dev,
> +				     sts_size * sizeof(struct tsi721_dma_sts),
> +				     &sts_phys, GFP_KERNEL);
> +	if (!sts_ptr) {
> +		/* Free space allocated for DMA descriptors */
> +		dma_free_coherent(dev,
> +				  bd_num * sizeof(struct tsi721_dma_desc),
> +				  bd_ptr, bd_phys);
> +		chan->bd_base = NULL;
> +		return -ENOMEM;
> +	}
> +
> +	chan->sts_phys = sts_phys;
> +	chan->sts_base = sts_ptr;
> +	chan->sts_size = sts_size;
> +
> +	memset(sts_ptr, 0, sts_size);
> +
> +	dev_dbg(dev,
> +		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
> +		sts_ptr, (unsigned long long)sts_phys, sts_size);
> +
> +	/* Initialize DMA descriptors ring */
> +	bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
> +	bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
> +						 TSI721_DMAC_DPTRL_MASK);
> +	bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
> +
> +	/* Setup DMA descriptor pointers */
> +	iowrite32(((u64)bd_phys >> 32),
> +		chan->regs + TSI721_DMAC_DPTRH);
> +	iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
> +		chan->regs + TSI721_DMAC_DPTRL);
> +
> +	/* Setup descriptor status FIFO */
> +	iowrite32(((u64)sts_phys >> 32),
> +		chan->regs + TSI721_DMAC_DSBH);
> +	iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
> +		chan->regs + TSI721_DMAC_DSBL);
> +	iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
> +		chan->regs + TSI721_DMAC_DSSZ);
> +
> +	/* Clear interrupt bits */
> +	iowrite32(TSI721_DMAC_INT_ALL,
> +		chan->regs + TSI721_DMAC_INT);
> +
> +	ioread32(chan->regs + TSI721_DMAC_INT);
> +
> +	/* Toggle DMA channel initialization */
> +	iowrite32(TSI721_DMAC_CTL_INIT,	chan->regs + TSI721_DMAC_CTL);
> +	ioread32(chan->regs + TSI721_DMAC_CTL);
> +	chan->wr_count = chan->wr_count_next = 0;
> +	chan->sts_rdptr = 0;
> +	udelay(10);
> +
> +	return 0;
> +}
> +
> +static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *chan)
> +{
> +	u32 ch_stat;
> +
> +	if (chan->bd_base == NULL)
> +		return 0;
> +
> +	/* Check if DMA channel still running */
> +	ch_stat = ioread32(chan->regs +	TSI721_DMAC_STS);
> +	if (ch_stat & TSI721_DMAC_STS_RUN)
> +		return -EFAULT;
> +
> +	/* Put DMA channel into init state */
> +	iowrite32(TSI721_DMAC_CTL_INIT,	chan->regs + TSI721_DMAC_CTL);
> +
> +	/* Free space allocated for DMA descriptors */
> +	dma_free_coherent(chan->dchan.device->dev,
> +		chan->bd_num * sizeof(struct tsi721_dma_desc),
> +		chan->bd_base, chan->bd_phys);
> +	chan->bd_base = NULL;
> +
> +	/* Free space allocated for status FIFO */
> +	dma_free_coherent(chan->dchan.device->dev,
> +		chan->sts_size * sizeof(struct tsi721_dma_sts),
> +		chan->sts_base, chan->sts_phys);
> +	chan->sts_base = NULL;
> +	return 0;
> +}
> +
> +static
> +void tsi721_bdma_interrupt_enable(struct tsi721_bdma_chan *chan, int enable)
> +{
> +	if (enable) {
> +		/* Clear pending BDMA channel interrupts */
> +		iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INT);
> +		ioread32(chan->regs + TSI721_DMAC_INT);
> +		/* Enable BDMA channel interrupts */
> +		iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INTE);
> +	} else {
> +		/* Disable BDMA channel interrupts */
> +		iowrite32(0, chan->regs + TSI721_DMAC_INTE);
> +		/* Clear pending BDMA channel interrupts */
> +		iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INT);
> +	}
> +
> +}
> +
> +static bool tsi721_dma_is_idle(struct tsi721_bdma_chan *chan)
> +{
> +	u32 sts;
> +
> +	sts = ioread32(chan->regs + TSI721_DMAC_STS);
> +	return ((sts & TSI721_DMAC_STS_RUN) == 0);
> +}
> +
> +void tsi721_bdma_handler(struct tsi721_bdma_chan *chan)
> +{
> +	/* Disable BDMA channel interrupts */
> +	iowrite32(0, chan->regs + TSI721_DMAC_INTE);
> +
> +	tasklet_schedule(&chan->tasklet);
> +}
> +
> +#ifdef CONFIG_PCI_MSI
> +/**
> + * tsi721_bdma_msix - MSI-X interrupt handler for BDMA channels
> + * @irq: Linux interrupt number
> + * @ptr: Pointer to interrupt-specific data (BDMA channel structure)
> + *
> + * Handles BDMA channel interrupts signaled using MSI-X.
> + */
> +static irqreturn_t tsi721_bdma_msix(int irq, void *ptr)
> +{
> +	struct tsi721_bdma_chan *chan = ptr;
> +
> +	tsi721_bdma_handler(chan);
> +	return IRQ_HANDLED;
> +}
> +#endif /* CONFIG_PCI_MSI */
> +
> +/* Must be called with the spinlock held */
> +static void tsi721_start_dma(struct tsi721_bdma_chan *chan)
> +{
> +	if (!tsi721_dma_is_idle(chan)) {
> +		dev_err(chan->dchan.device->dev,
> +			"BUG: Attempt to start non-idle channel\n");
> +		return;
> +	}
> +
> +	if (chan->wr_count == chan->wr_count_next) {
> +		dev_err(chan->dchan.device->dev,
> +			"BUG: Attempt to start DMA with no BDs ready\n");
> +		return;
> +	}
> +
> +	dev_dbg(chan->dchan.device->dev,
> +		"tx_chan: %p, chan: %d, regs: %p\n",
> +		chan, chan->dchan.chan_id, chan->regs);
> +
> +	iowrite32(chan->wr_count_next, chan->regs + TSI721_DMAC_DWRCNT);
> +	ioread32(chan->regs + TSI721_DMAC_DWRCNT);
> +
> +	chan->wr_count = chan->wr_count_next;
> +}
> +
> +static void tsi721_desc_put(struct tsi721_bdma_chan *chan,
> +			    struct tsi721_tx_desc *desc)
> +{
> +	dev_dbg(chan->dchan.device->dev, "Put desc: %p into free list\n", desc);
> +
> +	if (desc) {
> +		spin_lock_bh(&chan->lock);
> +		list_splice_init(&desc->tx_list, &chan->free_list);
> +		list_add(&desc->desc_node, &chan->free_list);
> +		chan->wr_count_next = chan->wr_count;
> +		spin_unlock_bh(&chan->lock);
> +	}
> +}
> +
> +static struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *chan)
> +{
> +	struct tsi721_tx_desc *tx_desc, *_tx_desc;
> +	struct tsi721_tx_desc *ret = NULL;
> +	int i;
> +
> +	spin_lock_bh(&chan->lock);
> +	list_for_each_entry_safe(tx_desc, _tx_desc,
> +				 &chan->free_list, desc_node) {
> +		if (async_tx_test_ack(&tx_desc->txd)) {
> +			list_del(&tx_desc->desc_node);
> +			ret = tx_desc;
> +			break;
> +		}
> +		dev_dbg(chan->dchan.device->dev,
> +			"desc %p not ACKed\n", tx_desc);
> +	}
> +
> +	i = chan->wr_count_next % chan->bd_num;
> +	if (i == chan->bd_num - 1) {
> +		i = 0;
> +		chan->wr_count_next++; /* skip link descriptor */
> +	}
> +
> +	chan->wr_count_next++;
> +	tx_desc->txd.phys = chan->bd_phys + i * sizeof(struct tsi721_dma_desc);
> +	tx_desc->hw_desc = &((struct tsi721_dma_desc *)chan->bd_base)[i];
> +
> +	spin_unlock_bh(&chan->lock);
> +
> +	return ret;
> +}
> +
> +static
> +int tsi721_fill_desc(struct tsi721_bdma_chan *chan, struct tsi721_tx_desc *desc,
> +	struct scatterlist *sg, enum dma_rtype rtype, u32 sys_size)
> +{
> +	struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
> +	u64 rio_addr;
> +
> +	if (sg_dma_len(sg) > TSI721_DMAD_BCOUNT1 + 1) {
> +		dev_err(chan->dchan.device->dev, "SG element is too large\n");
> +		return -EINVAL;
> +	}
> +
> +	dev_dbg(chan->dchan.device->dev,
> +		"desc: 0x%llx, addr: 0x%llx len: 0x%x\n",
> +		(u64)desc->txd.phys, (unsigned long long)sg_dma_address(sg),
> +		sg_dma_len(sg));
> +
> +	dev_dbg(chan->dchan.device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
> +		bd_ptr, desc->destid, desc->rio_addr);
> +
> +	/* Initialize DMA descriptor */
> +	bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
> +					(rtype << 19) | desc->destid);
> +	if (desc->interrupt)
> +		bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
> +	bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
> +					(sys_size << 26) | sg_dma_len(sg));
> +	rio_addr = (desc->rio_addr >> 2) |
> +				((u64)(desc->rio_addr_u & 0x3) << 62);
> +	bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff);
> +	bd_ptr->raddr_hi = cpu_to_le32(rio_addr >> 32);
> +	bd_ptr->t1.bufptr_lo = cpu_to_le32(
> +					(u64)sg_dma_address(sg) & 0xffffffff);
> +	bd_ptr->t1.bufptr_hi = cpu_to_le32((u64)sg_dma_address(sg) >> 32);
> +	bd_ptr->t1.s_dist = 0;
> +	bd_ptr->t1.s_size = 0;
> +
> +	mb();
> +
> +	return 0;
> +}
> +
> +static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *chan,
> +				      struct tsi721_tx_desc *desc)
> +{
> +	struct dma_async_tx_descriptor *txd = &desc->txd;
> +	dma_async_tx_callback callback = txd->callback;
> +	void *param = txd->callback_param;
> +
> +	list_splice_init(&desc->tx_list, &chan->free_list);
> +	list_move(&desc->desc_node, &chan->free_list);
> +	chan->completed_cookie = txd->cookie;
> +
> +	if (callback)
> +		callback(param);
> +}
> +
> +static void tsi721_dma_complete_all(struct tsi721_bdma_chan *chan)
> +{
> +	struct tsi721_tx_desc *desc, *_d;
> +	LIST_HEAD(list);
> +
> +	BUG_ON(!tsi721_dma_is_idle(chan));
> +
> +	if (!list_empty(&chan->queue))
> +		tsi721_start_dma(chan);
> +
> +	list_splice_init(&chan->active_list, &list);
> +	list_splice_init(&chan->queue, &chan->active_list);
> +
> +	list_for_each_entry_safe(desc, _d, &list, desc_node)
> +		tsi721_dma_chain_complete(chan, desc);
> +}
> +
> +static void tsi721_clr_stat(struct tsi721_bdma_chan *chan)
> +{
> +	u32 srd_ptr;
> +	u64 *sts_ptr;
> +	int i, j;
> +
> +	/* Check and clear descriptor status FIFO entries */
> +	srd_ptr = chan->sts_rdptr;
> +	sts_ptr = chan->sts_base;
> +	j = srd_ptr * 8;
> +	while (sts_ptr[j]) {
> +		for (i = 0; i < 8 && sts_ptr[j]; i++, j++)
> +			sts_ptr[j] = 0;
> +
> +		++srd_ptr;
> +		srd_ptr %= chan->sts_size;
> +		j = srd_ptr * 8;
> +	}
> +
> +	iowrite32(srd_ptr, chan->regs + TSI721_DMAC_DSRP);
> +	chan->sts_rdptr = srd_ptr;
> +}
> +
> +static void tsi721_advance_work(struct tsi721_bdma_chan *chan)
> +{
> +	if (list_empty(&chan->active_list) ||
> +		list_is_singular(&chan->active_list)) {
> +		dev_dbg(chan->dchan.device->dev,
> +			"%s: Active_list empty\n", __func__);
> +		tsi721_dma_complete_all(chan);
> +	} else {
> +		dev_dbg(chan->dchan.device->dev,
> +			"%s: Active_list NOT empty\n", __func__);
> +		tsi721_dma_chain_complete(chan, tsi721_dma_first_active(chan));
> +		tsi721_start_dma(chan);
> +	}
> +}
> +
> +static void tsi721_dma_tasklet(unsigned long data)
> +{
> +	struct tsi721_bdma_chan *chan = (struct tsi721_bdma_chan *)data;
> +	u32 dmac_int, dmac_sts;
> +
> +	dmac_int = ioread32(chan->regs + TSI721_DMAC_INT);
> +	dev_dbg(chan->dchan.device->dev, "%s: DMAC%d_INT = 0x%x\n",
> +		__func__, chan->id, dmac_int);
> +	/* Clear channel interrupts */
> +	iowrite32(dmac_int, chan->regs + TSI721_DMAC_INT);
> +
> +	if (dmac_int & TSI721_DMAC_INT_ERR) {
> +		dmac_sts = ioread32(chan->regs + TSI721_DMAC_STS);
> +		dev_err(chan->dchan.device->dev,
> +			"%s: DMA ERROR - DMAC%d_STS = 0x%x\n",
> +			__func__, chan->id, dmac_sts);
> +	}
> +
> +	if (dmac_int & TSI721_DMAC_INT_STFULL) {
> +		dev_err(chan->dchan.device->dev,
> +			"%s: DMAC%d descriptor status FIFO is full\n",
> +			__func__, chan->id);
> +	}
> +
> +	if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
> +		tsi721_clr_stat(chan);
> +		spin_lock(&chan->lock);
> +		tsi721_advance_work(chan);
> +		spin_unlock(&chan->lock);
> +	}
> +
> +	/* Re-Enable BDMA channel interrupts */
> +	iowrite32(TSI721_DMAC_INT_ALL, chan->regs + TSI721_DMAC_INTE);
> +}
> +
> +static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
> +{
> +	struct tsi721_tx_desc *desc = to_tsi721_desc(txd);
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(txd->chan);
> +	dma_cookie_t cookie;
> +
> +	spin_lock_bh(&chan->lock);
> +
> +	cookie = txd->chan->cookie;
> +	if (++cookie < 0)
> +		cookie = 1;
> +	txd->chan->cookie = cookie;
> +	txd->cookie = cookie;
> +
> +	if (list_empty(&chan->active_list)) {
> +		list_add_tail(&desc->desc_node, &chan->active_list);
> +		tsi721_start_dma(chan);
> +	} else {
> +		list_add_tail(&desc->desc_node, &chan->queue);
> +	}
> +
> +	spin_unlock_bh(&chan->lock);
> +	return cookie;
> +}
> +
> +static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
> +{
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> +	struct tsi721_device *priv = to_tsi721(dchan->device);
> +	struct tsi721_tx_desc *desc = NULL;
> +	LIST_HEAD(tmp_list);
> +	int i;
> +	int rc;
> +
> +	if (chan->bd_base)
> +		return chan->bd_num - 1;
> +
> +	/* Initialize BDMA channel */
> +	if (tsi721_bdma_ch_init(chan)) {
> +		dev_err(dchan->device->dev, "Unable to initialize data DMA"
> +			" channel %d, aborting\n", chan->id);
> +		return -ENOMEM;
> +	}
> +
> +	/* Allocate matching number of logical descriptors */
> +	desc = kzalloc((chan->bd_num - 1) * sizeof(struct tsi721_tx_desc),
> +			GFP_KERNEL);
> +	if (!desc) {
> +		dev_err(dchan->device->dev,
> +			"Failed to allocate logical descriptors\n");
> +		rc = -ENOMEM;
> +		goto err_out;
> +	}
> +
> +	chan->tx_desc = desc;
> +
> +	for (i = 0; i < chan->bd_num - 1; i++) {
> +		dma_async_tx_descriptor_init(&desc[i].txd, dchan);
> +		desc[i].txd.tx_submit = tsi721_tx_submit;
> +		desc[i].txd.flags = DMA_CTRL_ACK;
> +		INIT_LIST_HEAD(&desc[i].tx_list);
> +		list_add_tail(&desc[i].desc_node, &tmp_list);
> +	}
> +
> +	spin_lock_bh(&chan->lock);
> +	list_splice(&tmp_list, &chan->free_list);
> +	chan->completed_cookie = dchan->cookie = 1;
> +	spin_unlock_bh(&chan->lock);
> +
> +#ifdef CONFIG_PCI_MSI
> +	if (priv->flags & TSI721_USING_MSIX) {
> +		/* Request interrupt service if we are in MSI-X mode */
> +		rc = request_irq(
> +			priv->msix[TSI721_VECT_DMA0_DONE + chan->id].vector,
> +			tsi721_bdma_msix, 0,
> +			priv->msix[TSI721_VECT_DMA0_DONE + chan->id].irq_name,
> +			(void *)chan);
> +
> +		if (rc) {
> +			dev_dbg(dchan->device->dev,
> +				"Unable to allocate MSI-X interrupt for "
> +				"BDMA%d-DONE\n", chan->id);
> +			goto err_out;
> +		}
> +
> +		rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
> +					    chan->id].vector,
> +			tsi721_bdma_msix, 0,
> +			priv->msix[TSI721_VECT_DMA0_INT + chan->id].irq_name,
> +			(void *)chan);
> +
> +		if (rc)	{
> +			dev_dbg(dchan->device->dev,
> +				"Unable to allocate MSI-X interrupt for "
> +				"BDMA%d-INT\n", chan->id);
> +			free_irq(
> +				priv->msix[TSI721_VECT_DMA0_DONE +
> +					   chan->id].vector,
> +				(void *)chan);
> +			rc = -EIO;
> +			goto err_out;
> +		}
> +	}
> +#endif /* CONFIG_PCI_MSI */
> +
> +	tsi721_bdma_interrupt_enable(chan, 1);
> +
> +	return chan->bd_num - 1;
> +
> +err_out:
> +	kfree(desc);
> +	tsi721_bdma_ch_free(chan);
> +	return rc;
> +}
> +
> +static void tsi721_free_chan_resources(struct dma_chan *dchan)
> +{
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> +	struct tsi721_device *priv = to_tsi721(dchan->device);
> +	LIST_HEAD(list);
> +
> +	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
> +
> +	BUG_ON(!list_empty(&chan->active_list));
> +	BUG_ON(!list_empty(&chan->queue));
> +
> +	spin_lock_irq(&chan->lock);
> +	list_splice_init(&chan->free_list, &list);
> +	spin_unlock_irq(&chan->lock);
> +
> +	tsi721_bdma_interrupt_enable(chan, 0);
> +
> +#ifdef CONFIG_PCI_MSI
> +	if (priv->flags & TSI721_USING_MSIX) {
> +		free_irq(priv->msix[TSI721_VECT_DMA0_DONE + chan->id].vector,
> +			 (void *)chan);
> +		free_irq(priv->msix[TSI721_VECT_DMA0_INT + chan->id].vector,
> +			 (void *)chan);
> +	}
> +#endif /* CONFIG_PCI_MSI */
> +
> +	tsi721_bdma_ch_free(chan);
> +	kfree(chan->tx_desc);
> +}
> +
> +static
> +enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
> +				 struct dma_tx_state *txstate)
> +{
> +	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
> +	dma_cookie_t		last_used;
> +	dma_cookie_t		last_completed;
> +	int			ret;
> +
> +	spin_lock_irq(&bdma_chan->lock);
> +	last_completed = bdma_chan->completed_cookie;
> +	last_used = dchan->cookie;
> +	spin_unlock_irq(&bdma_chan->lock);
> +
> +	ret = dma_async_is_complete(cookie, last_completed, last_used);
> +
> +	dma_set_tx_state(txstate, last_completed, last_used, 0);
> +
> +	dev_dbg(dchan->device->dev,
> +		"%s: exit, ret: %d, last_completed: %d, last_used: %d\n",
> +		__func__, ret, last_completed, last_used);
> +
> +	return ret;
> +}
> +
> +static void tsi721_issue_pending(struct dma_chan *dchan)
> +{
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> +
> +	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
> +
> +	if (tsi721_dma_is_idle(chan)) {
> +		spin_lock_bh(&chan->lock);
> +		tsi721_advance_work(chan);
> +		spin_unlock_bh(&chan->lock);
> +	} else
> +		dev_dbg(dchan->device->dev,
> +			"%s: DMA channel still busy\n", __func__);
> +}
> +
> +static
> +struct dma_async_tx_descriptor *tsi721_prep_slave_sg(struct dma_chan *dchan,
> +			struct scatterlist *sgl, unsigned int sg_len,
> +			enum dma_data_direction direction, unsigned long flags)
> +{
> +	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
> +	struct tsi721_tx_desc *desc = NULL;
> +	struct tsi721_tx_desc *first = NULL;
> +	struct scatterlist *sg;
> +	struct rio_dma_ext *rext = dchan->private;
> +	u64 rio_addr = rext->rio_addr; /* FIXME: assuming 64-bit rio_addr for now */
> +	unsigned int i;
> +	u32 sys_size = dma_to_mport(dchan->device)->sys_size;
> +	enum dma_rtype rtype;
> +
> +	if (!sgl || !sg_len) {
> +		dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
> +		return NULL;
> +	}
> +
> +	if (direction == DMA_FROM_DEVICE)
> +		rtype = NREAD;
> +	else if (direction == DMA_TO_DEVICE) {
> +		switch (rext->wr_type) {
> +		case RDW_ALL_NWRITE:
> +			rtype = ALL_NWRITE;
> +			break;
> +		case RDW_ALL_NWRITE_R:
> +			rtype = ALL_NWRITE_R;
> +			break;
> +		case RDW_LAST_NWRITE_R:
> +		default:
> +			rtype = LAST_NWRITE_R;
> +			break;
> +		}
> +	} else {
> +		dev_err(dchan->device->dev,
> +			"%s: Unsupported DMA direction option\n", __func__);
> +		return NULL;
> +	}
> +
> +	for_each_sg(sgl, sg, sg_len, i) {
> +		int err;
> +
> +		dev_dbg(dchan->device->dev, "%s: sg #%d\n", __func__, i);
> +		desc = tsi721_desc_get(bdma_chan);
> +		if (!desc) {
> +			dev_err(dchan->device->dev,
> +				"Not enough descriptors available\n");
> +			goto err_desc_get;
> +		}
> +
> +		if (sg_is_last(sg))
> +			desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
> +		else
> +			desc->interrupt = false;
> +
> +		desc->destid = rext->destid;
> +		desc->rio_addr = rio_addr;
> +		desc->rio_addr_u = 0;
> +
> +		err = tsi721_fill_desc(bdma_chan, desc, sg, rtype, sys_size);
> +		if (err) {
> +			dev_err(dchan->device->dev,
> +				"Failed to build desc: %d\n", err);
> +			goto err_desc_get;
> +		}
> +
> +		rio_addr += sg_dma_len(sg);
> +
> +		if (!first)
> +			first = desc;
> +		else
> +			list_add_tail(&desc->desc_node, &first->tx_list);
> +	}
> +
> +	first->txd.cookie = -EBUSY;
> +	desc->txd.flags = flags;
> +
> +	return &first->txd;
> +
> +err_desc_get:
> +	tsi721_desc_put(bdma_chan, first);
> +	return NULL;
> +}
> +
> +static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
> +			     unsigned long arg)
> +{
> +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> +	struct tsi721_tx_desc *desc, *_d;
> +	LIST_HEAD(list);
> +
> +	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
> +
> +	if (cmd != DMA_TERMINATE_ALL)
> +		return -ENXIO;
> +
> +	spin_lock_bh(&chan->lock);
> +
> +	/* make sure to stop the transfer */
> +	iowrite32(TSI721_DMAC_CTL_SUSP, chan->regs + TSI721_DMAC_CTL);
> +
> +	list_splice_init(&chan->active_list, &list);
> +	list_splice_init(&chan->queue, &list);
> +
> +	list_for_each_entry_safe(desc, _d, &list, desc_node)
> +		tsi721_dma_chain_complete(chan, desc);
> +
> +	spin_unlock_bh(&chan->lock);
> +
> +	return 0;
> +}
> +
> +int __devinit tsi721_register_dma(struct tsi721_device *priv)
> +{
> +	int i;
> +	int nr_channels = TSI721_DMA_MAXCH;
> +	int err;
> +	struct rio_mport *mport = priv->mport;
> +
> +	mport->dma.dev = &priv->pdev->dev;
> +	mport->dma.chancnt = nr_channels;
> +
> +	INIT_LIST_HEAD(&mport->dma.channels);
> +
> +	for (i = 0; i < nr_channels; i++) {
> +		struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i];
> +
> +		if (i == TSI721_DMACH_MAINT)
> +			continue;
> +
> +		bdma_chan->bd_num = 64;
> +		bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
> +
> +		bdma_chan->dchan.device = &mport->dma;
> +		bdma_chan->dchan.cookie = 1;
> +		bdma_chan->dchan.chan_id = i;
> +		bdma_chan->id = i;
> +
> +		spin_lock_init(&bdma_chan->lock);
> +
> +		INIT_LIST_HEAD(&bdma_chan->active_list);
> +		INIT_LIST_HEAD(&bdma_chan->queue);
> +		INIT_LIST_HEAD(&bdma_chan->free_list);
> +
> +		tasklet_init(&bdma_chan->tasklet, tsi721_dma_tasklet,
> +			     (unsigned long)bdma_chan);
> +		list_add_tail(&bdma_chan->dchan.device_node,
> +			      &mport->dma.channels);
> +	}
> +
> +	dma_cap_zero(mport->dma.cap_mask);
> +	dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask);
> +	dma_cap_set(DMA_RAPIDIO, mport->dma.cap_mask);
> +
> +	mport->dma.device_alloc_chan_resources = tsi721_alloc_chan_resources;
> +	mport->dma.device_free_chan_resources = tsi721_free_chan_resources;
> +	mport->dma.device_tx_status = tsi721_tx_status;
> +	mport->dma.device_issue_pending = tsi721_issue_pending;
> +	mport->dma.device_prep_slave_sg = tsi721_prep_slave_sg;
> +	mport->dma.device_control = tsi721_device_control;
> +
> +	err = dma_async_device_register(&mport->dma);
> +	if (err)
> +		dev_err(&priv->pdev->dev, "Failed to register DMA device\n");
> +
> +	return err;
> +}


-- 
~Vinod


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-01 18:01 ` [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Vinod Koul
@ 2011-10-03 16:52   ` Bounine, Alexandre
  2011-10-05 20:38     ` Williams, Dan J
  2011-10-07  5:27     ` Vinod Koul
  0 siblings, 2 replies; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-03 16:52 UTC (permalink / raw)
  To: Vinod Koul, Dan
  Cc: akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="UTF-8", Size: 5929 bytes --]

Vinod Koul wrote:
> 
> On Fri, 2011-09-30 at 17:38 -0400, Alexandre Bounine wrote:
> Please CC *maintainers* on your patches, get_maintainers.pl will tell
> you who. Adding Dan here

Based on https://lkml.org/lkml/2011/2/14/67 and use of DMA_SLAVE in this
patch I decided that you are the best match among two and there is no reason
to disturb Dan ;) 

> > Adds DMA Engine framework support into RapidIO subsystem.
> > Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from remote
> > RapidIO target devices. Uses scatterlist to describe local data buffer and
> > dma_chan.private member to pass target specific information. Supports flat
> > data buffer only for a remote side.
> The way dmaengine works today is that it doesn't know anything about
> client subsystem. But this brings in a subsystem details to dmaengine
> which I don't agree with yet.
> Why can't we abstract this out??

The only thing that brings subsystem knowledge into dmaengine is DMA_RAPIDIO flag.
I am actually on the fence about this. From RapidIO side point of view I do not
need that flag at all.
RapidIO uses a filter routine that is sufficient to identify dmaengine channels
associated with specific RapidIO mport. Use of DMA_SLAVE flag is safe here.
Use of private member of dma_chan is "private" business of RapidIO and does
not break anything. 

My concern here is that other subsystems may use/request DMA_SLAVE channel(s) as well
and wrongfully acquire one that belongs to RapidIO. In this case separation with another
flag may have a sense - it is possible to have a system that uses RapidIO
and other "traditional" DMA slave channel.

This is why I put that proposed interface for discussion instead of keeping everything
inside of RapidIO.
If you think that situation above will not happen I will be happy to remove
that subsystem knowledge from dmaengine files.

My most recent test implementation runs without DMA_RAPIDIO flag though.

> 
> After going thru the patch, I do not believe that this this is case of
> SLAVE transfers, Dan can you please take a look at this patch

I agree, this is not a case of "pure" slave transfers but existing DMA_SLAVE
interface fits well into the RapidIO operations.

First, we have only one memory mapped location on the host side. We transfer
data to/from location that is not mapped into memory on the same side.  

Second, having ability to pass private target information allows me to pass
information about remote target device on per-transfer basis.

> 
> 
> > Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
> > Cc: Vinod Koul <vinod.koul@intel.com>
> > Cc: Kumar Gala <galak@kernel.crashing.org>
> > Cc: Matt Porter <mporter@kernel.crashing.org>
> > Cc: Li Yang <leoli@freescale.com>
> > ---
> >  drivers/dma/dmaengine.c   |    4 ++
... skip ...
> > +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
> > +
> > +#include <linux/dmaengine.h>
> > +
... skip ...
> > + */
> > +struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
> > +	struct dma_chan *dchan, struct rio_dma_data *data,
> > +	enum dma_data_direction direction, unsigned long flags)
> > +{
> > +	struct dma_async_tx_descriptor *txd = NULL;
> > +	struct rio_dma_ext rio_ext;
> > +
> > +	rio_ext.destid = rdev->destid;
> > +	rio_ext.rio_addr_u = data->rio_addr_u;
> > +	rio_ext.rio_addr = data->rio_addr;
> > +	rio_ext.wr_type = data->wr_type;
> > +	dchan->private = &rio_ext;
> > +
> > +	txd = dchan->device->device_prep_slave_sg(dchan, data->sg, data-
> >sg_len,
> > +						  direction, flags);
> > +
> > +	return txd;
> > +}
> > +EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
> You should move the rdev and data to dma_slave_config, that way you
> should be able to use the existing _prep_slave_sg function.
 
RapidIO network usually has more than one device attached to it and
single DMA channel may service data transfers to/from several devices.
In this case device information should be passed on per-transfer basis.

> > +
> > +#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
> > +
... skip ...
> > + *
> > + * Note: RapidIO specification defines write (NWRITE) and
> > + * write-with-response (NWRITE_R) data transfer operations.
> > + * Existing DMA controllers that service RapidIO may use one of these operations
> > + * for entire data transfer or their combination with only the last data packet
> > + * requires response.
> > + */
> > +enum rio_write_type {
> > +	RDW_DEFAULT,		/* default method used by DMA driver */
> > +	RDW_ALL_NWRITE,		/* all packets use NWRITE */
> > +	RDW_ALL_NWRITE_R,	/* all packets use NWRITE_R */
> > +	RDW_LAST_NWRITE_R,	/* last packet uses NWRITE_R, all other - NWRITE */
> > +};
> Why not use the current mechanism of specifying callback or ACK flags
> if you want a response or not.

That response is handled by RapidIO hardware and ensures reliable
packet delivery when response is used. User may not need callback or ACK
for his operation (in terms of dmaengine) but error handling will be initiated
if there is no response from the target device. 
 
> 
> > +
> > +struct rio_dma_ext {
> > +	u16 destid;
> > +	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
> > +	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
> > +	enum rio_write_type wr_type; /* preferred RIO write operation type */
> > +};
> will this address translated to a dma_addr_t or not?

No. This is a RapidIO specific address on the remote device.

> > +
> > +struct rio_dma_data {
> > +	/* Local data (as scatterlist) */
> > +	struct scatterlist	*sg;	/* I/O scatter list */
> > +	unsigned int		sg_len;	/* size of scatter list */
> > +	/* Remote device address (flat buffer) */
.... skip ...
> >   * rio_name - Get the unique RIO device identifier
> >   * @rdev: RIO device
> 
> 
> --
> ~Vinod

Regards,

Alex.

[-- Undecodable raw bytes (mangled email footer/signature) omitted --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-09-30 22:15   ` Andrew Morton
@ 2011-10-03 17:53     ` Bounine, Alexandre
  2011-10-04 21:43       ` Andrew Morton
  0 siblings, 1 reply; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-03 17:53 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, linuxppc-dev, Vinod Koul, Kumar Gala, Matt Porter,
	Li Yang

Andrew Morton wrote:
> 
> On Fri, 30 Sep 2011 17:38:35 -0400
> Alexandre Bounine <alexandre.bounine@idt.com> wrote:
> 
> > Adds support for DMA Engine API.
> >
> > Includes following changes:
> > - Modifies BDMA register offset definitions to support per-channel
> handling
> > - Separates BDMA channel reserved for RIO Maintenance requests
> > - Adds DMA Engine callback routines
> >
> > ...
> >
> >  5 files changed, 1029 insertions(+), 90 deletions(-)
> 
> hm, what a lot of code.

This is mostly new stuff for that driver.

> 
> > +config TSI721_DMA
> > +	bool "IDT Tsi721 RapidIO DMA support"
> > +	depends on RAPIDIO_TSI721
> > +	default "n"
> > +	select RAPIDIO_DMA_ENGINE
> > +	help
> > +	  Enable DMA support for IDT Tsi721 PCIe-to-SRIO controller.
> 
> Do we really need to offer this decision to the user?  If possible it
> would be better to always enable the feature where that makes sense.
> Better code coverage, less maintenance effort, more effective testing
> effort, possibly cleaner code.

Agree. Influence of dmaengine here ;)
But we still need RAPIDIO_DMA_ENGINE option to control DMA
configuration for devices that are RIO targets only. 

> 
> >
> > ...
> >
> > +static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *chan)
> > +{
> > +	struct tsi721_dma_desc *bd_ptr;
> > +	struct device *dev = chan->dchan.device->dev;
> > +	u64		*sts_ptr;
> > +	dma_addr_t	bd_phys;
> > +	dma_addr_t	sts_phys;
> > +	int		sts_size;
> > +	int		bd_num = chan->bd_num;
> > +
> > +	dev_dbg(dev, "Init Block DMA Engine, CH%d\n", chan->id);
> > +
> > +	/* Allocate space for DMA descriptors */
> > +	bd_ptr = dma_alloc_coherent(dev,
> > +				bd_num * sizeof(struct tsi721_dma_desc),
> > +				&bd_phys, GFP_KERNEL);
> > +	if (!bd_ptr)
> > +		return -ENOMEM;
> > +
> > +	chan->bd_phys = bd_phys;
> > +	chan->bd_base = bd_ptr;
> > +
> > +	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
> > +
> > +	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
> > +		bd_ptr, (unsigned long long)bd_phys);
> > +
> > +	/* Allocate space for descriptor status FIFO */
> > +	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
> > +					bd_num : TSI721_DMA_MINSTSSZ;
> > +	sts_size = roundup_pow_of_two(sts_size);
> > +	sts_ptr = dma_alloc_coherent(dev,
> > +				     sts_size * sizeof(struct
tsi721_dma_sts),
> > +				     &sts_phys, GFP_KERNEL);
> > +	if (!sts_ptr) {
> > +		/* Free space allocated for DMA descriptors */
> > +		dma_free_coherent(dev,
> > +				  bd_num * sizeof(struct
tsi721_dma_desc),
> > +				  bd_ptr, bd_phys);
> > +		chan->bd_base = NULL;
> > +		return -ENOMEM;
> > +	}
> > +
> > +	chan->sts_phys = sts_phys;
> > +	chan->sts_base = sts_ptr;
> > +	chan->sts_size = sts_size;
> > +
> > +	memset(sts_ptr, 0, sts_size);
> 
> You meant

I really need it here. That status block tracks progress by keeping
non-zero addresses of processed descriptors.
 
> 
> --- a/drivers/rapidio/devices/tsi721.c~rapidio-tsi721-add-dma-engine-
> support-fix
> +++ a/drivers/rapidio/devices/tsi721.c
> @@ -1006,7 +1006,7 @@ static int tsi721_bdma_maint_init(struct
>  	priv->mdma.sts_base = sts_ptr;
>  	priv->mdma.sts_size = sts_size;
> 
> -	memset(sts_ptr, 0, sts_size);
> +	memset(sts_ptr, 0, sts_size * sizeof(struct tsi721_dma_sts));
> 
>  	dev_dbg(&priv->pdev->dev,
>  		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
> 
> However that's at least two instances where you wanted a
> dma_zalloc_coherent().  How's about we give ourselves one?

Does this mean that I am on hook for it as a "most frequent user"?

> 
> 
> > +	dev_dbg(dev,
> > +		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
> > +		sts_ptr, (unsigned long long)sts_phys, sts_size);
> > +
> > +	/* Initialize DMA descriptors ring */
> > +	bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
> > +	bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
> > +
TSI721_DMAC_DPTRL_MASK);
> > +	bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
> > +
> > +	/* Setup DMA descriptor pointers */
> > +	iowrite32(((u64)bd_phys >> 32),
> > +		chan->regs + TSI721_DMAC_DPTRH);
> > +	iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
> > +		chan->regs + TSI721_DMAC_DPTRL);
> > +
> > +	/* Setup descriptor status FIFO */
> > +	iowrite32(((u64)sts_phys >> 32),
> > +		chan->regs + TSI721_DMAC_DSBH);
> > +	iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
> > +		chan->regs + TSI721_DMAC_DSBL);
> > +	iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
> > +		chan->regs + TSI721_DMAC_DSSZ);
> > +
> > +	/* Clear interrupt bits */
> > +	iowrite32(TSI721_DMAC_INT_ALL,
> > +		chan->regs + TSI721_DMAC_INT);
> > +
> > +	ioread32(chan->regs + TSI721_DMAC_INT);
> > +
> > +	/* Toggle DMA channel initialization */
> > +	iowrite32(TSI721_DMAC_CTL_INIT,	chan->regs + TSI721_DMAC_CTL);
> > +	ioread32(chan->regs + TSI721_DMAC_CTL);
> > +	chan->wr_count = chan->wr_count_next = 0;
> > +	chan->sts_rdptr = 0;
> > +	udelay(10);
> > +
> > +	return 0;
> > +}
> > +
> >
> > ...
> >
> > +{
> > +	/* Disable BDMA channel interrupts */
> > +	iowrite32(0, chan->regs + TSI721_DMAC_INTE);
> > +
> > +	tasklet_schedule(&chan->tasklet);
> 
> I'm not seeing any tasklet_disable()s on the shutdown/rmmod paths.  Is
> there anything here which prevents shutdown races against a
> still-pending tasklet?

Marked for review.

> 
> > +}
> > +
> >
> > ...
> >
> > +static
> > +int tsi721_fill_desc(struct tsi721_bdma_chan *chan, struct
> tsi721_tx_desc *desc,
> > +	struct scatterlist *sg, enum dma_rtype rtype, u32 sys_size)
> > +{
> > +	struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
> > +	u64 rio_addr;
> > +
> > +	if (sg_dma_len(sg) > TSI721_DMAD_BCOUNT1 + 1) {
> > +		dev_err(chan->dchan.device->dev, "SG element is too
> large\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	dev_dbg(chan->dchan.device->dev,
> > +		"desc: 0x%llx, addr: 0x%llx len: 0x%x\n",
> > +		(u64)desc->txd.phys, (unsigned long
> long)sg_dma_address(sg),
> > +		sg_dma_len(sg));
> > +
> > +	dev_dbg(chan->dchan.device->dev, "bd_ptr = %p did=%d
> raddr=0x%llx\n",
> > +		bd_ptr, desc->destid, desc->rio_addr);
> > +
> > +	/* Initialize DMA descriptor */
> > +	bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
> > +					(rtype << 19) | desc->destid);
> > +	if (desc->interrupt)
> > +		bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
> > +	bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
> > +					(sys_size << 26) |
sg_dma_len(sg));
> > +	rio_addr = (desc->rio_addr >> 2) |
> > +				((u64)(desc->rio_addr_u & 0x3) << 62);
> > +	bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff);
> > +	bd_ptr->raddr_hi = cpu_to_le32(rio_addr >> 32);
> > +	bd_ptr->t1.bufptr_lo = cpu_to_le32(
> > +					(u64)sg_dma_address(sg) &
0xffffffff);
> > +	bd_ptr->t1.bufptr_hi = cpu_to_le32((u64)sg_dma_address(sg) >>
> 32);
> > +	bd_ptr->t1.s_dist = 0;
> > +	bd_ptr->t1.s_size = 0;
> > +
> > +	mb();
> 
> Mystery barrier needs a comment explaining why it's here, please.
This
> is almost always the case with barriers.

Marked for review.

> 
> > +	return 0;
> > +}
> > +
> >
> > ...
> >
> > +static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
> > +{
> > +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> > +	struct tsi721_device *priv = to_tsi721(dchan->device);
> > +	struct tsi721_tx_desc *desc = NULL;
> > +	LIST_HEAD(tmp_list);
> > +	int i;
> > +	int rc;
> > +
> > +	if (chan->bd_base)
> > +		return chan->bd_num - 1;
> > +
> > +	/* Initialize BDMA channel */
> > +	if (tsi721_bdma_ch_init(chan)) {
> > +		dev_err(dchan->device->dev, "Unable to initialize data
DMA"
> > +			" channel %d, aborting\n", chan->id);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/* Allocate matching number of logical descriptors */
> > +	desc = kzalloc((chan->bd_num - 1) * sizeof(struct
> tsi721_tx_desc),
> > +			GFP_KERNEL);
> 
> kcalloc() would be a better fit here.

Agree. Would look more clear.

> 
> > +	if (!desc) {
> > +		dev_err(dchan->device->dev,
> > +			"Failed to allocate logical descriptors\n");
> > +		rc = -ENOMEM;
> > +		goto err_out;
> > +	}
> > +
> > +	chan->tx_desc = desc;
> > +
> > +	for (i = 0; i < chan->bd_num - 1; i++) {
> > +		dma_async_tx_descriptor_init(&desc[i].txd, dchan);
> > +		desc[i].txd.tx_submit = tsi721_tx_submit;
> > +		desc[i].txd.flags = DMA_CTRL_ACK;
> > +		INIT_LIST_HEAD(&desc[i].tx_list);
> > +		list_add_tail(&desc[i].desc_node, &tmp_list);
> > +	}
> > +
> > +	spin_lock_bh(&chan->lock);
> > +	list_splice(&tmp_list, &chan->free_list);
> > +	chan->completed_cookie = dchan->cookie = 1;
> > +	spin_unlock_bh(&chan->lock);
> > +
> > +#ifdef CONFIG_PCI_MSI
> > +	if (priv->flags & TSI721_USING_MSIX) {
> > +		/* Request interrupt service if we are in MSI-X mode */
> > +		rc = request_irq(
> > +			priv->msix[TSI721_VECT_DMA0_DONE +
chan->id].vector,
> > +			tsi721_bdma_msix, 0,
> > +			priv->msix[TSI721_VECT_DMA0_DONE + chan-
> >id].irq_name,
> > +			(void *)chan);
> > +
> > +		if (rc) {
> > +			dev_dbg(dchan->device->dev,
> > +				"Unable to allocate MSI-X interrupt for
"
> > +				"BDMA%d-DONE\n", chan->id);
> > +			goto err_out;
> > +		}
> > +
> > +		rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
> > +					    chan->id].vector,
> > +			tsi721_bdma_msix, 0,
> > +			priv->msix[TSI721_VECT_DMA0_INT +
chan->id].irq_name,
> > +			(void *)chan);
> > +
> > +		if (rc)	{
> > +			dev_dbg(dchan->device->dev,
> > +				"Unable to allocate MSI-X interrupt for
"
> > +				"BDMA%d-INT\n", chan->id);
> > +			free_irq(
> > +				priv->msix[TSI721_VECT_DMA0_DONE +
> > +					   chan->id].vector,
> > +				(void *)chan);
> > +			rc = -EIO;
> > +			goto err_out;
> > +		}
> > +	}
> > +#endif /* CONFIG_PCI_MSI */
> > +
> > +	tsi721_bdma_interrupt_enable(chan, 1);
> > +
> > +	return chan->bd_num - 1;
> > +
> > +err_out:
> > +	kfree(desc);
> > +	tsi721_bdma_ch_free(chan);
> > +	return rc;
> > +}
> > +
> >
> > ...
> >
> > +static
> > +enum dma_status tsi721_tx_status(struct dma_chan *dchan,
> dma_cookie_t cookie,
> > +				 struct dma_tx_state *txstate)
> > +{
> > +	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
> > +	dma_cookie_t		last_used;
> > +	dma_cookie_t		last_completed;
> > +	int			ret;
> > +
> > +	spin_lock_irq(&bdma_chan->lock);
> > +	last_completed = bdma_chan->completed_cookie;
> > +	last_used = dchan->cookie;
> > +	spin_unlock_irq(&bdma_chan->lock);
> > +
> > +	ret = dma_async_is_complete(cookie, last_completed, last_used);
> > +
> > +	dma_set_tx_state(txstate, last_completed, last_used, 0);
> > +
> > +	dev_dbg(dchan->device->dev,
> > +		"%s: exit, ret: %d, last_completed: %d, last_used:
%d\n",
> > +		__func__, ret, last_completed, last_used);
> > +
> > +	return ret;
> > +}
> > +
> > +static void tsi721_issue_pending(struct dma_chan *dchan)
> > +{
> > +	struct tsi721_bdma_chan *chan = to_tsi721_chan(dchan);
> > +
> > +	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
> > +
> > +	if (tsi721_dma_is_idle(chan)) {
> > +		spin_lock_bh(&chan->lock);
> > +		tsi721_advance_work(chan);
> > +		spin_unlock_bh(&chan->lock);
> > +	} else
> > +		dev_dbg(dchan->device->dev,
> > +			"%s: DMA channel still busy\n", __func__);
> > +}
> 
> I really don't like that a "struct tsi721_bdma_chan *" is called
"chan"
> in come places and "bdma_chan" in others.  "bdma_chan" is better.
> 
Agree. "bdma_chan" gives more device-specific meaning.
Opposite comment that I have heard was that this driver uses "dma" too
much.
Will unify to "bdma_chan".

> The code takes that lock with spin_lock_bh() in some places and
> spin_lock_irq() in others.  I trust there's some method to it all ;)
> Has
> it been carefully tested with lockdep enabled?

Ooops. Another proof that global replace does not work.
Cleared spin_lock_irqsave() well though ;)

lockdep is enabled on my test machine and it did not complain in
this case. I am using a test adopted from one in dmaengine and it
calls both routines that have spin_lock_irq(). 

> 
> >
> > ...
> >

Thank you,

Alex.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-10-03 17:53     ` Bounine, Alexandre
@ 2011-10-04 21:43       ` Andrew Morton
  2011-10-05  1:38         ` Bounine, Alexandre
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2011-10-04 21:43 UTC (permalink / raw)
  To: Bounine, Alexandre
  Cc: linux-kernel, linuxppc-dev, Vinod Koul, Kumar Gala, Matt Porter,
	Li Yang

On Mon, 3 Oct 2011 10:53:45 -0700
"Bounine, Alexandre" <Alexandre.Bounine@idt.com> wrote:

> > > +	memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
> > > +
> > > +	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
> > > +		bd_ptr, (unsigned long long)bd_phys);
> > > +
> > > +	/* Allocate space for descriptor status FIFO */
> > > +	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
> > > +					bd_num : TSI721_DMA_MINSTSSZ;
> > > +	sts_size = roundup_pow_of_two(sts_size);
> > > +	sts_ptr = dma_alloc_coherent(dev,
> > > +				     sts_size * sizeof(struct
> tsi721_dma_sts),
> > > +				     &sts_phys, GFP_KERNEL);
> > > +	if (!sts_ptr) {
> > > +		/* Free space allocated for DMA descriptors */
> > > +		dma_free_coherent(dev,
> > > +				  bd_num * sizeof(struct
> tsi721_dma_desc),
> > > +				  bd_ptr, bd_phys);
> > > +		chan->bd_base = NULL;
> > > +		return -ENOMEM;
> > > +	}
> > > +
> > > +	chan->sts_phys = sts_phys;
> > > +	chan->sts_base = sts_ptr;
> > > +	chan->sts_size = sts_size;
> > > +
> > > +	memset(sts_ptr, 0, sts_size);
> > 
> > You meant
> 
> I really need it here. That status block tracks progress by keeping
> non-zero addresses of processed descriptors.

Confused.  Are you saying that the use of "sts_size" there was
intentional?

> > 
> > --- a/drivers/rapidio/devices/tsi721.c~rapidio-tsi721-add-dma-engine-
> > support-fix
> > +++ a/drivers/rapidio/devices/tsi721.c
> > @@ -1006,7 +1006,7 @@ static int tsi721_bdma_maint_init(struct
> >  	priv->mdma.sts_base = sts_ptr;
> >  	priv->mdma.sts_size = sts_size;
> > 
> > -	memset(sts_ptr, 0, sts_size);
> > +	memset(sts_ptr, 0, sts_size * sizeof(struct tsi721_dma_sts));
> > 
> >  	dev_dbg(&priv->pdev->dev,
> >  		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
> > 
> > However that's at least two instances where you wanted a
> > dma_zalloc_coherent().  How's about we give ourselves one?
> 
> Does this mean that I am on hook for it as a "most frequent user"?

No, it can be used all over the place: drivers/net/irda/w83977af_ir.c,
drivers/scsi/bnx2fc/bnx2fc_tgt.c,
drivers/net/wireless/rt2x00/rt2x00pci.c,
drivers/crypto/amcc/crypto4xx_core.c and many more.



^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-10-04 21:43       ` Andrew Morton
@ 2011-10-05  1:38         ` Bounine, Alexandre
  2011-10-05  1:57           ` Andrew Morton
  0 siblings, 1 reply; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-05  1:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Vinod Koul, linux-kernel, linuxppc-dev

Andrew Morton wrote:
> 
> On Mon, 3 Oct 2011 10:53:45 -0700
> "Bounine, Alexandre" <Alexandre.Bounine@idt.com> wrote:
> 
> > > > +	memset(bd_ptr, 0, bd_num * sizeof(struct
tsi721_dma_desc));
> > > > +
> > > > +	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
> > > > +		bd_ptr, (unsigned long long)bd_phys);
> > > > +
> > > > +	/* Allocate space for descriptor status FIFO */
> > > > +	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
> > > > +					bd_num :
TSI721_DMA_MINSTSSZ;
> > > > +	sts_size = roundup_pow_of_two(sts_size);
> > > > +	sts_ptr = dma_alloc_coherent(dev,
> > > > +				     sts_size * sizeof(struct
> > tsi721_dma_sts),
> > > > +				     &sts_phys, GFP_KERNEL);
> > > > +	if (!sts_ptr) {
> > > > +		/* Free space allocated for DMA descriptors */
> > > > +		dma_free_coherent(dev,
> > > > +				  bd_num * sizeof(struct
> > tsi721_dma_desc),
> > > > +				  bd_ptr, bd_phys);
> > > > +		chan->bd_base = NULL;
> > > > +		return -ENOMEM;
> > > > +	}
> > > > +
> > > > +	chan->sts_phys = sts_phys;
> > > > +	chan->sts_base = sts_ptr;
> > > > +	chan->sts_size = sts_size;
> > > > +
> > > > +	memset(sts_ptr, 0, sts_size);
> > >
> > > You meant
> >
> > I really need it here. That status block tracks progress by keeping
> > non-zero addresses of processed descriptors.
> 
> Confused.  Are you saying that the use of "sts_size" there was
> intentional?

Sorry, somehow I mistakenly linked this comment to dma_zalloc_coherent()
because of use of memset.
Yes, it should be "sts_size * sizeof(struct tsi721_dma_sts)" as
allocated.

> > >
> > > --- a/drivers/rapidio/devices/tsi721.c~rapidio-tsi721-add-dma-
> engine-
> > > support-fix
> > > +++ a/drivers/rapidio/devices/tsi721.c
> > > @@ -1006,7 +1006,7 @@ static int tsi721_bdma_maint_init(struct
> > >  	priv->mdma.sts_base = sts_ptr;
> > >  	priv->mdma.sts_size = sts_size;
> > >
> > > -	memset(sts_ptr, 0, sts_size);
> > > +	memset(sts_ptr, 0, sts_size * sizeof(struct tsi721_dma_sts));
> > >
> > >  	dev_dbg(&priv->pdev->dev,
> > >  		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
> > >
> > > However that's at least two instances where you wanted a
> > > dma_zalloc_coherent().  How's about we give ourselves one?
> >
> > Does this mean that I am on hook for it as a "most frequent user"?
> 
> No, it can be used all over the place: drivers/net/irda/w83977af_ir.c,
> drivers/scsi/bnx2fc/bnx2fc_tgt.c,
> drivers/net/wireless/rt2x00/rt2x00pci.c,
> drivers/crypto/amcc/crypto4xx_core.c and many more.

In this case I will happily use dma_zalloc_coherent() as soon as
it is available


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-10-05  1:38         ` Bounine, Alexandre
@ 2011-10-05  1:57           ` Andrew Morton
  2011-10-05  2:57             ` Bounine, Alexandre
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2011-10-05  1:57 UTC (permalink / raw)
  To: Bounine, Alexandre; +Cc: Vinod Koul, linux-kernel, linuxppc-dev

On Tue, 4 Oct 2011 18:38:09 -0700 "Bounine, Alexandre" <Alexandre.Bounine@idt.com> wrote:

> > No, it can be used all over the place: drivers/net/irda/w83977af_ir.c,
> > drivers/scsi/bnx2fc/bnx2fc_tgt.c,
> > drivers/net/wireless/rt2x00/rt2x00pci.c,
> > drivers/crypto/amcc/crypto4xx_core.c and many nmore.
> 
> In this case I will happily use dma_zalloc_coherent() as soon as
> it is available

geeze, which of us is more lazy?

Here we go...


From: Andrew Morton <akpm@linux-foundation.org>
Subject: include/linux/dma-mapping.h: add dma_zalloc_coherent()

Lots of driver code does a dma_alloc_coherent() and then zeroes out the
memory with a memset.  Make it easy for them.

Cc: Alexandre Bounine <alexandre.bounine@idt.com>
From: Andrew Morton <akpm@linux-foundation.org>
Subject: include-linux-dma-mappingh-add-dma_zalloc_coherent
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 Documentation/DMA-API.txt   |    7 +++++++
 include/linux/dma-mapping.h |   10 ++++++++++
 2 files changed, 17 insertions(+)

diff -puN include/linux/dma-mapping.h~include-linux-dma-mappingh-add-dma_zalloc_coherent include/linux/dma-mapping.h
--- a/include/linux/dma-mapping.h~include-linux-dma-mappingh-add-dma_zalloc_coherent
+++ a/include/linux/dma-mapping.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_DMA_MAPPING_H
 #define _LINUX_DMA_MAPPING_H
 
+#include <linux/string.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/dma-attrs.h>
@@ -117,6 +118,15 @@ static inline int dma_set_seg_boundary(s
 		return -EIO;
 }
 
+static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
+					dma_addr_t *dma_handle, gfp_t flag)
+{
+	void *ret = dma_alloc_coherent(dev, size, dma_handle, flag);
+	if (ret)
+		memset(ret, 0, size);
+	return ret;
+}
+
 #ifdef CONFIG_HAS_DMA
 static inline int dma_get_cache_alignment(void)
 {
diff -puN Documentation/DMA-API.txt~include-linux-dma-mappingh-add-dma_zalloc_coherent Documentation/DMA-API.txt
--- a/Documentation/DMA-API.txt~include-linux-dma-mappingh-add-dma_zalloc_coherent
+++ a/Documentation/DMA-API.txt
@@ -50,6 +50,13 @@ specify the GFP_ flags (see kmalloc) for
 implementation may choose to ignore flags that affect the location of
 the returned memory, like GFP_DMA).
 
+void *
+dma_zalloc_coherent(struct device *dev, size_t size,
+			     dma_addr_t *dma_handle, gfp_t flag)
+
+Wraps dma_alloc_coherent() and also zeroes the returned memory if the
+allocation attempt succeeded.
+
 void
 dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 			   dma_addr_t dma_handle)
_


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support
  2011-10-05  1:57           ` Andrew Morton
@ 2011-10-05  2:57             ` Bounine, Alexandre
  0 siblings, 0 replies; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-05  2:57 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Vinod Koul, linux-kernel, linuxppc-dev

Andrew Morton wrote:
> 
> > > No, it can be used all over the place:
> drivers/net/irda/w83977af_ir.c,
> > > drivers/scsi/bnx2fc/bnx2fc_tgt.c,
> > > drivers/net/wireless/rt2x00/rt2x00pci.c,
> > > drivers/crypto/amcc/crypto4xx_core.c and many nmore.
> >
> > In this case I will happily use dma_zalloc_coherent() as soon as
> > it is available
> 
> geeze, which of us is more lazy?
> 
> Here we go...
> 
I have an excuse - I am on vacation. Back to the office in almost two
weeks.
WebMail is the only tool for me right now ;) 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-03 16:52   ` Bounine, Alexandre
@ 2011-10-05 20:38     ` Williams, Dan J
  2011-10-07 16:12       ` Bounine, Alexandre
  2011-10-07  5:27     ` Vinod Koul
  1 sibling, 1 reply; 21+ messages in thread
From: Williams, Dan J @ 2011-10-05 20:38 UTC (permalink / raw)
  To: Bounine, Alexandre
  Cc: Vinod Koul, akpm, linux-kernel, linuxppc-dev, Kumar Gala,
	Matt Porter, Li Yang, Dave Jiang

On Mon, Oct 3, 2011 at 9:52 AM, Bounine, Alexandre
<Alexandre.Bounine@idt.com> wrote:
> Vinod Koul wrote:
>>
>> On Fri, 2011-09-30 at 17:38 -0400, Alexandre Bounine wrote:
>> Please CC *maintainers* on your patches, get_maintainers.pl will tell
>> you who. Adding Dan here
>
> Based on https://lkml.org/lkml/2011/2/14/67 and use of DMA_SLAVE in this
> patch I decided that you are the best match among two and there is no reason
> to disturb Dan ;)
>
>> > Adds DMA Engine framework support into RapidIO subsystem.
>> > Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from remote
>> > RapidIO target devices. Uses scatterlist to describe local data buffer and
>> > dma_chan.private member to pass target specific information. Supports flat
>> > data buffer only for a remote side.
>> The way dmaengine works today is that it doesn't know anything about
>> client subsystem. But this brings in a subsystem details to dmaengine
>> which I don't agree with yet.
>> Why can't we abstract this out??
>
> The only thing that brings subsystem knowledge into dmaengine is DMA_RAPIDIO flag.
> I am actually on the fence about this. From RapidIO side point of view I do not
> need that flag at all.
> RapidIO uses a filter routine that is sufficient to identify dmaengine channels
> associated with specific RapidIO mport. Use of DMA_SLAVE flag is safe here.
> Use of private member of dma_chan is "private" business of RapidIO and does
> not break anything.
>
> My concern here is that other subsystems may use/request DMA_SLAVE channel(s) as well
> and wrongfully acquire one that belongs to RapidIO. In this case separation with another
> flag may have a sense - it is possible to have a system that uses RapidIO
> and other "traditional" DMA slave channel.
>
> This is why I put that proposed interface for discussion instead of keeping everything
> inside of RapidIO.
> If you think that situation above will not happen I will be happy to remove
> that subsystem knowledge from dmaengine files.

I don't think that situation will happen, even on the same arch I
don't think DMA_SLAVE is ready to be enabled generically.  So you're
probably safe here.

> My most recent test implementation runs without DMA_RAPIDIO flag though.
>
>>
>> After going thru the patch, I do not believe that this is a case of
>> SLAVE transfers, Dan can you please take a look at this patch
>
> I agree, this is not a case of "pure" slave transfers but existing DMA_SLAVE
> interface fits well into the RapidIO operations.
>
> First, we have only one memory mapped location on the host side. We transfer
> data to/from location that is not mapped into memory on the same side.
>
> Second, having ability to pass private target information allows me to pass
> information about remote target device on per-transfer basis.

...but there is no expectation that these engines will be generically
useful to other subsytems.  To be clear you are just using dmaengine
as a match making service for your dma providers to clients, right?

>
>>
>>
>> > Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
>> > Cc: Vinod Koul <vinod.koul@intel.com>
>> > Cc: Kumar Gala <galak@kernel.crashing.org>
>> > Cc: Matt Porter <mporter@kernel.crashing.org>
>> > Cc: Li Yang <leoli@freescale.com>
>> > ---
>> >  drivers/dma/dmaengine.c   |    4 ++
> ... skip ...
>> > +#ifdef CONFIG_RAPIDIO_DMA_ENGINE
>> > +
>> > +#include <linux/dmaengine.h>
>> > +
> ... skip ...
>> > + */
>> > +struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
>> > +   struct dma_chan *dchan, struct rio_dma_data *data,
>> > +   enum dma_data_direction direction, unsigned long flags)
>> > +{
>> > +   struct dma_async_tx_descriptor *txd = NULL;
>> > +   struct rio_dma_ext rio_ext;
>> > +
>> > +   rio_ext.destid = rdev->destid;
>> > +   rio_ext.rio_addr_u = data->rio_addr_u;
>> > +   rio_ext.rio_addr = data->rio_addr;
>> > +   rio_ext.wr_type = data->wr_type;
>> > +   dchan->private = &rio_ext;
>> > +
>> > +   txd = dchan->device->device_prep_slave_sg(dchan, data->sg, data-
>> >sg_len,
>> > +                                             direction, flags);
>> > +
>> > +   return txd;
>> > +}
>> > +EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
>> You should move the rdev and data to dma_slave_config, that way you
>> should be able to use the existing _prep_slave_sg function.
>
> RapidIO network usually has more than one device attached to it and
> single DMA channel may service data transfers to/from several devices.
> In this case device information should be passed on per-transfer basis.
>

You could maybe do what async_tx does and just apply the extra context
after the ->prep(), but before ->submit(), but it looks like that
context is critical to setting up the operation.

This looks pretty dangerous without knowing the other details.  What
prevents another thread from changing dchan->private before the the
prep routine reads it?

DMA_SLAVE assumes a static relationship between dma device and
slave-device, instead this rapid-io case is a per-operation slave
context.  It sounds like you really do want a new dma operation type
that is just an extra-parameter version of the current
->device_prep_slave_sg.  But now we're getting into to
dma_transaction_type proliferation again.  This is probably more fuel
for the fire of creating a structure transfer template that defines
multiple possible operation types and clients just fill in the fields
that they need, rather than adding new operation types for every
possible permutation of copy operation (DMA_SLAVE, DMA_MEMCPY,
DMA_CYCLIC, DMA_SG, DMA_INTERLEAVE, DMA_RAPIDIO), it's getting to be a
bit much.

As a starting point, since this the first driver proposal to have
per-operation slave context and there are other rapid-io specific
considerations, maybe it's ok to have a rio_dma_prep_slave_sg() that
does something like:

struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);

bdma_chan->prep_sg(rext, sgl, sg_len, direction, flags);

Thoughts?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-03 16:52   ` Bounine, Alexandre
  2011-10-05 20:38     ` Williams, Dan J
@ 2011-10-07  5:27     ` Vinod Koul
  2011-10-07 19:08       ` Bounine, Alexandre
  1 sibling, 1 reply; 21+ messages in thread
From: Vinod Koul @ 2011-10-07  5:27 UTC (permalink / raw)
  To: Bounine, Alexandre
  Cc: Dan, akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

On Mon, 2011-10-03 at 09:52 -0700, Bounine, Alexandre wrote:
> Vinod Koul wrote:
> > 
> > On Fri, 2011-09-30 at 17:38 -0400, Alexandre Bounine wrote:
> > Please CC *maintainers* on your patches, get_maintainers.pl will tell
> > you who. Adding Dan here
> 
> Based on https://lkml.org/lkml/2011/2/14/67 and use of DMA_SLAVE in this
> patch I decided that you are the best match among two and there is no reason
> to disturb Dan ;) 
I don't think he minds :) and we can all benefit from his wisdom

> 
> > > Adds DMA Engine framework support into RapidIO subsystem.
> > > Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from remote
> > > RapidIO target devices. Uses scatterlist to describe local data buffer and
> > > dma_chan.private member to pass target specific information. Supports flat
> > > data buffer only for a remote side.
> > The way dmaengine works today is that it doesn't know anything about
> > client subsystem. But this brings in a subsystem details to dmaengine
> > which I don't agree with yet.
> > Why can't we abstract this out??
> 
> The only thing that brings subsystem knowledge into dmaengine is DMA_RAPIDIO flag.
> I am actually on the fence about this. From RapidIO side point of view I do not
> need that flag at all.
> RapidIO uses a filter routine that is sufficient to identify dmaengine channels
> associated with specific RapidIO mport. Use of DMA_SLAVE flag is safe here.
> Use of private member of dma_chan is "private" business of RapidIO and does
> not break anything. 
> 
> My concern here is that other subsystems may use/request DMA_SLAVE channel(s) as well
> and wrongfully acquire one that belongs to RapidIO. In this case separation with another
> flag may have a sense - it is possible to have a system that uses RapidIO
> and other "traditional" DMA slave channel.
Nope that will never happen in current form.
Every controller driver today "magically" ensures that it doesn't get
any other dma controllers channel. We use filter function for that.
Although it is not clean yet and we are working to fix that but that's
another discussion.
Even specifying plain DMA_SLAVE should work if you code your filter
function properly :)

> 
> This is why I put that proposed interface for discussion instead of keeping everything
> inside of RapidIO.
> If you think that situation above will not happen I will be happy to remove
> that subsystem knowledge from dmaengine files.
> 
> My most recent test implementation runs without DMA_RAPIDIO flag though.
> 
> > 
> > After going thru the patch, I do not believe that this is a case of
> > SLAVE transfers, Dan can you please take a look at this patch
> 
> I agree, this is not a case of "pure" slave transfers but existing DMA_SLAVE
> interface fits well into the RapidIO operations.
> 
> First, we have only one memory mapped location on the host side. We transfer
> data to/from location that is not mapped into memory on the same side.  
> 
> Second, having ability to pass private target information allows me to pass
> information about remote target device on per-transfer basis.
Okay, then why not pass the dma address and make your dma driver
transparent (i saw you passed RIO address, IIRC 64+2 bits)
Currently using dma_slave_config we pass channel specific information,
things like peripheral address and config don't change typically between
transfers and if you have some controller specific properties you can
pass them by embedding dma_slave_config in your specific structure.
Worst case, you can configure slave before every prepare


-- 
~Vinod


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-05 20:38     ` Williams, Dan J
@ 2011-10-07 16:12       ` Bounine, Alexandre
  0 siblings, 0 replies; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-07 16:12 UTC (permalink / raw)
  To: Williams, Dan J
  Cc: Vinod Koul, akpm, linux-kernel, linuxppc-dev, Kumar Gala,
	Matt Porter, Li Yang, Dave Jiang

Dan J Williams wrote:
> 
> On Mon, Oct 3, 2011 at 9:52 AM, Bounine, Alexandre
> <Alexandre.Bounine@idt.com> wrote:
> >
> > My concern here is that other subsystems may use/request DMA_SLAVE
channel(s) as well
> > and wrongfully acquire one that belongs to RapidIO. In this case
separation with another
> > flag may have a sense - it is possible to have a system that uses
RapidIO
> > and other "traditional" DMA slave channel.
> >
> > This is why I put that proposed interface for discussion instead of
keeping everything
> > inside of RapidIO.
> > If you think that situation above will not happen I will be happy to
remove
> > that subsystem knowledge from dmaengine files.
> 
> I don't think that situation will happen, even on the same arch I
> don't think DMA_SLAVE is ready to be enabled generically.  So you're
> probably safe here.

Thank you for confirmation. I will rely on DMA_SLAVE only as in my most
recent
version. 
 
> >
> > I agree, this is not a case of "pure" slave transfers but existing
DMA_SLAVE
> > interface fits well into the RapidIO operations.
> >
> > First, we have only one memory mapped location on the host side. We
transfer
> > data to/from location that is not mapped into memory on the same
side.
> >
> > Second, having ability to pass private target information allows me
to pass
> > information about remote target device on per-transfer basis.
> 
> ...but there is no expectation that these engines will be generically
> useful to other subsytems.  To be clear you are just using dmaengine
> as a match making service for your dma providers to clients, right?

Not only that. As an example, I am offering the other defined DMA slave mode
callbacks in the tsi721_dma driver. I think that RapidIO-specific DMA
channels should follow the unified DMA engine interface. I expect the
other DMA-defined functions to be used directly without RapidIO
specifics. E.g. tx_submit, tx_status and issue_pending are implemented
in the tsi721_dma driver. A similar approach may be applied to the
fsl_dma driver, which is capable of RapidIO data transfers on platforms
with a RapidIO interface.

> > ... skip ...
> > RapidIO network usually has more than one device attached to it and
> > single DMA channel may service data transfers to/from several
devices.
> > In this case device information should be passed on per-transfer
basis.
> >
> 
> You could maybe do what async_tx does and just apply the extra context
> after the ->prep(), but before ->submit(), but it looks like that
> context is critical to setting up the operation.

Yes, it is possible to do, but it does not look quite as safe and effective
as passing all related parameters during a single call. This would require
RIO DMA drivers to hold a "half cooked" descriptor chain until the next
portion of information arrives. It also requires the prep and
post-configure calls to be coupled together by locks. In this situation,
prep with private data looks safer and more effective.

> This looks pretty dangerous without knowing the other details.  What
> prevents another thread from changing dchan->private before the the
> prep routine reads it?

Yes, locking is needed in rio_dma_prep_slave_sg() around a call for prep
routine. After all internal descriptors are set dchan can be submitted
with new private content.  

> 
> DMA_SLAVE assumes a static relationship between dma device and
> slave-device, instead this rapid-io case is a per-operation slave
> context.  It sounds like you really do want a new dma operation type
> that is just an extra-parameter version of the current
> ->device_prep_slave_sg.  But now we're getting into to
> dma_transaction_type proliferation again.  This is probably more fuel
> for the fire of creating a structure transfer template that defines
> multiple possible operation types and clients just fill in the fields
> that they need, rather than adding new operation types for every
> possible permutation of copy operation (DMA_SLAVE, DMA_MEMCPY,
> DMA_CYCLIC, DMA_SG, DMA_INTERLEAVE, DMA_RAPIDIO), it's getting to be a
> bit much.

Exactly. I need the ability to pass a private parameter to the prep
function.
I chose to adopt the DMA engine interface instead of adding one completely
internal to RapidIO because having one common API makes much more sense.
Passing a user-defined data structure when building an individual request
would not only make my life easier but will probably help some other
drivers as well.
 
> As a starting point, since this the first driver proposal to have
> per-operation slave context and there are other rapid-io specific
> considerations, maybe it's ok to have a rio_dma_prep_slave_sg() that
> does something like:
> 
> struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
> 
> bdma_chan->prep_sg(rext, sgl, sg_len, direction, flags);
> 
> Thoughts?

This brings HW dependence into generic RapidIO layer.
I would like to see it as a generic interface that is available
to other RIO-capable devices (e.g Freescale's 85xx and QorIQ).

I would probably make  ->prep_sg callback part of rio_mport structure
(which holds dma_device). In this case HW will be well abstracted.
The only problem will be with registering with DMA engine but this can
be solved by providing a pointer to a stub routine.

Second option may be keeping rio_dma_prep_slave_sg() "as is" but with an
appropriate
locking as I mentioned above (maybe remove "slave" from its name to
reduce confusion).

Thank you,

Alex.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-07  5:27     ` Vinod Koul
@ 2011-10-07 19:08       ` Bounine, Alexandre
  2011-10-15 17:35         ` Vinod Koul
  0 siblings, 1 reply; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-07 19:08 UTC (permalink / raw)
  To: Vinod Koul
  Cc: Dan, akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="UTF-8", Size: 3134 bytes --]

Vinod Koul wrote:
> 
> On Mon, 2011-10-03 at 09:52 -0700, Bounine, Alexandre wrote:
> >
> > My concern here is that other subsystems may use/request DMA_SLAVE channel(s) as well
> > and wrongfully acquire one that belongs to RapidIO. In this case separation with another
> > flag may have a sense - it is possible to have a system that uses RapidIO
> > and other "traditional" DMA slave channel.
> Nope that will never happen in current form.
> Every controller driver today "magically" ensures that it doesn't get
> any other dma controllers channel. We use filter function for that.
> Although it is not clean yet and we are working to fix that but that's
> another discussion.
> Even specifying plain DMA_SLAVE should work if you code your filter
> function properly :)

The RIO filter checks for the DMA device associated with the RapidIO mport
object. This should work reliably from the RapidIO side. It also verifies
that the returned DMA channel is capable of servicing the corresponding
RIO device (in a system that has more than one RIO controller).

... skip ...
> >
> > Second, having ability to pass private target information allows me to pass
> > information about remote target device on per-transfer basis.
> Okay, then why not pass the dma address and make your dma driver
> transparent (i saw you passed RIO address, IIRC 64+2 bits)
> Currently using dma_slave_config we pass channel specific information,
> things like peripheral address and config don't change typically
> between
> transfers and if you have some controller specific properties you can
> pass them by embedding dma_slave_config in your specific structure.
> Worst case, you can configure slave before every prepare

In addition to the address on the target RIO device I need to pass the
corresponding device destination ID. With a single channel capable of
transferring data between local memory and different RapidIO devices, I
have to pass device-specific information on a per-transfer basis
(destID + 66-bit address + type of write ops, etc.).

Even having 8 channels (each set for specific target) will not help me with
full support of RapidIO network where I have 8- or 16-bit destID (256 or 64K
devices respectively).

RapidIO controller device (and its DMA component) may provide services to
multiple device drivers which service individual devices on RapidIO network
(similar to PCIe having multiple peripherals, but not using memory mapped
model - destID defines route to a device). 

Generic RapidIO controller may have only one DMA channel which services all
target devices forming the network. We may have multiple concurrent data
transfer requests for different devices.

Parallel discussion with Dan touches the same post-config approach and
another option. I like Dan's idea of having RIO-specific version of prep_sg().
At the same time my current implementation of rio_dma_prep_slave_sg() with
added appropriate locking may do job as well and keeps DMA part within
existing API (DMA_RAPIDIO removed).      

Alex.

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-07 19:08       ` Bounine, Alexandre
@ 2011-10-15 17:35         ` Vinod Koul
  2011-10-17 14:33           ` Bounine, Alexandre
  2011-10-17 15:52           ` Jassi Brar
  0 siblings, 2 replies; 21+ messages in thread
From: Vinod Koul @ 2011-10-15 17:35 UTC (permalink / raw)
  To: Bounine, Alexandre, Jassi Brar
  Cc: Dan, akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

On Fri, 2011-10-07 at 12:08 -0700, Bounine, Alexandre wrote:
> Vinod Koul wrote:
> > 
> > On Mon, 2011-10-03 at 09:52 -0700, Bounine, Alexandre wrote:
Adding Jassi to this and sorry for late reply...

> ... skip ...
> > >
> > > Second, having ability to pass private target information allows me to pass
> > > information about remote target device on per-transfer basis.
> > Okay, then why not pass the dma address and make your dma driver
> > transparent (i saw you passed RIO address, IIRC 64+2 bits)
> > Currently using dma_slave_config we pass channel specific information,
> > things like peripheral address and config don't change typically
> > between
> > transfers and if you have some controller specific properties you can
> > pass them by embedding dma_slave_config in your specific structure.
> > Worst case, you can configure slave before every prepare
> 
> In addition to address on target RIO device I need to pass corresponding
> device destination ID. With single channel capable to transfer data between
> local memory and different RapidIO devices I have to pass device specific
> information on per transfer basis (destID + 66-bit address + type of write ops, etc.).
> 
> Even having 8 channels (each set for specific target) will not help me with
> full support of RapidIO network where I have 8- or 16-bit destID (256 or 64K
> devices respectively).
> 
> RapidIO controller device (and its DMA component) may provide services to
> multiple device drivers which service individual devices on RapidIO network
> (similar to PCIe having multiple peripherals, but not using memory mapped
> model - destID defines route to a device). 
> 
> Generic RapidIO controller may have only one DMA channel which services all
> target devices forming the network. We may have multiple concurrent data
> transfer requests for different devices.
> 
> Parallel discussion with Dan touches the same post-config approach and
> another option. I like Dan's idea of having RIO-specific version of prep_sg().
> At the same time my current implementation of rio_dma_prep_slave_sg() with
> added appropriate locking may do job as well and keeps DMA part within
> existing API (DMA_RAPIDIO removed). 
Thanks this helps to understand your model.

Per the above you need the destID, 66-bit address and type of write to be
passed for each transfer. Looking at your rio_dma_data, I can't see the
destID.

Nevertheless we have few ways to solve this, pass this using
dma_slave_config _every-time_ before doing a prep_slave_sg. Or as Dan
pointed out create RIO specific version. Principally I am not for adding
a subsystem specific stuff, even more when we are talking about
converging and cutting down on dmaengine API :)

Another alternate approach could be to add one more argument to
prep_slave_sg API which allows us to pass additional runtime specific
parameters. This can be NULL and unused for existing drivers and used in
RIO and any future subsystems which want to use dmaengine.
Thoughts...?

-- 
~Vinod


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-15 17:35         ` Vinod Koul
@ 2011-10-17 14:33           ` Bounine, Alexandre
  2011-10-17 15:52           ` Jassi Brar
  1 sibling, 0 replies; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-17 14:33 UTC (permalink / raw)
  To: Vinod Koul, Jassi Brar
  Cc: Dan, akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="UTF-8", Size: 3794 bytes --]

On Sat, Oct 15, 2011 at 1:36 PM, Vinod Koul <vinod.koul@intel.com> wrote:
> 
> > ... skip ...
> > > >
> > > > Second, having ability to pass private target information allows me to pass
> > > > information about remote target device on per-transfer basis.
> > > Okay, then why not pass the dma address and make your dma driver
> > > transparent (i saw you passed RIO address, IIRC 64+2 bits)
> > > Currently using dma_slave_config we pass channel specific information,
> > > things like peripheral address and config don't change typically
> > > between
> > > transfers and if you have some controller specific properties you can
> > > pass them by embedding dma_slave_config in your specific structure.
> > > Worst case, you can configure slave before every prepare
> >
> > In addition to address on target RIO device I need to pass corresponding
> > device destination ID. With single channel capable to transfer data between
> > local memory and different RapidIO devices I have to pass device specific
> > information on per transfer basis (destID + 66-bit address + type of write ops, etc.).
> >
> > Even having 8 channels (each set for specific target) will not help me with
> > full support of RapidIO network where I have 8- or 16-bit destID (256 or 64K
> > devices respectively).
> >
> > RapidIO controller device (and its DMA component) may provide services to
> > multiple device drivers which service individual devices on RapidIO network
> > (similar to PCIe having multiple peripherals, but not using memory mapped
> > model - destID defines route to a device).
> >
> > Generic RapidIO controller may have only one DMA channel which services all
> > target devices forming the network. We may have multiple concurrent data
> > transfer requests for different devices.
> >
> > Parallel discussion with Dan touches the same post-config approach and
> > another option. I like Dan's idea of having RIO-specific version of prep_sg().
> > At the same time my current implementation of rio_dma_prep_slave_sg() with
> > added appropriate locking may do job as well and keeps DMA part within
> > existing API (DMA_RAPIDIO removed).
> Thanks this helps to understand your model.
> 
> Per above you need destID, 66bit address and type of write to be passed
> for each transfer. Looking at your rio_dma_data, i cant see the destID.
> 
The destID is extracted by rio_dma_prep_slave_sg() from the rio_dev
parameter and passed down to prep_slave_sg() as part of the rio_dma_ext
structure (as dchan->private).

> Nevertheless we have few ways to solve this, pass this using
> dma_slave_config _every-time_ before doing a prep_slave_sg. Or as Dan
> pointed out create RIO specific version. Principally I am not for
> adding
> a subsystem specific stuff, even more when we are talking about
> converging and cutting down on dmaengine API :)
> 
Agree. This is why I am trying to find my way within boundaries of DMA_SLAVE API first.

> Another alternate approach could be to add one more argument to
> prep_slave_sg API which allows us to pass additional runtime specific
> parameters. This can be NULL and unused for existing drivers and used
> in
> RIO and any future subsystems which want to use dmaengine.
> Thoughts...?
> 
This is exactly what I need and it may be useful for future systems.
This makes DMA_SLAVE prepared for variations in device addressing.

Plus, this will make porting RIO DMA to previous kernel versions
relatively easy for those who prefers/requires that.

Based on the fact that you started transition to the new DMA transfer
direction flags, it may be a good time to add that extra argument
to prep_slave_sg().

Alex.

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-15 17:35         ` Vinod Koul
  2011-10-17 14:33           ` Bounine, Alexandre
@ 2011-10-17 15:52           ` Jassi Brar
  2011-10-17 17:01             ` Vinod Koul
  2011-10-17 18:16             ` Bounine, Alexandre
  1 sibling, 2 replies; 21+ messages in thread
From: Jassi Brar @ 2011-10-17 15:52 UTC (permalink / raw)
  To: Vinod Koul
  Cc: Bounine, Alexandre, Dan, akpm, linux-kernel, linuxppc-dev,
	Kumar Gala, Matt Porter, Li Yang

On 15 October 2011 23:05, Vinod Koul <vinod.koul@intel.com> wrote:

> Another alternate approach could be to add one more argument to
> prep_slave_sg API which allows us to pass additional runtime specific
> parameters. This can be NULL and unused for existing drivers and used in
> RIO and any future subsystems which want to use dmaengine.
> Thoughts...?
>
That doesn't sound much different than passing the data via
dma_chan.private during prep_slave_sg. Only now we add to
the number of arguments.
And then either this argument would be RapidIO specific (unfair
to other users) or generic. In latter case what would it look like ?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-17 15:52           ` Jassi Brar
@ 2011-10-17 17:01             ` Vinod Koul
  2011-10-17 19:39               ` Bounine, Alexandre
  2011-10-17 18:16             ` Bounine, Alexandre
  1 sibling, 1 reply; 21+ messages in thread
From: Vinod Koul @ 2011-10-17 17:01 UTC (permalink / raw)
  To: Jassi Brar
  Cc: Bounine, Alexandre, Dan, akpm, linux-kernel, linuxppc-dev,
	Kumar Gala, Matt Porter, Li Yang

On Mon, 2011-10-17 at 21:22 +0530, Jassi Brar wrote:
> On 15 October 2011 23:05, Vinod Koul <vinod.koul@intel.com> wrote:
> 
> > Another alternate approach could be to add one more argument to
> > prep_slave_sg API which allows us to pass additional runtime specific
> > parameters. This can be NULL and unused for existing drivers and used in
> > RIO and any future subsystems which want to use dmaengine.
> > Thoughts...?
> >
> That doesn't sound much different than passing the data via
> dma_chan.private during prep_slave_sg. Only now we add to
> the number of arguments.
Yes, agreed, but we already decided and marked it as deprecated.

> And then either this argument would be RapidIO specific (unfair
> to other users) or generic. In latter case what would it look like ?
My initial thoughts were to add an argument which only the specific dmac
knows how to decode and which is filled by its client. As I said, existing
users and people who don't require dynamic information wouldn't bother.
The pros
 - allows us to support RIO kind of subsystems where one needs to pass
subsystem specific information for programing the dmac
 - doesn't require us to add subsystem specific stuff in dmaengine,
today its RIO tomorrow some other folks may want to add. We want to
maintain dmaengine as a generic framework, while also trying to support
multiple audiences.
Cons:
 - there is no guarantee;  dmac expects foo and clients pass bar

I am okay if we have alternate way to support this with above goal :)

-- 
~Vinod


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-17 15:52           ` Jassi Brar
  2011-10-17 17:01             ` Vinod Koul
@ 2011-10-17 18:16             ` Bounine, Alexandre
  1 sibling, 0 replies; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-17 18:16 UTC (permalink / raw)
  To: Jassi Brar, Vinod Koul
  Cc: Dan, akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

On Mon, Oct 17, 2011 at 11:53 AM, Jassi Brar
<jaswinder.singh@linaro.org> wrote:
> 
> On 15 October 2011 23:05, Vinod Koul <vinod.koul@intel.com> wrote:
> 
> > Another alternate approach could be to add one more argument to
> > prep_slave_sg API which allows us to pass additional runtime
specific
> > parameters. This can be NULL and unused for existing drivers and
used
> in
> > RIO and any future subsystems which want to use dmaengine.
> > Thoughts...?
> >
> That doesn't sound much different than passing the data via
> dma_chan.private during prep_slave_sg. Only now we add to
> the number of arguments.
One dma_chan may be used by multiple drivers requesting data transfers.
In this case we will need a lock to keep dma_chan and its private member
coupled together.
If we consider this coupling a valid approach, we may think about
adding a lock member to the dma_chan structure. This will make locking
more effective for configurations with multiple DMA channels.

> And then either this argument would be RapidIO specific (unfair
> to other users) or generic. In latter case what would it look like ?
It should not be RapidIO specific. Just transfer specific context that
will be interpreted by participants. Given that we have a channel
filtering mechanism there is a little chance of wrongful usage of
that parameter.

Alex.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers
  2011-10-17 17:01             ` Vinod Koul
@ 2011-10-17 19:39               ` Bounine, Alexandre
  0 siblings, 0 replies; 21+ messages in thread
From: Bounine, Alexandre @ 2011-10-17 19:39 UTC (permalink / raw)
  To: Vinod Koul, Jassi Brar
  Cc: Dan, akpm, linux-kernel, linuxppc-dev, Kumar Gala, Matt Porter,
	Li Yang

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="UTF-8", Size: 2895 bytes --]

On Mon, Oct 17, 2011 at 1:01 PM, Vinod Koul <vinod.koul@intel.com> wrote:
> 
> On Mon, 2011-10-17 at 21:22 +0530, Jassi Brar wrote:
> > On 15 October 2011 23:05, Vinod Koul <vinod.koul@intel.com> wrote:
> >
> > > Another alternate approach could be to add one more argument to
> > > prep_slave_sg API which allows us to pass additional runtime
> specific
> > > parameters. This can be NULL and unused for existing drivers and
> used in
> > > RIO and any future subsystems which want to use dmaengine.
> > > Thoughts...?
> > >
> > That doesn't sound much different than passing the data via
> > dma_chan.private during prep_slave_sg. Only now we add to
> > the number of arguments.
> Yes, agreed, but we already decided and marked it as deprecated.
> 
> > And then either this argument would be RapidIO specific (unfair
> > to other users) or generic. In latter case what would it look like ?
> My initial thoughts were to add an argument which only the specific
> dmac
> knows howto decode and is filled by its client. As i said for existing
> users and people who don't require dynamic information wouldn't bother.
> The pros
>  - allows us to support RIO kind of subsystems where one needs to pass
> subsystem specific information for programing the dmac
>  - doesn't require us to add subsystem specific stuff in dmaengine,
> today its RIO tomorrow some other folks may want to add. We want to
> maintain dmaengine as a generic framework, while also trying to support
> multiple audiences.
> Cons:
>  - there is no guarantee;  dmac expects foo and clients pass bar
> 
This was my concern that I mentioned in the beginning of this thread:
how to differentiate between different types of slave channels. At that
time we decided that channel filtering routines should be sufficient
to properly identify a suitable channel. And this is true - if RIO filter
detects registered RapidIO capable DMA channel it should be safe to pass
a parameter specific to RIO client implementation.   

If we want to protect pure slave channels from mixing with some specific
implementations, we may consider adding new values for
dma_transaction_type, as was done for RapidIO (DMA_RAPIDIO, DMA_FOO,
DMA_BAR, etc.).
This way we may keep a single API with an extra argument and differentiate
between "flavors" of DMA_SLAVE to make decisions about the use of that
extra parameter. E.g. channels registered as DMA_SLAVE will ignore
the new parameter (pure slave mode), while DMA_RAPIDIO and DMA_FOO use it
according to the "flavor".

Yes, I am planning to drop that flag from the current RIO implementation,
but it may have a new meaning if prep_slave_sg() gets the extra argument.

> I am okay if we have alternate way to support this with above goal :)
> 
> --
> ~Vinod

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2011-10-17 19:40 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-30 21:38 [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Alexandre Bounine
2011-09-30 21:38 ` [RFC PATCH 2/2 -mm] RapidIO: TSI721 Add DMA Engine support Alexandre Bounine
2011-09-30 22:15   ` Andrew Morton
2011-10-03 17:53     ` Bounine, Alexandre
2011-10-04 21:43       ` Andrew Morton
2011-10-05  1:38         ` Bounine, Alexandre
2011-10-05  1:57           ` Andrew Morton
2011-10-05  2:57             ` Bounine, Alexandre
2011-10-01 18:06   ` Vinod Koul
2011-10-01 18:01 ` [RFC PATCH 1/2] RapidIO: Add DMA Engine support for RIO data transfers Vinod Koul
2011-10-03 16:52   ` Bounine, Alexandre
2011-10-05 20:38     ` Williams, Dan J
2011-10-07 16:12       ` Bounine, Alexandre
2011-10-07  5:27     ` Vinod Koul
2011-10-07 19:08       ` Bounine, Alexandre
2011-10-15 17:35         ` Vinod Koul
2011-10-17 14:33           ` Bounine, Alexandre
2011-10-17 15:52           ` Jassi Brar
2011-10-17 17:01             ` Vinod Koul
2011-10-17 19:39               ` Bounine, Alexandre
2011-10-17 18:16             ` Bounine, Alexandre

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox