LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] powerpc/85xx: workaround for chips with MSI hareware errata to support MSI-X
From: Jia Hongtao @ 2012-07-16  3:35 UTC (permalink / raw)
  To: linuxppc-dev, galak; +Cc: soniccat.liu, b38951

From: Liu Shuo <soniccat.liu@gmail.com>

The MPIC chip with version 2.0 has a MSI errata (errata PIC1 of mpc8544),
It causes that neither MSI nor MSI-X can work fine. There is a workaround
to allow MSI-X to function properly.

Signed-off-by: Liu Shuo <soniccat.liu@gmail.com>
Signed-off-by: Li Yang <leoli@freescale.com>
---
 arch/powerpc/include/asm/mpic.h |    3 ++
 arch/powerpc/sysdev/fsl_msi.c   |   63 +++++++++++++++++++++++++++++++++++++-
 arch/powerpc/sysdev/fsl_msi.h   |    3 ++
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index c9f698a..a9e4f937 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -110,6 +110,9 @@
 #define 	MPIC_VECPRI_SENSE_MASK			0x00400000
 #define MPIC_IRQ_DESTINATION		0x00010
 
+#define 	MPIC_FSL_BRR1				0x00000
+#define 	MPIC_FSL_BRR1_VER			0x0000ffff
+
 #define MPIC_MAX_IRQ_SOURCES	2048
 #define MPIC_MAX_CPUS		32
 #define MPIC_MAX_ISU		32
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 6e097de..f2d340a 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -98,8 +98,23 @@ static int fsl_msi_init_allocator(struct fsl_msi *msi_data)
 
 static int fsl_msi_check_device(struct pci_dev *pdev, int nvec, int type)
 {
+	struct fsl_msi *msi;
+
 	if (type == PCI_CAP_ID_MSIX)
 		pr_debug("fslmsi: MSI-X untested, trying anyway.\n");
+	else if (type == PCI_CAP_ID_MSI) {
+		/*
+		 * MPIC chip with 2.0 version has erratum PIC1. It
+		 * causes that neither MSI nor MSI-X can work fine.
+		 * There is a workaround to allow MSI-X to function
+		 * properly.
+		 */
+		list_for_each_entry(msi, &msi_head, list) {
+			if ((msi->feature & MSI_HW_ERRATA_MASK)
+						== MSI_HW_ERRATA_ENDIAN)
+				return -EINVAL;
+		}
+	}
 
 	return 0;
 }
@@ -142,7 +157,11 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
 	msg->address_lo = lower_32_bits(address);
 	msg->address_hi = upper_32_bits(address);
 
-	msg->data = hwirq;
+	/* See the comment in fsl_msi_check_device() */
+	if ((msi_data->feature & MSI_HW_ERRATA_MASK) == MSI_HW_ERRATA_ENDIAN)
+		msg->data = __swab32(hwirq);
+	else
+		msg->data = hwirq;
 
 	pr_debug("%s: allocated srs: %d, ibs: %d\n",
 		__func__, hwirq / IRQS_PER_MSI_REG, hwirq % IRQS_PER_MSI_REG);
@@ -359,13 +378,43 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
 	return 0;
 }
 
+/* MPIC chip with 2.0 version has erratum PIC1 */
+static int mpic_has_errata(struct platform_device *dev)
+{
+	struct device_node *mpic_node;
+
+	mpic_node = of_irq_find_parent(dev->dev.of_node);
+	if (mpic_node) {
+		u32 *reg_base, brr1 = 0;
+		/* Get the PIC reg base */
+		reg_base = of_iomap(mpic_node, 0);
+		of_node_put(mpic_node);
+		if (!reg_base) {
+			dev_err(&dev->dev, "ioremap problem failed.\n");
+			return -EIO;
+		}
+
+		/* Get the mpic chip version from block revision register 1 */
+		brr1 = in_be32(reg_base + MPIC_FSL_BRR1);
+		iounmap(reg_base);
+		if ((brr1 & MPIC_FSL_BRR1_VER) == 0x0200)
+			return 1;
+	} else {
+		dev_err(&dev->dev, "MSI can't find his parent mpic node.\n");
+		of_node_put(mpic_node);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 static const struct of_device_id fsl_of_msi_ids[];
 static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 {
 	const struct of_device_id *match;
 	struct fsl_msi *msi;
 	struct resource res;
-	int err, i, j, irq_index, count;
+	int err, i, j, irq_index, count, errata;
 	int rc;
 	const u32 *p;
 	struct fsl_msi_feature *features;
@@ -421,6 +470,16 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 
 	msi->feature = features->fsl_pic_ip;
 
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) == FSL_PIC_IP_MPIC) {
+		errata = mpic_has_errata(dev);
+		if (errata > 0) {
+			msi->feature |= MSI_HW_ERRATA_ENDIAN;
+		} else if (errata < 0) {
+			err = errata;
+			goto error_out;
+		}
+	}
+
 	/*
 	 * Remember the phandle, so that we can match with any PCI nodes
 	 * that have an "fsl,msi" property.
diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
index 8225f86..354d546 100644
--- a/arch/powerpc/sysdev/fsl_msi.h
+++ b/arch/powerpc/sysdev/fsl_msi.h
@@ -25,6 +25,9 @@
 #define FSL_PIC_IP_IPIC   0x00000002
 #define FSL_PIC_IP_VMPIC  0x00000003
 
+#define MSI_HW_ERRATA_MASK   0x000000F0
+#define MSI_HW_ERRATA_ENDIAN 0x00000010
+
 struct fsl_msi {
 	struct irq_domain *irqhost;
 
-- 
1.7.5.1

^ permalink raw reply related

* [PATCH v3 0/4] Raid: enable talitos xor offload for improving performance
From: Qiang Liu @ 2012-07-16  4:07 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev; +Cc: qiang.liu, herbert, dan.j.williams

The following 4 patches enabling fsl-dma and talitos offload raid
operations for improving raid performance and balancing CPU load.

Write performance will be improved by 25-30% tested by iozone.
Write performance is improved about 2% after using spin_lock_bh replace
spin_lock_irqsave.
CPU load will be reduced by 8%.

Changes in v3:
	- change release process of fsl-dma descriptor for resolve the
	potential race condition
	- add test result when use spin_lock_bh replace spin_lock_irqsave
	- modify the benchmark results according to the latest patch

Changes in v2:
	- rebase onto cryptodev tree
	- split the patch 3/4 up to 3 independent patches
	- remove the patch 4/4, the fix is not for cryptodev tree


Qiang Liu (4):
      Talitos: Support for async_tx XOR offload
      fsl-dma: remove attribute DMA_INTERRUPT of dmaengine
      fsl-dma: change release process of dma descriptor for supporting async_tx
      fsl-dma: use spin_lock_bh to instead of spin_lock_irqsave

 drivers/crypto/Kconfig   |    9 +
 drivers/crypto/talitos.c |  410 +++++++++++++++++++++++++++++++++++++++++++
 drivers/crypto/talitos.h |   53 ++++++
 drivers/dma/fsldma.c     |  436 +++++++++++++++++++++++++---------------------
 drivers/dma/fsldma.h     |    1 +
 5 files changed, 708 insertions(+), 201 deletions(-)

^ permalink raw reply

* [PATCH v3 2/4] fsl-dma: remove attribute DMA_INTERRUPT of dmaengine
From: Qiang Liu @ 2012-07-16  4:08 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev
  Cc: Vinod Koul, Qiang Liu, herbert, Dan Williams, davem

Delete attribute DMA_INTERRUPT because fsl-dma doesn't support this function,
exception will be thrown if talitos is used to offload xor at the same time.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Li Yang <leoli@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
Acked-by: Ira W. Snyder <iws@ovro.caltech.edu>
---
 drivers/dma/fsldma.c |   31 -------------------------------
 1 files changed, 0 insertions(+), 31 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 8f84761..4f2f212 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -543,35 +543,6 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
 }

 static struct dma_async_tx_descriptor *
-fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
-{
-	struct fsldma_chan *chan;
-	struct fsl_desc_sw *new;
-
-	if (!dchan)
-		return NULL;
-
-	chan = to_fsl_chan(dchan);
-
-	new = fsl_dma_alloc_descriptor(chan);
-	if (!new) {
-		chan_err(chan, "%s\n", msg_ld_oom);
-		return NULL;
-	}
-
-	new->async_tx.cookie = -EBUSY;
-	new->async_tx.flags = flags;
-
-	/* Insert the link descriptor to the LD ring */
-	list_add_tail(&new->node, &new->tx_list);
-
-	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(chan, new);
-
-	return &new->async_tx;
-}
-
-static struct dma_async_tx_descriptor *
 fsl_dma_prep_memcpy(struct dma_chan *dchan,
 	dma_addr_t dma_dst, dma_addr_t dma_src,
 	size_t len, unsigned long flags)
@@ -1352,12 +1323,10 @@ static int __devinit fsldma_of_probe(struct platform_device *op)
 	fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);

 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
-	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
 	dma_cap_set(DMA_SG, fdev->common.cap_mask);
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
-	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
 	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
 	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
 	fdev->common.device_tx_status = fsl_tx_status;
--
1.7.5.1

^ permalink raw reply related

* [PATCH v3 3/4] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Qiang Liu @ 2012-07-16  4:08 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev
  Cc: Ira W. Snyder, Vinod Koul, Qiang Liu, herbert, Dan Williams,
	davem

Fix the potential risk when enable config NET_DMA and ASYNC_TX.
Async_tx is lack of support in current release process of dma descriptor,
all descriptors will be released whatever is acked or no-acked by async_tx,
so there is a potential race condition when dma engine is uesd by others
clients (e.g. when enable NET_DMA to offload TCP).

In our case, a race condition which is raised when use both of talitos
and dmaengine to offload xor is because napi scheduler will sync all
pending requests in dma channels, it affects the process of raid operations
due to ack_tx is not checked in fsl dma. The no-acked descriptor is freed
which is submitted just now, as a dependent tx, this freed descriptor trigger
BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
---
 drivers/dma/fsldma.c |  378 +++++++++++++++++++++++++++++--------------------
 drivers/dma/fsldma.h |    1 +
 2 files changed, 225 insertions(+), 154 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 4f2f212..4ee1b8f 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -400,6 +400,217 @@ out_splice:
 	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
 }

+/**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = DMA_IN(chan, &chan->regs->mr, 32);
+		mode &= ~FSL_DMA_MR_CS;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+static int
+fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
+
+		if (async_tx_test_ack(&desc->async_tx)) {
+			/* Remove from the list of transactions */
+			list_del(&desc->node);
+#ifdef FSL_DMA_LD_DEBUG
+			chan_dbg(chan, "LD %p free\n", desc);
+#endif
+			dma_pool_free(chan->desc_pool, desc,
+					desc->async_tx.phys);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * fsldma_run_tx_complete_actions - cleanup and free a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ * @cookie: Freescale DMA transaction identifier
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor.
+ */
+static dma_cookie_t fsldma_run_tx_complete_actions(struct fsl_desc_sw *desc,
+		struct fsldma_chan *chan, dma_cookie_t cookie)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	struct device *dev = chan->common.device->dev;
+	dma_addr_t src = get_desc_src(chan, desc);
+	dma_addr_t dst = get_desc_dst(chan, desc);
+	u32 len = get_desc_cnt(chan, desc);
+
+	BUG_ON(txd->cookie < 0);
+
+	if (txd->cookie > 0) {
+		cookie = txd->cookie;
+
+		/* Run the link descriptor callback function */
+		if (txd->callback) {
+#ifdef FSL_DMA_LD_DEBUG
+			chan_dbg(chan, "LD %p callback\n", desc);
+#endif
+			txd->callback(txd->callback_param);
+		}
+
+		/* Unmap the dst buffer, if requested */
+		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
+			else
+				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
+		}
+
+		/* Unmap the src buffer, if requested */
+		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
+			else
+				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
+		}
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	return cookie;
+}
+
+static int
+fsldma_clean_running_descriptor(struct fsl_desc_sw *desc,
+			struct fsldma_chan *chan)
+{
+	/* Remove from the list of transactions */
+	list_del(&desc->node);
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/* move this slot to the completed */
+		list_add_tail(&desc->node, &chan->ld_completed);
+		return 0;
+	}
+
+	dma_pool_free(chan->desc_pool, desc,
+			desc->async_tx.phys);
+	return 0;
+}
+
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+	dma_cookie_t cookie = 0;
+	dma_addr_t curr_phys = get_cdar(chan);
+	int idle = dma_is_idle(chan);
+	int seen_current = 0;
+
+	fsldma_clean_completed_descriptor(chan);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (desc->async_tx.phys == curr_phys) {
+			seen_current = 1;
+			if (!idle)
+				break;
+		}
+
+		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
+
+		if (fsldma_clean_running_descriptor(desc, chan))
+			break;
+
+	}
+
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
+
+	if (cookie > 0)
+		chan->common.completed_cookie = cookie;
+}
+
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
@@ -534,8 +745,10 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)

 	chan_dbg(chan, "free all channel resources\n");
 	spin_lock_irqsave(&chan->desc_lock, flags);
+	fsldma_cleanup_descriptor(chan);
 	fsldma_free_desc_list(chan, &chan->ld_pending);
 	fsldma_free_desc_list(chan, &chan->ld_running);
+	fsldma_free_desc_list(chan, &chan->ld_completed);
 	spin_unlock_irqrestore(&chan->desc_lock, flags);

 	dma_pool_destroy(chan->desc_pool);
@@ -811,124 +1024,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 }

 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
- * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
- *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
- */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
-				      struct fsl_desc_sw *desc)
-{
-	struct dma_async_tx_descriptor *txd = &desc->async_tx;
-	struct device *dev = chan->common.device->dev;
-	dma_addr_t src = get_desc_src(chan, desc);
-	dma_addr_t dst = get_desc_dst(chan, desc);
-	u32 len = get_desc_cnt(chan, desc);
-
-	/* Run the link descriptor callback function */
-	if (txd->callback) {
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p callback\n", desc);
-#endif
-		txd->callback(txd->callback_param);
-	}
-
-	/* Run any dependencies */
-	dma_run_dependencies(txd);
-
-	/* Unmap the dst buffer, if requested */
-	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
-		else
-			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
-	}
-
-	/* Unmap the src buffer, if requested */
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
-		else
-			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
-	}
-
-#ifdef FSL_DMA_LD_DEBUG
-	chan_dbg(chan, "LD %p free\n", desc);
-#endif
-	dma_pool_free(chan->desc_pool, desc, txd->phys);
-}
-
-/**
- * fsl_chan_xfer_ld_queue - transfer any pending transactions
- * @chan : Freescale DMA channel
- *
- * HARDWARE STATE: idle
- * LOCKING: must hold chan->desc_lock
- */
-static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc;
-
-	/*
-	 * If the list of pending descriptors is empty, then we
-	 * don't need to do any work at all
-	 */
-	if (list_empty(&chan->ld_pending)) {
-		chan_dbg(chan, "no pending LDs\n");
-		return;
-	}
-
-	/*
-	 * The DMA controller is not idle, which means that the interrupt
-	 * handler will start any queued transactions when it runs after
-	 * this transaction finishes
-	 */
-	if (!chan->idle) {
-		chan_dbg(chan, "DMA controller still busy\n");
-		return;
-	}
-
-	/*
-	 * If there are some link descriptors which have not been
-	 * transferred, we need to start the controller
-	 */
-
-	/*
-	 * Move all elements from the queue of pending transactions
-	 * onto the list of running transactions
-	 */
-	chan_dbg(chan, "idle, starting controller\n");
-	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
-	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
-
-	/*
-	 * The 85xx DMA controller doesn't clear the channel start bit
-	 * automatically at the end of a transfer. Therefore we must clear
-	 * it in software before starting the transfer.
-	 */
-	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		u32 mode;
-
-		mode = DMA_IN(chan, &chan->regs->mr, 32);
-		mode &= ~FSL_DMA_MR_CS;
-		DMA_OUT(chan, &chan->regs->mr, mode, 32);
-	}
-
-	/*
-	 * Program the descriptor's address into the DMA controller,
-	 * then start the DMA transaction
-	 */
-	set_cdar(chan, desc->async_tx.phys);
-	get_cdar(chan);
-
-	dma_start(chan);
-	chan->idle = false;
-}
-
-/**
  * fsl_dma_memcpy_issue_pending - Issue the DMA start command
  * @chan : Freescale DMA channel
  */
@@ -954,11 +1049,17 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 	enum dma_status ret;
 	unsigned long flags;

-	spin_lock_irqsave(&chan->desc_lock, flags);
 	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_SUCCESS) {
+		fsldma_clean_completed_descriptor(chan);
+		return ret;
+	}
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	fsldma_cleanup_descriptor(chan);
 	spin_unlock_irqrestore(&chan->desc_lock, flags);

-	return ret;
+	return dma_cookie_status(dchan, cookie, txstate);
 }

 /*----------------------------------------------------------------------------*/
@@ -1035,53 +1136,21 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 static void dma_do_tasklet(unsigned long data)
 {
 	struct fsldma_chan *chan = (struct fsldma_chan *)data;
-	struct fsl_desc_sw *desc, *_desc;
-	LIST_HEAD(ld_cleanup);
 	unsigned long flags;

 	chan_dbg(chan, "tasklet entry\n");

 	spin_lock_irqsave(&chan->desc_lock, flags);

-	/* update the cookie if we have some descriptors to cleanup */
-	if (!list_empty(&chan->ld_running)) {
-		dma_cookie_t cookie;
-
-		desc = to_fsl_desc(chan->ld_running.prev);
-		cookie = desc->async_tx.cookie;
-		dma_cookie_complete(&desc->async_tx);
-
-		chan_dbg(chan, "completed_cookie=%d\n", cookie);
-	}
-
-	/*
-	 * move the descriptors to a temporary list so we can drop the lock
-	 * during the entire cleanup operation
-	 */
-	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
+	/* Run all cleanup for this descriptor */
+	fsldma_cleanup_descriptor(chan);

 	/* the hardware is now idle and ready for more */
 	chan->idle = true;

-	/*
-	 * Start any pending transactions automatically
-	 *
-	 * In the ideal case, we keep the DMA controller busy while we go
-	 * ahead and free the descriptors below.
-	 */
 	fsl_chan_xfer_ld_queue(chan);
 	spin_unlock_irqrestore(&chan->desc_lock, flags);

-	/* Run the callback for each descriptor, in order */
-	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
-
-		/* Remove from the list of transactions */
-		list_del(&desc->node);
-
-		/* Run all cleanup for this descriptor */
-		fsldma_cleanup_descriptor(chan, desc);
-	}
-
 	chan_dbg(chan, "tasklet exit\n");
 }

@@ -1262,6 +1331,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
 	spin_lock_init(&chan->desc_lock);
 	INIT_LIST_HEAD(&chan->ld_pending);
 	INIT_LIST_HEAD(&chan->ld_running);
+	INIT_LIST_HEAD(&chan->ld_completed);
 	chan->idle = true;

 	chan->common.device = &fdev->common;
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index f5c3879..7ede908 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -140,6 +140,7 @@ struct fsldma_chan {
 	spinlock_t desc_lock;		/* Descriptor operation lock */
 	struct list_head ld_pending;	/* Link descriptors queue */
 	struct list_head ld_running;	/* Link descriptors queue */
+	struct list_head ld_completed;	/* Link descriptors queue */
 	struct dma_chan common;		/* DMA common channel */
 	struct dma_pool *desc_pool;	/* Descriptors pool */
 	struct device *dev;		/* Channel device */
--
1.7.5.1

^ permalink raw reply related

* [PATCH v3 4/4] fsl-dma: use spin_lock_bh to instead of spin_lock_irqsave
From: Qiang Liu @ 2012-07-16  4:09 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev
  Cc: Vinod Koul, Qiang Liu, herbert, Dan Williams, davem

Use spin_lock_bh to instead of spin_lock_irqsave for improving performance.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Li Yang <leoli@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
---
 drivers/dma/fsldma.c |   29 ++++++++++++-----------------
 1 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 4ee1b8f..e975719 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -616,10 +616,9 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
 	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_desc_sw *child;
-	unsigned long flags;
 	dma_cookie_t cookie;

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);

 	/*
 	 * assign cookies to all of the software descriptors
@@ -632,7 +631,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	/* put this transaction onto the tail of the pending queue */
 	append_ld_queue(chan, desc);

-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	return cookie;
 }
@@ -741,15 +740,14 @@ static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
 static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	unsigned long flags;

 	chan_dbg(chan, "free all channel resources\n");
-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsldma_cleanup_descriptor(chan);
 	fsldma_free_desc_list(chan, &chan->ld_pending);
 	fsldma_free_desc_list(chan, &chan->ld_running);
 	fsldma_free_desc_list(chan, &chan->ld_completed);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	dma_pool_destroy(chan->desc_pool);
 	chan->desc_pool = NULL;
@@ -968,7 +966,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 {
 	struct dma_slave_config *config;
 	struct fsldma_chan *chan;
-	unsigned long flags;
 	int size;

 	if (!dchan)
@@ -978,7 +975,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,

 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
-		spin_lock_irqsave(&chan->desc_lock, flags);
+		spin_lock_bh(&chan->desc_lock);

 		/* Halt the DMA engine */
 		dma_halt(chan);
@@ -988,7 +985,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 		fsldma_free_desc_list(chan, &chan->ld_running);
 		chan->idle = true;

-		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		spin_unlock_bh(&chan->desc_lock);
 		return 0;

 	case DMA_SLAVE_CONFIG:
@@ -1030,11 +1027,10 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	unsigned long flags;

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsl_chan_xfer_ld_queue(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);
 }

 /**
@@ -1047,7 +1043,6 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
 	enum dma_status ret;
-	unsigned long flags;

 	ret = dma_cookie_status(dchan, cookie, txstate);
 	if (ret == DMA_SUCCESS) {
@@ -1055,9 +1050,9 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 		return ret;
 	}

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsldma_cleanup_descriptor(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	return dma_cookie_status(dchan, cookie, txstate);
 }
@@ -1140,7 +1135,7 @@ static void dma_do_tasklet(unsigned long data)

 	chan_dbg(chan, "tasklet entry\n");

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);

 	/* Run all cleanup for this descriptor */
 	fsldma_cleanup_descriptor(chan);
@@ -1149,7 +1144,7 @@ static void dma_do_tasklet(unsigned long data)
 	chan->idle = true;

 	fsl_chan_xfer_ld_queue(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	chan_dbg(chan, "tasklet exit\n");
 }
--
1.7.5.1

^ permalink raw reply related

* [PATCH v3 1/4] Talitos: Support for async_tx XOR offload
From: Qiang Liu @ 2012-07-16  4:11 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev
  Cc: Qiang Liu, herbert, dan.j.williams, David S. Miller

Expose Talitos's XOR functionality to be used for RAID parity
calculation via the Async_tx layer.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Dipen Dudhat <Dipen.Dudhat@freescale.com>
Signed-off-by: Maneesh Gupta <Maneesh.Gupta@freescale.com>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
---
 drivers/crypto/Kconfig   |    9 +
 drivers/crypto/talitos.c |  410 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/crypto/talitos.h |   53 ++++++
 3 files changed, 472 insertions(+), 0 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index be6b2ba..f0a7c29 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -222,6 +222,15 @@ config CRYPTO_DEV_TALITOS
 	  To compile this driver as a module, choose M here: the module
 	  will be called talitos.

+config CRYPTO_DEV_TALITOS_RAIDXOR
+	bool "Talitos RAID5 XOR Calculation Offload"
+	default y
+	select DMA_ENGINE
+	depends on CRYPTO_DEV_TALITOS
+	help
+	  Say 'Y' here to use the Freescale Security Engine (SEC) to
+	  offload RAID XOR parity Calculation
+
 config CRYPTO_DEV_IXP4XX
 	tristate "Driver for IXP4xx crypto hardware acceleration"
 	depends on ARCH_IXP4XX
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index efff788..1e0c9e6 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -619,6 +619,396 @@ static void talitos_unregister_rng(struct device *dev)
 	hwrng_unregister(&priv->rng);
 }

+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error);
+
+static enum dma_status talitos_is_tx_complete(struct dma_chan *chan,
+					      dma_cookie_t cookie,
+					      struct dma_tx_state *state)
+{
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	last_used = chan->cookie;
+	last_complete = xor_chan->completed_cookie;
+
+	if (state->last)
+		state->last = last_complete;
+
+	if (state->used)
+		state->used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
+{
+	struct talitos_xor_desc *desc, *_desc;
+	unsigned long flags;
+	int status;
+	struct talitos_private *priv;
+	int ch;
+
+	priv = dev_get_drvdata(xor_chan->dev);
+	ch = atomic_inc_return(&priv->last_chan) &
+				  (priv->num_channels - 1);
+	spin_lock_irqsave(&xor_chan->desc_lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		status = talitos_submit(xor_chan->dev, ch, &desc->hwdesc,
+					talitos_release_xor, desc);
+		if (status != -EINPROGRESS)
+			break;
+
+		list_del(&desc->node);
+		list_add_tail(&desc->node, &xor_chan->in_progress_q);
+	}
+
+	spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
+}
+
+static void talitos_xor_run_tx_complete_actions(struct talitos_xor_desc *desc,
+		struct talitos_xor_chan *xor_chan)
+{
+	struct device *dev = xor_chan->dev;
+	dma_addr_t dest, addr;
+	unsigned int src_cnt = desc->unmap_src_cnt;
+	unsigned int len = desc->unmap_len;
+	enum dma_ctrl_flags flags = desc->async_tx.flags;
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+	/* unmap dma addresses */
+	dest = desc->hwdesc.ptr[6].ptr;
+	if (likely(!(flags & DMA_COMPL_SKIP_DEST_UNMAP)))
+		dma_unmap_page(dev, dest, len, DMA_BIDIRECTIONAL);
+
+	desc->idx = 6 - src_cnt;
+	while(desc->idx < 6) {
+		addr = desc->hwdesc.ptr[desc->idx++].ptr;
+		if (likely(!(flags & DMA_COMPL_SKIP_SRC_UNMAP)))
+			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(tx);
+}
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error)
+{
+	struct talitos_xor_desc *desc = context;
+	struct talitos_xor_chan *xor_chan;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	if (unlikely(error))
+		dev_err(dev, "xor operation: talitos error %d\n", error);
+
+	xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
+				common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (xor_chan->completed_cookie < desc->async_tx.cookie)
+		xor_chan->completed_cookie = desc->async_tx.cookie;
+
+	callback = desc->async_tx.callback;
+	callback_param = desc->async_tx.callback_param;
+
+	if (callback) {
+		spin_unlock_bh(&xor_chan->desc_lock);
+		callback(callback_param);
+		spin_lock_bh(&xor_chan->desc_lock);
+	}
+
+	talitos_xor_run_tx_complete_actions(desc, xor_chan);
+
+	list_del(&desc->node);
+	list_add_tail(&desc->node, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->pending_q))
+		talitos_process_pending(xor_chan);
+}
+
+/**
+ * talitos_issue_pending - move the descriptors in submit
+ * queue to pending queue and submit them for processing
+ * @chan: DMA channel
+ */
+static void talitos_issue_pending(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_tail_init(&xor_chan->submit_q,
+				 &xor_chan->pending_q);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	talitos_process_pending(xor_chan);
+}
+
+static dma_cookie_t talitos_async_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct talitos_xor_desc *desc;
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t cookie;
+
+	desc = container_of(tx, struct talitos_xor_desc, async_tx);
+	xor_chan = container_of(tx->chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	cookie = xor_chan->common.cookie + 1;
+	if (cookie < 0)
+		cookie = 1;
+
+	desc->async_tx.cookie = cookie;
+	xor_chan->common.cookie = desc->async_tx.cookie;
+
+	list_splice_tail_init(&desc->tx_list,
+				 &xor_chan->submit_q);
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return cookie;
+}
+
+static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
+				struct talitos_xor_chan *xor_chan, gfp_t flags)
+{
+	struct talitos_xor_desc *desc;
+
+	desc = kmalloc(sizeof(*desc), flags);
+	if (desc) {
+		xor_chan->total_desc++;
+		desc->async_tx.tx_submit = talitos_async_tx_submit;
+	}
+
+	return desc;
+}
+
+static void talitos_free_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc, *_desc;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->submit_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->in_progress_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->free_desc, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+
+	/* Some descriptor not freed? */
+	if (unlikely(xor_chan->total_desc))
+		dev_warn(chan->device->dev, "Failed to free xor channel resource\n");
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+}
+
+static int talitos_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc;
+	LIST_HEAD(tmp_list);
+	int i;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	if (!list_empty(&xor_chan->free_desc))
+		return xor_chan->total_desc;
+
+	for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) {
+		desc = talitos_xor_alloc_descriptor(xor_chan,
+				GFP_KERNEL | GFP_DMA);
+		if (!desc) {
+			dev_err(xor_chan->common.device->dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->node, &tmp_list);
+	}
+
+	if (!i)
+		return -ENOMEM;
+
+	/* At least one desc is allocated */
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_init(&tmp_list, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return xor_chan->total_desc;
+}
+
+static struct dma_async_tx_descriptor *talitos_prep_dma_xor(
+			struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+			unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *new;
+	struct talitos_desc *desc;
+	int i, j;
+
+	BUG_ON(len > TALITOS_MAX_DATA_LEN);
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->free_desc)) {
+		new = container_of(xor_chan->free_desc.next,
+				   struct talitos_xor_desc, node);
+		list_del(&new->node);
+	} else {
+		 new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL | GFP_DMA);
+	}
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	if (!new) {
+		dev_err(xor_chan->common.device->dev,
+			"No free memory for XOR DMA descriptor\n");
+		return NULL;
+	}
+	dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
+
+	INIT_LIST_HEAD(&new->node);
+	INIT_LIST_HEAD(&new->tx_list);
+
+	desc = &new->hwdesc;
+	/* Set destination: Last pointer pair */
+	to_talitos_ptr(&desc->ptr[6], dest);
+	desc->ptr[6].len = cpu_to_be16(len);
+	desc->ptr[6].j_extent = 0;
+	new->unmap_src_cnt = src_cnt;
+	new->unmap_len = len;
+
+	/* Set Sources: End loading from second-last pointer pair */
+	for (i = 5, j = 0; j < src_cnt && i >= 0; i--, j++) {
+		to_talitos_ptr(&desc->ptr[i], src[j]);
+		desc->ptr[i].len = cpu_to_be16(len);
+		desc->ptr[i].j_extent = 0;
+	}
+
+	/*
+	 * documentation states first 0 ptr/len combo marks end of sources
+	 * yet device produces scatter boundary error unless all subsequent
+	 * sources are zeroed out
+	 */
+	for (; i >= 0; i--) {
+		to_talitos_ptr(&desc->ptr[i], 0);
+		desc->ptr[i].len = 0;
+		desc->ptr[i].j_extent = 0;
+	}
+
+	desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR |
+		DESC_HDR_TYPE_RAID_XOR;
+
+	new->async_tx.parent = NULL;
+	new->async_tx.next = NULL;
+	new->async_tx.cookie = 0;
+	async_tx_ack(&new->async_tx);
+
+	list_add_tail(&new->node, &new->tx_list);
+
+	new->async_tx.flags = flags;
+	new->async_tx.cookie = -EBUSY;
+
+	return &new->async_tx;
+}
+
+static void talitos_unregister_async_xor(struct device *dev)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct talitos_xor_chan *xor_chan;
+	struct dma_chan *chan, *_chan;
+
+	if (priv->dma_dev_common.chancnt)
+		dma_async_device_unregister(&priv->dma_dev_common);
+
+	list_for_each_entry_safe(chan, _chan, &priv->dma_dev_common.channels,
+				device_node) {
+		xor_chan = container_of(chan, struct talitos_xor_chan,
+					common);
+		list_del(&chan->device_node);
+		priv->dma_dev_common.chancnt--;
+		kfree(xor_chan);
+	}
+}
+
+/**
+ * talitos_register_dma_async - Initialize the Freescale XOR ADMA device
+ * It is registered as a DMA device with the capability to perform
+ * XOR operation with the Async_tx layer.
+ * The various queues and channel resources are also allocated.
+ */
+static int talitos_register_async_tx(struct device *dev, int max_xor_srcs)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct dma_device *dma_dev = &priv->dma_dev_common;
+	struct talitos_xor_chan *xor_chan;
+	int err;
+
+	xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
+	if (!xor_chan) {
+		dev_err(dev, "unable to allocate xor channel\n");
+		return -ENOMEM;
+	}
+
+	dma_dev->dev = dev;
+	dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = talitos_free_chan_resources;
+	dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
+	dma_dev->max_xor = max_xor_srcs;
+	dma_dev->device_tx_status = talitos_is_tx_complete;
+	dma_dev->device_issue_pending = talitos_issue_pending;
+	INIT_LIST_HEAD(&dma_dev->channels);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+	xor_chan->dev = dev;
+	xor_chan->common.device = dma_dev;
+	xor_chan->total_desc = 0;
+	INIT_LIST_HEAD(&xor_chan->submit_q);
+	INIT_LIST_HEAD(&xor_chan->pending_q);
+	INIT_LIST_HEAD(&xor_chan->in_progress_q);
+	INIT_LIST_HEAD(&xor_chan->free_desc);
+	spin_lock_init(&xor_chan->desc_lock);
+
+	list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
+	dma_dev->chancnt++;
+
+	err = dma_async_device_register(dma_dev);
+	if (err) {
+		dev_err(dev, "Unable to register XOR with Async_tx\n");
+		goto err_out;
+	}
+
+	return err;
+
+err_out:
+	talitos_unregister_async_xor(dev);
+	return err;
+}
+#endif
+
 /*
  * crypto alg
  */
@@ -2891,6 +3281,26 @@ static int talitos_probe(struct platform_device *ofdev)
 			dev_info(dev, "hwrng\n");
 	}

+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+	/*
+	 * register with async_tx xor, if capable
+	 * SEC 2.x support up to 3 RAID sources,
+	 * SEC 3.x support up to 6
+	 */
+	if (hw_supports(dev, DESC_HDR_SEL0_AESU | DESC_HDR_TYPE_RAID_XOR)) {
+		int max_xor_srcs = 3;
+		if (of_device_is_compatible(np, "fsl,sec3.0"))
+			max_xor_srcs = 6;
+		err = talitos_register_async_tx(dev, max_xor_srcs);
+		if (err) {
+			dev_err(dev, "failed to register async_tx xor: %d\n",
+					err);
+			goto err_out;
+		}
+		dev_info(dev, "max_xor_srcs %d\n", max_xor_srcs);
+	}
+#endif
+
 	/* register crypto algorithms the device supports */
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 61a1405..fc9d125 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -30,6 +30,7 @@

 #define TALITOS_TIMEOUT 100000
 #define TALITOS_MAX_DATA_LEN 65535
+#define TALITOS_MAX_DESCRIPTOR_NR 256

 #define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
 #define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
@@ -131,7 +132,57 @@ struct talitos_private {

 	/* hwrng device */
 	struct hwrng rng;
+
+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+	/* XOR Device */
+	struct dma_device dma_dev_common;
+#endif
+};
+
+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+/**
+ * talitos_xor_chan - context management for the async_tx channel
+ * @completed_cookie: the last completed cookie
+ * @desc_lock: lock for tx queue
+ * @total_desc: number of descriptors allocated
+ * @submit_q: queue of submitted descriptors
+ * @pending_q: queue of pending descriptors
+ * @in_progress_q: queue of descriptors in progress
+ * @free_desc: queue of unused descriptors
+ * @dev: talitos device implementing this channel
+ * @common: the corresponding xor channel in async_tx
+ */
+struct talitos_xor_chan {
+	dma_cookie_t completed_cookie;
+	spinlock_t desc_lock;
+	unsigned int total_desc;
+	struct list_head submit_q;
+	struct list_head pending_q;
+	struct list_head in_progress_q;
+	struct list_head free_desc;
+	struct device *dev;
+	struct dma_chan common;
+};
+
+/**
+ * talitos_xor_desc - software xor descriptor
+ * @async_tx: the referring async_tx descriptor
+ * @node:
+ * @hwdesc: h/w descriptor
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction byte count
+ * @idx: index of xor sources
+ */
+struct talitos_xor_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head tx_list;
+	struct list_head node;
+	struct talitos_desc hwdesc;
+	unsigned int unmap_src_cnt;
+	unsigned int unmap_len;
+	unsigned int idx;
 };
+#endif

 extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 			  void (*callback)(struct device *dev,
@@ -284,6 +335,7 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 /* primary execution unit mode (MODE0) and derivatives */
 #define	DESC_HDR_MODE0_ENCRYPT		cpu_to_be32(0x00100000)
 #define	DESC_HDR_MODE0_AESU_CBC		cpu_to_be32(0x00200000)
+#define	DESC_HDR_MODE0_AESU_XOR         cpu_to_be32(0x0c600000)
 #define	DESC_HDR_MODE0_DEU_CBC		cpu_to_be32(0x00400000)
 #define	DESC_HDR_MODE0_DEU_3DES		cpu_to_be32(0x00200000)
 #define	DESC_HDR_MODE0_MDEU_CONT	cpu_to_be32(0x08000000)
@@ -344,6 +396,7 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 #define DESC_HDR_TYPE_IPSEC_ESP			cpu_to_be32(1 << 3)
 #define DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU	cpu_to_be32(2 << 3)
 #define DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU	cpu_to_be32(4 << 3)
+#define DESC_HDR_TYPE_RAID_XOR                  cpu_to_be32(21 << 3)

 /* link table extent field bits */
 #define DESC_PTR_LNKTBL_JUMP			0x80
--
1.7.5.1

^ permalink raw reply related

* RE: [PATCH V3] powerpc/85xx: Add machine check handler to fix PCIe erratum on mpc85xx
From: David Laight @ 2012-07-16  8:45 UTC (permalink / raw)
  To: Jia Hongtao, linuxppc-dev, galak
In-Reply-To: <1342406750-32694-1-git-send-email-B38951@freescale.com>

> A PCIe erratum of mpc85xx may causes a core hang when a link of PCIe
> goes down. when the link goes down, Non-posted transactions issued
> via the ATMU requiring completion result in an instruction stall.
> At the same time a machine-check exception is generated to the core
> to allow further processing by the handler. We implements the handler
> which skips the instruction caused the stall.

Does skipping the faulting instruction really help?

We use the mpc83xx and have seen PCIe issues that may be
related to a documented errata in the CSB-PEX bridge (which
I can't find my copy of).
Do the 85xx and 83xx have the same PCIe block?
Recovery from it seems almost impossible - my interpretation
was that you have to hard reset the PCIe block and rewrite
the entire configuration.

The problem we see happens during some PEX DMA transfers
(possibly due to issues with the target), the DMA doesn't
complete and any further PIO transfers fault (and panic).
Unfortunately the PEX has no documented status registers,
so it is difficult to determine what is (or rather isn't)
Happening.

	David

^ permalink raw reply

* RE: Standalone SRIO Driver for Linux
From: Bounine, Alexandre @ 2012-07-16 14:15 UTC (permalink / raw)
  To: Saravanan S; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <CAEqOc-QzeNz0z33yegaa-9FVpbcCUaMix4-Gfkn0P8Y3487YsA@mail.gmail.com>

Hi,

With current way of device reporting in RapidIO subsystem you will need
to have at least one SRIO device (switch or EP) attached to your mport
to be able to find a reference to that mport device object. Like in the cod=
e fragment below:

static int rioport_init_module(void)
{
	int ret, i;
	dev_t dev =3D 0;
	struct rio_mport *mport;
	struct rio_net *net;
	struct rio_dev *rdev =3D NULL;
	struct rio_mport *port;

... skip ...

	rdev =3D rio_get_device(RIO_ANY_ID, RIO_ANY_ID, rdev);
	if (NULL =3D=3D rdev)
		return -ENODEV;

	mport =3D rdev->net->hport;
	net =3D rdev->net;
... skip ...
}

I would like to mention that 2.6.34 kernel is quite behind on rapidio suppo=
rt updates.
There are multiple changes/updates added added since 2.6.34.
You may consider backporting these updates to your kernel version
if you are unable to use one of the latest kernel versions.

Alex.

From: Saravanan S [mailto:sarans1987@gmail.com]=20
Sent: Saturday, July 14, 2012 2:25 AM
To: Bounine, Alexandre
Cc: linuxppc-dev@lists.ozlabs.org
Subject: Re: Standalone SRIO Driver for Linux

Hi ,
=A0 =A0 =A0Thanks for the reply . i will try to share some of my code later=
 . Looking forward to ur ideas.=A0

Regards,

S.Saravanan
On Fri, Jul 13, 2012 at 6:18 PM, Bounine, Alexandre <Alexandre.Bounine@idt.=
com> wrote:
This should work. We use similar approach to test our mport HW drivers.
=A0
Alex.
=A0
From: Linuxppc-dev [mailto:linuxppc-dev-bounces+alexandre.bounine=3Didt.com=
@lists.ozlabs.org] On Behalf Of Saravanan S
Sent: Friday, July 13, 2012 2:16 AM
To: linuxppc-dev@lists.ozlabs.org
Subject: Standalone SRIO Driver for Linux
=A0
Hi ,

=A0=A0=A0=A0=A0 Iam currently working on the GE make DSP230 board consistin=
g of Quad PowerPC8640 nodes =A0 interconnected by SRIO with Linux 2.6.34 . =
However the only way to access the SRIO is through rionet facility . Our re=
quirement is to use the SRIO interconnect without the Ethernet overheads. T=
his would definitely enable higher speeds (though I cant find any throughpu=
t figures for SRIO in Linux on the net ??? ).=A0 My query is that whether a=
ny attempt has been made=A0 to develop a=A0 standalone driver and API to ac=
cess the messaging=A0 and doorbell services of SRIO . If no then request yo=
u to please provide inputs on the same. From my study I have the following =
thoughts for the driver :

a) Have a character device interface for user .
b) Basically use the rio support functions provided in rio.c like=A0 rio_ad=
d_inb_buffer ,=A0 rio_add_outb_message to transfer and receive messages and=
 add buffers .
c) Maintain a dedicated ring of buffers in the driver and transfer=A0 data =
to and from the buffer to user space .

Is this the right direction . Would really appreciate any inputs . thanks i=
n advance.

Regards,

S.Saravanan

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [linuxppc-release] [PATCH v3 4/4] fsl-dma: use spin_lock_bh to instead of spin_lock_irqsave
From: Tabi Timur-B04825 @ 2012-07-16 14:25 UTC (permalink / raw)
  To: Liu Qiang-B32616
  Cc: Li Yang-R58472, Vinod Koul, herbert@gondor.hengli.com.au,
	linux-crypto@vger.kernel.org, Dan Williams,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <1342411780-29930-1-git-send-email-qiang.liu@freescale.com>

Qiang Liu wrote:
> Use spin_lock_bh to instead of spin_lock_irqsave for improving performanc=
e.

You forgot to include the evidence that performance has improved, as well=20
as an explanation why it's okay to use spin_lock_bh, and why it's faster.=20
  I told you to respin the patch with that information in the patch=20
description.

--=20
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH] ppc44x/watchdog: Select WATCHDOG_NOWAYOUT option
From: Scott Wood @ 2012-07-16 14:43 UTC (permalink / raw)
  To: lu.jiang; +Cc: linuxppc-dev
In-Reply-To: <50037762.4000800@windriver.com>

On 07/15/2012 09:07 PM, Lu.Jiang wrote:
> =E4=BA=8E 2012=E5=B9=B407=E6=9C=8813=E6=97=A5 19:50, Kumar Gala =E5=86=99=
=E9=81=93:
>> On Jul 12, 2012, at 9:44 PM, Jiang Lu wrote:
>>
>>> On PPC44x core, the WRC(Watchdog-timer Reset Control) field of TCR
>>> of timer can not reset by software after set to a non-zero value.
>>> Which means software can not reset the timeout behaviour of watchdog
>>> timer.
>>>
>>> This patch selects WATCHDOG_NOWAYOUT option for 44x platforms to
>>> indicate the watchdog timer can not be disabled once fired.
>>>
>>> Signed-off-by: Jiang Lu <lu.jiang@windriver.com>
>>> ---
>>> drivers/watchdog/Kconfig |    1 +
>>> 1 files changed, 1 insertions(+), 0 deletions(-)
>> I believe this is not 44x specific, but how Book-E watchdog is
>> architected.
>>
>>> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
>>> index 3709624..41f3dff 100644
>>> --- a/drivers/watchdog/Kconfig
>>> +++ b/drivers/watchdog/Kconfig
>>> @@ -1084,6 +1084,7 @@ config PIKA_WDT
>>> config BOOKE_WDT
>>>     tristate "PowerPC Book-E Watchdog Timer"
>>>     depends on BOOKE || 4xx
>>> +    select WATCHDOG_NOWAYOUT if 44x
>> This should probably be 'select WATCHDOG_NOWAYOUT if BOOKE'
>=20
> On ppc44x's processor, if we disabled 'WATCHDOG_NOWAYOUT ' option. The
> driver's release routine will try to disable the watchdog , by clearing
> the WIE & WTDP field in TCR.
> Since the ppc44x's watch dog can not reset by software, such operation
> only set the timeout value(WDTP) to minimum, and cause the system reboo=
t
> immediately.
>=20
> I checked ppc 476, 405 & 450's manual, these document said the
> WRC(Watchdog-timer Reset Control) field of TCR of timer
> can not reset by software after set to a non-zero value. I think all
> ppc44x core should got same limitation.

This is (supposed to be) true on FSL e500 as well.

> While on FSL's platform, we did not met such issue.

You tested this and were able to clear WRC on an e500-based chip?  Which
one?

-Scott

^ permalink raw reply

* Re: [PATCH 5/7] pci: minimal alignment for bars of P2P bridges
From: Bjorn Helgaas @ 2012-07-16 14:58 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linux-pci, yinghai, linuxram, linuxppc-dev
In-Reply-To: <20120716035044.GC24203@shangw>

On Sun, Jul 15, 2012 at 9:50 PM, Gavin Shan <shangw@linux.vnet.ibm.com> wrote:
> On Fri, Jul 13, 2012 at 02:12:50PM -0600, Bjorn Helgaas wrote:
>>On Fri, Jun 29, 2012 at 02:47:48PM +0800, Gavin Shan wrote:
>>> On some powerpc platforms, device BARs need to be assigned to separate
>>> "segments" of the address space in order for the error isolation and HW
>>> virtualization mechanisms (EEH) to work properly. Those "segments" have
>>> a minimum size that can be fairly large (16M). In order to be able to
>>> use the generic resource assignment code rather than re-inventing our
>>> own, we chose to group devices by bus. That way, a simple change of the
>>> minimum alignment requirements of resources assigned to PCI to PCI (P2P)
>>> bridges is enough to ensure that all BARs for devices below those bridges
>>> will fit into contiguous sets of segments and there will be no overlap.
>
> I send the previous reply in a rush and that missed some
> necessary information. So I resend it with some makeup.
>
>>If I understand correctly, you might have something like this:
>>
>>  PCI host bridge to bus 0000:00
>>  pci_bus 0000:00: root bus resource [mem 0xc0000000-0xcfffffff]
>>  0000:00:01.0: PCI bridge to [bus 10-1f]
>>  0000:00:01.0:   bridge window [mem 0xc1000000-0xc1ffffff]
>>  0000:00:02.0: PCI bridge to [bus 20-2f]
>>  0000:00:02.0:   bridge window [mem 0xc2000000-0xc2ffffff]
>>
>>where everything under bridge 00:01.0 is in one EEH segment, and
>>everything under 00:02.0 is in another.  In this case, each EEH
>>segment is 16MB.
>>
>>I think your proposal is basically that when we add up resources required
>>below the P2P bridges, we round up to the default 1MB (the minimum P2P
>>bridge memory aperture size per spec) *or* to a larger value, e.g., 16MB,
>>if the architecture requires it.
>
> Yes, you're correct.
>
>>That makes sense to me, but I have some implementation questions.
>>
>>Your patches make the required alignment a property of the host bridge.
>>But don't you want to do this rounding up only at certain levels of the
>>hierarchy?  For example, what if you had another P2P bridge:
>>
>>  0000:10:01.0: PCI bridge to [bus 18-1f]
>>
>>I assume the devices on bus 0000:18 would still be in the first EEH
>>segment, and you wouldn't necessarily want to round up the 10:01.0
>>apertures to 16MB.
>>
>>Maybe there should be an interface like this:
>>
>>  resource_size_t __weak pcibios_window_alignment(struct pci_bus *bus,
>>                                                 unsigned long type)
>>  {
>>    if (type & IORESOURCE_MEM)
>>      return 1024*1024;                /* mem windows must be 1MB aligned */
>>    if (bus->self->io_window_1k)
>>      return 1024;
>>    return 4*1024;             /* I/O windows default to 4K alignment */
>>  }
>>
>>that the arch could override?  Then you could return the 16MB alignment
>>for the top-level P2P bridge leading to an EEH segment, and use the
>>default alignment for P2P bridges *inside* the segment.
>>
>
> Yes. I think it's good mechanism to apply the minimal resource alignment
> for P2P bridges. Also, it wouldn't waste more resources if the specific
> PCI bridge (not PCIe bridge) needn't form separate EEH segment. However,
> I have some implementation questions for this.
>
> 1. I just checked out source code from following link, but it seems that
>    pci_dev doesn't have field called "io_window_1k", so I'm not sure I
>    should add that by myself?
>
>         git clone git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git linux-pci-next

No, don't add io_window_1k in your patch.  That was added in a recent
patch and should already be in my "next" branch:

http://git.kernel.org/?p=linux/kernel/git/helgaas/pci.git;a=commitdiff;h=2b28ae1912e5ce5bb0527e352ae6ff04e76183d1

If there are merge conflicts with your patch, I can resolve them.

> 2. With the mechanism (__weak function) you suggested, the alignment of
>    the specific P2P bridge should be figured out by PowerPC platform. That's
>    to say, the PPC platform has to introduce function pcibios_window_alignment()
>    and return the appropriate alignment.

Yes.

> 3. In order to return appropriate alignment from PPC platform, We need
>    to introduce same function for PPC platform. Also, we probably need
>    introduce function "ppc_md.pcibios_window_alignment" so that those
>    specific platforms (e.g. powernv) could override that.

Yes.

^ permalink raw reply

* Re: [PATCH 1/2 v2] PCI: Add PCI_DEV_FLAGS_USE_NON_MSI_INTX_IRQ to enable non MSI/INTx interrupt
From: Scott Wood @ 2012-07-16 15:47 UTC (permalink / raw)
  To: Shengzhou Liu; +Cc: bhelgaas, linux-pci, linuxppc-dev
In-Reply-To: <1342409487-28256-1-git-send-email-Shengzhou.Liu@freescale.com>

On 07/15/2012 10:31 PM, Shengzhou Liu wrote:
> On some platforms, in RC mode, root port has neither MSI/MSI-X nor INTx
> interrupt generated, which are available only in EP mode on those platform.
> In this case, we try to use other interrupt for port service driver to have
> AER, Hot-plug, etc, services to work.
> 
> Signed-off-by: Shengzhou Liu <Shengzhou.Liu@freescale.com>
> ---
> v2: separated platform-specific part to arch/powerpc/sysdev.
> 
>  drivers/pci/pcie/portdrv_core.c |   10 ++++++++--
>  drivers/pci/quirks.c            |    9 +++++++++
>  include/linux/pci.h             |    5 +++++
>  3 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
> index 75915b3..837ad15 100644
> --- a/drivers/pci/pcie/portdrv_core.c
> +++ b/drivers/pci/pcie/portdrv_core.c
> @@ -212,8 +212,14 @@ static int init_service_irqs(struct pci_dev *dev, int *irqs, int mask)
>  	if (!pcie_port_enable_msix(dev, irqs, mask))
>  		return 0;
>  
> -	/* We're not going to use MSI-X, so try MSI and fall back to INTx */
> -	if (!pci_enable_msi(dev) || dev->pin)
> +	/*
> +	 * We're not going to use MSI-X, so try MSI and fall back to INTx.
> +	 * Eventually, if neither MSI/MSI-X nor INTx available, try other
> +	 * interrupt. (On some platforms, root port doesn't support generating
> +	 * MSI/MSI-X/INTx in RC mode)
> +	 */
> +	if (!pci_enable_msi(dev) || dev->pin || ((dev->dev_flags &
> +			PCI_DEV_FLAGS_USE_NON_MSI_INTX_IRQ) && dev->irq))
>  		irq = dev->irq;

I didn't see a response on the question about what would happen if we
did this unconditionally (i.e. just s/dev->pin/dev->irq/ in the original
code).  We should avoid introducing flags like this unless there's a
good reason.

Maybe submit a patch that does it unconditionally, and see if that draws
a complaint.

-Scott

^ permalink raw reply

* Re: Build regressions/improvements in v3.5-rc7
From: Geert Uytterhoeven @ 2012-07-16 18:59 UTC (permalink / raw)
  To: linux-kernel; +Cc: Linuxppc-dev, Chris Zankel, Linux-sh list
In-Reply-To: <1342464946-30236-1-git-send-email-geert@linux-m68k.org>

On Mon, Jul 16, 2012 at 8:55 PM, Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> JFYI, when comparing v3.5-rc7 to v3.5-rc6[3], the summaries are:
>   - build errors: +5/-24

5 regressions:
  + arch/powerpc/sysdev/mpic.c: error: case label does not reduce to
an integer constant:  => 890:9, 898:9, 886:9, 894:9

powerpc-randconfig

  + drivers/scsi/bnx2i/bnx2i_iscsi.c: error: implicit declaration of
function 'pci_iomap' [-Werror=implicit-function-declaration]:  =>
816:3
  + drivers/scsi/bnx2i/bnx2i_iscsi.c: error: implicit declaration of
function 'pci_iounmap' [-Werror=implicit-function-declaration]:  =>
887:3

xtensa-allmodconfig

  + error: idt77252.c: relocation truncated to fit: R_PPC64_REL24
against symbol `_restgpr0_29' defined in .text section in
arch/powerpc/lib/built-in.o:  => (.text+0x1ff83a0)

powerpc-allyesconfig

  + kernel/sys.c: error: 'mmap_min_addr' undeclared (first use in this
function): 1864:34 => 1868:37, 1868:34

sh-randconfig, sh-allyesconfig, sh-allmodconfig

> [1] http://kisskb.ellerman.id.au/kisskb/head/5243/ (all 116 configs)

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH] mlx4_en: map entire pages to increase throughput
From: Thadeu Lima de Souza Cascardo @ 2012-07-16 19:06 UTC (permalink / raw)
  To: Rick Jones
  Cc: netdev@vger.kernel.org, leitao@linux.vnet.ibm.com,
	amirv@mellanox.com, yevgenyp@mellanox.co.il,
	klebers@linux.vnet.ibm.com, anton, brking@linux.vnet.ibm.com,
	ogerlitz@mellanox.com, linuxppc-dev, davem@davemloft.net
In-Reply-To: <50044F1D.6000703@hp.com>

On Mon, Jul 16, 2012 at 10:27:57AM -0700, Rick Jones wrote:
> On 07/16/2012 10:01 AM, Thadeu Lima de Souza Cascardo wrote:
> >In its receive path, mlx4_en driver maps each page chunk that it pushes
> >to the hardware and unmaps it when pushing it up the stack. This limits
> >throughput to about 3Gbps on a Power7 8-core machine.
> 
> That seems rather extraordinarily low - Power7 is supposed to be a
> rather high performance CPU.  The last time I noticed O(3Gbit/s) on
> 10G for bulk transfer was before the advent of LRO/GRO - that was in
> the x86 space though.  Is mapping really that expensive with Power7?
> 

Copying linuxppc-dev and Anton here. But I can tell you that we have
lock contention when doing the mapping on the same adapter (map table
per device). Anton has sent some patches that improves that *a lot*.

However, for 1500 MTU, mlx4_en was doing two unmaps and two maps per
packet. The problem is not the CPU power needed to do the mappings, but
that we find the lock contention and end up with the CPUs more than 30%
of the time spent on spin locking.

> 
> >One solution is to map the entire allocated page at once. However, this
> >requires that we keep track of every page fragment we give to a
> >descriptor. We also need to work with the discipline that all fragments will
> >be released (in the sense that it will not be reused by the driver
> >anymore) in the order they are allocated to the driver.
> >
> >This requires that we don't reuse any fragments, every single one of
> >them must be reallocated. We do that by releasing all the fragments that
> >are processed and only after finished processing the descriptors, we
> >start the refill.
> >
> >We also must somehow guarantee that we either refill all fragments in a
> >descriptor or none at all, without resorting to giving up a page
> >fragment that we would have already given. Otherwise, we would break the
> >discipline of only releasing the fragments in the order they were
> >allocated.
> >
> >This has passed page allocation fault injections (restricted to the
> >driver by using required-start and required-end) and device hotplug
> >while 16 TCP streams were able to deliver more than 9Gbps.
> 
> What is the effect on packet-per-second performance?  (eg aggregate,
> burst-mode netperf TCP_RR with TCP_NODELAY set or perhaps UDP_RR)
> 

I used uperf with TCP_NODELAY and 16 threads sending from another
machine 64000-sized writes for 60 seconds.

I get 5898op/s (3.02Gb/s) without the patch against 18022ops/s
(9.23Gb/s) with the patch.

Best regards.
Cascardo.


> rick jones
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [PATCH] powerpc: set stack limit properly in crit_transfer_to_handler
From: Stuart Yoder @ 2012-07-16 19:06 UTC (permalink / raw)
  To: benh; +Cc: sfr, Stuart Yoder, paulus, linuxppc-dev, agraf

From: Stuart Yoder <stuart.yoder@freescale.com>

without setting the stack limit like this there is the possibility
of stack overflow which corrupts the thread info but
is not detected by stack overflow detection

Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
---
 arch/powerpc/kernel/entry_32.S |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 5207d5a..ead5016 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -89,10 +89,14 @@ crit_transfer_to_handler:
 	mfspr	r0,SPRN_SRR1
 	stw	r0,_SRR1(r11)
 
+	/* set the stack limit to the current stack
+	 * and set the limit to protect the thread_info
+	 * struct
+	 */
 	mfspr	r8,SPRN_SPRG_THREAD
 	lwz	r0,KSP_LIMIT(r8)
 	stw	r0,SAVED_KSP_LIMIT(r11)
-	CURRENT_THREAD_INFO(r0, r1)
+	rlwimi	r0,r1,0,0,(31-THREAD_SHIFT)
 	stw	r0,KSP_LIMIT(r8)
 	/* fall through */
 #endif
@@ -109,10 +113,14 @@ crit_transfer_to_handler:
 	mfspr	r0,SPRN_SRR1
 	stw	r0,crit_srr1@l(0)
 
+	/* set the stack limit to the current stack
+	 * and set the limit to protect the thread_info
+	 * struct
+	 */
 	mfspr	r8,SPRN_SPRG_THREAD
 	lwz	r0,KSP_LIMIT(r8)
 	stw	r0,saved_ksp_limit@l(0)
-	CURRENT_THREAD_INFO(r0, r1)
+	rlwimi	r0,r1,0,0,(31-THREAD_SHIFT)
 	stw	r0,KSP_LIMIT(r8)
 	/* fall through */
 #endif
-- 
1.7.3.4

^ permalink raw reply related

* Re: [PATCH] MIPS: fix bug.h MIPS build regression
From: Geert Uytterhoeven @ 2012-07-16 19:27 UTC (permalink / raw)
  To: David Daney
  Cc: Linux MIPS Mailing List, Linux-sh list, linux-kernel,
	Ralf Baechle, Linuxppc-dev, Paul Mundt, Chris Zankel,
	Yoichi Yuasa
In-Reply-To: <4FE4B13E.10709@caviumnetworks.com>

On Fri, Jun 22, 2012 at 7:54 PM, David Daney <ddaney@caviumnetworks.com> wrote:
> On 06/20/2012 09:12 AM, Ralf Baechle wrote:
>>
>> On Wed, Jun 20, 2012 at 03:27:59PM +0900, Yoichi Yuasa wrote:
>>
>>> Commit: 3777808873b0c49c5cf27e44c948dfb02675d578 breaks all MIPS builds.
>>
>>
>> Thanks, fix applied.
>>
>
> Where was it applied?
>
> It doesn't show up in linux-next for 20120622, which is where it is needed.

It's also desperately needed in mainline for 3.5.

Ralf?

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH] mlx4_en: map entire pages to increase throughput
From: Rick Jones @ 2012-07-16 19:42 UTC (permalink / raw)
  To: Thadeu Lima de Souza Cascardo
  Cc: netdev@vger.kernel.org, leitao@linux.vnet.ibm.com,
	amirv@mellanox.com, yevgenyp@mellanox.co.il,
	klebers@linux.vnet.ibm.com, anton@samba.org,
	brking@linux.vnet.ibm.com, ogerlitz@mellanox.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <20120716190611.GA1023@oc1711230544.ibm.com>

On 07/16/2012 12:06 PM, Thadeu Lima de Souza Cascardo wrote:
> On Mon, Jul 16, 2012 at 10:27:57AM -0700, Rick Jones wrote:
>
>> What is the effect on packet-per-second performance?  (eg aggregate,
>> burst-mode netperf TCP_RR with TCP_NODELAY set or perhaps UDP_RR)
>>
> I used uperf with TCP_NODELAY and 16 threads sending from another
> machine 64000-sized writes for 60 seconds.
>
> I get 5898op/s (3.02Gb/s) without the patch against 18022ops/s
> (9.23Gb/s) with the patch.

I was thinking more along the lines of an additional comparison, 
explicitly using netperf TCP_RR or something like it, not just the 
packets per second from a bulk transfer test.

rick

^ permalink raw reply

* Re: [PATCH v3 3/4] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Ira W. Snyder @ 2012-07-16 20:01 UTC (permalink / raw)
  To: Qiang Liu
  Cc: Vinod Koul, herbert, linux-crypto, Dan Williams, linuxppc-dev,
	davem
In-Reply-To: <1342411709-29895-1-git-send-email-qiang.liu@freescale.com>

On Mon, Jul 16, 2012 at 12:08:29PM +0800, Qiang Liu wrote:
> Fix the potential risk when enable config NET_DMA and ASYNC_TX.
> Async_tx is lack of support in current release process of dma descriptor,
> all descriptors will be released whatever is acked or no-acked by async_tx,
> so there is a potential race condition when dma engine is uesd by others
> clients (e.g. when enable NET_DMA to offload TCP).
> 
> In our case, a race condition which is raised when use both of talitos
> and dmaengine to offload xor is because napi scheduler will sync all
> pending requests in dma channels, it affects the process of raid operations
> due to ack_tx is not checked in fsl dma. The no-acked descriptor is freed
> which is submitted just now, as a dependent tx, this freed descriptor trigger
> BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().
> 
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Li Yang <leoli@freescale.com>
> Cc: Ira W. Snyder <iws@ovro.caltech.edu>
> Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> ---
>  drivers/dma/fsldma.c |  378 +++++++++++++++++++++++++++++--------------------
>  drivers/dma/fsldma.h |    1 +
>  2 files changed, 225 insertions(+), 154 deletions(-)
> 
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 4f2f212..4ee1b8f 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -400,6 +400,217 @@ out_splice:
>  	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
>  }
> 
> +/**
> + * fsl_chan_xfer_ld_queue - transfer any pending transactions
> + * @chan : Freescale DMA channel
> + *
> + * HARDWARE STATE: idle
> + * LOCKING: must hold chan->desc_lock
> + */
> +static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
> +{
> +	struct fsl_desc_sw *desc;
> +
> +	/*
> +	 * If the list of pending descriptors is empty, then we
> +	 * don't need to do any work at all
> +	 */
> +	if (list_empty(&chan->ld_pending)) {
> +		chan_dbg(chan, "no pending LDs\n");
> +		return;
> +	}
> +
> +	/*
> +	 * The DMA controller is not idle, which means that the interrupt
> +	 * handler will start any queued transactions when it runs after
> +	 * this transaction finishes
> +	 */
> +	if (!chan->idle) {
> +		chan_dbg(chan, "DMA controller still busy\n");
> +		return;
> +	}
> +
> +	/*
> +	 * If there are some link descriptors which have not been
> +	 * transferred, we need to start the controller
> +	 */
> +
> +	/*
> +	 * Move all elements from the queue of pending transactions
> +	 * onto the list of running transactions
> +	 */
> +	chan_dbg(chan, "idle, starting controller\n");
> +	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
> +	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
> +
> +	/*
> +	 * The 85xx DMA controller doesn't clear the channel start bit
> +	 * automatically at the end of a transfer. Therefore we must clear
> +	 * it in software before starting the transfer.
> +	 */
> +	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
> +		u32 mode;
> +
> +		mode = DMA_IN(chan, &chan->regs->mr, 32);
> +		mode &= ~FSL_DMA_MR_CS;
> +		DMA_OUT(chan, &chan->regs->mr, mode, 32);
> +	}
> +
> +	/*
> +	 * Program the descriptor's address into the DMA controller,
> +	 * then start the DMA transaction
> +	 */
> +	set_cdar(chan, desc->async_tx.phys);
> +	get_cdar(chan);
> +
> +	dma_start(chan);
> +	chan->idle = false;
> +}
> +

Please add a note about the locking requirements here, and for the other
new functions you've added.

You call this function from two places:

1) fsldma_cleanup_descriptor() - called with mod->desc_lock held
2) fsl_tx_status() - WITHOUT mod->desc_lock held

One of them is definitely wrong, and I'd bet that it is #2. You're
modifying ld_completed without a lock.

> +static int
> +fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
> +{
> +	struct fsl_desc_sw *desc, *_desc;
> +
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
> +
> +		if (async_tx_test_ack(&desc->async_tx)) {
> +			/* Remove from the list of transactions */
> +			list_del(&desc->node);
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p free\n", desc);
> +#endif
> +			dma_pool_free(chan->desc_pool, desc,
> +					desc->async_tx.phys);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * fsldma_run_tx_complete_actions - cleanup and free a single link descriptor
> + * @chan: Freescale DMA channel
> + * @desc: descriptor to cleanup and free
> + * @cookie: Freescale DMA transaction identifier
> + *
> + * This function is used on a descriptor which has been executed by the DMA
> + * controller. It will run any callbacks, submit any dependencies, and then
> + * free the descriptor.
> + */
> +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsl_desc_sw *desc,
> +		struct fsldma_chan *chan, dma_cookie_t cookie)
> +{
> +	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> +	struct device *dev = chan->common.device->dev;
> +	dma_addr_t src = get_desc_src(chan, desc);
> +	dma_addr_t dst = get_desc_dst(chan, desc);
> +	u32 len = get_desc_cnt(chan, desc);
> +
> +	BUG_ON(txd->cookie < 0);
> +
> +	if (txd->cookie > 0) {
> +		cookie = txd->cookie;
> +
> +		/* Run the link descriptor callback function */
> +		if (txd->callback) {
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p callback\n", desc);
> +#endif
> +			txd->callback(txd->callback_param);
> +		}
> +
> +		/* Unmap the dst buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> +				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> +			else
> +				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> +		}
> +
> +		/* Unmap the src buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> +				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> +			else
> +				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> +		}
> +	}
> +
> +	/* Run any dependencies */
> +	dma_run_dependencies(txd);
> +
> +	return cookie;
> +}
> +
> +static int
> +fsldma_clean_running_descriptor(struct fsl_desc_sw *desc,
> +			struct fsldma_chan *chan)
> +{
> +	/* Remove from the list of transactions */
> +	list_del(&desc->node);
> +	/* the client is allowed to attach dependent operations
> +	 * until 'ack' is set
> +	 */

This comment is does not follow the coding style. It should be:

/*
 * the client is allowed to attech dependent operations
 * until 'ack' is set
 */

> +	if (!async_tx_test_ack(&desc->async_tx)) {
> +		/* move this slot to the completed */

Perhaps a better comment would be:

Move this descriptor to the list of descriptors which is complete, but
still awaiting the 'ack' bit to be set.

> +		list_add_tail(&desc->node, &chan->ld_completed);
> +		return 0;
> +	}
> +
> +	dma_pool_free(chan->desc_pool, desc,
> +			desc->async_tx.phys);

This should all be on one line. It is less than 80 characters wide.

> +	return 0;
> +}
> +

Locking notes please.

> +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
> +{
> +	struct fsl_desc_sw *desc, *_desc;
> +	dma_cookie_t cookie = 0;
> +	dma_addr_t curr_phys = get_cdar(chan);
> +	int idle = dma_is_idle(chan);
> +	int seen_current = 0;
> +
> +	fsldma_clean_completed_descriptor(chan);
> +
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> +		/* do not advance past the current descriptor loaded into the
> +		 * hardware channel, subsequent descriptors are either in
> +		 * process or have not been submitted
> +		 */

Coding style.

> +		if (seen_current)
> +			break;
> +
> +		/* stop the search if we reach the current descriptor and the
> +		 * channel is busy
> +		 */

Coding style.

> +		if (desc->async_tx.phys == curr_phys) {
> +			seen_current = 1;
> +			if (!idle)
> +				break;
> +		}
> +

This trick where you try to look at the hardware state to determine how
much to clean up has been a source of headaches in the past versions of
this driver.

It is much easier to reason about the state of the hardware and avoid
race conditions if you complete entire blocks of descriptors after the
hardware interrupts you to tell you it is finished.

This is the philosophy employed by the driver before your modifications:
ld_pending: a block of descriptors which is ready to run as soon as the
hardware becomes idle.
ld_running: a block of descriptors which is being executed by the
hardware.

> +		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
> +
> +		if (fsldma_clean_running_descriptor(desc, chan))
> +			break;
> +
> +	}
> +
> +	/*
> +	 * Start any pending transactions automatically
> +	 *
> +	 * In the ideal case, we keep the DMA controller busy while we go
> +	 * ahead and free the descriptors below.
> +	 */
> +	fsl_chan_xfer_ld_queue(chan);
> +
> +	if (cookie > 0)
> +		chan->common.completed_cookie = cookie;
> +}
> +
>  static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
>  {
>  	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
> @@ -534,8 +745,10 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
> 
>  	chan_dbg(chan, "free all channel resources\n");
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptor(chan);
>  	fsldma_free_desc_list(chan, &chan->ld_pending);
>  	fsldma_free_desc_list(chan, &chan->ld_running);
> +	fsldma_free_desc_list(chan, &chan->ld_completed);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
>  	dma_pool_destroy(chan->desc_pool);
> @@ -811,124 +1024,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
>  }
> 
>  /**
> - * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
> - * @chan: Freescale DMA channel
> - * @desc: descriptor to cleanup and free
> - *
> - * This function is used on a descriptor which has been executed by the DMA
> - * controller. It will run any callbacks, submit any dependencies, and then
> - * free the descriptor.
> - */
> -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
> -				      struct fsl_desc_sw *desc)
> -{
> -	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> -	struct device *dev = chan->common.device->dev;
> -	dma_addr_t src = get_desc_src(chan, desc);
> -	dma_addr_t dst = get_desc_dst(chan, desc);
> -	u32 len = get_desc_cnt(chan, desc);
> -
> -	/* Run the link descriptor callback function */
> -	if (txd->callback) {
> -#ifdef FSL_DMA_LD_DEBUG
> -		chan_dbg(chan, "LD %p callback\n", desc);
> -#endif
> -		txd->callback(txd->callback_param);
> -	}
> -
> -	/* Run any dependencies */
> -	dma_run_dependencies(txd);
> -
> -	/* Unmap the dst buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> -			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> -		else
> -			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> -	}
> -
> -	/* Unmap the src buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> -			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> -		else
> -			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> -	}
> -
> -#ifdef FSL_DMA_LD_DEBUG
> -	chan_dbg(chan, "LD %p free\n", desc);
> -#endif
> -	dma_pool_free(chan->desc_pool, desc, txd->phys);
> -}
> -
> -/**
> - * fsl_chan_xfer_ld_queue - transfer any pending transactions
> - * @chan : Freescale DMA channel
> - *
> - * HARDWARE STATE: idle
> - * LOCKING: must hold chan->desc_lock
> - */
> -static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
> -{
> -	struct fsl_desc_sw *desc;
> -
> -	/*
> -	 * If the list of pending descriptors is empty, then we
> -	 * don't need to do any work at all
> -	 */
> -	if (list_empty(&chan->ld_pending)) {
> -		chan_dbg(chan, "no pending LDs\n");
> -		return;
> -	}
> -
> -	/*
> -	 * The DMA controller is not idle, which means that the interrupt
> -	 * handler will start any queued transactions when it runs after
> -	 * this transaction finishes
> -	 */
> -	if (!chan->idle) {
> -		chan_dbg(chan, "DMA controller still busy\n");
> -		return;
> -	}
> -
> -	/*
> -	 * If there are some link descriptors which have not been
> -	 * transferred, we need to start the controller
> -	 */
> -
> -	/*
> -	 * Move all elements from the queue of pending transactions
> -	 * onto the list of running transactions
> -	 */
> -	chan_dbg(chan, "idle, starting controller\n");
> -	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
> -	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
> -
> -	/*
> -	 * The 85xx DMA controller doesn't clear the channel start bit
> -	 * automatically at the end of a transfer. Therefore we must clear
> -	 * it in software before starting the transfer.
> -	 */
> -	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
> -		u32 mode;
> -
> -		mode = DMA_IN(chan, &chan->regs->mr, 32);
> -		mode &= ~FSL_DMA_MR_CS;
> -		DMA_OUT(chan, &chan->regs->mr, mode, 32);
> -	}
> -
> -	/*
> -	 * Program the descriptor's address into the DMA controller,
> -	 * then start the DMA transaction
> -	 */
> -	set_cdar(chan, desc->async_tx.phys);
> -	get_cdar(chan);
> -
> -	dma_start(chan);
> -	chan->idle = false;
> -}
> -
> -/**
>   * fsl_dma_memcpy_issue_pending - Issue the DMA start command
>   * @chan : Freescale DMA channel
>   */
> @@ -954,11 +1049,17 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
>  	enum dma_status ret;
>  	unsigned long flags;
> 
> -	spin_lock_irqsave(&chan->desc_lock, flags);
>  	ret = dma_cookie_status(dchan, cookie, txstate);
> +	if (ret == DMA_SUCCESS) {
> +		fsldma_clean_completed_descriptor(chan);
> +		return ret;
> +	}
> +
> +	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptor(chan);

You've made status from a very quick operation (compare two cookies)
into a potentially long running operation. It may now run callbacks,
unmap pages, etc.

I note that Documentation/crypto/async-tx-api.txt section 3.6
"Constraints" does say that async_*() functions cannot be called from
IRQ context. However, the DMAEngine API itself does not have anything to
say about IRQ context.

I expect fsl_tx_status() to be quick, and not potentially call other
arbitrary code.

Is it possible to leave this function unchanged?

Also note that if you leave this function unchanged, the locking error
pointed out above (fsldma_clean_completed_descriptor() is not called
with the lock held) goes away.

>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
> -	return ret;
> +	return dma_cookie_status(dchan, cookie, txstate);
>  }
> 
>  /*----------------------------------------------------------------------------*/
> @@ -1035,53 +1136,21 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
>  static void dma_do_tasklet(unsigned long data)
>  {
>  	struct fsldma_chan *chan = (struct fsldma_chan *)data;
> -	struct fsl_desc_sw *desc, *_desc;
> -	LIST_HEAD(ld_cleanup);
>  	unsigned long flags;
> 
>  	chan_dbg(chan, "tasklet entry\n");
> 
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> 
> -	/* update the cookie if we have some descriptors to cleanup */
> -	if (!list_empty(&chan->ld_running)) {
> -		dma_cookie_t cookie;
> -
> -		desc = to_fsl_desc(chan->ld_running.prev);
> -		cookie = desc->async_tx.cookie;
> -		dma_cookie_complete(&desc->async_tx);
> -
> -		chan_dbg(chan, "completed_cookie=%d\n", cookie);
> -	}
> -
> -	/*
> -	 * move the descriptors to a temporary list so we can drop the lock
> -	 * during the entire cleanup operation
> -	 */
> -	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
> +	/* Run all cleanup for this descriptor */
> +	fsldma_cleanup_descriptor(chan);
> 
>  	/* the hardware is now idle and ready for more */
>  	chan->idle = true;
> 
> -	/*
> -	 * Start any pending transactions automatically
> -	 *
> -	 * In the ideal case, we keep the DMA controller busy while we go
> -	 * ahead and free the descriptors below.
> -	 */
>  	fsl_chan_xfer_ld_queue(chan);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
> -	/* Run the callback for each descriptor, in order */
> -	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
> -
> -		/* Remove from the list of transactions */
> -		list_del(&desc->node);
> -
> -		/* Run all cleanup for this descriptor */
> -		fsldma_cleanup_descriptor(chan, desc);
> -	}
> -
>  	chan_dbg(chan, "tasklet exit\n");
>  }
> 
> @@ -1262,6 +1331,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
>  	spin_lock_init(&chan->desc_lock);
>  	INIT_LIST_HEAD(&chan->ld_pending);
>  	INIT_LIST_HEAD(&chan->ld_running);
> +	INIT_LIST_HEAD(&chan->ld_completed);
>  	chan->idle = true;
> 
>  	chan->common.device = &fdev->common;
> diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
> index f5c3879..7ede908 100644
> --- a/drivers/dma/fsldma.h
> +++ b/drivers/dma/fsldma.h
> @@ -140,6 +140,7 @@ struct fsldma_chan {
>  	spinlock_t desc_lock;		/* Descriptor operation lock */
>  	struct list_head ld_pending;	/* Link descriptors queue */
>  	struct list_head ld_running;	/* Link descriptors queue */
> +	struct list_head ld_completed;	/* Link descriptors queue */
>  	struct dma_chan common;		/* DMA common channel */
>  	struct dma_pool *desc_pool;	/* Descriptors pool */
>  	struct device *dev;		/* Channel device */
> --
> 1.7.5.1
> 
> 

^ permalink raw reply

* Re: [PATCH] ppc44x/watchdog: Select WATCHDOG_NOWAYOUT option
From: Tabi Timur-B04825 @ 2012-07-16 20:28 UTC (permalink / raw)
  To: Josh Boyer; +Cc: linuxppc-dev@lists.ozlabs.org, Jiang Lu
In-Reply-To: <CA+5PVA6RG1m8c2OUL2g0gBK7dRCrmeu47rZfoqAjxODm1zcNRg@mail.gmail.com>

On Fri, Jul 13, 2012 at 7:25 AM, Josh Boyer <jwboyer@gmail.com> wrote:

> Right now, if the option is not set we call booke_wdt_disable which
> indeed does not actually _disable_ the WDT, but it does set the timer
> period to the maxium value.  We could go one step further and implement
> a simple timer that pops and calls booke_wdt_ping if WATCHDOG_NOWAYOUT
> is not set, then rearms itself.  That would leave the user with the
> ability to perform recovery of the userspace process that exited or
> died and was responsible for pinging the watchdog.

I think the maximum value is still decades or centuries, so there's no
real reason to complicate the code.

The NOWAYOUT feature is working as designed, so I don't understand the
need for this patch.

--=20
Timur Tabi
Linux kernel developer at Freescale=

^ permalink raw reply

* Re: [PATCH] ppc44x/watchdog: Select WATCHDOG_NOWAYOUT option
From: Tabi Timur-B04825 @ 2012-07-16 20:30 UTC (permalink / raw)
  To: lu.jiang@windriver.com; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <50037762.4000800@windriver.com>

On Sun, Jul 15, 2012 at 9:07 PM, Lu.Jiang <lu.jiang@windriver.com> wrote:
>
> Since the ppc44x's watch dog can not reset by software, such operation on=
ly
> set the timeout value(WDTP) to minimum, and cause the system reboot
> immediately.

It's supposed to set it to the maximum.  That's what WDTP_MASK is
supposed to do.

--=20
Timur Tabi
Linux kernel developer at Freescale=

^ permalink raw reply

* Re: [PATCH] mlx4_en: map entire pages to increase throughput
From: Or Gerlitz @ 2012-07-16 20:36 UTC (permalink / raw)
  To: Rick Jones
  Cc: netdev@vger.kernel.org, leitao@linux.vnet.ibm.com,
	amirv@mellanox.com, yevgenyp@mellanox.co.il,
	klebers@linux.vnet.ibm.com, Thadeu Lima de Souza Cascardo,
	brking@linux.vnet.ibm.com, ogerlitz@mellanox.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net,
	anton@samba.org
In-Reply-To: <50046EB1.5040909@hp.com>

[-- Attachment #1: Type: text/plain, Size: 312 bytes --]

On Mon, Jul 16, 2012 at 10:42 PM, Rick Jones <rick.jones2@hp.com> wrote:

> I was thinking more along the lines of an additional comparison,
> explicitly using netperf TCP_RR or something like it, not just the packets
> per second from a bulk transfer test.
>

TCP_STREAM would be good to know here as well

Or.

[-- Attachment #2: Type: text/html, Size: 636 bytes --]

^ permalink raw reply

* Re: [PATCH] powerpc: set stack limit properly in crit_transfer_to_handler
From: Kumar Gala @ 2012-07-16 20:36 UTC (permalink / raw)
  To: Stuart Yoder; +Cc: agraf, paulus, linuxppc-dev, sfr
In-Reply-To: <1342465608-20596-1-git-send-email-stuart.yoder@freescale.com>


On Jul 16, 2012, at 2:06 PM, Stuart Yoder wrote:

> From: Stuart Yoder <stuart.yoder@freescale.com>
> 
> without setting the stack limit like this there is the possibility
> of stack overflow which corrupts the thread info but
> is not detected by stack overflow detection
> 
> Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
> ---
> arch/powerpc/kernel/entry_32.S |   12 ++++++++++--
> 1 files changed, 10 insertions(+), 2 deletions(-)

Do we need this for debug & mcheck ?

- k

^ permalink raw reply

* Re: [PATCH] mlx4_en: map entire pages to increase throughput
From: Or Gerlitz @ 2012-07-16 20:43 UTC (permalink / raw)
  To: Rick Jones, Thadeu Lima de Souza Cascardo
  Cc: netdev@vger.kernel.org, leitao@linux.vnet.ibm.com,
	amirv@mellanox.com, yevgenyp@mellanox.co.il,
	klebers@linux.vnet.ibm.com, anton@samba.org,
	brking@linux.vnet.ibm.com, ogerlitz@mellanox.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <50046EB1.5040909@hp.com>

On Mon, Jul 16, 2012 at 10:42 PM, Rick Jones <rick.jones2@hp.com> wrote:

> I was thinking more along the lines of an additional comparison,
> explicitly using netperf TCP_RR or something like it, not just the packets
> per second from a bulk transfer test.


TCP_STREAM from this setup before the patch would be good to know as well

^ permalink raw reply

* Re: [PATCH] mlx4_en: map entire pages to increase throughput
From: Thadeu Lima de Souza Cascardo @ 2012-07-16 20:47 UTC (permalink / raw)
  To: Rick Jones
  Cc: netdev@vger.kernel.org, leitao@linux.vnet.ibm.com,
	amirv@mellanox.com, yevgenyp@mellanox.co.il,
	klebers@linux.vnet.ibm.com, anton@samba.org,
	brking@linux.vnet.ibm.com, ogerlitz@mellanox.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <50046EB1.5040909@hp.com>

On Mon, Jul 16, 2012 at 12:42:41PM -0700, Rick Jones wrote:
> On 07/16/2012 12:06 PM, Thadeu Lima de Souza Cascardo wrote:
> >On Mon, Jul 16, 2012 at 10:27:57AM -0700, Rick Jones wrote:
> >
> >>What is the effect on packet-per-second performance?  (eg aggregate,
> >>burst-mode netperf TCP_RR with TCP_NODELAY set or perhaps UDP_RR)
> >>
> >I used uperf with TCP_NODELAY and 16 threads sending from another
> >machine 64000-sized writes for 60 seconds.
> >
> >I get 5898op/s (3.02Gb/s) without the patch against 18022ops/s
> >(9.23Gb/s) with the patch.
> 
> I was thinking more along the lines of an additional comparison,
> explicitly using netperf TCP_RR or something like it, not just the
> packets per second from a bulk transfer test.
> 
> rick
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

I used a uperf profile that is similar to TCP_RR. It writes, then reads
some bytes. I kept the TCP_NODELAY flag.

Without the patch, I saw the following:

packet size	ops/s		Gb/s
1		337024		0.0027
90		276620		0.199
900		190455		1.37
4000		68863		2.20
9000		45638		3.29
60000		9409		4.52

With the patch:

packet size	ops/s		Gb/s
1		451738		0.0036
90		345682		0.248
900		272258		1.96
4000		127055		4.07
9000		106614		7.68
60000		30671		14.72

^ permalink raw reply

* Re: [PATCH] mlx4_en: map entire pages to increase throughput
From: Thadeu Lima de Souza Cascardo @ 2012-07-16 20:57 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: netdev@vger.kernel.org, leitao@linux.vnet.ibm.com, Rick Jones,
	amirv@mellanox.com, yevgenyp@mellanox.co.il,
	klebers@linux.vnet.ibm.com, anton@samba.org,
	brking@linux.vnet.ibm.com, ogerlitz@mellanox.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <CAJZOPZL3F+xdHSFfhg7v9A6DDjT6CPK=kgwyzcE6c0pGYFyupg@mail.gmail.com>

On Mon, Jul 16, 2012 at 11:43:33PM +0300, Or Gerlitz wrote:
> On Mon, Jul 16, 2012 at 10:42 PM, Rick Jones <rick.jones2@hp.com> wrote:
> 
> > I was thinking more along the lines of an additional comparison,
> > explicitly using netperf TCP_RR or something like it, not just the packets
> > per second from a bulk transfer test.
> 
> 
> TCP_STREAM from this setup before the patch would be good to know as well
> 

Hi, Or.

Does the stream test that I did with uperf using messages of 64000 bytes
fit?

TCP_NODELAY does not make a difference in this case. I get something
around 3Gbps before the patch and something around 9Gbps after the
patch.

Before the patch:

# ./uperf-1.0.3-beta/src/uperf -m tcp.xml
Starting 16 threads running profile:tcp_stream ...   0.00 seconds
Txn1          0 /1.00(s) =            0          16op/s
Txn2    20.81GB /59.26(s) =     3.02Gb/s        5914op/s
Txn3          0 /0.00(s) =            0      128295op/s
-------------------------------------------------------------------------------------------------------------------------------
Total   20.81GB /61.37(s) =     2.91Gb/s        5712op/s

Netstat statistics for this run
-------------------------------------------------------------------------------------------------------------------------------
Nic       opkts/s     ipkts/s     obits/s     ibits/s
eth6       252459       31694   3.06Gb/s  16.74Mb/s
eth0            2          18   3.87Kb/s  14.28Kb/s
-------------------------------------------------------------------------------------------------------------------------------

Run Statistics
Hostname           Time        Data   Throughput   Operations
Errors
-------------------------------------------------------------------------------------------------------------------------------
10.0.0.2         61.47s     20.81GB     2.91Gb/s       350528
0.00
master           61.37s     20.81GB     2.91Gb/s       350528
0.00
-------------------------------------------------------------------------------------------------------------------------------
Difference(%)     -0.16%      0.00%        0.16%        0.00%
0.00%


After the patch:

# ./uperf-1.0.3-beta/src/uperf -m tcp.xml
Starting 16 threads running profile:tcp_stream ...   0.00 seconds
Txn1          0 /1.00(s) =            0          16op/s
Txn2    64.50GB /60.27(s) =     9.19Gb/s       17975op/s
Txn3          0 /0.00(s) =            0
-------------------------------------------------------------------------------------------------------------------------------
Total   64.50GB /62.27(s) =     8.90Gb/s       17397op/s

Netstat statistics for this run
-------------------------------------------------------------------------------------------------------------------------------
Nic       opkts/s     ipkts/s     obits/s     ibits/s
eth6       769428       96018   9.31Gb/s  50.72Mb/s
eth0            1          15   2.48Kb/s  13.59Kb/s
-------------------------------------------------------------------------------------------------------------------------------

Run Statistics
Hostname           Time        Data   Throughput   Operations
Errors
-------------------------------------------------------------------------------------------------------------------------------
10.0.0.2         62.27s     64.36GB     8.88Gb/s      1081096
0.00
master           62.27s     64.50GB     8.90Gb/s      1083325
0.00
-------------------------------------------------------------------------------------------------------------------------------
Difference(%)     -0.00%      0.21%        0.21%        0.21%
0.00%


Profile tcp.xml:

<?xml version="1.0"?>
<profile name="TCP_STREAM">
  <group nthreads="16">
        <transaction iterations="1">
            <flowop type="connect" options="remotehost=10.0.0.2 protocol=tcp tcp_nodelay"/>
        </transaction>
        <transaction duration="60">
            <flowop type="write" options="count=160 size=64000"/>
        </transaction>
        <transaction iterations="1">
            <flowop type="disconnect" />
        </transaction>
  </group>
</profile>

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox