* [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
From: Vishnu Suresh @ 2009-10-15  6:41 UTC
To: linuxppc-dev, linux-crypto, linux-kernel, linux-raid, herbert, dan.j.williams
Cc: Vishnu Suresh

This patch disables the use of the DMA_INTERRUPT capability with Async_tx.

The fsldma driver produces a null transfer for the DMA_INTERRUPT
capability when used with Async_tx. When RAID devices queue a
transaction via Async_tx, this results in a hang.

Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
---
 drivers/dma/fsldma.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 296f9e7..66d9b39 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1200,7 +1200,13 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 					- fdev->reg.start + 1);
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+#ifndef CONFIG_ASYNC_CORE
+	/*
+	 * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+	 * trigger a PE interrupt.
+	 */
 	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+#endif
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
-- 
1.6.4.2

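For context, a DMA_INTERRUPT descriptor moves no data; it exists only to
raise a completion interrupt, and async_tx requests one when a dependency
chain needs an interrupt (for instance across a channel switch). The sketch
below shows how a generic dmaengine client of this era would obtain such a
descriptor. It is a hypothetical illustration against the public dmaengine
API, not code from the patch; this is the path that hangs when the fsldma
null transfer misbehaves.

#include <linux/dmaengine.h>

/*
 * Sketch: queue a null-transfer (DMA_INTERRUPT) descriptor on @chan and
 * arrange for @cb to run from the channel's completion tasklet.
 * Error handling is abbreviated.
 */
static int request_interrupt_descriptor(struct dma_chan *chan,
					dma_async_tx_callback cb, void *arg)
{
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	/* only offered if the driver set DMA_INTERRUPT in cap_mask */
	tx = chan->device->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	tx->callback = cb;
	tx->callback_param = arg;

	cookie = tx->tx_submit(tx);	/* queue the null transfer */
	if (dma_submit_error(cookie))
		return -EIO;

	dma_async_issue_pending(chan);	/* kick the hardware */
	return 0;
}
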
* [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload
From: Vishnu Suresh @ 2009-10-15  6:41 UTC
To: linuxppc-dev, linux-crypto, linux-kernel, linux-raid, herbert, dan.j.williams
Cc: Dipen Dudhat, Maneesh Gupta, Vishnu Suresh

Expose Talitos's XOR functionality to be used for RAID parity
calculation via the Async_tx layer.

Thanks to Surender Kumar and Lee Nipper for their help in realising
this driver.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Dipen Dudhat <Dipen.Dudhat@freescale.com>
Signed-off-by: Maneesh Gupta <Maneesh.Gupta@freescale.com>
Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
---
 drivers/crypto/Kconfig   |    2 +
 drivers/crypto/talitos.c |  423 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/crypto/talitos.h |    2 +
 3 files changed, 426 insertions(+), 1 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index b08403d..343e578 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -192,6 +192,8 @@ config CRYPTO_DEV_TALITOS
 	select CRYPTO_ALGAPI
 	select CRYPTO_AUTHENC
 	select HW_RANDOM
+	select DMA_ENGINE
+	select ASYNC_XOR
 	depends on FSL_SOC
 	help
 	  Say 'Y' here to use the Freescale Security Engine (SEC)
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c47ffe8..84819d4 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1,7 +1,7 @@
 /*
  * talitos - Freescale Integrated Security Engine (SEC) device driver
  *
- * Copyright (c) 2008 Freescale Semiconductor, Inc.
+ * Copyright (c) 2008-2009 Freescale Semiconductor, Inc.
  *
  * Scatterlist Crypto API glue code copied from files with the following:
  * Copyright (c) 2006-2007 Herbert Xu <herbert@gondor.apana.org.au>
@@ -37,6 +37,8 @@
 #include <linux/io.h>
 #include <linux/spinlock.h>
 #include <linux/rtnetlink.h>
+#include <linux/dmaengine.h>
+#include <linux/raid/xor.h>
 
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
@@ -140,6 +142,9 @@ struct talitos_private {
 
 	/* hwrng device */
 	struct hwrng rng;
+
+	/* XOR Device */
+	struct dma_device dma_dev_common;
 };
 
 /* .features flag */
@@ -685,6 +690,401 @@ static void talitos_unregister_rng(struct device *dev)
 }
 
 /*
+ * async_tx interface for XOR-capable SECs
+ *
+ * Dipen Dudhat <Dipen.Dudhat@freescale.com>
+ * Maneesh Gupta <Maneesh.Gupta@freescale.com>
+ * Vishnu Suresh <Vishnu@freescale.com>
+ */
+
+/**
+ * talitos_xor_chan - context management for the async_tx channel
+ * @completed_cookie: the last completed cookie
+ * @desc_lock: lock for tx queue
+ * @total_desc: number of descriptors allocated
+ * @submit_q: queue of submitted descriptors
+ * @pending_q: queue of pending descriptors
+ * @in_progress_q: queue of descriptors in progress
+ * @free_desc: queue of unused descriptors
+ * @dev: talitos device implementing this channel
+ * @common: the corresponding xor channel in async_tx
+ */
+struct talitos_xor_chan {
+	dma_cookie_t completed_cookie;
+	spinlock_t desc_lock;
+	unsigned int total_desc;
+	struct list_head submit_q;
+	struct list_head pending_q;
+	struct list_head in_progress_q;
+	struct list_head free_desc;
+	struct device *dev;
+	struct dma_chan common;
+};
+
+/**
+ * talitos_xor_desc - software xor descriptor
+ * @async_tx: the referring async_tx descriptor
+ * @tx_list: list of descriptors submitted with this one
+ * @node: list node for the channel's descriptor queues
+ * @hwdesc: h/w descriptor
+ */
+struct talitos_xor_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head tx_list;
+	struct list_head node;
+	struct talitos_desc hwdesc;
+};
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error);
+
+static enum dma_status talitos_is_tx_complete(struct dma_chan *chan,
+					      dma_cookie_t cookie,
+					      dma_cookie_t *done,
+					      dma_cookie_t *used)
+{
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	last_used = chan->cookie;
+	last_complete = xor_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
+{
+	struct talitos_xor_desc *desc, *_desc;
+	unsigned long flags;
+	int status;
+
+	spin_lock_irqsave(&xor_chan->desc_lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		status = talitos_submit(xor_chan->dev, &desc->hwdesc,
+					talitos_release_xor, desc);
+		if (status != -EINPROGRESS)
+			break;
+
+		list_del(&desc->node);
+		list_add_tail(&desc->node, &xor_chan->in_progress_q);
+	}
+
+	spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
+}
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error)
+{
+	struct talitos_xor_desc *desc = context;
+	struct talitos_xor_chan *xor_chan;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	if (unlikely(error)) {
+		dev_err(dev, "xor operation: talitos error %d\n", error);
+		BUG();
+	}
+
+	xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
+				common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (xor_chan->completed_cookie < desc->async_tx.cookie)
+		xor_chan->completed_cookie = desc->async_tx.cookie;
+
+	callback = desc->async_tx.callback;
+	callback_param = desc->async_tx.callback_param;
+
+	if (callback) {
+		spin_unlock_bh(&xor_chan->desc_lock);
+		callback(callback_param);
+		spin_lock_bh(&xor_chan->desc_lock);
+	}
+
+	list_del(&desc->node);
+	list_add_tail(&desc->node, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->pending_q))
+		talitos_process_pending(xor_chan);
+}
+
+/**
+ * talitos_issue_pending - move the descriptors from the submit
+ * queue to the pending queue and submit them for processing
+ * @chan: DMA channel
+ */
+static void talitos_issue_pending(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_tail_init(&xor_chan->submit_q,
+			      &xor_chan->pending_q);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	talitos_process_pending(xor_chan);
+}
+
+static dma_cookie_t talitos_async_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct talitos_xor_desc *desc;
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t cookie;
+
+	desc = container_of(tx, struct talitos_xor_desc, async_tx);
+	xor_chan = container_of(tx->chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	cookie = xor_chan->common.cookie + 1;
+	if (cookie < 0)
+		cookie = 1;
+
+	desc->async_tx.cookie = cookie;
+	xor_chan->common.cookie = desc->async_tx.cookie;
+
+	list_splice_tail_init(&desc->tx_list,
+			      &xor_chan->submit_q);
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return cookie;
+}
+
+static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
+				struct talitos_xor_chan *xor_chan, gfp_t flags)
+{
+	struct talitos_xor_desc *desc;
+
+	desc = kmalloc(sizeof(*desc), flags);
+	if (desc) {
+		xor_chan->total_desc++;
+		desc->async_tx.tx_submit = talitos_async_tx_submit;
+	}
+
+	return desc;
+}
+
+static void talitos_free_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc, *_desc;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->submit_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->in_progress_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->free_desc, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	BUG_ON(unlikely(xor_chan->total_desc)); /* Some descriptor not freed? */
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+}
+
+static int talitos_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc;
+	LIST_HEAD(tmp_list);
+	int i;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	if (!list_empty(&xor_chan->free_desc))
+		return xor_chan->total_desc;
+
+	/* 256 initial descriptors */
+	for (i = 0; i < 256; i++) {
+		desc = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
+		if (!desc) {
+			dev_err(xor_chan->common.device->dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->node, &tmp_list);
+	}
+
+	if (!i)
+		return -ENOMEM;
+
+	/* At least one desc is allocated */
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_init(&tmp_list, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return xor_chan->total_desc;
+}
+
+static struct dma_async_tx_descriptor * talitos_prep_dma_xor(
+			struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+			unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *new;
+	struct talitos_desc *desc;
+	int i, j;
+
+	BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->free_desc)) {
+		new = container_of(xor_chan->free_desc.next,
+				   struct talitos_xor_desc, node);
+		list_del(&new->node);
+	} else {
+		new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
+	}
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	if (!new) {
+		dev_err(xor_chan->common.device->dev,
+			"No free memory for XOR DMA descriptor\n");
+		return NULL;
+	}
+	dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
+
+	INIT_LIST_HEAD(&new->node);
+	INIT_LIST_HEAD(&new->tx_list);
+
+	desc = &new->hwdesc;
+	/* Set destination: Last pointer pair */
+	to_talitos_ptr(&desc->ptr[6], dest);
+	desc->ptr[6].len = cpu_to_be16(len);
+	desc->ptr[6].j_extent = 0;
+
+	/* Set Sources: End loading from second-last pointer pair */
+	for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
+		to_talitos_ptr(&desc->ptr[i], src[j]);
+		desc->ptr[i].len = cpu_to_be16(len);
+		desc->ptr[i].j_extent = 0;
+	}
+
+	/*
+	 * documentation states first 0 ptr/len combo marks end of sources
+	 * yet device produces scatter boundary error unless all subsequent
+	 * sources are zeroed out
+	 */
+	for (; i >= 0; i--) {
+		to_talitos_ptr(&desc->ptr[i], 0);
+		desc->ptr[i].len = 0;
+		desc->ptr[i].j_extent = 0;
+	}
+
+	desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
+		    | DESC_HDR_TYPE_RAID_XOR;
+
+	new->async_tx.parent = NULL;
+	new->async_tx.next = NULL;
+	new->async_tx.cookie = 0;
+	async_tx_ack(&new->async_tx);
+
+	list_add_tail(&new->node, &new->tx_list);
+
+	new->async_tx.flags = flags;
+	new->async_tx.cookie = -EBUSY;
+
+	return &new->async_tx;
+}
+
+static void talitos_unregister_async_xor(struct device *dev)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct talitos_xor_chan *xor_chan;
+	struct dma_chan *chan;
+
+	if (priv->dma_dev_common.chancnt)
+		dma_async_device_unregister(&priv->dma_dev_common);
+
+	list_for_each_entry(chan, &priv->dma_dev_common.channels, device_node) {
+		xor_chan = container_of(chan, struct talitos_xor_chan, common);
+		list_del(&chan->device_node);
+		priv->dma_dev_common.chancnt--;
+		kfree(xor_chan);
+	}
+}
+
+/**
+ * talitos_register_async_tx - Initialize the Freescale XOR ADMA device
+ * It is registered as a DMA device with the capability to perform
+ * XOR operations with the Async_tx layer.
+ * The various queues and channel resources are also allocated.
+ */
+static int talitos_register_async_tx(struct device *dev, int max_xor_srcs)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct dma_device *dma_dev = &priv->dma_dev_common;
+	struct talitos_xor_chan *xor_chan;
+	int err;
+
+	xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
+	if (!xor_chan) {
+		dev_err(dev, "unable to allocate xor channel\n");
+		return -ENOMEM;
+	}
+
+	dma_dev->dev = dev;
+	dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = talitos_free_chan_resources;
+	dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
+	dma_dev->max_xor = max_xor_srcs;
+	dma_dev->device_is_tx_complete = talitos_is_tx_complete;
+	dma_dev->device_issue_pending = talitos_issue_pending;
+	INIT_LIST_HEAD(&dma_dev->channels);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+	xor_chan->dev = dev;
+	xor_chan->common.device = dma_dev;
+	xor_chan->total_desc = 0;
+	INIT_LIST_HEAD(&xor_chan->submit_q);
+	INIT_LIST_HEAD(&xor_chan->pending_q);
+	INIT_LIST_HEAD(&xor_chan->in_progress_q);
+	INIT_LIST_HEAD(&xor_chan->free_desc);
+	spin_lock_init(&xor_chan->desc_lock);
+
+	list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
+	dma_dev->chancnt++;
+
+	err = dma_async_device_register(dma_dev);
+	if (err) {
+		dev_err(dev, "Unable to register XOR with Async_tx\n");
+		goto err_out;
+	}
+
+	return err;
+
+err_out:
+	talitos_unregister_async_xor(dev);
+	return err;
+}
+/*
  * crypto alg
  */
 #define TALITOS_CRA_PRIORITY		3000
@@ -1768,6 +2168,8 @@ static int talitos_remove(struct of_device *ofdev)
 	tasklet_kill(&priv->done_task);
 
 	iounmap(priv->reg);
+	if (priv->dma_dev_common.chancnt)
+		talitos_unregister_async_xor(dev);
 
 	dev_set_drvdata(dev, NULL);
 
@@ -1926,6 +2328,25 @@ static int talitos_probe(struct of_device *ofdev,
 		dev_info(dev, "hwrng\n");
 	}
 
+	/*
+	 * register with async_tx xor, if capable
+	 * SEC 2.x supports up to 3 RAID sources,
+	 * SEC 3.x supports up to 6
+	 */
+	if (hw_supports(dev, DESC_HDR_SEL0_AESU | DESC_HDR_TYPE_RAID_XOR)) {
+		int max_xor_srcs = 3;
+		if (of_device_is_compatible(np, "fsl,sec3.0"))
+			max_xor_srcs = 6;
+
+		err = talitos_register_async_tx(dev, max_xor_srcs);
+		if (err) {
+			dev_err(dev, "failed to register async_tx xor: %d\n",
+				err);
+			goto err_out;
+		}
+		dev_info(dev, "max_xor_srcs %d\n", max_xor_srcs);
+	}
+
 	/* register crypto algorithms the device supports */
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index ff5a145..b6197bc 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -155,6 +155,7 @@
 /* primary execution unit mode (MODE0) and derivatives */
 #define DESC_HDR_MODE0_ENCRYPT		cpu_to_be32(0x00100000)
 #define DESC_HDR_MODE0_AESU_CBC		cpu_to_be32(0x00200000)
+#define DESC_HDR_MODE0_AESU_XOR		cpu_to_be32(0x0c600000)
 #define DESC_HDR_MODE0_DEU_CBC		cpu_to_be32(0x00400000)
 #define DESC_HDR_MODE0_DEU_3DES		cpu_to_be32(0x00200000)
 #define DESC_HDR_MODE0_MDEU_INIT	cpu_to_be32(0x01000000)
@@ -202,6 +203,7 @@
 #define DESC_HDR_TYPE_IPSEC_ESP		cpu_to_be32(1 << 3)
 #define DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU	cpu_to_be32(2 << 3)
 #define DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU	cpu_to_be32(4 << 3)
+#define DESC_HDR_TYPE_RAID_XOR		cpu_to_be32(21 << 3)
 
 /* link table extent field bits */
 #define DESC_PTR_LNKTBL_JUMP		0x80
-- 
1.6.4.2

* Re: [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload
From: Dan Williams @ 2009-10-29 22:46 UTC
To: Vishnu Suresh
Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, linux-crypto, Dipen Dudhat, Maneesh Gupta

On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
[..]
> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
> index b08403d..343e578 100644
> --- a/drivers/crypto/Kconfig
> +++ b/drivers/crypto/Kconfig
> @@ -192,6 +192,8 @@ config CRYPTO_DEV_TALITOS
>        select CRYPTO_ALGAPI
>        select CRYPTO_AUTHENC
>        select HW_RANDOM
> +       select DMA_ENGINE
> +       select ASYNC_XOR

You need only select DMA_ENGINE. ASYNC_XOR is selected by its users.

>        depends on FSL_SOC
>        help
>          Say 'Y' here to use the Freescale Security Engine (SEC)
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index c47ffe8..84819d4 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
[..]
> +static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
> +{
> +       struct talitos_xor_desc *desc, *_desc;
> +       unsigned long flags;
> +       int status;
> +
> +       spin_lock_irqsave(&xor_chan->desc_lock, flags);
> +
> +       list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
> +               status = talitos_submit(xor_chan->dev, &desc->hwdesc,
> +                                       talitos_release_xor, desc);
> +               if (status != -EINPROGRESS)
> +                       break;
> +
> +               list_del(&desc->node);
> +               list_add_tail(&desc->node, &xor_chan->in_progress_q);
> +       }
> +
> +       spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
> +}

The driver uses spin_lock_bh everywhere else, so this is either a bug
or this code is being overly protective. In any event lockdep will
rightly complain about this. The API and its users do not submit
operations in hard-irq context.
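To make the locking suggestion concrete, here is a minimal sketch of
talitos_process_pending() reworked along the lines Dan describes,
assuming the types from the posted patch. It is illustrative only, not
the driver's final code.

/*
 * Hypothetical rework: descriptors are never queued from hard-irq
 * context, so the cheaper bottom-half lock used elsewhere in the
 * driver suffices here as well.
 */
static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
{
	struct talitos_xor_desc *desc, *_desc;
	int status;

	spin_lock_bh(&xor_chan->desc_lock);

	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
		status = talitos_submit(xor_chan->dev, &desc->hwdesc,
					talitos_release_xor, desc);
		if (status != -EINPROGRESS)
			break;

		/* hand the descriptor over to the hardware queue */
		list_move_tail(&desc->node, &xor_chan->in_progress_q);
	}

	spin_unlock_bh(&xor_chan->desc_lock);
}
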
> +
> +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
> +                                void *context, int error)
> +{
> +       struct talitos_xor_desc *desc = context;
> +       struct talitos_xor_chan *xor_chan;
> +       dma_async_tx_callback callback;
> +       void *callback_param;
> +
> +       if (unlikely(error)) {
> +               dev_err(dev, "xor operation: talitos error %d\n", error);
> +               BUG();
> +       }
> +
> +       xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
> +                               common);
> +       spin_lock_bh(&xor_chan->desc_lock);
> +       if (xor_chan->completed_cookie < desc->async_tx.cookie)
> +               xor_chan->completed_cookie = desc->async_tx.cookie;
> +
> +       callback = desc->async_tx.callback;
> +       callback_param = desc->async_tx.callback_param;
> +
> +       if (callback) {
> +               spin_unlock_bh(&xor_chan->desc_lock);
> +               callback(callback_param);
> +               spin_lock_bh(&xor_chan->desc_lock);
> +       }

You do not need to unlock to call the callback. Users of the API
assume that they are called in bh context and that they are not
allowed to submit new operations directly from the callback (this
simplifies the descriptor cleanup routines in other drivers).

> +
> +       list_del(&desc->node);
> +       list_add_tail(&desc->node, &xor_chan->free_desc);
> +       spin_unlock_bh(&xor_chan->desc_lock);
> +       if (!list_empty(&xor_chan->pending_q))
> +               talitos_process_pending(xor_chan);
> +}

It appears this routine is missing a call to dma_run_dependencies()?
This is needed if another channel is handling memory copy offload. I
assume this is the case due to your other patch to fsldma. See
iop_adma_run_tx_complete_actions(). If this xor resource can also
handle copy operations that would be more efficient as it eliminates
channel switching. See the ioatdma driver and its use of
ASYNC_TX_DISABLE_CHANNEL_SWITCH.
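For illustration, a sketch of the completion routine with both comments
applied: the callback runs under the bh lock, and dependent operations
are launched via the generic dma_run_dependencies() helper. Types come
from the posted patch; this is an assumption-laden sketch, not the
accepted fix.

static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
				void *context, int error)
{
	struct talitos_xor_desc *desc = context;
	struct talitos_xor_chan *xor_chan;

	if (unlikely(error)) {
		dev_err(dev, "xor operation: talitos error %d\n", error);
		BUG();
	}

	xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
				common);

	spin_lock_bh(&xor_chan->desc_lock);
	if (xor_chan->completed_cookie < desc->async_tx.cookie)
		xor_chan->completed_cookie = desc->async_tx.cookie;

	/* API users expect bh context and must not submit from here,
	 * so no unlock/relock dance is needed around the callback */
	if (desc->async_tx.callback)
		desc->async_tx.callback(desc->async_tx.callback_param);
	spin_unlock_bh(&xor_chan->desc_lock);

	/* start operations that async_tx chained behind this one,
	 * possibly on another channel (cf. iop-adma) */
	dma_run_dependencies(&desc->async_tx);

	spin_lock_bh(&xor_chan->desc_lock);
	list_move_tail(&desc->node, &xor_chan->free_desc);
	spin_unlock_bh(&xor_chan->desc_lock);

	if (!list_empty(&xor_chan->pending_q))
		talitos_process_pending(xor_chan);
}
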
> +static struct dma_async_tx_descriptor * talitos_prep_dma_xor(
> +               struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> +               unsigned int src_cnt, size_t len, unsigned long flags)
> +{
> +       struct talitos_xor_chan *xor_chan;
> +       struct talitos_xor_desc *new;
> +       struct talitos_desc *desc;
> +       int i, j;
> +
> +       BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
> +
> +       xor_chan = container_of(chan, struct talitos_xor_chan, common);
> +
> +       spin_lock_bh(&xor_chan->desc_lock);
> +       if (!list_empty(&xor_chan->free_desc)) {
> +               new = container_of(xor_chan->free_desc.next,
> +                                  struct talitos_xor_desc, node);
> +               list_del(&new->node);
> +       } else {
> +               new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
> +       }
> +       spin_unlock_bh(&xor_chan->desc_lock);
> +
> +       if (!new) {
> +               dev_err(xor_chan->common.device->dev,
> +                       "No free memory for XOR DMA descriptor\n");
> +               return NULL;
> +       }
> +       dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
> +
> +       INIT_LIST_HEAD(&new->node);
> +       INIT_LIST_HEAD(&new->tx_list);

You can save some overhead in the fast path by moving this
initialization to talitos_xor_alloc_descriptor.

> +
> +       desc = &new->hwdesc;
> +       /* Set destination: Last pointer pair */
> +       to_talitos_ptr(&desc->ptr[6], dest);
> +       desc->ptr[6].len = cpu_to_be16(len);
> +       desc->ptr[6].j_extent = 0;
> +
> +       /* Set Sources: End loading from second-last pointer pair */
> +       for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
> +               to_talitos_ptr(&desc->ptr[i], src[j]);
> +               desc->ptr[i].len = cpu_to_be16(len);
> +               desc->ptr[i].j_extent = 0;
> +       }
> +
> +       /*
> +        * documentation states first 0 ptr/len combo marks end of sources
> +        * yet device produces scatter boundary error unless all subsequent
> +        * sources are zeroed out
> +        */
> +       for (; i >= 0; i--) {
> +               to_talitos_ptr(&desc->ptr[i], 0);
> +               desc->ptr[i].len = 0;
> +               desc->ptr[i].j_extent = 0;
> +       }
> +
> +       desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
> +                   | DESC_HDR_TYPE_RAID_XOR;
> +
> +       new->async_tx.parent = NULL;
> +       new->async_tx.next = NULL;

These fields are managed by the async_tx channel switch code. No need
to manage it here.

> +       new->async_tx.cookie = 0;

This is set below to -EBUSY, it's redundant to touch it here.

> +       async_tx_ack(&new->async_tx);

This makes it impossible to attach a dependency to this operation.
Not sure this is what you want.

--
Dan
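Dan's fast-path suggestion amounts to doing the list-head setup once at
allocation time. A hypothetical revision of the posted
talitos_xor_alloc_descriptor(), shown for illustration:

static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
				struct talitos_xor_chan *xor_chan, gfp_t flags)
{
	struct talitos_xor_desc *desc;

	desc = kmalloc(sizeof(*desc), flags);
	if (desc) {
		xor_chan->total_desc++;
		desc->async_tx.tx_submit = talitos_async_tx_submit;
		/* one-time init, so talitos_prep_dma_xor() can skip it */
		INIT_LIST_HEAD(&desc->node);
		INIT_LIST_HEAD(&desc->tx_list);
	}

	return desc;
}
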
* Re: [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload
From: hank peng @ 2009-11-03  0:43 UTC
To: Dan Williams
Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, Vishnu Suresh, linux-crypto, Dipen Dudhat, Maneesh Gupta

I have tested this patch on my MPC8548 box; the kernel version is
2.6.30. There is a problem.

#mdadm -C /dev/md0 -l5 -n3 /dev/sd{a,b,c}

Recovery can be done successfully, and the interrupts look normal.

# cat /proc/interrupts
            CPU0
 16:   16091057   OpenPIC   Level   mvSata
 17:          0   OpenPIC   Level   mvSata
 18:          4   OpenPIC   Level   phy_interrupt, phy_interrupt
 20:         39   OpenPIC   Level   fsldma-channel
 21:          0   OpenPIC   Level   fsldma-channel
 22:          0   OpenPIC   Level   fsldma-channel
 23:          0   OpenPIC   Level   fsldma-channel
 29:        241   OpenPIC   Level   eth0_tx
 30:    1004692   OpenPIC   Level   eth0_rx
 34:          0   OpenPIC   Level   eth0_er
 35:          0   OpenPIC   Level   eth1_tx
 36:          0   OpenPIC   Level   eth1_rx
 40:          0   OpenPIC   Level   eth1_er
 42:      73060   OpenPIC   Level   serial
 43:       9854   OpenPIC   Level   i2c-mpc, i2c-mpc
 45:   61264188   OpenPIC   Level   talitos
BAD:          0

Then I planned to create a VG, but a problem occurred.

#pvcreate /dev/md0

After I input this command, the system hangs there without any
messages. BTW, all is OK before using this patch.

2009/10/30 Dan Williams <dan.j.williams@intel.com>:
[..]

-- 
The simplest is not all best but the best is surely the simplest!

* Re: [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
From: Dan Williams @ 2009-10-16  1:25 UTC
To: Vishnu Suresh
Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, linux-crypto, Timur Tabi

[ added Leo and Timur to the Cc ]

On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
> This patch disables the use of DMA_INTERRUPT capability with Async_tx
>
> The fsldma produces a null transfer with DMA_INTERRUPT
> capability when used with Async_tx. When RAID devices queue
> a transaction via Async_tx, this results in a hang.
>
> Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
> ---
>  drivers/dma/fsldma.c |    6 ++++++
>  1 files changed, 6 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 296f9e7..66d9b39 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -1200,7 +1200,13 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
>                                                         - fdev->reg.start + 1);
>
>        dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
> +#ifndef CONFIG_ASYNC_CORE
> +       /*
> +        * The DMA_INTERRUPT async_tx is a NULL transfer, which will
> +        * trigger a PE interrupt.
> +        */
>        dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
> +#endif
>        dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
>        fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
>        fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;

You are basically saying that fsl_dma_prep_interrupt() is buggy. Can
that routine be fixed rather than this piecemeal solution? If it
cannot be fixed (i.e. hardware issue) then fsl_dma_prep_interrupt()
should just be disabled/deleted altogether.

--
Dan

* Re: [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
From: Ira W. Snyder @ 2009-10-16 15:33 UTC
To: Dan Williams
Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, Vishnu Suresh, linux-crypto, Timur Tabi

On Thu, Oct 15, 2009 at 06:25:14PM -0700, Dan Williams wrote:
> [ added Leo and Timur to the Cc ]
>
> On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
> > This patch disables the use of DMA_INTERRUPT capability with Async_tx
> >
> > The fsldma produces a null transfer with DMA_INTERRUPT
> > capability when used with Async_tx. When RAID devices queue
> > a transaction via Async_tx, this results in a hang.
> >
> > Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
[..]
> You are basically saying that fsl_dma_prep_interrupt() is buggy. Can
> that routine be fixed rather than this piecemeal solution? If it
> cannot be fixed (i.e. hardware issue) then fsl_dma_prep_interrupt()
> should just be disabled/deleted altogether.
>

For what it's worth, I've used the following code in the recent past
without any issues. This was on an 83xx, within the last few kernel
releases. I haven't tried it on the latest -rc.

Using device_prep_dma_memcpy() can trigger a callback as well, so the
interrupt feature isn't strictly needed. Just attach the callback to
the last memcpy operation.

static dma_cookie_t dma_async_interrupt(struct dma_chan *chan,
					dma_async_tx_callback callback,
					void *data)
{
	struct dma_device *dev = chan->device;
	struct dma_async_tx_descriptor *tx;

	/* Set up the DMA */
	tx = dev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	tx->callback = callback;
	tx->callback_param = data;

	return tx->tx_submit(tx);
}

Ira
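As a usage illustration of Ira's helper: the sketch below acquires an
interrupt-capable channel with the stock dmaengine helpers, submits the
null transfer, and sleeps until the callback fires. All names other
than the dmaengine API are invented here, and error handling is
abbreviated.

#include <linux/dmaengine.h>
#include <linux/completion.h>

static void example_completion(void *data)
{
	complete(data);	/* wake up the waiter below */
}

static int example_wait_for_dma_interrupt(void)
{
	struct dma_chan *chan;
	dma_cap_mask_t mask;
	struct completion done;
	dma_cookie_t cookie;

	dma_cap_zero(mask);
	dma_cap_set(DMA_INTERRUPT, mask);

	/* grab any channel advertising the interrupt capability */
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	init_completion(&done);
	cookie = dma_async_interrupt(chan, example_completion, &done);
	if (dma_submit_error(cookie)) {
		dma_release_channel(chan);
		return -EIO;
	}

	dma_async_issue_pending(chan);	/* start the null transfer */
	wait_for_completion(&done);

	dma_release_channel(chan);
	return 0;
}
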
* RE: [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
From: Suresh Vishnu-B05022 @ 2009-10-20 13:09 UTC
To: Ira W. Snyder, Dan Williams
Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, linux-crypto, Tabi Timur-B04825

> -----Original Message-----
> From: Ira W. Snyder [mailto:iws@ovro.caltech.edu]
> Sent: Friday, October 16, 2009 9:04 PM
> To: Dan Williams
> Cc: Suresh Vishnu-B05022; herbert@gondor.apana.org.au;
> linux-kernel@vger.kernel.org; linux-raid@vger.kernel.org;
> linuxppc-dev@ozlabs.org; linux-crypto@vger.kernel.org; Tabi
> Timur-B04825
> Subject: Re: [PATCH v0 1/2] DMA: fsldma: Disable
> DMA_INTERRUPT when Async_tx enabled
>
> On Thu, Oct 15, 2009 at 06:25:14PM -0700, Dan Williams wrote:
> > [ added Leo and Timur to the Cc ]
> >
> > On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
> > > This patch disables the use of DMA_INTERRUPT capability with
> > > Async_tx
> > >
> > > The fsldma produces a null transfer with DMA_INTERRUPT capability
> > > when used with Async_tx. When RAID devices queue a transaction via
> > > Async_tx, this results in a hang.
[..]
> > You are basically saying that fsl_dma_prep_interrupt() is buggy. Can
> > that routine be fixed rather than this piecemeal solution? If it
> > cannot be fixed (i.e. hardware issue) then fsl_dma_prep_interrupt()
> > should just be disabled/deleted altogether.

We are working to fix this issue.

> For what it's worth, I've used the following code in the recent past
> without any issues. This was on an 83xx, within the last few kernel
> releases. I haven't tried it on the latest -rc.

This works fine as long as only DMA_MEMCPY is being used. The
async_tx channel switch does not occur and device_prep_dma_interrupt
is not called.

However, when a DMA_XOR capable device is exposed which is different
from the DMA_MEMCPY/INTERRUPT device, this path is hit. Is it proper
to schedule a dma_interrupt from the channel switch call, even when
the depend_tx and tx channels correspond to different devices?

> Using device_prep_dma_memcpy() can trigger a callback as well, so the
> interrupt feature isn't strictly needed. Just attach the callback to
> the last memcpy operation.
>
> static dma_cookie_t dma_async_interrupt(struct dma_chan *chan,
> 					dma_async_tx_callback callback,
> 					void *data)
> {
> 	struct dma_device *dev = chan->device;
> 	struct dma_async_tx_descriptor *tx;
>
> 	/* Set up the DMA */
> 	tx = dev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
> 	if (!tx)
> 		return -ENOMEM;
>
> 	tx->callback = callback;
> 	tx->callback_param = data;
>
> 	return tx->tx_submit(tx);
> }
>
> Ira
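The cross-device scenario Suresh describes can be pictured with a short
async_tx client sketch, written against what the 2.6.32-era API looked
like; everything outside the async_tx calls is invented for
illustration. The XOR may be routed to the talitos channel, while the
trailing callback request is satisfied with a DMA_INTERRUPT descriptor
on a different device (fsldma here), which is exactly the channel
switch being questioned.

#include <linux/async_tx.h>

static void chain_done(void *arg)
{
	/* completion notification for the whole chain */
}

static void example_xor_chain(struct page *dest, struct page **srcs,
			      int src_cnt, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;

	/* XOR of src_cnt pages into dest; may be offloaded to talitos */
	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
			  NULL, NULL, NULL);
	tx = async_xor(dest, srcs, 0, src_cnt, len, &submit);

	/* the interrupt backing this callback may be scheduled on a
	 * different channel than the XOR above, via DMA_INTERRUPT */
	init_async_submit(&submit, 0, tx, chain_done, NULL, NULL);
	async_trigger_callback(&submit);
}
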