* [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion
@ 2015-09-06 11:40 Robert Jarzmik
2015-09-06 11:40 ` [PATCH 2/3] dmaengine: enable DMA_CTRL_REUSE Robert Jarzmik
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Robert Jarzmik @ 2015-09-06 11:40 UTC (permalink / raw)
To: linux-arm-kernel
This patch attempts to enhance the case of a transfer submitted multiple
times, and where the cost of creating the descriptors chain is not
negligible.
This happens with big video buffers (several megabytes, ie. several
thousands of linked descriptors in one scatter-gather list). In these
cases, a video driver would want to do :
- tx = dmaengine_prep_slave_sg()
- dma_engine_submit(tx);
- dma_async_issue_pending()
- wait for video completion
- read video data (or not, skipping a frame is also possible)
- dma_engine_submit(tx)
=> here, the descriptors chain recalculation will take time
=> the dma coherent allocation over and over might create holes in
the dma pool, which is counter-productive.
- dma_async_issue_pending()
- etc ...
In order to cope with this case, virt-dma is modified to prevent freeing
the descriptors upon completion if DMA_CTRL_REUSE flag is set in the
transfer.
This patch is a respin of the former DMA_CTRL_ACK approach, which was
reverted due to a regression in audio drivers.
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
drivers/dma/virt-dma.c | 36 ++++++++++++++++++++++++++++++------
drivers/dma/virt-dma.h | 15 ++++++++++++++-
2 files changed, 44 insertions(+), 7 deletions(-)
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 6f80432a3f0a..e33caa87cbbb 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
spin_lock_irqsave(&vc->lock, flags);
cookie = dma_cookie_assign(tx);
- list_add_tail(&vd->node, &vc->desc_submitted);
+ list_move_tail(&vd->node, &vc->desc_submitted);
spin_unlock_irqrestore(&vc->lock, flags);
dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
@@ -39,6 +39,23 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
}
EXPORT_SYMBOL_GPL(vchan_tx_submit);
+int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
+{
+ struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+ struct virt_dma_desc *vd = to_virt_desc(tx);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+ list_del(&vd->node);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
+ vc, vd, vd->tx.cookie);
+ vc->desc_free(vd);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
+
struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc,
dma_cookie_t cookie)
{
@@ -83,8 +100,10 @@ static void vchan_complete(unsigned long arg)
cb_data = vd->tx.callback_param;
list_del(&vd->node);
-
- vc->desc_free(vd);
+ if (dmaengine_desc_test_reuse(&vd->tx))
+ list_add(&vd->node, &vc->desc_allocated);
+ else
+ vc->desc_free(vd);
if (cb)
cb(cb_data);
@@ -96,9 +115,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
while (!list_empty(head)) {
struct virt_dma_desc *vd = list_first_entry(head,
struct virt_dma_desc, node);
- list_del(&vd->node);
- dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
- vc->desc_free(vd);
+ if (dmaengine_desc_test_reuse(&vd->tx)) {
+ list_move_tail(&vd->node, &vc->desc_allocated);
+ } else {
+ dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
+ list_del(&vd->node);
+ vc->desc_free(vd);
+ }
}
}
EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
@@ -108,6 +131,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
dma_cookie_init(&vc->chan);
spin_lock_init(&vc->lock);
+ INIT_LIST_HEAD(&vc->desc_allocated);
INIT_LIST_HEAD(&vc->desc_submitted);
INIT_LIST_HEAD(&vc->desc_issued);
INIT_LIST_HEAD(&vc->desc_completed);
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index 181b95267866..a4156af93d88 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -29,6 +29,7 @@ struct virt_dma_chan {
spinlock_t lock;
/* protected by vc.lock */
+ struct list_head desc_allocated;
struct list_head desc_submitted;
struct list_head desc_issued;
struct list_head desc_completed;
@@ -55,10 +56,17 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan
struct virt_dma_desc *vd, unsigned long tx_flags)
{
extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+ extern int vchan_tx_desc_free(struct dma_async_tx_descriptor *);
+ unsigned long flags;
dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
vd->tx.flags = tx_flags;
vd->tx.tx_submit = vchan_tx_submit;
+ vd->tx.desc_free = vchan_tx_desc_free;
+
+ spin_lock_irqsave(&vc->lock, flags);
+ list_add_tail(&vd->node, &vc->desc_allocated);
+ spin_unlock_irqrestore(&vc->lock, flags);
return &vd->tx;
}
@@ -122,7 +130,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc)
}
/**
- * vchan_get_all_descriptors - obtain all submitted and issued descriptors
+ * vchan_get_all_descriptors - obtain all allocated, submitted and issued
+ * descriptors
* vc: virtual channel to get descriptors from
* head: list of descriptors found
*
@@ -134,6 +143,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc)
static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
struct list_head *head)
{
+ list_splice_tail_init(&vc->desc_allocated, head);
list_splice_tail_init(&vc->desc_submitted, head);
list_splice_tail_init(&vc->desc_issued, head);
list_splice_tail_init(&vc->desc_completed, head);
@@ -141,11 +151,14 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
{
+ struct virt_dma_desc *vd;
unsigned long flags;
LIST_HEAD(head);
spin_lock_irqsave(&vc->lock, flags);
vchan_get_all_descriptors(vc, &head);
+ list_for_each_entry(vd, &head, node)
+ dmaengine_desc_clear_reuse(&vd->tx);
spin_unlock_irqrestore(&vc->lock, flags);
vchan_dma_desc_free_list(vc, &head);
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] dmaengine: enable DMA_CTRL_REUSE
2015-09-06 11:40 [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
@ 2015-09-06 11:40 ` Robert Jarzmik
2015-09-06 11:40 ` [PATCH 3/3] dmaengine: pxa_dma: declare transfer are reusable Robert Jarzmik
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Robert Jarzmik @ 2015-09-06 11:40 UTC (permalink / raw)
To: linux-arm-kernel
In the current state, the capability of transfer reuse can neither be
set by a slave dmaengine driver, nor used by a client driver, because
the capability is not available to dma_get_slave_caps().
Fix this by adding a way to declare the capability.
Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE")
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
drivers/dma/dmaengine.c | 1 +
include/linux/dmaengine.h | 2 ++
2 files changed, 3 insertions(+)
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 3ff284c8e3d5..bb33e2ff5a42 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -493,6 +493,7 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
caps->dst_addr_widths = device->dst_addr_widths;
caps->directions = device->directions;
caps->residue_granularity = device->residue_granularity;
+ caps->descriptor_reuse = device->descriptor_reuse;
/*
* Some devices implement only pause (e.g. to get residuum) but no
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index e2f5eb419976..71b95257d835 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -635,6 +635,7 @@ struct dma_tx_state {
* struct with auxiliary transfer status information, otherwise the call
* will just return a simple status code
* @device_issue_pending: push pending transactions to hardware
+ * @descriptor_reuse: a submitted transfer can be resubmitted after completion
*/
struct dma_device {
@@ -657,6 +658,7 @@ struct dma_device {
u32 src_addr_widths;
u32 dst_addr_widths;
u32 directions;
+ bool descriptor_reuse;
enum dma_residue_granularity residue_granularity;
int (*device_alloc_chan_resources)(struct dma_chan *chan);
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] dmaengine: pxa_dma: declare transfer are reusable
2015-09-06 11:40 [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
2015-09-06 11:40 ` [PATCH 2/3] dmaengine: enable DMA_CTRL_REUSE Robert Jarzmik
@ 2015-09-06 11:40 ` Robert Jarzmik
2015-09-12 13:08 ` [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
2015-09-24 17:26 ` Vinod Koul
3 siblings, 0 replies; 7+ messages in thread
From: Robert Jarzmik @ 2015-09-06 11:40 UTC (permalink / raw)
To: linux-arm-kernel
As this driver provides a mechanism to reuse transfers, declare it in
its probe function.
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
drivers/dma/pxa_dma.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index ddcbbf5cd9e9..bb85cd7b5b70 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -1396,6 +1396,7 @@ static int pxad_probe(struct platform_device *op)
pdev->slave.dst_addr_widths = widths;
pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ pdev->slave.descriptor_reuse = true;
pdev->slave.dev = &op->dev;
ret = pxad_init_dmadev(op, pdev, dma_channels);
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion
2015-09-06 11:40 [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
2015-09-06 11:40 ` [PATCH 2/3] dmaengine: enable DMA_CTRL_REUSE Robert Jarzmik
2015-09-06 11:40 ` [PATCH 3/3] dmaengine: pxa_dma: declare transfer are reusable Robert Jarzmik
@ 2015-09-12 13:08 ` Robert Jarzmik
2015-09-16 19:20 ` Robert Jarzmik
2015-09-24 17:26 ` Vinod Koul
3 siblings, 1 reply; 7+ messages in thread
From: Robert Jarzmik @ 2015-09-12 13:08 UTC (permalink / raw)
To: linux-arm-kernel
Robert Jarzmik <robert.jarzmik@free.fr> writes:
> This patch attempts to enhance the case of a transfer submitted multiple
> times, and where the cost of creating the descriptors chain is not
> negligible.
>
> This happens with big video buffers (several megabytes, ie. several
> thousands of linked descriptors in one scatter-gather list). In these
> cases, a video driver would want to do :
> - tx = dmaengine_prep_slave_sg()
> - dma_engine_submit(tx);
> - dma_async_issue_pending()
> - wait for video completion
> - read video data (or not, skipping a frame is also possible)
> - dma_engine_submit(tx)
> => here, the descriptors chain recalculation will take time
> => the dma coherent allocation over and over might create holes in
> the dma pool, which is counter-productive.
> - dma_async_issue_pending()
> - etc ...
>
> In order to cope with this case, virt-dma is modified to prevent freeing
> the descriptors upon completion if DMA_CTRL_REUSE flag is set in the
> transfer.
>
> This patch is a respin of the former DMA_CTRL_ACK approach, which was
> reverted due to a regression in audio drivers.
>
> Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Hi Jun, Lars-Peter and Vinod,
The revert of the former patch of this type, 8c8fe97b2b8a ("Revert "dmaengine:
virt-dma: don't always free descriptor upon completion"") broke pxa_dma driver.
The reason behind is that pxa_dma was designed, upon transfer submission (in
pxad_tx_submit()), to "move" the virtual descriptor to the submitted list,
rather than adding it at its tail (because it was previously on the allocated
list).
As a consequence, the list_head is not initialized, pxa_dma fails, and the
current status is that pxa_dma Oopses in linux-next, and therefore in Linus's
tree once the merge window is closed.
I'd really like to have this patch as the fix, ie. that at transfer preparation
in vchan_tx_prep(), the virtual descriptor is added on "allocated" list tail.
But to be sure there is no regression on other platforms, I need a test,
especially from someone who suffered from the former version of this patch
(where we had DMA_CTRL_ACK instead of DMA_CTRL_REUSE).
Could anybody give it a try, as I need a fix to go in the early -rc of v4.3
please ?
Cheers.
--
Robert
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion
2015-09-12 13:08 ` [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
@ 2015-09-16 19:20 ` Robert Jarzmik
0 siblings, 0 replies; 7+ messages in thread
From: Robert Jarzmik @ 2015-09-16 19:20 UTC (permalink / raw)
To: linux-arm-kernel
Robert Jarzmik <robert.jarzmik@free.fr> writes:
> Robert Jarzmik <robert.jarzmik@free.fr> writes:
> Hi Jun, Lars-Peter and Vinod,
>
> The revert of the former patch of this type, 8c8fe97b2b8a ("Revert "dmaengine:
> virt-dma: don't always free descriptor upon completion"") broke pxa_dma
> driver.
So ... what's the plan, Vinod ? I need to fix pxa-dma driver in the early -rc
stages, how does it work with your schedule ?
Cheers.
--
Robert
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion
2015-09-06 11:40 [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
` (2 preceding siblings ...)
2015-09-12 13:08 ` [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
@ 2015-09-24 17:26 ` Vinod Koul
2015-09-24 19:48 ` Robert Jarzmik
3 siblings, 1 reply; 7+ messages in thread
From: Vinod Koul @ 2015-09-24 17:26 UTC (permalink / raw)
To: linux-arm-kernel
On Sun, Sep 06, 2015 at 01:40:52PM +0200, Robert Jarzmik wrote:
> @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
> spin_lock_irqsave(&vc->lock, flags);
> cookie = dma_cookie_assign(tx);
>
> - list_add_tail(&vd->node, &vc->desc_submitted);
> + list_move_tail(&vd->node, &vc->desc_submitted);
I am not sure I follow this, why move ?
> +int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
> +{
> + struct virt_dma_chan *vc = to_virt_chan(tx->chan);
> + struct virt_dma_desc *vd = to_virt_desc(tx);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&vc->lock, flags);
> + list_del(&vd->node);
> + spin_unlock_irqrestore(&vc->lock, flags);
> +
> + dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
> + vc, vd, vd->tx.cookie);
> + vc->desc_free(vd);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
this one seems okay, can you add comments here and also update documentation
for this
> @@ -96,9 +115,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
> while (!list_empty(head)) {
> struct virt_dma_desc *vd = list_first_entry(head,
> struct virt_dma_desc, node);
> - list_del(&vd->node);
> - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
> - vc->desc_free(vd);
> + if (dmaengine_desc_test_reuse(&vd->tx)) {
> + list_move_tail(&vd->node, &vc->desc_allocated);
should we check this if the list is to be freed? Why would anyone call free
except when cleaning up ?
> static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
> {
> + struct virt_dma_desc *vd;
> unsigned long flags;
> LIST_HEAD(head);
>
> spin_lock_irqsave(&vc->lock, flags);
> vchan_get_all_descriptors(vc, &head);
> + list_for_each_entry(vd, &head, node)
> + dmaengine_desc_clear_reuse(&vd->tx);
why do we want to do this, if we are freeing them
--
~Vinod
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion
2015-09-24 17:26 ` Vinod Koul
@ 2015-09-24 19:48 ` Robert Jarzmik
0 siblings, 0 replies; 7+ messages in thread
From: Robert Jarzmik @ 2015-09-24 19:48 UTC (permalink / raw)
To: linux-arm-kernel
Vinod Koul <vinod.koul@intel.com> writes:
> On Sun, Sep 06, 2015 at 01:40:52PM +0200, Robert Jarzmik wrote:
>> @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
>> spin_lock_irqsave(&vc->lock, flags);
>> cookie = dma_cookie_assign(tx);
>>
>> - list_add_tail(&vd->node, &vc->desc_submitted);
>> + list_move_tail(&vd->node, &vc->desc_submitted);
>
> I am not sure I follow this, why move ?
Because it is already on the allocated list (see vchan_tx_prep()).
>> +int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
>> +{
>> + struct virt_dma_chan *vc = to_virt_chan(tx->chan);
>> + struct virt_dma_desc *vd = to_virt_desc(tx);
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&vc->lock, flags);
>> + list_del(&vd->node);
>> + spin_unlock_irqrestore(&vc->lock, flags);
>> +
>> + dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
>> + vc, vd, vd->tx.cookie);
>> + vc->desc_free(vd);
>> + return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
> this one seems okay, can you add comments here and also update documentation
> for this
Ah yes, very good point, doxygen like documentation, got it.
>> @@ -96,9 +115,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
>> while (!list_empty(head)) {
>> struct virt_dma_desc *vd = list_first_entry(head,
>> struct virt_dma_desc, node);
>> - list_del(&vd->node);
>> - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
>> - vc->desc_free(vd);
>> + if (dmaengine_desc_test_reuse(&vd->tx)) {
>> + list_move_tail(&vd->node, &vc->desc_allocated);
>
> should we check this if the list is to be freed? Why would anyone call free
> except when cleaning up ?
Yes, there is a corner case : a driver does a dmaengine_terminate_all(), to stop
the dma transfers (a missed video frame for example). Then upon the sync signal,
it resubmits the reusable transfers it had.
Or said differently, reusable transfers should continue to be reusable through a
dmaengine_terminate_all(), hence this test. And there is a link with your
comment below.
>> static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
>> {
>> + struct virt_dma_desc *vd;
>> unsigned long flags;
>> LIST_HEAD(head);
>>
>> spin_lock_irqsave(&vc->lock, flags);
>> vchan_get_all_descriptors(vc, &head);
>> + list_for_each_entry(vd, &head, node)
>> + dmaengine_desc_clear_reuse(&vd->tx);
>
> why do we want to do this, if we are freeing them
Because if we don't clear reuse flag, vchan_dma_desc_free_list() won't really
free the descriptor, but move it to allocated list.
Cheers.
--
Robert
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2015-09-24 19:48 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-09-06 11:40 [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
2015-09-06 11:40 ` [PATCH 2/3] dmaengine: enable DMA_CTRL_REUSE Robert Jarzmik
2015-09-06 11:40 ` [PATCH 3/3] dmaengine: pxa_dma: declare transfer are reusable Robert Jarzmik
2015-09-12 13:08 ` [PATCH 1/3] dmaengine: virt-dma: don't always free descriptor upon completion Robert Jarzmik
2015-09-16 19:20 ` Robert Jarzmik
2015-09-24 17:26 ` Vinod Koul
2015-09-24 19:48 ` Robert Jarzmik
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).