From: Basavaraj Natikar <bnatikar@amd.com>
To: Vinod Koul <vkoul@kernel.org>,
Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Cc: dmaengine@vger.kernel.org
Subject: Re: [PATCH 3/3] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality
Date: Mon, 10 Feb 2025 19:56:12 +0530 [thread overview]
Message-ID: <c568caf4-c69c-4e41-a794-4756ca8fbd93@amd.com> (raw)
In-Reply-To: <Z6oKqKncVc9AL2Tb@vaman>
On 2/10/2025 7:48 PM, Vinod Koul wrote:
> On 03-02-25, 21:55, Basavaraj Natikar wrote:
>> As AE4DMA offers multi-channel functionality compared to PTDMA’s single
>> queue, utilize multi-queue, which supports higher speeds than PTDMA, to
>> achieve higher performance using the AE4DMA workqueue based mechanism.
>>
>> Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
> Why is this a fix, again!
Yes, as AE4DMA is much faster with multi-queue. However, sometimes due to multi-queue
processing, it takes longer and introduces a timeout for the synchronization of
consumers and producers, which helps avoid long waits that could eventually cause a hang.
Thanks,
--
Basavaraj
>
>> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> ---
>> drivers/dma/amd/ae4dma/ae4dma.h | 2 +
>> drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
>> 2 files changed, 89 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
>> index 265c5d436008..57f6048726bb 100644
>> --- a/drivers/dma/amd/ae4dma/ae4dma.h
>> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
>> @@ -37,6 +37,8 @@
>> #define AE4_DMA_VERSION 4
>> #define CMD_AE4_DESC_DW0_VAL 2
>>
>> +#define AE4_TIME_OUT 5000
>> +
>> struct ae4_msix {
>> int msix_count;
>> struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
>> diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
>> index 35c84ec9608b..715ac3ae067b 100644
>> --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
>> +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
>> @@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>> {
>> struct dma_async_tx_descriptor *tx_desc;
>> struct virt_dma_desc *vd;
>> + struct pt_device *pt;
>> unsigned long flags;
>>
>> + pt = chan->pt;
>> /* Loop over descriptors until one is found with commands */
>> do {
>> if (desc) {
>> @@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>>
>> spin_lock_irqsave(&chan->vc.lock, flags);
>>
>> - if (desc) {
>> + if (pt->ver != AE4_DMA_VERSION && desc) {
>> if (desc->status != DMA_COMPLETE) {
>> if (desc->status != DMA_ERROR)
>> desc->status = DMA_COMPLETE;
>> @@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>>
>> spin_unlock_irqrestore(&chan->vc.lock, flags);
>>
>> - if (tx_desc) {
>> + if (pt->ver != AE4_DMA_VERSION && tx_desc) {
> Why should this handling be different for DMA_VERSION?
>
>> dmaengine_desc_get_callback_invoke(tx_desc, NULL);
>> dma_run_dependencies(tx_desc);
>> vchan_vdesc_fini(vd);
>> @@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>> return NULL;
>> }
>>
>> +static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
>> +{
>> + u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
>> + u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
>> +
>> + if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> static void pt_cmd_callback(void *data, int err)
>> {
>> struct pt_dma_desc *desc = data;
>> + struct ae4_cmd_queue *ae4cmd_q;
>> struct dma_chan *dma_chan;
>> struct pt_dma_chan *chan;
>> + struct ae4_device *ae4;
>> + struct pt_device *pt;
>> int ret;
>>
>> if (err == -EINPROGRESS)
>> @@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
>>
>> dma_chan = desc->vd.tx.chan;
>> chan = to_pt_chan(dma_chan);
>> + pt = chan->pt;
>>
>> if (err)
>> desc->status = DMA_ERROR;
>>
>> while (true) {
>> + if (pt->ver == AE4_DMA_VERSION) {
>> + ae4 = container_of(pt, struct ae4_device, pt);
>> + ae4cmd_q = &ae4->ae4cmd_q[chan->id];
>> +
>> + if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
>> + ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
>> + wake_up(&ae4cmd_q->q_w);
>> +
>> + if (wait_for_completion_timeout(&ae4cmd_q->cmp,
>> + msecs_to_jiffies(AE4_TIME_OUT))
>> + == 0) {
>> + dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
>> + break;
>> + }
>> +
>> + reinit_completion(&ae4cmd_q->cmp);
>> + continue;
>> + }
>> + }
>> +
>> /* Check for DMA descriptor completion */
>> desc = pt_handle_active_desc(chan, desc);
>>
>> @@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
>> return desc;
>> }
>>
>> +static void pt_cmd_callback_work(void *data, int err)
>> +{
>> + struct dma_async_tx_descriptor *tx_desc;
>> + struct pt_dma_desc *desc = data;
>> + struct dma_chan *dma_chan;
>> + struct virt_dma_desc *vd;
>> + struct pt_dma_chan *chan;
>> + unsigned long flags;
>> +
>> + dma_chan = desc->vd.tx.chan;
>> + chan = to_pt_chan(dma_chan);
>> +
>> + if (err == -EINPROGRESS)
>> + return;
>> +
>> + tx_desc = &desc->vd.tx;
>> + vd = &desc->vd;
>> +
>> + if (err)
>> + desc->status = DMA_ERROR;
>> +
>> + spin_lock_irqsave(&chan->vc.lock, flags);
>> + if (desc) {
>> + if (desc->status != DMA_COMPLETE) {
>> + if (desc->status != DMA_ERROR)
>> + desc->status = DMA_COMPLETE;
>> +
>> + dma_cookie_complete(tx_desc);
>> + dma_descriptor_unmap(tx_desc);
>> + } else {
>> + tx_desc = NULL;
>> + }
>> + }
>> + spin_unlock_irqrestore(&chan->vc.lock, flags);
>> +
>> + if (tx_desc) {
>> + dmaengine_desc_get_callback_invoke(tx_desc, NULL);
>> + dma_run_dependencies(tx_desc);
>> + list_del(&desc->vd.node);
>> + vchan_vdesc_fini(vd);
>> + }
>> +}
> Why do we have callback in driver...?
>
>> +
>> static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
>> dma_addr_t dst,
>> dma_addr_t src,
>> @@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
>> desc->len = len;
>>
>> if (pt->ver == AE4_DMA_VERSION) {
>> + pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
>> ae4 = container_of(pt, struct ae4_device, pt);
>> ae4cmd_q = &ae4->ae4cmd_q[chan->id];
>> mutex_lock(&ae4cmd_q->cmd_lock);
>> @@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
>> {
>> struct pt_dma_chan *chan = to_pt_chan(dma_chan);
>> struct pt_dma_desc *desc;
>> + struct pt_device *pt;
>> unsigned long flags;
>> bool engine_is_idle = true;
>>
>> + pt = chan->pt;
>> +
>> spin_lock_irqsave(&chan->vc.lock, flags);
>>
>> desc = pt_next_dma_desc(chan);
>> - if (desc)
>> + if (desc && pt->ver != AE4_DMA_VERSION)
>> engine_is_idle = false;
>>
>> vchan_issue_pending(&chan->vc);
>> --
>> 2.25.1
next prev parent reply other threads:[~2025-02-10 14:26 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-03 16:25 [PATCH 0/3] Fixes to the AE4DMA Basavaraj Natikar
2025-02-03 16:25 ` [PATCH 1/3] dmaengine: ae4dma: Remove deprecated PCI IDs Basavaraj Natikar
2025-02-10 13:59 ` Vinod Koul
2025-02-10 14:18 ` Basavaraj Natikar
2025-02-03 16:25 ` [PATCH 2/3] dmaengine: ae4dma: Use the MSI count and its corresponding IRQ number Basavaraj Natikar
2025-02-03 16:25 ` [PATCH 3/3] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality Basavaraj Natikar
2025-02-10 14:18 ` Vinod Koul
2025-02-10 14:26 ` Basavaraj Natikar [this message]
2025-02-10 14:37 ` Basavaraj Natikar
2025-03-10 21:06 ` [PATCH 0/3] Fixes to the AE4DMA Vinod Koul
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c568caf4-c69c-4e41-a794-4756ca8fbd93@amd.com \
--to=bnatikar@amd.com \
--cc=Basavaraj.Natikar@amd.com \
--cc=dmaengine@vger.kernel.org \
--cc=vkoul@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox