DMA Engine development
 help / color / mirror / Atom feed
From: Basavaraj Natikar <bnatikar@amd.com>
To: Vinod Koul <vkoul@kernel.org>,
	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Cc: dmaengine@vger.kernel.org
Subject: Re: [PATCH 3/3] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality
Date: Mon, 10 Feb 2025 19:56:12 +0530	[thread overview]
Message-ID: <c568caf4-c69c-4e41-a794-4756ca8fbd93@amd.com> (raw)
In-Reply-To: <Z6oKqKncVc9AL2Tb@vaman>



On 2/10/2025 7:48 PM, Vinod Koul wrote:
> On 03-02-25, 21:55, Basavaraj Natikar wrote:
>> As AE4DMA offers multi-channel functionality compared to PTDMA’s single
>> queue, utilize multi-queue, which supports higher speeds than PTDMA, to
>> achieve higher performance using the AE4DMA workqueue based mechanism.
>>
>> Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
> Why is this a fix, again!

Yes, as AE4DMA is much faster with multi-queue. However, sometimes due to multi-queue
processing, it takes longer and introduces a timeout for the synchronization of
consumers and producers, which helps avoid long waits that could eventually cause a hang.

Thanks,
--
Basavaraj

>
>> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> ---
>>   drivers/dma/amd/ae4dma/ae4dma.h         |  2 +
>>   drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
>>   2 files changed, 89 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
>> index 265c5d436008..57f6048726bb 100644
>> --- a/drivers/dma/amd/ae4dma/ae4dma.h
>> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
>> @@ -37,6 +37,8 @@
>>   #define AE4_DMA_VERSION			4
>>   #define CMD_AE4_DESC_DW0_VAL		2
>>   
>> +#define AE4_TIME_OUT			5000
>> +
>>   struct ae4_msix {
>>   	int msix_count;
>>   	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
>> diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
>> index 35c84ec9608b..715ac3ae067b 100644
>> --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
>> +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
>> @@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>>   {
>>   	struct dma_async_tx_descriptor *tx_desc;
>>   	struct virt_dma_desc *vd;
>> +	struct pt_device *pt;
>>   	unsigned long flags;
>>   
>> +	pt = chan->pt;
>>   	/* Loop over descriptors until one is found with commands */
>>   	do {
>>   		if (desc) {
>> @@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>>   
>>   		spin_lock_irqsave(&chan->vc.lock, flags);
>>   
>> -		if (desc) {
>> +		if (pt->ver != AE4_DMA_VERSION && desc) {
>>   			if (desc->status != DMA_COMPLETE) {
>>   				if (desc->status != DMA_ERROR)
>>   					desc->status = DMA_COMPLETE;
>> @@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>>   
>>   		spin_unlock_irqrestore(&chan->vc.lock, flags);
>>   
>> -		if (tx_desc) {
>> +		if (pt->ver != AE4_DMA_VERSION && tx_desc) {
> Why should this handling be different for DMA_VERSION?
>
>>   			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
>>   			dma_run_dependencies(tx_desc);
>>   			vchan_vdesc_fini(vd);
>> @@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>>   	return NULL;
>>   }
>>   
>> +static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
>> +{
>> +	u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
>> +	u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
>> +
>> +	if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN)  >= (MAX_CMD_QLEN - 1))
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>>   static void pt_cmd_callback(void *data, int err)
>>   {
>>   	struct pt_dma_desc *desc = data;
>> +	struct ae4_cmd_queue *ae4cmd_q;
>>   	struct dma_chan *dma_chan;
>>   	struct pt_dma_chan *chan;
>> +	struct ae4_device *ae4;
>> +	struct pt_device *pt;
>>   	int ret;
>>   
>>   	if (err == -EINPROGRESS)
>> @@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
>>   
>>   	dma_chan = desc->vd.tx.chan;
>>   	chan = to_pt_chan(dma_chan);
>> +	pt = chan->pt;
>>   
>>   	if (err)
>>   		desc->status = DMA_ERROR;
>>   
>>   	while (true) {
>> +		if (pt->ver == AE4_DMA_VERSION) {
>> +			ae4 = container_of(pt, struct ae4_device, pt);
>> +			ae4cmd_q = &ae4->ae4cmd_q[chan->id];
>> +
>> +			if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
>> +			    ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
>> +				wake_up(&ae4cmd_q->q_w);
>> +
>> +				if (wait_for_completion_timeout(&ae4cmd_q->cmp,
>> +								msecs_to_jiffies(AE4_TIME_OUT))
>> +								== 0) {
>> +					dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
>> +					break;
>> +				}
>> +
>> +				reinit_completion(&ae4cmd_q->cmp);
>> +				continue;
>> +			}
>> +		}
>> +
>>   		/* Check for DMA descriptor completion */
>>   		desc = pt_handle_active_desc(chan, desc);
>>   
>> @@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
>>   	return desc;
>>   }
>>   
>> +static void pt_cmd_callback_work(void *data, int err)
>> +{
>> +	struct dma_async_tx_descriptor *tx_desc;
>> +	struct pt_dma_desc *desc = data;
>> +	struct dma_chan *dma_chan;
>> +	struct virt_dma_desc *vd;
>> +	struct pt_dma_chan *chan;
>> +	unsigned long flags;
>> +
>> +	dma_chan = desc->vd.tx.chan;
>> +	chan = to_pt_chan(dma_chan);
>> +
>> +	if (err == -EINPROGRESS)
>> +		return;
>> +
>> +	tx_desc = &desc->vd.tx;
>> +	vd = &desc->vd;
>> +
>> +	if (err)
>> +		desc->status = DMA_ERROR;
>> +
>> +	spin_lock_irqsave(&chan->vc.lock, flags);
>> +	if (desc) {
>> +		if (desc->status != DMA_COMPLETE) {
>> +			if (desc->status != DMA_ERROR)
>> +				desc->status = DMA_COMPLETE;
>> +
>> +			dma_cookie_complete(tx_desc);
>> +			dma_descriptor_unmap(tx_desc);
>> +		} else {
>> +			tx_desc = NULL;
>> +		}
>> +	}
>> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
>> +
>> +	if (tx_desc) {
>> +		dmaengine_desc_get_callback_invoke(tx_desc, NULL);
>> +		dma_run_dependencies(tx_desc);
>> +		list_del(&desc->vd.node);
>> +		vchan_vdesc_fini(vd);
>> +	}
>> +}
> Why do we have callback in driver...?
>
>> +
>>   static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
>>   					  dma_addr_t dst,
>>   					  dma_addr_t src,
>> @@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
>>   	desc->len = len;
>>   
>>   	if (pt->ver == AE4_DMA_VERSION) {
>> +		pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
>>   		ae4 = container_of(pt, struct ae4_device, pt);
>>   		ae4cmd_q = &ae4->ae4cmd_q[chan->id];
>>   		mutex_lock(&ae4cmd_q->cmd_lock);
>> @@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
>>   {
>>   	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
>>   	struct pt_dma_desc *desc;
>> +	struct pt_device *pt;
>>   	unsigned long flags;
>>   	bool engine_is_idle = true;
>>   
>> +	pt = chan->pt;
>> +
>>   	spin_lock_irqsave(&chan->vc.lock, flags);
>>   
>>   	desc = pt_next_dma_desc(chan);
>> -	if (desc)
>> +	if (desc && pt->ver != AE4_DMA_VERSION)
>>   		engine_is_idle = false;
>>   
>>   	vchan_issue_pending(&chan->vc);
>> -- 
>> 2.25.1


  reply	other threads:[~2025-02-10 14:26 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-03 16:25 [PATCH 0/3] Fixes to the AE4DMA Basavaraj Natikar
2025-02-03 16:25 ` [PATCH 1/3] dmaengine: ae4dma: Remove deprecated PCI IDs Basavaraj Natikar
2025-02-10 13:59   ` Vinod Koul
2025-02-10 14:18     ` Basavaraj Natikar
2025-02-03 16:25 ` [PATCH 2/3] dmaengine: ae4dma: Use the MSI count and its corresponding IRQ number Basavaraj Natikar
2025-02-03 16:25 ` [PATCH 3/3] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality Basavaraj Natikar
2025-02-10 14:18   ` Vinod Koul
2025-02-10 14:26     ` Basavaraj Natikar [this message]
2025-02-10 14:37       ` Basavaraj Natikar
2025-03-10 21:06 ` [PATCH 0/3] Fixes to the AE4DMA Vinod Koul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c568caf4-c69c-4e41-a794-4756ca8fbd93@amd.com \
    --to=bnatikar@amd.com \
    --cc=Basavaraj.Natikar@amd.com \
    --cc=dmaengine@vger.kernel.org \
    --cc=vkoul@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox