public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
@ 2025-06-24 20:12 Bart Van Assche
  2025-06-30 20:19 ` Bart Van Assche
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Bart Van Assche @ 2025-06-24 20:12 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: linux-scsi, Bart Van Assche, Can Guo, James E.J. Bottomley,
	Peter Wang, Avri Altman, Manivannan Sadhasivam, Bao D. Nguyen,
	Stanley Jhu, Asutosh Das

ufshcd_clock_scaling_prepare() only supports the legacy doorbell mode and
may wait up to 20 ms longer than necessary. Hence this patch that reworks
ufshcd_clock_scaling_prepare(). Compile-tested only.

Cc: Can Guo <quic_cang@quicinc.com>
Fixes: 305a357d3595 ("scsi: ufs: core: Introduce multi-circular queue capability")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/ufs/core/ufshcd.c | 162 +++++++++++++++-----------------------
 1 file changed, 64 insertions(+), 98 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index b3fe4335d56c..c8eb5bf65e22 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1248,87 +1248,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
 	return false;
 }
 
-/*
- * Determine the number of pending commands by counting the bits in the SCSI
- * device budget maps. This approach has been selected because a bit is set in
- * the budget map before scsi_host_queue_ready() checks the host_self_blocked
- * flag. The host_self_blocked flag can be modified by calling
- * scsi_block_requests() or scsi_unblock_requests().
- */
-static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
-{
-	const struct scsi_device *sdev;
-	unsigned long flags;
-	u32 pending = 0;
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	__shost_for_each_device(sdev, hba->host)
-		pending += sbitmap_weight(&sdev->budget_map);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-	return pending;
-}
-
-/*
- * Wait until all pending SCSI commands and TMFs have finished or the timeout
- * has expired.
- *
- * Return: 0 upon success; -EBUSY upon timeout.
- */
-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_pending;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_pending = ufshcd_pending_cmds(hba);
-		if (!tm_doorbell && !tr_pending) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		io_schedule_timeout(msecs_to_jiffies(20));
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-	} while (tm_doorbell || tr_pending);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_pending);
-		ret = -EBUSY;
-	}
-out:
-	ufshcd_release(hba);
-	return ret;
-}
-
 /**
  * ufshcd_scale_gear - scale up/down UFS gear
  * @hba: per adapter instance
@@ -1391,36 +1310,86 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up
  * Return: 0 upon success; -EBUSY upon timeout.
  */
 static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us)
+	__cond_acquires(hba->host->scan_mutex)
+	__cond_acquires(hba->wb_mutex)
+	__cond_acquires(hba->clk_scaling_lock)
 {
-	int ret = 0;
+	const unsigned long deadline = jiffies + usecs_to_jiffies(timeout_us);
+	struct Scsi_Host *host = hba->host;
+	struct scsi_device *sdev;
+	long timeout;
+
 	/*
-	 * make sure that there are no outstanding requests when
-	 * clock scaling is in progress
+	 * Hold scan_mutex to prevent that SCSI devices are added or removed
+	 * while this function is in progress.
 	 */
-	mutex_lock(&hba->host->scan_mutex);
-	blk_mq_quiesce_tagset(&hba->host->tag_set);
+	mutex_lock(&host->scan_mutex);
 	mutex_lock(&hba->wb_mutex);
 	down_write(&hba->clk_scaling_lock);
+	/* Call ufshcd_hold() to serialize clock gating and clock scaling. */
+	ufshcd_hold(hba);
 
 	if (!hba->clk_scaling.is_allowed ||
-	    ufshcd_wait_for_doorbell_clr(hba, timeout_us)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		mutex_unlock(&hba->wb_mutex);
-		blk_mq_unquiesce_tagset(&hba->host->tag_set);
-		mutex_unlock(&hba->host->scan_mutex);
+	    hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
 		goto out;
+
+	blk_freeze_queue_start(hba->tmf_queue);
+	shost_for_each_device(sdev, host)
+		blk_freeze_queue_start(sdev->request_queue);
+
+	/*
+	 * Calling synchronize_*rcu_expedited() reduces the wait time from
+	 * milliseconds to less than a microsecond. See also
+	 * https://paulmck.livejournal.com/67547.html.
+	 */
+	if (host->tag_set.flags & BLK_MQ_F_BLOCKING)
+		synchronize_srcu_expedited(host->tag_set.srcu);
+	else
+		synchronize_rcu_expedited();
+
+	timeout = deadline - jiffies;
+	if (timeout <= 0 ||
+	    blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, timeout) <= 0)
+		goto unfreeze;
+	shost_for_each_device(sdev, host) {
+		timeout = deadline - jiffies;
+		if (timeout <= 0 ||
+		    blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+						     timeout) <= 0) {
+			goto unfreeze;
+		}
 	}
 
-	/* let's not get into low power until clock scaling is completed */
-	ufshcd_hold(hba);
+	return 0;
+
+unfreeze:
+	blk_mq_unfreeze_queue_nomemrestore(hba->tmf_queue);
+	shost_for_each_device(sdev, host)
+		blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
+
+	dev_err(hba->dev, "%s timed out\n", __func__);
 
 out:
-	return ret;
+	ufshcd_release(hba);
+	up_write(&hba->clk_scaling_lock);
+	mutex_unlock(&hba->wb_mutex);
+	mutex_unlock(&host->scan_mutex);
+
+	return -EBUSY;
 }
 
 static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
+	__releases(hba->host->scan_mutex)
+	__releases(hba->wb_mutex)
+	__releases(hba->clk_scaling_lock)
 {
+	struct scsi_device *sdev;
+
+	blk_mq_unfreeze_queue_nomemrestore(hba->tmf_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
+
+	ufshcd_release(hba);
 	up_write(&hba->clk_scaling_lock);
 
 	/* Enable Write Booster if current gear requires it else disable it */
@@ -1428,10 +1397,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
 		ufshcd_wb_toggle(hba, hba->pwr_info.gear_rx >= hba->clk_scaling.wb_gear);
 
 	mutex_unlock(&hba->wb_mutex);
-
-	blk_mq_unquiesce_tagset(&hba->host->tag_set);
 	mutex_unlock(&hba->host->scan_mutex);
-	ufshcd_release(hba);
 }
 
 /**

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-07-07 19:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-24 20:12 [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ Bart Van Assche
2025-06-30 20:19 ` Bart Van Assche
2025-07-01  4:04 ` Ziqi Chen
2025-07-03  8:29 ` Ziqi Chen
2025-07-07 19:40   ` Bart Van Assche
2025-07-04 12:04 ` Peter Wang (王信友)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox