* [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
@ 2025-06-24 20:12 Bart Van Assche
2025-06-30 20:19 ` Bart Van Assche
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Bart Van Assche @ 2025-06-24 20:12 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, Bart Van Assche, Can Guo, James E.J. Bottomley,
Peter Wang, Avri Altman, Manivannan Sadhasivam, Bao D. Nguyen,
Stanley Jhu, Asutosh Das
ufshcd_clock_scaling_prepare() only supports the legacy doorbell mode and
may wait up to 20 ms longer than necessary. Hence this patch, which reworks
ufshcd_clock_scaling_prepare(). Compile-tested only.
Cc: Can Guo <quic_cang@quicinc.com>
Fixes: 305a357d3595 ("scsi: ufs: core: Introduce multi-circular queue capability")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/ufs/core/ufshcd.c | 162 +++++++++++++++-----------------------
1 file changed, 64 insertions(+), 98 deletions(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index b3fe4335d56c..c8eb5bf65e22 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1248,87 +1248,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
return false;
}
-/*
- * Determine the number of pending commands by counting the bits in the SCSI
- * device budget maps. This approach has been selected because a bit is set in
- * the budget map before scsi_host_queue_ready() checks the host_self_blocked
- * flag. The host_self_blocked flag can be modified by calling
- * scsi_block_requests() or scsi_unblock_requests().
- */
-static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
-{
- const struct scsi_device *sdev;
- unsigned long flags;
- u32 pending = 0;
-
- spin_lock_irqsave(hba->host->host_lock, flags);
- __shost_for_each_device(sdev, hba->host)
- pending += sbitmap_weight(&sdev->budget_map);
- spin_unlock_irqrestore(hba->host->host_lock, flags);
-
- return pending;
-}
-
-/*
- * Wait until all pending SCSI commands and TMFs have finished or the timeout
- * has expired.
- *
- * Return: 0 upon success; -EBUSY upon timeout.
- */
-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
- u64 wait_timeout_us)
-{
- int ret = 0;
- u32 tm_doorbell;
- u32 tr_pending;
- bool timeout = false, do_last_check = false;
- ktime_t start;
-
- ufshcd_hold(hba);
- /*
- * Wait for all the outstanding tasks/transfer requests.
- * Verify by checking the doorbell registers are clear.
- */
- start = ktime_get();
- do {
- if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
- ret = -EBUSY;
- goto out;
- }
-
- tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
- tr_pending = ufshcd_pending_cmds(hba);
- if (!tm_doorbell && !tr_pending) {
- timeout = false;
- break;
- } else if (do_last_check) {
- break;
- }
-
- io_schedule_timeout(msecs_to_jiffies(20));
- if (ktime_to_us(ktime_sub(ktime_get(), start)) >
- wait_timeout_us) {
- timeout = true;
- /*
- * We might have scheduled out for long time so make
- * sure to check if doorbells are cleared by this time
- * or not.
- */
- do_last_check = true;
- }
- } while (tm_doorbell || tr_pending);
-
- if (timeout) {
- dev_err(hba->dev,
- "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
- __func__, tm_doorbell, tr_pending);
- ret = -EBUSY;
- }
-out:
- ufshcd_release(hba);
- return ret;
-}
-
/**
* ufshcd_scale_gear - scale up/down UFS gear
* @hba: per adapter instance
@@ -1391,36 +1310,86 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up
* Return: 0 upon success; -EBUSY upon timeout.
*/
static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us)
+ __cond_acquires(hba->host->scan_mutex)
+ __cond_acquires(hba->wb_mutex)
+ __cond_acquires(hba->clk_scaling_lock)
{
- int ret = 0;
+ const unsigned long deadline = jiffies + usecs_to_jiffies(timeout_us);
+ struct Scsi_Host *host = hba->host;
+ struct scsi_device *sdev;
+ long timeout;
+
/*
- * make sure that there are no outstanding requests when
- * clock scaling is in progress
+ * Hold scan_mutex to prevent that SCSI devices are added or removed
+ * while this function is in progress.
*/
- mutex_lock(&hba->host->scan_mutex);
- blk_mq_quiesce_tagset(&hba->host->tag_set);
+ mutex_lock(&host->scan_mutex);
mutex_lock(&hba->wb_mutex);
down_write(&hba->clk_scaling_lock);
+ /* Call ufshcd_hold() to serialize clock gating and clock scaling. */
+ ufshcd_hold(hba);
if (!hba->clk_scaling.is_allowed ||
- ufshcd_wait_for_doorbell_clr(hba, timeout_us)) {
- ret = -EBUSY;
- up_write(&hba->clk_scaling_lock);
- mutex_unlock(&hba->wb_mutex);
- blk_mq_unquiesce_tagset(&hba->host->tag_set);
- mutex_unlock(&hba->host->scan_mutex);
+ hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
goto out;
+
+ blk_freeze_queue_start(hba->tmf_queue);
+ shost_for_each_device(sdev, host)
+ blk_freeze_queue_start(sdev->request_queue);
+
+ /*
+ * Calling synchronize_*rcu_expedited() reduces the wait time from
+ * milliseconds to less than a microsecond. See also
+ * https://paulmck.livejournal.com/67547.html.
+ */
+ if (host->tag_set.flags & BLK_MQ_F_BLOCKING)
+ synchronize_srcu_expedited(host->tag_set.srcu);
+ else
+ synchronize_rcu_expedited();
+
+ timeout = deadline - jiffies;
+ if (timeout <= 0 ||
+ blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, timeout) <= 0)
+ goto unfreeze;
+ shost_for_each_device(sdev, host) {
+ timeout = deadline - jiffies;
+ if (timeout <= 0 ||
+ blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+ timeout) <= 0) {
+ goto unfreeze;
+ }
}
- /* let's not get into low power until clock scaling is completed */
- ufshcd_hold(hba);
+ return 0;
+
+unfreeze:
+ blk_mq_unfreeze_queue_nomemrestore(hba->tmf_queue);
+ shost_for_each_device(sdev, host)
+ blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
+
+ dev_err(hba->dev, "%s timed out\n", __func__);
out:
- return ret;
+ ufshcd_release(hba);
+ up_write(&hba->clk_scaling_lock);
+ mutex_unlock(&hba->wb_mutex);
+ mutex_unlock(&host->scan_mutex);
+
+ return -EBUSY;
}
static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
+ __releases(hba->host->scan_mutex)
+ __releases(hba->wb_mutex)
+ __releases(hba->clk_scaling_lock)
{
+ struct scsi_device *sdev;
+
+ blk_mq_unfreeze_queue_nomemrestore(hba->tmf_queue);
+ shost_for_each_device(sdev, hba->host)
+ blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
+
+ ufshcd_release(hba);
up_write(&hba->clk_scaling_lock);
/* Enable Write Booster if current gear requires it else disable it */
@@ -1428,10 +1397,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
ufshcd_wb_toggle(hba, hba->pwr_info.gear_rx >= hba->clk_scaling.wb_gear);
mutex_unlock(&hba->wb_mutex);
-
- blk_mq_unquiesce_tagset(&hba->host->tag_set);
mutex_unlock(&hba->host->scan_mutex);
- ufshcd_release(hba);
}
/**
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
2025-06-24 20:12 [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ Bart Van Assche
@ 2025-06-30 20:19 ` Bart Van Assche
2025-07-01 4:04 ` Ziqi Chen
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2025-06-30 20:19 UTC (permalink / raw)
To: linux-scsi
Cc: Martin K . Petersen, Can Guo, James E.J. Bottomley, Peter Wang,
Avri Altman, Manivannan Sadhasivam, Bao D. Nguyen, Stanley Jhu,
Asutosh Das
On 6/24/25 1:12 PM, Bart Van Assche wrote:
> ufshcd_clock_scaling_prepare() only supports the legacy doorbell mode and
> may wait up to 20 ms longer than necessary. Hence this patch, which reworks
> ufshcd_clock_scaling_prepare(). Compile-tested only.
(replying to my own e-mail)
Can someone from Qualcomm please help with reviewing and/or testing this
patch? I'd like to get rid of the ufshcd_wait_for_doorbell_clr()
function before anyone adds more callers to that function.
ufshcd_wait_for_doorbell_clr() supports the legacy doorbell mode but not
MCQ.
Thanks
Bart.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
2025-06-24 20:12 [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ Bart Van Assche
2025-06-30 20:19 ` Bart Van Assche
@ 2025-07-01 4:04 ` Ziqi Chen
2025-07-03 8:29 ` Ziqi Chen
2025-07-04 12:04 ` Peter Wang (王信友)
3 siblings, 0 replies; 6+ messages in thread
From: Ziqi Chen @ 2025-07-01 4:04 UTC (permalink / raw)
To: Bart Van Assche
Cc: linux-scsi, Can Guo, James E.J. Bottomley, Peter Wang,
Avri Altman, Manivannan Sadhasivam, Bao D. Nguyen, Stanley Jhu,
Asutosh Das, Martin K . Petersen
Hi Bart,
I am reviewing, will test it also and get back to you.
BRs,
Ziqi
On 6/25/2025 4:12 AM, Bart Van Assche wrote:
> On 6/24/25 1:12 PM, Bart Van Assche wrote:
>> ufshcd_clock_scaling_prepare() only supports the legacy doorbell mode and
>> may wait up to 20 ms longer than necessary. Hence this patch, which reworks
>> ufshcd_clock_scaling_prepare(). Compile-tested only.
>
> Can someone from Qualcomm please help with reviewing and/or testing this
> patch? I'd like to get rid of the ufshcd_wait_for_doorbell_clr()
> function before anyone adds more callers to that function.
> ufshcd_wait_for_doorbell_clr() supports the legacy doorbell mode but not
> MCQ.
>
> Thanks
>
> Bart.
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
2025-06-24 20:12 [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ Bart Van Assche
2025-06-30 20:19 ` Bart Van Assche
2025-07-01 4:04 ` Ziqi Chen
@ 2025-07-03 8:29 ` Ziqi Chen
2025-07-07 19:40 ` Bart Van Assche
2025-07-04 12:04 ` Peter Wang (王信友)
3 siblings, 1 reply; 6+ messages in thread
From: Ziqi Chen @ 2025-07-03 8:29 UTC (permalink / raw)
To: Bart Van Assche
Cc: linux-scsi, Can Guo, James E.J. Bottomley, Peter Wang,
Avri Altman, Manivannan Sadhasivam, Bao D. Nguyen, Stanley Jhu,
Asutosh Das, Martin K . Petersen
On 6/25/2025 4:12 AM, Bart Van Assche wrote:
> ufshcd_clock_scaling_prepare() only supports the legacy doorbell mode and
> may wait up to 20 ms longer than necessary. Hence this patch, which reworks
> ufshcd_clock_scaling_prepare(). Compile-tested only.
>
Hi Bart,
I finished testing your patch. Functionally the patch is OK: it passes
our clock scaling test and I didn't observe any errors. However, I didn't
run stress or stability tests.
Although the patch is functionally OK, I found that it increases
the latency of ufshcd_clock_scaling_prepare():
MTP 8550 (upstream kernel):
Original:
spent: 226302 ns, avg: 2135214 ns, count: 200, total:427042923 ns
with patch:
spent: 1213333 ns, avg: 4583551 ns, count: 200, total:916710316 ns
MTP 8650 (upstream kernel):
Original:
spent: 2013386 ns, avg: 1464596 ns, count: 150, total:219689530 ns
with patch:
spent: 2718802 ns, avg: 4329696 ns, count: 150, total:649454539 ns
MTP8850 (downstream kernel)
Original:
spent: 144323 ns, avg: 1080332 ns, count: 2005, total:2166066242 ns
with patch:
spent: 2530208 ns, avg: 1307159 ns, count: 2005, total:2620855033 ns
I think this increase comes from replacing blk_mq_quiesce_queue()
with blk_freeze_queue(). As I understand it, blk_mq_quiesce_queue()
only blocks new I/O from being dispatched to the hardware queue, while
blk_freeze_queue() freezes the whole queue and waits for all I/O to
complete.
I do not understand your statement that "ufshcd_wait_for_doorbell_clr()
supports the legacy doorbell mode but not MCQ". In
ufshcd_wait_for_doorbell_clr(), tr_pending = ufshcd_pending_cmds(hba) is
counted from the budget_map, not read from the legacy doorbell; it is also
used to obtain the in-flight commands in MCQ mode. So I don't understand
the details behind your statement that "ufshcd_wait_for_doorbell_clr()
supports the legacy doorbell mode but not MCQ".
BRs,
Ziqi
> Cc: Can Guo <quic_cang@quicinc.com>
> Fixes: 305a357d3595 ("scsi: ufs: core: Introduce multi-circular queue capability")
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
> drivers/ufs/core/ufshcd.c | 162 +++++++++++++++-----------------------
> 1 file changed, 64 insertions(+), 98 deletions(-)
>
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index b3fe4335d56c..c8eb5bf65e22 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -1248,87 +1248,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
> return false;
> }
>
> -/*
> - * Determine the number of pending commands by counting the bits in the SCSI
> - * device budget maps. This approach has been selected because a bit is set in
> - * the budget map before scsi_host_queue_ready() checks the host_self_blocked
> - * flag. The host_self_blocked flag can be modified by calling
> - * scsi_block_requests() or scsi_unblock_requests().
> - */
> -static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
> -{
> - const struct scsi_device *sdev;
> - unsigned long flags;
> - u32 pending = 0;
> -
> - spin_lock_irqsave(hba->host->host_lock, flags);
> - __shost_for_each_device(sdev, hba->host)
> - pending += sbitmap_weight(&sdev->budget_map);
> - spin_unlock_irqrestore(hba->host->host_lock, flags);
> -
> - return pending;
> -}
> -
> -/*
> - * Wait until all pending SCSI commands and TMFs have finished or the timeout
> - * has expired.
> - *
> - * Return: 0 upon success; -EBUSY upon timeout.
> - */
> -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> - u64 wait_timeout_us)
> -{
> - int ret = 0;
> - u32 tm_doorbell;
> - u32 tr_pending;
> - bool timeout = false, do_last_check = false;
> - ktime_t start;
> -
> - ufshcd_hold(hba);
> - /*
> - * Wait for all the outstanding tasks/transfer requests.
> - * Verify by checking the doorbell registers are clear.
> - */
> - start = ktime_get();
> - do {
> - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> - ret = -EBUSY;
> - goto out;
> - }
> -
> - tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> - tr_pending = ufshcd_pending_cmds(hba);
> - if (!tm_doorbell && !tr_pending) {
> - timeout = false;
> - break;
> - } else if (do_last_check) {
> - break;
> - }
> -
> - io_schedule_timeout(msecs_to_jiffies(20));
> - if (ktime_to_us(ktime_sub(ktime_get(), start)) >
> - wait_timeout_us) {
> - timeout = true;
> - /*
> - * We might have scheduled out for long time so make
> - * sure to check if doorbells are cleared by this time
> - * or not.
> - */
> - do_last_check = true;
> - }
> - } while (tm_doorbell || tr_pending);
> -
> - if (timeout) {
> - dev_err(hba->dev,
> - "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> - __func__, tm_doorbell, tr_pending);
> - ret = -EBUSY;
> - }
> -out:
> - ufshcd_release(hba);
> - return ret;
> -}
> -
> /**
> * ufshcd_scale_gear - scale up/down UFS gear
> * @hba: per adapter instance
> @@ -1391,36 +1310,86 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up
> * Return: 0 upon success; -EBUSY upon timeout.
> */
> static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us)
> + __cond_acquires(hba->host->scan_mutex)
> + __cond_acquires(hba->wb_mutex)
> + __cond_acquires(hba->clk_scaling_lock)
> {
> - int ret = 0;
> + const unsigned long deadline = jiffies + usecs_to_jiffies(timeout_us);
> + struct Scsi_Host *host = hba->host;
> + struct scsi_device *sdev;
> + long timeout;
> +
> /*
> - * make sure that there are no outstanding requests when
> - * clock scaling is in progress
> + * Hold scan_mutex to prevent that SCSI devices are added or removed
> + * while this function is in progress.
> */
> - mutex_lock(&hba->host->scan_mutex);
> - blk_mq_quiesce_tagset(&hba->host->tag_set);
> + mutex_lock(&host->scan_mutex);
> mutex_lock(&hba->wb_mutex);
> down_write(&hba->clk_scaling_lock);
> + /* Call ufshcd_hold() to serialize clock gating and clock scaling. */
> + ufshcd_hold(hba);
>
> if (!hba->clk_scaling.is_allowed ||
> - ufshcd_wait_for_doorbell_clr(hba, timeout_us)) {
> - ret = -EBUSY;
> - up_write(&hba->clk_scaling_lock);
> - mutex_unlock(&hba->wb_mutex);
> - blk_mq_unquiesce_tagset(&hba->host->tag_set);
> - mutex_unlock(&hba->host->scan_mutex);
> + hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
> goto out;
> +
> + blk_freeze_queue_start(hba->tmf_queue);
> + shost_for_each_device(sdev, host)
> + blk_freeze_queue_start(sdev->request_queue);
> +
> + /*
> + * Calling synchronize_*rcu_expedited() reduces the wait time from
> + * milliseconds to less than a microsecond. See also
> + * https://paulmck.livejournal.com/67547.html.
> + */
> + if (host->tag_set.flags & BLK_MQ_F_BLOCKING)
> + synchronize_srcu_expedited(host->tag_set.srcu);
> + else
> + synchronize_rcu_expedited();
> +
> + timeout = deadline - jiffies;
> + if (timeout <= 0 ||
> + blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, timeout) <= 0)
> + goto unfreeze;
> + shost_for_each_device(sdev, host) {
> + timeout = deadline - jiffies;
> + if (timeout <= 0 ||
> + blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
> + timeout) <= 0) {
> + goto unfreeze;
> + }
> }
>
> - /* let's not get into low power until clock scaling is completed */
> - ufshcd_hold(hba);
> + return 0;
> +
> +unfreeze:
> + blk_mq_unfreeze_queue_nomemrestore(hba->tmf_queue);
> + shost_for_each_device(sdev, host)
> + blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
> +
> + dev_err(hba->dev, "%s timed out\n", __func__);
>
> out:
> - return ret;
> + ufshcd_release(hba);
> + up_write(&hba->clk_scaling_lock);
> + mutex_unlock(&hba->wb_mutex);
> + mutex_unlock(&host->scan_mutex);
> +
> + return -EBUSY;
> }
>
> static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
> + __releases(hba->host->scan_mutex)
> + __releases(hba->wb_mutex)
> + __releases(hba->clk_scaling_lock)
> {
> + struct scsi_device *sdev;
> +
> + blk_mq_unfreeze_queue_nomemrestore(hba->tmf_queue);
> + shost_for_each_device(sdev, hba->host)
> + blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
> +
> + ufshcd_release(hba);
> up_write(&hba->clk_scaling_lock);
>
> /* Enable Write Booster if current gear requires it else disable it */
> @@ -1428,10 +1397,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err)
> ufshcd_wb_toggle(hba, hba->pwr_info.gear_rx >= hba->clk_scaling.wb_gear);
>
> mutex_unlock(&hba->wb_mutex);
> -
> - blk_mq_unquiesce_tagset(&hba->host->tag_set);
> mutex_unlock(&hba->host->scan_mutex);
> - ufshcd_release(hba);
> }
>
> /**
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
2025-06-24 20:12 [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ Bart Van Assche
` (2 preceding siblings ...)
2025-07-03 8:29 ` Ziqi Chen
@ 2025-07-04 12:04 ` Peter Wang (王信友)
3 siblings, 0 replies; 6+ messages in thread
From: Peter Wang (王信友) @ 2025-07-04 12:04 UTC (permalink / raw)
To: bvanassche@acm.org, martin.petersen@oracle.com
Cc: quic_asutoshd@quicinc.com, quic_cang@quicinc.com,
quic_nguyenb@quicinc.com, linux-scsi@vger.kernel.org,
chu.stanley@gmail.com, avri.altman@sandisk.com,
James.Bottomley@HansenPartnership.com, mani@kernel.org
On Tue, 2025-06-24 at 13:12 -0700, Bart Van Assche wrote:
>
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>
>
> ufshcd_clock_scaling_prepare() only supports the legacy doorbell mode
> and
> may wait up to 20 ms longer than necessary. Hence this patch, which
> reworks
> ufshcd_clock_scaling_prepare(). Compile-tested only.
>
Hi Bart,
For the past two years, MediaTek has been using the original method,
MCQ mode + clock scaling, without any issues. So I'm also very
curious: what problems did you encounter in MCQ mode?
Thanks
Peter
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ
2025-07-03 8:29 ` Ziqi Chen
@ 2025-07-07 19:40 ` Bart Van Assche
0 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2025-07-07 19:40 UTC (permalink / raw)
To: Ziqi Chen
Cc: linux-scsi, Can Guo, James E.J. Bottomley, Peter Wang,
Avri Altman, Manivannan Sadhasivam, Bao D. Nguyen, Stanley Jhu,
Asutosh Das, Martin K . Petersen
On 7/3/25 1:29 AM, Ziqi Chen wrote:
> Although patch functional is OK, but I found this patch will increase
> the latency of ufshcd_clock_scaling_prepare():
>
> MTP 8550 (upstream kernel):
> Original:
> spent: 226302 ns, avg: 2135214 ns, count: 200, total:427042923 ns
> with patch:
> spent: 1213333 ns, avg: 4583551 ns, count: 200, total:916710316 ns
>
> MTP 8650 (upstream kernel):
> Original:
> spent: 2013386 ns, avg: 1464596 ns, count: 150, total:219689530 ns
> with patch:
> spent: 2718802 ns, avg: 4329696 ns, count: 150, total:649454539 ns
>
> MTP8850 (downstream kernel)
> Original:
> spent: 144323 ns, avg: 1080332 ns, count: 2005, total:2166066242 ns
> with patch:
> spent: 2530208 ns, avg: 1307159 ns, count: 2005, total:2620855033 ns
That's unfortunate ...
> I think this increase comes from replacing blk_mq_quiesce_queue()
> with blk_freeze_queue(). As I understand it, blk_mq_quiesce_queue()
> only blocks new I/O from being dispatched to the hardware queue, while
> blk_freeze_queue() freezes the whole queue and waits for all I/O to
> complete.
Hmm ... both blk_freeze_queue() and the loop that calls
ufshcd_pending_cmds() should wait for all pending commands to finish. So
the latency increase probably comes from the synchronize_rcu_expedited()
call.
> I do not understand your statement that "ufshcd_wait_for_doorbell_clr()
> supports the legacy doorbell mode but not MCQ". In
> ufshcd_wait_for_doorbell_clr(), tr_pending = ufshcd_pending_cmds(hba)
> is counted from the budget_map, not read from the legacy doorbell; it
> is used to obtain the in-flight commands in MCQ mode.
That was a misunderstanding from my side. Since the current code already
supports MCQ and my patch doesn't improve the clock scaling latency,
let's drop this patch.
Thanks,
Bart.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2025-07-07 19:40 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-24 20:12 [PATCH] scsi: ufs: Make ufshcd_clock_scaling_prepare() compatible with MCQ Bart Van Assche
2025-06-30 20:19 ` Bart Van Assche
2025-07-01 4:04 ` Ziqi Chen
2025-07-03 8:29 ` Ziqi Chen
2025-07-07 19:40 ` Bart Van Assche
2025-07-04 12:04 ` Peter Wang (王信友)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox