Linux-mediatek Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: <peter.wang@mediatek.com>
To: <linux-scsi@vger.kernel.org>, <martin.petersen@oracle.com>,
	<avri.altman@sandisk.com>, <alim.akhtar@samsung.com>,
	<jejb@linux.ibm.com>
Cc: <wsd_upstream@mediatek.com>, <linux-mediatek@lists.infradead.org>,
	<peter.wang@mediatek.com>, <chun-hung.wu@mediatek.com>,
	<alice.chao@mediatek.com>, <cc.chou@mediatek.com>,
	<chaotian.jing@mediatek.com>, <tun-yu.yu@mediatek.com>,
	<eddie.huang@mediatek.com>, <naomi.chu@mediatek.com>,
	<ed.tsai@mediatek.com>, <bvanassche@acm.org>,
	<quic_cang@quicinc.com>, <quic_asutoshd@quicinc.com>,
	<light.hsieh@mediatek.com>
Subject: [PATCH v1] ufs: core: decouple CQE processing from spinlock critical section
Date: Thu, 14 May 2026 16:26:39 +0800	[thread overview]
Message-ID: <20260514082906.58593-1-peter.wang@mediatek.com> (raw)

From: Peter Wang <peter.wang@mediatek.com>

Currently, ufshcd_mcq_process_cqe() is called while holding the CQ
spinlock, which can lead to unnecessary lock contention since CQE
processing may involve time-consuming operations like completing I/O
requests and invoking callbacks.

Refactor the CQE processing flow to separate the lock-protected queue
head/tail slot updates from the actual CQE processing:

1. Add a new 'cqe_last_addr' field to 'ufs_hw_queue' to cache the
   address of the last CQE entry, precomputed during memory allocation
   in ufshcd_mcq_memory_alloc(). This avoids repeated recalculation
   during the hot path.

2. Introduce ufshcd_mcq_inc_cqe_addr() helper in ufshcd-priv.h to
   increment a CQE pointer with wraparound, using 'cqe_last_addr' for
   boundary checking.

3. Refactor ufshcd_mcq_process_cqe() to accept a 'struct cq_entry *'
   directly instead of deriving it from the hardware queue, decoupling
   it from queue state.

4. In both ufshcd_mcq_compl_all_cqes_lock() and
   ufshcd_mcq_poll_cqe_lock(), snapshot the starting CQE pointer before
   advancing the head slot under the spinlock, then process the collected
   CQEs after releasing the lock using the new helper.

This reduces the time spent holding the CQ spinlock to only the
minimal queue slot management operations, improving concurrency and
reducing latency under heavy I/O workloads.

Signed-off-by: Peter Wang <peter.wang@mediatek.com>
---
 drivers/ufs/core/ufs-mcq.c     | 23 ++++++++++++++++++-----
 drivers/ufs/core/ufshcd-priv.h | 20 ++++++++++++++++++++
 include/ufs/ufshcd.h           |  1 +
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index c1b1d67a1ddc..74a6595f9bda 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -248,6 +248,7 @@ int ufshcd_mcq_memory_alloc(struct ufs_hba *hba)
 			dev_err(hba->dev, "CQE allocation failed\n");
 			return -ENOMEM;
 		}
+		hwq->cqe_last_addr = hwq->cqe_base_addr + hwq->max_entries - 1;
 	}
 
 	return 0;
@@ -307,10 +308,8 @@ static int ufshcd_mcq_get_tag(struct ufs_hba *hba, struct cq_entry *cqe)
 }
 
 static void ufshcd_mcq_process_cqe(struct ufs_hba *hba,
-				   struct ufs_hw_queue *hwq)
+				   struct cq_entry *cqe)
 {
-	struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq);
-
 	if (cqe->command_desc_base_addr) {
 		int tag = ufshcd_mcq_get_tag(hba, cqe);
 
@@ -335,10 +334,12 @@ void ufshcd_mcq_compl_all_cqes_lock(struct ufs_hba *hba,
 {
 	unsigned long flags;
 	u32 entries = hwq->max_entries;
+	struct cq_entry *cqe;
+	int i;
 
 	spin_lock_irqsave(&hwq->cq_lock, flags);
+	cqe = ufshcd_mcq_cur_cqe(hwq);
 	while (entries > 0) {
-		ufshcd_mcq_process_cqe(hba, hwq);
 		ufshcd_mcq_inc_cq_head_slot(hwq);
 		entries--;
 	}
@@ -346,6 +347,11 @@ void ufshcd_mcq_compl_all_cqes_lock(struct ufs_hba *hba,
 	ufshcd_mcq_update_cq_tail_slot(hwq);
 	hwq->cq_head_slot = hwq->cq_tail_slot;
 	spin_unlock_irqrestore(&hwq->cq_lock, flags);
+
+	for (i = 0; i < hwq->max_entries; i++) {
+		ufshcd_mcq_process_cqe(hba, cqe);
+		cqe = ufshcd_mcq_inc_cqe_addr(hwq, cqe);
+	}
 }
 
 unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
@@ -353,11 +359,13 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
 {
 	unsigned long completed_reqs = 0;
 	unsigned long flags;
+	struct cq_entry *cqe;
+	int i;
 
 	spin_lock_irqsave(&hwq->cq_lock, flags);
+	cqe = ufshcd_mcq_cur_cqe(hwq);
 	ufshcd_mcq_update_cq_tail_slot(hwq);
 	while (!ufshcd_mcq_is_cq_empty(hwq)) {
-		ufshcd_mcq_process_cqe(hba, hwq);
 		ufshcd_mcq_inc_cq_head_slot(hwq);
 		completed_reqs++;
 	}
@@ -366,6 +374,11 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
 		ufshcd_mcq_update_cq_head(hwq);
 	spin_unlock_irqrestore(&hwq->cq_lock, flags);
 
+	for (i = 0; i < completed_reqs; i++) {
+		ufshcd_mcq_process_cqe(hba, cqe);
+		cqe = ufshcd_mcq_inc_cqe_addr(hwq, cqe);
+	}
+
 	return completed_reqs;
 }
 EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_lock);
diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
index 0a72148cb053..6d4d3e726a9a 100644
--- a/drivers/ufs/core/ufshcd-priv.h
+++ b/drivers/ufs/core/ufshcd-priv.h
@@ -440,6 +440,26 @@ static inline struct scsi_cmnd *ufshcd_tag_to_cmd(struct ufs_hba *hba, u32 tag)
 	return blk_mq_rq_to_pdu(rq);
 }
 
+/**
+ * ufshcd_mcq_inc_cqe_addr - increment CQE pointer with wraparound
+ * @q: pointer to the hardware queue
+ * @cqe: current CQE pointer to increment
+ *
+ * Increments the CQE pointer to the next entry. If the pointer
+ * exceeds the last entry, it wraps around to the base address.
+ *
+ * Returns: pointer to the next cq_entry
+ */
+static inline struct cq_entry *ufshcd_mcq_inc_cqe_addr(struct ufs_hw_queue *q,
+						       struct cq_entry *cqe)
+{
+	cqe++;
+	if (cqe > q->cqe_last_addr)
+		cqe = q->cqe_base_addr;
+
+	return cqe;
+}
+
 static inline void ufshcd_inc_sq_tail(struct ufs_hw_queue *q)
 	__must_hold(&q->sq_lock)
 {
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index cfbc75d8df83..1becb38e215e 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1291,6 +1291,7 @@ struct ufs_hw_queue {
 	struct utp_transfer_req_desc *sqe_base_addr;
 	dma_addr_t sqe_dma_addr;
 	struct cq_entry *cqe_base_addr;
+	struct cq_entry *cqe_last_addr;
 	dma_addr_t cqe_dma_addr;
 	u32 max_entries;
 	u32 id;
-- 
2.45.2



                 reply	other threads:[~2026-05-14  9:01 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260514082906.58593-1-peter.wang@mediatek.com \
    --to=peter.wang@mediatek.com \
    --cc=alice.chao@mediatek.com \
    --cc=alim.akhtar@samsung.com \
    --cc=avri.altman@sandisk.com \
    --cc=bvanassche@acm.org \
    --cc=cc.chou@mediatek.com \
    --cc=chaotian.jing@mediatek.com \
    --cc=chun-hung.wu@mediatek.com \
    --cc=ed.tsai@mediatek.com \
    --cc=eddie.huang@mediatek.com \
    --cc=jejb@linux.ibm.com \
    --cc=light.hsieh@mediatek.com \
    --cc=linux-mediatek@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=naomi.chu@mediatek.com \
    --cc=quic_asutoshd@quicinc.com \
    --cc=quic_cang@quicinc.com \
    --cc=tun-yu.yu@mediatek.com \
    --cc=wsd_upstream@mediatek.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox