From: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 5/7] nvme-pci: handle completions outside of the queue lock
Date: Fri, 18 May 2018 08:52:33 -0600
Message-ID: <1526655155-4006-6-git-send-email-axboe@kernel.dk>
In-Reply-To: <1526655155-4006-1-git-send-email-axboe@kernel.dk>

Split the completion of events into a two-part process:

1) Reap the events inside the queue lock
2) Complete the events outside the queue lock

Since we never wrap the queue, the reaped entries can be accessed
locklessly once the completion queue head has been updated. An earlier
version of this patch batched events on the stack, but with this trick
that is no longer necessary. Keith Busch <keith.busch@intel.com> came up
with the idea.
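
For orientation before the diff, the lockless half of the split is small
enough to quote up front; this is the new nvme_complete_cqes() from the
diff below, with a comment added:

static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
	/* Runs outside q_lock: entries in [start, end) were already reaped. */
	while (start != end) {
		nvme_handle_cqe(nvmeq, start);
		if (++start == nvmeq->q_depth)
			start = 0;
	}
}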

Note that this also removes ->cqe_seen. I haven't been able to trigger
any ill effects from it. If we do race with polling every so often, it
should be rare enough not to trigger any issues.
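
Concretely, the interrupt handler reduces to the two-phase shape below
(a condensed copy of the new nvme_irq() from the diff, with comments
added; an empty reap window now means IRQ_NONE, which is what ->cqe_seen
used to signal):

static irqreturn_t nvme_irq(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;
	u16 start, end;

	/* Phase 1: reap under the lock; only cq_head/cq_phase move. */
	spin_lock(&nvmeq->q_lock);
	nvme_process_cq(nvmeq, &start, &end, -1);
	spin_unlock(&nvmeq->q_lock);

	/* Nothing reaped means the interrupt wasn't ours. */
	if (start == end)
		return IRQ_NONE;

	/* Phase 2: complete the reaped window outside the lock. */
	nvme_complete_cqes(nvmeq, start, end);
	return IRQ_HANDLED;
}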

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Keith Busch <keith.busch@intel.com>
[hch: refactored, restored poll early exit optimization]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 87 +++++++++++++++++++++++++------------------------
 1 file changed, 45 insertions(+), 42 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bfd7023877a7..fd4f348c8f54 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -160,7 +160,6 @@ struct nvme_queue {
 	u16 cq_head;
 	u16 qid;
 	u8 cq_phase;
-	u8 cqe_seen;
 	u32 *dbbuf_sq_db;
 	u32 *dbbuf_cq_db;
 	u32 *dbbuf_sq_ei;
@@ -931,9 +930,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 	}
 }
 
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
-		struct nvme_completion *cqe)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 {
+	volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
 	struct request *req;
 
 	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -956,50 +955,58 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 		return;
 	}
 
-	nvmeq->cqe_seen = 1;
 	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
 	nvme_end_request(req, cqe->status, cqe->result);
 }
 
-static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
-		struct nvme_completion *cqe)
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
 {
-	if (nvme_cqe_pending(nvmeq)) {
-		*cqe = nvmeq->cqes[nvmeq->cq_head];
+	while (start != end) {
+		nvme_handle_cqe(nvmeq, start);
+		if (++start == nvmeq->q_depth)
+			start = 0;
+	}
+}
 
-		if (++nvmeq->cq_head == nvmeq->q_depth) {
-			nvmeq->cq_head = 0;
-			nvmeq->cq_phase = !nvmeq->cq_phase;
-		}
-		return true;
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
+{
+	if (++nvmeq->cq_head == nvmeq->q_depth) {
+		nvmeq->cq_head = 0;
+		nvmeq->cq_phase = !nvmeq->cq_phase;
 	}
-	return false;
 }
 
-static void nvme_process_cq(struct nvme_queue *nvmeq)
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+		u16 *end, int tag)
 {
-	struct nvme_completion cqe;
-	int consumed = 0;
+	bool found = false;
 
-	while (nvme_read_cqe(nvmeq, &cqe)) {
-		nvme_handle_cqe(nvmeq, &cqe);
-		consumed++;
+	*start = nvmeq->cq_head;
+	while (!found && nvme_cqe_pending(nvmeq)) {
+		if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+			found = true;
+		nvme_update_cq_head(nvmeq);
 	}
+	*end = nvmeq->cq_head;
 
-	if (consumed)
+	if (*start != *end)
 		nvme_ring_cq_doorbell(nvmeq);
+	return found;
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
 {
-	irqreturn_t result;
 	struct nvme_queue *nvmeq = data;
+	u16 start, end;
+
 	spin_lock(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
-	nvmeq->cqe_seen = 0;
+	nvme_process_cq(nvmeq, &start, &end, -1);
 	spin_unlock(&nvmeq->q_lock);
-	return result;
+
+	if (start == end)
+		return IRQ_NONE;
+	nvme_complete_cqes(nvmeq, start, end);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t nvme_irq_check(int irq, void *data)
@@ -1012,27 +1019,17 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-	struct nvme_completion cqe;
-	int found = 0, consumed = 0;
+	u16 start, end;
+	bool found;
 
 	if (!nvme_cqe_pending(nvmeq))
 		return 0;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	while (nvme_read_cqe(nvmeq, &cqe)) {
-		nvme_handle_cqe(nvmeq, &cqe);
-		consumed++;
-
-		if (tag == cqe.command_id) {
-			found = 1;
-			break;
-		}
-       }
-
-	if (consumed)
-		nvme_ring_cq_doorbell(nvmeq);
+	found = nvme_process_cq(nvmeq, &start, &end, tag);
 	spin_unlock_irq(&nvmeq->q_lock);
 
+	nvme_complete_cqes(nvmeq, start, end);
 	return found;
 }
 
@@ -1348,6 +1345,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
 	struct nvme_queue *nvmeq = &dev->queues[0];
+	u16 start, end;
 
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
@@ -1355,8 +1353,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 		nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
 	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
+	nvme_process_cq(nvmeq, &start, &end, -1);
 	spin_unlock_irq(&nvmeq->q_lock);
+
+	nvme_complete_cqes(nvmeq, start, end);
 }
 
 static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -2003,6 +2003,7 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
 static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
+	u16 start, end;
 
 	if (!error) {
 		unsigned long flags;
@@ -2014,8 +2015,10 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
 		 */
 		spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
 					SINGLE_DEPTH_NESTING);
-		nvme_process_cq(nvmeq);
+		nvme_process_cq(nvmeq, &start, &end, -1);
 		spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+		nvme_complete_cqes(nvmeq, start, end);
 	}
 
 	nvme_del_queue_end(req, error);
-- 
2.7.4
