From: hch@infradead.org (Christoph Hellwig)
Subject: nvme: batch completions and do them outside of the queue lock
Date: Thu, 17 May 2018 00:16:03 -0700
Message-ID: <20180517071603.GB30079@infradead.org>
In-Reply-To: <909fe3ae-fe5a-fce6-20ea-a7f440799a06@kernel.dk>
> static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
> - struct nvme_completion *cqe)
> + volatile struct nvme_completion *cqe)
Skip the bogus reindentation here :)
> {
> struct request *req;
>
> @@ -950,21 +949,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
> if (unlikely(nvmeq->qid == 0 &&
> cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
> nvme_complete_async_event(&nvmeq->dev->ctrl,
> - cqe->status, &cqe->result);
> + cqe->status, (union nvme_result *) &cqe->result);
Please find a way to avoid that cast. Either always make it volatile,
or pass by value if modern compilers have stopped generating shit code
for passing unions by value.
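One way to avoid the cast without even changing the prototype of
nvme_complete_async_event (my sketch, not something from the patch):
copy the result out of the volatile CQE into a local and pass that:

	/*
	 * Sketch only: the local copy reads the volatile CQE once and
	 * lets us pass a plain union nvme_result pointer, no cast needed.
	 */
	if (unlikely(nvmeq->qid == 0 &&
			cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
		union nvme_result res = cqe->result;

		nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &res);
		return;
	}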
> -static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
> - struct nvme_completion *cqe)
> +static inline bool nvme_read_cqe(struct nvme_queue *nvmeq)
> {
> if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
> - *cqe = nvmeq->cqes[nvmeq->cq_head];
> -
Without actually reading the CQE, this function is now grossly misnamed.
What about nvme_consume_cqe or something like that instead?
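Rough sketch of that rename (the body is just the quoted code with the
copy dropped; only the name is new):

/* Advance cq_head/cq_phase past one valid CQE without reading it. */
static inline bool nvme_consume_cqe(struct nvme_queue *nvmeq)
{
	if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
		return false;
	if (++nvmeq->cq_head == nvmeq->q_depth) {
		nvmeq->cq_head = 0;
		nvmeq->cq_phase = !nvmeq->cq_phase;
	}
	return true;
}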
> +static inline void nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
> + u16 *end)
> {
> + *start = nvmeq->cq_head;
> + while (nvme_read_cqe(nvmeq))
> + ;
> + *end = nvmeq->cq_head;
> +
> + if (*start != *end)
> + nvme_ring_cq_doorbell(nvmeq);
> +}
Or in fact just kill off nvme_read_cqe entirely, as this appears to be
its only caller with your patch (I'm just reading the patch, so I might
be wrong). Then this could become:
static inline void nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
		u16 *end)
{
	*start = nvmeq->cq_head;
	while (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
		if (++nvmeq->cq_head == nvmeq->q_depth) {
			nvmeq->cq_head = 0;
			nvmeq->cq_phase = !nvmeq->cq_phase;
		}
	}
	*end = nvmeq->cq_head;

	if (*start != *end)
		nvme_ring_cq_doorbell(nvmeq);
}
which starts to make a lot more sense.
> +static bool nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end,
> + unsigned int tag)
> +{
> + bool found = false;
> +
> + if (start == end)
> + return false;
> +
> + while (start != end) {
> + volatile struct nvme_completion *cqe = &nvmeq->cqes[start];
> +
> + if (!found && tag == cqe->command_id)
> + found = true;
> + nvme_handle_cqe(nvmeq, cqe);
> + if (++start == nvmeq->q_depth)
> + start = 0;
> }
>
> + return found;
I don't think we need the if (start == end) check; the while loop already
handles that. I also wonder if we should move the tag matching into
nvme_handle_cqe. It already looks at cqe->command_id, so that would keep
the accesses together and remove the need to look at the CQE at all in
this function. nvme_complete_cqes would then be trivial enough that we
could still keep the poll optimization.
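As a rough sketch of that idea (the bool return and the tag parameter
are my reading of the suggestion, not code from the patch):

static inline bool nvme_handle_cqe(struct nvme_queue *nvmeq,
		volatile struct nvme_completion *cqe, unsigned int tag)
{
	struct request *req;

	if (unlikely(nvmeq->qid == 0 &&
			cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
		union nvme_result res = cqe->result;

		nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &res);
		return false;
	}

	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
	nvme_end_request(req, cqe->status, cqe->result);

	/* report whether this CQE completed the tag the poller cares about */
	return tag == cqe->command_id;
}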
IRQ/delete-queue path, including handling of the irqreturn_t value:

static irqreturn_t nvme_complete_cqes(struct nvme_queue *nvmeq,
		u16 start, u16 end)
{
	irqreturn_t ret = IRQ_NONE;

	while (start != end) {
		nvme_handle_cqe(nvmeq, &nvmeq->cqes[start], -1);
		if (++start == nvmeq->q_depth)
			start = 0;
		ret = IRQ_HANDLED;
	}

	return ret;
}
poll path:
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
	u16 start, end;

	if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
		return 0;

	spin_lock_irq(&nvmeq->q_lock);
	nvme_process_cq(nvmeq, &start, &end);
	spin_unlock_irq(&nvmeq->q_lock);

	while (start != end) {
		if (nvme_handle_cqe(nvmeq, &nvmeq->cqes[start], tag))
			return 1;
		if (++start == nvmeq->q_depth)
			start = 0;
	}

	return 0;
}
That would also keep the early-exit behavior for poll once we've found
the tag.
> @@ -2006,8 +2016,9 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
> */
> spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
> SINGLE_DEPTH_NESTING);
> - nvme_process_cq(nvmeq);
> + nvme_process_cq(nvmeq, &start, &end);
> spin_unlock_irqrestore(&nvmeq->q_lock, flags);
> + nvme_complete_cqes(nvmeq, start, end, -1U);
If we could somehow move this into a workqueue, we could even move the
locking into nvme_process_cq, but that's left for another time.
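Purely as an illustration of that direction (hypothetical, not part of
this patch), the locking could then look like:

/*
 * Hypothetical helper: with completions run from process context, the
 * queue lock could live entirely inside the CQ processing path.
 */
static void nvme_process_cq_locked(struct nvme_queue *nvmeq, u16 *start,
		u16 *end)
{
	unsigned long flags;

	spin_lock_irqsave(&nvmeq->q_lock, flags);
	nvme_process_cq(nvmeq, start, end);
	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
}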