From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>, Fam Zheng <fam@euphon.net>,
qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
Gollu Appalanaidu <anaidu.gollu@samsung.com>,
Max Reitz <mreitz@redhat.com>, Klaus Jensen <its@irrelevant.dk>,
Stefan Hajnoczi <stefanha@redhat.com>,
Keith Busch <kbusch@kernel.org>
Subject: [PATCH RFC v2 4/8] hw/block/nvme: try to deal with the iov/qsg duality
Date: Sun, 7 Feb 2021 22:49:36 +0100 [thread overview]
Message-ID: <20210207214940.281889-5-its@irrelevant.dk> (raw)
In-Reply-To: <20210207214940.281889-1-its@irrelevant.dk>
From: Klaus Jensen <k.jensen@samsung.com>
Introduce NvmeSg and try to deal with that pesky qsg/iov duality that
haunts all the memory-related functions.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/block/nvme.h | 8 ++-
hw/block/nvme.c | 171 ++++++++++++++++++++++++------------------------
2 files changed, 90 insertions(+), 89 deletions(-)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index cb2b5175f1a1..0e4fbd6990ad 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -29,6 +29,11 @@ typedef struct NvmeAsyncEvent {
NvmeAerResult result;
} NvmeAsyncEvent;
+typedef struct NvmeSg {
+ QEMUSGList qsg;
+ QEMUIOVector iov;
+} NvmeSg;
+
typedef struct NvmeRequest {
struct NvmeSQueue *sq;
struct NvmeNamespace *ns;
@@ -38,8 +43,7 @@ typedef struct NvmeRequest {
NvmeCqe cqe;
NvmeCmd cmd;
BlockAcctCookie acct;
- QEMUSGList qsg;
- QEMUIOVector iov;
+ NvmeSg sg;
QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 29902038d618..a0009c057f1e 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -428,14 +428,20 @@ static void nvme_req_clear(NvmeRequest *req)
req->status = NVME_SUCCESS;
}
-static void nvme_req_exit(NvmeRequest *req)
+static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg)
{
- if (req->qsg.sg) {
- qemu_sglist_destroy(&req->qsg);
+ pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
+ qemu_iovec_init(&sg->iov, 0);
+}
+
+static inline void nvme_sg_unmap(NvmeSg *sg)
+{
+ if (sg->qsg.sg) {
+ qemu_sglist_destroy(&sg->qsg);
}
- if (req->iov.iov) {
- qemu_iovec_destroy(&req->iov);
+ if (sg->iov.iov) {
+ qemu_iovec_destroy(&sg->iov);
}
}
@@ -473,8 +479,7 @@ static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
return NVME_SUCCESS;
}
-static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
- hwaddr addr, size_t len)
+static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
{
bool cmb = false, pmr = false;
@@ -491,34 +496,22 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
}
if (cmb || pmr) {
- if (qsg && qsg->sg) {
+ if (sg->qsg.nsg) {
return NVME_INVALID_USE_OF_CMB | NVME_DNR;
}
- assert(iov);
-
- if (!iov->iov) {
- qemu_iovec_init(iov, 1);
- }
-
if (cmb) {
- return nvme_map_addr_cmb(n, iov, addr, len);
+ return nvme_map_addr_cmb(n, &sg->iov, addr, len);
} else {
- return nvme_map_addr_pmr(n, iov, addr, len);
+ return nvme_map_addr_pmr(n, &sg->iov, addr, len);
}
}
- if (iov && iov->iov) {
+ if (sg->iov.niov) {
return NVME_INVALID_USE_OF_CMB | NVME_DNR;
}
- assert(qsg);
-
- if (!qsg->sg) {
- pci_dma_sglist_init(qsg, &n->parent_obj, 1);
- }
-
- qemu_sglist_add(qsg, addr, len);
+ qemu_sglist_add(&sg->qsg, addr, len);
return NVME_SUCCESS;
}
@@ -532,20 +525,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
uint16_t status;
int ret;
- QEMUSGList *qsg = &req->qsg;
- QEMUIOVector *iov = &req->iov;
-
trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
- if (nvme_addr_is_cmb(n, prp1) || (nvme_addr_is_pmr(n, prp1))) {
- qemu_iovec_init(iov, num_prps);
- } else {
- pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
- }
+ nvme_sg_init(n, &req->sg);
- status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+ status = nvme_map_addr(n, &req->sg, prp1, trans_len);
if (status) {
- return status;
+ goto unmap;
}
len -= trans_len;
@@ -560,7 +546,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
if (ret) {
trace_pci_nvme_err_addr_read(prp2);
- return NVME_DATA_TRAS_ERROR;
+ status = NVME_DATA_TRAS_ERROR;
+ goto unmap;
}
while (len != 0) {
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
@@ -568,7 +555,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
if (i == n->max_prp_ents - 1 && len > n->page_size) {
if (unlikely(prp_ent & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
i = 0;
@@ -578,20 +566,22 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
prp_trans);
if (ret) {
trace_pci_nvme_err_addr_read(prp_ent);
- return NVME_DATA_TRAS_ERROR;
+ status = NVME_DATA_TRAS_ERROR;
+ goto unmap;
}
prp_ent = le64_to_cpu(prp_list[i]);
}
if (unlikely(prp_ent & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
trans_len = MIN(len, n->page_size);
- status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
+ status = nvme_map_addr(n, &req->sg, prp_ent, trans_len);
if (status) {
- return status;
+ goto unmap;
}
len -= trans_len;
@@ -600,24 +590,28 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
} else {
if (unlikely(prp2 & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prp2_align(prp2);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
- status = nvme_map_addr(n, qsg, iov, prp2, len);
+ status = nvme_map_addr(n, &req->sg, prp2, len);
if (status) {
- return status;
+ goto unmap;
}
}
}
return NVME_SUCCESS;
+
+unmap:
+ nvme_sg_unmap(&req->sg);
+ return status;
}
/*
* Map 'nsgld' data descriptors from 'segment'. The function will subtract the
* number of bytes mapped in len.
*/
-static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
- QEMUIOVector *iov,
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
NvmeSglDescriptor *segment, uint64_t nsgld,
size_t *len, NvmeRequest *req)
{
@@ -675,7 +669,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
}
- status = nvme_map_addr(n, qsg, iov, addr, trans_len);
+ status = nvme_map_addr(n, sg, addr, trans_len);
if (status) {
return status;
}
@@ -687,9 +681,8 @@ next:
return NVME_SUCCESS;
}
-static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
- NvmeSglDescriptor sgl, size_t len,
- NvmeRequest *req)
+static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
+ size_t len, NvmeRequest *req)
{
/*
* Read the segment in chunks of 256 descriptors (one 4k page) to avoid
@@ -707,6 +700,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
hwaddr addr;
int ret;
+ nvme_sg_init(n, sg);
+
sgld = &sgl;
addr = le64_to_cpu(sgl.addr);
@@ -717,7 +712,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
* be mapped directly.
*/
if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
- status = nvme_map_sgl_data(n, qsg, iov, sgld, 1, &len, req);
+ status = nvme_map_sgl_data(n, sg, sgld, 1, &len, req);
if (status) {
goto unmap;
}
@@ -755,7 +750,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
goto unmap;
}
- status = nvme_map_sgl_data(n, qsg, iov, segment, SEG_CHUNK_SIZE,
+ status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
&len, req);
if (status) {
goto unmap;
@@ -782,7 +777,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
switch (NVME_SGL_TYPE(last_sgld->type)) {
case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
- status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld, &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, req);
if (status) {
goto unmap;
}
@@ -809,7 +804,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
* Do not map the last descriptor; it will be a Segment or Last Segment
* descriptor and is handled by the next iteration.
*/
- status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld - 1, &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, req);
if (status) {
goto unmap;
}
@@ -825,14 +820,7 @@ out:
return NVME_SUCCESS;
unmap:
- if (iov->iov) {
- qemu_iovec_destroy(iov);
- }
-
- if (qsg->sg) {
- qemu_sglist_destroy(qsg);
- }
-
+ nvme_sg_unmap(sg);
return status;
}
@@ -853,8 +841,7 @@ static uint16_t nvme_map_dptr(NvmeCtrl *n, size_t len, NvmeRequest *req)
return NVME_INVALID_FIELD | NVME_DNR;
}
- return nvme_map_sgl(n, &req->qsg, &req->iov, req->cmd.dptr.sgl, len,
- req);
+ return nvme_map_sgl(n, &req->sg, req->cmd.dptr.sgl, len, req);
default:
return NVME_INVALID_FIELD;
}
@@ -871,15 +858,15 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
}
/* assert that only one of qsg and iov carries data */
- assert((req->qsg.nsg > 0) != (req->iov.niov > 0));
+ assert((req->sg.qsg.nsg > 0) != (req->sg.iov.niov > 0));
- if (req->qsg.nsg > 0) {
+ if (req->sg.qsg.nsg > 0) {
uint64_t residual;
if (dir == DMA_DIRECTION_TO_DEVICE) {
- residual = dma_buf_write(ptr, len, &req->qsg);
+ residual = dma_buf_write(ptr, len, &req->sg.qsg);
} else {
- residual = dma_buf_read(ptr, len, &req->qsg);
+ residual = dma_buf_read(ptr, len, &req->sg.qsg);
}
if (unlikely(residual)) {
@@ -890,9 +877,9 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
size_t bytes;
if (dir == DMA_DIRECTION_TO_DEVICE) {
- bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
+ bytes = qemu_iovec_to_buf(&req->sg.iov, 0, ptr, len);
} else {
- bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
+ bytes = qemu_iovec_from_buf(&req->sg.iov, 0, ptr, len);
}
if (unlikely(bytes != len)) {
@@ -904,6 +891,28 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return status;
}
+static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
+ BlockCompletionFunc *cb, NvmeRequest *req)
+{
+ if (req->sg.qsg.nsg) {
+ req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+ cb, req);
+ } else {
+ req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
+ }
+}
+
+static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
+ BlockCompletionFunc *cb, NvmeRequest *req)
+{
+ if (req->sg.qsg.nsg) {
+ req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+ cb, req);
+ } else {
+ req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
+ }
+}
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -934,7 +943,7 @@ static void nvme_post_cqes(void *opaque)
}
QTAILQ_REMOVE(&cq->req_list, req, entry);
nvme_inc_cq_tail(cq);
- nvme_req_exit(req);
+ nvme_sg_unmap(&req->sg);
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
@@ -1597,14 +1606,14 @@ static void nvme_copy_in_complete(NvmeRequest *req)
zone->w_ptr += ctx->nlb;
}
- qemu_iovec_init(&req->iov, 1);
- qemu_iovec_add(&req->iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
+ qemu_iovec_init(&req->sg.iov, 1);
+ qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_WRITE);
req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba),
- &req->iov, 0, nvme_copy_cb, req);
+ &req->sg.iov, 0, nvme_copy_cb, req);
return;
@@ -1992,13 +2001,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
BLOCK_ACCT_READ);
- if (req->qsg.sg) {
- req->aiocb = dma_blk_read(blk, &req->qsg, data_offset,
- BDRV_SECTOR_SIZE, nvme_rw_cb, req);
- } else {
- req->aiocb = blk_aio_preadv(blk, data_offset, &req->iov, 0,
- nvme_rw_cb, req);
- }
+ nvme_blk_read(blk, data_offset, nvme_rw_cb, req);
return NVME_NO_COMPLETE;
invalid:
@@ -2080,13 +2083,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
BLOCK_ACCT_WRITE);
- if (req->qsg.sg) {
- req->aiocb = dma_blk_write(blk, &req->qsg, data_offset,
- BDRV_SECTOR_SIZE, nvme_rw_cb, req);
- } else {
- req->aiocb = blk_aio_pwritev(blk, data_offset, &req->iov, 0,
- nvme_rw_cb, req);
- }
+ nvme_blk_write(blk, data_offset, nvme_rw_cb, req);
} else {
req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
--
2.30.0
next prev parent reply other threads:[~2021-02-07 21:54 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-07 21:49 [PATCH RFC v2 0/8] hw/block/nvme: metadata and end-to-end data protection support Klaus Jensen
2021-02-07 21:49 ` [PATCH RFC v2 1/8] hw/block/nvme: remove redundant len member in compare context Klaus Jensen
2021-02-08 15:07 ` Minwoo Im
2021-02-07 21:49 ` [PATCH RFC v2 2/8] hw/block/nvme: remove block accounting for write zeroes Klaus Jensen
2021-02-08 15:07 ` Minwoo Im
2021-02-07 21:49 ` [PATCH RFC v2 3/8] hw/block/nvme: fix strerror printing Klaus Jensen
2021-02-08 15:13 ` Minwoo Im
2021-02-07 21:49 ` Klaus Jensen [this message]
2021-02-08 15:45 ` [PATCH RFC v2 4/8] hw/block/nvme: try to deal with the iov/qsg duality Keith Busch
2021-02-08 15:47 ` Klaus Jensen
2021-02-07 21:49 ` [PATCH RFC v2 5/8] hw/block/nvme: remove the req dependency in map functions Klaus Jensen
2021-02-07 21:49 ` [PATCH RFC v2 6/8] hw/block/nvme: refactor nvme_dma Klaus Jensen
2021-02-07 21:49 ` [PATCH RFC v2 7/8] hw/block/nvme: add metadata support Klaus Jensen
2021-02-07 21:49 ` [PATCH RFC v2 8/8] hw/block/nvme: end-to-end data protection Klaus Jensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210207214940.281889-5-its@irrelevant.dk \
--to=its@irrelevant.dk \
--cc=anaidu.gollu@samsung.com \
--cc=fam@euphon.net \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.