From: Kanchan Joshi <joshi.k@samsung.com>
To: hch@lst.de, kbusch@kernel.org, axboe@kernel.dk, sagi@grimberg.me
Cc: linux-nvme@lists.infradead.org, gost.dev@samsung.com,
joshiiitr@gmail.com, Kanchan Joshi <joshi.k@samsung.com>
Subject: [PATCH 1/2] nvme-pci: meta-transfer via sgl
Date: Thu, 19 Oct 2023 00:00:02 +0530 [thread overview]
Message-ID: <20231018183003.41174-2-joshi.k@samsung.com> (raw)
In-Reply-To: <20231018183003.41174-1-joshi.k@samsung.com>
Introduce the ability to transfer the metadata buffer using sgl.
Also add an nvme request flag 'NVME_REQ_FORCE_SGL' that mandates both
data and meta transfer via sgl.
This is a preparatory patch to enable unprivileged passthrough via SGL.
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
drivers/nvme/host/nvme.h | 6 ++++
drivers/nvme/host/pci.c | 63 ++++++++++++++++++++++++++++++++++++----
2 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f35647c470af..58f8efe1ace9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -184,6 +184,7 @@ enum {
NVME_REQ_CANCELLED = (1 << 0),
NVME_REQ_USERCMD = (1 << 1),
NVME_MPATH_IO_STATS = (1 << 2),
+ NVME_REQ_FORCE_SGL = (1 << 3),
};
static inline struct nvme_request *nvme_req(struct request *req)
@@ -1043,6 +1044,11 @@ static inline void nvme_start_request(struct request *rq)
blk_mq_start_request(rq);
}
+static inline bool nvme_ctrl_meta_sgl_supported(struct nvme_ctrl *ctrl)
+{
+ return ctrl->sgls & (1 << 19);
+}
+
static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
{
return ctrl->sgls & ((1 << 0) | (1 << 1));
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3f0c9ee09a12..1907b1c9919a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -123,6 +123,7 @@ struct nvme_dev {
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
+ struct dma_pool *meta_sgl_pool;
unsigned online_queues;
unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES];
@@ -236,6 +237,8 @@ struct nvme_iod {
unsigned int dma_len; /* length of single DMA segment mapping */
dma_addr_t first_dma;
dma_addr_t meta_dma;
+ dma_addr_t meta_dma_sg;
+ struct nvme_sgl_desc *meta_sgl;
struct sg_table sgt;
union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
};
@@ -772,18 +775,23 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
blk_status_t ret = BLK_STS_RESOURCE;
int rc;
+ bool force_sgl = nvme_req(req)->flags & NVME_REQ_FORCE_SGL;
+
+ if (force_sgl && !nvme_ctrl_sgl_supported(&dev->ctrl))
+ return BLK_STS_IOERR;
if (blk_rq_nr_phys_segments(req) == 1) {
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct bio_vec bv = req_bvec(req);
if (!is_pci_p2pdma_page(bv.bv_page)) {
- if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
+ if (!force_sgl &&
+ bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
return nvme_setup_prp_simple(dev, req,
&cmnd->rw, &bv);
- if (nvmeq->qid && sgl_threshold &&
- nvme_ctrl_sgl_supported(&dev->ctrl))
+ if (nvmeq->qid && nvme_ctrl_sgl_supported(&dev->ctrl)
+ && (sgl_threshold || force_sgl))
return nvme_setup_sgl_simple(dev, req,
&cmnd->rw, &bv);
}
@@ -806,7 +814,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out_free_sg;
}
- if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
+ if (force_sgl || nvme_pci_use_sgls(dev, req, iod->sgt.nents))
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
@@ -825,13 +833,44 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ bool force_sgl = nvme_req(req)->flags & NVME_REQ_FORCE_SGL;
+ blk_status_t ret;
iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
- cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+
+ if (!force_sgl) {
+ cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+ return BLK_STS_OK;
+ }
+
+ if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl)) {
+ WARN_ONCE(1, "controller does not support meta sgl.");
+ ret = BLK_STS_IOERR;
+ goto out_unmap;
+ }
+
+ iod->meta_sgl = dma_pool_alloc(dev->meta_sgl_pool, GFP_KERNEL,
+ &iod->meta_dma_sg);
+ if (!iod->meta_sgl) {
+ ret = BLK_STS_IOERR;
+ goto out_unmap;
+ }
+
+ iod->meta_sgl->addr = cpu_to_le64(iod->meta_dma);
+ iod->meta_sgl->length = cpu_to_le32(rq_integrity_vec(req)->bv_len);
+ iod->meta_sgl->type = NVME_SGL_FMT_DATA_DESC << 4;
+ cmnd->rw.metadata = cpu_to_le64(iod->meta_dma_sg);
+ cmnd->rw.flags = NVME_CMD_SGL_METASEG;
+
return BLK_STS_OK;
+
+out_unmap:
+ dma_unmap_page(dev->dev, iod->meta_dma,
+ rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ return ret;
}
static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
@@ -968,6 +1007,11 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+
+ if (nvme_req(req)->flags & NVME_REQ_FORCE_SGL)
+ dma_pool_free(dev->meta_sgl_pool,
+ (void *)iod->meta_sgl,
+ iod->meta_dma_sg);
}
if (blk_rq_nr_phys_segments(req))
@@ -2644,6 +2688,14 @@ static int nvme_setup_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_page_pool);
return -ENOMEM;
}
+ /* for metadata sgl */
+ dev->meta_sgl_pool = dma_pool_create("meta sg 16", dev->dev, 16, 16, 0);
+ if (!dev->meta_sgl_pool) {
+ dma_pool_destroy(dev->prp_page_pool);
+ dma_pool_destroy(dev->prp_small_pool);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -2651,6 +2703,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
{
dma_pool_destroy(dev->prp_page_pool);
dma_pool_destroy(dev->prp_small_pool);
+ dma_pool_destroy(dev->meta_sgl_pool);
}
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
--
2.25.1
next prev parent reply other threads:[~2023-10-18 18:36 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20231018183620epcas5p26ab74bdd1f2739ef3ec1ee2431329dc4@epcas5p2.samsung.com>
2023-10-18 18:30 ` [PATCH 0/2] Unprivileged sgl-only passthrough Kanchan Joshi
2023-10-18 18:30 ` Kanchan Joshi [this message]
2023-10-19 5:48 ` [PATCH 1/2] nvme-pci: meta-transfer via sgl Christoph Hellwig
2023-10-19 9:54 ` Kanchan Joshi
2023-10-20 4:40 ` Christoph Hellwig
2023-10-18 18:30 ` [PATCH 2/2] nvme: fine-granular CAP_SYS_ADMIN for nvme io/admin commands Kanchan Joshi
2023-10-19 5:49 ` Christoph Hellwig
2023-10-19 9:59 ` Kanchan Joshi
2023-10-18 18:40 ` [PATCH 0/2] Unprivileged sgl-only passthrough Jens Axboe
2023-10-18 19:06 ` Kanchan Joshi
2023-10-18 19:12 ` Jens Axboe
2023-10-18 19:35 ` Keith Busch
2023-10-18 19:37 ` Jens Axboe
2023-10-18 19:44 ` Kanchan Joshi
2023-10-18 19:47 ` Jens Axboe
2023-10-18 19:59 ` Kanchan Joshi
2023-10-18 21:06 ` Keith Busch
2023-10-18 21:08 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231018183003.41174-2-joshi.k@samsung.com \
--to=joshi.k@samsung.com \
--cc=axboe@kernel.dk \
--cc=gost.dev@samsung.com \
--cc=hch@lst.de \
--cc=joshiiitr@gmail.com \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox