[PATCH 1/2] nvme-pci: meta-transfer via sgl

public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed

From: Kanchan Joshi <joshi.k@samsung.com>
To: hch@lst.de, kbusch@kernel.org, axboe@kernel.dk, sagi@grimberg.me
Cc: linux-nvme@lists.infradead.org, gost.dev@samsung.com,
	joshiiitr@gmail.com, Kanchan Joshi <joshi.k@samsung.com>
Subject: [PATCH 1/2] nvme-pci: meta-transfer via sgl
Date: Thu, 19 Oct 2023 00:00:02 +0530	[thread overview]
Message-ID: <20231018183003.41174-2-joshi.k@samsung.com> (raw)
In-Reply-To: <20231018183003.41174-1-joshi.k@samsung.com>

Introduce the ability to transfer the metadata buffer using an SGL.
Also add an nvme request flag 'NVME_REQ_FORCE_SGL' that mandates both
data and metadata transfer via SGL.

This is a preparatory patch to enable unprivileged passthrough via SGL.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
 drivers/nvme/host/nvme.h |  6 ++++
 drivers/nvme/host/pci.c  | 63 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f35647c470af..58f8efe1ace9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -184,6 +184,7 @@ enum {
 	NVME_REQ_CANCELLED		= (1 << 0),
 	NVME_REQ_USERCMD		= (1 << 1),
 	NVME_MPATH_IO_STATS		= (1 << 2),
+	NVME_REQ_FORCE_SGL		= (1 << 3),
 };
 
 static inline struct nvme_request *nvme_req(struct request *req)
@@ -1043,6 +1044,11 @@ static inline void nvme_start_request(struct request *rq)
 	blk_mq_start_request(rq);
 }
 
+static inline bool nvme_ctrl_meta_sgl_supported(struct nvme_ctrl *ctrl)
+{
+	return ctrl->sgls & (1 << 19);	/* bit 19: metadata SGL support */
+}
+
 static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
 {
 	return ctrl->sgls & ((1 << 0) | (1 << 1));
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3f0c9ee09a12..1907b1c9919a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -123,6 +123,7 @@ struct nvme_dev {
 	struct device *dev;
 	struct dma_pool *prp_page_pool;
 	struct dma_pool *prp_small_pool;
+	struct dma_pool *meta_sgl_pool;
 	unsigned online_queues;
 	unsigned max_qid;
 	unsigned io_queues[HCTX_MAX_TYPES];
@@ -236,6 +237,8 @@ struct nvme_iod {
 	unsigned int dma_len;	/* length of single DMA segment mapping */
 	dma_addr_t first_dma;
 	dma_addr_t meta_dma;
+	dma_addr_t meta_dma_sg;
+	struct nvme_sgl_desc	*meta_sgl;
 	struct sg_table sgt;
 	union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
 };
@@ -772,18 +775,23 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	blk_status_t ret = BLK_STS_RESOURCE;
 	int rc;
+	bool force_sgl = nvme_req(req)->flags & NVME_REQ_FORCE_SGL;
+
+	if (force_sgl && !nvme_ctrl_sgl_supported(&dev->ctrl))
+		return BLK_STS_IOERR;
 
 	if (blk_rq_nr_phys_segments(req) == 1) {
 		struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
+			if (!force_sgl &&
+			    bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
 
-			if (nvmeq->qid && sgl_threshold &&
-			    nvme_ctrl_sgl_supported(&dev->ctrl))
+			if (nvmeq->qid && nvme_ctrl_sgl_supported(&dev->ctrl)
+			    && (sgl_threshold || force_sgl))
 				return nvme_setup_sgl_simple(dev, req,
 							     &cmnd->rw, &bv);
 		}
@@ -806,7 +814,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		goto out_free_sg;
 	}
 
-	if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
+	if (force_sgl || nvme_pci_use_sgls(dev, req, iod->sgt.nents))
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
@@ -825,13 +833,44 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	bool force_sgl = nvme_req(req)->flags & NVME_REQ_FORCE_SGL;
+	blk_status_t ret;
 
 	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
 			rq_dma_dir(req), 0);
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
 		return BLK_STS_IOERR;
-	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+
+	if (!force_sgl) {
+		cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+		return BLK_STS_OK;
+	}
+
+	if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl)) {
+		WARN_ONCE(1, "controller does not support meta sgl.");
+		ret = BLK_STS_IOERR;
+		goto out_unmap;
+	}
+
+	iod->meta_sgl = dma_pool_alloc(dev->meta_sgl_pool, GFP_KERNEL,
+				       &iod->meta_dma_sg);
+	if (!iod->meta_sgl) {
+		ret = BLK_STS_IOERR;
+		goto out_unmap;
+	}
+
+	iod->meta_sgl->addr = cpu_to_le64(iod->meta_dma);
+	iod->meta_sgl->length = cpu_to_le32(rq_integrity_vec(req)->bv_len);
+	iod->meta_sgl->type = NVME_SGL_FMT_DATA_DESC << 4;
+	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma_sg);
+	cmnd->rw.flags = NVME_CMD_SGL_METASEG;
+
 	return BLK_STS_OK;
+
+out_unmap:
+	dma_unmap_page(dev->dev, iod->meta_dma,
+			rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+	return ret;
 }
 
 static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
@@ -968,6 +1007,11 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
 
 		dma_unmap_page(dev->dev, iod->meta_dma,
 			       rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+
+		if (nvme_req(req)->flags & NVME_REQ_FORCE_SGL)
+			dma_pool_free(dev->meta_sgl_pool,
+				      (void *)iod->meta_sgl,
+				      iod->meta_dma_sg);
 	}
 
 	if (blk_rq_nr_phys_segments(req))
@@ -2644,6 +2688,14 @@ static int nvme_setup_prp_pools(struct nvme_dev *dev)
 		dma_pool_destroy(dev->prp_page_pool);
 		return -ENOMEM;
 	}
+	/* for metadata sgl */
+	dev->meta_sgl_pool = dma_pool_create("meta sg 16", dev->dev, 16, 16, 0);
+	if (!dev->meta_sgl_pool) {
+		dma_pool_destroy(dev->prp_page_pool);
+		dma_pool_destroy(dev->prp_small_pool);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -2651,6 +2703,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 {
 	dma_pool_destroy(dev->prp_page_pool);
 	dma_pool_destroy(dev->prp_small_pool);
+	dma_pool_destroy(dev->meta_sgl_pool);
 }
 
 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
-- 
2.25.1

next prev parent reply	other threads:[~2023-10-18 18:36 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20231018183620epcas5p26ab74bdd1f2739ef3ec1ee2431329dc4@epcas5p2.samsung.com>
2023-10-18 18:30 ` [PATCH 0/2] Unprivileged sgl-only passthrough Kanchan Joshi
2023-10-18 18:30   ` Kanchan Joshi [this message]
2023-10-19  5:48     ` [PATCH 1/2] nvme-pci: meta-transfer via sgl Christoph Hellwig
2023-10-19  9:54       ` Kanchan Joshi
2023-10-20  4:40         ` Christoph Hellwig
2023-10-18 18:30   ` [PATCH 2/2] nvme: fine-granular CAP_SYS_ADMIN for nvme io/admin commands Kanchan Joshi
2023-10-19  5:49     ` Christoph Hellwig
2023-10-19  9:59       ` Kanchan Joshi
2023-10-18 18:40   ` [PATCH 0/2] Unprivileged sgl-only passthrough Jens Axboe
2023-10-18 19:06     ` Kanchan Joshi
2023-10-18 19:12       ` Jens Axboe
2023-10-18 19:35     ` Keith Busch
2023-10-18 19:37       ` Jens Axboe
2023-10-18 19:44       ` Kanchan Joshi
2023-10-18 19:47         ` Jens Axboe
2023-10-18 19:59       ` Kanchan Joshi
2023-10-18 21:06         ` Keith Busch
2023-10-18 21:08           ` Jens Axboe

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f35647c470a dfblob:58f8efe1ace dfblob:3f0c9ee09a1
dfblob:1907b1c9919 )
 OR (
bs:"[PATCH 1/2] nvme-pci: meta-transfer via sgl" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231018183003.41174-2-joshi.k@samsung.com \
    --to=joshi.k@samsung.com \
    --cc=axboe@kernel.dk \
    --cc=gost.dev@samsung.com \
    --cc=hch@lst.de \
    --cc=joshiiitr@gmail.com \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox