* [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers
@ 2025-06-25 20:44 Keith Busch
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Keith Busch @ 2025-06-25 20:44 UTC (permalink / raw)
To: hch, linux-nvme, linux-block; +Cc: axboe, leon, joshi.k, sagi, Keith Busch
From: Keith Busch <kbusch@kernel.org>
This is much like the scatter-less DMA helpers for request data, but for
integrity metadata instead. This one only supports the direct
mapping, as the virt boundary queue limit used to check for iova
coalescing possibilities doesn't apply to metadata.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
block/blk-integrity.c | 94 +++++++++++++++++++++++++++++++++++
block/blk-mq-dma.c | 9 +---
block/blk.h | 10 ++++
include/linux/blk-integrity.h | 6 +++
4 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index e4e2567061f9d..e79df07d1151a 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -112,6 +112,100 @@ int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
+static void bio_integrity_advance_iter_single(struct bio *bio,
+ struct bvec_iter *iter,
+ struct bio_vec *bvec,
+ unsigned int bytes)
+{
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+
+ iter->bi_sector += bytes / bi->tuple_size;
+ bvec_iter_advance(bvec, iter, bytes);
+}
+
+static bool blk_rq_integrity_map_iter_next(struct request *req,
+ struct req_iterator *iter, struct phys_vec *vec)
+{
+ struct bio_integrity_payload *bip = bio_integrity(iter->bio);
+ unsigned int max_size;
+ struct bio_vec bv;
+
+ if (!iter->iter.bi_size)
+ return false;
+
+ bv = mp_bvec_iter_bvec(bip->bip_vec, iter->iter);
+ vec->paddr = bvec_phys(&bv);
+ max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
+ bv.bv_len = min(bv.bv_len, max_size);
+
+ bio_integrity_advance_iter_single(iter->bio, &iter->iter, &bv, bv.bv_len);
+ while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
+ struct bio_vec next;
+
+ if (!iter->iter.bi_size) {
+ if (!iter->bio->bi_next)
+ break;
+ iter->bio = iter->bio->bi_next;
+ iter->iter = iter->bio->bi_iter;
+ }
+
+ next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ if (bv.bv_len + next.bv_len > max_size ||
+ !biovec_phys_mergeable(req->q, &bv, &next))
+ break;
+
+ bv.bv_len += next.bv_len;
+ bio_integrity_advance_iter_single(iter->bio, &iter->iter, &bv,
+ next.bv_len);
+ }
+
+ vec->len = bv.bv_len;
+ return true;
+}
+
+bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter)
+{
+ struct bio_integrity_payload *bip = bio_integrity(req->bio);
+ struct phys_vec vec;
+
+ iter->iter.bio = req->bio;
+ iter->iter.iter = bip->bip_iter;
+ memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
+ iter->status = BLK_STS_OK;
+
+ if (!blk_rq_integrity_map_iter_next(req, &iter->iter, &vec))
+ return false;
+
+ switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
+ phys_to_page(vec.paddr))) {
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ return blk_dma_map_bus(iter, &vec);
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ case PCI_P2PDMA_MAP_NONE:
+ break;
+ default:
+ iter->status = BLK_STS_INVAL;
+ return false;
+ }
+
+ return blk_dma_map_direct(req, dma_dev, iter, &vec);
+}
+EXPORT_SYMBOL_GPL(blk_rq_integrity_map_iter_start);
+
+bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter)
+{
+ struct phys_vec vec;
+
+ if (!blk_rq_integrity_map_iter_next(req, &iter->iter, &vec))
+ return false;
+ if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
+ return blk_dma_map_bus(iter, &vec);
+ return blk_dma_map_direct(req, dma_dev, iter, &vec);
+}
+EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
+
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes)
{
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index ad283017caef2..54c25e5e60d78 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -5,11 +5,6 @@
#include <linux/blk-mq-dma.h>
#include "blk.h"
-struct phys_vec {
- phys_addr_t paddr;
- u32 len;
-};
-
static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
struct phys_vec *vec)
{
@@ -77,14 +72,14 @@ static inline bool blk_can_dma_map_iova(struct request *req,
dma_get_merge_boundary(dma_dev));
}
-static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
+bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
iter->len = vec->len;
return true;
}
-static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
+bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
struct blk_dma_iter *iter, struct phys_vec *vec)
{
iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
diff --git a/block/blk.h b/block/blk.h
index 1141b343d0b5c..755975ddc3046 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -4,6 +4,7 @@
#include <linux/bio-integrity.h>
#include <linux/blk-crypto.h>
+#include <linux/blk-mq-dma.h>
#include <linux/lockdep.h>
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
#include <linux/sched/sysctl.h>
@@ -727,6 +728,15 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops, struct file *bdev_file);
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
+struct phys_vec {
+ phys_addr_t paddr;
+ u32 len;
+};
+
+bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec);
+bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
+ struct blk_dma_iter *iter, struct phys_vec *vec);
+
void blk_integrity_generate(struct bio *bio);
void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter);
void blk_integrity_prepare(struct request *rq);
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index c7eae0bfb013f..8e2aeb5c13864 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -4,6 +4,7 @@
#include <linux/blk-mq.h>
#include <linux/bio-integrity.h>
+#include <linux/blk-mq-dma.h>
struct request;
@@ -30,6 +31,11 @@ int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes);
+bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter);
+bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter);
+
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
{
--
2.47.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] nvme: convert metadata mapping to dma iter
2025-06-25 20:44 [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
@ 2025-06-25 20:44 ` Keith Busch
2025-06-25 21:18 ` Keith Busch
` (2 more replies)
2025-06-25 21:17 ` [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
2025-06-26 13:55 ` kernel test robot
2 siblings, 3 replies; 8+ messages in thread
From: Keith Busch @ 2025-06-25 20:44 UTC (permalink / raw)
To: hch, linux-nvme, linux-block; +Cc: axboe, leon, joshi.k, sagi, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Aligns metadata with the same dma mapping scheme used for data and
removes one more user of the scatter-gather dma mapping.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/nvme/host/pci.c | 69 +++++++++++++----------------------------
1 file changed, 21 insertions(+), 48 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 38be1505dbd96..f82ef19fa0a3a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -270,13 +270,13 @@ struct nvme_iod {
struct nvme_command cmd;
u8 flags;
u8 nr_descriptors;
+ u8 nr_meta_descriptors;
unsigned int total_len;
struct dma_iova_state dma_state;
void *descriptors[NVME_MAX_NR_DESCRIPTORS];
dma_addr_t meta_dma;
- struct sg_table meta_sgt;
struct nvme_sgl_desc *meta_descriptor;
};
@@ -1010,70 +1010,39 @@ static blk_status_t nvme_map_data(struct request *req)
return nvme_pci_setup_data_prp(req, &iter);
}
-static void nvme_pci_sgl_set_data_sg(struct nvme_sgl_desc *sge,
- struct scatterlist *sg)
-{
- sge->addr = cpu_to_le64(sg_dma_address(sg));
- sge->length = cpu_to_le32(sg_dma_len(sg));
- sge->type = NVME_SGL_FMT_DATA_DESC << 4;
-}
-
static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
{
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
- struct nvme_dev *dev = nvmeq->dev;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dev *dev = nvmeq->dev;
struct nvme_sgl_desc *sg_list;
- struct scatterlist *sgl, *sg;
- unsigned int entries;
+ struct blk_dma_iter iter;
dma_addr_t sgl_dma;
- int rc, i;
-
- iod->meta_sgt.sgl = mempool_alloc(dev->iod_meta_mempool, GFP_ATOMIC);
- if (!iod->meta_sgt.sgl)
- return BLK_STS_RESOURCE;
-
- sg_init_table(iod->meta_sgt.sgl, req->nr_integrity_segments);
- iod->meta_sgt.orig_nents = blk_rq_map_integrity_sg(req,
- iod->meta_sgt.sgl);
- if (!iod->meta_sgt.orig_nents)
- goto out_free_sg;
+ int i = 0;
- rc = dma_map_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req),
- DMA_ATTR_NO_WARN);
- if (rc)
- goto out_free_sg;
+ if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev, &iter))
+ return iter.status;
sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC,
&sgl_dma);
if (!sg_list)
- goto out_unmap_sg;
+ return BLK_STS_RESOURCE;
- entries = iod->meta_sgt.nents;
iod->meta_descriptor = sg_list;
iod->meta_dma = sgl_dma;
iod->cmd.common.flags = NVME_CMD_SGL_METASEG;
iod->cmd.common.metadata = cpu_to_le64(sgl_dma);
- sgl = iod->meta_sgt.sgl;
- if (entries == 1) {
- nvme_pci_sgl_set_data_sg(sg_list, sgl);
- return BLK_STS_OK;
- }
-
sgl_dma += sizeof(*sg_list);
- nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries);
- for_each_sg(sgl, sg, entries, i)
- nvme_pci_sgl_set_data_sg(&sg_list[i + 1], sg);
- return BLK_STS_OK;
+ do {
+ nvme_pci_sgl_set_data(&sg_list[++i], &iter);
+ } while (blk_rq_integrity_dma_map_iter_next(req, dev->dev, &iter));
-out_unmap_sg:
- dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
-out_free_sg:
- mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
- return BLK_STS_RESOURCE;
+ nvme_pci_sgl_set_seg(sg_list, sgl_dma, i);
+ iod->nr_meta_descriptors = i;
+ return BLK_STS_OK;
}
static blk_status_t nvme_pci_setup_meta_mptr(struct request *req)
@@ -1086,6 +1055,7 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct request *req)
if (dma_mapping_error(nvmeq->dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
iod->cmd.common.metadata = cpu_to_le64(iod->meta_dma);
+ iod->nr_meta_descriptors = 0;
return BLK_STS_OK;
}
@@ -1107,7 +1077,6 @@ static blk_status_t nvme_prep_rq(struct request *req)
iod->flags = 0;
iod->nr_descriptors = 0;
iod->total_len = 0;
- iod->meta_sgt.nents = 0;
ret = nvme_setup_cmd(req->q->queuedata, req);
if (ret)
@@ -1222,19 +1191,23 @@ static __always_inline void nvme_unmap_metadata(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ struct nvme_sgl_desc *sg_list = iod->meta_descriptor;
+ enum dma_data_direction dir = rq_dma_dir(req);
struct nvme_dev *dev = nvmeq->dev;
+ int i;
- if (!iod->meta_sgt.nents) {
+ if (!iod->nr_meta_descriptors) {
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req).bv_len,
rq_dma_dir(req));
return;
}
+ for (i = 1; i <= iod->nr_meta_descriptors; i++)
+ dma_unmap_page(dev->dev, le64_to_cpu(sg_list[i].addr),
+ le32_to_cpu(sg_list[i].length), dir);
dma_pool_free(nvmeq->descriptor_pools.small, iod->meta_descriptor,
iod->meta_dma);
- dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
- mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
}
static __always_inline void nvme_pci_unmap_rq(struct request *req)
--
2.47.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers
2025-06-25 20:44 [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
@ 2025-06-25 21:17 ` Keith Busch
2025-06-26 5:02 ` Christoph Hellwig
2025-06-26 13:55 ` kernel test robot
2 siblings, 1 reply; 8+ messages in thread
From: Keith Busch @ 2025-06-25 21:17 UTC (permalink / raw)
To: Keith Busch; +Cc: hch, linux-nvme, linux-block, axboe, leon, joshi.k, sagi
On Wed, Jun 25, 2025 at 01:44:44PM -0700, Keith Busch wrote:
> + while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
> + struct bio_vec next;
> +
> + if (!iter->iter.bi_size) {
> + if (!iter->bio->bi_next)
> + break;
> + iter->bio = iter->bio->bi_next;
> + iter->iter = iter->bio->bi_iter;
Ugh, this should have been:
bip = bio_integrity(iter->bio);
iter->iter = bip->bip_iter;
> + }
> +
> + next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
And then this should be:
next = mp_bvec_iter_bvec(bip->bip_vec, iter->iter);
Obviously I didn't test merging bio's into plugged requests...
> +bool blk_rq_integrity_dma_map_iter_start(struct request *req,
> + struct device *dma_dev, struct blk_dma_iter *iter)
> +{
...
> +}
> +EXPORT_SYMBOL_GPL(blk_rq_integrity_map_iter_start);
And while I'm pointing out my mistakes, this last second name change was
pretty stupid... The export symbol needs the "_dma_" part.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] nvme: convert metadata mapping to dma iter
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
@ 2025-06-25 21:18 ` Keith Busch
2025-06-26 5:11 ` Christoph Hellwig
2025-06-26 13:55 ` kernel test robot
2 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2025-06-25 21:18 UTC (permalink / raw)
To: Keith Busch; +Cc: hch, linux-nvme, linux-block, axboe, leon, joshi.k, sagi
On Wed, Jun 25, 2025 at 01:44:45PM -0700, Keith Busch wrote:
> Aligns data and metadata to the similar dma mapping scheme and removes
> one more user of the scatter-gather dma mapping.
One thing missing from this patch: it should have removed all the iod
mempools since we're not using them anymore after this.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers
2025-06-25 21:17 ` [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
@ 2025-06-26 5:02 ` Christoph Hellwig
0 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2025-06-26 5:02 UTC (permalink / raw)
To: Keith Busch
Cc: Keith Busch, hch, linux-nvme, linux-block, axboe, leon, joshi.k,
sagi
On Wed, Jun 25, 2025 at 03:17:32PM -0600, Keith Busch wrote:
> > + next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
>
> And then this should be:
>
> next = mp_bvec_iter_bvec(bip->bip_vec, iter->iter);
>
> Obviously I didn't test merging bio's into plugged requests...
So, testing was the main reason I have skipped this in the initial
conversion, as I could not come up with good coverage of multi-segment
integrity metadata. Anuj promised we'd get good test coverage once the
PI query ioctl lands, so the plan was to do it just after that.
I had written some code before I realized that; it never really got
finished, but one thing I did was to try to figure out how we implement
the iterator without too much code duplication. The below is what I came
up with - it adds two branches to the fast path, but otherwise shares the
entire dma iterator and I think also makes it very clear which bio_vec
table to use. Maybe this is useful for the next version? My next step
would have been to convert the scatterlist mapping to use the new common
helper and unify the code with the data mapping, and then implement the
new API (hopefully also sharing most of the code from the data mapping).
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index 82bae475dfa4..24667d199525 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -10,28 +10,34 @@ struct phys_vec {
};
static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
- struct phys_vec *vec)
+ struct phys_vec *vec, bool integrity)
{
+ struct bio_vec *base;
unsigned int max_size;
struct bio_vec bv;
- if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
- if (!iter->bio)
- return false;
- vec->paddr = bvec_phys(&req->special_vec);
- vec->len = req->special_vec.bv_len;
- iter->bio = NULL;
- return true;
+ if (integrity) {
+ base = iter->bio->bi_integrity->bip_vec;
+ } else {
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ if (!iter->bio)
+ return false;
+ vec->paddr = bvec_phys(&req->special_vec);
+ vec->len = req->special_vec.bv_len;
+ iter->bio = NULL;
+ return true;
+ }
+ base = iter->bio->bi_io_vec;
}
if (!iter->iter.bi_size)
return false;
- bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ bv = mp_bvec_iter_bvec(base, iter->iter);
vec->paddr = bvec_phys(&bv);
max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
bv.bv_len = min(bv.bv_len, max_size);
- bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
+ bvec_iter_advance_single(base, &iter->iter, bv.bv_len);
/*
* If we are entirely done with this bi_io_vec entry, check if the next
@@ -46,15 +52,19 @@ static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
break;
iter->bio = iter->bio->bi_next;
iter->iter = iter->bio->bi_iter;
+ if (integrity)
+ base = iter->bio->bi_integrity->bip_vec;
+ else
+ base = iter->bio->bi_io_vec;
}
- next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ next = mp_bvec_iter_bvec(base, iter->iter);
if (bv.bv_len + next.bv_len > max_size ||
!biovec_phys_mergeable(req->q, &bv, &next))
break;
bv.bv_len += next.bv_len;
- bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
+ bvec_iter_advance_single(base, &iter->iter, next.bv_len);
}
vec->len = bv.bv_len;
@@ -95,7 +105,7 @@ int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
if (iter.bio)
iter.iter = iter.bio->bi_iter;
- while (blk_map_iter_next(rq, &iter, &vec)) {
+ while (blk_map_iter_next(rq, &iter, &vec, false)) {
*last_sg = blk_next_sg(last_sg, sglist);
sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
offset_in_page(vec.paddr));
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] nvme: convert metadata mapping to dma iter
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
2025-06-25 21:18 ` Keith Busch
@ 2025-06-26 5:11 ` Christoph Hellwig
2025-06-26 13:55 ` kernel test robot
2 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2025-06-26 5:11 UTC (permalink / raw)
To: Keith Busch
Cc: hch, linux-nvme, linux-block, axboe, leon, joshi.k, sagi,
Keith Busch
On Wed, Jun 25, 2025 at 01:44:45PM -0700, Keith Busch wrote:
> static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
> {
> struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
> struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> + struct nvme_dev *dev = nvmeq->dev;
> struct nvme_sgl_desc *sg_list;
> + struct blk_dma_iter iter;
> dma_addr_t sgl_dma;
> + int i = 0;
>
> + if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev, &iter))
> + return iter.status;
If blk_rq_dma_map_coalesce returns true after this, which it will do for
all mappings when using an IOMMU, we can simply set up a single contiguous
metadata pointer here, which will be a lot more efficient than using an
SGL.
> + for (i = 1; i <= iod->nr_meta_descriptors; i++)
> + dma_unmap_page(dev->dev, le64_to_cpu(sg_list[i].addr),
> + le32_to_cpu(sg_list[i].length), dir);
This should walk based on the size of the SGL segment descriptor pointed to
by the metadata pointer, similar to what we do for the data SGLs. The
descriptors if counted as by the data mapping path are always one for
SGL mappings (and could in fact just be replaced with a flag if we
cared enough).
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] nvme: convert metadata mapping to dma iter
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
2025-06-25 21:18 ` Keith Busch
2025-06-26 5:11 ` Christoph Hellwig
@ 2025-06-26 13:55 ` kernel test robot
2 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2025-06-26 13:55 UTC (permalink / raw)
To: Keith Busch, hch, linux-nvme, linux-block
Cc: llvm, oe-kbuild-all, axboe, leon, joshi.k, sagi, Keith Busch
Hi Keith,
kernel test robot noticed the following build errors:
[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on next-20250626]
[cannot apply to linux-nvme/for-next hch-configfs/for-next linus/master v6.16-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Keith-Busch/nvme-convert-metadata-mapping-to-dma-iter/20250626-044623
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20250625204445.1802483-2-kbusch%40meta.com
patch subject: [PATCH 2/2] nvme: convert metadata mapping to dma iter
config: i386-buildonly-randconfig-002-20250626 (https://download.01.org/0day-ci/archive/20250626/202506262128.hNOOLoxM-lkp@intel.com/config)
compiler: clang version 20.1.7 (https://github.com/llvm/llvm-project 6146a88f60492b520a36f8f8f3231e15f3cc6082)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250626/202506262128.hNOOLoxM-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506262128.hNOOLoxM-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/nvme/host/pci.c:1023:7: error: call to undeclared function 'blk_rq_integrity_dma_map_iter_start'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
1023 | if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev, &iter))
| ^
drivers/nvme/host/pci.c:1023:7: note: did you mean 'blk_rq_dma_map_iter_start'?
include/linux/blk-mq-dma.h:21:6: note: 'blk_rq_dma_map_iter_start' declared here
21 | bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
| ^
>> drivers/nvme/host/pci.c:1041:11: error: call to undeclared function 'blk_rq_integrity_dma_map_iter_next'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
1041 | } while (blk_rq_integrity_dma_map_iter_next(req, dev->dev, &iter));
| ^
drivers/nvme/host/pci.c:3336:41: warning: shift count >= width of type [-Wshift-count-overflow]
3336 | dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
| ^~~~~~~~~~~~~~~~
include/linux/dma-mapping.h:73:54: note: expanded from macro 'DMA_BIT_MASK'
73 | #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
| ^ ~~~
1 warning and 2 errors generated.
vim +/blk_rq_integrity_dma_map_iter_start +1023 drivers/nvme/host/pci.c
1012
1013 static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
1014 {
1015 struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
1016 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
1017 struct nvme_dev *dev = nvmeq->dev;
1018 struct nvme_sgl_desc *sg_list;
1019 struct blk_dma_iter iter;
1020 dma_addr_t sgl_dma;
1021 int i = 0;
1022
> 1023 if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev, &iter))
1024 return iter.status;
1025
1026 sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC,
1027 &sgl_dma);
1028 if (!sg_list)
1029 return BLK_STS_RESOURCE;
1030
1031 iod->meta_descriptor = sg_list;
1032 iod->meta_dma = sgl_dma;
1033
1034 iod->cmd.common.flags = NVME_CMD_SGL_METASEG;
1035 iod->cmd.common.metadata = cpu_to_le64(sgl_dma);
1036
1037 sgl_dma += sizeof(*sg_list);
1038
1039 do {
1040 nvme_pci_sgl_set_data(&sg_list[++i], &iter);
> 1041 } while (blk_rq_integrity_dma_map_iter_next(req, dev->dev, &iter));
1042
1043 nvme_pci_sgl_set_seg(sg_list, sgl_dma, i);
1044 iod->nr_meta_descriptors = i;
1045 return BLK_STS_OK;
1046 }
1047
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers
2025-06-25 20:44 [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
2025-06-25 21:17 ` [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
@ 2025-06-26 13:55 ` kernel test robot
2 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2025-06-26 13:55 UTC (permalink / raw)
To: Keith Busch, hch, linux-nvme, linux-block
Cc: llvm, oe-kbuild-all, axboe, leon, joshi.k, sagi, Keith Busch
Hi Keith,
kernel test robot noticed the following build errors:
[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on next-20250626]
[cannot apply to linux-nvme/for-next hch-configfs/for-next linus/master v6.16-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Keith-Busch/nvme-convert-metadata-mapping-to-dma-iter/20250626-044623
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20250625204445.1802483-1-kbusch%40meta.com
patch subject: [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers
config: x86_64-buildonly-randconfig-002-20250626 (https://download.01.org/0day-ci/archive/20250626/202506262136.WXl2reWF-lkp@intel.com/config)
compiler: clang version 20.1.7 (https://github.com/llvm/llvm-project 6146a88f60492b520a36f8f8f3231e15f3cc6082)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250626/202506262136.WXl2reWF-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506262136.WXl2reWF-lkp@intel.com/
All errors (new ones prefixed by >>):
>> block/blk-integrity.c:194:19: error: use of undeclared identifier 'blk_rq_integrity_map_iter_start'
194 | EXPORT_SYMBOL_GPL(blk_rq_integrity_map_iter_start);
| ^
1 error generated.
vim +/blk_rq_integrity_map_iter_start +194 block/blk-integrity.c
165
166 bool blk_rq_integrity_dma_map_iter_start(struct request *req,
167 struct device *dma_dev, struct blk_dma_iter *iter)
168 {
169 struct bio_integrity_payload *bip = bio_integrity(req->bio);
170 struct phys_vec vec;
171
172 iter->iter.bio = req->bio;
173 iter->iter.iter = bip->bip_iter;
174 memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
175 iter->status = BLK_STS_OK;
176
177 if (!blk_rq_integrity_map_iter_next(req, &iter->iter, &vec))
178 return false;
179
180 switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
181 phys_to_page(vec.paddr))) {
182 case PCI_P2PDMA_MAP_BUS_ADDR:
183 return blk_dma_map_bus(iter, &vec);
184 case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
185 case PCI_P2PDMA_MAP_NONE:
186 break;
187 default:
188 iter->status = BLK_STS_INVAL;
189 return false;
190 }
191
192 return blk_dma_map_direct(req, dma_dev, iter, &vec);
193 }
> 194 EXPORT_SYMBOL_GPL(blk_rq_integrity_map_iter_start);
195
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2025-06-26 13:58 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-25 20:44 [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
2025-06-25 20:44 ` [PATCH 2/2] nvme: convert metadata mapping to dma iter Keith Busch
2025-06-25 21:18 ` Keith Busch
2025-06-26 5:11 ` Christoph Hellwig
2025-06-26 13:55 ` kernel test robot
2025-06-25 21:17 ` [PATCH 1/2] blk-integrity: add scatter-less DMA mapping helpers Keith Busch
2025-06-26 5:02 ` Christoph Hellwig
2025-06-26 13:55 ` kernel test robot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).