* [PATCH 1/2] block: accumulate segment page gaps per bio
@ 2025-08-05 19:56 Keith Busch
  2025-08-05 19:56 ` [PATCH 2/2] nvme: remove virtual boundary for sgl capable devices Keith Busch
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Keith Busch @ 2025-08-05 19:56 UTC (permalink / raw)
  To: linux-block, linux-nvme; +Cc: hch, axboe, Keith Busch

From: Keith Busch <kbusch@kernel.org>

The blk-mq DMA iteration has an optimization for requests that align to
the device's IOMMU merge boundary. This boundary may be larger than the
device's virtual boundary, but the code had been relying on that queue
limit to know ahead of time whether the request aligns to the
optimization.

Rather than rely on that queue limit, which many devices may not even
have, store the virtual boundary gaps of each segment in the bio as a
mask while checking the segments and merging. We can then quickly check
per IO whether the request can use the optimization.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
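Notes (illustration only, not part of the commit): the sketch below shows
the idea the patch relies on, using hypothetical stand-in types and helper
names rather than the real kernel APIs. A mask of every intra-request page
gap is accumulated the same way bv_seg_gap() does, and the request is
eligible for the single-IOVA mapping only if no bit of that mask falls
inside the IOMMU merge boundary mask (what dma_get_merge_boundary() would
return).

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096u		/* assumed 4KiB pages for the example */

struct seg {			/* hypothetical stand-in for a bio_vec */
	unsigned int offset;	/* offset within the page */
	unsigned int len;	/* segment length in bytes */
};

/* Accumulate the same style of mask that bv_seg_gap() builds per bio. */
static unsigned int gap_mask(const struct seg *segs, size_t nr)
{
	unsigned int gaps = 0;

	for (size_t i = 1; i < nr; i++) {
		/* gap at the start of this segment ... */
		gaps |= segs[i].offset;
		/* ... and any tail gap left by the previous segment */
		gaps |= (segs[i - 1].offset + segs[i - 1].len) &
			(PAGE_SIZE - 1);
	}
	return gaps;
}

/*
 * Mirrors the blk_can_dma_map_iova() change: the request can be mapped
 * as one IOVA range only if no segment boundary lands inside the IOMMU
 * merge boundary granule.
 */
static bool can_dma_map_iova(unsigned int gaps, unsigned int merge_boundary)
{
	return !(gaps & merge_boundary);
}

int main(void)
{
	struct seg segs[] = {
		{ .offset = 0,   .len = PAGE_SIZE },	/* fully aligned */
		{ .offset = 0,   .len = 512 },		/* short tail: gap */
		{ .offset = 512, .len = 512 },
	};
	unsigned int gaps = gap_mask(segs, 3);

	/* 0xfff stands in for a 4KiB IOMMU merge boundary mask */
	printf("gaps=%#x mappable=%d\n", gaps, can_dma_map_iova(gaps, 0xfff));
	return 0;
}

With all segment boundaries page aligned the mask stays zero and the check
passes for any merge boundary; the middle segment above ends mid-page, so
the mask picks up bit 0x200 and the optimization is skipped.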
 block/blk-merge.c         | 30 +++++++++++++++++++++++++++---
 block/blk-mq-dma.c        |  3 +--
 block/blk-mq.c            |  5 +++++
 include/linux/blk-mq.h    |  6 ++++++
 include/linux/blk_types.h |  2 ++
 5 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 81bdad915699a..d63389c063006 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -278,6 +278,9 @@ static unsigned int bio_split_alignment(struct bio *bio,
 	return lim->logical_block_size;
 }
 
+#define bv_seg_gap(bv, bvprv) \
+	((bv).bv_offset | (((bvprv).bv_offset + (bvprv).bv_len) & (PAGE_SIZE - 1)))
+
 /**
  * bio_split_rw_at - check if and where to split a read/write bio
  * @bio:  [in] bio to be split
@@ -293,9 +296,9 @@ static unsigned int bio_split_alignment(struct bio *bio,
 int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
 		unsigned *segs, unsigned max_bytes)
 {
+	unsigned nsegs = 0, bytes = 0, page_gaps = 0;
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
-	unsigned nsegs = 0, bytes = 0;
 
 	bio_for_each_bvec(bv, bio, iter) {
 		if (bv.bv_offset & lim->dma_alignment)
@@ -305,8 +308,11 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
-		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
-			goto split;
+		if (bvprvp) {
+			if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
+				goto split;
+			page_gaps |= bv_seg_gap(bv, bvprv);
+		}
 
 		if (nsegs < lim->max_segments &&
 		    bytes + bv.bv_len <= max_bytes &&
@@ -324,6 +330,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
 	}
 
 	*segs = nsegs;
+	bio->page_gaps = page_gaps;
 	return 0;
 split:
 	if (bio->bi_opf & REQ_ATOMIC)
@@ -353,6 +360,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
 	 * big IO can be trival, disable iopoll when split needed.
 	 */
 	bio_clear_polled(bio);
+	bio->page_gaps = page_gaps;
 	return bytes >> SECTOR_SHIFT;
 }
 EXPORT_SYMBOL_GPL(bio_split_rw_at);
@@ -696,6 +704,8 @@ static bool blk_atomic_write_mergeable_rqs(struct request *rq,
 static struct request *attempt_merge(struct request_queue *q,
 				     struct request *req, struct request *next)
 {
+	struct bio_vec bv, bvprv;
+
 	if (!rq_mergeable(req) || !rq_mergeable(next))
 		return NULL;
 
@@ -753,6 +763,10 @@ static struct request *attempt_merge(struct request_queue *q,
 	if (next->start_time_ns < req->start_time_ns)
 		req->start_time_ns = next->start_time_ns;
 
+	bv = next->bio->bi_io_vec[0];
+	bvprv = req->biotail->bi_io_vec[req->biotail->bi_vcnt - 1];
+	req->__page_gaps |= blk_rq_page_gaps(next) | bv_seg_gap(bv, bvprv);
+
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
@@ -861,6 +875,7 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
 		struct bio *bio, unsigned int nr_segs)
 {
 	const blk_opf_t ff = bio_failfast(bio);
+	struct bio_vec bv, bvprv;
 
 	if (!ll_back_merge_fn(req, bio, nr_segs))
 		return BIO_MERGE_FAILED;
@@ -876,6 +891,10 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
 		blk_zone_write_plug_bio_merged(bio);
 
+	bv = bio->bi_io_vec[0];
+	bvprv = req->biotail->bi_io_vec[req->biotail->bi_vcnt - 1];
+	req->__page_gaps |= bio->page_gaps | bv_seg_gap(bv, bvprv);
+
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
 	req->__data_len += bio->bi_iter.bi_size;
@@ -890,6 +909,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
 		struct bio *bio, unsigned int nr_segs)
 {
 	const blk_opf_t ff = bio_failfast(bio);
+	struct bio_vec bv, bvprv;
 
 	/*
 	 * A front merge for writes to sequential zones of a zoned block device
@@ -910,6 +930,10 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
 
 	blk_update_mixed_merge(req, bio, true);
 
+	bv = req->bio->bi_io_vec[0];
+	bvprv = bio->bi_io_vec[bio->bi_vcnt - 1];
+	req->__page_gaps |= bio->page_gaps | bv_seg_gap(bv, bvprv);
+
 	bio->bi_next = req->bio;
 	req->bio = bio;
 
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index faa36ff6465ee..a03067c4a268f 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -73,8 +73,7 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter)
 static inline bool blk_can_dma_map_iova(struct request *req,
 		struct device *dma_dev)
 {
-	return !((queue_virt_boundary(req->q) + 1) &
-		dma_get_merge_boundary(dma_dev));
+	return !(blk_rq_page_gaps(req) & dma_get_merge_boundary(dma_dev));
 }
 
 static bool blk_dma_map_bus(struct blk_dma_iter *iter)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b67d6c02ecebd..09134a66c5666 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -376,6 +376,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->queuelist);
 	rq->q = q;
 	rq->__sector = (sector_t) -1;
+	rq->__page_gaps = 0;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->tag = BLK_MQ_NO_TAG;
@@ -659,6 +660,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
 			goto out_queue_exit;
 	}
 	rq->__data_len = 0;
+	rq->__page_gaps = 0;
 	rq->__sector = (sector_t) -1;
 	rq->bio = rq->biotail = NULL;
 	return rq;
@@ -739,6 +741,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
 	blk_mq_rq_time_init(rq, alloc_time_ns);
 	rq->__data_len = 0;
+	rq->__page_gaps = 0;
 	rq->__sector = (sector_t) -1;
 	rq->bio = rq->biotail = NULL;
 	return rq;
@@ -2665,6 +2668,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 	rq->bio = rq->biotail = bio;
 	rq->__sector = bio->bi_iter.bi_sector;
 	rq->__data_len = bio->bi_iter.bi_size;
+	rq->__page_gaps = bio->page_gaps;
 	rq->nr_phys_segments = nr_segs;
 	if (bio_integrity(bio))
 		rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
@@ -3363,6 +3367,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 
 	/* Copy attributes of the original request to the clone request. */
 	rq->__sector = blk_rq_pos(rq_src);
+	rq->__page_gaps = blk_rq_page_gaps(rq_src);
 	rq->__data_len = blk_rq_bytes(rq_src);
 	if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
 		rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2a5a828f19a0b..d8f491867adc0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -115,6 +115,7 @@ struct request {
 
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
+	unsigned int __page_gaps;	/* a mask of all the segment page gaps */
 	sector_t __sector;		/* sector cursor */
 
 	struct bio *bio;
@@ -1080,6 +1081,11 @@ static inline sector_t blk_rq_pos(const struct request *rq)
 	return rq->__sector;
 }
 
+static inline unsigned int blk_rq_page_gaps(const struct request *rq)
+{
+	return rq->__page_gaps;
+}
+
 static inline unsigned int blk_rq_bytes(const struct request *rq)
 {
 	return rq->__data_len;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0a29b20939d17..d0ed28d40fe02 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -264,6 +264,8 @@ struct bio {
 
 	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */
 
+	unsigned int		page_gaps;	/* a mask of all the vector gaps */
+
 	atomic_t		__bi_cnt;	/* pin count */
 
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
-- 
2.47.3




Thread overview: 15+ messages
2025-08-05 19:56 [PATCH 1/2] block: accumulate segment page gaps per bio Keith Busch
2025-08-05 19:56 ` [PATCH 2/2] nvme: remove virtual boundary for sgl capable devices Keith Busch
2025-08-06 14:55   ` Christoph Hellwig
2025-08-06 15:04     ` Keith Busch
2025-08-05 20:32 ` [PATCH 1/2] block: accumulate segment page gaps per bio Caleb Sander Mateos
2025-08-05 20:52   ` Keith Busch
2025-08-05 22:52 ` Keith Busch
2025-08-06 14:53   ` Christoph Hellwig
2025-08-06 15:17     ` Keith Busch
2025-08-06 14:56 ` Christoph Hellwig
2025-08-06 15:44   ` Keith Busch
2025-08-10 14:31     ` Christoph Hellwig
2025-08-11 15:27       ` Keith Busch
2025-08-11 16:17         ` Christoph Hellwig
2025-08-20 19:22           ` Keith Busch
