All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@fb.com>
To: <linux-nvme@lists.infradead.org>, <linux-block@vger.kernel.org>,
	<io-uring@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>
Cc: <axboe@kernel.dk>, <hch@lst.de>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Kernel Team <Kernel-team@fb.com>, Keith Busch <kbusch@kernel.org>
Subject: [PATCHv3 4/7] block: add dma tag bio type
Date: Fri, 5 Aug 2022 09:24:41 -0700	[thread overview]
Message-ID: <20220805162444.3985535-5-kbusch@fb.com> (raw)
In-Reply-To: <20220805162444.3985535-1-kbusch@fb.com>

From: Keith Busch <kbusch@kernel.org>

Premapped buffers don't require a generic bio_vec since these have
already been dma mapped to the driver specific data structure. Repurpose
the bi_io_vec with the driver specific tag as they are mutually
exclusive, and provide all the setup and split helpers to support dma
tags.

In order to use this, a driver must implement the .dma_map() blk-mq op.
If the driver provides this callback, then it must be aware that any
given bio may be using a dma_tag instead of a bio_vec.

Note, this isn't working with blk_integrity.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/bio.c               | 24 +++++++++++++++++++++++-
 block/blk-merge.c         | 19 +++++++++++++++++++
 block/fops.c              |  4 +++-
 include/linux/bio.h       | 22 +++++++++++++---------
 include/linux/blk-mq.h    | 11 +++++++++++
 include/linux/blk_types.h |  6 +++++-
 6 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index d6eb90d9b20b..c1e97dff5e40 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -229,7 +229,8 @@ static void bio_free(struct bio *bio)
 	WARN_ON_ONCE(!bs);
 
 	bio_uninit(bio);
-	bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+	if (!bio_flagged(bio, BIO_DMA_TAGGED))
+		bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
 	mempool_free(p - bs->front_pad, &bs->bio_pool);
 }
 
@@ -762,6 +763,8 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 	bio_set_flag(bio, BIO_CLONED);
 	if (bio_flagged(bio_src, BIO_THROTTLED))
 		bio_set_flag(bio, BIO_THROTTLED);
+	if (bio_flagged(bio_src, BIO_DMA_TAGGED))
+		bio_set_flag(bio, BIO_DMA_TAGGED);
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_iter = bio_src->bi_iter;
 
@@ -1151,6 +1154,19 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 	bio_set_flag(bio, BIO_CLONED);
 }
 
+void bio_iov_dma_tag_set(struct bio *bio, struct iov_iter *iter)
+{
+	size_t size = iov_iter_count(iter);
+
+	bio->bi_vcnt = iter->nr_segs;
+	bio->bi_dma_tag = iter->dma_tag;
+	bio->bi_iter.bi_bvec_done = iter->iov_offset;
+	bio->bi_iter.bi_size = size;
+	bio->bi_opf |= REQ_NOMERGE;
+	bio_set_flag(bio, BIO_NO_PAGE_REF);
+	bio_set_flag(bio, BIO_DMA_TAGGED);
+}
+
 static int bio_iov_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int offset)
 {
@@ -1287,6 +1303,12 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 		return 0;
 	}
 
+	if (iov_iter_is_dma_tag(iter)) {
+		bio_iov_dma_tag_set(bio, iter);
+		iov_iter_advance(iter, bio->bi_iter.bi_size);
+		return 0;
+	}
+
 	do {
 		ret = __bio_iov_iter_get_pages(bio, iter);
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ff04e9290715..d024885ad4c4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -274,6 +274,25 @@ static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim,
 	struct bvec_iter iter;
 	unsigned nsegs = 0, bytes = 0;
 
+	if (bio_flagged(bio, BIO_DMA_TAGGED)) {
+		int offset = offset_in_page(bio->bi_iter.bi_bvec_done);
+
+		nsegs = ALIGN(bio->bi_iter.bi_size + offset, PAGE_SIZE) >>
+								PAGE_SHIFT;
+		if (bio->bi_iter.bi_size > max_bytes) {
+			bytes = max_bytes;
+			nsegs = (bytes + offset) >> PAGE_SHIFT;
+		} else if (nsegs > lim->max_segments) {
+			nsegs = lim->max_segments;
+			bytes = PAGE_SIZE * nsegs - offset;
+		} else {
+			*segs = nsegs;
+			return NULL;
+		}
+
+		goto split;
+	}
+
 	bio_for_each_bvec(bv, bio, iter) {
 		/*
 		 * If the queue doesn't support SG gaps and adding this
diff --git a/block/fops.c b/block/fops.c
index db2d1e848f4b..1b3649c7eb17 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -325,7 +325,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 		 * bio_iov_iter_get_pages() and set the bvec directly.
 		 */
 		bio_iov_bvec_set(bio, iter);
-	} else {
+	} else if (iov_iter_is_dma_tag(iter)) {
+		bio_iov_dma_tag_set(bio, iter);
+	}else {
 		ret = bio_iov_iter_get_pages(bio, iter);
 		if (unlikely(ret)) {
 			bio_put(bio);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ca22b06700a9..b5277ec189e0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -61,11 +61,17 @@ static inline bool bio_has_data(struct bio *bio)
 	return false;
 }
 
+static inline bool bio_flagged(const struct bio *bio, unsigned int bit)
+{
+	return (bio->bi_flags & (1U << bit)) != 0;
+}
+
 static inline bool bio_no_advance_iter(const struct bio *bio)
 {
 	return bio_op(bio) == REQ_OP_DISCARD ||
 	       bio_op(bio) == REQ_OP_SECURE_ERASE ||
-	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
+	       bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+	       bio_flagged(bio, BIO_DMA_TAGGED);
 }
 
 static inline void *bio_data(struct bio *bio)
@@ -98,9 +104,11 @@ static inline void bio_advance_iter(const struct bio *bio,
 {
 	iter->bi_sector += bytes >> 9;
 
-	if (bio_no_advance_iter(bio))
+	if (bio_no_advance_iter(bio)) {
 		iter->bi_size -= bytes;
-	else
+		if (bio_flagged(bio, BIO_DMA_TAGGED))
+			iter->bi_bvec_done += bytes;
+	} else
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
@@ -225,11 +233,6 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
 	atomic_set(&bio->__bi_cnt, count);
 }
 
-static inline bool bio_flagged(struct bio *bio, unsigned int bit)
-{
-	return (bio->bi_flags & (1U << bit)) != 0;
-}
-
 static inline void bio_set_flag(struct bio *bio, unsigned int bit)
 {
 	bio->bi_flags |= (1U << bit);
@@ -447,7 +450,7 @@ static inline void bio_wouldblock_error(struct bio *bio)
  */
 static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
 {
-	if (iov_iter_is_bvec(iter))
+	if (iov_iter_is_bvec(iter) || iov_iter_is_dma_tag(iter))
 		return 0;
 	return iov_iter_npages(iter, max_segs);
 }
@@ -471,6 +474,7 @@ void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
+void bio_iov_dma_tag_set(struct bio *bio, struct iov_iter *iter);
 void __bio_release_pages(struct bio *bio, bool mark_dirty);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e10aabb36c2c..f5e0aa61bf85 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1141,6 +1141,17 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 void blk_dump_rq_flags(struct request *, char *);
 
+static inline void *blk_rq_dma_tag(struct request *rq)
+{
+	return rq->bio && bio_flagged(rq->bio, BIO_DMA_TAGGED) ?
+		rq->bio->bi_dma_tag : 0;
+}
+
+static inline size_t blk_rq_dma_offset(struct request *rq)
+{
+	return rq->bio->bi_iter.bi_bvec_done;
+}
+
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1ef99790f6ed..ea6db439acbe 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -299,7 +299,10 @@ struct bio {
 
 	atomic_t		__bi_cnt;	/* pin count */
 
-	struct bio_vec		*bi_io_vec;	/* the actual vec list */
+	union {
+		struct bio_vec		*bi_io_vec;	/* the actual vec list */
+		void			*bi_dma_tag;    /* driver specific tag */
+	};
 
 	struct bio_set		*bi_pool;
 
@@ -334,6 +337,7 @@ enum {
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_DMA_TAGGED,		/* Using premmaped dma buffers */
 	BIO_FLAG_LAST
 };
 
-- 
2.30.2


  parent reply	other threads:[~2022-08-05 16:26 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-05 16:24 [PATCHv3 0/7] dma mapping optimisations Keith Busch
2022-08-05 16:24 ` [PATCHv3 1/7] blk-mq: add ops to dma map bvec Keith Busch
2022-08-05 16:24 ` [PATCHv3 2/7] file: " Keith Busch
2022-08-08  0:21   ` Dave Chinner
2022-08-08  1:13     ` Matthew Wilcox
2022-08-08  2:15       ` Dave Chinner
2022-08-08  2:49         ` Matthew Wilcox
2022-08-08  7:31           ` Dave Chinner
2022-08-08 15:28             ` Keith Busch
2022-08-08 10:14         ` Pavel Begunkov
2022-08-05 16:24 ` [PATCHv3 3/7] iov_iter: introduce type for preregistered dma tags Keith Busch
2022-08-05 16:24 ` Keith Busch [this message]
2022-08-05 16:24 ` [PATCHv3 5/7] io_uring: introduce file slot release helper Keith Busch
2022-08-05 16:24 ` [PATCHv3 6/7] io_uring: add support for dma pre-mapping Keith Busch
2022-08-05 16:24 ` [PATCHv3 7/7] nvme-pci: implement dma_map support Keith Busch
2022-08-09  6:46 ` [PATCHv3 0/7] dma mapping optimisations Christoph Hellwig
2022-08-09 14:18   ` Keith Busch
2022-08-09 18:39     ` Christoph Hellwig
2022-08-09 16:46   ` Keith Busch
2022-08-09 18:41     ` Christoph Hellwig
2022-08-10 18:05       ` Keith Busch
2022-08-11  7:22         ` Christoph Hellwig
2022-08-31 21:19           ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220805162444.3985535-5-kbusch@fb.com \
    --to=kbusch@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=io-uring@vger.kernel.org \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.