Re: [PATCH v5 02/10] block: Introduce REQ_OP_ZONE_APPEND

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Christoph Hellwig <hch@infradead.org>
To: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	Christoph Hellwig <hch@infradead.org>,
	linux-block <linux-block@vger.kernel.org>,
	Damien Le Moal <Damien.LeMoal@wdc.com>,
	Keith Busch <kbusch@kernel.org>,
	"linux-scsi @ vger . kernel . org" <linux-scsi@vger.kernel.org>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	"linux-fsdevel @ vger . kernel . org"
	<linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH v5 02/10] block: Introduce REQ_OP_ZONE_APPEND
Date: Fri, 10 Apr 2020 00:10:45 -0700	[thread overview]
Message-ID: <20200410071045.GA13404@infradead.org> (raw)
In-Reply-To: <20200409165352.2126-3-johannes.thumshirn@wdc.com>

I've just been auditing the bio code for now and have a few suggestions:

 - we really should be reusing the passthrough bio handling for
   zone append instead of reinventing it
 - I think __bio_iov_iter_get_pages should be split into a separate
   append version.  That matches the bvec split (which we fail to
   handle properly for append), avoids a branch for every page in
   the fast path and generall seems to look cleaner.

Patch on top of your whole branch attached:

diff --git a/block/bio.c b/block/bio.c
index 4029a48f3828..dd84bd5adc24 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -679,54 +679,6 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 }
 EXPORT_SYMBOL(bio_clone_fast);
 
-static bool bio_try_merge_zone_append_page(struct bio *bio, struct page *page,
-					   unsigned int len, unsigned int off,
-					   bool *same_page)
-{
-	struct request_queue *q = bio->bi_disk->queue;
-	struct bio_vec *bv;
-	unsigned long mask = queue_segment_boundary(q);
-	phys_addr_t addr1, addr2;
-
-	if (bio->bi_vcnt < 1)
-		return false;
-
-	bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-	addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
-	addr2 = page_to_phys(page) + off + len - 1;
-
-	if ((addr1 | mask) != (addr2 | mask))
-		return false;
-	if (bv->bv_len + len > queue_max_segment_size(q))
-		return false;
-	return __bio_try_merge_page(bio, page, len, off, same_page);
-}
-
-static int bio_add_append_page(struct bio *bio, struct page *page, unsigned len,
-			       size_t offset)
-{
-	struct request_queue *q = bio->bi_disk->queue;
-	unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
-	bool same_page = false;
-
-	if (WARN_ON_ONCE(!max_append_sectors))
-		return 0;
-
-	if (((bio->bi_iter.bi_size + len) >> 9) > max_append_sectors)
-		return 0;
-
-	if (bio_try_merge_zone_append_page(bio, page, len, offset, &same_page))
-		return len;
-
-	if (bio->bi_vcnt >= queue_max_segments(q))
-		return 0;
-
-	__bio_add_page(bio, page, len, offset);
-
-	return len;
-}
-
 static inline bool page_is_mergeable(const struct bio_vec *bv,
 		struct page *page, unsigned int len, unsigned int off,
 		bool *same_page)
@@ -746,9 +698,13 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
 	return true;
 }
 
-static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
-		struct page *page, unsigned len, unsigned offset,
-		bool *same_page)
+/*
+ * Try to merge a page into a segment, while obeying the hardware segment
+ * size limit.  This is not for normal read/write bios, but for passthrough
+ * or Zone Append operations that we can't split.
+ */
+static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
+		struct page *page, unsigned len, unsigned offset, bool *same_page)
 {
 	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 	unsigned long mask = queue_segment_boundary(q);
@@ -762,39 +718,24 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
 	return __bio_try_merge_page(bio, page, len, offset, same_page);
 }
 
-/**
- *	__bio_add_pc_page	- attempt to add page to passthrough bio
- *	@q: the target queue
- *	@bio: destination bio
- *	@page: page to add
- *	@len: vec entry length
- *	@offset: vec entry offset
- *	@same_page: return if the merge happen inside the same page
- *
- *	Attempt to add a page to the bio_vec maplist. This can fail for a
- *	number of reasons, such as the bio being full or target block device
- *	limitations. The target block device must allow bio's up to PAGE_SIZE,
- *	so it is always possible to add a single page to an empty bio.
- *
- *	This should only be used by passthrough bios.
+/*
+ * Add a page to a bio while respecting the hardware max_sectors, max_segment
+ * and gap limitations.
  */
-static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+static int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
-		bool *same_page)
+		unsigned int max_sectors, bool *same_page)
 {
 	struct bio_vec *bvec;
 
-	/*
-	 * cloned bio must not modify vec list
-	 */
-	if (unlikely(bio_flagged(bio, BIO_CLONED)))
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return 0;
 
-	if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
+	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
 		return 0;
 
 	if (bio->bi_vcnt > 0) {
-		if (bio_try_merge_pc_page(q, bio, page, len, offset, same_page))
+		if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
 			return len;
 
 		/*
@@ -821,11 +762,27 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
 	return len;
 }
 
+/**
+ * bio_add_pc_page	- attempt to add page to passthrough bio
+ * @q: the target queue
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
+ * number of reasons, such as the bio being full or target block device
+ * limitations. The target block device must allow bio's up to PAGE_SIZE,
+ * so it is always possible to add a single page to an empty bio.
+ *
+ * This should only be used by passthrough bios.
+ */
 int bio_add_pc_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset)
 {
 	bool same_page = false;
-	return __bio_add_pc_page(q, bio, page, len, offset, &same_page);
+	return bio_add_hw_page(q, bio, page, len, offset,
+			queue_max_hw_sectors(q), &same_page);
 }
 EXPORT_SYMBOL(bio_add_pc_page);
 
@@ -993,27 +950,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 		struct page *page = pages[i];
 
 		len = min_t(size_t, PAGE_SIZE - offset, left);
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-			int ret;
-
-			if (bio_try_merge_zone_append_page(bio, page, len,
-							   offset,
-							   &same_page)) {
-				if (same_page)
-					put_page(page);
-			} else {
-				ret = bio_add_append_page(bio, page, len,
-							  offset);
-				if (ret != len)
-					return -EINVAL;
-			}
-		} else if (__bio_try_merge_page(bio, page, len, offset,
-						&same_page)) {
+		if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
 			if (same_page)
 				put_page(page);
 		} else {
 			if (WARN_ON_ONCE(bio_full(bio, len)))
-                                return -EINVAL;
+				return -EINVAL;
 			__bio_add_page(bio, page, len, offset);
 		}
 		offset = 0;
@@ -1023,6 +965,50 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	return 0;
 }
 
+static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+	struct request_queue *q = bio->bi_disk->queue;
+	unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
+	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+	struct page **pages = (struct page **)bv;
+	ssize_t size, left;
+	unsigned len, i;
+	size_t offset;
+
+	if (WARN_ON_ONCE(!max_append_sectors))
+		return 0;
+
+	/*
+	 * Move page array up in the allocated memory for the bio vecs as far as
+	 * possible so that we can start filling biovecs from the beginning
+	 * without overwriting the temporary page array.
+	*/
+	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
+	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
+
+	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (unlikely(size <= 0))
+		return size ? size : -EFAULT;
+
+	for (left = size, i = 0; left > 0; left -= len, i++) {
+		struct page *page = pages[i];
+		bool same_page = false;
+
+		len = min_t(size_t, PAGE_SIZE - offset, left);
+		if (bio_add_hw_page(q, bio, page, len, offset,
+				max_append_sectors, &same_page) != len)
+			return -EINVAL;
+		if (same_page)
+			put_page(page);
+		offset = 0;
+	}
+
+	iov_iter_advance(iter, size);
+	return 0;
+}
+
 /**
  * bio_iov_iter_get_pages - add user or kernel pages to a bio
  * @bio: bio to add pages to
@@ -1052,10 +1038,16 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 		return -EINVAL;
 
 	do {
-		if (is_bvec)
-			ret = __bio_iov_bvec_add_pages(bio, iter);
-		else
-			ret = __bio_iov_iter_get_pages(bio, iter);
+		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+			if (WARN_ON_ONCE(is_bvec))
+				return -EINVAL;
+			ret = __bio_iov_append_get_pages(bio, iter);
+		} else {
+			if (is_bvec)
+				ret = __bio_iov_bvec_add_pages(bio, iter);
+			else
+				ret = __bio_iov_iter_get_pages(bio, iter);
+		}
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
 
 	if (is_bvec)
@@ -1455,6 +1447,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
 			     struct iov_iter *iter,
 			     gfp_t gfp_mask)
 {
+	unsigned int max_sectors = queue_max_hw_sectors(q);
 	int j;
 	struct bio *bio;
 	int ret;
@@ -1492,8 +1485,8 @@ struct bio *bio_map_user_iov(struct request_queue *q,
 				if (n > bytes)
 					n = bytes;
 
-				if (!__bio_add_pc_page(q, bio, page, n, offs,
-						&same_page)) {
+				if (!bio_add_hw_page(q, bio, page, n, offs,
+						max_sectors, &same_page)) {
 					if (same_page)
 						put_page(page);
 					break;

next prev parent reply	other threads:[~2020-04-10  7:10 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-09 16:53 [PATCH v5 00/10] Introduce Zone Append for writing to zoned block devices Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 01/10] block: provide fallbacks for blk_queue_zone_is_seq and blk_queue_zone_no Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 02/10] block: Introduce REQ_OP_ZONE_APPEND Johannes Thumshirn
2020-04-10  7:10   ` Christoph Hellwig [this message]
2020-04-14  9:43     ` Johannes Thumshirn
2020-04-14 11:28       ` hch
2020-04-09 16:53 ` [PATCH v5 03/10] block: introduce blk_req_zone_write_trylock Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 04/10] block: Modify revalidate zones Johannes Thumshirn
2020-04-10  0:27   ` Damien Le Moal
2020-04-10  6:40   ` Christoph Hellwig
2020-04-10  6:55     ` Damien Le Moal
2020-04-09 16:53 ` [PATCH v5 05/10] scsi: sd_zbc: factor out sanity checks for zoned commands Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 06/10] scsi: export scsi_mq_free_sgtables Johannes Thumshirn
2020-04-10  5:58   ` Christoph Hellwig
2020-04-10  7:46     ` Johannes Thumshirn
2020-04-10 14:22   ` Bart Van Assche
2020-04-09 16:53 ` [PATCH v5 07/10] scsi: sd_zbc: emulate ZONE_APPEND commands Johannes Thumshirn
2020-04-10  0:30   ` Damien Le Moal
2020-04-10  6:18   ` Christoph Hellwig
2020-04-10  6:38     ` Christoph Hellwig
2020-04-10  8:01       ` Johannes Thumshirn
2020-04-14 11:09       ` Johannes Thumshirn
2020-04-14 11:30         ` hch
2020-04-10  7:54     ` Johannes Thumshirn
2020-04-10  7:23   ` Christoph Hellwig
2020-04-14 10:18     ` Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 08/10] null_blk: Support REQ_OP_ZONE_APPEND Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 09/10] block: export bio_release_pages and bio_iov_iter_get_pages Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 10/10] zonefs: use REQ_OP_ZONE_APPEND for sync DIO Johannes Thumshirn

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4029a48f382 dfblob:dd84bd5adc2 )
 OR (
bs:"Re: [PATCH v5 02/10] block: Introduce REQ_OP_ZONE_APPEND" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200410071045.GA13404@infradead.org \
    --to=hch@infradead.org \
    --cc=Damien.LeMoal@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=johannes.thumshirn@wdc.com \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.