From: Christoph Hellwig <hch@infradead.org>
To: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>,
Christoph Hellwig <hch@infradead.org>,
linux-block <linux-block@vger.kernel.org>,
Damien Le Moal <Damien.LeMoal@wdc.com>,
Keith Busch <kbusch@kernel.org>,
linux-scsi <linux-scsi@vger.kernel.org>,
"Martin K. Petersen" <martin.petersen@oracle.com>,
linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH v5 02/10] block: Introduce REQ_OP_ZONE_APPEND
Date: Fri, 10 Apr 2020 00:10:45 -0700
Message-ID: <20200410071045.GA13404@infradead.org>
In-Reply-To: <20200409165352.2126-3-johannes.thumshirn@wdc.com>

I've just been auditing the bio code for now and have a few suggestions:
- we really should be reusing the passthrough bio handling for
zone append instead of reinventing it
- I think __bio_iov_iter_get_pages should be split into a separate
append version. That matches the bvec split (which we fail to
handle properly for append), avoids a branch for every page in
the fast path and generally seems to look cleaner.
Patch on top of your whole branch attached:
diff --git a/block/bio.c b/block/bio.c
index 4029a48f3828..dd84bd5adc24 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -679,54 +679,6 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_clone_fast);
-static bool bio_try_merge_zone_append_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off,
- bool *same_page)
-{
- struct request_queue *q = bio->bi_disk->queue;
- struct bio_vec *bv;
- unsigned long mask = queue_segment_boundary(q);
- phys_addr_t addr1, addr2;
-
- if (bio->bi_vcnt < 1)
- return false;
-
- bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
- addr2 = page_to_phys(page) + off + len - 1;
-
- if ((addr1 | mask) != (addr2 | mask))
- return false;
- if (bv->bv_len + len > queue_max_segment_size(q))
- return false;
- return __bio_try_merge_page(bio, page, len, off, same_page);
-}
-
-static int bio_add_append_page(struct bio *bio, struct page *page, unsigned len,
- size_t offset)
-{
- struct request_queue *q = bio->bi_disk->queue;
- unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
- bool same_page = false;
-
- if (WARN_ON_ONCE(!max_append_sectors))
- return 0;
-
- if (((bio->bi_iter.bi_size + len) >> 9) > max_append_sectors)
- return 0;
-
- if (bio_try_merge_zone_append_page(bio, page, len, offset, &same_page))
- return len;
-
- if (bio->bi_vcnt >= queue_max_segments(q))
- return 0;
-
- __bio_add_page(bio, page, len, offset);
-
- return len;
-}
-
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
bool *same_page)
@@ -746,9 +698,13 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
return true;
}
-static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned len, unsigned offset,
- bool *same_page)
+/*
+ * Try to merge a page into a segment, while obeying the hardware segment
+ * size limit. This is not for normal read/write bios, but for passthrough
+ * or Zone Append operations that we can't split.
+ */
+static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned len, unsigned offset, bool *same_page)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
@@ -762,39 +718,24 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
return __bio_try_merge_page(bio, page, len, offset, same_page);
}
-/**
- * __bio_add_pc_page - attempt to add page to passthrough bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- * @same_page: return if the merge happen inside the same page
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
- *
- * This should only be used by passthrough bios.
+/*
+ * Add a page to a bio while respecting the hardware max_sectors, max_segment
+ * and gap limitations.
*/
-static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+static int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
- bool *same_page)
+ unsigned int max_sectors, bool *same_page)
{
struct bio_vec *bvec;
- /*
- * cloned bio must not modify vec list
- */
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
- if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
+ if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
return 0;
if (bio->bi_vcnt > 0) {
- if (bio_try_merge_pc_page(q, bio, page, len, offset, same_page))
+ if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
return len;
/*
@@ -821,11 +762,27 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
return len;
}
+/**
+ * bio_add_pc_page - attempt to add page to passthrough bio
+ * @q: the target queue
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
+ * number of reasons, such as the bio being full or target block device
+ * limitations. The target block device must allow bio's up to PAGE_SIZE,
+ * so it is always possible to add a single page to an empty bio.
+ *
+ * This should only be used by passthrough bios.
+ */
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset)
{
bool same_page = false;
- return __bio_add_pc_page(q, bio, page, len, offset, &same_page);
+ return bio_add_hw_page(q, bio, page, len, offset,
+ queue_max_hw_sectors(q), &same_page);
}
EXPORT_SYMBOL(bio_add_pc_page);
@@ -993,27 +950,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
struct page *page = pages[i];
len = min_t(size_t, PAGE_SIZE - offset, left);
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- int ret;
-
- if (bio_try_merge_zone_append_page(bio, page, len,
- offset,
- &same_page)) {
- if (same_page)
- put_page(page);
- } else {
- ret = bio_add_append_page(bio, page, len,
- offset);
- if (ret != len)
- return -EINVAL;
- }
- } else if (__bio_try_merge_page(bio, page, len, offset,
- &same_page)) {
+ if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
if (same_page)
put_page(page);
} else {
if (WARN_ON_ONCE(bio_full(bio, len)))
- return -EINVAL;
+ return -EINVAL;
__bio_add_page(bio, page, len, offset);
}
offset = 0;
@@ -1023,6 +965,50 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+ struct request_queue *q = bio->bi_disk->queue;
+ unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
+ struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+ struct page **pages = (struct page **)bv;
+ ssize_t size, left;
+ unsigned len, i;
+ size_t offset;
+
+ if (WARN_ON_ONCE(!max_append_sectors))
+ return 0;
+
+ /*
+ * Move page array up in the allocated memory for the bio vecs as far as
+ * possible so that we can start filling biovecs from the beginning
+ * without overwriting the temporary page array.
+ */
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
+
+ size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ if (unlikely(size <= 0))
+ return size ? size : -EFAULT;
+
+ for (left = size, i = 0; left > 0; left -= len, i++) {
+ struct page *page = pages[i];
+ bool same_page = false;
+
+ len = min_t(size_t, PAGE_SIZE - offset, left);
+ if (bio_add_hw_page(q, bio, page, len, offset,
+ max_append_sectors, &same_page) != len)
+ return -EINVAL;
+ if (same_page)
+ put_page(page);
+ offset = 0;
+ }
+
+ iov_iter_advance(iter, size);
+ return 0;
+}
+
/**
* bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
@@ -1052,10 +1038,16 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return -EINVAL;
do {
- if (is_bvec)
- ret = __bio_iov_bvec_add_pages(bio, iter);
- else
- ret = __bio_iov_iter_get_pages(bio, iter);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ if (WARN_ON_ONCE(is_bvec))
+ return -EINVAL;
+ ret = __bio_iov_append_get_pages(bio, iter);
+ } else {
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
+ }
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
if (is_bvec)
@@ -1455,6 +1447,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
struct iov_iter *iter,
gfp_t gfp_mask)
{
+ unsigned int max_sectors = queue_max_hw_sectors(q);
int j;
struct bio *bio;
int ret;
@@ -1492,8 +1485,8 @@ struct bio *bio_map_user_iov(struct request_queue *q,
if (n > bytes)
n = bytes;
- if (!__bio_add_pc_page(q, bio, page, n, offs,
- &same_page)) {
+ if (!bio_add_hw_page(q, bio, page, n, offs,
+ max_sectors, &same_page)) {
if (same_page)
put_page(page);
break;
Thread overview: 29+ messages
2020-04-09 16:53 [PATCH v5 00/10] Introduce Zone Append for writing to zoned block devices Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 01/10] block: provide fallbacks for blk_queue_zone_is_seq and blk_queue_zone_no Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 02/10] block: Introduce REQ_OP_ZONE_APPEND Johannes Thumshirn
2020-04-10 7:10 ` Christoph Hellwig [this message]
2020-04-14 9:43 ` Johannes Thumshirn
2020-04-14 11:28 ` hch
2020-04-09 16:53 ` [PATCH v5 03/10] block: introduce blk_req_zone_write_trylock Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 04/10] block: Modify revalidate zones Johannes Thumshirn
2020-04-10 0:27 ` Damien Le Moal
2020-04-10 6:40 ` Christoph Hellwig
2020-04-10 6:55 ` Damien Le Moal
2020-04-09 16:53 ` [PATCH v5 05/10] scsi: sd_zbc: factor out sanity checks for zoned commands Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 06/10] scsi: export scsi_mq_free_sgtables Johannes Thumshirn
2020-04-10 5:58 ` Christoph Hellwig
2020-04-10 7:46 ` Johannes Thumshirn
2020-04-10 14:22 ` Bart Van Assche
2020-04-09 16:53 ` [PATCH v5 07/10] scsi: sd_zbc: emulate ZONE_APPEND commands Johannes Thumshirn
2020-04-10 0:30 ` Damien Le Moal
2020-04-10 6:18 ` Christoph Hellwig
2020-04-10 6:38 ` Christoph Hellwig
2020-04-10 8:01 ` Johannes Thumshirn
2020-04-14 11:09 ` Johannes Thumshirn
2020-04-14 11:30 ` hch
2020-04-10 7:54 ` Johannes Thumshirn
2020-04-10 7:23 ` Christoph Hellwig
2020-04-14 10:18 ` Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 08/10] null_blk: Support REQ_OP_ZONE_APPEND Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 09/10] block: export bio_release_pages and bio_iov_iter_get_pages Johannes Thumshirn
2020-04-09 16:53 ` [PATCH v5 10/10] zonefs: use REQ_OP_ZONE_APPEND for sync DIO Johannes Thumshirn