From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v2 03/24] btrfs: extent_io: replace extent_start/extent_len with better structure for end_bio_extent_readpage()
Date: Fri, 13 Nov 2020 20:51:28 +0800 [thread overview]
Message-ID: <20201113125149.140836-4-wqu@suse.com> (raw)
In-Reply-To: <20201113125149.140836-1-wqu@suse.com>
In end_bio_extent_readpage() we had a strange dance around
extent_start/extent_len.
What hides behind the strange dance is that it's just calling
endio_readpage_release_extent() on each bvec range.
Here is an example to explain the original work flow:
Bio is for inode 257, containing 2 pages, for range [1M, 1M+8K)
end_bio_extent_readpage() entered
|- extent_start = 0;
|- extent_len = 0;
|- bio_for_each_segment_all() {
| |- /* Got the 1st bvec */
| |- start = SZ_1M;
| |- end = SZ_1M + SZ_4K - 1;
| |- uptodate = 1;
| |- if (extent_len == 0) {
| | |- extent_start = start; /* SZ_1M */
| | |- extent_len = end + 1 - start; /* SZ_4K */
| | }
| |
| |- /* Got the 2nd bvec */
| |- start = SZ_1M + SZ_4K;
| |- end = SZ_1M + SZ_8K - 1;
| |- uptodate = 1;
| |- if (extent_start + extent_len == start) {
| | |- extent_len += end + 1 - start; /* SZ_8K */
| | }
| } /* All bio vec iterated */
|
|- if (extent_len) {
|- endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
/* extent_start == SZ_1M, extent_len == SZ_8K, uptodate = 1 */
As the above flow shows, the existing code in end_bio_extent_readpage()
just accumulates extent_start/extent_len, and when the contiguous range
breaks, calls endio_readpage_release_extent() for the range.
However current behavior has something not really considered:
- The inode can change
For a bio, its pages don't need to have contiguous page_offset.
This means even pages from different inodes can be packed into one
bio.
- Bvec can cross page boundary
There is a feature called multi-page bvec, where bvec->bv_len can go
beyond bvec->bv_page boundary.
- Poor readability
This patch will address the problem by:
- Introduce a proper structure, processed_extent, to record processed
extent range
- Integrate inode/start/end/uptodate check into
endio_readpage_release_extent()
- Add more comments on each step.
This should greatly improve the readability, now in
end_bio_extent_readpage() there are only two
endio_readpage_release_extent() calls.
- Add inode contig check
Now we also ensure the inode is the same before checking the range
contig.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 102 +++++++++++++++++++++++++++++--------------
1 file changed, 69 insertions(+), 33 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3bbb3bdd395b..b5b3700943e0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2779,16 +2779,74 @@ static void end_bio_extent_writepage(struct bio *bio)
bio_put(bio);
}
+/*
+ * Records previously processed extent range.
+ *
+ * For endio_readpage_release_extent() to handle a full extent range, reducing
+ * the extent io operations.
+ */
+struct processed_extent {
+ struct btrfs_inode *inode;
+ u64 start; /* file offset in @inode */
+ u64 end; /* file offset in @inode */
+ bool uptodate;
+};
+
+/*
+ * Try to release processed extent range.
+ *
+ * May not release the extent range right now if the current range is contig
+ * with processed extent.
+ *
+ * Will release processed extent when any of @inode, @uptodate, the range is
+ * no longer contig with processed range.
+ * Pass @inode == NULL will force processed extent to be released.
+ */
static void
-endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
- int uptodate)
+endio_readpage_release_extent(struct processed_extent *processed,
+ struct btrfs_inode *inode, u64 start, u64 end,
+ bool uptodate)
{
struct extent_state *cached = NULL;
- u64 end = start + len - 1;
+ struct extent_io_tree *tree;
- if (uptodate && tree->track_uptodate)
- set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
- unlock_extent_cached_atomic(tree, start, end, &cached);
+ /* We're the first extent, initialize @processed */
+ if (!processed->inode)
+ goto update;
+
+ /*
+ * Contig with processed extent. Just uptodate the end
+ *
+ * Several things to notice:
+ * - Bio can be merged as long as on-disk bytenr is contig
+ * This means we can have page belonging to other inodes, thus need to
+ * check if the inode matches.
+ * - Bvec can contain range beyond current page for multi-page bvec
+ * Thus we need to do processed->end + 1 >= start check
+ */
+ if (processed->inode == inode && processed->uptodate == uptodate &&
+ processed->end + 1 >= start && end >= processed->end) {
+ processed->end = end;
+ return;
+ }
+
+ tree = &processed->inode->io_tree;
+ /*
+ * Now we have a range not contig with processed range, release the
+ * processed range now.
+ */
+ if (processed->uptodate && tree->track_uptodate)
+ set_extent_uptodate(tree, processed->start, processed->end,
+ &cached, GFP_ATOMIC);
+ unlock_extent_cached_atomic(tree, processed->start, processed->end,
+ &cached);
+
+update:
+ /* Update @processed to current range */
+ processed->inode = inode;
+ processed->start = start;
+ processed->end = end;
+ processed->uptodate = uptodate;
}
/*
@@ -2808,12 +2866,11 @@ static void end_bio_extent_readpage(struct bio *bio)
int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree, *failure_tree;
+ struct processed_extent processed = { 0 };
u64 offset = 0;
u64 start;
u64 end;
u64 len;
- u64 extent_start = 0;
- u64 extent_len = 0;
int mirror;
int ret;
struct bvec_iter_all iter_all;
@@ -2922,32 +2979,11 @@ static void end_bio_extent_readpage(struct bio *bio)
unlock_page(page);
offset += len;
- if (unlikely(!uptodate)) {
- if (extent_len) {
- endio_readpage_release_extent(tree,
- extent_start,
- extent_len, 1);
- extent_start = 0;
- extent_len = 0;
- }
- endio_readpage_release_extent(tree, start,
- end - start + 1, 0);
- } else if (!extent_len) {
- extent_start = start;
- extent_len = end + 1 - start;
- } else if (extent_start + extent_len == start) {
- extent_len += end + 1 - start;
- } else {
- endio_readpage_release_extent(tree, extent_start,
- extent_len, uptodate);
- extent_start = start;
- extent_len = end + 1 - start;
- }
+ endio_readpage_release_extent(&processed, BTRFS_I(inode),
+ start, end, uptodate);
}
-
- if (extent_len)
- endio_readpage_release_extent(tree, extent_start, extent_len,
- uptodate);
+ /* Release the last extent */
+ endio_readpage_release_extent(&processed, NULL, 0, 0, 0);
btrfs_io_bio_free_csum(io_bio);
bio_put(bio);
}
--
2.29.2
next prev parent reply other threads:[~2020-11-13 12:52 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-13 12:51 [PATCH v2 00/24] btrfs: preparation patches for subpage support Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 01/24] btrfs: tests: fix free space tree test failure on 64K page system Qu Wenruo
2020-11-17 11:53 ` Nikolay Borisov
2020-11-13 12:51 ` [PATCH v2 02/24] btrfs: extent-io-tests: remove invalid tests Qu Wenruo
2020-11-13 18:42 ` Josef Bacik
2020-11-19 21:08 ` David Sterba
2020-11-13 12:51 ` Qu Wenruo [this message]
2020-11-13 19:13 ` [PATCH v2 03/24] btrfs: extent_io: replace extent_start/extent_len with better structure for end_bio_extent_readpage() Josef Bacik
2020-11-18 16:05 ` David Sterba
2020-11-18 23:49 ` Qu Wenruo
2020-11-19 20:30 ` David Sterba
2020-11-19 21:08 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 04/24] btrfs: extent_io: introduce helper to handle page status update in end_bio_extent_readpage() Qu Wenruo
2020-11-13 19:18 ` Josef Bacik
2020-11-18 20:27 ` David Sterba
2020-11-18 23:43 ` Qu Wenruo
2020-11-19 18:32 ` David Sterba
2020-11-19 21:08 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 05/24] btrfs: extent_io: extract the btree page submission code into its own helper function Qu Wenruo
2020-11-13 19:22 ` Josef Bacik
2020-11-18 20:04 ` David Sterba
2020-11-18 20:09 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 06/24] btrfs: remove the phy_offset parameter for btrfs_validate_metadata_buffer() Qu Wenruo
2020-11-19 21:09 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 07/24] btrfs: pass bio_offset to check_data_csum() directly Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 08/24] btrfs: inode: make btrfs_verify_data_csum() follow sector size Qu Wenruo
2020-11-13 19:43 ` Josef Bacik
2020-11-13 12:51 ` [PATCH v2 09/24] btrfs: extent_io: calculate inline extent buffer page size based on page size Qu Wenruo
2020-11-13 19:47 ` Josef Bacik
2020-11-14 0:11 ` Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 10/24] btrfs: introduce a helper to determine if the sectorsize is smaller than PAGE_SIZE Qu Wenruo
2020-11-16 22:51 ` David Sterba
2020-11-16 23:50 ` Qu Wenruo
2020-11-17 0:24 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 11/24] btrfs: extent_io: don't allow tree block to cross page boundary for subpage support Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 12/24] btrfs: extent_io: update num_extent_pages() to support subpage sized extent buffer Qu Wenruo
2020-11-18 16:22 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 13/24] btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors Qu Wenruo
2020-11-18 19:30 ` David Sterba
2020-11-18 19:38 ` David Sterba
2020-11-18 19:48 ` David Sterba
2020-11-24 6:20 ` Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 14/24] btrfs: disk-io: only clear EXTENT_LOCK bit for extent_invalidatepage() Qu Wenruo
2020-11-19 21:09 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 15/24] btrfs: extent-io: make type of extent_state::state to be at least 32 bits Qu Wenruo
2020-11-13 18:40 ` Josef Bacik
2020-11-18 16:11 ` David Sterba
2020-11-18 23:48 ` Qu Wenruo
2020-11-19 7:18 ` Nikolay Borisov
2020-11-19 21:09 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 16/24] btrfs: file-item: use nodesize to determine whether we need readahead for btrfs_lookup_bio_sums() Qu Wenruo
2020-11-19 21:09 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 17/24] btrfs: file-item: remove the btrfs_find_ordered_sum() call in btrfs_lookup_bio_sums() Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 18/24] btrfs: file-item: refactor btrfs_lookup_bio_sums() to handle out-of-order bvecs Qu Wenruo
2020-11-18 16:27 ` David Sterba
2020-11-18 23:57 ` Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 19/24] btrfs: scrub: remove the anonymous structure from scrub_page Qu Wenruo
2020-11-18 19:00 ` David Sterba
2020-11-19 21:09 ` David Sterba
2020-11-13 12:51 ` [PATCH v2 20/24] btrfs: scrub: always allocate one full page for one sector for RAID56 Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 21/24] btrfs: scrub: support subpage tree block scrub Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 22/24] btrfs: scrub: support subpage data scrub Qu Wenruo
2020-11-18 16:29 ` David Sterba
2020-11-18 23:38 ` Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 23/24] btrfs: scrub: allow scrub to work with subpage sectorsize Qu Wenruo
2020-11-13 12:51 ` [PATCH v2 24/24] btrfs: extent_io: Use detach_page_private() for alloc_extent_buffer() Qu Wenruo
2020-11-13 20:05 ` [PATCH v2 00/24] btrfs: preparation patches for subpage support Josef Bacik
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201113125149.140836-4-wqu@suse.com \
--to=wqu@suse.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).