linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v4 37/68] btrfs: extent_io: implement subpage metadata read and its endio function
Date: Wed, 21 Oct 2020 14:25:23 +0800	[thread overview]
Message-ID: <20201021062554.68132-38-wqu@suse.com> (raw)
In-Reply-To: <20201021062554.68132-1-wqu@suse.com>

For subpage metadata read, since we're completely relying on the io tree
rather than page bits, its read submission and endio functions are
different from the regular page size case.

For submission part:
- Do extent locking/waiting
  Instead of page locking, we do extent io tree locking, which provides
  subpage granularity locking.

  And since we're no longer relying on full page locking, in theory we
  can submit parallel metadata reads even if they are in the same
  page.

- Submit extent page directly
  To simplify the process, as every metadata read is always contained in
  one page.

For endio part:
- Do extent locking
  The same as submission part, instead of page locking, only rely on
  extent io tree locking.

This behavior has a small problem: extent locking/waiting all need to
allocate memory, thus they can all fail.

Currently we're relying on the BUG_ON() in various set_extent_bits()
calls. But once we start handling the errors from them, this approach
will make it more complex to pass all the ENOMEM errors upwards.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c   | 81 ++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/extent_io.c | 74 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 155 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 10bdb0a8a92f..89021e552da0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -651,6 +651,84 @@ static int btrfs_check_extent_buffer(struct extent_buffer *eb)
 	return ret;
 }
 
+static int btree_read_subpage_endio_hook(struct page *page, u64 start, u64 end,
+					 int mirror)
+{
+	struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+	struct extent_buffer *eb;
+	int reads_done;
+	int ret = 0;
+
+	if (!IS_ALIGNED(start, fs_info->sectorsize) ||
+	    !IS_ALIGNED(end - start + 1, fs_info->sectorsize) ||
+	    !IS_ALIGNED(end - start + 1, fs_info->nodesize)) {
+		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+		btrfs_err(fs_info, "invalid tree read bytenr");
+		return -EUCLEAN;
+	}
+
+	/*
+	 * We don't allow bio merge for subpage metadata read, so each call of
+	 * this endio hook should cover exactly one eb ([start, end] is nodesize).
+	 */
+	ASSERT(end == start + fs_info->nodesize - 1);
+	ASSERT(PagePrivate(page));
+
+	rcu_read_lock();
+	eb = radix_tree_lookup(&fs_info->buffer_radix,
+			       start / fs_info->sectorsize);
+	rcu_read_unlock();
+
+	/*
+	 * When we are reading one tree block, eb must have been
+	 * inserted into the radix tree. If not, something is wrong.
+	 */
+	if (!eb) {
+		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+		btrfs_err(fs_info,
+			"can't find extent buffer for bytenr %llu",
+			start);
+		return -EUCLEAN;
+	}
+	/*
+	 * The pending IO might have been the only thing that kept
+	 * this buffer in memory.  Make sure we have a ref for all
+	 * these other checks; it is dropped again before returning.
+	 */
+	atomic_inc(&eb->refs);
+
+	reads_done = atomic_dec_and_test(&eb->io_pages);
+	/* A subpage eb read is one IO, so io_pages must drop to zero here */
+	ASSERT(reads_done);
+
+	eb->read_mirror= mirror;
+	if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
+		ret = -EIO;
+		goto err;
+	}
+	ret = btrfs_check_extent_buffer(eb);
+	if (ret < 0)
+		goto err;
+
+	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
+		btree_readahead_hook(eb, ret);
+
+	set_extent_buffer_uptodate(eb);
+
+	free_extent_buffer(eb);
+	return ret;
+err:
+	/*
+	 * Our io error hook is going to decrement io_pages
+	 * again, so we have to make sure it has something to
+	 * decrement.
+	 */
+	atomic_inc(&eb->io_pages);
+	clear_extent_buffer_uptodate(eb);
+	free_extent_buffer(eb);
+	return ret;
+}
+
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 				      u64 phy_offset, struct page *page,
 				      u64 start, u64 end, int mirror)
@@ -659,6 +737,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	int ret = 0;
 	bool reads_done;
 
+	if (btrfs_is_subpage(page_to_fs_info(page)))
+		return btree_read_subpage_endio_hook(page, start, end, mirror);
+
 	/* Metadata pages that goes through IO should all have private set */
 	ASSERT(PagePrivate(page) && page->private);
 	eb = (struct extent_buffer *)page->private;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dcc7d4602cea..2f9609d35f0c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3111,6 +3111,15 @@ static int submit_extent_page(unsigned int opf,
 		else
 			contig = bio_end_sector(bio) == sector;
 
+		/*
+		 * For subpage metadata read, never merge request, so that
+		 * we get endio hook called on each metadata read.
+		 */
+		if (btrfs_is_subpage(page_to_fs_info(page)) &&
+		    tree->owner == IO_TREE_BTREE_INODE_IO &&
+		    (opf & REQ_OP_READ))
+			ASSERT(force_bio_submit);
+
 		ASSERT(tree->ops);
 		if (btrfs_bio_fits_in_stripe(page, io_size, bio, bio_flags))
 			can_merge = false;
@@ -5681,6 +5690,68 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 	}
 }
 
+static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
+				      int mirror_num)
+{
+	struct btrfs_fs_info *fs_info = eb->fs_info;
+	struct extent_io_tree *io_tree = info_to_btree_io_tree(fs_info);
+	struct page *page = eb->pages[0];
+	struct bio *bio = NULL;
+	int ret = 0;
+
+	ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
+
+	if (wait == WAIT_NONE) {
+		ret = try_lock_extent(io_tree, eb->start,
+				      eb->start + eb->len - 1);
+		if (ret <= 0)
+			return ret;
+	} else {
+		ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = 0;
+	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
+	    PageUptodate(page) ||
+	    test_range_bit(io_tree, eb->start, eb->start + eb->len - 1,
+			   EXTENT_UPTODATE, 1, NULL)) {
+		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+		unlock_extent(io_tree, eb->start, eb->start + eb->len - 1);
+		return ret;
+	}
+	atomic_set(&eb->io_pages, 1);
+
+	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, page, eb->start,
+				 eb->len, eb->start - page_offset(page), &bio,
+				 end_bio_extent_readpage, mirror_num, 0, 0,
+				 true);
+	if (ret) {
+		/*
+		 * The endio function increases io_pages when it hits an error,
+		 * so decrease it here for the submit error path. NOTE(review):
+		 * this path returns without unlocking the extent locked above.
+		 */
+		atomic_dec(&eb->io_pages);
+	}
+	if (bio) {
+		int tmp;
+
+		tmp = submit_one_bio(bio, mirror_num, 0);
+		if (tmp < 0)
+			return tmp;
+	}
+	if (ret || wait != WAIT_COMPLETE)
+		return ret;
+
+	wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
+			EXTENT_LOCKED);
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
+		ret = -EIO;
+	return ret;
+}
+
 int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 {
 	int i;
@@ -5697,6 +5768,9 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
 
+	if (btrfs_is_subpage(eb->fs_info))
+		return read_extent_buffer_subpage(eb, wait, mirror_num);
+
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
-- 
2.28.0


  parent reply	other threads:[~2020-10-21  6:27 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-21  6:24 [PATCH v4 00/68] btrfs: add basic rw support for subpage sector size Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 01/68] btrfs: extent-io-tests: remove invalid tests Qu Wenruo
2020-10-26 23:26   ` David Sterba
2020-10-27  0:44     ` Qu Wenruo
2020-11-03  6:07       ` Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 02/68] btrfs: use iosize while reading compressed pages Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 03/68] btrfs: extent_io: fix the comment on lock_extent_buffer_for_io() Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 04/68] btrfs: extent_io: update the comment for find_first_extent_bit() Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 05/68] btrfs: extent_io: sink the @failed_start parameter for set_extent_bit() Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 06/68] btrfs: make btree inode io_tree has its special owner Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 07/68] btrfs: disk-io: replace @fs_info and @private_data with @inode for btrfs_wq_submit_bio() Qu Wenruo
2020-10-21 22:00   ` Goldwyn Rodrigues
2020-10-21  6:24 ` [PATCH v4 08/68] btrfs: inode: sink parameter @start and @len for check_data_csum() Qu Wenruo
2020-10-21 22:11   ` Goldwyn Rodrigues
2020-10-27  0:13   ` David Sterba
2020-10-27  0:50     ` Qu Wenruo
2020-10-27 23:17       ` David Sterba
2020-10-28  0:57         ` Qu Wenruo
2020-10-29 19:38           ` David Sterba
2020-10-21  6:24 ` [PATCH v4 09/68] btrfs: extent_io: unexport extent_invalidatepage() Qu Wenruo
2020-10-27  0:24   ` David Sterba
2020-10-21  6:24 ` [PATCH v4 10/68] btrfs: extent_io: remove the forward declaration and rename __process_pages_contig Qu Wenruo
2020-10-27  0:28   ` David Sterba
2020-10-27  0:50     ` Qu Wenruo
2020-10-27 23:25       ` David Sterba
2020-10-21  6:24 ` [PATCH v4 11/68] btrfs: extent_io: rename pages_locked in process_pages_contig() Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 12/68] btrfs: extent_io: only require sector size alignment for page read Qu Wenruo
2020-10-21  6:24 ` [PATCH v4 13/68] btrfs: extent_io: remove the extent_start/extent_len for end_bio_extent_readpage() Qu Wenruo
2020-10-27 10:29   ` David Sterba
2020-10-27 12:15     ` Qu Wenruo
2020-10-27 23:31       ` David Sterba
2020-10-21  6:25 ` [PATCH v4 14/68] btrfs: extent_io: integrate page status update into endio_readpage_release_extent() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 15/68] btrfs: extent_io: rename page_size to io_size in submit_extent_page() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 16/68] btrfs: extent_io: add assert_spin_locked() for attach_extent_buffer_page() Qu Wenruo
2020-10-27 10:43   ` David Sterba
2020-10-21  6:25 ` [PATCH v4 17/68] btrfs: extent_io: extract the btree page submission code into its own helper function Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 18/68] btrfs: extent_io: calculate inline extent buffer page size based on page size Qu Wenruo
2020-10-27 11:16   ` David Sterba
2020-10-27 11:20     ` David Sterba
2020-10-21  6:25 ` [PATCH v4 19/68] btrfs: extent_io: make btrfs_fs_info::buffer_radix to take sector size devided values Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 20/68] btrfs: extent_io: sink less common parameters for __set_extent_bit() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 21/68] btrfs: extent_io: sink less common parameters for __clear_extent_bit() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 22/68] btrfs: disk_io: grab fs_info from extent_buffer::fs_info directly for btrfs_mark_buffer_dirty() Qu Wenruo
2020-10-27 15:43   ` Goldwyn Rodrigues
2020-10-21  6:25 ` [PATCH v4 23/68] btrfs: disk-io: make csum_tree_block() handle sectorsize smaller than page size Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 24/68] btrfs: disk-io: extract the extent buffer verification from btree_readpage_end_io_hook() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 25/68] btrfs: disk-io: accept bvec directly for csum_dirty_buffer() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 26/68] btrfs: inode: make btrfs_readpage_end_io_hook() follow sector size Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 27/68] btrfs: introduce a helper to determine if the sectorsize is smaller than PAGE_SIZE Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 28/68] btrfs: extent_io: allow find_first_extent_bit() to find a range with exact bits match Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 29/68] btrfs: extent_io: don't allow tree block to cross page boundary for subpage support Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 30/68] btrfs: extent_io: update num_extent_pages() to support subpage sized extent buffer Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 31/68] btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 32/68] btrfs: disk-io: only clear EXTENT_LOCK bit for extent_invalidatepage() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 33/68] btrfs: extent-io: make type of extent_state::state to be at least 32 bits Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 34/68] btrfs: extent_io: use extent_io_tree to handle subpage extent buffer allocation Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 35/68] btrfs: extent_io: make set/clear_extent_buffer_uptodate() to support subpage size Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 36/68] btrfs: extent_io: make the assert test on page uptodate able to handle subpage Qu Wenruo
2020-10-21  6:25 ` Qu Wenruo [this message]
2020-10-21  6:25 ` [PATCH v4 38/68] btrfs: extent_io: implement try_release_extent_buffer() for subpage metadata support Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 39/68] btrfs: extent_io: extra the core of test_range_bit() into test_range_bit_nolock() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 40/68] btrfs: extent_io: introduce EXTENT_READ_SUBMITTED to handle subpage data read Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 41/68] btrfs: set btree inode track_uptodate for subpage support Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 42/68] btrfs: allow RO mount of 4K sector size fs on 64K page system Qu Wenruo
2020-10-29 20:11   ` David Sterba
2020-10-29 23:34   ` Michał Mirosław
2020-10-29 23:56     ` Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 43/68] btrfs: disk-io: allow btree_set_page_dirty() to do more sanity check on subpage metadata Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 44/68] btrfs: disk-io: support subpage metadata csum calculation at write time Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 45/68] btrfs: extent_io: prevent extent_state from being merged for btree io tree Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 46/68] btrfs: extent_io: make set_extent_buffer_dirty() to support subpage sized metadata Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 47/68] btrfs: extent_io: add subpage support for clear_extent_buffer_dirty() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 48/68] btrfs: extent_io: make set_btree_ioerr() accept extent buffer Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 49/68] btrfs: extent_io: introduce write_one_subpage_eb() function Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 50/68] btrfs: extent_io: make lock_extent_buffer_for_io() subpage compatible Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 51/68] btrfs: extent_io: introduce submit_btree_subpage() to submit a page for subpage metadata write Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 52/68] btrfs: extent_io: introduce end_bio_subpage_eb_writepage() function Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 53/68] btrfs: inode: make can_nocow_extent() check only return 1 if the range is no smaller than PAGE_SIZE Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 54/68] btrfs: file: calculate reserve space based on PAGE_SIZE for buffered write Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 55/68] btrfs: file: make hole punching page aligned for subpage Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 56/68] btrfs: file: make btrfs_dirty_pages() follow page size to mark extent io tree Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 57/68] btrfs: file: make btrfs_file_write_iter() to be page aligned Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 58/68] btrfs: output extra info for space info update underflow Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 59/68] btrfs: delalloc-space: make data space reservation to be page aligned Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 60/68] btrfs: scrub: allow scrub to work with subpage sectorsize Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 61/68] btrfs: inode: make btrfs_truncate_block() to do page alignment Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 62/68] btrfs: file: make hole punch and zero range to be page aligned Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 63/68] btrfs: file: make btrfs_fallocate() to use PAGE_SIZE as blocksize Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 64/68] btrfs: inode: always mark the full page range delalloc for btrfs_page_mkwrite() Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 65/68] btrfs: inode: require page alignement for direct io Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 66/68] btrfs: inode: only do NOCOW write for page aligned extent Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 67/68] btrfs: reflink: do full page writeback for reflink prepare Qu Wenruo
2020-10-21  6:25 ` [PATCH v4 68/68] btrfs: support subpage read write for test Qu Wenruo
2020-10-21 11:22 ` [PATCH v4 00/68] btrfs: add basic rw support for subpage sector size David Sterba
2020-10-21 11:50   ` Qu Wenruo
2020-11-02 14:56 ` David Sterba
2020-11-03  0:06   ` Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201021062554.68132-38-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).