linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 01/42] btrfs: introduce end_bio_subpage_eb_writepage() function
Date: Thu, 15 Apr 2021 13:04:07 +0800	[thread overview]
Message-ID: <20210415050448.267306-2-wqu@suse.com> (raw)
In-Reply-To: <20210415050448.267306-1-wqu@suse.com>

The new function, end_bio_subpage_eb_writepage(), will handle the
metadata writeback endio.

The major differences involved are:
- How to grab extent buffer
  Now that page::private is a pointer to btrfs_subpage, we can no longer
  grab the extent buffer directly.
  Thus we need to use the bv_offset to locate the extent buffer manually
  and iterate through the whole range.

- Use the btrfs_subpage_clear_writeback() helper
  This helper will handle the subpage writeback for us.

Since this function is executed under endio context, when grabbing
extent buffers it can't grab eb->refs_lock as that lock is not designed
to be grabbed under hardirq context.

So introduce a helper, find_extent_buffer_nospinlock(), for such
situations, and convert find_extent_buffer() to use that helper.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 135 +++++++++++++++++++++++++++++++++----------
 1 file changed, 106 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a50adbd8808d..21a14b1cb065 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4080,13 +4080,97 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
 	}
 }
 
+/*
+ * Endio-safe lookup: find the eb indexed at @start under RCU and take a ref
+ * without touching eb->refs_lock. NULL if absent or its refs already hit zero.
+ */
+static struct extent_buffer *find_extent_buffer_nospinlock(
+		struct btrfs_fs_info *fs_info, u64 start)
+{
+	struct extent_buffer *eb;
+
+	rcu_read_lock();
+	eb = radix_tree_lookup(&fs_info->buffer_radix,
+			       start >> fs_info->sectorsize_bits);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
+		return eb;
+	}
+	rcu_read_unlock();
+	return NULL;
+}
+/*
+ * The endio function for subpage extent buffer write; puts @bio when done.
+ *
+ * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
+ * after all extent buffers in the page have finished their writeback.
+ */
+static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
+					 struct bio *bio)
+{
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		struct page *page = bvec->bv_page;
+		u64 bvec_start = page_offset(page) + bvec->bv_offset;
+		u64 bvec_end = bvec_start + bvec->bv_len - 1;
+		u64 cur_bytenr = bvec_start;
+
+		ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
+
+		/* Walk every extent buffer covered by [bvec_start, bvec_end] */
+		while (cur_bytenr <= bvec_end) {
+			struct extent_buffer *eb;
+			int done;
+
+			/*
+			 * Here we can't use find_extent_buffer(), as it may
+			 * try to lock eb->refs_lock, which is not safe in endio
+			 * context.
+			 */
+			eb = find_extent_buffer_nospinlock(fs_info, cur_bytenr);
+			ASSERT(eb);
+
+			cur_bytenr = eb->start + eb->len;
+
+			ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
+			done = atomic_dec_and_test(&eb->io_pages);
+			ASSERT(done);
+
+			if (bio->bi_status ||
+			    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
+				ClearPageUptodate(page);
+				set_btree_ioerr(page, eb);
+			}
+
+			btrfs_subpage_clear_writeback(fs_info, page, eb->start,
+						      eb->len);
+			end_extent_buffer_writeback(eb);
+			/*
+			 * free_extent_buffer() will grab a spinlock, which is
+			 * not safe in endio context. Thus we manually drop the
+			 * ref taken by find_extent_buffer_nospinlock() above.
+			 */
+			atomic_dec(&eb->refs);
+		}
+	}
+	bio_put(bio);
+}
+
 static void end_bio_extent_buffer_writepage(struct bio *bio)
 {
+	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
 	struct extent_buffer *eb;
 	int done;
 	struct bvec_iter_all iter_all;
 
+	fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
+	if (fs_info->sectorsize < PAGE_SIZE)
+		return end_bio_subpage_eb_writepage(fs_info, bio);
+
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
@@ -5465,36 +5549,29 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 {
 	struct extent_buffer *eb;
 
-	rcu_read_lock();
-	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> fs_info->sectorsize_bits);
-	if (eb && atomic_inc_not_zero(&eb->refs)) {
-		rcu_read_unlock();
-		/*
-		 * Lock our eb's refs_lock to avoid races with
-		 * free_extent_buffer. When we get our eb it might be flagged
-		 * with EXTENT_BUFFER_STALE and another task running
-		 * free_extent_buffer might have seen that flag set,
-		 * eb->refs == 2, that the buffer isn't under IO (dirty and
-		 * writeback flags not set) and it's still in the tree (flag
-		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
-		 * of decrementing the extent buffer's reference count twice.
-		 * So here we could race and increment the eb's reference count,
-		 * clear its stale flag, mark it as dirty and drop our reference
-		 * before the other task finishes executing free_extent_buffer,
-		 * which would later result in an attempt to free an extent
-		 * buffer that is dirty.
-		 */
-		if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
-			spin_lock(&eb->refs_lock);
-			spin_unlock(&eb->refs_lock);
-		}
-		mark_extent_buffer_accessed(eb, NULL);
-		return eb;
+	eb = find_extent_buffer_nospinlock(fs_info, start);
+	if (!eb)
+		return NULL;
+	/*
+	 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
+	 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
+	 * another task running free_extent_buffer() might have seen that flag
+	 * set, eb->refs == 2, that the buffer isn't under IO (dirty and
+	 * writeback flags not set) and it's still in the tree (flag
+	 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+	 * of decrementing the extent buffer's reference count twice.
+	 * So here we could race and increment the eb's reference count,
+	 * clear its stale flag, mark it as dirty and drop our reference
+	 * before the other task finishes executing free_extent_buffer,
+	 * which would later result in an attempt to free an extent
+	 * buffer that is dirty.
+	 */
+	if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+		spin_lock(&eb->refs_lock);
+		spin_unlock(&eb->refs_lock);
 	}
-	rcu_read_unlock();
-
-	return NULL;
+	mark_extent_buffer_accessed(eb, NULL);
+	return eb;
 }
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-- 
2.31.1


  reply	other threads:[~2021-04-15  5:04 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-15  5:04 [PATCH 00/42] btrfs: add full read-write support for subpage Qu Wenruo
2021-04-15  5:04 ` Qu Wenruo [this message]
2021-04-15 18:50   ` [PATCH 01/42] btrfs: introduce end_bio_subpage_eb_writepage() function Josef Bacik
2021-04-15 23:21     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 02/42] btrfs: introduce write_one_subpage_eb() function Qu Wenruo
2021-04-15 19:03   ` Josef Bacik
2021-04-15 23:25     ` Qu Wenruo
2021-04-16 13:26       ` Josef Bacik
2021-04-18 19:45       ` Thiago Jung Bauermann
2021-04-15  5:04 ` [PATCH 03/42] btrfs: make lock_extent_buffer_for_io() to be subpage compatible Qu Wenruo
2021-04-15 19:04   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 04/42] btrfs: introduce submit_eb_subpage() to submit a subpage metadata page Qu Wenruo
2021-04-15 19:27   ` Josef Bacik
2021-04-15 23:28     ` Qu Wenruo
2021-04-16 13:25       ` Josef Bacik
2021-04-15  5:04 ` [PATCH 05/42] btrfs: remove the unused parameter @len for btrfs_bio_fits_in_stripe() Qu Wenruo
2021-04-16 13:46   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 06/42] btrfs: allow btrfs_bio_fits_in_stripe() to accept bio without any page Qu Wenruo
2021-04-16 13:50   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 07/42] btrfs: use u32 for length related members of btrfs_ordered_extent Qu Wenruo
2021-04-16 13:54   ` Josef Bacik
2021-04-16 23:59     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 08/42] btrfs: pass btrfs_inode into btrfs_writepage_endio_finish_ordered() Qu Wenruo
2021-04-16 13:58   ` Josef Bacik
2021-04-17  0:02     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 09/42] btrfs: refactor how we finish ordered extent io for endio functions Qu Wenruo
2021-04-16 14:09   ` Josef Bacik
2021-04-17  0:06     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 10/42] btrfs: update the comments in btrfs_invalidatepage() Qu Wenruo
2021-04-16 14:32   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 11/42] btrfs: refactor btrfs_invalidatepage() Qu Wenruo
2021-04-16 14:42   ` Josef Bacik
2021-04-17  0:13     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 12/42] btrfs: make Private2 lifespan more consistent Qu Wenruo
2021-04-16 14:43   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 13/42] btrfs: rename PagePrivate2 to PageOrdered inside btrfs Qu Wenruo
2021-04-16 14:49   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 14/42] btrfs: pass bytenr directly to __process_pages_contig() Qu Wenruo
2021-04-16 14:58   ` Josef Bacik
2021-04-17  0:15     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 15/42] btrfs: refactor the page status update into process_one_page() Qu Wenruo
2021-04-16 15:06   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 16/42] btrfs: provide btrfs_page_clamp_*() helpers Qu Wenruo
2021-04-16 15:09   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 17/42] btrfs: only require sector size alignment for end_bio_extent_writepage() Qu Wenruo
2021-04-16 15:13   ` Josef Bacik
2021-04-17  0:16     ` Qu Wenruo
2021-04-15  5:04 ` [PATCH 18/42] btrfs: make btrfs_dirty_pages() to be subpage compatible Qu Wenruo
2021-04-16 15:14   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 19/42] btrfs: make __process_pages_contig() to handle subpage dirty/error/writeback status Qu Wenruo
2021-04-16 15:20   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 20/42] btrfs: make end_bio_extent_writepage() to be subpage compatible Qu Wenruo
2021-04-16 15:21   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 21/42] btrfs: make process_one_page() to handle subpage locking Qu Wenruo
2021-04-16 15:36   ` Josef Bacik
2021-04-15  5:04 ` [PATCH 22/42] btrfs: introduce helpers for subpage ordered status Qu Wenruo
2021-04-15  5:04 ` [PATCH 23/42] btrfs: make page Ordered bit to be subpage compatible Qu Wenruo
2021-04-15  5:04 ` [PATCH 24/42] btrfs: update locked page dirty/writeback/error bits in __process_pages_contig Qu Wenruo
2021-04-15  5:04 ` [PATCH 25/42] btrfs: prevent extent_clear_unlock_delalloc() to unlock page not locked by __process_pages_contig() Qu Wenruo
2021-04-15  5:04 ` [PATCH 26/42] btrfs: make btrfs_set_range_writeback() subpage compatible Qu Wenruo
2021-04-15  5:04 ` [PATCH 27/42] btrfs: make __extent_writepage_io() only submit dirty range for subpage Qu Wenruo
2021-04-15  5:04 ` [PATCH 28/42] btrfs: add extra assert for submit_extent_page() Qu Wenruo
2021-04-15  5:04 ` [PATCH 29/42] btrfs: make btrfs_truncate_block() to be subpage compatible Qu Wenruo
2021-04-15  5:04 ` [PATCH 30/42] btrfs: make btrfs_page_mkwrite() " Qu Wenruo
2021-04-15  5:04 ` [PATCH 31/42] btrfs: reflink: make copy_inline_to_page() " Qu Wenruo
2021-04-15  5:04 ` [PATCH 32/42] btrfs: fix the filemap_range_has_page() call in btrfs_punch_hole_lock_range() Qu Wenruo
2021-04-15  5:04 ` [PATCH 33/42] btrfs: don't clear page extent mapped if we're not invalidating the full page Qu Wenruo
2021-04-15  5:04 ` [PATCH 34/42] btrfs: extract relocation page read and dirty part into its own function Qu Wenruo
2021-04-15  5:04 ` [PATCH 35/42] btrfs: make relocate_one_page() to handle subpage case Qu Wenruo
2021-04-15  5:04 ` [PATCH 36/42] btrfs: fix wild subpage writeback which does not have ordered extent Qu Wenruo
2021-04-15  5:04 ` [PATCH 37/42] btrfs: disable inline extent creation for subpage Qu Wenruo
2021-04-15  5:04 ` [PATCH 38/42] btrfs: skip validation for subpage read repair Qu Wenruo
2021-04-15  5:04 ` [PATCH 39/42] btrfs: make free space cache size consistent across different PAGE_SIZE Qu Wenruo
2021-04-15  5:04 ` [PATCH 40/42] btrfs: refactor submit_extent_page() to make bio and its flag tracing easier Qu Wenruo
2021-04-15  5:04 ` [PATCH 41/42] btrfs: allow submit_extent_page() to do bio split for subpage Qu Wenruo
2021-04-15  5:04 ` [PATCH 42/42] btrfs: allow read-write for 4K sectorsize on 64K page size systems Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210415050448.267306-2-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).