linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	David Sterba <dsterba@suse.com>
Cc: linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O
Date: Thu, 13 Jul 2023 15:04:29 +0200	[thread overview]
Message-ID: <20230713130431.4798-8-hch@lst.de> (raw)
In-Reply-To: <20230713130431.4798-1-hch@lst.de>

For sub-page I/O, a fast I/O completion can clear the page writeback bit
in the I/O completion handler before subsequent bios were submitted.
Use a trick from iomap and defer submission of bios until we're reached
the end of the page to avoid this race.

Fixes: c5ef5c6c733a ("btrfs: make __extent_writepage_io() only submit dirty range for subpage")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/extent_io.c | 54 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 035f49de024887..994d38f59cbed4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -105,7 +105,8 @@ struct btrfs_bio_ctrl {
 	struct writeback_control *wbc;
 };
 
-static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
+static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl,
+			   struct bio_list *bio_list)
 {
 	struct btrfs_bio *bbio = bio_ctrl->bbio;
 
@@ -118,6 +119,8 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 	if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
 	    bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
 		btrfs_submit_compressed_read(bbio);
+	else if (bio_list)
+		bio_list_add(bio_list, &bbio->bio);
 	else
 		btrfs_submit_bio(bbio, 0);
 
@@ -141,7 +144,22 @@ static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
 		/* The bio is owned by the end_io handler now */
 		bio_ctrl->bbio = NULL;
 	} else {
-		submit_one_bio(bio_ctrl);
+		submit_one_bio(bio_ctrl, NULL);
+	}
+}
+
+static void btrfs_submit_pending_bios(struct bio_list *bio_list, int ret)
+{
+	struct bio *bio;
+
+	if (ret) {
+		blk_status_t status = errno_to_blk_status(ret);
+
+		while ((bio = bio_list_pop(bio_list)))
+			btrfs_bio_end_io(btrfs_bio(bio), status);
+	} else {
+		while ((bio = bio_list_pop(bio_list)))
+			btrfs_submit_bio(btrfs_bio(bio), 0);
 	}
 }
 
@@ -791,7 +809,8 @@ static void alloc_new_bio(struct btrfs_inode *inode,
  */
 static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 			       u64 disk_bytenr, struct page *page,
-			       size_t size, unsigned long pg_offset)
+			       size_t size, unsigned long pg_offset,
+			       struct bio_list *bio_list)
 {
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
 
@@ -800,7 +819,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 
 	if (bio_ctrl->bbio &&
 	    !btrfs_bio_is_contig(bio_ctrl, page, disk_bytenr, pg_offset))
-		submit_one_bio(bio_ctrl);
+		submit_one_bio(bio_ctrl, bio_list);
 
 	do {
 		u32 len = size;
@@ -820,7 +839,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 
 		if (bio_add_page(&bio_ctrl->bbio->bio, page, len, pg_offset) != len) {
 			/* bio full: move on to a new one */
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, bio_list);
 			continue;
 		}
 
@@ -834,7 +853,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 
 		/* Ordered extent boundary: move on to a new bio. */
 		if (bio_ctrl->len_to_oe_boundary == 0)
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, bio_list);
 	} while (size);
 }
 
@@ -1082,14 +1101,14 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		}
 
 		if (bio_ctrl->compress_type != compress_type) {
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, NULL);
 			bio_ctrl->compress_type = compress_type;
 		}
 
 		if (force_bio_submit)
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, NULL);
 		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
-				   pg_offset);
+				   pg_offset, NULL);
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
@@ -1113,7 +1132,7 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
 	 * If btrfs_do_readpage() failed we will want to submit the assembled
 	 * bio to do the cleanup.
 	 */
-	submit_one_bio(&bio_ctrl);
+	submit_one_bio(&bio_ctrl, NULL);
 	return ret;
 }
 
@@ -1287,6 +1306,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 	u64 extent_offset;
 	u64 block_start;
 	struct extent_map *em;
+	struct bio_list bio_list = BIO_EMPTY_LIST;
 	int ret = 0;
 	int nr = 0;
 
@@ -1365,7 +1385,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
 
 		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
-				   cur - page_offset(page));
+				   cur - page_offset(page), &bio_list);
 		cur += iosize;
 		nr++;
 	}
@@ -1378,6 +1398,16 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		set_page_writeback(page);
 		end_page_writeback(page);
 	}
+
+	/*
+	 * Submit all bios we queued up for this page.
+	 *
+	 * The bios are not submitted directly after building them as otherwise
+	 * a very fast I/O completion on an earlier bio could clear the page
+	 * writeback bit before the subsequent bios are even submitted.
+	 */
+	btrfs_submit_pending_bios(&bio_list, ret);
+
 	if (ret) {
 		u32 remaining = end + 1 - cur;
 
@@ -2243,7 +2273,7 @@ void extent_readahead(struct readahead_control *rac)
 
 	if (em_cached)
 		free_extent_map(em_cached);
-	submit_one_bio(&bio_ctrl);
+	submit_one_bio(&bio_ctrl, NULL);
 }
 
 /*
-- 
2.39.2


  parent reply	other threads:[~2023-07-13 13:06 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-13 13:04 small writeback fixes Christoph Hellwig
2023-07-13 13:04 ` [PATCH 1/9] btrfs: don't stop integrity writeback too early Christoph Hellwig
2023-07-13 13:04 ` [PATCH 2/9] btrfs: don't wait for writeback on clean pages in extent_write_cache_pages Christoph Hellwig
2023-07-13 13:04 ` [PATCH 3/9] btrfs: fix an error handling corner case in cow_file_range Christoph Hellwig
2023-07-13 13:04 ` [PATCH 4/9] btrfs: move the cow_fixup earlier in writepages handling Christoph Hellwig
2023-07-13 13:04 ` [PATCH 5/9] btrfs: fix handling of errors from __extent_writepage_io Christoph Hellwig
2023-07-13 13:04 ` [PATCH 6/9] btrfs: stop submitting I/O after an error in extent_write_locked_range Christoph Hellwig
2023-07-13 13:04 ` Christoph Hellwig [this message]
2023-07-13 13:04 ` [PATCH 8/9] btrfs: remove the call to btrfs_mark_ordered_io_finished in btrfs_writepage_fixup_worker Christoph Hellwig
2023-07-13 13:04 ` [PATCH 9/9] btrfs: lift the call to mapping_set_error out of cow_file_range Christoph Hellwig
2023-07-14 13:59 ` small writeback fixes Josef Bacik
2023-07-18 17:17 ` Josef Bacik
2023-07-19  5:39   ` Christoph Hellwig
2023-07-19 11:50     ` Christoph Hellwig
2023-07-19 14:30       ` Josef Bacik
2023-07-19 15:25         ` Christoph Hellwig
2023-07-19 21:42       ` Josef Bacik
2023-07-20  4:48         ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230713130431.4798-8-hch@lst.de \
    --to=hch@lst.de \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).