public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Boris Burkov <boris@bur.io>
To: Christoph Hellwig <hch@lst.de>
Cc: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	David Sterba <dsterba@suse.com>,
	linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O
Date: Wed, 2 Aug 2023 17:59:46 -0700	[thread overview]
Message-ID: <20230803005946.GD1934467@zen> (raw)
In-Reply-To: <20230724132701.816771-8-hch@lst.de>

On Mon, Jul 24, 2023 at 06:26:59AM -0700, Christoph Hellwig wrote:
> For sub-page I/O, a fast I/O completion can clear the page writeback bit
> in the I/O completion handler before subsequent bios were submitted.
> Use a trick from iomap and defer submission of bios until we're reached
> the end of the page to avoid this race.

This LGTM in that I don't see a bug.

However, I'm confused why it's exactly necessary: doesn't the existing
logic already only allocate one bio and calls bio_add_page on it in a
loop. And bio_add_page handles the subpage blocksize case.

As far as I can tell, it tries to do it sequentially so it should be
contiguous. Are you worried about non-contiguous writes within one page
resulting in separate bios? In that case, why would a completion clear
the writeback bit on the page if the whole page isn't written back? I
guess that could be tricky to do and this is the correct solution?

Thanks,
Boris

> 
> Fixes: c5ef5c6c733a ("btrfs: make __extent_writepage_io() only submit dirty range for subpage")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/extent_io.c | 54 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index fada7a1931b130..48a49c57daa6fd 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -105,7 +105,8 @@ struct btrfs_bio_ctrl {
>  	struct writeback_control *wbc;
>  };
>  
> -static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
> +static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl,
> +			   struct bio_list *bio_list)
>  {
>  	struct btrfs_bio *bbio = bio_ctrl->bbio;
>  
> @@ -118,6 +119,8 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
>  	if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
>  	    bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
>  		btrfs_submit_compressed_read(bbio);
> +	else if (bio_list)
> +		bio_list_add(bio_list, &bbio->bio);
>  	else
>  		btrfs_submit_bio(bbio, 0);
>  
> @@ -141,7 +144,22 @@ static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
>  		/* The bio is owned by the end_io handler now */
>  		bio_ctrl->bbio = NULL;
>  	} else {
> -		submit_one_bio(bio_ctrl);
> +		submit_one_bio(bio_ctrl, NULL);
> +	}
> +}
> +
> +static void btrfs_submit_pending_bios(struct bio_list *bio_list, int ret)
> +{
> +	struct bio *bio;
> +
> +	if (ret) {
> +		blk_status_t status = errno_to_blk_status(ret);
> +
> +		while ((bio = bio_list_pop(bio_list)))
> +			btrfs_bio_end_io(btrfs_bio(bio), status);
> +	} else {
> +		while ((bio = bio_list_pop(bio_list)))
> +			btrfs_submit_bio(btrfs_bio(bio), 0);
>  	}
>  }
>  
> @@ -791,7 +809,8 @@ static void alloc_new_bio(struct btrfs_inode *inode,
>   */
>  static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  			       u64 disk_bytenr, struct page *page,
> -			       size_t size, unsigned long pg_offset)
> +			       size_t size, unsigned long pg_offset,
> +			       struct bio_list *bio_list)
>  {
>  	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
>  
> @@ -800,7 +819,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  	if (bio_ctrl->bbio &&
>  	    !btrfs_bio_is_contig(bio_ctrl, page, disk_bytenr, pg_offset))
> -		submit_one_bio(bio_ctrl);
> +		submit_one_bio(bio_ctrl, bio_list);
>  
>  	do {
>  		u32 len = size;
> @@ -820,7 +839,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  		if (bio_add_page(&bio_ctrl->bbio->bio, page, len, pg_offset) != len) {
>  			/* bio full: move on to a new one */
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, bio_list);
>  			continue;
>  		}
>  
> @@ -834,7 +853,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  		/* Ordered extent boundary: move on to a new bio. */
>  		if (bio_ctrl->len_to_oe_boundary == 0)
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, bio_list);
>  	} while (size);
>  }
>  
> @@ -1082,14 +1101,14 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
>  		}
>  
>  		if (bio_ctrl->compress_type != compress_type) {
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, NULL);
>  			bio_ctrl->compress_type = compress_type;
>  		}
>  
>  		if (force_bio_submit)
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, NULL);
>  		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
> -				   pg_offset);
> +				   pg_offset, NULL);
>  		cur = cur + iosize;
>  		pg_offset += iosize;
>  	}
> @@ -1113,7 +1132,7 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
>  	 * If btrfs_do_readpage() failed we will want to submit the assembled
>  	 * bio to do the cleanup.
>  	 */
> -	submit_one_bio(&bio_ctrl);
> +	submit_one_bio(&bio_ctrl, NULL);
>  	return ret;
>  }
>  
> @@ -1287,6 +1306,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  	u64 extent_offset;
>  	u64 block_start;
>  	struct extent_map *em;
> +	struct bio_list bio_list = BIO_EMPTY_LIST;
>  	int ret = 0;
>  	int nr = 0;
>  
> @@ -1365,7 +1385,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
>  
>  		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
> -				   cur - page_offset(page));
> +				   cur - page_offset(page), &bio_list);
>  		cur += iosize;
>  		nr++;
>  	}
> @@ -1378,6 +1398,16 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  		set_page_writeback(page);
>  		end_page_writeback(page);
>  	}
> +
> +	/*
> +	 * Submit all bios we queued up for this page.
> +	 *
> +	 * The bios are not submitted directly after building them as otherwise
> +	 * a very fast I/O completion on an earlier bio could clear the page
> +	 * writeback bit before the subsequent bios are even submitted.
> +	 */
> +	btrfs_submit_pending_bios(&bio_list, ret);
> +
>  	if (ret) {
>  		u32 remaining = end + 1 - cur;
>  
> @@ -2243,7 +2273,7 @@ void extent_readahead(struct readahead_control *rac)
>  
>  	if (em_cached)
>  		free_extent_map(em_cached);
> -	submit_one_bio(&bio_ctrl);
> +	submit_one_bio(&bio_ctrl, NULL);
>  }
>  
>  /*
> -- 
> 2.39.2
> 

  reply	other threads:[~2023-08-03  1:01 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-24 13:26 small writeback fixes v2 Christoph Hellwig
2023-07-24 13:26 ` [PATCH 1/9] btrfs: don't stop integrity writeback too early Christoph Hellwig
2023-08-02 22:49   ` Boris Burkov
2023-07-24 13:26 ` [PATCH 2/9] btrfs: don't wait for writeback on clean pages in extent_write_cache_pages Christoph Hellwig
2023-07-24 13:26 ` [PATCH 3/9] btrfs: fix an error handling corner case in cow_file_range Christoph Hellwig
2023-07-24 13:26 ` [PATCH 4/9] btrfs: move the cow_fixup earlier in writepages handling Christoph Hellwig
2023-08-03  0:21   ` Boris Burkov
2023-07-24 13:26 ` [PATCH 5/9] btrfs: fix handling of errors from __extent_writepage_io Christoph Hellwig
2023-07-24 13:26 ` [PATCH 6/9] btrfs: stop submitting I/O after an error in extent_write_locked_range Christoph Hellwig
2023-08-03  0:42   ` Boris Burkov
2023-07-24 13:26 ` [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O Christoph Hellwig
2023-08-03  0:59   ` Boris Burkov [this message]
2023-07-24 13:27 ` [PATCH 8/9] btrfs: remove the call to btrfs_mark_ordered_io_finished in btrfs_writepage_fixup_worker Christoph Hellwig
2023-07-24 13:27 ` [PATCH 9/9] btrfs: lift the call to mapping_set_error out of cow_file_range Christoph Hellwig
2023-07-27 17:06 ` small writeback fixes v2 David Sterba
2023-08-01 15:29   ` Christoph Hellwig
2023-08-01 15:37     ` Josef Bacik
2023-08-02 12:49     ` Josef Bacik
2023-08-02 15:16       ` Christoph Hellwig
2023-08-02 15:35         ` Josef Bacik
2023-08-03 17:11           ` Boris Burkov
2023-08-02 17:55         ` Josef Bacik
2023-08-09 14:13   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2023-07-13 13:04 small writeback fixes Christoph Hellwig
2023-07-13 13:04 ` [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230803005946.GD1934467@zen \
    --to=boris@bur.io \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=hch@lst.de \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox