All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boris Burkov <boris@bur.io>
To: Christoph Hellwig <hch@lst.de>
Cc: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	David Sterba <dsterba@suse.com>,
	linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O
Date: Wed, 2 Aug 2023 17:59:46 -0700	[thread overview]
Message-ID: <20230803005946.GD1934467@zen> (raw)
In-Reply-To: <20230724132701.816771-8-hch@lst.de>

On Mon, Jul 24, 2023 at 06:26:59AM -0700, Christoph Hellwig wrote:
> For sub-page I/O, a fast I/O completion can clear the page writeback bit
> in the I/O completion handler before subsequent bios were submitted.
> Use a trick from iomap and defer submission of bios until we're reached
> the end of the page to avoid this race.

This LGTM in that I don't see a bug.

However, I'm confused why it's exactly necessary: doesn't the existing
logic already only allocate one bio and calls bio_add_page on it in a
loop. And bio_add_page handles the subpage blocksize case.

As far as I can tell, it tries to do it sequentially so it should be
contiguous. Are you worried about non-contiguous writes within one page
resulting in separate bios? In that case, why would a completion clear
the writeback bit on the page if the whole page isn't written back? I
guess that could be tricky to do and this is the correct solution?

Thanks,
Boris

> 
> Fixes: c5ef5c6c733a ("btrfs: make __extent_writepage_io() only submit dirty range for subpage")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/extent_io.c | 54 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index fada7a1931b130..48a49c57daa6fd 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -105,7 +105,8 @@ struct btrfs_bio_ctrl {
>  	struct writeback_control *wbc;
>  };
>  
> -static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
> +static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl,
> +			   struct bio_list *bio_list)
>  {
>  	struct btrfs_bio *bbio = bio_ctrl->bbio;
>  
> @@ -118,6 +119,8 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
>  	if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
>  	    bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
>  		btrfs_submit_compressed_read(bbio);
> +	else if (bio_list)
> +		bio_list_add(bio_list, &bbio->bio);
>  	else
>  		btrfs_submit_bio(bbio, 0);
>  
> @@ -141,7 +144,22 @@ static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
>  		/* The bio is owned by the end_io handler now */
>  		bio_ctrl->bbio = NULL;
>  	} else {
> -		submit_one_bio(bio_ctrl);
> +		submit_one_bio(bio_ctrl, NULL);
> +	}
> +}
> +
> +static void btrfs_submit_pending_bios(struct bio_list *bio_list, int ret)
> +{
> +	struct bio *bio;
> +
> +	if (ret) {
> +		blk_status_t status = errno_to_blk_status(ret);
> +
> +		while ((bio = bio_list_pop(bio_list)))
> +			btrfs_bio_end_io(btrfs_bio(bio), status);
> +	} else {
> +		while ((bio = bio_list_pop(bio_list)))
> +			btrfs_submit_bio(btrfs_bio(bio), 0);
>  	}
>  }
>  
> @@ -791,7 +809,8 @@ static void alloc_new_bio(struct btrfs_inode *inode,
>   */
>  static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  			       u64 disk_bytenr, struct page *page,
> -			       size_t size, unsigned long pg_offset)
> +			       size_t size, unsigned long pg_offset,
> +			       struct bio_list *bio_list)
>  {
>  	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
>  
> @@ -800,7 +819,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  	if (bio_ctrl->bbio &&
>  	    !btrfs_bio_is_contig(bio_ctrl, page, disk_bytenr, pg_offset))
> -		submit_one_bio(bio_ctrl);
> +		submit_one_bio(bio_ctrl, bio_list);
>  
>  	do {
>  		u32 len = size;
> @@ -820,7 +839,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  		if (bio_add_page(&bio_ctrl->bbio->bio, page, len, pg_offset) != len) {
>  			/* bio full: move on to a new one */
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, bio_list);
>  			continue;
>  		}
>  
> @@ -834,7 +853,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  		/* Ordered extent boundary: move on to a new bio. */
>  		if (bio_ctrl->len_to_oe_boundary == 0)
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, bio_list);
>  	} while (size);
>  }
>  
> @@ -1082,14 +1101,14 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
>  		}
>  
>  		if (bio_ctrl->compress_type != compress_type) {
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, NULL);
>  			bio_ctrl->compress_type = compress_type;
>  		}
>  
>  		if (force_bio_submit)
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, NULL);
>  		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
> -				   pg_offset);
> +				   pg_offset, NULL);
>  		cur = cur + iosize;
>  		pg_offset += iosize;
>  	}
> @@ -1113,7 +1132,7 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
>  	 * If btrfs_do_readpage() failed we will want to submit the assembled
>  	 * bio to do the cleanup.
>  	 */
> -	submit_one_bio(&bio_ctrl);
> +	submit_one_bio(&bio_ctrl, NULL);
>  	return ret;
>  }
>  
> @@ -1287,6 +1306,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  	u64 extent_offset;
>  	u64 block_start;
>  	struct extent_map *em;
> +	struct bio_list bio_list = BIO_EMPTY_LIST;
>  	int ret = 0;
>  	int nr = 0;
>  
> @@ -1365,7 +1385,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
>  
>  		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
> -				   cur - page_offset(page));
> +				   cur - page_offset(page), &bio_list);
>  		cur += iosize;
>  		nr++;
>  	}
> @@ -1378,6 +1398,16 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  		set_page_writeback(page);
>  		end_page_writeback(page);
>  	}
> +
> +	/*
> +	 * Submit all bios we queued up for this page.
> +	 *
> +	 * The bios are not submitted directly after building them as otherwise
> +	 * a very fast I/O completion on an earlier bio could clear the page
> +	 * writeback bit before the subsequent bios are even submitted.
> +	 */
> +	btrfs_submit_pending_bios(&bio_list, ret);
> +
>  	if (ret) {
>  		u32 remaining = end + 1 - cur;
>  
> @@ -2243,7 +2273,7 @@ void extent_readahead(struct readahead_control *rac)
>  
>  	if (em_cached)
>  		free_extent_map(em_cached);
> -	submit_one_bio(&bio_ctrl);
> +	submit_one_bio(&bio_ctrl, NULL);
>  }
>  
>  /*
> -- 
> 2.39.2
> 

  reply	other threads:[~2023-08-03  1:01 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-24 13:26 small writeback fixes v2 Christoph Hellwig
2023-07-24 13:26 ` [PATCH 1/9] btrfs: don't stop integrity writeback too early Christoph Hellwig
2023-08-02 22:49   ` Boris Burkov
2023-07-24 13:26 ` [PATCH 2/9] btrfs: don't wait for writeback on clean pages in extent_write_cache_pages Christoph Hellwig
2023-07-24 13:26 ` [PATCH 3/9] btrfs: fix an error handling corner case in cow_file_range Christoph Hellwig
2023-07-24 13:26 ` [PATCH 4/9] btrfs: move the cow_fixup earlier in writepages handling Christoph Hellwig
2023-08-03  0:21   ` Boris Burkov
2023-07-24 13:26 ` [PATCH 5/9] btrfs: fix handling of errors from __extent_writepage_io Christoph Hellwig
2023-07-24 13:26 ` [PATCH 6/9] btrfs: stop submitting I/O after an error in extent_write_locked_range Christoph Hellwig
2023-08-03  0:42   ` Boris Burkov
2023-07-24 13:26 ` [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O Christoph Hellwig
2023-08-03  0:59   ` Boris Burkov [this message]
2023-07-24 13:27 ` [PATCH 8/9] btrfs: remove the call to btrfs_mark_ordered_io_finished in btrfs_writepage_fixup_worker Christoph Hellwig
2023-07-24 13:27 ` [PATCH 9/9] btrfs: lift the call to mapping_set_error out of cow_file_range Christoph Hellwig
2023-07-27 17:06 ` small writeback fixes v2 David Sterba
2023-08-01 15:29   ` Christoph Hellwig
2023-08-01 15:37     ` Josef Bacik
2023-08-02 12:49     ` Josef Bacik
2023-08-02 15:16       ` Christoph Hellwig
2023-08-02 15:35         ` Josef Bacik
2023-08-03 17:11           ` Boris Burkov
2023-08-02 17:55         ` Josef Bacik
2023-08-09 14:13   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2023-07-13 13:04 small writeback fixes Christoph Hellwig
2023-07-13 13:04 ` [PATCH 7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230803005946.GD1934467@zen \
    --to=boris@bur.io \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=hch@lst.de \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.