From: Josef Bacik <josef@toxicpanda.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Chris Mason <clm@fb.com>, David Sterba <dsterba@suse.com>,
Damien Le Moal <damien.lemoal@wdc.com>,
Naohiro Aota <naohiro.aota@wdc.com>,
Johannes Thumshirn <johannes.thumshirn@wdc.com>,
Qu Wenruo <wqu@suse.com>, Jens Axboe <axboe@kernel.dk>,
"Darrick J. Wong" <djwong@kernel.org>,
linux-block@vger.kernel.org, linux-btrfs@vger.kernel.org,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 23/34] btrfs: allow btrfs_submit_bio to split bios
Date: Wed, 25 Jan 2023 16:51:16 -0500 [thread overview]
Message-ID: <Y9GkVONZJFXVe8AH@localhost.localdomain> (raw)
In-Reply-To: <20230121065031.1139353-24-hch@lst.de>
On Sat, Jan 21, 2023 at 07:50:20AM +0100, Christoph Hellwig wrote:
> Currently the I/O submitters have to split bios according to the
> chunk stripe boundaries. This leads to extra lookups in the extent
> trees and a lot of boilerplate code.
>
> To drop this requirement, split the bio when __btrfs_map_block
> returns a mapping that is smaller than the requested size and
> keep a count of pending bios in the original btrfs_bio so that
> the upper level completion is only invoked when all clones have
> completed.
>
> Based on a patch from Qu Wenruo.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> Reviewed-by: Qu Wenruo <wqu@suse.com>
> ---
> fs/btrfs/bio.c | 108 ++++++++++++++++++++++++++++++++++++++++---------
> fs/btrfs/bio.h | 1 +
> 2 files changed, 91 insertions(+), 18 deletions(-)
>
> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
> index c7522ac7e0e71c..ff42b783902140 100644
> --- a/fs/btrfs/bio.c
> +++ b/fs/btrfs/bio.c
> @@ -17,6 +17,7 @@
> #include "file-item.h"
>
> static struct bio_set btrfs_bioset;
> +static struct bio_set btrfs_clone_bioset;
> static struct bio_set btrfs_repair_bioset;
> static mempool_t btrfs_failed_bio_pool;
>
> @@ -38,6 +39,7 @@ static inline void btrfs_bio_init(struct btrfs_bio *bbio,
> bbio->inode = inode;
> bbio->end_io = end_io;
> bbio->private = private;
> + atomic_set(&bbio->pending_ios, 1);
> }
>
> /*
> @@ -75,6 +77,58 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
> return bio;
> }
>
> +static struct bio *btrfs_split_bio(struct bio *orig, u64 map_length)
> +{
> + struct btrfs_bio *orig_bbio = btrfs_bio(orig);
> + struct bio *bio;
> +
> + bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
> + &btrfs_clone_bioset);
> + btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio);
> +
> + btrfs_bio(bio)->file_offset = orig_bbio->file_offset;
> + if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED))
> + orig_bbio->file_offset += map_length;
> +
> + atomic_inc(&orig_bbio->pending_ios);
> + return bio;
> +}
> +
> +static void btrfs_orig_write_end_io(struct bio *bio);
> +static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
> + struct btrfs_bio *orig_bbio)
> +{
> + /*
> + * For writes btrfs tolerates nr_mirrors - 1 write failures, so we
> + * can't just blindly propagate a write failure here.
> + * Instead increment the error count in the original I/O context so
> + * that it is guaranteed to be larger than the error tolerance.
> + */
> + if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
> + struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
> + struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
> +
> + atomic_add(orig_bioc->max_errors, &orig_bioc->error);
> + } else {
> + orig_bbio->bio.bi_status = bbio->bio.bi_status;
> + }
> +}
> +
> +static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
> +{
> + if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
> + struct btrfs_bio *orig_bbio = bbio->private;
> +
> + if (bbio->bio.bi_status)
> + btrfs_bbio_propagate_error(bbio, orig_bbio);
> + bio_put(&bbio->bio);
> + bbio = orig_bbio;
> + }
> +
> + if (atomic_dec_and_test(&bbio->pending_ios))
> + bbio->end_io(bbio);
> +}
> +
> static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
> {
> if (cur_mirror == fbio->num_copies)
> @@ -92,7 +146,7 @@ static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
> static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
> {
> if (atomic_dec_and_test(&fbio->repair_count)) {
> - fbio->bbio->end_io(fbio->bbio);
> + btrfs_orig_bbio_end_io(fbio->bbio);
> mempool_free(fbio, &btrfs_failed_bio_pool);
> }
> }
> @@ -232,7 +286,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio,
> if (unlikely(fbio))
> btrfs_repair_done(fbio);
> else
> - bbio->end_io(bbio);
> + btrfs_orig_bbio_end_io(bbio);
> }
>
> static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
> @@ -286,7 +340,7 @@ static void btrfs_simple_end_io(struct bio *bio)
> } else {
> if (bio_op(bio) == REQ_OP_ZONE_APPEND)
> btrfs_record_physical_zoned(bbio);
> - bbio->end_io(bbio);
> + btrfs_orig_bbio_end_io(bbio);
> }
> }
>
> @@ -300,7 +354,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
> if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META))
> btrfs_check_read_bio(bbio, NULL);
> else
> - bbio->end_io(bbio);
> + btrfs_orig_bbio_end_io(bbio);
>
> btrfs_put_bioc(bioc);
> }
> @@ -327,7 +381,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
> else
> bio->bi_status = BLK_STS_OK;
>
> - bbio->end_io(bbio);
> + btrfs_orig_bbio_end_io(bbio);
> btrfs_put_bioc(bioc);
> }
>
> @@ -492,7 +546,7 @@ static void run_one_async_done(struct btrfs_work *work)
>
> /* If an error occurred we just want to clean up the bio and move on */
> if (bio->bi_status) {
> - btrfs_bio_end_io(async->bbio, bio->bi_status);
> + btrfs_orig_bbio_end_io(async->bbio);
> return;
> }
>
> @@ -567,8 +621,8 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
> return true;
> }
>
> -void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
> - int mirror_num)
> +static bool btrfs_submit_chunk(struct btrfs_fs_info *fs_info, struct bio *bio,
> + int mirror_num)
> {
> struct btrfs_bio *bbio = btrfs_bio(bio);
> u64 logical = bio->bi_iter.bi_sector << 9;
> @@ -587,11 +641,10 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
> goto fail;
> }
>
> + map_length = min(map_length, length);
> if (map_length < length) {
> - btrfs_crit(fs_info,
> - "mapping failed logical %llu bio len %llu len %llu",
> - logical, length, map_length);
> - BUG();
> + bio = btrfs_split_bio(bio, map_length);
> + bbio = btrfs_bio(bio);
> }
>
> /*
> @@ -602,14 +655,14 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
> bbio->saved_iter = bio->bi_iter;
> ret = btrfs_lookup_bio_sums(bbio);
> if (ret)
> - goto fail;
> + goto fail_put_bio;
> }
>
> if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
> if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> ret = btrfs_extract_ordered_extent(btrfs_bio(bio));
> if (ret)
> - goto fail;
> + goto fail_put_bio;
> }
>
> /*
> @@ -621,20 +674,33 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
> !btrfs_is_data_reloc_root(bbio->inode->root)) {
> if (should_async_write(bbio) &&
> btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
> - return;
> + goto done;
>
> ret = btrfs_bio_csum(bbio);
> if (ret)
> - goto fail;
> + goto fail_put_bio;
> }
> }
>
> __btrfs_submit_bio(bio, bioc, &smap, mirror_num);
> - return;
> +done:
> + return map_length == length;
>
> +fail_put_bio:
> + if (map_length < length)
> + bio_put(bio);
This is causing a panic in btrfs/125: after the split, bbio is set to
btrfs_bio(bio) for the split bio, which has a NULL end_io, so the failure path
calls btrfs_bio_end_io() on the wrong bbio. You need something like the
following so that we end the original bbio instead. Thanks,
Josef
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 5d4b67fc44f4..f3a357c48e69 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -607,6 +607,7 @@ static bool btrfs_submit_chunk(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num)
{
struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct btrfs_bio *orig_bbio = bbio;
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
@@ -673,7 +674,7 @@ static bool btrfs_submit_chunk(struct btrfs_fs_info *fs_info, struct bio *bio,
bio_put(bio);
fail:
btrfs_bio_counter_dec(fs_info);
- btrfs_bio_end_io(bbio, ret);
+ btrfs_bio_end_io(orig_bbio, ret);
/* Do not submit another chunk */
return true;
}
next prev parent reply other threads:[~2023-01-25 21:52 UTC|newest]
Thread overview: 80+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-21 6:49 consolidate btrfs checksumming, repair and bio splitting v4 Christoph Hellwig
2023-01-21 6:49 ` [PATCH 01/34] block: export bio_split_rw Christoph Hellwig
2023-01-21 6:49 ` [PATCH 02/34] btrfs: better document struct btrfs_bio Christoph Hellwig
2023-01-22 10:19 ` Anand Jain
2023-01-23 15:25 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 03/34] btrfs: add a btrfs_inode pointer to " Christoph Hellwig
2023-01-22 10:20 ` Anand Jain
2023-01-23 15:26 ` Johannes Thumshirn
2023-01-23 15:47 ` Johannes Thumshirn
2023-03-07 1:44 ` Qu Wenruo
2023-03-07 14:41 ` Christoph Hellwig
2023-03-07 22:21 ` Qu Wenruo
2023-03-08 6:04 ` Qu Wenruo
2023-03-08 14:28 ` Christoph Hellwig
2023-03-09 0:08 ` Qu Wenruo
2023-03-09 9:31 ` Christoph Hellwig
2023-03-09 10:32 ` Qu Wenruo
2023-03-09 15:18 ` Christoph Hellwig
2023-01-21 6:50 ` [PATCH 04/34] btrfs: remove the direct I/O read checksum lookup optimization Christoph Hellwig
2023-01-23 15:59 ` Johannes Thumshirn
2023-01-24 14:55 ` Anand Jain
2023-01-24 19:55 ` Christoph Hellwig
2023-01-25 7:42 ` Anand Jain
2023-01-21 6:50 ` [PATCH 05/34] btrfs: simplify btrfs_lookup_bio_sums Christoph Hellwig
2023-01-23 16:06 ` Johannes Thumshirn
2023-01-24 15:16 ` Anand Jain
2023-01-21 6:50 ` [PATCH 06/34] btrfs: slightly refactor btrfs_submit_bio Christoph Hellwig
2023-01-23 16:13 ` Johannes Thumshirn
2023-01-24 15:27 ` Anand Jain
2023-01-21 6:50 ` [PATCH 07/34] btrfs: save the bio iter for checksum validation in common code Christoph Hellwig
2023-01-23 16:18 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 08/34] btrfs: pre-load data checksum for reads in btrfs_submit_bio Christoph Hellwig
2023-01-23 16:32 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 09/34] btrfs: add a btrfs_data_csum_ok helper Christoph Hellwig
2023-01-23 16:36 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 10/34] btrfs: handle checksum validation and repair at the storage layer Christoph Hellwig
2023-01-23 16:39 ` Johannes Thumshirn
2023-01-24 6:47 ` Christoph Hellwig
2023-01-24 8:16 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 11/34] btrfs: remove btrfs_bio_free_csum Christoph Hellwig
2023-01-23 16:41 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 12/34] btrfs: remove btrfs_bio_for_each_sector Christoph Hellwig
2023-01-23 16:42 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 13/34] btrfs: remove now unused checksumming helpers Christoph Hellwig
2023-01-23 16:49 ` Johannes Thumshirn
2023-01-23 16:53 ` Christoph Hellwig
2023-01-24 8:33 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 14/34] btrfs: remove the device field in struct btrfs_bio Christoph Hellwig
2023-01-24 11:01 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 15/34] btrfs: remove the io_failure_record infrastructure Christoph Hellwig
2023-01-24 11:04 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 16/34] btrfs: rename the iter field in struct btrfs_bio Christoph Hellwig
2023-01-24 11:09 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 17/34] btrfs: remove the is_metadata flag " Christoph Hellwig
2023-01-24 11:13 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 18/34] btrfs: remove the submit_bio_start helpers Christoph Hellwig
2023-01-24 11:49 ` Johannes Thumshirn
2023-01-21 6:50 ` [PATCH 19/34] btrfs: simplify the btrfs_csum_one_bio calling convention Christoph Hellwig
2023-01-21 6:50 ` [PATCH 20/34] btrfs: handle checksum generation in the storage layer Christoph Hellwig
2023-01-21 6:50 ` [PATCH 21/34] btrfs: handle recording of zoned writes " Christoph Hellwig
2023-01-21 6:50 ` [PATCH 22/34] btrfs: support cloned bios in btree_csum_one_bio Christoph Hellwig
2023-01-21 6:50 ` [PATCH 23/34] btrfs: allow btrfs_submit_bio to split bios Christoph Hellwig
2023-01-25 21:51 ` Josef Bacik [this message]
2023-01-26 5:21 ` Christoph Hellwig
2023-01-26 17:43 ` Josef Bacik
2023-01-26 17:46 ` Christoph Hellwig
2023-01-26 18:33 ` David Sterba
2023-01-27 7:02 ` test not in the auto group, was: " Christoph Hellwig
2023-01-21 6:50 ` [PATCH 24/34] btrfs: pass the iomap bio to btrfs_submit_bio Christoph Hellwig
2023-01-21 6:50 ` [PATCH 25/34] btrfs: remove stripe boundary calculation for buffered I/O Christoph Hellwig
2023-01-21 6:50 ` [PATCH 26/34] btrfs: remove stripe boundary calculation for compressed I/O Christoph Hellwig
2023-01-21 6:50 ` [PATCH 27/34] btrfs: remove stripe boundary calculation for encoded I/O Christoph Hellwig
2023-01-21 6:50 ` [PATCH 28/34] btrfs: remove struct btrfs_io_geometry Christoph Hellwig
2023-01-21 6:50 ` [PATCH 29/34] btrfs: remove submit_encoded_read_bio Christoph Hellwig
2023-01-21 6:50 ` [PATCH 30/34] btrfs: remove the fs_info argument to btrfs_submit_bio Christoph Hellwig
2023-01-21 6:50 ` [PATCH 31/34] btrfs: remove now spurious bio submission helpers Christoph Hellwig
2023-01-21 6:50 ` [PATCH 32/34] btrfs: calculate file system wide queue limit for zoned mode Christoph Hellwig
2023-01-21 6:50 ` [PATCH 33/34] btrfs: split zone append bios in btrfs_submit_bio Christoph Hellwig
2023-01-21 6:50 ` [PATCH 34/34] iomap: remove IOMAP_F_ZONE_APPEND Christoph Hellwig
2023-01-24 13:22 ` consolidate btrfs checksumming, repair and bio splitting v4 Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y9GkVONZJFXVe8AH@localhost.localdomain \
--to=josef@toxicpanda.com \
--cc=axboe@kernel.dk \
--cc=clm@fb.com \
--cc=damien.lemoal@wdc.com \
--cc=djwong@kernel.org \
--cc=dsterba@suse.com \
--cc=hch@lst.de \
--cc=johannes.thumshirn@wdc.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=naohiro.aota@wdc.com \
--cc=wqu@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox