From: Omar Sandoval <osandov@osandov.com>
To: linux-btrfs@vger.kernel.org
Cc: kernel-team@fb.com, Jens Axboe <axboe@kernel.dk>,
Christoph Hellwig <hch@lst.de>
Subject: [PATCH v2 03/15] btrfs: fix double __endio_write_update_ordered in direct I/O
Date: Thu, 16 Apr 2020 14:46:13 -0700 [thread overview]
Message-ID: <594c8cb6dd64cebdf5e01016ce823e1be00fc7ab.1587072977.git.osandov@fb.com> (raw)
In-Reply-To: <cover.1587072977.git.osandov@fb.com>
From: Omar Sandoval <osandov@fb.com>
In btrfs_submit_direct(), if we fail to allocate the btrfs_dio_private,
we complete the ordered extent range. However, we don't record until later
that btrfs_direct_IO() no longer needs to clean up that range.
Therefore, if we fail to allocate the btrfs_dio_private, we complete the
ordered extent range twice. We could fix this by updating
unsubmitted_oe_range earlier, but it's cleaner to reorganize the code so
that creating the btrfs_dio_private and submitting the bios are
separate, and once the btrfs_dio_private is created, cleanup always
happens through the btrfs_dio_private.
Fixes: f28a49287817 ("Btrfs: fix leaking of ordered extents after direct IO write error")
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
fs/btrfs/inode.c | 174 ++++++++++++++++++-----------------------------
1 file changed, 66 insertions(+), 108 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b628c319a5b6..f6ce9749adb6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7903,14 +7903,60 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
return ret;
}
-static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
+/*
+ * If this succeeds, the btrfs_dio_private is responsible for cleaning up locked
+ * or ordered extents whether or not we submit any bios.
+ */
+static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
+ struct inode *inode,
+ loff_t file_offset)
{
- struct inode *inode = dip->inode;
+ const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+ struct btrfs_dio_private *dip;
+ struct bio *bio;
+
+ dip = kzalloc(sizeof(*dip), GFP_NOFS);
+ if (!dip)
+ return NULL;
+
+ bio = btrfs_bio_clone(dio_bio);
+ bio->bi_private = dip;
+ btrfs_io_bio(bio)->logical = file_offset;
+
+ dip->private = dio_bio->bi_private;
+ dip->inode = inode;
+ dip->logical_offset = file_offset;
+ dip->bytes = dio_bio->bi_iter.bi_size;
+ dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
+ dip->orig_bio = bio;
+ dip->dio_bio = dio_bio;
+ atomic_set(&dip->pending_bios, 1);
+
+ if (write) {
+ struct btrfs_dio_data *dio_data = current->journal_info;
+
+ dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+ dip->bytes;
+ dio_data->unsubmitted_oe_range_start =
+ dio_data->unsubmitted_oe_range_end;
+
+ bio->bi_end_io = btrfs_endio_direct_write;
+ } else {
+ bio->bi_end_io = btrfs_endio_direct_read;
+ dip->subio_endio = btrfs_subio_endio_read;
+ }
+ return dip;
+}
+
+static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
+ loff_t file_offset)
+{
+ const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_dio_private *dip;
struct bio *bio;
- struct bio *orig_bio = dip->orig_bio;
- u64 start_sector = orig_bio->bi_iter.bi_sector;
- u64 file_offset = dip->logical_offset;
+ struct bio *orig_bio;
+ u64 start_sector;
int async_submit = 0;
u64 submit_len;
int clone_offset = 0;
@@ -7919,11 +7965,24 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
blk_status_t status;
struct btrfs_io_geometry geom;
+ dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
+ if (!dip) {
+ if (!write) {
+ unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+ file_offset + dio_bio->bi_iter.bi_size - 1);
+ }
+ dio_bio->bi_status = BLK_STS_RESOURCE;
+ dio_end_io(dio_bio);
+ return;
+ }
+
+ orig_bio = dip->orig_bio;
+ start_sector = orig_bio->bi_iter.bi_sector;
submit_len = orig_bio->bi_iter.bi_size;
ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
start_sector << 9, submit_len, &geom);
if (ret)
- return -EIO;
+ goto out_err;
if (geom.len >= submit_len) {
bio = orig_bio;
@@ -7986,7 +8045,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
submit:
status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
if (!status)
- return 0;
+ return;
if (bio != orig_bio)
bio_put(bio);
@@ -8000,107 +8059,6 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
*/
if (atomic_dec_and_test(&dip->pending_bios))
bio_io_error(dip->orig_bio);
-
- /* bio_end_io() will handle error, so we needn't return it */
- return 0;
-}
-
-static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
- loff_t file_offset)
-{
- struct btrfs_dio_private *dip = NULL;
- struct bio *bio = NULL;
- struct btrfs_io_bio *io_bio;
- bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
- int ret = 0;
-
- bio = btrfs_bio_clone(dio_bio);
-
- dip = kzalloc(sizeof(*dip), GFP_NOFS);
- if (!dip) {
- ret = -ENOMEM;
- goto free_ordered;
- }
-
- dip->private = dio_bio->bi_private;
- dip->inode = inode;
- dip->logical_offset = file_offset;
- dip->bytes = dio_bio->bi_iter.bi_size;
- dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
- bio->bi_private = dip;
- dip->orig_bio = bio;
- dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 1);
- io_bio = btrfs_io_bio(bio);
- io_bio->logical = file_offset;
-
- if (write) {
- bio->bi_end_io = btrfs_endio_direct_write;
- } else {
- bio->bi_end_io = btrfs_endio_direct_read;
- dip->subio_endio = btrfs_subio_endio_read;
- }
-
- /*
- * Reset the range for unsubmitted ordered extents (to a 0 length range)
- * even if we fail to submit a bio, because in such case we do the
- * corresponding error handling below and it must not be done a second
- * time by btrfs_direct_IO().
- */
- if (write) {
- struct btrfs_dio_data *dio_data = current->journal_info;
-
- dio_data->unsubmitted_oe_range_end = dip->logical_offset +
- dip->bytes;
- dio_data->unsubmitted_oe_range_start =
- dio_data->unsubmitted_oe_range_end;
- }
-
- ret = btrfs_submit_direct_hook(dip);
- if (!ret)
- return;
-
- btrfs_io_bio_free_csum(io_bio);
-
-free_ordered:
- /*
- * If we arrived here it means either we failed to submit the dip
- * or we either failed to clone the dio_bio or failed to allocate the
- * dip. If we cloned the dio_bio and allocated the dip, we can just
- * call bio_endio against our io_bio so that we get proper resource
- * cleanup if we fail to submit the dip, otherwise, we must do the
- * same as btrfs_endio_direct_[write|read] because we can't call these
- * callbacks - they require an allocated dip and a clone of dio_bio.
- */
- if (bio && dip) {
- bio_io_error(bio);
- /*
- * The end io callbacks free our dip, do the final put on bio
- * and all the cleanup and final put for dio_bio (through
- * dio_end_io()).
- */
- dip = NULL;
- bio = NULL;
- } else {
- if (write)
- __endio_write_update_ordered(inode,
- file_offset,
- dio_bio->bi_iter.bi_size,
- false);
- else
- unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
- file_offset + dio_bio->bi_iter.bi_size - 1);
-
- dio_bio->bi_status = BLK_STS_IOERR;
- /*
- * Releases and cleans up our dio_bio, no need to bio_put()
- * nor bio_endio()/bio_io_error() against dio_bio.
- */
- dio_end_io(dio_bio);
- }
- if (bio)
- bio_put(bio);
- kfree(dip);
}
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
--
2.26.1
next prev parent reply other threads:[~2020-04-16 21:46 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-16 21:46 [PATCH v2 00/15] btrfs: read repair/direct I/O improvements Omar Sandoval
2020-04-16 21:46 ` [PATCH v2 01/15] block: add bio_for_each_bvec_all() Omar Sandoval
2020-04-17 12:56 ` Johannes Thumshirn
2020-04-16 21:46 ` [PATCH v2 02/15] btrfs: fix error handling when submitting direct I/O bio Omar Sandoval
2020-04-17 13:01 ` Johannes Thumshirn
2020-04-16 21:46 ` Omar Sandoval [this message]
2020-04-17 17:53 ` [PATCH v2 03/15] btrfs: fix double __endio_write_update_ordered in direct I/O Johannes Thumshirn
2020-04-20 15:45 ` David Sterba
2020-04-21 10:44 ` Nikolay Borisov
2020-04-21 22:26 ` David Sterba
2020-04-16 21:46 ` [PATCH v2 04/15] btrfs: look at full bi_io_vec for repair decision Omar Sandoval
2020-04-17 17:56 ` Johannes Thumshirn
2020-04-16 21:46 ` [PATCH v2 05/15] btrfs: don't do repair validation for checksum errors Omar Sandoval
2020-04-17 17:59 ` Johannes Thumshirn
2020-04-16 21:46 ` [PATCH v2 06/15] btrfs: clarify btrfs_lookup_bio_sums documentation Omar Sandoval
2020-04-17 18:01 ` Johannes Thumshirn
2020-04-21 11:17 ` Nikolay Borisov
2020-04-16 21:46 ` [PATCH v2 07/15] btrfs: rename __readpage_endio_check to check_data_csum Omar Sandoval
2020-04-16 21:46 ` [PATCH v2 08/15] btrfs: make btrfs_check_repairable() static Omar Sandoval
2020-04-16 21:46 ` [PATCH v2 09/15] btrfs: kill btrfs_dio_private->private Omar Sandoval
2020-04-17 18:02 ` Johannes Thumshirn
2020-04-16 21:46 ` [PATCH v2 10/15] btrfs: convert btrfs_dio_private->pending_bios to refcount_t Omar Sandoval
2020-04-17 18:03 ` Johannes Thumshirn
2020-04-16 21:46 ` [PATCH v2 11/15] btrfs: put direct I/O checksums in btrfs_dio_private instead of bio Omar Sandoval
2020-04-17 18:06 ` Johannes Thumshirn
2020-04-21 22:50 ` David Sterba
2020-04-16 21:46 ` [PATCH v2 12/15] btrfs: get rid of one layer of bios in direct I/O Omar Sandoval
2020-04-21 13:00 ` Nikolay Borisov
2020-04-21 23:11 ` David Sterba
2020-04-16 21:46 ` [PATCH v2 13/15] btrfs: simplify direct I/O read repair Omar Sandoval
2020-04-21 13:53 ` Nikolay Borisov
2020-04-21 14:40 ` Nikolay Borisov
2020-04-16 21:46 ` [PATCH v2 14/15] btrfs: get rid of endio_repair_workers Omar Sandoval
2020-04-16 21:46 ` [PATCH v2 15/15] btrfs: unify buffered and direct I/O read repair Omar Sandoval
2020-04-21 23:38 ` David Sterba
2020-04-17 11:03 ` [PATCH v2 00/15] btrfs: read repair/direct I/O improvements David Sterba
2020-04-21 23:46 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=594c8cb6dd64cebdf5e01016ce823e1be00fc7ab.1587072977.git.osandov@fb.com \
--to=osandov@osandov.com \
--cc=axboe@kernel.dk \
--cc=hch@lst.de \
--cc=kernel-team@fb.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).