From: Christoph Hellwig <hch@infradead.org>
To: Boris Burkov <boris@bur.io>
Cc: linux-btrfs@vger.kernel.org, kernel-team@fb.com
Subject: Re: [PATCH] btrfs: fix dio continue after short write due to source fault
Date: Fri, 17 Feb 2023 22:11:21 -0800 [thread overview]
Message-ID: <Y/BsCVfm05OxR+dk@infradead.org> (raw)
In-Reply-To: <ae81e48b0e954bae1c3451c0da1a24ae7146606c.1676684984.git.boris@bur.io>
On Fri, Feb 17, 2023 at 05:50:17PM -0800, Boris Burkov wrote:
> Downstream bug report:
> https://bugzilla.redhat.com/show_bug.cgi?id=2169947
Any chance we could wire this or a simplified version up in xfstests?
This seems important enough to have a regression test.
The fix looks technically good (but others might know the ordered
extent semantics better than me). I have a whole bunch of superficial
comments, though.
> +struct btrfs_dio_data;
> struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
> - size_t done_before);
> + struct btrfs_dio_data *data, size_t done_before);
I'd much prefer not exposing btrfs_dio_data outside of inode.c, and
I think we can just do this with an in/out ordered_extent pointer.
See my incremental patch at the end.
> @@ -7543,6 +7551,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
> goto err;
> }
>
> +
unrelated whitespace changes.
> /*
> * If this errors out it's because we couldn't invalidate pagecache for
> * this range and we need to fallback to buffered IO, or we are doing a
> @@ -7662,6 +7671,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
> else
> free_extent_state(cached_state);
>
> +out:
> /*
> * Translate extent map information to iomap.
> * We trim the extents (and move the addr) even though iomap code does
Naming this label out seems a bit confusing. What about something
like map_iomap?
> @@ -7785,18 +7807,17 @@ static const struct iomap_dio_ops btrfs_dio_ops = {
> ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
> {
> struct btrfs_dio_data data;
> + memset(&data, 0, sizeof(data));
This looks unrelated. I'd also do a simple compile initialization if
we'd want to do it:
struct btrfs_dio_data data = { };
> +struct btrfs_ordered_extent *__btrfs_add_ordered_extent(
> + struct btrfs_inode *inode, u64 file_offset,
> + u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
> + u64 disk_num_bytes, u64 offset, unsigned long flags,
> + int compress_type)
I have a similar change in a development tree and a lot more caller, and
I ended up naming this helper btrfs_alloc_ordered_extent as that seems
more descriptive. This also probably warrants being split into a prep
patch.
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index f771001574d0..b1277e615478 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -131,7 +131,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
> ret += dio->done_before;
>
> kfree(dio);
> -
> return ret;
> }
> EXPORT_SYMBOL_GPL(iomap_dio_complete);
Unrelated whitespace change.
And here is the incremental patch for the btrfs_dio_write prototype:
---
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 2092348b6f7d7c..1367bbc6c0ae46 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -531,10 +531,9 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
size_t done_before);
-
-struct btrfs_dio_data;
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
- struct btrfs_dio_data *data, size_t done_before);
+ struct btrfs_ordered_extent **ordered_extent,
+ size_t done_before);
extern const struct dentry_operations btrfs_dentry_operations;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9a6938917b0dc6..7b5dd01772ad3a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1464,11 +1464,8 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
loff_t endbyte;
ssize_t err;
unsigned int ilock_flags = 0;
- struct btrfs_dio_data dio_data;
struct iomap_dio *dio;
- struct btrfs_ordered_extent *tmp;
-
- memset(&dio_data, 0, sizeof(dio_data));
+ struct btrfs_ordered_extent *ordered_extent = NULL;
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;
@@ -1530,7 +1527,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
* got -EFAULT, faulting in the pages before the retry.
*/
from->nofault = true;
- dio = btrfs_dio_write(iocb, from, &dio_data, written);
+ dio = btrfs_dio_write(iocb, from, &ordered_extent, written);
from->nofault = false;
/*
@@ -1568,9 +1565,6 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (left == prev_left) {
err = -ENOTBLK;
} else {
- tmp = dio_data.ordered;
- memset(&dio_data, 0, sizeof(dio_data));
- dio_data.ordered = tmp;
fault_in_iov_iter_readable(from, left);
prev_left = left;
goto relock;
@@ -1582,10 +1576,8 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
* ordered_extent, but this is needed to clean up in case of an error
* path breaking out of iomap_iter before the final iomap_end call.
*/
- if (dio_data.ordered) {
- btrfs_put_ordered_extent(dio_data.ordered);
- dio_data.ordered = NULL;
- }
+ if (ordered_extent)
+ btrfs_put_ordered_extent(ordered_extent);
/*
* If 'err' is -ENOTBLK or we have not written all data, then it means
diff --git a/fs/btrfs/file.h b/fs/btrfs/file.h
index 3d4427881eb654..82b34fbb295f27 100644
--- a/fs/btrfs/file.h
+++ b/fs/btrfs/file.h
@@ -5,14 +5,6 @@
extern const struct file_operations btrfs_file_operations;
-struct btrfs_dio_data {
- ssize_t submitted;
- struct extent_changeset *data_reserved;
- bool data_space_reserved;
- bool nocow_done;
- struct btrfs_ordered_extent *ordered;
-};
-
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93499f15b8f3e2..8c73d2a8346f05 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -76,6 +76,14 @@ struct btrfs_iget_args {
struct btrfs_root *root;
};
+struct btrfs_dio_data {
+ ssize_t submitted;
+ struct extent_changeset *data_reserved;
+ bool data_space_reserved;
+ bool nocow_done;
+ struct btrfs_ordered_extent *ordered;
+};
+
struct btrfs_dio_private {
struct btrfs_inode *inode;
@@ -8193,10 +8201,17 @@ ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_be
}
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
- struct btrfs_dio_data *data, size_t done_before)
+ struct btrfs_ordered_extent **ordered_extent,
+ size_t done_before)
{
- return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
- IOMAP_DIO_PARTIAL, data, done_before);
+ struct btrfs_dio_data dio_data = { .ordered = *ordered_extent };
+ struct iomap_dio *dio;
+
+ dio = __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+ IOMAP_DIO_PARTIAL, &dio_data, done_before);
+ if (!IS_ERR_OR_NULL(dio))
+ *ordered_extent = dio_data.ordered;
+ return dio;
}
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
next prev parent reply other threads:[~2023-02-18 6:11 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-18 1:50 [PATCH] btrfs: fix dio continue after short write due to source fault Boris Burkov
2023-02-18 6:11 ` Christoph Hellwig [this message]
2023-02-20 14:01 ` Filipe Manana
2023-02-21 18:34 ` Boris Burkov
2023-03-16 0:48 ` Neal Gompa
2023-03-16 0:52 ` Boris Burkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y/BsCVfm05OxR+dk@infradead.org \
--to=hch@infradead.org \
--cc=boris@bur.io \
--cc=kernel-team@fb.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox