From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 1/5] btrfs: detect dirty blocks without an ordered extent more reliably
Date: Thu, 7 May 2026 14:59:17 +0930 [thread overview]
Message-ID: <e52caa0b55dade2020a615eabc3d8aea9fcb55fa.1778131118.git.wqu@suse.com> (raw)
In-Reply-To: <cover.1778131118.git.wqu@suse.com>
Currently btrfs detects dirty folios which don't have an ordered extent
at extent_writepage_io(), but that is not ideal:
- The check does not handle all dirty blocks
We can have multiple blocks inside a large folio, but the whole folio
is marked ordered as long as there is one ordered extent in the range.
We can still hit cases where some dirty blocks do not have
corresponding ordered extents.
Instead of checking the folio's ordered flag, do the check at
alloc_new_bio(), where we're already searching for ordered extents for
writeback.
If we can't find an ordered extent there, give an error message and
notify the caller that something is wrong.
This allows us to check every block that goes through
submit_extent_folio().
With this new and more reliable check, we can remove the old check.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 85 ++++++++++++++++++++++++++++----------------
1 file changed, 54 insertions(+), 31 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ebf9a63946e5..3550ae40255c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -730,9 +730,9 @@ static bool btrfs_bio_is_contig(struct btrfs_bio_ctrl *bio_ctrl,
bio_end_sector(bio) == sector;
}
-static void alloc_new_bio(struct btrfs_inode *inode,
- struct btrfs_bio_ctrl *bio_ctrl,
- u64 disk_bytenr, u64 file_offset)
+static int alloc_new_bio(struct btrfs_inode *inode,
+ struct btrfs_bio_ctrl *bio_ctrl,
+ u64 disk_bytenr, u64 file_offset)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_bio *bbio;
@@ -749,13 +749,25 @@ static void alloc_new_bio(struct btrfs_inode *inode,
if (bio_ctrl->wbc) {
struct btrfs_ordered_extent *ordered;
+ /* This must be a write for data inodes. */
+ ASSERT(btrfs_op(&bio_ctrl->bbio->bio) == BTRFS_MAP_WRITE);
+ ASSERT(is_data_inode(inode));
+
ordered = btrfs_lookup_ordered_extent(inode, file_offset);
- if (ordered) {
- bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
- ordered->file_offset +
- ordered->disk_num_bytes - file_offset);
- bbio->ordered = ordered;
+ if (unlikely(!ordered)) {
+ bio_ctrl->bbio = NULL;
+ bio_ctrl->next_file_offset = 0;
+ bio_put(&bbio->bio);
+ btrfs_err_rl(fs_info,
+ "root %lld ino %llu file offset %llu is marked dirty without notifying the fs",
+ btrfs_root_id(inode->root), btrfs_ino(inode),
+ file_offset);
+ return -EUCLEAN;
}
+ bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
+ ordered->file_offset +
+ ordered->disk_num_bytes - file_offset);
+ bbio->ordered = ordered;
/*
* Pick the last added device to support cgroup writeback. For
@@ -766,6 +778,7 @@ static void alloc_new_bio(struct btrfs_inode *inode,
bio_set_dev(&bbio->bio, fs_info->fs_devices->latest_dev->bdev);
wbc_init_bio(bio_ctrl->wbc, &bbio->bio);
}
+ return 0;
}
/*
@@ -781,14 +794,19 @@ static void alloc_new_bio(struct btrfs_inode *inode,
* new one in @bio_ctrl->bbio.
* The mirror number for this IO should already be initialized in
* @bio_ctrl->mirror_num.
+ *
+ * Return the number of bytes that are queued into a bio.
+ * If the returned bytes is smaller than @size, it means we hit a critical error
+ * for data write, where there is no ordered extent for the range.
*/
-static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
- u64 disk_bytenr, struct folio *folio,
- size_t size, unsigned long pg_offset,
- u64 read_em_generation)
+static unsigned int submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
+ u64 disk_bytenr, struct folio *folio,
+ size_t size, unsigned long pg_offset,
+ u64 read_em_generation)
{
struct btrfs_inode *inode = folio_to_inode(folio);
loff_t file_offset = folio_pos(folio) + pg_offset;
+ unsigned int queued = 0;
ASSERT(pg_offset + size <= folio_size(folio));
ASSERT(bio_ctrl->end_io_func);
@@ -801,8 +819,13 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
u32 len = size;
/* Allocate new bio if needed */
- if (!bio_ctrl->bbio)
- alloc_new_bio(inode, bio_ctrl, disk_bytenr, file_offset);
+ if (!bio_ctrl->bbio) {
+ int ret;
+
+ ret = alloc_new_bio(inode, bio_ctrl, disk_bytenr, file_offset);
+ if (ret < 0)
+ break;
+ }
/* Cap to the current ordered extent boundary if there is one. */
if (len > bio_ctrl->len_to_oe_boundary) {
@@ -830,6 +853,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
pg_offset += len;
disk_bytenr += len;
file_offset += len;
+ queued += len;
/*
* len_to_oe_boundary defaults to U32_MAX, which isn't folio or
@@ -869,6 +893,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
submit_one_bio(bio_ctrl);
} while (size);
+ return queued;
}
static int attach_extent_buffer_folio(struct extent_buffer *eb,
@@ -1041,6 +1066,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
u64 disk_bytenr;
u64 block_start;
u64 em_gen;
+ unsigned int queued;
ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
if (cur >= last_byte) {
@@ -1154,8 +1180,10 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
if (force_bio_submit)
submit_one_bio(bio_ctrl);
- submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
- pg_offset, em_gen);
+ queued = submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
+ pg_offset, em_gen);
+ /* Read submission should not fail. */
+ ASSERT(queued == blocksize);
}
return 0;
}
@@ -1643,6 +1671,7 @@ static int submit_one_sector(struct btrfs_inode *inode,
u64 extent_offset;
u64 em_end;
const u32 sectorsize = fs_info->sectorsize;
+ unsigned int queued;
ASSERT(IS_ALIGNED(filepos, sectorsize));
@@ -1709,8 +1738,15 @@ static int submit_one_sector(struct btrfs_inode *inode,
*/
ASSERT(folio_test_writeback(folio));
- submit_extent_folio(bio_ctrl, disk_bytenr, folio,
- sectorsize, filepos - folio_pos(folio), 0);
+ queued = submit_extent_folio(bio_ctrl, disk_bytenr, folio,
+ sectorsize, filepos - folio_pos(folio), 0);
+ if (unlikely(queued < sectorsize)) {
+ btrfs_folio_clear_writeback(fs_info, folio, filepos, sectorsize);
+ btrfs_folio_clear_ordered(fs_info, folio, filepos, sectorsize);
+ btrfs_mark_ordered_io_finished(inode, filepos, fs_info->sectorsize,
+ false);
+ return -EUCLEAN;
+ }
return 0;
}
@@ -1743,19 +1779,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
ASSERT(end <= folio_end, "start=%llu len=%u folio_start=%llu folio_size=%zu",
start, len, folio_start, folio_size(folio));
- if (unlikely(!folio_test_ordered(folio))) {
- DEBUG_WARN();
- btrfs_err_rl(fs_info,
- "root %lld ino %llu folio %llu is marked dirty without notifying the fs",
- btrfs_root_id(inode->root),
- btrfs_ino(inode),
- folio_pos(folio));
- btrfs_folio_clear_dirty(fs_info, folio, start, len);
- btrfs_folio_set_writeback(fs_info, folio, start, len);
- btrfs_folio_clear_writeback(fs_info, folio, start, len);
- return -EUCLEAN;
- }
-
/* Truncate the submit bitmap to the current range. */
if (start > folio_start)
bitmap_clear(bio_ctrl->submit_bitmap, 0,
--
2.54.0
next prev parent reply other threads:[~2026-05-07 5:29 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-07 5:29 [PATCH 0/5] btrfs: remove folio ordered flag Qu Wenruo
2026-05-07 5:29 ` Qu Wenruo [this message]
2026-05-07 5:29 ` [PATCH 2/5] btrfs: unify folio dirty flag clearing Qu Wenruo
2026-05-07 5:29 ` [PATCH 3/5] btrfs: use dirty flag to check if an ordered extent needs to be truncated Qu Wenruo
2026-05-07 5:29 ` [PATCH 4/5] btrfs: remove folio_test_ordered() usage Qu Wenruo
2026-05-07 5:29 ` [PATCH 5/5] btrfs: remove folio ordered flag and subpage bitmap Qu Wenruo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e52caa0b55dade2020a615eabc3d8aea9fcb55fa.1778131118.git.wqu@suse.com \
--to=wqu@suse.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox