* [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper
@ 2026-02-25 22:53 Qu Wenruo
2026-03-04 4:02 ` Boris Burkov
0 siblings, 1 reply; 3+ messages in thread
From: Qu Wenruo @ 2026-02-25 22:53 UTC (permalink / raw)
To: linux-btrfs
Currently we call cow_file_range_inline() in different situations, from
regular cow_file_range() to compress_file_range().
This is because inline extent creation has different conditions based on
whether it's a compressed one or not.
But on the other hand, inline extent creation shouldn't be so
distributed, we can just have a dedicated branch in
btrfs_run_delalloc_range().
This is most obvious for compressed inline cases: it makes no sense
to go through all the complex async extent mechanism just to inline a
single block.
So here we introduce a dedicated run_delalloc_inline() helper, and
remove all inline related handling from cow_file_range() and
compress_file_range().
There is a special update to inode_need_compress(): a new
@check_inline parameter is introduced.
This is to allow inline specific checks to be done inside
run_delalloc_inline(), which allows single block compression, but
other call sites should always reject single block compression.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Changelog:
v4:
- Reorder with the patch "btrfs: do compressed bio size roundup and zeroing in one go"
That patch is a pure cleanup, should not depend on this intrusive
patch.
- Remove the comment of ret > 0 case of cow_file_range()
As that function can no longer create inlined extent.
v3:
- Fix a grammar error in the commit message
v2:
- Fix a bug exposed in btrfs/344
Where the inode_need_compress() check allows single block to be
compressed.
Update inode_need_compress() to accept a new @check_inline parameter,
so that only inode_need_compress() in run_delalloc_inline() will allow
single block to be compressed, meanwhile all other call sites will
reject single block compression.
- Fix a leak of extent_state
---
fs/btrfs/inode.c | 205 ++++++++++++++++++++++-------------------------
1 file changed, 95 insertions(+), 110 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 46acfd16e817..9148ec4a1d19 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -74,7 +74,6 @@
#include "delayed-inode.h"
#define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0)
-#define COW_FILE_RANGE_NO_INLINE (1UL << 1)
struct btrfs_iget_args {
u64 ino;
@@ -703,55 +702,6 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
return ret;
}
-static noinline int cow_file_range_inline(struct btrfs_inode *inode,
- struct folio *locked_folio,
- u64 offset, u64 end,
- size_t compressed_size,
- int compress_type,
- struct folio *compressed_folio,
- bool update_i_size)
-{
- struct extent_state *cached = NULL;
- unsigned long clear_flags = EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING | EXTENT_LOCKED;
- u64 size = min_t(u64, i_size_read(&inode->vfs_inode), end + 1);
- int ret;
-
- if (!can_cow_file_range_inline(inode, offset, size, compressed_size))
- return 1;
-
- btrfs_lock_extent(&inode->io_tree, offset, end, &cached);
- ret = __cow_file_range_inline(inode, size, compressed_size,
- compress_type, compressed_folio,
- update_i_size);
- if (ret > 0) {
- btrfs_unlock_extent(&inode->io_tree, offset, end, &cached);
- return ret;
- }
-
- /*
- * In the successful case (ret == 0 here), cow_file_range will return 1.
- *
- * Quite a bit further up the callstack in extent_writepage(), ret == 1
- * is treated as a short circuited success and does not unlock the folio,
- * so we must do it here.
- *
- * In the failure case, the locked_folio does get unlocked by
- * btrfs_folio_end_all_writers, which asserts that it is still locked
- * at that point, so we must *not* unlock it here.
- *
- * The other two callsites in compress_file_range do not have a
- * locked_folio, so they are not relevant to this logic.
- */
- if (ret == 0)
- locked_folio = NULL;
-
- extent_clear_unlock_delalloc(inode, offset, end, locked_folio, &cached,
- clear_flags, PAGE_UNLOCK |
- PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
- return ret;
-}
-
struct async_extent {
u64 start;
u64 ram_size;
@@ -797,7 +747,7 @@ static int add_async_extent(struct async_chunk *cow, u64 start, u64 ram_size,
* options, defragmentation, properties or heuristics.
*/
static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
- u64 end)
+ u64 end, bool check_inline)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -812,8 +762,9 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
* and will always fallback to regular write later.
*/
if (end + 1 - start <= fs_info->sectorsize &&
- (start > 0 || end + 1 < inode->disk_i_size))
+ (!check_inline || (start > 0 || end + 1 < inode->disk_i_size)))
return 0;
+
/* Defrag ioctl takes precedence over mount options and properties. */
if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS)
return 0;
@@ -928,7 +879,6 @@ static void compress_file_range(struct btrfs_work *work)
container_of(work, struct async_chunk, work);
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct address_space *mapping = inode->vfs_inode.i_mapping;
struct compressed_bio *cb = NULL;
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
@@ -1000,7 +950,7 @@ static void compress_file_range(struct btrfs_work *work)
* been flagged as NOCOMPRESS. This flag can change at any time if we
* discover bad compression ratios.
*/
- if (!inode_need_compress(inode, start, end))
+ if (!inode_need_compress(inode, start, end, false))
goto cleanup_and_bail_uncompressed;
if (0 < inode->defrag_compress && inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
@@ -1021,35 +971,6 @@ static void compress_file_range(struct btrfs_work *work)
total_compressed = cb->bbio.bio.bi_iter.bi_size;
total_in = cur_len;
- /*
- * Try to create an inline extent.
- *
- * If we didn't compress the entire range, try to create an uncompressed
- * inline extent, else a compressed one.
- *
- * Check cow_file_range() for why we don't even try to create inline
- * extent for the subpage case.
- */
- if (total_in < actual_end)
- ret = cow_file_range_inline(inode, NULL, start, end, 0,
- BTRFS_COMPRESS_NONE, NULL, false);
- else
- ret = cow_file_range_inline(inode, NULL, start, end, total_compressed,
- compress_type,
- bio_first_folio_all(&cb->bbio.bio), false);
- if (ret <= 0) {
- cleanup_compressed_bio(cb);
- if (ret < 0)
- mapping_set_error(mapping, -EIO);
- return;
- }
- /*
- * If a single block at file offset 0 cannot be inlined, fall back to
- * regular writes without marking the file incompressible.
- */
- if (start == 0 && end <= blocksize)
- goto cleanup_and_bail_uncompressed;
-
/*
* We aren't doing an inline extent. Round the compressed size up to a
* block size boundary so the allocator does sane things.
@@ -1427,11 +1348,6 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
*
* When this function fails, it unlocks all folios except @locked_folio.
*
- * When this function successfully creates an inline extent, it returns 1 and
- * unlocks all folios including locked_folio and starts I/O on them.
- * (In reality inline extents are limited to a single block, so locked_folio is
- * the only folio handled anyway).
- *
* When this function succeed and creates a normal extent, the folio locking
* status depends on the passed in flags:
*
@@ -1475,25 +1391,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
-
- if (!(flags & COW_FILE_RANGE_NO_INLINE)) {
- /* lets try to make an inline extent */
- ret = cow_file_range_inline(inode, locked_folio, start, end, 0,
- BTRFS_COMPRESS_NONE, NULL, false);
- if (ret <= 0) {
- /*
- * We succeeded, return 1 so the caller knows we're done
- * with this page and already handled the IO.
- *
- * If there was an error then cow_file_range_inline() has
- * already done the cleanup.
- */
- if (ret == 0)
- ret = 1;
- goto done;
- }
- }
-
alloc_hint = btrfs_get_extent_allocation_hint(inode, start, num_bytes);
/*
@@ -1571,7 +1468,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
}
extent_clear_unlock_delalloc(inode, orig_start, end, locked_folio, &cached,
EXTENT_LOCKED | EXTENT_DELALLOC, page_ops);
-done:
if (done_offset)
*done_offset = end;
return ret;
@@ -1874,7 +1770,7 @@ static int fallback_to_cow(struct btrfs_inode *inode,
* a locked folio, which can race with writeback.
*/
ret = cow_file_range(inode, locked_folio, start, end, NULL,
- COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED);
+ COW_FILE_RANGE_KEEP_LOCKED);
ASSERT(ret != 1);
return ret;
}
@@ -2425,6 +2321,80 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
return false;
}
+/*
+ * Return 0 if an inlined extent is created successfully.
+ * Return <0 if critical error happened.
+ * Return >0 if an inline extent can not be created.
+ */
+static int run_delalloc_inline(struct btrfs_inode *inode, struct folio *locked_folio)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct compressed_bio *cb = NULL;
+ struct extent_state *cached = NULL;
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+ const u32 blocksize = fs_info->sectorsize;
+ int compress_type = fs_info->compress_type;
+ int compress_level = fs_info->compress_level;
+ u32 compressed_size = 0;
+ int ret;
+
+ ASSERT(folio_pos(locked_folio) == 0);
+
+ if (btrfs_inode_can_compress(inode) &&
+ inode_need_compress(inode, 0, blocksize, true)) {
+ if (inode->defrag_compress > 0 &&
+ inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
+ compress_type = inode->defrag_compress;
+ compress_level = inode->defrag_compress_level;
+ } else if (inode->prop_compress) {
+ compress_type = inode->prop_compress;
+ }
+ cb = btrfs_compress_bio(inode, 0, blocksize, compress_type, compress_level, 0);
+ if (IS_ERR(cb)) {
+ cb = NULL;
+ /* Just fall back to non-compressed case. */
+ } else {
+ compressed_size = cb->bbio.bio.bi_iter.bi_size;
+ }
+ }
+ if (!can_cow_file_range_inline(inode, 0, i_size, compressed_size)) {
+ if (cb)
+ cleanup_compressed_bio(cb);
+ return 1;
+ }
+
+ btrfs_lock_extent(&inode->io_tree, 0, blocksize - 1, &cached);
+ if (cb)
+ ret = __cow_file_range_inline(inode, i_size, compressed_size, compress_type,
+ bio_first_folio_all(&cb->bbio.bio), false);
+ else
+ ret = __cow_file_range_inline(inode, i_size, 0, BTRFS_COMPRESS_NONE,
+ NULL, false);
+ /*
+ * In the successful case (ret == 0 here), run_delalloc_inline() will return 1.
+ *
+ * Quite a bit further up the callstack in extent_writepage(), ret == 1
+ * is treated as a short circuited success and does not unlock the folio,
+ * so we must do it here.
+ *
+ * For failure case, the @locked_folio does get unlocked by
+ * btrfs_folio_end_lock_bitmap(), so we must *not* unlock it here.
+ *
+ * So if ret == 0, we let extent_clear_unlock_delalloc() to unlock the
+ * folio by passing NULL as @locked_folio.
+ * Otherwise pass @locked_folio as usual.
+ */
+ if (ret == 0)
+ locked_folio = NULL;
+ extent_clear_unlock_delalloc(inode, 0, blocksize - 1, locked_folio, &cached,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+ EXTENT_DO_ACCOUNTING | EXTENT_LOCKED,
+ PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
+ if (cb)
+ cleanup_compressed_bio(cb);
+ return ret;
+}
+
/*
* Function to process delayed allocation (create CoW) for ranges which are
* being touched for the first time.
@@ -2441,11 +2411,26 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
ASSERT(!(end <= folio_pos(locked_folio) ||
start >= folio_next_pos(locked_folio)));
+ if (start == 0 && end + 1 <= inode->root->fs_info->sectorsize &&
+ end + 1 >= inode->disk_i_size) {
+ int ret;
+
+ ret = run_delalloc_inline(inode, locked_folio);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return 1;
+ /*
+ * Continue regular handling if we can not create an
+ * inlined extent.
+ */
+ }
+
if (should_nocow(inode, start, end))
return run_delalloc_nocow(inode, locked_folio, start, end);
if (btrfs_inode_can_compress(inode) &&
- inode_need_compress(inode, start, end) &&
+ inode_need_compress(inode, start, end, false) &&
run_delalloc_compressed(inode, locked_folio, start, end, wbc))
return 1;
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper
2026-02-25 22:53 [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper Qu Wenruo
@ 2026-03-04 4:02 ` Boris Burkov
2026-03-04 4:18 ` Qu Wenruo
0 siblings, 1 reply; 3+ messages in thread
From: Boris Burkov @ 2026-03-04 4:02 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs
On Thu, Feb 26, 2026 at 09:23:18AM +1030, Qu Wenruo wrote:
> Currently we call cow_file_range_inline() in different situations, from
> regular cow_file_range() to compress_file_range().
>
> This is because inline extent creation has different conditions based on
> whether it's a compressed one or not.
>
> But on the other hand, inline extent creation shouldn't be so
> distributed, we can just have a dedicated branch in
> btrfs_run_delalloc_range().
>
> It will become more obvious for compressed inline cases, it makes no
> sense to go through all the complex async extent mechanism just to
> inline a single block.
>
> So here we introduce a dedicated run_delalloc_inline() helper, and
> remove all inline related handling from cow_file_range() and
> compress_file_range().
>
> There is a special update to inode_need_compress(), that a new
> @check_inline parameter is introduced.
> This is to allow inline specific checks to be done inside
> run_delalloc_inline(), which allows single block compression, but
> other call sites should always reject single block compression.
I noticed a small comment discrepancy and Chris's claude code review
found what I think is in fact a serious bug to do with the state of the
delalloc bits.
Details inline.
Thanks,
Boris
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> Changelog:
> v4:
> - Reorder with the patch "btrfs: do compressed bio size roundup and zeroing in one go"
> That patch is a pure cleanup, should not depend on this intrusive
> patch.
>
> - Remove the comment of ret > 0 case of cow_file_range()
> As that function can no longer create inlined extent.
>
> v3:
> - Fix a grammar error in the commit message
>
> v2:
> - Fix a bug exposed in btrfs/344
> Where the inode_need_compress() check allows single block to be
> compressed.
> Update inode_need_compress() to accept a new @check_inline parameter,
> so that only inode_need_compress() in run_delalloc_inline() will allow
> single block to be compressed, meanwhile all other call sites will
> reject single block compression.
>
> - Fix a leak of extent_state
> ---
> fs/btrfs/inode.c | 205 ++++++++++++++++++++++-------------------------
> 1 file changed, 95 insertions(+), 110 deletions(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 46acfd16e817..9148ec4a1d19 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -74,7 +74,6 @@
> #include "delayed-inode.h"
>
> #define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0)
> -#define COW_FILE_RANGE_NO_INLINE (1UL << 1)
>
> struct btrfs_iget_args {
> u64 ino;
> @@ -703,55 +702,6 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
> return ret;
> }
>
> -static noinline int cow_file_range_inline(struct btrfs_inode *inode,
> - struct folio *locked_folio,
> - u64 offset, u64 end,
> - size_t compressed_size,
> - int compress_type,
> - struct folio *compressed_folio,
> - bool update_i_size)
> -{
> - struct extent_state *cached = NULL;
> - unsigned long clear_flags = EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
> - EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING | EXTENT_LOCKED;
> - u64 size = min_t(u64, i_size_read(&inode->vfs_inode), end + 1);
> - int ret;
> -
> - if (!can_cow_file_range_inline(inode, offset, size, compressed_size))
> - return 1;
> -
> - btrfs_lock_extent(&inode->io_tree, offset, end, &cached);
> - ret = __cow_file_range_inline(inode, size, compressed_size,
> - compress_type, compressed_folio,
> - update_i_size);
> - if (ret > 0) {
Possible bug described below, but note the old code did not call
extent_clear_unlock_delalloc when __cow_file_range_inline() returned > 0
> - btrfs_unlock_extent(&inode->io_tree, offset, end, &cached);
> - return ret;
> - }
> -
> - /*
> - * In the successful case (ret == 0 here), cow_file_range will return 1.
> - *
> - * Quite a bit further up the callstack in extent_writepage(), ret == 1
> - * is treated as a short circuited success and does not unlock the folio,
> - * so we must do it here.
> - *
> - * In the failure case, the locked_folio does get unlocked by
> - * btrfs_folio_end_all_writers, which asserts that it is still locked
> - * at that point, so we must *not* unlock it here.
> - *
> - * The other two callsites in compress_file_range do not have a
> - * locked_folio, so they are not relevant to this logic.
> - */
> - if (ret == 0)
> - locked_folio = NULL;
> -
> - extent_clear_unlock_delalloc(inode, offset, end, locked_folio, &cached,
> - clear_flags, PAGE_UNLOCK |
> - PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
> - return ret;
> -}
> -
> struct async_extent {
> u64 start;
> u64 ram_size;
> @@ -797,7 +747,7 @@ static int add_async_extent(struct async_chunk *cow, u64 start, u64 ram_size,
> * options, defragmentation, properties or heuristics.
> */
> static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
> - u64 end)
> + u64 end, bool check_inline)
> {
> struct btrfs_fs_info *fs_info = inode->root->fs_info;
>
> @@ -812,8 +762,9 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
> * and will always fallback to regular write later.
> */
> if (end + 1 - start <= fs_info->sectorsize &&
> - (start > 0 || end + 1 < inode->disk_i_size))
> + (!check_inline || (start > 0 || end + 1 < inode->disk_i_size)))
> return 0;
> +
> /* Defrag ioctl takes precedence over mount options and properties. */
> if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS)
> return 0;
> @@ -928,7 +879,6 @@ static void compress_file_range(struct btrfs_work *work)
> container_of(work, struct async_chunk, work);
> struct btrfs_inode *inode = async_chunk->inode;
> struct btrfs_fs_info *fs_info = inode->root->fs_info;
> - struct address_space *mapping = inode->vfs_inode.i_mapping;
> struct compressed_bio *cb = NULL;
> u64 blocksize = fs_info->sectorsize;
> u64 start = async_chunk->start;
> @@ -1000,7 +950,7 @@ static void compress_file_range(struct btrfs_work *work)
> * been flagged as NOCOMPRESS. This flag can change at any time if we
> * discover bad compression ratios.
> */
> - if (!inode_need_compress(inode, start, end))
> + if (!inode_need_compress(inode, start, end, false))
> goto cleanup_and_bail_uncompressed;
>
> if (0 < inode->defrag_compress && inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
> @@ -1021,35 +971,6 @@ static void compress_file_range(struct btrfs_work *work)
> total_compressed = cb->bbio.bio.bi_iter.bi_size;
> total_in = cur_len;
>
> - /*
> - * Try to create an inline extent.
> - *
> - * If we didn't compress the entire range, try to create an uncompressed
> - * inline extent, else a compressed one.
> - *
> - * Check cow_file_range() for why we don't even try to create inline
> - * extent for the subpage case.
> - */
> - if (total_in < actual_end)
> - ret = cow_file_range_inline(inode, NULL, start, end, 0,
> - BTRFS_COMPRESS_NONE, NULL, false);
> - else
> - ret = cow_file_range_inline(inode, NULL, start, end, total_compressed,
> - compress_type,
> - bio_first_folio_all(&cb->bbio.bio), false);
> - if (ret <= 0) {
> - cleanup_compressed_bio(cb);
> - if (ret < 0)
> - mapping_set_error(mapping, -EIO);
> - return;
> - }
> - /*
> - * If a single block at file offset 0 cannot be inlined, fall back to
> - * regular writes without marking the file incompressible.
> - */
> - if (start == 0 && end <= blocksize)
> - goto cleanup_and_bail_uncompressed;
> -
> /*
> * We aren't doing an inline extent. Round the compressed size up to a
> * block size boundary so the allocator does sane things.
> @@ -1427,11 +1348,6 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
> *
> * When this function fails, it unlocks all folios except @locked_folio.
> *
> - * When this function successfully creates an inline extent, it returns 1 and
> - * unlocks all folios including locked_folio and starts I/O on them.
> - * (In reality inline extents are limited to a single block, so locked_folio is
> - * the only folio handled anyway).
> - *
> * When this function succeed and creates a normal extent, the folio locking
> * status depends on the passed in flags:
> *
> @@ -1475,25 +1391,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
> ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
>
> inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
> -
> - if (!(flags & COW_FILE_RANGE_NO_INLINE)) {
> - /* lets try to make an inline extent */
> - ret = cow_file_range_inline(inode, locked_folio, start, end, 0,
> - BTRFS_COMPRESS_NONE, NULL, false);
> - if (ret <= 0) {
> - /*
> - * We succeeded, return 1 so the caller knows we're done
> - * with this page and already handled the IO.
> - *
> - * If there was an error then cow_file_range_inline() has
> - * already done the cleanup.
> - */
> - if (ret == 0)
> - ret = 1;
> - goto done;
> - }
> - }
> -
> alloc_hint = btrfs_get_extent_allocation_hint(inode, start, num_bytes);
>
> /*
> @@ -1571,7 +1468,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
> }
> extent_clear_unlock_delalloc(inode, orig_start, end, locked_folio, &cached,
> EXTENT_LOCKED | EXTENT_DELALLOC, page_ops);
> -done:
> if (done_offset)
> *done_offset = end;
> return ret;
> @@ -1874,7 +1770,7 @@ static int fallback_to_cow(struct btrfs_inode *inode,
> * a locked folio, which can race with writeback.
> */
> ret = cow_file_range(inode, locked_folio, start, end, NULL,
> - COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED);
> + COW_FILE_RANGE_KEEP_LOCKED);
> ASSERT(ret != 1);
> return ret;
> }
> @@ -2425,6 +2321,80 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
> return false;
> }
>
> +/*
> + * Return 0 if an inlined extent is created successfully.
> + * Return <0 if critical error happened.
> + * Return >0 if an inline extent can not be created.
> + */
> +static int run_delalloc_inline(struct btrfs_inode *inode, struct folio *locked_folio)
> +{
> + struct btrfs_fs_info *fs_info = inode->root->fs_info;
> + struct compressed_bio *cb = NULL;
> + struct extent_state *cached = NULL;
> + const u64 i_size = i_size_read(&inode->vfs_inode);
> + const u32 blocksize = fs_info->sectorsize;
> + int compress_type = fs_info->compress_type;
> + int compress_level = fs_info->compress_level;
> + u32 compressed_size = 0;
> + int ret;
> +
> + ASSERT(folio_pos(locked_folio) == 0);
> +
> + if (btrfs_inode_can_compress(inode) &&
> + inode_need_compress(inode, 0, blocksize, true)) {
> + if (inode->defrag_compress > 0 &&
> + inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
> + compress_type = inode->defrag_compress;
> + compress_level = inode->defrag_compress_level;
> + } else if (inode->prop_compress) {
> + compress_type = inode->prop_compress;
> + }
> + cb = btrfs_compress_bio(inode, 0, blocksize, compress_type, compress_level, 0);
> + if (IS_ERR(cb)) {
> + cb = NULL;
> + /* Just fall back to non-compressed case. */
> + } else {
> + compressed_size = cb->bbio.bio.bi_iter.bi_size;
> + }
> + }
> + if (!can_cow_file_range_inline(inode, 0, i_size, compressed_size)) {
> + if (cb)
> + cleanup_compressed_bio(cb);
> + return 1;
> + }
> +
> + btrfs_lock_extent(&inode->io_tree, 0, blocksize - 1, &cached);
> + if (cb)
> + ret = __cow_file_range_inline(inode, i_size, compressed_size, compress_type,
> + bio_first_folio_all(&cb->bbio.bio), false);
> + else
> + ret = __cow_file_range_inline(inode, i_size, 0, BTRFS_COMPRESS_NONE,
> + NULL, false);
> + /*
> + * In the successful case (ret == 0 here), run_delalloc_inline() will return 1.
As far as I can tell, this is inside run_delalloc_inline() and we do not
further manipulate ret to become 1, so this comment seems to have an
error.
Was it meant to say that the caller, btrfs_run_delalloc_range(), will
return 1?
> + *
> + * Quite a bit further up the callstack in extent_writepage(), ret == 1
> + * is treated as a short circuited success and does not unlock the folio,
> + * so we must do it here.
> + *
> + * For failure case, the @locked_folio does get unlocked by
> + * btrfs_folio_end_lock_bitmap(), so we must *not* unlock it here.
> + *
> + * So if ret == 0, we let extent_clear_unlock_delalloc() to unlock the
> + * folio by passing NULL as @locked_folio.
> + * Otherwise pass @locked_folio as usual.
> + */
> + if (ret == 0)
> + locked_folio = NULL;
I think this might be a bug.
When ret == 1, we fall back, but have already cleared delalloc, so the
caller will call cow_file_range() without the delalloc bits set, and
double-call extent_clear_unlock_delalloc().
> + extent_clear_unlock_delalloc(inode, 0, blocksize - 1, locked_folio, &cached,
> + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
> + EXTENT_DO_ACCOUNTING | EXTENT_LOCKED,
> + PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
> + if (cb)
> + cleanup_compressed_bio(cb);
> + return ret;
> +}
> +
> /*
> * Function to process delayed allocation (create CoW) for ranges which are
> * being touched for the first time.
> @@ -2441,11 +2411,26 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
> ASSERT(!(end <= folio_pos(locked_folio) ||
> start >= folio_next_pos(locked_folio)));
>
> + if (start == 0 && end + 1 <= inode->root->fs_info->sectorsize &&
> + end + 1 >= inode->disk_i_size) {
> + int ret;
> +
> + ret = run_delalloc_inline(inode, locked_folio);
> + if (ret < 0)
> + return ret;
> + if (ret == 0)
> + return 1;
> + /*
> + * Continue regular handling if we can not create an
> + * inlined extent.
> + */
> + }
> +
> if (should_nocow(inode, start, end))
> return run_delalloc_nocow(inode, locked_folio, start, end);
>
> if (btrfs_inode_can_compress(inode) &&
> - inode_need_compress(inode, start, end) &&
> + inode_need_compress(inode, start, end, false) &&
> run_delalloc_compressed(inode, locked_folio, start, end, wbc))
> return 1;
>
> --
> 2.53.0
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper
2026-03-04 4:02 ` Boris Burkov
@ 2026-03-04 4:18 ` Qu Wenruo
0 siblings, 0 replies; 3+ messages in thread
From: Qu Wenruo @ 2026-03-04 4:18 UTC (permalink / raw)
To: Boris Burkov; +Cc: linux-btrfs
在 2026/3/4 14:32, Boris Burkov 写道:
> On Thu, Feb 26, 2026 at 09:23:18AM +1030, Qu Wenruo wrote:
[...]
>> - btrfs_lock_extent(&inode->io_tree, offset, end, &cached);
>> - ret = __cow_file_range_inline(inode, size, compressed_size,
>> - compress_type, compressed_folio,
>> - update_i_size);
>> - if (ret > 0) {
>
> Possible bug described below, but note the old code did not call
> extent_clear_unlock_delalloc when __cow_file_range_inline() returned > 0
You're right, and __cow_file_range_inline() has a somewhat weird
return value convention.
It returns 0 if we have inserted the inline extent, and <0 for a critical
error, which also aborts the transaction (except for -ENOSPC).
It only returns >0 if we failed with -ENOSPC.
But to be honest, the -ENOSPC can happen at two locations (inserting the
extent and updating the inode), and if the insert succeeded but the inode
update failed, I'm not sure we should still continue without reverting the
inserted extent.
Thus the ret > 0 branch in the new code should fall back to the regular
(non-inline) path.
>
>> - btrfs_unlock_extent(&inode->io_tree, offset, end, &cached);
>> - return ret;
>> - }
>> -
>> - /*
>> - * In the successful case (ret == 0 here), cow_file_range will return 1.
>> - *
>> - * Quite a bit further up the callstack in extent_writepage(), ret == 1
>> - * is treated as a short circuited success and does not unlock the folio,
>> - * so we must do it here.
>> - *
>> - * In the failure case, the locked_folio does get unlocked by
>> - * btrfs_folio_end_all_writers, which asserts that it is still locked
>> - * at that point, so we must *not* unlock it here.
>> - *
>> - * The other two callsites in compress_file_range do not have a
>> - * locked_folio, so they are not relevant to this logic.
>> - */
>> - if (ret == 0)
>> - locked_folio = NULL;
>> -
>> - extent_clear_unlock_delalloc(inode, offset, end, locked_folio, &cached,
>> - clear_flags, PAGE_UNLOCK |
>> - PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
>> - return ret;
>> -}
>> -
>> struct async_extent {
>> u64 start;
>> u64 ram_size;
>> @@ -797,7 +747,7 @@ static int add_async_extent(struct async_chunk *cow, u64 start, u64 ram_size,
>> * options, defragmentation, properties or heuristics.
>> */
>> static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
>> - u64 end)
>> + u64 end, bool check_inline)
>> {
>> struct btrfs_fs_info *fs_info = inode->root->fs_info;
>>
>> @@ -812,8 +762,9 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
>> * and will always fallback to regular write later.
>> */
>> if (end + 1 - start <= fs_info->sectorsize &&
>> - (start > 0 || end + 1 < inode->disk_i_size))
>> + (!check_inline || (start > 0 || end + 1 < inode->disk_i_size)))
>> return 0;
>> +
>> /* Defrag ioctl takes precedence over mount options and properties. */
>> if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS)
>> return 0;
>> @@ -928,7 +879,6 @@ static void compress_file_range(struct btrfs_work *work)
>> container_of(work, struct async_chunk, work);
>> struct btrfs_inode *inode = async_chunk->inode;
>> struct btrfs_fs_info *fs_info = inode->root->fs_info;
>> - struct address_space *mapping = inode->vfs_inode.i_mapping;
>> struct compressed_bio *cb = NULL;
>> u64 blocksize = fs_info->sectorsize;
>> u64 start = async_chunk->start;
>> @@ -1000,7 +950,7 @@ static void compress_file_range(struct btrfs_work *work)
>> * been flagged as NOCOMPRESS. This flag can change at any time if we
>> * discover bad compression ratios.
>> */
>> - if (!inode_need_compress(inode, start, end))
>> + if (!inode_need_compress(inode, start, end, false))
>> goto cleanup_and_bail_uncompressed;
>>
>> if (0 < inode->defrag_compress && inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
>> @@ -1021,35 +971,6 @@ static void compress_file_range(struct btrfs_work *work)
>> total_compressed = cb->bbio.bio.bi_iter.bi_size;
>> total_in = cur_len;
>>
>> - /*
>> - * Try to create an inline extent.
>> - *
>> - * If we didn't compress the entire range, try to create an uncompressed
>> - * inline extent, else a compressed one.
>> - *
>> - * Check cow_file_range() for why we don't even try to create inline
>> - * extent for the subpage case.
>> - */
>> - if (total_in < actual_end)
>> - ret = cow_file_range_inline(inode, NULL, start, end, 0,
>> - BTRFS_COMPRESS_NONE, NULL, false);
>> - else
>> - ret = cow_file_range_inline(inode, NULL, start, end, total_compressed,
>> - compress_type,
>> - bio_first_folio_all(&cb->bbio.bio), false);
>> - if (ret <= 0) {
>> - cleanup_compressed_bio(cb);
>> - if (ret < 0)
>> - mapping_set_error(mapping, -EIO);
>> - return;
>> - }
>> - /*
>> - * If a single block at file offset 0 cannot be inlined, fall back to
>> - * regular writes without marking the file incompressible.
>> - */
>> - if (start == 0 && end <= blocksize)
>> - goto cleanup_and_bail_uncompressed;
>> -
>> /*
>> * We aren't doing an inline extent. Round the compressed size up to a
>> * block size boundary so the allocator does sane things.
>> @@ -1427,11 +1348,6 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
>> *
>> * When this function fails, it unlocks all folios except @locked_folio.
>> *
>> - * When this function successfully creates an inline extent, it returns 1 and
>> - * unlocks all folios including locked_folio and starts I/O on them.
>> - * (In reality inline extents are limited to a single block, so locked_folio is
>> - * the only folio handled anyway).
>> - *
>> * When this function succeed and creates a normal extent, the folio locking
>> * status depends on the passed in flags:
>> *
>> @@ -1475,25 +1391,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>> ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
>>
>> inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
>> -
>> - if (!(flags & COW_FILE_RANGE_NO_INLINE)) {
>> - /* lets try to make an inline extent */
>> - ret = cow_file_range_inline(inode, locked_folio, start, end, 0,
>> - BTRFS_COMPRESS_NONE, NULL, false);
>> - if (ret <= 0) {
>> - /*
>> - * We succeeded, return 1 so the caller knows we're done
>> - * with this page and already handled the IO.
>> - *
>> - * If there was an error then cow_file_range_inline() has
>> - * already done the cleanup.
>> - */
>> - if (ret == 0)
>> - ret = 1;
>> - goto done;
>> - }
>> - }
>> -
>> alloc_hint = btrfs_get_extent_allocation_hint(inode, start, num_bytes);
>>
>> /*
>> @@ -1571,7 +1468,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>> }
>> extent_clear_unlock_delalloc(inode, orig_start, end, locked_folio, &cached,
>> EXTENT_LOCKED | EXTENT_DELALLOC, page_ops);
>> -done:
>> if (done_offset)
>> *done_offset = end;
>> return ret;
>> @@ -1874,7 +1770,7 @@ static int fallback_to_cow(struct btrfs_inode *inode,
>> * a locked folio, which can race with writeback.
>> */
>> ret = cow_file_range(inode, locked_folio, start, end, NULL,
>> - COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED);
>> + COW_FILE_RANGE_KEEP_LOCKED);
>> ASSERT(ret != 1);
>> return ret;
>> }
>> @@ -2425,6 +2321,80 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
>> return false;
>> }
>>
>> +/*
>> + * Return 0 if an inlined extent is created successfully.
>> + * Return <0 if critical error happened.
>> + * Return >0 if an inline extent can not be created.
>> + */
>> +static int run_delalloc_inline(struct btrfs_inode *inode, struct folio *locked_folio)
>> +{
>> + struct btrfs_fs_info *fs_info = inode->root->fs_info;
>> + struct compressed_bio *cb = NULL;
>> + struct extent_state *cached = NULL;
>> + const u64 i_size = i_size_read(&inode->vfs_inode);
>> + const u32 blocksize = fs_info->sectorsize;
>> + int compress_type = fs_info->compress_type;
>> + int compress_level = fs_info->compress_level;
>> + u32 compressed_size = 0;
>> + int ret;
>> +
>> + ASSERT(folio_pos(locked_folio) == 0);
>> +
>> + if (btrfs_inode_can_compress(inode) &&
>> + inode_need_compress(inode, 0, blocksize, true)) {
>> + if (inode->defrag_compress > 0 &&
>> + inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
>> + compress_type = inode->defrag_compress;
>> + compress_level = inode->defrag_compress_level;
>> + } else if (inode->prop_compress) {
>> + compress_type = inode->prop_compress;
>> + }
>> + cb = btrfs_compress_bio(inode, 0, blocksize, compress_type, compress_level, 0);
>> + if (IS_ERR(cb)) {
>> + cb = NULL;
>> + /* Just fall back to non-compressed case. */
>> + } else {
>> + compressed_size = cb->bbio.bio.bi_iter.bi_size;
>> + }
>> + }
>> + if (!can_cow_file_range_inline(inode, 0, i_size, compressed_size)) {
>> + if (cb)
>> + cleanup_compressed_bio(cb);
>> + return 1;
>> + }
>> +
>> + btrfs_lock_extent(&inode->io_tree, 0, blocksize - 1, &cached);
>> + if (cb)
>> + ret = __cow_file_range_inline(inode, i_size, compressed_size, compress_type,
>> + bio_first_folio_all(&cb->bbio.bio), false);
>> + else
>> + ret = __cow_file_range_inline(inode, i_size, 0, BTRFS_COMPRESS_NONE,
>> + NULL, false);
>> + /*
>> + * In the successful case (ret == 0 here), run_delalloc_inline() will return 1.
>
> As far as I can tell, this is inside run_delalloc_inline() and we do not
> further manipulate ret to become 1, so this comment seems to have an
> error.
>
> Was it meant to say that the caller, btrfs_run_delalloc_range(), will
> return 1?
Right, the comment is not properly updated for the new situation.
>
>> + *
>> + * Quite a bit further up the callstack in extent_writepage(), ret == 1
>> + * is treated as a short circuited success and does not unlock the folio,
>> + * so we must do it here.
>> + *
>> + * For failure case, the @locked_folio does get unlocked by
>> + * btrfs_folio_end_lock_bitmap(), so we must *not* unlock it here.
>> + *
>> + * So if ret == 0, we let extent_clear_unlock_delalloc() to unlock the
>> + * folio by passing NULL as @locked_folio.
>> + * Otherwise pass @locked_folio as usual.
>> + */
>> + if (ret == 0)
>> + locked_folio = NULL;
>
> I think this might be a bug.
>
> when ret == 1, we do fallback but have cleared delalloc, so the caller
> will call cow_file_range without the delalloc bits set, and double call
> extent_clear_unlock_delalloc()
Correct, I'll update the comment of __cow_file_range_inline(), and make the
ret > 0 case exit directly without calling extent_clear_unlock_delalloc().
Thanks a lot for reviewing and catching this critical bug,
Qu
>
>> + extent_clear_unlock_delalloc(inode, 0, blocksize - 1, locked_folio, &cached,
>> + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
>> + EXTENT_DO_ACCOUNTING | EXTENT_LOCKED,
>> + PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
>> + if (cb)
>> + cleanup_compressed_bio(cb);
>> + return ret;
>> +}
>> +
>> /*
>> * Function to process delayed allocation (create CoW) for ranges which are
>> * being touched for the first time.
>> @@ -2441,11 +2411,26 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
>> ASSERT(!(end <= folio_pos(locked_folio) ||
>> start >= folio_next_pos(locked_folio)));
>>
>> + if (start == 0 && end + 1 <= inode->root->fs_info->sectorsize &&
>> + end + 1 >= inode->disk_i_size) {
>> + int ret;
>> +
>> + ret = run_delalloc_inline(inode, locked_folio);
>> + if (ret < 0)
>> + return ret;
>> + if (ret == 0)
>> + return 1;
>> + /*
>> + * Continue regular handling if we can not create an
>> + * inlined extent.
>> + */
>> + }
>> +
>> if (should_nocow(inode, start, end))
>> return run_delalloc_nocow(inode, locked_folio, start, end);
>>
>> if (btrfs_inode_can_compress(inode) &&
>> - inode_need_compress(inode, start, end) &&
>> + inode_need_compress(inode, start, end, false) &&
>> run_delalloc_compressed(inode, locked_folio, start, end, wbc))
>> return 1;
>>
>> --
>> 2.53.0
>>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-03-04 4:18 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-25 22:53 [PATCH v4] btrfs: extract inlined creation into a dedicated delalloc helper Qu Wenruo
2026-03-04 4:02 ` Boris Burkov
2026-03-04 4:18 ` Qu Wenruo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox