[PATCH] btrfs: warn about extent buffer that can not be released

public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] btrfs: warn about extent buffer that can not be released
@ 2026-04-05  4:56 Qu Wenruo
  2026-04-05 10:05 ` 말란말야
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Qu Wenruo @ 2026-04-05  4:56 UTC (permalink / raw)
  To: linux-btrfs; +Cc: AHN SEOK-YOUNG, Teng Liu

When we unmount the fs or during mount failures, btrfs will call
invalidate_inode_pages() to release all btree inode folios.

However that function can return -EBUSY if any folios can not be
invalidated.
This can be caused by:

- Some extent buffers are still held by btrfs
  This is a logic error, as we should release all tree root nodes
  during unmount and mount failure handling.

- Some extent buffers are under readahead and haven't yet finished
  This is much rarer but valid cases.
  In that case we should wait for those extent buffers.

Introduce a new helper invalidate_btree_folios() which will:

- Call invalidate_inode_pages2() and catch its return value
  If it returned 0 as expected, that's great and we can call it a day.

- Otherwise go through each extent buffer in buffer_tree
  For eb that still has EXTENT_BUFFER_READING flag, wait for them to
  finish first.

  Then check if the refs of the eb is greater than 1, if so it means
  it's still held by someone, give it a warning.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=221270
Reported-by: AHN SEOK-YOUNG <iamsyahn@gmail.com>
Cc: Teng Liu <27rabbitlt@gmail.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
This new debug feature has exposed some problems, e.g. generic/648,
after an injected failure for device barrier, transaction is aborted but
we have a lot of dirty tree blocks unreleased and triggered the new
warning.

We still need to fix these corner cases.

Meanwhile if Ahn or Teng can give this patch a try for the existing
use-after-free problem, it would be appreciated a lot.
---
 fs/btrfs/disk-io.c   | 52 ++++++++++++++++++++++++++++++++++++++++++--
 fs/btrfs/extent_io.c |  6 -----
 fs/btrfs/extent_io.h |  6 +++++
 3 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b8ac3275d8f8..d450bfc6d89b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3276,6 +3276,54 @@ static bool fs_is_full_ro(const struct btrfs_fs_info *fs_info)
 	return false;
 }
 
+static void invalidate_btree_folios(struct btrfs_fs_info *fs_info)
+{
+	XA_STATE(xas, &fs_info->buffer_tree, 0);
+	unsigned int checked = 0;
+	void *tmp;
+	int ret;
+
+	ret = invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+	if (likely(ret == 0))
+		return;
+
+	/*
+	 * Some btree pages can not be invalidated, this happens when some
+	 * tree blocks are still held (either by some pointer or readahead).
+	 */
+	xas_lock_irq(&xas);
+	xas_for_each(&xas, tmp, ULONG_MAX) {
+		struct extent_buffer *eb = tmp;
+
+		/* Wait for any readahead first. */
+		if (test_bit(EXTENT_BUFFER_READING, &eb->bflags)) {
+			xas_pause(&xas);
+			xas_unlock_irq(&xas);
+			wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
+				       TASK_UNINTERRUPTIBLE);
+			xas_lock_irq(&xas);
+		}
+
+		spin_lock(&eb->refs_lock);
+		if (refcount_read(&eb->refs) > 1 || extent_buffer_under_io(eb)) {
+			btrfs_warn(fs_info,
+			"unbale to release extent buffer %llu owner %llu flags 0x%lx",
+				   eb->start, btrfs_header_owner(eb), eb->bflags);
+			if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+				WARN_ON_ONCE(true);
+		}
+		spin_unlock(&eb->refs_lock);
+		if (++checked % XA_CHECK_SCHED)
+			continue;
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
+		cond_resched();
+		xas_lock_irq(&xas);
+	}
+	xas_unlock_irq(&xas);
+	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+}
+
 int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
 {
 	u32 sectorsize;
@@ -3706,7 +3754,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	if (fs_info->data_reloc_root)
 		btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
 	free_root_pointers(fs_info, true);
-	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+	invalidate_btree_folios(fs_info);
 
 fail_sb_buffer:
 	btrfs_stop_all_workers(fs_info);
@@ -4445,7 +4493,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
 	 * We must make sure there is not any read request to
 	 * submit after we stop all workers.
 	 */
-	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+	invalidate_btree_folios(fs_info);
 	btrfs_stop_all_workers(fs_info);
 
 	/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1ba8a7d3587b..a8887285deda 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2877,12 +2877,6 @@ bool try_release_extent_mapping(struct folio *folio, gfp_t mask)
 	return try_release_extent_state(io_tree, folio);
 }
 
-static int extent_buffer_under_io(const struct extent_buffer *eb)
-{
-	return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
-		test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-}
-
 static bool folio_range_has_eb(struct folio *folio)
 {
 	struct btrfs_folio_state *bfs;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index fd209233317f..b284aee1bfb0 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -326,6 +326,12 @@ static inline bool extent_buffer_uptodate(const struct extent_buffer *eb)
 	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 }
 
+static inline bool extent_buffer_under_io(const struct extent_buffer *eb)
+{
+	return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+		test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+}
+
 int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
 			 unsigned long start, unsigned long len);
 void read_extent_buffer(const struct extent_buffer *eb, void *dst,
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] btrfs: warn about extent buffer that can not be released
  2026-04-05  4:56 [PATCH] btrfs: warn about extent buffer that can not be released Qu Wenruo
@ 2026-04-05 10:05 ` 말란말야
  2026-04-08 22:18 ` Teng Liu
  2026-04-15 11:25 ` Filipe Manana
  2 siblings, 0 replies; 6+ messages in thread
From: 말란말야 @ 2026-04-05 10:05 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, Teng Liu

Hi Qu,

Thank you for the patch and for including me in the report.

Please understand that I am using an AI assistant to write this email
as my English is not very strong, but I wanted to make sure my message
is clear.

Unfortunately, I have already formatted the problematic drive to
recover my system, so I cannot reproduce the exact same corrupted
state to test this patch at the moment.

However, I am very honored and glad to hear that my report helped
improve BTRFS for everyone. Thank you for your hard work on this fix!

Best regards,

Seok-Young Ahn


2026년 4월 5일 (일) 오후 1:56, Qu Wenruo <wqu@suse.com>님이 작성:
>
> When we unmount the fs or during mount failures, btrfs will call
> invalidate_inode_pages() to release all btree inode folios.
>
> However that function can return -EBUSY if any folios can not be
> invalidated.
> This can be caused by:
>
> - Some extent buffers are still held by btrfs
>   This is a logic error, as we should release all tree root nodes
>   during unmount and mount failure handling.
>
> - Some extent buffers are under readahead and haven't yet finished
>   This is much rarer but valid cases.
>   In that case we should wait for those extent buffers.
>
> Introduce a new helper invalidate_btree_folios() which will:
>
> - Call invalidate_inode_pages2() and catch its return value
>   If it returned 0 as expected, that's great and we can call it a day.
>
> - Otherwise go through each extent buffer in buffer_tree
>   For eb that still has EXTENT_BUFFER_READING flag, wait for them to
>   finish first.
>
>   Then check if the refs of the eb is greater than 1, if so it means
>   it's still held by someone, give it a warning.
>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=221270
> Reported-by: AHN SEOK-YOUNG <iamsyahn@gmail.com>
> Cc: Teng Liu <27rabbitlt@gmail.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> This new debug feature has exposed some problems, e.g. generic/648,
> after an injected failure for device barrier, transaction is aborted but
> we have a lot of dirty tree blocks unreleased and triggered the new
> warning.
>
> We still need to fix these corner cases.
>
> Meanwhile if Ahn or Teng can give this patch a try for the existing
> use-after-free problem, it would be appreciated a lot.
> ---
>  fs/btrfs/disk-io.c   | 52 ++++++++++++++++++++++++++++++++++++++++++--
>  fs/btrfs/extent_io.c |  6 -----
>  fs/btrfs/extent_io.h |  6 +++++
>  3 files changed, 56 insertions(+), 8 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index b8ac3275d8f8..d450bfc6d89b 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3276,6 +3276,54 @@ static bool fs_is_full_ro(const struct btrfs_fs_info *fs_info)
>         return false;
>  }
>
> +static void invalidate_btree_folios(struct btrfs_fs_info *fs_info)
> +{
> +       XA_STATE(xas, &fs_info->buffer_tree, 0);
> +       unsigned int checked = 0;
> +       void *tmp;
> +       int ret;
> +
> +       ret = invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +       if (likely(ret == 0))
> +               return;
> +
> +       /*
> +        * Some btree pages can not be invalidated, this happens when some
> +        * tree blocks are still held (either by some pointer or readahead).
> +        */
> +       xas_lock_irq(&xas);
> +       xas_for_each(&xas, tmp, ULONG_MAX) {
> +               struct extent_buffer *eb = tmp;
> +
> +               /* Wait for any readahead first. */
> +               if (test_bit(EXTENT_BUFFER_READING, &eb->bflags)) {
> +                       xas_pause(&xas);
> +                       xas_unlock_irq(&xas);
> +                       wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
> +                                      TASK_UNINTERRUPTIBLE);
> +                       xas_lock_irq(&xas);
> +               }
> +
> +               spin_lock(&eb->refs_lock);
> +               if (refcount_read(&eb->refs) > 1 || extent_buffer_under_io(eb)) {
> +                       btrfs_warn(fs_info,
> +                       "unbale to release extent buffer %llu owner %llu flags 0x%lx",
> +                                  eb->start, btrfs_header_owner(eb), eb->bflags);
> +                       if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
> +                               WARN_ON_ONCE(true);
> +               }
> +               spin_unlock(&eb->refs_lock);
> +               if (++checked % XA_CHECK_SCHED)
> +                       continue;
> +               xas_pause(&xas);
> +               xas_unlock_irq(&xas);
> +               cond_resched();
> +               xas_lock_irq(&xas);
> +       }
> +       xas_unlock_irq(&xas);
> +       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +}
> +
>  int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
>  {
>         u32 sectorsize;
> @@ -3706,7 +3754,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>         if (fs_info->data_reloc_root)
>                 btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
>         free_root_pointers(fs_info, true);
> -       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +       invalidate_btree_folios(fs_info);
>
>  fail_sb_buffer:
>         btrfs_stop_all_workers(fs_info);
> @@ -4445,7 +4493,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
>          * We must make sure there is not any read request to
>          * submit after we stop all workers.
>          */
> -       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +       invalidate_btree_folios(fs_info);
>         btrfs_stop_all_workers(fs_info);
>
>         /*
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 1ba8a7d3587b..a8887285deda 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2877,12 +2877,6 @@ bool try_release_extent_mapping(struct folio *folio, gfp_t mask)
>         return try_release_extent_state(io_tree, folio);
>  }
>
> -static int extent_buffer_under_io(const struct extent_buffer *eb)
> -{
> -       return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
> -               test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
> -}
> -
>  static bool folio_range_has_eb(struct folio *folio)
>  {
>         struct btrfs_folio_state *bfs;
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index fd209233317f..b284aee1bfb0 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -326,6 +326,12 @@ static inline bool extent_buffer_uptodate(const struct extent_buffer *eb)
>         return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
>  }
>
> +static inline bool extent_buffer_under_io(const struct extent_buffer *eb)
> +{
> +       return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
> +               test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
> +}
> +
>  int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
>                          unsigned long start, unsigned long len);
>  void read_extent_buffer(const struct extent_buffer *eb, void *dst,
> --
> 2.53.0
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] btrfs: warn about extent buffer that can not be released
  2026-04-05  4:56 [PATCH] btrfs: warn about extent buffer that can not be released Qu Wenruo
  2026-04-05 10:05 ` 말란말야
@ 2026-04-08 22:18 ` Teng Liu
  2026-04-15 11:25 ` Filipe Manana
  2 siblings, 0 replies; 6+ messages in thread
From: Teng Liu @ 2026-04-08 22:18 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, AHN SEOK-YOUNG

On 2026-04-05 14:26, Qu Wenruo wrote:
> 
> Meanwhile if Ahn or Teng can give this patch a try for the existing
> use-after-free problem, it would be appreciated a lot.

I tested t his patch on a QEMU VM (kernel 7.0.0.-rc4 + this patch) with
a forged corrupted btrfs image.

With the patch, it fails gracefully:

    [root@btrfs-test ~]# mount /dev/sda /mnt
    [  107.497290] BTRFS error (device sda): parent transid verify failed on logical 1081344 mirror 1 wanted 5 found 1
    ...
    [  108.302268] BTRFS error (device sda): parent transid verify failed on logical 1228800 mirror 1 wanted 5 found 2
    [  108.302730] BTRFS error (device sda): bad tree block start, mirror 1 want 1245184 have 0
    ...
    [  108.306296] BTRFS error (device sda): bad tree block start, mirror 1 want 1392640 have 0
    [  109.098136] BTRFS error (device sda): devid 999 uuid 00000000-0000-0000-0000-000000000000 is missing
    [  109.099635] BTRFS error (device sda): failed to read chunk tree: -2
    [  109.998476] BTRFS error (device sda): open_ctree failed: -2
    mount: /mnt: fsconfig() failed: No such file or directory.
           dmesg(1) may have more information after failed mount system call.

No panic observed. Thanks!

Tested-by: Teng Liu <27rabbitlt@gmail.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] btrfs: warn about extent buffer that can not be released
  2026-04-05  4:56 [PATCH] btrfs: warn about extent buffer that can not be released Qu Wenruo
  2026-04-05 10:05 ` 말란말야
  2026-04-08 22:18 ` Teng Liu
@ 2026-04-15 11:25 ` Filipe Manana
  2026-04-15 23:02   ` Qu Wenruo
  2 siblings, 1 reply; 6+ messages in thread
From: Filipe Manana @ 2026-04-15 11:25 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, AHN SEOK-YOUNG, Teng Liu

On Sun, Apr 5, 2026 at 5:56 AM Qu Wenruo <wqu@suse.com> wrote:
>
> When we unmount the fs or during mount failures, btrfs will call
> invalidate_inode_pages() to release all btree inode folios.
>
> However that function can return -EBUSY if any folios can not be
> invalidated.
> This can be caused by:
>
> - Some extent buffers are still held by btrfs
>   This is a logic error, as we should release all tree root nodes
>   during unmount and mount failure handling.
>
> - Some extent buffers are under readahead and haven't yet finished
>   This is much rarer but valid cases.
>   In that case we should wait for those extent buffers.
>
> Introduce a new helper invalidate_btree_folios() which will:
>
> - Call invalidate_inode_pages2() and catch its return value
>   If it returned 0 as expected, that's great and we can call it a day.
>
> - Otherwise go through each extent buffer in buffer_tree
>   For eb that still has EXTENT_BUFFER_READING flag, wait for them to
>   finish first.
>
>   Then check if the refs of the eb is greater than 1, if so it means
>   it's still held by someone, give it a warning.
>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=221270
> Reported-by: AHN SEOK-YOUNG <iamsyahn@gmail.com>
> Cc: Teng Liu <27rabbitlt@gmail.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> This new debug feature has exposed some problems, e.g. generic/648,
> after an injected failure for device barrier, transaction is aborted but
> we have a lot of dirty tree blocks unreleased and triggered the new
> warning.
>
> We still need to fix these corner cases.
>
> Meanwhile if Ahn or Teng can give this patch a try for the existing
> use-after-free problem, it would be appreciated a lot.
> ---
>  fs/btrfs/disk-io.c   | 52 ++++++++++++++++++++++++++++++++++++++++++--
>  fs/btrfs/extent_io.c |  6 -----
>  fs/btrfs/extent_io.h |  6 +++++
>  3 files changed, 56 insertions(+), 8 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index b8ac3275d8f8..d450bfc6d89b 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3276,6 +3276,54 @@ static bool fs_is_full_ro(const struct btrfs_fs_info *fs_info)
>         return false;
>  }
>
> +static void invalidate_btree_folios(struct btrfs_fs_info *fs_info)
> +{
> +       XA_STATE(xas, &fs_info->buffer_tree, 0);
> +       unsigned int checked = 0;
> +       void *tmp;
> +       int ret;
> +
> +       ret = invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +       if (likely(ret == 0))
> +               return;
> +
> +       /*
> +        * Some btree pages can not be invalidated, this happens when some
> +        * tree blocks are still held (either by some pointer or readahead).
> +        */
> +       xas_lock_irq(&xas);
> +       xas_for_each(&xas, tmp, ULONG_MAX) {
> +               struct extent_buffer *eb = tmp;

Why use the tmp variable? We can pass 'eb' to xas_for_each().

> +
> +               /* Wait for any readahead first. */
> +               if (test_bit(EXTENT_BUFFER_READING, &eb->bflags)) {
> +                       xas_pause(&xas);
> +                       xas_unlock_irq(&xas);
> +                       wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
> +                                      TASK_UNINTERRUPTIBLE);
> +                       xas_lock_irq(&xas);
> +               }
> +
> +               spin_lock(&eb->refs_lock);
> +               if (refcount_read(&eb->refs) > 1 || extent_buffer_under_io(eb)) {
> +                       btrfs_warn(fs_info,
> +                       "unbale to release extent buffer %llu owner %llu flags 0x%lx",

unbale -> unable

> +                                  eb->start, btrfs_header_owner(eb), eb->bflags);
> +                       if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
> +                               WARN_ON_ONCE(true);

Pointless if, just do DEBUG_WARN();

Thanks.

> +               }
> +               spin_unlock(&eb->refs_lock);
> +               if (++checked % XA_CHECK_SCHED)
> +                       continue;
> +               xas_pause(&xas);
> +               xas_unlock_irq(&xas);
> +               cond_resched();
> +               xas_lock_irq(&xas);
> +       }
> +       xas_unlock_irq(&xas);
> +       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +}
> +
>  int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
>  {
>         u32 sectorsize;
> @@ -3706,7 +3754,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>         if (fs_info->data_reloc_root)
>                 btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
>         free_root_pointers(fs_info, true);
> -       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +       invalidate_btree_folios(fs_info);
>
>  fail_sb_buffer:
>         btrfs_stop_all_workers(fs_info);
> @@ -4445,7 +4493,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
>          * We must make sure there is not any read request to
>          * submit after we stop all workers.
>          */
> -       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> +       invalidate_btree_folios(fs_info);
>         btrfs_stop_all_workers(fs_info);
>
>         /*
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 1ba8a7d3587b..a8887285deda 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2877,12 +2877,6 @@ bool try_release_extent_mapping(struct folio *folio, gfp_t mask)
>         return try_release_extent_state(io_tree, folio);
>  }
>
> -static int extent_buffer_under_io(const struct extent_buffer *eb)
> -{
> -       return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
> -               test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
> -}
> -
>  static bool folio_range_has_eb(struct folio *folio)
>  {
>         struct btrfs_folio_state *bfs;
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index fd209233317f..b284aee1bfb0 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -326,6 +326,12 @@ static inline bool extent_buffer_uptodate(const struct extent_buffer *eb)
>         return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
>  }
>
> +static inline bool extent_buffer_under_io(const struct extent_buffer *eb)
> +{
> +       return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
> +               test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
> +}
> +
>  int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
>                          unsigned long start, unsigned long len);
>  void read_extent_buffer(const struct extent_buffer *eb, void *dst,
> --
> 2.53.0
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] btrfs: warn about extent buffer that can not be released
  2026-04-15 11:25 ` Filipe Manana
@ 2026-04-15 23:02   ` Qu Wenruo
  2026-04-16 10:22     ` Filipe Manana
  0 siblings, 1 reply; 6+ messages in thread
From: Qu Wenruo @ 2026-04-15 23:02 UTC (permalink / raw)
  To: Filipe Manana; +Cc: linux-btrfs, AHN SEOK-YOUNG, Teng Liu



在 2026/4/15 20:55, Filipe Manana 写道:
> On Sun, Apr 5, 2026 at 5:56 AM Qu Wenruo <wqu@suse.com> wrote:
>>
>> When we unmount the fs or during mount failures, btrfs will call
>> invalidate_inode_pages() to release all btree inode folios.
>>
>> However that function can return -EBUSY if any folios can not be
>> invalidated.
>> This can be caused by:
>>
>> - Some extent buffers are still held by btrfs
>>    This is a logic error, as we should release all tree root nodes
>>    during unmount and mount failure handling.
>>
>> - Some extent buffers are under readahead and haven't yet finished
>>    This is much rarer but valid cases.
>>    In that case we should wait for those extent buffers.
>>
>> Introduce a new helper invalidate_btree_folios() which will:
>>
>> - Call invalidate_inode_pages2() and catch its return value
>>    If it returned 0 as expected, that's great and we can call it a day.
>>
>> - Otherwise go through each extent buffer in buffer_tree
>>    For eb that still has EXTENT_BUFFER_READING flag, wait for them to
>>    finish first.
>>
>>    Then check if the refs of the eb is greater than 1, if so it means
>>    it's still held by someone, give it a warning.
>>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=221270
>> Reported-by: AHN SEOK-YOUNG <iamsyahn@gmail.com>
>> Cc: Teng Liu <27rabbitlt@gmail.com>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>> This new debug feature has exposed some problems, e.g. generic/648,
>> after an injected failure for device barrier, transaction is aborted but
>> we have a lot of dirty tree blocks unreleased and triggered the new
>> warning.
>>
>> We still need to fix these corner cases.
>>
>> Meanwhile if Ahn or Teng can give this patch a try for the existing
>> use-after-free problem, it would be appreciated a lot.
>> ---
>>   fs/btrfs/disk-io.c   | 52 ++++++++++++++++++++++++++++++++++++++++++--
>>   fs/btrfs/extent_io.c |  6 -----
>>   fs/btrfs/extent_io.h |  6 +++++
>>   3 files changed, 56 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index b8ac3275d8f8..d450bfc6d89b 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -3276,6 +3276,54 @@ static bool fs_is_full_ro(const struct btrfs_fs_info *fs_info)
>>          return false;
>>   }
>>
>> +static void invalidate_btree_folios(struct btrfs_fs_info *fs_info)
>> +{
>> +       XA_STATE(xas, &fs_info->buffer_tree, 0);
>> +       unsigned int checked = 0;
>> +       void *tmp;
>> +       int ret;
>> +
>> +       ret = invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
>> +       if (likely(ret == 0))
>> +               return;
>> +
>> +       /*
>> +        * Some btree pages can not be invalidated, this happens when some
>> +        * tree blocks are still held (either by some pointer or readahead).
>> +        */
>> +       xas_lock_irq(&xas);
>> +       xas_for_each(&xas, tmp, ULONG_MAX) {
>> +               struct extent_buffer *eb = tmp;
> 
> Why use the tmp variable? We can pass 'eb' to xas_for_each().
> 
>> +
>> +               /* Wait for any readahead first. */
>> +               if (test_bit(EXTENT_BUFFER_READING, &eb->bflags)) {
>> +                       xas_pause(&xas);
>> +                       xas_unlock_irq(&xas);
>> +                       wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
>> +                                      TASK_UNINTERRUPTIBLE);
>> +                       xas_lock_irq(&xas);
>> +               }
>> +
>> +               spin_lock(&eb->refs_lock);
>> +               if (refcount_read(&eb->refs) > 1 || extent_buffer_under_io(eb)) {
>> +                       btrfs_warn(fs_info,
>> +                       "unbale to release extent buffer %llu owner %llu flags 0x%lx",
> 
> unbale -> unable
> 
>> +                                  eb->start, btrfs_header_owner(eb), eb->bflags);
>> +                       if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
>> +                               WARN_ON_ONCE(true);
> 
> Pointless if, just do DEBUG_WARN();

The problem is we do not have DEBUG_WARN_ON_ONCE().

We already have cases triggering this during fstests, and there were a 
lot of ebs that are dirty in that case.

DEBUG_WARN() will flood the dmesg with the same call trace for each 
dirty eb.

I can add a new DEBUG_WARN_ON_ONCE() for this particular call site though.

Thanks,
Qu


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] btrfs: warn about extent buffer that can not be released
  2026-04-15 23:02   ` Qu Wenruo
@ 2026-04-16 10:22     ` Filipe Manana
  0 siblings, 0 replies; 6+ messages in thread
From: Filipe Manana @ 2026-04-16 10:22 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, AHN SEOK-YOUNG, Teng Liu

On Thu, Apr 16, 2026 at 12:02 AM Qu Wenruo <wqu@suse.com> wrote:
>
>
>
> 在 2026/4/15 20:55, Filipe Manana 写道:
> > On Sun, Apr 5, 2026 at 5:56 AM Qu Wenruo <wqu@suse.com> wrote:
> >>
> >> When we unmount the fs or during mount failures, btrfs will call
> >> invalidate_inode_pages() to release all btree inode folios.
> >>
> >> However that function can return -EBUSY if any folios can not be
> >> invalidated.
> >> This can be caused by:
> >>
> >> - Some extent buffers are still held by btrfs
> >>    This is a logic error, as we should release all tree root nodes
> >>    during unmount and mount failure handling.
> >>
> >> - Some extent buffers are under readahead and haven't yet finished
> >>    This is much rarer but valid cases.
> >>    In that case we should wait for those extent buffers.
> >>
> >> Introduce a new helper invalidate_btree_folios() which will:
> >>
> >> - Call invalidate_inode_pages2() and catch its return value
> >>    If it returned 0 as expected, that's great and we can call it a day.
> >>
> >> - Otherwise go through each extent buffer in buffer_tree
> >>    For eb that still has EXTENT_BUFFER_READING flag, wait for them to
> >>    finish first.
> >>
> >>    Then check if the refs of the eb is greater than 1, if so it means
> >>    it's still held by someone, give it a warning.
> >>
> >> Link: https://bugzilla.kernel.org/show_bug.cgi?id=221270
> >> Reported-by: AHN SEOK-YOUNG <iamsyahn@gmail.com>
> >> Cc: Teng Liu <27rabbitlt@gmail.com>
> >> Signed-off-by: Qu Wenruo <wqu@suse.com>
> >> ---
> >> This new debug feature has exposed some problems, e.g. generic/648,
> >> after an injected failure for device barrier, transaction is aborted but
> >> we have a lot of dirty tree blocks unreleased and triggered the new
> >> warning.
> >>
> >> We still need to fix these corner cases.
> >>
> >> Meanwhile if Ahn or Teng can give this patch a try for the existing
> >> use-after-free problem, it would be appreciated a lot.
> >> ---
> >>   fs/btrfs/disk-io.c   | 52 ++++++++++++++++++++++++++++++++++++++++++--
> >>   fs/btrfs/extent_io.c |  6 -----
> >>   fs/btrfs/extent_io.h |  6 +++++
> >>   3 files changed, 56 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> >> index b8ac3275d8f8..d450bfc6d89b 100644
> >> --- a/fs/btrfs/disk-io.c
> >> +++ b/fs/btrfs/disk-io.c
> >> @@ -3276,6 +3276,54 @@ static bool fs_is_full_ro(const struct btrfs_fs_info *fs_info)
> >>          return false;
> >>   }
> >>
> >> +static void invalidate_btree_folios(struct btrfs_fs_info *fs_info)
> >> +{
> >> +       XA_STATE(xas, &fs_info->buffer_tree, 0);
> >> +       unsigned int checked = 0;
> >> +       void *tmp;
> >> +       int ret;
> >> +
> >> +       ret = invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
> >> +       if (likely(ret == 0))
> >> +               return;
> >> +
> >> +       /*
> >> +        * Some btree pages can not be invalidated, this happens when some
> >> +        * tree blocks are still held (either by some pointer or readahead).
> >> +        */
> >> +       xas_lock_irq(&xas);
> >> +       xas_for_each(&xas, tmp, ULONG_MAX) {
> >> +               struct extent_buffer *eb = tmp;
> >
> > Why use the tmp variable? We can pass 'eb' to xas_for_each().
> >
> >> +
> >> +               /* Wait for any readahead first. */
> >> +               if (test_bit(EXTENT_BUFFER_READING, &eb->bflags)) {
> >> +                       xas_pause(&xas);
> >> +                       xas_unlock_irq(&xas);
> >> +                       wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING,
> >> +                                      TASK_UNINTERRUPTIBLE);
> >> +                       xas_lock_irq(&xas);
> >> +               }
> >> +
> >> +               spin_lock(&eb->refs_lock);
> >> +               if (refcount_read(&eb->refs) > 1 || extent_buffer_under_io(eb)) {
> >> +                       btrfs_warn(fs_info,
> >> +                       "unbale to release extent buffer %llu owner %llu flags 0x%lx",
> >
> > unbale -> unable
> >
> >> +                                  eb->start, btrfs_header_owner(eb), eb->bflags);
> >> +                       if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
> >> +                               WARN_ON_ONCE(true);
> >
> > Pointless if, just do DEBUG_WARN();
>
> The problem is we do not have DEBUG_WARN_ON_ONCE().
>
> We already have cases triggering this during fstests, and there were a
> lot of ebs that are dirty in that case.
>
> DEBUG_WARN() will flood the dmesg with the same call trace for each
> dirty eb.
>
> I can add a new DEBUG_WARN_ON_ONCE() for this particular call site though.

No need just for one case IMO.

I find code using WARN_ON(1), ASSERT(0), and so on, to be annoying,
unnecessary, and verbose.
We can pass a condition to avoid the if statement, making the code
shorter and clearer.

Instead of:

 if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
     WARN_ON_ONCE(true);

WARN_ON_ONCE(IS_ENABLED(CONFIG_BTRFS_DEBUG));

Thanks.
>
> Thanks,
> Qu
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-04-16 10:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-05  4:56 [PATCH] btrfs: warn about extent buffer that can not be released Qu Wenruo
2026-04-05 10:05 ` 말란말야
2026-04-08 22:18 ` Teng Liu
2026-04-15 11:25 ` Filipe Manana
2026-04-15 23:02   ` Qu Wenruo
2026-04-16 10:22     ` Filipe Manana

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox