From: Boris Burkov <boris@bur.io>
To: Mark Harmstone <mark@harmstone.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v3 16/17] btrfs: handle discarding fully-remapped block groups
Date: Tue, 14 Oct 2025 21:54:42 -0700 [thread overview]
Message-ID: <20251015045442.GI1702774@zen.localdomain> (raw)
In-Reply-To: <20251009112814.13942-17-mark@harmstone.com>
On Thu, Oct 09, 2025 at 12:28:11PM +0100, Mark Harmstone wrote:
> Discard normally works by iterating over the free-space entries of a
> block group. This doesn't work for fully-remapped block groups, as we
> removed their free-space entries when we started relocation.
>
> For sync discard, call btrfs_discard_extent() when we commit the
> transaction in which the last identity remap was removed.
>
> For async discard, add a new function btrfs_trim_fully_remapped_block_group()
> to be called by the discard worker, which iterates over the block
> group's range using the normal async discard rules. Once we reach the
> end, remove the chunk's stripes and device extents to get back its free
> space.
>
> Signed-off-by: Mark Harmstone <mark@harmstone.com>
> ---
> fs/btrfs/block-group.c | 2 +
> fs/btrfs/block-group.h | 1 +
> fs/btrfs/discard.c | 61 +++++++++++++++++++++++++----
> fs/btrfs/extent-tree.c | 4 ++
> fs/btrfs/free-space-cache.c | 77 +++++++++++++++++++++++++++++++++++++
> fs/btrfs/free-space-cache.h | 1 +
> fs/btrfs/transaction.c | 8 ++--
> 7 files changed, 144 insertions(+), 10 deletions(-)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 3f7c45d17415..a7dfa6c95223 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -4828,4 +4828,6 @@ void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
>
> spin_unlock(&fs_info->unused_bgs_lock);
>
> + if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
> + btrfs_discard_queue_work(&fs_info->discard_ctl, bg);
> }
> diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
> index c6866a57ea24..3c086fbb0a7c 100644
> --- a/fs/btrfs/block-group.h
> +++ b/fs/btrfs/block-group.h
> @@ -49,6 +49,7 @@ enum btrfs_discard_state {
> BTRFS_DISCARD_EXTENTS,
> BTRFS_DISCARD_BITMAPS,
> BTRFS_DISCARD_RESET_CURSOR,
> + BTRFS_DISCARD_FULLY_REMAPPED,
> };
>
> /*
> diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
> index 2b7b1e440bc8..421f48a6603a 100644
> --- a/fs/btrfs/discard.c
> +++ b/fs/btrfs/discard.c
> @@ -241,8 +241,17 @@ static struct btrfs_block_group *peek_discard_list(
> block_group = find_next_block_group(discard_ctl, now);
>
> if (block_group && now >= block_group->discard_eligible_time) {
> + bool unused;
> +
> + if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED) {
> + unused = block_group->remap_bytes == 0 &&
> + block_group->identity_remap_count == 0;
> + } else {
> + unused = block_group->used == 0;
> + }
> +
This pattern for computing unused gets repeated enough times in this file
that I think a static helper is merited.
> if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
> - block_group->used != 0) {
> + !unused) {
> if (btrfs_is_block_group_data_only(block_group)) {
> __add_to_discard_list(discard_ctl, block_group);
> /*
> @@ -267,7 +276,15 @@ static struct btrfs_block_group *peek_discard_list(
> }
> if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
> block_group->discard_cursor = block_group->start;
> - block_group->discard_state = BTRFS_DISCARD_EXTENTS;
> +
> + if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
> + unused) {
> + block_group->discard_state =
> + BTRFS_DISCARD_FULLY_REMAPPED;
> + } else {
> + block_group->discard_state =
> + BTRFS_DISCARD_EXTENTS;
> + }
> }
> }
> if (block_group) {
> @@ -370,10 +387,19 @@ void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
> void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
> struct btrfs_block_group *block_group)
> {
> + bool unused;
> +
> if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
> return;
>
> - if (block_group->used == 0 && block_group->remap_bytes == 0)
> + if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED) {
> + unused = block_group->remap_bytes == 0 &&
> + block_group->identity_remap_count == 0;
> + } else {
> + unused = block_group->used == 0;
> + }
> +
> + if (unused)
> add_to_discard_unused_list(discard_ctl, block_group);
> else
> add_to_discard_list(discard_ctl, block_group);
> @@ -468,9 +494,18 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
> static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
> struct btrfs_block_group *block_group)
> {
> + bool unused;
> +
> remove_from_discard_list(discard_ctl, block_group);
>
> - if (block_group->used == 0) {
> + if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED) {
> + unused = block_group->remap_bytes == 0 &&
> + block_group->identity_remap_count == 0;
> + } else {
> + unused = block_group->used == 0;
> + }
> +
> + if (unused) {
> if (btrfs_is_free_space_trimmed(block_group))
> btrfs_mark_bg_unused(block_group);
> else
> @@ -524,7 +559,8 @@ static void btrfs_discard_workfn(struct work_struct *work)
> /* Perform discarding */
> minlen = discard_minlen[discard_index];
>
> - if (discard_state == BTRFS_DISCARD_BITMAPS) {
> + switch (discard_state) {
> + case BTRFS_DISCARD_BITMAPS: {
> u64 maxlen = 0;
>
> /*
> @@ -541,17 +577,28 @@ static void btrfs_discard_workfn(struct work_struct *work)
> btrfs_block_group_end(block_group),
> minlen, maxlen, true);
> discard_ctl->discard_bitmap_bytes += trimmed;
> - } else {
> +
> + break;
> + }
> +
> + case BTRFS_DISCARD_FULLY_REMAPPED:
> + btrfs_trim_fully_remapped_block_group(block_group);
> + break;
> +
> + default:
> btrfs_trim_block_group_extents(block_group, &trimmed,
> block_group->discard_cursor,
> btrfs_block_group_end(block_group),
> minlen, true);
> discard_ctl->discard_extent_bytes += trimmed;
> +
> + break;
> }
>
> /* Determine next steps for a block_group */
> if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
> - if (discard_state == BTRFS_DISCARD_BITMAPS) {
> + if (discard_state == BTRFS_DISCARD_BITMAPS ||
> + discard_state == BTRFS_DISCARD_FULLY_REMAPPED) {
> btrfs_finish_discard_pass(discard_ctl, block_group);
> } else {
> block_group->discard_cursor = block_group->start;
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 2e3074612f39..03674d171ec0 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -2870,6 +2870,10 @@ int btrfs_handle_fully_remapped_bgs(struct btrfs_trans_handle *trans)
> return ret;
> }
>
> + if (!TRANS_ABORTED(trans))
> + btrfs_discard_extent(fs_info, block_group->start,
> + block_group->length, NULL, false);
> +
> /*
> * Set num_stripes to 0, so that btrfs_remove_dev_extents()
> * won't run a second time.
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index 6387f8d1c3a1..56f27487b632 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -29,6 +29,7 @@
> #include "file-item.h"
> #include "file.h"
> #include "super.h"
> +#include "relocation.h"
>
> #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
> #define MAX_CACHE_BYTES_PER_GIG SZ_64K
> @@ -3063,6 +3064,12 @@ bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
> struct rb_node *node;
> bool ret = true;
>
> + if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
> + block_group->remap_bytes == 0 &&
> + block_group->identity_remap_count == 0) {
> + return true;
> + }
> +
> spin_lock(&ctl->tree_lock);
> node = rb_first(&ctl->free_space_offset);
>
> @@ -3827,6 +3834,76 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
> return ret;
> }
>
> +void btrfs_trim_fully_remapped_block_group(struct btrfs_block_group *bg)
> +{
> + struct btrfs_fs_info *fs_info = bg->fs_info;
> + struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
> + int ret = 0;
> + u64 bytes;
> + const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
> + u64 end = btrfs_block_group_end(bg);
> + struct btrfs_trans_handle *trans;
> + struct btrfs_chunk_map *map;
> +
> + while (bg->discard_cursor < end) {
> + u64 trimmed;
> +
> + bytes = end - bg->discard_cursor;
> +
> + if (max_discard_size &&
> + bytes >= (max_discard_size +
> + BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
> + bytes = max_discard_size;
> + }
> +
> + ret = btrfs_discard_extent(fs_info, bg->discard_cursor, bytes,
> + &trimmed, false);
> + if (ret || trimmed == 0)
> + return;
As opposed to btrfs_trim_block_group_extents() / trim_no_bitmap() this will
always do all the discarding in a tight loop without sending control
back up to the delayed work queue. As a result, the rate limiting is
unable to do its job; this will simply rip through the whole bg in steps
of max_discard_size.
Check out the `async` parameter to trim_no_bitamp() and how it breaks
the loop, which then lets the higher level metering operate properly.
> +
> + bg->discard_cursor += trimmed;
> +
> + if (btrfs_trim_interrupted())
> + return;
> +
> + cond_resched();
> + }
> +
> + trans = btrfs_start_transaction(fs_info->tree_root, 0);
> + if (IS_ERR(trans))
> + return;
> +
> + map = btrfs_get_chunk_map(fs_info, bg->start, 1);
> + if (IS_ERR(map)) {
> + ret = PTR_ERR(map);
> + btrfs_abort_transaction(trans, ret);
> + return;
> + }
> +
> + ret = btrfs_last_identity_remap_gone(trans, map, bg);
> + if (ret) {
> + btrfs_free_chunk_map(map);
> + btrfs_abort_transaction(trans, ret);
> + return;
> + }
> +
> + btrfs_end_transaction(trans);
> +
> + /*
> + * Set num_stripes to 0, so that btrfs_remove_dev_extents()
> + * won't run a second time.
> + */
> + map->num_stripes = 0;
> +
> + btrfs_free_chunk_map(map);
> +
> + if (bg->used == 0) {
> + spin_lock(&fs_info->unused_bgs_lock);
> + list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
> + spin_unlock(&fs_info->unused_bgs_lock);
> + }
> +}
> +
> /*
> * If we break out of trimming a bitmap prematurely, we should reset the
> * trimming bit. In a rather contrived case, it's possible to race here so
> diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
> index 9f1dbfdee8ca..33fc3b245648 100644
> --- a/fs/btrfs/free-space-cache.h
> +++ b/fs/btrfs/free-space-cache.h
> @@ -166,6 +166,7 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
> int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
> u64 *trimmed, u64 start, u64 end, u64 minlen,
> u64 maxlen, bool async);
> +void btrfs_trim_fully_remapped_block_group(struct btrfs_block_group *bg);
>
> bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info);
> int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active);
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index 6ce91397afe6..b0a5254ba214 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -2438,9 +2438,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
> if (ret)
> goto unlock_reloc;
>
> - ret = btrfs_handle_fully_remapped_bgs(trans);
> - if (ret)
> - goto unlock_reloc;
> + if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
> + ret = btrfs_handle_fully_remapped_bgs(trans);
> + if (ret)
> + goto unlock_reloc;
> + }
>
> /*
> * make sure none of the code above managed to slip in a
> --
> 2.49.1
>
next prev parent reply other threads:[~2025-10-15 4:55 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-09 11:27 [PATCH v3 00/17] Remap tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 01/17] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 02/17] btrfs: add REMAP chunk type Mark Harmstone
2025-10-15 3:37 ` Boris Burkov
2025-10-20 9:58 ` Mark Harmstone
2025-10-20 17:35 ` Boris Burkov
2025-10-09 11:27 ` [PATCH v3 03/17] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-10-15 3:47 ` Boris Burkov
2025-10-20 12:15 ` Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 04/17] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 05/17] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 06/17] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 07/17] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-10-15 3:55 ` Boris Burkov
2025-10-20 11:32 ` Mark Harmstone
2025-10-20 17:44 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 08/17] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-10-15 4:21 ` Boris Burkov
2025-10-20 14:31 ` Mark Harmstone
2025-10-20 17:44 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 09/17] btrfs: release BG lock before calling btrfs_link_bg_list() Mark Harmstone
2025-10-09 11:56 ` Filipe Manana
2025-10-09 14:58 ` Mark Harmstone
2025-10-09 15:16 ` Filipe Manana
2025-10-09 16:30 ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 10/17] btrfs: handle deletions from remapped block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 11/17] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 12/17] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 13/17] btrfs: replace identity maps with actual remaps when doing relocations Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 14/17] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 15/17] btrfs: allow balancing remap tree Mark Harmstone
2025-10-15 4:24 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 16/17] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-10-15 4:54 ` Boris Burkov [this message]
2025-10-23 17:35 ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 17/17] btrfs: add stripe removal pending flag Mark Harmstone
2025-10-15 5:05 ` Boris Burkov
2025-10-20 14:52 ` Mark Harmstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251015045442.GI1702774@zen.localdomain \
--to=boris@bur.io \
--cc=linux-btrfs@vger.kernel.org \
--cc=mark@harmstone.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox