public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Mark Harmstone <mark@harmstone.com>
To: Boris Burkov <boris@bur.io>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v4 15/16] btrfs: handle discarding fully-remapped block groups
Date: Mon, 3 Nov 2025 17:01:22 +0000	[thread overview]
Message-ID: <e39be7bb-c37e-4585-95bb-c0c09f7ee2c2@harmstone.com> (raw)
In-Reply-To: <aQU0CDKWD4LoXdCB@devvm12410.ftw0.facebook.com>

On 31/10/2025 10.11 pm, Boris Burkov wrote:
> On Fri, Oct 24, 2025 at 07:12:16PM +0100, Mark Harmstone wrote:
>> Discard normally works by iterating over the free-space entries of a
>> block group. This doesn't work for fully-remapped block groups, as we
>> removed their free-space entries when we started relocation.
>>
>> For sync discard, call btrfs_discard_extent() when we commit the
>> transaction in which the last identity remap was removed.
>>
>> For async discard, add a new function btrfs_trim_fully_remapped_block_group()
>> to be called by the discard worker, which iterates over the block
>> group's range using the normal async discard rules. Once we reach the
>> end, remove the chunk's stripes and device extents to get back its free
>> space.
>>
>> Signed-off-by: Mark Harmstone <mark@harmstone.com>
>> ---
>>   fs/btrfs/block-group.c      |  2 ++
>>   fs/btrfs/block-group.h      |  1 +
>>   fs/btrfs/discard.c          | 57 ++++++++++++++++++++++++++----
>>   fs/btrfs/extent-tree.c      | 10 ++++++
>>   fs/btrfs/free-space-cache.c | 70 +++++++++++++++++++++++++++++++++++++
>>   fs/btrfs/free-space-cache.h |  1 +
>>   6 files changed, 134 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
>> index 8feddb472882..0c91553b02cf 100644
>> --- a/fs/btrfs/block-group.c
>> +++ b/fs/btrfs/block-group.c
>> @@ -4833,4 +4833,6 @@ void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
>>   
>>   	spin_unlock(&fs_info->unused_bgs_lock);
>>   
>> +	if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
>> +		btrfs_discard_queue_work(&fs_info->discard_ctl, bg);
>>   }
>> diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
>> index 4522074a45c2..b0b16efea19a 100644
>> --- a/fs/btrfs/block-group.h
>> +++ b/fs/btrfs/block-group.h
>> @@ -49,6 +49,7 @@ enum btrfs_discard_state {
>>   	BTRFS_DISCARD_EXTENTS,
>>   	BTRFS_DISCARD_BITMAPS,
>>   	BTRFS_DISCARD_RESET_CURSOR,
>> +	BTRFS_DISCARD_FULLY_REMAPPED,
>>   };
>>   
>>   /*
>> diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
>> index ee5f5b2788e1..f9890037395a 100644
>> --- a/fs/btrfs/discard.c
>> +++ b/fs/btrfs/discard.c
>> @@ -215,6 +215,27 @@ static struct btrfs_block_group *find_next_block_group(
>>   	return ret_block_group;
>>   }
>>   
>> +/*
>> + * Returns whether a block group is empty.
>> + *
>> + * @block_group: block_group of interest
>> + *
>> + * "Empty" here means that there are no extents physically located within the
>> + * device extents corresponding to this block group.
>> + *
>> + * For a remapped block group, this means that all of its identity remaps have
>> + * been removed. For a non-remapped block group, this means that no extents
>> + * have an address within its range, and that nothing has been remapped to be
>> + * within it.
>> + */
>> +static bool block_group_is_empty(struct btrfs_block_group *block_group)
>> +{
>> +	if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)
>> +		return block_group->identity_remap_count == 0;
>> +	else
>> +		return block_group->used == 0 && block_group->remap_bytes == 0;
>> +}
>> +
>>   /*
>>    * Look up next block group and set it for use.
>>    *
>> @@ -241,8 +262,10 @@ static struct btrfs_block_group *peek_discard_list(
>>   	block_group = find_next_block_group(discard_ctl, now);
>>   
>>   	if (block_group && now >= block_group->discard_eligible_time) {
>> +		bool empty = block_group_is_empty(block_group);
>> +
>>   		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
>> -		    block_group->used != 0) {
>> +		    !empty) {
>>   			if (btrfs_is_block_group_data_only(block_group)) {
>>   				__add_to_discard_list(discard_ctl, block_group);
>>   				/*
>> @@ -267,7 +290,15 @@ static struct btrfs_block_group *peek_discard_list(
>>   		}
>>   		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
>>   			block_group->discard_cursor = block_group->start;
>> -			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
>> +
>> +			if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
>> +			    empty) {
>> +				block_group->discard_state =
>> +					BTRFS_DISCARD_FULLY_REMAPPED;
>> +			} else {
>> +				block_group->discard_state =
>> +					BTRFS_DISCARD_EXTENTS;
>> +			}
>>   		}
>>   	}
>>   	if (block_group) {
>> @@ -373,7 +404,7 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
>>   	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
>>   		return;
>>   
>> -	if (block_group->used == 0 && block_group->remap_bytes == 0)
>> +	if (block_group_is_empty(block_group))
>>   		add_to_discard_unused_list(discard_ctl, block_group);
>>   	else
>>   		add_to_discard_list(discard_ctl, block_group);
>> @@ -470,7 +501,7 @@ static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
>>   {
>>   	remove_from_discard_list(discard_ctl, block_group);
>>   
>> -	if (block_group->used == 0) {
>> +	if (block_group_is_empty(block_group)) {
>>   		if (btrfs_is_free_space_trimmed(block_group))
>>   			btrfs_mark_bg_unused(block_group);
>>   		else
>> @@ -524,7 +555,8 @@ static void btrfs_discard_workfn(struct work_struct *work)
>>   	/* Perform discarding */
>>   	minlen = discard_minlen[discard_index];
>>   
>> -	if (discard_state == BTRFS_DISCARD_BITMAPS) {
>> +	switch (discard_state) {
>> +	case BTRFS_DISCARD_BITMAPS: {
>>   		u64 maxlen = 0;
>>   
>>   		/*
>> @@ -541,17 +573,28 @@ static void btrfs_discard_workfn(struct work_struct *work)
>>   				       btrfs_block_group_end(block_group),
>>   				       minlen, maxlen, true);
>>   		discard_ctl->discard_bitmap_bytes += trimmed;
>> -	} else {
>> +
>> +		break;
>> +	}
>> +
>> +	case BTRFS_DISCARD_FULLY_REMAPPED:
>> +		btrfs_trim_fully_remapped_block_group(block_group);
>> +		break;
>> +
>> +	default:
>>   		btrfs_trim_block_group_extents(block_group, &trimmed,
>>   				       block_group->discard_cursor,
>>   				       btrfs_block_group_end(block_group),
>>   				       minlen, true);
>>   		discard_ctl->discard_extent_bytes += trimmed;
>> +
>> +		break;
>>   	}
>>   
>>   	/* Determine next steps for a block_group */
>>   	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
>> -		if (discard_state == BTRFS_DISCARD_BITMAPS) {
>> +		if (discard_state == BTRFS_DISCARD_BITMAPS ||
>> +		    discard_state == BTRFS_DISCARD_FULLY_REMAPPED) {
>>   			btrfs_finish_discard_pass(discard_ctl, block_group);
>>   		} else {
>>   			block_group->discard_cursor = block_group->start;
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 82dc88915b7e..82d102a157e9 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -2860,6 +2860,12 @@ int btrfs_handle_fully_remapped_bgs(struct btrfs_trans_handle *trans)
>>   	list_for_each_entry_safe(block_group, tmp, fully_remapped_bgs, bg_list) {
>>   		struct btrfs_chunk_map *map;
>>   
>> +		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
> 
> Any reason to queue this when DISCARD_ASYNC isn't set?
> i.e., put this in the if (btrfs_test_opt(..)) below?

Oops, you're right - btrfs_discard_queue_work() returns early if 
DISCARD_ASYNC isn't set, so we might as well move it into the if.

> 
>> +
>> +		/* for async discard the below gets done in discard job */
>> +		if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
>> +			continue;
>> +
>>   		map = btrfs_get_chunk_map(fs_info, block_group->start, 1);
>>   		if (IS_ERR(map))
>>   			return PTR_ERR(map);
>> @@ -2870,6 +2876,10 @@ int btrfs_handle_fully_remapped_bgs(struct btrfs_trans_handle *trans)
>>   			return ret;
>>   		}
>>   
>> +		if (!TRANS_ABORTED(trans))
>> +			btrfs_discard_extent(fs_info, block_group->start,
>> +					     block_group->length, NULL, false);
>> +
>>   		/*
>>   		 * Set num_stripes to 0, so that btrfs_remove_dev_extents()
>>   		 * won't run a second time.
>> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
>> index 91670d0af179..5d5e3401e723 100644
>> --- a/fs/btrfs/free-space-cache.c
>> +++ b/fs/btrfs/free-space-cache.c
>> @@ -29,6 +29,7 @@
>>   #include "file-item.h"
>>   #include "file.h"
>>   #include "super.h"
>> +#include "relocation.h"
>>   
>>   #define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
>>   #define MAX_CACHE_BYTES_PER_GIG	SZ_64K
>> @@ -3066,6 +3067,11 @@ bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
>>   	struct rb_node *node;
>>   	bool ret = true;
>>   
>> +	if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
>> +	    block_group->identity_remap_count == 0) {
>> +		return true;
>> +	}
>> +
>>   	spin_lock(&ctl->tree_lock);
>>   	node = rb_first(&ctl->free_space_offset);
>>   
>> @@ -3830,6 +3836,70 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
>>   	return ret;
>>   }
>>   
>> +void btrfs_trim_fully_remapped_block_group(struct btrfs_block_group *bg)
>> +{
>> +	struct btrfs_fs_info *fs_info = bg->fs_info;
>> +	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
>> +	int ret = 0;
>> +	u64 bytes, trimmed;
>> +	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
>> +	u64 end = btrfs_block_group_end(bg);
>> +	struct btrfs_trans_handle *trans;
>> +	struct btrfs_chunk_map *map;
>> +
>> +	bytes = end - bg->discard_cursor;
>> +
>> +	if (max_discard_size &&
>> +		bytes >= (max_discard_size +
>> +			BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
>> +		bytes = max_discard_size;
>> +	}
>> +
>> +	ret = btrfs_discard_extent(fs_info, bg->discard_cursor, bytes, &trimmed,
>> +				   false);
>> +	if (ret)
>> +		return;
>> +
>> +	bg->discard_cursor += trimmed;
>> +
>> +	if (bg->discard_cursor < end)
>> +		return;
>> +
>> +	trans = btrfs_start_transaction(fs_info->tree_root, 0);
>> +	if (IS_ERR(trans))
>> +		return;
>> +
>> +	map = btrfs_get_chunk_map(fs_info, bg->start, 1);
>> +	if (IS_ERR(map)) {
>> +		ret = PTR_ERR(map);
>> +		btrfs_abort_transaction(trans, ret);
>> +		return;
>> +	}
>> +
>> +	ret = btrfs_last_identity_remap_gone(trans, map, bg);
>> +	if (ret) {
>> +		btrfs_free_chunk_map(map);
>> +		btrfs_abort_transaction(trans, ret);
>> +		return;
>> +	}
>> +
>> +	btrfs_end_transaction(trans);
>> +
>> +	/*
>> +	 * Set num_stripes to 0, so that btrfs_remove_dev_extents()
>> +	 * won't run a second time.
>> +	 */
>> +	map->num_stripes = 0;
>> +
>> +	btrfs_free_chunk_map(map);
>> +
>> +	if (bg->used == 0) {
>> +		spin_lock(&fs_info->unused_bgs_lock);
>> +		list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
>> +		spin_unlock(&fs_info->unused_bgs_lock);
>> +	}
>> +}
>> +
>>   /*
>>    * If we break out of trimming a bitmap prematurely, we should reset the
>>    * trimming bit.  In a rather contrived case, it's possible to race here so
>> diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
>> index 9f1dbfdee8ca..33fc3b245648 100644
>> --- a/fs/btrfs/free-space-cache.h
>> +++ b/fs/btrfs/free-space-cache.h
>> @@ -166,6 +166,7 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
>>   int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
>>   				   u64 *trimmed, u64 start, u64 end, u64 minlen,
>>   				   u64 maxlen, bool async);
>> +void btrfs_trim_fully_remapped_block_group(struct btrfs_block_group *bg);
>>   
>>   bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info);
>>   int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active);
>> -- 
>> 2.49.1
>>


  reply	other threads:[~2025-11-03 17:01 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-24 18:12 [PATCH v4 00/16] Remap tree Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 01/16] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-10-31 22:50   ` Boris Burkov
2025-11-03 12:18     ` Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 02/16] btrfs: add REMAP chunk type Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 03/16] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-10-31 21:39   ` Boris Burkov
2025-10-24 18:12 ` [PATCH v4 04/16] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-10-31 21:44   ` Boris Burkov
2025-11-03 12:39     ` Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 05/16] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 06/16] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-10-31 21:47   ` Boris Burkov
2025-10-24 18:12 ` [PATCH v4 07/16] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 08/16] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-10-31 22:03   ` Boris Burkov
2025-10-24 18:12 ` [PATCH v4 09/16] btrfs: handle deletions from remapped block group Mark Harmstone
2025-10-31 23:05   ` Boris Burkov
2025-11-03 15:51     ` Mark Harmstone
2025-10-31 23:30   ` Boris Burkov
2025-11-04 12:30     ` Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 10/16] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-10-31 23:43   ` Boris Burkov
2025-11-03 18:45     ` Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 11/16] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-11-01  0:02   ` Boris Burkov
2025-11-04 13:00     ` Mark Harmstone
2025-11-01  0:10   ` Boris Burkov
2025-10-24 18:12 ` [PATCH v4 12/16] btrfs: replace identity remaps with actual remaps when doing relocations Mark Harmstone
2025-11-01  0:09   ` Boris Burkov
2025-11-04 14:31     ` Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 13/16] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-11-01  0:12   ` Boris Burkov
2025-10-24 18:12 ` [PATCH v4 14/16] btrfs: allow balancing remap tree Mark Harmstone
2025-10-24 18:12 ` [PATCH v4 15/16] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-10-27 16:04   ` kernel test robot
2025-10-31 22:12     ` Boris Burkov
2025-11-03 16:49       ` Mark Harmstone
2025-11-09  8:42         ` Philip Li
2025-10-31 22:11   ` Boris Burkov
2025-11-03 17:01     ` Mark Harmstone [this message]
2025-10-24 18:12 ` [PATCH v4 16/16] btrfs: add stripe removal pending flag Mark Harmstone

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e39be7bb-c37e-4585-95bb-c0c09f7ee2c2@harmstone.com \
    --to=mark@harmstone.com \
    --cc=boris@bur.io \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox