From: Boris Burkov <boris@bur.io>
To: Mark Harmstone <mark@harmstone.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v6 12/16] btrfs: replace identity remaps with actual remaps when doing relocations
Date: Wed, 19 Nov 2025 16:21:03 -0800 [thread overview]
Message-ID: <20251120002103.GC2478853@zen.localdomain> (raw)
In-Reply-To: <20251114184745.9304-13-mark@harmstone.com>
On Fri, Nov 14, 2025 at 06:47:17PM +0000, Mark Harmstone wrote:
> Add a function do_remap_tree_reloc(), which does the actual work of
> doing a relocation using the remap tree.
>
> In a loop we call do_remap_reloc_trans(), which searches for the first
> identity remap for the block group. We call btrfs_reserve_extent() to
> find space elsewhere for it, and read the data into memory and write it
> to the new location. We then carve out the identity remap and replace it
> with an actual remap, which points to the new location in which to look.
>
> Once the last identity remap has been removed we call
> last_identity_remap_gone(), which, as with deletions, removes the
> chunk's stripes and device extents.
>
Reviewed-by: Boris Burkov <boris@bur.io>
> Signed-off-by: Mark Harmstone <mark@harmstone.com>
> ---
> fs/btrfs/relocation.c | 336 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 336 insertions(+)
>
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index a95899af811d..15c4a7c6b1ef 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -4636,6 +4636,61 @@ static int create_remap_tree_entries(struct btrfs_trans_handle *trans,
> return ret;
> }
>
> +static int find_next_identity_remap(struct btrfs_trans_handle *trans,
> + struct btrfs_path *path, u64 bg_end,
> + u64 last_start, u64 *start,
> + u64 *length)
> +{
> + int ret;
> + struct btrfs_key key, found_key;
> + struct btrfs_root *remap_root = trans->fs_info->remap_root;
> + struct extent_buffer *leaf;
> +
> + key.objectid = last_start;
> + key.type = BTRFS_IDENTITY_REMAP_KEY;
> + key.offset = 0;
> +
> + ret = btrfs_search_slot(trans, remap_root, &key, path, 0, 0);
> + if (ret < 0)
> + goto out;
> +
> + leaf = path->nodes[0];
> + while (true) {
> + if (path->slots[0] >= btrfs_header_nritems(leaf)) {
> + ret = btrfs_next_leaf(remap_root, path);
> +
> + if (ret != 0) {
> + if (ret == 1)
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + leaf = path->nodes[0];
> + }
> +
> + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
> +
> + if (found_key.objectid >= bg_end) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + if (found_key.type == BTRFS_IDENTITY_REMAP_KEY) {
> + *start = found_key.objectid;
> + *length = found_key.offset;
> + ret = 0;
> + goto out;
> + }
> +
> + path->slots[0]++;
> + }
> +
> +out:
> + btrfs_release_path(path);
> +
> + return ret;
> +}
> +
> static int remove_chunk_stripes(struct btrfs_trans_handle *trans,
> struct btrfs_chunk_map *chunk,
> struct btrfs_path *path)
> @@ -4781,6 +4836,96 @@ static void adjust_identity_remap_count(struct btrfs_trans_handle *trans,
> btrfs_mark_bg_fully_remapped(bg, trans);
> }
>
> +static int add_remap_entry(struct btrfs_trans_handle *trans,
> + struct btrfs_path *path,
> + struct btrfs_block_group *src_bg, u64 old_addr,
> + u64 new_addr, u64 length)
> +{
> + struct btrfs_fs_info *fs_info = trans->fs_info;
> + struct btrfs_key key, new_key;
> + int ret;
> + int identity_count_delta = 0;
> +
> + key.objectid = old_addr;
> + key.type = (u8)-1;
> + key.offset = (u64)-1;
> +
> + ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, -1, 1);
> + if (ret < 0)
> + goto end;
> +
> + if (path->slots[0] == 0) {
> + ret = -ENOENT;
> + goto end;
> + }
> +
> + path->slots[0]--;
> +
> + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
> +
> + if (key.type != BTRFS_IDENTITY_REMAP_KEY ||
> + key.objectid > old_addr ||
> + key.objectid + key.offset <= old_addr) {
> + ret = -ENOENT;
> + goto end;
> + }
> +
> + /* Shorten or delete identity mapping entry. */
> +
> + if (key.objectid == old_addr) {
> + ret = btrfs_del_item(trans, fs_info->remap_root, path);
> + if (ret)
> + goto end;
> +
> + identity_count_delta--;
> + } else {
> + new_key.objectid = key.objectid;
> + new_key.type = BTRFS_IDENTITY_REMAP_KEY;
> + new_key.offset = old_addr - key.objectid;
> +
> + btrfs_set_item_key_safe(trans, path, &new_key);
> + }
> +
> + btrfs_release_path(path);
> +
> + /* Create new remap entry. */
> +
> + ret = add_remap_item(trans, path, new_addr, length, old_addr);
> + if (ret)
> + goto end;
> +
> + /* Add entry for remainder of identity mapping, if necessary. */
> +
> + if (key.objectid + key.offset != old_addr + length) {
> + new_key.objectid = old_addr + length;
> + new_key.type = BTRFS_IDENTITY_REMAP_KEY;
> + new_key.offset = key.objectid + key.offset - old_addr - length;
> +
> + ret = btrfs_insert_empty_item(trans, fs_info->remap_root,
> + path, &new_key, 0);
> + if (ret)
> + goto end;
> +
> + btrfs_release_path(path);
> +
> + identity_count_delta++;
> + }
> +
> + /* Add backref. */
> +
> + ret = add_remap_backref_item(trans, path, new_addr, length, old_addr);
> + if (ret)
> + goto end;
> +
> + if (identity_count_delta != 0)
> + adjust_identity_remap_count(trans, src_bg, identity_count_delta);
> +
> +end:
> + btrfs_release_path(path);
> +
> + return ret;
> +}
> +
> static int mark_chunk_remapped(struct btrfs_trans_handle *trans,
> struct btrfs_path *path, uint64_t start)
> {
> @@ -4830,6 +4975,189 @@ static int mark_chunk_remapped(struct btrfs_trans_handle *trans,
> return ret;
> }
>
> +static int do_remap_reloc_trans(struct btrfs_fs_info *fs_info,
> + struct btrfs_block_group *src_bg,
> + struct btrfs_path *path, u64 *last_start)
> +{
> + struct btrfs_trans_handle *trans;
> + struct btrfs_root *extent_root;
> + struct btrfs_key ins;
> + struct btrfs_block_group *dest_bg = NULL;
> + u64 start, remap_length, length, new_addr, min_size;
> + int ret;
> + bool no_more = false;
> + bool is_data = src_bg->flags & BTRFS_BLOCK_GROUP_DATA;
> + bool made_reservation = false, bg_needs_free_space;
> + struct btrfs_space_info *sinfo = src_bg->space_info;
> +
> + extent_root = btrfs_extent_root(fs_info, src_bg->start);
> +
> + trans = btrfs_start_transaction(extent_root, 0);
> + if (IS_ERR(trans))
> + return PTR_ERR(trans);
> +
> + mutex_lock(&fs_info->remap_mutex);
> +
> + ret = find_next_identity_remap(trans, path, src_bg->start + src_bg->length,
> + *last_start, &start, &remap_length);
> + if (ret == -ENOENT) {
> + no_more = true;
> + goto next;
> + } else if (ret) {
> + mutex_unlock(&fs_info->remap_mutex);
> + btrfs_end_transaction(trans);
> + return ret;
> + }
> +
> + /* Try to reserve enough space for block. */
> +
> + spin_lock(&sinfo->lock);
> + btrfs_space_info_update_bytes_may_use(sinfo, remap_length);
> + spin_unlock(&sinfo->lock);
> +
> + if (is_data)
> + min_size = fs_info->sectorsize;
> + else
> + min_size = fs_info->nodesize;
> +
> + /*
> + * We're using btrfs_reserve_extent() to allocate a contiguous
> + * logical address range, but this will become a remap item rather than
> + * an extent in the extent tree.
> + *
> + * Short allocations are fine: it means that we chop off the beginning
> + * of the identity remap that we're processing, and will tackle the
> + * rest of it the next time round.
> + */
> + ret = btrfs_reserve_extent(fs_info->fs_root, remap_length,
> + remap_length, min_size,
> + 0, 0, &ins, is_data, false);
> + if (ret) {
> + spin_lock(&sinfo->lock);
> + btrfs_space_info_update_bytes_may_use(sinfo, -remap_length);
> + spin_unlock(&sinfo->lock);
> +
> + mutex_unlock(&fs_info->remap_mutex);
> + btrfs_end_transaction(trans);
> + return ret;
> + }
> +
> + made_reservation = true;
> +
> + new_addr = ins.objectid;
> + length = ins.offset;
> +
> + if (!is_data && !IS_ALIGNED(length, fs_info->nodesize)) {
> + u64 new_length = ALIGN_DOWN(length, fs_info->nodesize);
> +
> + btrfs_free_reserved_extent(fs_info, new_addr + new_length,
> + length - new_length, 0);
> +
> + length = new_length;
> + }
> +
> + dest_bg = btrfs_lookup_block_group(fs_info, new_addr);
> +
> + mutex_lock(&dest_bg->free_space_lock);
> + bg_needs_free_space = test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
> + &dest_bg->runtime_flags);
> + mutex_unlock(&dest_bg->free_space_lock);
> +
> + if (bg_needs_free_space) {
> + ret = btrfs_add_block_group_free_space(trans, dest_bg);
> + if (ret)
> + goto fail;
> + }
> +
> + ret = do_copy(fs_info, start, new_addr, length);
> + if (ret)
> + goto fail;
> +
> + ret = btrfs_remove_from_free_space_tree(trans, new_addr, length);
> + if (ret)
> + goto fail;
> +
> + ret = add_remap_entry(trans, path, src_bg, start, new_addr, length);
> + if (ret) {
> + btrfs_add_to_free_space_tree(trans, new_addr, length);
> + goto fail;
> + }
> +
> + adjust_block_group_remap_bytes(trans, dest_bg, length);
> + btrfs_free_reserved_bytes(dest_bg, length, 0);
> +
> + spin_lock(&sinfo->lock);
> + sinfo->bytes_readonly += length;
> + spin_unlock(&sinfo->lock);
> +
> +next:
> + if (dest_bg)
> + btrfs_put_block_group(dest_bg);
> +
> + if (made_reservation)
> + btrfs_dec_block_group_reservations(fs_info, new_addr);
> +
> + mutex_unlock(&fs_info->remap_mutex);
> +
> + if (src_bg->identity_remap_count == 0) {
> + bool mark_fully_remapped = false;
> +
> + spin_lock(&src_bg->lock);
> +
> + if (!src_bg->fully_remapped) {
> + mark_fully_remapped = true;
> + src_bg->fully_remapped = true;
> + }
> +
> + spin_unlock(&src_bg->lock);
> +
> + if (mark_fully_remapped)
> + btrfs_mark_bg_fully_remapped(src_bg, trans);
> + }
> +
> + ret = btrfs_end_transaction(trans);
> + if (ret)
> + return ret;
> +
> + if (no_more)
> + return 1;
> +
> + *last_start = start;
> +
> + return 0;
> +
> +fail:
> + if (dest_bg)
> + btrfs_put_block_group(dest_bg);
> +
> + btrfs_free_reserved_extent(fs_info, new_addr, length, 0);
> +
> + mutex_unlock(&fs_info->remap_mutex);
> + btrfs_end_transaction(trans);
> +
> + return ret;
> +}
> +
> +static int do_remap_reloc(struct btrfs_fs_info *fs_info,
> + struct btrfs_path *path, struct btrfs_block_group *bg)
> +{
> + u64 last_start;
> + int ret;
> +
> + last_start = bg->start;
> +
> + while (true) {
> + ret = do_remap_reloc_trans(fs_info, bg, path, &last_start);
> + if (ret) {
> + if (ret == 1)
> + ret = 0;
> + break;
> + }
> + }
> +
> + return ret;
> +}
> +
> int btrfs_translate_remap(struct btrfs_fs_info *fs_info, u64 *logical,
> u64 *length)
> {
> @@ -5123,6 +5451,14 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
> }
>
> ret = start_block_group_remapping(fs_info, path, bg);
> + if (ret)
> + goto out;
> +
> + ret = do_remap_reloc(fs_info, path, rc->block_group);
> + if (ret)
> + goto out;
> +
> + btrfs_delete_unused_bgs(fs_info);
> } else {
> ret = do_nonremap_reloc(fs_info, verbose, rc);
> }
> --
> 2.51.0
>
next prev parent reply other threads:[~2025-11-20 0:21 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-14 18:47 [PATCH v6 00/16] Remap tree Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 01/16] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 02/16] btrfs: add REMAP chunk type Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 03/16] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 04/16] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 05/16] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 06/16] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 07/16] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 08/16] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 09/16] btrfs: handle deletions from remapped block group Mark Harmstone
2025-11-20 0:17 ` Boris Burkov
2025-11-24 12:40 ` Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 10/16] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-11-15 14:52 ` Sun Yangkai
2025-11-24 18:01 ` Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 11/16] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 12/16] btrfs: replace identity remaps with actual remaps when doing relocations Mark Harmstone
2025-11-20 0:21 ` Boris Burkov [this message]
2025-11-14 18:47 ` [PATCH v6 13/16] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 14/16] btrfs: allow balancing remap tree Mark Harmstone
2025-11-14 18:47 ` [PATCH v6 15/16] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-11-20 0:19 ` Boris Burkov
2025-11-14 18:47 ` [PATCH v6 16/16] btrfs: populate fully_remapped_bgs_list on mount Mark Harmstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251120002103.GC2478853@zen.localdomain \
--to=boris@bur.io \
--cc=linux-btrfs@vger.kernel.org \
--cc=mark@harmstone.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox