From: Mark Harmstone <mark@harmstone.com>
To: linux-btrfs@vger.kernel.org
Cc: Mark Harmstone <mark@harmstone.com>, Boris Burkov <boris@bur.io>
Subject: [PATCH v5 14/16] btrfs: allow balancing remap tree
Date: Mon, 10 Nov 2025 17:14:38 +0000 [thread overview]
Message-ID: <20251110171511.20900-15-mark@harmstone.com> (raw)
In-Reply-To: <20251110171511.20900-1-mark@harmstone.com>
Balancing the REMAP chunk, i.e. the chunk in which the remap tree lives,
is a special case.
We can't use the remap tree itself for this, as then we'd have no way to
bootstrap it on mount. And we can't use the pre-remap tree code for this
as it relies on walking the extent tree, and we're not creating backrefs
for REMAP chunks.
So instead, if a balance would relocate any REMAP block groups, mark
those block groups as readonly and COW every leaf of the remap tree.
There are more sophisticated ways of doing this, such as only COWing nodes
within a block group that's to be relocated, but they're fiddly and have
lots of edge cases. Plus it's not anticipated that a) the number of
REMAP chunks is going to be particularly large, or b) that users will
want to only relocate some of these chunks - the main use case here is
to unbreak RAID conversion and device removal.
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: Boris Burkov <boris@bur.io>
---
fs/btrfs/volumes.c | 159 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 155 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5b211cf99c3e..7def123fa560 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3991,8 +3991,11 @@ static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk
struct btrfs_balance_args *bargs = NULL;
u64 chunk_type = btrfs_chunk_type(leaf, chunk);
- if (chunk_type & BTRFS_BLOCK_GROUP_REMAP)
- return false;
+ /* treat REMAP chunks as METADATA */
+ if (chunk_type & BTRFS_BLOCK_GROUP_REMAP) {
+ chunk_type &= ~BTRFS_BLOCK_GROUP_REMAP;
+ chunk_type |= BTRFS_BLOCK_GROUP_METADATA;
+ }
/* type filter */
if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
@@ -4075,6 +4078,113 @@ static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk
return true;
}
+struct remap_chunk_info { /* one REMAP chunk queued by __btrfs_balance() for balance_remap_chunks() */
+ struct list_head list; /* entry in the list of pending REMAP chunks */
+ u64 offset; /* chunk key offset; passed to btrfs_lookup_block_group() */
+ struct btrfs_block_group *bg; /* NULL until balance_remap_chunks() looks it up */
+ bool made_ro; /* true once btrfs_inc_block_group_ro() succeeded on bg */
+};
+
+/*
+ * COW every leaf of the remap tree.
+ *
+ * The first leaf is reached by searching for the zero key with cow set.
+ * Each following leaf is found with btrfs_next_leaf(), its first key is
+ * recorded, the path is dropped, and the leaf is searched for again by
+ * that key with cow set.
+ *
+ * Returns 0 once btrfs_next_leaf() reports that there is no further leaf,
+ * or the negative error from btrfs_search_slot() / btrfs_next_leaf().
+ * The path is not released here on return; that is left to the caller.
+ */
+static int cow_remap_tree(struct btrfs_trans_handle *trans,
+			  struct btrfs_path *path)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_key leaf_key = { 0 };
+	int ret;
+
+	/* Land on the first leaf; the final argument requests COW. */
+	ret = btrfs_search_slot(trans, fs_info->remap_root, &leaf_key, path,
+				0, 1);
+
+	while (ret >= 0) {
+		ret = btrfs_next_leaf(fs_info->remap_root, path);
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
+			return 0;	/* walked off the end of the tree */
+
+		/* Remember where this leaf starts, then look it up with COW. */
+		btrfs_item_key_to_cpu(path->nodes[0], &leaf_key,
+				      path->slots[0]);
+		btrfs_release_path(path);
+
+		ret = btrfs_search_slot(trans, fs_info->remap_root, &leaf_key,
+					path, 0, 1);
+	}
+
+	return ret;
+}
+
+/*
+ * Balance the REMAP block groups described by @chunks.
+ *
+ * Each entry is resolved to its block group and marked read-only; entries
+ * whose block group no longer exists are dropped. Every leaf of the remap
+ * tree is then COWed in a single transaction under remap_mutex, and the
+ * transaction is committed.
+ *
+ * @fs_info: filesystem being balanced
+ * @path:    scratch path used for the tree walk; released before the commit
+ * @chunks:  list of struct remap_chunk_info; every entry is unlinked and
+ *           freed before this function returns, on success or failure
+ *
+ * Returns 0 on success, or a negative errno from marking a block group
+ * read-only, starting the transaction, COWing the tree, or committing.
+ */
+static int balance_remap_chunks(struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path,
+				struct list_head *chunks)
+{
+	struct remap_chunk_info *rci, *tmp;
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+	int commit_ret;
+
+	list_for_each_entry_safe(rci, tmp, chunks, list) {
+		rci->bg = btrfs_lookup_block_group(fs_info, rci->offset);
+		if (!rci->bg) {
+			/* Block group is gone; nothing to balance for it. */
+			list_del(&rci->list);
+			kfree(rci);
+			continue;
+		}
+
+		ret = btrfs_inc_block_group_ro(rci->bg, false);
+		if (ret)
+			goto end;
+
+		rci->made_ro = true;
+	}
+
+	if (list_empty(chunks))
+		return 0;
+
+	trans = btrfs_start_transaction(fs_info->remap_root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto end;
+	}
+
+	mutex_lock(&fs_info->remap_mutex);
+
+	ret = cow_remap_tree(trans, path);
+
+	btrfs_release_path(path);
+
+	mutex_unlock(&fs_info->remap_mutex);
+
+	/* Don't report success if the COWed tree never made it to disk. */
+	commit_ret = btrfs_commit_transaction(trans);
+	if (!ret)
+		ret = commit_ret;
+
+end:
+	while (!list_empty(chunks)) {
+		struct btrfs_block_group *bg;
+
+		rci = list_first_entry(chunks, struct remap_chunk_info, list);
+		bg = rci->bg;
+
+		/*
+		 * If the first loop bailed out early, the entries after the
+		 * failing one were never looked up and still have a NULL bg.
+		 */
+		if (bg) {
+			bool unused;
+
+			spin_lock(&bg->lock);
+			unused = !btrfs_is_block_group_used(bg);
+			spin_unlock(&bg->lock);
+
+			if (unused)
+				btrfs_mark_bg_unused(bg);
+
+			/* Only undo the read-only mark we actually took. */
+			if (rci->made_ro)
+				btrfs_dec_block_group_ro(bg);
+
+			btrfs_put_block_group(bg);
+		}
+
+		list_del(&rci->list);
+		kfree(rci);
+	}
+
+	return ret;
+}
+
static int __btrfs_balance(struct btrfs_fs_info *fs_info)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -4097,6 +4207,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
u32 count_meta = 0;
u32 count_sys = 0;
int chunk_reserved = 0;
+ struct remap_chunk_info *rci;
+ unsigned int num_remap_chunks = 0;
+ LIST_HEAD(remap_chunks);
path = btrfs_alloc_path();
if (!path) {
@@ -4195,7 +4308,8 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
count_data++;
else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
count_sys++;
- else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
+ else if (chunk_type & (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_REMAP))
count_meta++;
goto loop;
@@ -4215,6 +4329,30 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
goto loop;
}
+ /*
+ * Balancing REMAP chunks takes place separately - add the
+ * details to a list so it can be processed later.
+ */
+ if (chunk_type & BTRFS_BLOCK_GROUP_REMAP) {
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
+
+ rci = kmalloc(sizeof(struct remap_chunk_info),
+ GFP_NOFS);
+ if (!rci) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ rci->offset = found_key.offset;
+ rci->bg = NULL;
+ rci->made_ro = false;
+ list_add_tail(&rci->list, &remap_chunks);
+
+ num_remap_chunks++;
+
+ goto loop;
+ }
+
if (!chunk_reserved) {
/*
* We may be relocating the only data chunk we have,
@@ -4254,11 +4392,24 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
key.offset = found_key.offset - 1;
}
+ btrfs_release_path(path);
+
if (counting) {
- btrfs_release_path(path);
counting = false;
goto again;
}
+
+ if (!list_empty(&remap_chunks)) {
+ ret = balance_remap_chunks(fs_info, path, &remap_chunks);
+ if (ret == -ENOSPC)
+ enospc_errors++;
+
+ if (!ret) {
+ spin_lock(&fs_info->balance_lock);
+ bctl->stat.completed += num_remap_chunks;
+ spin_unlock(&fs_info->balance_lock);
+ }
+ }
error:
btrfs_free_path(path);
if (enospc_errors) {
--
2.51.0
next prev parent reply other threads:[~2025-11-10 17:15 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-10 17:14 [PATCH v5 00/16] Remap tree Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 01/16] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 02/16] btrfs: add REMAP chunk type Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 03/16] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 04/16] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 05/16] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 06/16] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 07/16] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 08/16] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 09/16] btrfs: handle deletions from remapped block group Mark Harmstone
2025-11-12 5:14 ` Boris Burkov
2025-11-12 18:51 ` Mark Harmstone
2025-11-13 11:09 ` Mark Harmstone
2025-11-12 6:15 ` Boris Burkov
2025-11-10 17:14 ` [PATCH v5 10/16] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-11-12 5:35 ` Boris Burkov
2025-11-12 17:38 ` Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 11/16] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-11-12 5:41 ` Boris Burkov
2025-11-10 17:14 ` [PATCH v5 12/16] btrfs: replace identity remaps with actual remaps when doing relocations Mark Harmstone
2025-11-10 17:14 ` [PATCH v5 13/16] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-11-10 17:14 ` Mark Harmstone [this message]
2025-11-10 17:14 ` [PATCH v5 15/16] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-11-12 5:55 ` Boris Burkov
2025-11-10 17:14 ` [PATCH v5 16/16] btrfs: populate fully_remapped_bgs_list on mount Mark Harmstone
2025-11-12 5:57 ` Boris Burkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251110171511.20900-15-mark@harmstone.com \
--to=mark@harmstone.com \
--cc=boris@bur.io \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox