From: Boris Burkov <boris@bur.io>
To: Mark Harmstone <mark@harmstone.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v3 07/17] btrfs: allow mounting filesystems with remap-tree incompat flag
Date: Tue, 14 Oct 2025 20:55:48 -0700 [thread overview]
Message-ID: <20251015035548.GF1702774@zen.localdomain> (raw)
In-Reply-To: <20251009112814.13942-8-mark@harmstone.com>
On Thu, Oct 09, 2025 at 12:28:02PM +0100, Mark Harmstone wrote:
> If we encounter a filesystem with the remap-tree incompat flag set,
> validate its compatibility with the other flags, and load the remap tree
> using the values that have been added to the superblock.
>
> The remap-tree feature depends on the free space tree, but no-holes and
> block-group-tree have been made dependencies to reduce the testing
> matrix. Similarly I'm not aware of any reason why mixed-bg and zoned would be
> incompatible with remap-tree, but this is blocked for the time being
> until it can be fully tested.
>
Bonus points on offer for moving the open ctree bit as late as humanly
possible for that good "partial ordering" self documentation. (Or
explaining why it is already so).
Thanks.
Reviewed-by: Boris Burkov <boris@bur.io>
> Signed-off-by: Mark Harmstone <mark@harmstone.com>
> ---
> fs/btrfs/Kconfig | 2 +
> fs/btrfs/accessors.h | 6 ++
> fs/btrfs/disk-io.c | 101 ++++++++++++++++++++++++++++----
> fs/btrfs/extent-tree.c | 2 +
> fs/btrfs/fs.h | 4 +-
> fs/btrfs/transaction.c | 7 +++
> include/uapi/linux/btrfs_tree.h | 5 +-
> 7 files changed, 113 insertions(+), 14 deletions(-)
>
> diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
> index 4438637c8900..77b5a9f27840 100644
> --- a/fs/btrfs/Kconfig
> +++ b/fs/btrfs/Kconfig
> @@ -117,4 +117,6 @@ config BTRFS_EXPERIMENTAL
>
> - large folio support
>
> + - remap-tree - logical address remapping tree
> +
> If unsure, say N.
> diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
> index 0dd161ee6863..392eaad75e72 100644
> --- a/fs/btrfs/accessors.h
> +++ b/fs/btrfs/accessors.h
> @@ -882,6 +882,12 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
> uuid_tree_generation, 64);
> BTRFS_SETGET_STACK_FUNCS(super_nr_global_roots, struct btrfs_super_block,
> nr_global_roots, 64);
> +BTRFS_SETGET_STACK_FUNCS(super_remap_root, struct btrfs_super_block,
> + remap_root, 64);
> +BTRFS_SETGET_STACK_FUNCS(super_remap_root_generation, struct btrfs_super_block,
> + remap_root_generation, 64);
> +BTRFS_SETGET_STACK_FUNCS(super_remap_root_level, struct btrfs_super_block,
> + remap_root_level, 8);
>
> /* struct btrfs_file_extent_item */
> BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 92cb789957b4..60507e971aad 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1180,6 +1180,8 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
> return btrfs_grab_root(btrfs_global_root(fs_info, &key));
> case BTRFS_RAID_STRIPE_TREE_OBJECTID:
> return btrfs_grab_root(fs_info->stripe_root);
> + case BTRFS_REMAP_TREE_OBJECTID:
> + return btrfs_grab_root(fs_info->remap_root);
> default:
> return NULL;
> }
> @@ -1271,6 +1273,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
> btrfs_put_root(fs_info->data_reloc_root);
> btrfs_put_root(fs_info->block_group_root);
> btrfs_put_root(fs_info->stripe_root);
> + btrfs_put_root(fs_info->remap_root);
> btrfs_check_leaked_roots(fs_info);
> btrfs_extent_buffer_leak_debug_check(fs_info);
> kfree(fs_info->super_copy);
> @@ -1825,6 +1828,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
> free_root_extent_buffers(info->data_reloc_root);
> free_root_extent_buffers(info->block_group_root);
> free_root_extent_buffers(info->stripe_root);
> + free_root_extent_buffers(info->remap_root);
> if (free_chunk_root)
> free_root_extent_buffers(info->chunk_root);
> }
> @@ -2256,20 +2260,45 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
> if (ret)
> goto out;
>
> - /*
> - * This tree can share blocks with some other fs tree during relocation
> - * and we need a proper setup by btrfs_get_fs_root
> - */
> - root = btrfs_get_fs_root(tree_root->fs_info,
> - BTRFS_DATA_RELOC_TREE_OBJECTID, true);
> - if (IS_ERR(root)) {
> - if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
> - ret = PTR_ERR(root);
> - goto out;
> + if (btrfs_fs_incompat(fs_info, REMAP_TREE)) {
> + /* remap_root already loaded in load_important_roots() */
> + root = fs_info->remap_root;
> +
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> +
> + root->root_key.objectid = BTRFS_REMAP_TREE_OBJECTID;
> + root->root_key.type = BTRFS_ROOT_ITEM_KEY;
> + root->root_key.offset = 0;
> +
> + /* Check that data reloc tree doesn't also exist */
> + location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
> + root = btrfs_read_tree_root(fs_info->tree_root, &location);
> + if (!IS_ERR(root)) {
> + btrfs_err(fs_info,
> + "data reloc tree exists when remap-tree enabled");
> + btrfs_put_root(root);
> + return -EIO;
> + } else if (PTR_ERR(root) != -ENOENT) {
> + btrfs_warn(fs_info,
> + "error %ld when checking for data reloc tree",
> + PTR_ERR(root));
> }
> } else {
> - set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> - fs_info->data_reloc_root = root;
> + /*
> + * This tree can share blocks with some other fs tree during
> + * relocation and we need a proper setup by btrfs_get_fs_root
> + */
> + root = btrfs_get_fs_root(tree_root->fs_info,
> + BTRFS_DATA_RELOC_TREE_OBJECTID, true);
> + if (IS_ERR(root)) {
> + if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
> + ret = PTR_ERR(root);
> + goto out;
> + }
> + } else {
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> + fs_info->data_reloc_root = root;
> + }
> }
>
> location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
> @@ -2509,6 +2538,31 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
> ret = -EINVAL;
> }
>
> + /*
> + * Reduce test matrix for remap tree by requiring block-group-tree
> + * and no-holes. Free-space-tree is a hard requirement.
> + */
> + if (btrfs_fs_incompat(fs_info, REMAP_TREE) &&
> + (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
> + !btrfs_fs_incompat(fs_info, NO_HOLES) ||
> + !btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))) {
> + btrfs_err(fs_info,
> +"remap-tree feature requires free-space-tree, no-holes, and block-group-tree");
> + ret = -EINVAL;
> + }
> +
> + if (btrfs_fs_incompat(fs_info, REMAP_TREE) &&
> + btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
> + btrfs_err(fs_info, "remap-tree not supported with mixed-bg");
> + ret = -EINVAL;
> + }
> +
> + if (btrfs_fs_incompat(fs_info, REMAP_TREE) &&
> + btrfs_fs_incompat(fs_info, ZONED)) {
> + btrfs_err(fs_info, "remap-tree not supported with zoned devices");
> + ret = -EINVAL;
> + }
> +
> /*
> * Hint to catch really bogus numbers, bitflips or so, more exact checks are
> * done later
> @@ -2667,6 +2721,18 @@ static int load_important_roots(struct btrfs_fs_info *fs_info)
> btrfs_warn(fs_info, "couldn't read tree root");
> return ret;
> }
> +
> + if (btrfs_fs_incompat(fs_info, REMAP_TREE)) {
> + bytenr = btrfs_super_remap_root(sb);
> + gen = btrfs_super_remap_root_generation(sb);
> + level = btrfs_super_remap_root_level(sb);
> + ret = load_super_root(fs_info->remap_root, bytenr, gen, level);
> + if (ret) {
> + btrfs_warn(fs_info, "couldn't read remap root");
> + return ret;
> + }
> + }
> +
> return 0;
> }
>
> @@ -3284,6 +3350,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
> struct btrfs_fs_info *fs_info = btrfs_sb(sb);
> struct btrfs_root *tree_root;
> struct btrfs_root *chunk_root;
> + struct btrfs_root *remap_root;
> int ret;
> int level;
>
> @@ -3417,6 +3484,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
> if (ret < 0)
> goto fail_alloc;
>
> + if (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_REMAP_TREE) {
> + remap_root = btrfs_alloc_root(fs_info, BTRFS_REMAP_TREE_OBJECTID,
> + GFP_KERNEL);
> + fs_info->remap_root = remap_root;
> + if (!remap_root) {
> + ret = -ENOMEM;
> + goto fail_alloc;
> + }
> + }
> +
Thanks for scooting it down. Does this need to be before reading the sys
array / chunk tree? I would guess no, as we can't remap that, but
perhaps I have misunderstood.
> /*
> * At this point our mount options are validated, if we set ->max_inline
> * to something non-standard make sure we truncate it to sectorsize.
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index d964147b8097..7805a148bbd8 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -2589,6 +2589,8 @@ static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
> flags = BTRFS_BLOCK_GROUP_DATA;
> else if (root == fs_info->chunk_root)
> flags = BTRFS_BLOCK_GROUP_SYSTEM;
> + else if (root == fs_info->remap_root)
> + flags = BTRFS_BLOCK_GROUP_REMAP;
> else
> flags = BTRFS_BLOCK_GROUP_METADATA;
>
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index c4dba7e7ce5a..c76f4c2701f9 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -291,7 +291,8 @@ enum {
> #define BTRFS_FEATURE_INCOMPAT_SUPP \
> (BTRFS_FEATURE_INCOMPAT_SUPP_STABLE | \
> BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
> - BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
> + BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 | \
> + BTRFS_FEATURE_INCOMPAT_REMAP_TREE)
>
> #else
>
> @@ -451,6 +452,7 @@ struct btrfs_fs_info {
> struct btrfs_root *data_reloc_root;
> struct btrfs_root *block_group_root;
> struct btrfs_root *stripe_root;
> + struct btrfs_root *remap_root;
>
> /* The log root tree is a directory of all the other log roots */
> struct btrfs_root *log_root_tree;
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index 89ae0c7a610a..b1c41982e7b2 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -1950,6 +1950,13 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
> super->cache_generation = 0;
> if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
> super->uuid_tree_generation = root_item->generation;
> +
> + if (btrfs_fs_incompat(fs_info, REMAP_TREE)) {
> + root_item = &fs_info->remap_root->root_item;
> + super->remap_root = root_item->bytenr;
> + super->remap_root_generation = root_item->generation;
> + super->remap_root_level = root_item->level;
> + }
> }
>
> int btrfs_transaction_blocked(struct btrfs_fs_info *info)
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index 500e3a7df90b..89bcb80081a6 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -721,9 +721,12 @@ struct btrfs_super_block {
> __u8 metadata_uuid[BTRFS_FSID_SIZE];
>
> __u64 nr_global_roots;
> + __le64 remap_root;
> + __le64 remap_root_generation;
> + __u8 remap_root_level;
>
> /* Future expansion */
> - __le64 reserved[27];
> + __u8 reserved[199];
> __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
> struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
>
> --
> 2.49.1
>
next prev parent reply other threads:[~2025-10-15 3:56 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-09 11:27 [PATCH v3 00/17] Remap tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 01/17] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 02/17] btrfs: add REMAP chunk type Mark Harmstone
2025-10-15 3:37 ` Boris Burkov
2025-10-20 9:58 ` Mark Harmstone
2025-10-20 17:35 ` Boris Burkov
2025-10-09 11:27 ` [PATCH v3 03/17] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-10-15 3:47 ` Boris Burkov
2025-10-20 12:15 ` Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 04/17] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 05/17] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 06/17] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 07/17] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-10-15 3:55 ` Boris Burkov [this message]
2025-10-20 11:32 ` Mark Harmstone
2025-10-20 17:44 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 08/17] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-10-15 4:21 ` Boris Burkov
2025-10-20 14:31 ` Mark Harmstone
2025-10-20 17:44 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 09/17] btrfs: release BG lock before calling btrfs_link_bg_list() Mark Harmstone
2025-10-09 11:56 ` Filipe Manana
2025-10-09 14:58 ` Mark Harmstone
2025-10-09 15:16 ` Filipe Manana
2025-10-09 16:30 ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 10/17] btrfs: handle deletions from remapped block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 11/17] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 12/17] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 13/17] btrfs: replace identity maps with actual remaps when doing relocations Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 14/17] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 15/17] btrfs: allow balancing remap tree Mark Harmstone
2025-10-15 4:24 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 16/17] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-10-15 4:54 ` Boris Burkov
2025-10-23 17:35 ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 17/17] btrfs: add stripe removal pending flag Mark Harmstone
2025-10-15 5:05 ` Boris Burkov
2025-10-20 14:52 ` Mark Harmstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251015035548.GF1702774@zen.localdomain \
--to=boris@bur.io \
--cc=linux-btrfs@vger.kernel.org \
--cc=mark@harmstone.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox