From: Boris Burkov <boris@bur.io>
To: Mark Harmstone <mark@harmstone.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v3 02/17] btrfs: add REMAP chunk type
Date: Tue, 14 Oct 2025 20:37:07 -0700 [thread overview]
Message-ID: <20251015033707.GD1702774@zen.localdomain> (raw)
In-Reply-To: <20251009112814.13942-3-mark@harmstone.com>
On Thu, Oct 09, 2025 at 12:27:57PM +0100, Mark Harmstone wrote:
> Add a new REMAP chunk type, which is a metadata chunk that holds the
> remap tree.
>
> This is needed for bootstrapping purposes: the remap tree can't itself
> be remapped, and must be relocated the existing way, by COWing every
> leaf. The remap tree can't go in the SYSTEM chunk as space there is
> limited, because a copy of the chunk item gets placed in the superblock.
>
> The changes in fs/btrfs/volumes.h are because we're adding a new block
> group type bit after the profile bits, and so can no longer rely on the
> const_ilog2 trick.
>
> The sizing to 32MB per chunk, matching the SYSTEM chunk, is an estimate
> here, we can adjust it later if it proves to be too big or too small.
> This works out to be ~500,000 remap items, which for a 4KB block size
> covers ~2GB of remapped data in the worst case and ~500TB in the best case.
>
Asked a relatively inconsequential question about static asserts in
volumes.h. Otherwise,
Reviewed-by: Boris Burkov <boris@bur.io>
> Signed-off-by: Mark Harmstone <mark@harmstone.com>
> ---
> fs/btrfs/block-rsv.c | 8 ++++++++
> fs/btrfs/block-rsv.h | 1 +
> fs/btrfs/disk-io.c | 1 +
> fs/btrfs/fs.h | 2 ++
> fs/btrfs/space-info.c | 13 ++++++++++++-
> fs/btrfs/sysfs.c | 2 ++
> fs/btrfs/tree-checker.c | 13 +++++++++++--
> fs/btrfs/volumes.c | 3 +++
> fs/btrfs/volumes.h | 11 +++++++++--
> include/uapi/linux/btrfs_tree.h | 4 +++-
> 10 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
> index 5ad6de738aee..2678cd3bed29 100644
> --- a/fs/btrfs/block-rsv.c
> +++ b/fs/btrfs/block-rsv.c
> @@ -421,6 +421,9 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
> case BTRFS_TREE_LOG_OBJECTID:
> root->block_rsv = &fs_info->treelog_rsv;
> break;
> + case BTRFS_REMAP_TREE_OBJECTID:
> + root->block_rsv = &fs_info->remap_block_rsv;
> + break;
> default:
> root->block_rsv = NULL;
> break;
> @@ -434,6 +437,9 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
> space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
> fs_info->chunk_block_rsv.space_info = space_info;
>
> + space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_REMAP);
> + fs_info->remap_block_rsv.space_info = space_info;
> +
> space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
> fs_info->global_block_rsv.space_info = space_info;
> fs_info->trans_block_rsv.space_info = space_info;
> @@ -460,6 +466,8 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
> WARN_ON(fs_info->trans_block_rsv.reserved > 0);
> WARN_ON(fs_info->chunk_block_rsv.size > 0);
> WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
> + WARN_ON(fs_info->remap_block_rsv.size > 0);
> + WARN_ON(fs_info->remap_block_rsv.reserved > 0);
> WARN_ON(fs_info->delayed_block_rsv.size > 0);
> WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
> WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
> diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
> index 79ae9d05cd91..8359fb96bc3c 100644
> --- a/fs/btrfs/block-rsv.h
> +++ b/fs/btrfs/block-rsv.h
> @@ -22,6 +22,7 @@ enum btrfs_rsv_type {
> BTRFS_BLOCK_RSV_DELALLOC,
> BTRFS_BLOCK_RSV_TRANS,
> BTRFS_BLOCK_RSV_CHUNK,
> + BTRFS_BLOCK_RSV_REMAP,
> BTRFS_BLOCK_RSV_DELOPS,
> BTRFS_BLOCK_RSV_DELREFS,
> BTRFS_BLOCK_RSV_TREELOG,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index f475fb2272ac..102e5e0ad10c 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2815,6 +2815,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
> BTRFS_BLOCK_RSV_GLOBAL);
> btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
> btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
> + btrfs_init_block_rsv(&fs_info->remap_block_rsv, BTRFS_BLOCK_RSV_REMAP);
> btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG);
> btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
> btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index 814bbc9417d2..c4dba7e7ce5a 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -485,6 +485,8 @@ struct btrfs_fs_info {
> struct btrfs_block_rsv trans_block_rsv;
> /* Block reservation for chunk tree */
> struct btrfs_block_rsv chunk_block_rsv;
> + /* Block reservation for remap tree */
> + struct btrfs_block_rsv remap_block_rsv;
> /* Block reservation for delayed operations */
> struct btrfs_block_rsv delayed_block_rsv;
> /* Block reservation for delayed refs */
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 04a07d6f8537..22bc95f33a6f 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -215,7 +215,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
>
> if (flags & BTRFS_BLOCK_GROUP_DATA)
> return BTRFS_MAX_DATA_CHUNK_SIZE;
> - else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
> + else if (flags & (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_REMAP))
> return SZ_32M;
>
> /* Handle BTRFS_BLOCK_GROUP_METADATA */
> @@ -343,6 +343,8 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
> if (mixed) {
> flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
> ret = create_space_info(fs_info, flags);
> + if (ret)
> + goto out;
> } else {
> flags = BTRFS_BLOCK_GROUP_METADATA;
> ret = create_space_info(fs_info, flags);
> @@ -351,7 +353,15 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
>
> flags = BTRFS_BLOCK_GROUP_DATA;
> ret = create_space_info(fs_info, flags);
> + if (ret)
> + goto out;
> + }
> +
> + if (features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE) {
> + flags = BTRFS_BLOCK_GROUP_REMAP;
> + ret = create_space_info(fs_info, flags);
> }
> +
> out:
> return ret;
> }
> @@ -590,6 +600,7 @@ static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
> DUMP_BLOCK_RSV(fs_info, global_block_rsv);
> DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
> DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
> + DUMP_BLOCK_RSV(fs_info, remap_block_rsv);
> DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
> DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
> }
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 857d2772db1c..f942fde1d936 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1973,6 +1973,8 @@ static const char *alloc_name(struct btrfs_space_info *space_info)
> case BTRFS_BLOCK_GROUP_SYSTEM:
> ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
> return "system";
> + case BTRFS_BLOCK_GROUP_REMAP:
> + return "remap";
> default:
> WARN_ON(1);
> return "invalid-combination";
> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index 9c03aeb6f2c7..62f35338a555 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c
> @@ -748,17 +748,26 @@ static int check_block_group_item(struct extent_buffer *leaf,
> return -EUCLEAN;
> }
>
> + if (flags & BTRFS_BLOCK_GROUP_REMAP &&
> + !btrfs_fs_incompat(fs_info, REMAP_TREE)) {
> + block_group_err(leaf, slot,
> +"invalid flags, have 0x%llx (REMAP flag set) but no remap-tree incompat flag",
> + flags);
> + return -EUCLEAN;
> + }
> +
> type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
> if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
> type != BTRFS_BLOCK_GROUP_METADATA &&
> type != BTRFS_BLOCK_GROUP_SYSTEM &&
> + type != BTRFS_BLOCK_GROUP_REMAP &&
> type != (BTRFS_BLOCK_GROUP_METADATA |
> BTRFS_BLOCK_GROUP_DATA))) {
> block_group_err(leaf, slot,
> -"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
> +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx, 0x%llx or 0x%llx",
> type, hweight64(type),
> BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
> - BTRFS_BLOCK_GROUP_SYSTEM,
> + BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_REMAP,
> BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
> return -EUCLEAN;
> }
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2e144ebd56ac..e14b86e2996b 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -231,6 +231,9 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
> DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
> DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
> DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");
> + /* block groups containing the remap tree */
> + DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_REMAP, "remap");
> + /* block group that has been remapped */
> DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_REMAPPED, "remapped");
>
> DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 2cbf8080eade..b0fa8eb38060 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -58,8 +58,6 @@ static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
> */
> static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
> const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
> -static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
> - ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
>
> /* ilog2() can handle both constants and variables */
> #define BTRFS_BG_FLAG_TO_INDEX(profile) \
> @@ -81,6 +79,15 @@ enum btrfs_raid_types {
> BTRFS_NR_RAID_TYPES
> };
>
> +static_assert(BTRFS_RAID_RAID0 == 1);
> +static_assert(BTRFS_RAID_RAID1 == 2);
> +static_assert(BTRFS_RAID_DUP == 3);
> +static_assert(BTRFS_RAID_RAID10 == 4);
> +static_assert(BTRFS_RAID_RAID5 == 5);
> +static_assert(BTRFS_RAID_RAID6 == 6);
> +static_assert(BTRFS_RAID_RAID1C3 == 7);
> +static_assert(BTRFS_RAID_RAID1C4 == 8);
> +
What is the significance of these static_assert changes?
> /*
> * Use sequence counter to get consistent device stat data on
> * 32-bit processors.
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index 4439d77a7252..9a36f0206d90 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -1169,12 +1169,14 @@ struct btrfs_dev_replace_item {
> #define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9)
> #define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10)
> #define BTRFS_BLOCK_GROUP_REMAPPED (1ULL << 11)
> +#define BTRFS_BLOCK_GROUP_REMAP (1ULL << 12)
> #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
> BTRFS_SPACE_INFO_GLOBAL_RSV)
>
> #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
> BTRFS_BLOCK_GROUP_SYSTEM | \
> - BTRFS_BLOCK_GROUP_METADATA)
> + BTRFS_BLOCK_GROUP_METADATA | \
> + BTRFS_BLOCK_GROUP_REMAP)
>
> #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
> BTRFS_BLOCK_GROUP_RAID1 | \
> --
> 2.49.1
>
next prev parent reply other threads:[~2025-10-15 3:37 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-09 11:27 [PATCH v3 00/17] Remap tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 01/17] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 02/17] btrfs: add REMAP chunk type Mark Harmstone
2025-10-15 3:37 ` Boris Burkov [this message]
2025-10-20 9:58 ` Mark Harmstone
2025-10-20 17:35 ` Boris Burkov
2025-10-09 11:27 ` [PATCH v3 03/17] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-10-15 3:47 ` Boris Burkov
2025-10-20 12:15 ` Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 04/17] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 05/17] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 06/17] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 07/17] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-10-15 3:55 ` Boris Burkov
2025-10-20 11:32 ` Mark Harmstone
2025-10-20 17:44 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 08/17] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-10-15 4:21 ` Boris Burkov
2025-10-20 14:31 ` Mark Harmstone
2025-10-20 17:44 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 09/17] btrfs: release BG lock before calling btrfs_link_bg_list() Mark Harmstone
2025-10-09 11:56 ` Filipe Manana
2025-10-09 14:58 ` Mark Harmstone
2025-10-09 15:16 ` Filipe Manana
2025-10-09 16:30 ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 10/17] btrfs: handle deletions from remapped block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 11/17] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 12/17] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 13/17] btrfs: replace identity maps with actual remaps when doing relocations Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 14/17] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 15/17] btrfs: allow balancing remap tree Mark Harmstone
2025-10-15 4:24 ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 16/17] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-10-15 4:54 ` Boris Burkov
2025-10-23 17:35 ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 17/17] btrfs: add stripe removal pending flag Mark Harmstone
2025-10-15 5:05 ` Boris Burkov
2025-10-20 14:52 ` Mark Harmstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251015033707.GD1702774@zen.localdomain \
--to=boris@bur.io \
--cc=linux-btrfs@vger.kernel.org \
--cc=mark@harmstone.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox