Linux Btrfs filesystem development
 help / color / mirror / Atom feed
From: Boris Burkov <boris@bur.io>
To: Mark Harmstone <mark@harmstone.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v3 02/17] btrfs: add REMAP chunk type
Date: Tue, 14 Oct 2025 20:37:07 -0700	[thread overview]
Message-ID: <20251015033707.GD1702774@zen.localdomain> (raw)
In-Reply-To: <20251009112814.13942-3-mark@harmstone.com>

On Thu, Oct 09, 2025 at 12:27:57PM +0100, Mark Harmstone wrote:
> Add a new REMAP chunk type, which is a metadata chunk that holds the
> remap tree.
> 
> This is needed for bootstrapping purposes: the remap tree can't itself
> be remapped, and must be relocated the existing way, by COWing every
> leaf. The remap tree can't go in the SYSTEM chunk as space there is
> limited, because a copy of the chunk item gets placed in the superblock.
> 
> The changes in fs/btrfs/volumes.h are because we're adding a new block
> group type bit after the profile bits, and so can no longer rely on the
> const_ilog2 trick.
> 
> The sizing to 32MB per chunk, matching the SYSTEM chunk, is an estimate
> here, we can adjust it later if it proves to be too big or too small.
> This works out to be ~500,000 remap items, which for a 4KB block size
> covers ~2GB of remapped data in the worst case and ~500TB in the best case.
> 

Asked a relatively inconsequential question about static asserts in
volumes.h. Otherwise,

Reviewed-by: Boris Burkov <boris@bur.io>

> Signed-off-by: Mark Harmstone <mark@harmstone.com>
> ---
>  fs/btrfs/block-rsv.c            |  8 ++++++++
>  fs/btrfs/block-rsv.h            |  1 +
>  fs/btrfs/disk-io.c              |  1 +
>  fs/btrfs/fs.h                   |  2 ++
>  fs/btrfs/space-info.c           | 13 ++++++++++++-
>  fs/btrfs/sysfs.c                |  2 ++
>  fs/btrfs/tree-checker.c         | 13 +++++++++++--
>  fs/btrfs/volumes.c              |  3 +++
>  fs/btrfs/volumes.h              | 11 +++++++++--
>  include/uapi/linux/btrfs_tree.h |  4 +++-
>  10 files changed, 52 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
> index 5ad6de738aee..2678cd3bed29 100644
> --- a/fs/btrfs/block-rsv.c
> +++ b/fs/btrfs/block-rsv.c
> @@ -421,6 +421,9 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
>  	case BTRFS_TREE_LOG_OBJECTID:
>  		root->block_rsv = &fs_info->treelog_rsv;
>  		break;
> +	case BTRFS_REMAP_TREE_OBJECTID:
> +		root->block_rsv = &fs_info->remap_block_rsv;
> +		break;
>  	default:
>  		root->block_rsv = NULL;
>  		break;
> @@ -434,6 +437,9 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
>  	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
>  	fs_info->chunk_block_rsv.space_info = space_info;
>  
> +	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_REMAP);
> +	fs_info->remap_block_rsv.space_info = space_info;
> +
>  	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
>  	fs_info->global_block_rsv.space_info = space_info;
>  	fs_info->trans_block_rsv.space_info = space_info;
> @@ -460,6 +466,8 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
>  	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
>  	WARN_ON(fs_info->chunk_block_rsv.size > 0);
>  	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
> +	WARN_ON(fs_info->remap_block_rsv.size > 0);
> +	WARN_ON(fs_info->remap_block_rsv.reserved > 0);
>  	WARN_ON(fs_info->delayed_block_rsv.size > 0);
>  	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
>  	WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
> diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
> index 79ae9d05cd91..8359fb96bc3c 100644
> --- a/fs/btrfs/block-rsv.h
> +++ b/fs/btrfs/block-rsv.h
> @@ -22,6 +22,7 @@ enum btrfs_rsv_type {
>  	BTRFS_BLOCK_RSV_DELALLOC,
>  	BTRFS_BLOCK_RSV_TRANS,
>  	BTRFS_BLOCK_RSV_CHUNK,
> +	BTRFS_BLOCK_RSV_REMAP,
>  	BTRFS_BLOCK_RSV_DELOPS,
>  	BTRFS_BLOCK_RSV_DELREFS,
>  	BTRFS_BLOCK_RSV_TREELOG,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index f475fb2272ac..102e5e0ad10c 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2815,6 +2815,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
>  			     BTRFS_BLOCK_RSV_GLOBAL);
>  	btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
>  	btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
> +	btrfs_init_block_rsv(&fs_info->remap_block_rsv, BTRFS_BLOCK_RSV_REMAP);
>  	btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG);
>  	btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
>  	btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index 814bbc9417d2..c4dba7e7ce5a 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -485,6 +485,8 @@ struct btrfs_fs_info {
>  	struct btrfs_block_rsv trans_block_rsv;
>  	/* Block reservation for chunk tree */
>  	struct btrfs_block_rsv chunk_block_rsv;
> +	/* Block reservation for remap tree */
> +	struct btrfs_block_rsv remap_block_rsv;
>  	/* Block reservation for delayed operations */
>  	struct btrfs_block_rsv delayed_block_rsv;
>  	/* Block reservation for delayed refs */
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 04a07d6f8537..22bc95f33a6f 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -215,7 +215,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
>  
>  	if (flags & BTRFS_BLOCK_GROUP_DATA)
>  		return BTRFS_MAX_DATA_CHUNK_SIZE;
> -	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
> +	else if (flags & (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_REMAP))
>  		return SZ_32M;
>  
>  	/* Handle BTRFS_BLOCK_GROUP_METADATA */
> @@ -343,6 +343,8 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
>  	if (mixed) {
>  		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
>  		ret = create_space_info(fs_info, flags);
> +		if (ret)
> +			goto out;
>  	} else {
>  		flags = BTRFS_BLOCK_GROUP_METADATA;
>  		ret = create_space_info(fs_info, flags);
> @@ -351,7 +353,15 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
>  
>  		flags = BTRFS_BLOCK_GROUP_DATA;
>  		ret = create_space_info(fs_info, flags);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	if (features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE) {
> +		flags = BTRFS_BLOCK_GROUP_REMAP;
> +		ret = create_space_info(fs_info, flags);
>  	}
> +
>  out:
>  	return ret;
>  }
> @@ -590,6 +600,7 @@ static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
>  	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
>  	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
>  	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
> +	DUMP_BLOCK_RSV(fs_info, remap_block_rsv);
>  	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
>  	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
>  }
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 857d2772db1c..f942fde1d936 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1973,6 +1973,8 @@ static const char *alloc_name(struct btrfs_space_info *space_info)
>  	case BTRFS_BLOCK_GROUP_SYSTEM:
>  		ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
>  		return "system";
> +	case BTRFS_BLOCK_GROUP_REMAP:
> +		return "remap";
>  	default:
>  		WARN_ON(1);
>  		return "invalid-combination";
> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index 9c03aeb6f2c7..62f35338a555 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c
> @@ -748,17 +748,26 @@ static int check_block_group_item(struct extent_buffer *leaf,
>  		return -EUCLEAN;
>  	}
>  
> +	if (flags & BTRFS_BLOCK_GROUP_REMAP &&
> +		!btrfs_fs_incompat(fs_info, REMAP_TREE)) {
> +		block_group_err(leaf, slot,
> +"invalid flags, have 0x%llx (REMAP flag set) but no remap-tree incompat flag",
> +				flags);
> +		return -EUCLEAN;
> +	}
> +
>  	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
>  	if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
>  		     type != BTRFS_BLOCK_GROUP_METADATA &&
>  		     type != BTRFS_BLOCK_GROUP_SYSTEM &&
> +		     type != BTRFS_BLOCK_GROUP_REMAP &&
>  		     type != (BTRFS_BLOCK_GROUP_METADATA |
>  			      BTRFS_BLOCK_GROUP_DATA))) {
>  		block_group_err(leaf, slot,
> -"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
> +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx, 0x%llx or 0x%llx",
>  			type, hweight64(type),
>  			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
> -			BTRFS_BLOCK_GROUP_SYSTEM,
> +			BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_REMAP,
>  			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
>  		return -EUCLEAN;
>  	}
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2e144ebd56ac..e14b86e2996b 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -231,6 +231,9 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
>  	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
>  	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
>  	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");
> +	/* block groups containing the remap tree */
> +	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_REMAP, "remap");
> +	/* block group that has been remapped */
>  	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_REMAPPED, "remapped");
>  
>  	DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 2cbf8080eade..b0fa8eb38060 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -58,8 +58,6 @@ static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
>   */
>  static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
>  	      const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
> -static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
> -	      ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
>  
>  /* ilog2() can handle both constants and variables */
>  #define BTRFS_BG_FLAG_TO_INDEX(profile)					\
> @@ -81,6 +79,15 @@ enum btrfs_raid_types {
>  	BTRFS_NR_RAID_TYPES
>  };
>  
> +static_assert(BTRFS_RAID_RAID0 == 1);
> +static_assert(BTRFS_RAID_RAID1 == 2);
> +static_assert(BTRFS_RAID_DUP == 3);
> +static_assert(BTRFS_RAID_RAID10 == 4);
> +static_assert(BTRFS_RAID_RAID5 == 5);
> +static_assert(BTRFS_RAID_RAID6 == 6);
> +static_assert(BTRFS_RAID_RAID1C3 == 7);
> +static_assert(BTRFS_RAID_RAID1C4 == 8);
> +

What is the significance of these static_assert changes?

>  /*
>   * Use sequence counter to get consistent device stat data on
>   * 32-bit processors.
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index 4439d77a7252..9a36f0206d90 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -1169,12 +1169,14 @@ struct btrfs_dev_replace_item {
>  #define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
>  #define BTRFS_BLOCK_GROUP_RAID1C4       (1ULL << 10)
>  #define BTRFS_BLOCK_GROUP_REMAPPED      (1ULL << 11)
> +#define BTRFS_BLOCK_GROUP_REMAP         (1ULL << 12)
>  #define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
>  					 BTRFS_SPACE_INFO_GLOBAL_RSV)
>  
>  #define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
>  					 BTRFS_BLOCK_GROUP_SYSTEM |  \
> -					 BTRFS_BLOCK_GROUP_METADATA)
> +					 BTRFS_BLOCK_GROUP_METADATA | \
> +					 BTRFS_BLOCK_GROUP_REMAP)
>  
>  #define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
>  					 BTRFS_BLOCK_GROUP_RAID1 |   \
> -- 
> 2.49.1
> 

  reply	other threads:[~2025-10-15  3:37 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-09 11:27 [PATCH v3 00/17] Remap tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 01/17] btrfs: add definitions and constants for remap-tree Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 02/17] btrfs: add REMAP chunk type Mark Harmstone
2025-10-15  3:37   ` Boris Burkov [this message]
2025-10-20  9:58     ` Mark Harmstone
2025-10-20 17:35       ` Boris Burkov
2025-10-09 11:27 ` [PATCH v3 03/17] btrfs: allow remapped chunks to have zero stripes Mark Harmstone
2025-10-15  3:47   ` Boris Burkov
2025-10-20 12:15     ` Mark Harmstone
2025-10-09 11:27 ` [PATCH v3 04/17] btrfs: remove remapped block groups from the free-space tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 05/17] btrfs: don't add metadata items for the remap tree to the extent tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 06/17] btrfs: add extended version of struct block_group_item Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 07/17] btrfs: allow mounting filesystems with remap-tree incompat flag Mark Harmstone
2025-10-15  3:55   ` Boris Burkov
2025-10-20 11:32     ` Mark Harmstone
2025-10-20 17:44       ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 08/17] btrfs: redirect I/O for remapped block groups Mark Harmstone
2025-10-15  4:21   ` Boris Burkov
2025-10-20 14:31     ` Mark Harmstone
2025-10-20 17:44       ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 09/17] btrfs: release BG lock before calling btrfs_link_bg_list() Mark Harmstone
2025-10-09 11:56   ` Filipe Manana
2025-10-09 14:58     ` Mark Harmstone
2025-10-09 15:16       ` Filipe Manana
2025-10-09 16:30         ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 10/17] btrfs: handle deletions from remapped block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 11/17] btrfs: handle setting up relocation of block group with remap-tree Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 12/17] btrfs: move existing remaps before relocating block group Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 13/17] btrfs: replace identity maps with actual remaps when doing relocations Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 14/17] btrfs: add do_remap param to btrfs_discard_extent() Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 15/17] btrfs: allow balancing remap tree Mark Harmstone
2025-10-15  4:24   ` Boris Burkov
2025-10-09 11:28 ` [PATCH v3 16/17] btrfs: handle discarding fully-remapped block groups Mark Harmstone
2025-10-15  4:54   ` Boris Burkov
2025-10-23 17:35     ` Mark Harmstone
2025-10-09 11:28 ` [PATCH v3 17/17] btrfs: add stripe removal pending flag Mark Harmstone
2025-10-15  5:05   ` Boris Burkov
2025-10-20 14:52     ` Mark Harmstone

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251015033707.GD1702774@zen.localdomain \
    --to=boris@bur.io \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=mark@harmstone.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox