linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree
@ 2023-09-14 16:05 Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 1/6] btrfs-progs: add raid-stripe-tree definitions Johannes Thumshirn
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

This series adds support for the RAID stripe tree to btrfs-progs.

RST is hidden behind the --enable-experimental config option.

This series survived 'make test' with and without experimental enabled.

---
Changes in v4:
- Adapt to on-disk format changes
- Link to v3: https://lore.kernel.org/r/20230911-raid-stripe-tree-v1-0-c8337f7444b5@wdc.com

---
Johannes Thumshirn (6):
      btrfs-progs: add raid-stripe-tree definitions
      btrfs-progs: read fs with stripe tree from disk
      btrfs-progs: add dump tree support for the raid stripe tree
      btrfs-progs: allow zoned RAID
      btrfs-progs: load zone info for all zoned devices
      btrfs-progs: read stripe tree when mapping blocks

 cmds/inspect-dump-tree.c        |   5 ++
 common/fsfeatures.c             |   8 +++
 kernel-shared/accessors.h       |  37 +++++++++++++
 kernel-shared/ctree.h           |   9 +++-
 kernel-shared/disk-io.c         |  28 +++++++++-
 kernel-shared/print-tree.c      |  53 ++++++++++++++++++
 kernel-shared/uapi/btrfs.h      |   1 +
 kernel-shared/uapi/btrfs_tree.h |  29 ++++++++++
 kernel-shared/volumes.c         | 116 ++++++++++++++++++++++++++++++++++++++--
 kernel-shared/zoned.c           |  34 ++++++++++--
 kernel-shared/zoned.h           |   4 +-
 mkfs/main.c                     |  83 ++++++++++++++++++++++++++--
 12 files changed, 393 insertions(+), 14 deletions(-)
---
base-commit: aa49b7cfbbe55f9f7fd7f240bdaf960f722f0148
change-id: 20230613-raid-stripe-tree-6b64ad651c0a

Best regards,
-- 
Johannes Thumshirn <johannes.thumshirn@wdc.com>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v4 1/6] btrfs-progs: add raid-stripe-tree definitions
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
@ 2023-09-14 16:05 ` Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 2/6] btrfs-progs: read fs with stripe tree from disk Johannes Thumshirn
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

Add the definitions for the on-disk format of the RAID stripe tree.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 kernel-shared/accessors.h       | 37 +++++++++++++++++++++++++++++++++++++
 kernel-shared/uapi/btrfs_tree.h | 26 ++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/kernel-shared/accessors.h b/kernel-shared/accessors.h
index 625acfbe8ca7..80635f31c90b 100644
--- a/kernel-shared/accessors.h
+++ b/kernel-shared/accessors.h
@@ -270,6 +270,43 @@ BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
 		   extent_count, 32);
 BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
 
+/* struct btrfs_stripe_extent */
+BTRFS_SETGET_FUNCS(stripe_extent_encoding, struct btrfs_stripe_extent, encoding, 8);
+BTRFS_SETGET_FUNCS(raid_stride_devid, struct btrfs_raid_stride, devid, 64);
+BTRFS_SETGET_FUNCS(raid_stride_offset, struct btrfs_raid_stride, offset, 64);
+BTRFS_SETGET_FUNCS(raid_stride_length, struct btrfs_raid_stride, length, 64);
+
+static inline struct btrfs_raid_stride *btrfs_raid_stride_nr(
+						 struct btrfs_stripe_extent *dps,
+						 int nr)
+{
+	unsigned long offset = (unsigned long)dps;
+	offset += offsetof(struct btrfs_stripe_extent, strides);
+	offset += nr * sizeof(struct btrfs_raid_stride);
+	return (struct btrfs_raid_stride *)offset;
+}
+
+static inline u64 btrfs_raid_stride_devid_nr(struct extent_buffer *eb,
+					       struct btrfs_stripe_extent *dps,
+					       int nr)
+{
+	return btrfs_raid_stride_devid(eb, btrfs_raid_stride_nr(dps, nr));
+}
+
+static inline u64 btrfs_raid_stride_offset_nr(struct extent_buffer *eb,
+						struct btrfs_stripe_extent *dps,
+						int nr)
+{
+	return btrfs_raid_stride_offset(eb, btrfs_raid_stride_nr(dps, nr));
+}
+
+static inline u64 btrfs_raid_stride_length_nr(struct extent_buffer *eb,
+						struct btrfs_stripe_extent *dps,
+						int nr)
+{
+	return btrfs_raid_stride_length(eb, btrfs_raid_stride_nr(dps, nr));
+}
+
 /* struct btrfs_inode_ref */
 BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
 BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
diff --git a/kernel-shared/uapi/btrfs_tree.h b/kernel-shared/uapi/btrfs_tree.h
index ad555e7055ab..73411a8697ce 100644
--- a/kernel-shared/uapi/btrfs_tree.h
+++ b/kernel-shared/uapi/btrfs_tree.h
@@ -699,6 +699,32 @@ struct btrfs_super_block {
 	__u8 padding[565];
 } __attribute__ ((__packed__));
 
+struct btrfs_raid_stride {
+	/* btrfs device-id this raid extent lives on */
+	__le64 devid;
+	/* offset from the devextent start */
+	__le64 offset;
+	/* length of the stride on disk */
+	__le64 length;
+} __attribute__ ((__packed__));
+
+/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types */
+#define BTRFS_STRIPE_RAID0	1
+#define BTRFS_STRIPE_RAID1	2
+#define BTRFS_STRIPE_DUP	3
+#define BTRFS_STRIPE_RAID10	4
+#define BTRFS_STRIPE_RAID5	5
+#define BTRFS_STRIPE_RAID6	6
+#define BTRFS_STRIPE_RAID1C3	7
+#define BTRFS_STRIPE_RAID1C4	8
+
+struct btrfs_stripe_extent {
+	u8 encoding;
+	u8 reserved[7];
+	/* array of raid strides this stripe is comprised of */
+	struct btrfs_raid_stride strides;
+} __attribute__ ((__packed__));
+
 #define BTRFS_FREE_SPACE_EXTENT	1
 #define BTRFS_FREE_SPACE_BITMAP	2
 

-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 2/6] btrfs-progs: read fs with stripe tree from disk
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 1/6] btrfs-progs: add raid-stripe-tree definitions Johannes Thumshirn
@ 2023-09-14 16:05 ` Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 3/6] btrfs-progs: add dump tree support for the raid stripe tree Johannes Thumshirn
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

When encountering a filesystem formatted with the raid stripe tree
feature, read it from disk.

Also add the incompat declaration to the tree printer.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 kernel-shared/ctree.h           |  1 +
 kernel-shared/disk-io.c         | 28 +++++++++++++++++++++++++++-
 kernel-shared/print-tree.c      |  1 +
 kernel-shared/uapi/btrfs.h      |  1 +
 kernel-shared/uapi/btrfs_tree.h |  3 +++
 5 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h
index 5d3392ae82a6..035358436d8f 100644
--- a/kernel-shared/ctree.h
+++ b/kernel-shared/ctree.h
@@ -298,6 +298,7 @@ struct btrfs_fs_info {
 	struct btrfs_root *quota_root;
 	struct btrfs_root *uuid_root;
 	struct btrfs_root *block_group_root;
+	struct btrfs_root *stripe_root;
 
 	struct rb_root global_roots_tree;
 	struct rb_root fs_root_tree;
diff --git a/kernel-shared/disk-io.c b/kernel-shared/disk-io.c
index 442d3af8bc01..3ab32f7ad910 100644
--- a/kernel-shared/disk-io.c
+++ b/kernel-shared/disk-io.c
@@ -790,6 +790,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
 		return fs_info->block_group_root ? fs_info->block_group_root :
 						ERR_PTR(-ENOENT);
 
+	if (location->objectid == BTRFS_RAID_STRIPE_TREE_OBJECTID)
+		return fs_info->stripe_root ? fs_info->stripe_root :
+			ERR_PTR(-ENOENT);
+
 	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
 
 	node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
@@ -822,6 +826,9 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 	if (fs_info->quota_root)
 		free(fs_info->quota_root);
 
+	if (fs_info->stripe_root)
+		free(fs_info->stripe_root);
+
 	free_global_roots_tree(&fs_info->global_roots_tree);
 	free(fs_info->tree_root);
 	free(fs_info->chunk_root);
@@ -846,12 +853,14 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
 	fs_info->dev_root = calloc(1, sizeof(struct btrfs_root));
 	fs_info->quota_root = calloc(1, sizeof(struct btrfs_root));
 	fs_info->uuid_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->stripe_root = calloc(1, sizeof(struct btrfs_root));
 	fs_info->block_group_root = calloc(1, sizeof(struct btrfs_root));
 	fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
 
 	if (!fs_info->tree_root || !fs_info->chunk_root || !fs_info->dev_root ||
 	    !fs_info->quota_root || !fs_info->uuid_root ||
-	    !fs_info->block_group_root || !fs_info->super_copy)
+	    !fs_info->block_group_root || !fs_info->super_copy ||
+	    !fs_info->stripe_root)
 		goto free_all;
 
 	extent_buffer_init_cache(fs_info);
@@ -1260,6 +1269,21 @@ int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info, u64 root_tree_bytenr,
 			return -EIO;
 	}
 
+#if EXPERIMENTAL
+	if (btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE)) {
+		ret = btrfs_find_and_setup_root(root, fs_info,
+						BTRFS_RAID_STRIPE_TREE_OBJECTID,
+						fs_info->stripe_root);
+		if (ret) {
+			free(fs_info->stripe_root);
+			fs_info->stripe_root = NULL;
+		} else {
+			set_bit(BTRFS_ROOT_TRACK_DIRTY,
+				&fs_info->stripe_root->state);
+		}
+	}
+#endif
+
 	if (maybe_load_block_groups(fs_info, flags)) {
 		ret = btrfs_read_block_groups(fs_info);
 		/*
@@ -1317,6 +1341,8 @@ void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
 		free_extent_buffer(fs_info->chunk_root->node);
 	if (fs_info->uuid_root)
 		free_extent_buffer(fs_info->uuid_root->node);
+	if (fs_info->stripe_root)
+		free_extent_buffer(fs_info->stripe_root->node);
 }
 
 static void free_map_lookup(struct cache_extent *ce)
diff --git a/kernel-shared/print-tree.c b/kernel-shared/print-tree.c
index 0f7f7b72f96a..3eff82b364ef 100644
--- a/kernel-shared/print-tree.c
+++ b/kernel-shared/print-tree.c
@@ -1708,6 +1708,7 @@ static struct readable_flag_entry incompat_flags_array[] = {
 	DEF_INCOMPAT_FLAG_ENTRY(RAID1C34),
 	DEF_INCOMPAT_FLAG_ENTRY(ZONED),
 	DEF_INCOMPAT_FLAG_ENTRY(EXTENT_TREE_V2),
+	DEF_INCOMPAT_FLAG_ENTRY(RAID_STRIPE_TREE),
 };
 static const int incompat_flags_num = sizeof(incompat_flags_array) /
 				      sizeof(struct readable_flag_entry);
diff --git a/kernel-shared/uapi/btrfs.h b/kernel-shared/uapi/btrfs.h
index 85b04f89a2a9..c750b7aa921f 100644
--- a/kernel-shared/uapi/btrfs.h
+++ b/kernel-shared/uapi/btrfs.h
@@ -356,6 +356,7 @@ _static_assert(sizeof(struct btrfs_ioctl_fs_info_args) == 1024);
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
 #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
 #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
+#define BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE (1ULL << 14)
 
 struct btrfs_ioctl_feature_flags {
 	__u64 compat_flags;
diff --git a/kernel-shared/uapi/btrfs_tree.h b/kernel-shared/uapi/btrfs_tree.h
index 73411a8697ce..b76b1660f38b 100644
--- a/kernel-shared/uapi/btrfs_tree.h
+++ b/kernel-shared/uapi/btrfs_tree.h
@@ -73,6 +73,9 @@
 /* Holds the block group items for extent tree v2. */
 #define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
 
+/* holds raid stripe entries */
+#define BTRFS_RAID_STRIPE_TREE_OBJECTID 12ULL
+
 /* device stats in the device tree */
 #define BTRFS_DEV_STATS_OBJECTID 0ULL
 

-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 3/6] btrfs-progs: add dump tree support for the raid stripe tree
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 1/6] btrfs-progs: add raid-stripe-tree definitions Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 2/6] btrfs-progs: read fs with stripe tree from disk Johannes Thumshirn
@ 2023-09-14 16:05 ` Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 4/6] btrfs-progs: allow zoned RAID Johannes Thumshirn
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

Add support for the RAID stripe tree to btrfs inspect-internal dump-tree.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 cmds/inspect-dump-tree.c   |  5 +++++
 kernel-shared/ctree.h      |  5 +++++
 kernel-shared/print-tree.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/cmds/inspect-dump-tree.c b/cmds/inspect-dump-tree.c
index bfc0fff148dd..328ecd76c8a0 100644
--- a/cmds/inspect-dump-tree.c
+++ b/cmds/inspect-dump-tree.c
@@ -170,6 +170,7 @@ static u64 treeid_from_string(const char *str, const char **end)
 		{ "TREE_RELOC", BTRFS_TREE_RELOC_OBJECTID },
 		{ "DATA_RELOC", BTRFS_DATA_RELOC_TREE_OBJECTID },
 		{ "BLOCK_GROUP_TREE", BTRFS_BLOCK_GROUP_TREE_OBJECTID },
+		{ "RAID_STRIPE", BTRFS_RAID_STRIPE_TREE_OBJECTID },
 	};
 
 	if (strncasecmp("BTRFS_", str, strlen("BTRFS_")) == 0)
@@ -729,6 +730,10 @@ again:
 				if (!skip)
 					pr_verbose(LOG_DEFAULT, "block group");
 				break;
+			case BTRFS_RAID_STRIPE_TREE_OBJECTID:
+				if (!skip)
+					printf("raid stripe");
+				break;
 			default:
 				if (!skip) {
 					pr_verbose(LOG_DEFAULT, "file");
diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h
index 035358436d8f..de09c15ca0eb 100644
--- a/kernel-shared/ctree.h
+++ b/kernel-shared/ctree.h
@@ -556,6 +556,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
  */
 #define BTRFS_EXTENT_DATA_KEY	108
 
+
 /*
  * csum items have the checksums for data in the extents
  */
@@ -640,6 +641,8 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_DEV_ITEM_KEY	216
 #define BTRFS_CHUNK_ITEM_KEY	228
 
+#define BTRFS_RAID_STRIPE_KEY	230
+
 #define BTRFS_BALANCE_ITEM_KEY	248
 
 /*
@@ -650,6 +653,8 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_QGROUP_LIMIT_KEY		244
 #define BTRFS_QGROUP_RELATION_KEY	246
 
+
+
 /*
  * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
  */
diff --git a/kernel-shared/print-tree.c b/kernel-shared/print-tree.c
index 3eff82b364ef..d719ebaebb40 100644
--- a/kernel-shared/print-tree.c
+++ b/kernel-shared/print-tree.c
@@ -637,6 +637,51 @@ static void print_free_space_header(struct extent_buffer *leaf, int slot)
 	       (unsigned long long)btrfs_free_space_bitmaps(leaf, header));
 }
 
+struct raid_encoding_map {
+	u8 encoding;
+	char name[16];
+};
+
+static const struct raid_encoding_map raid_map[] = {
+	{ BTRFS_STRIPE_DUP,	"DUP" },
+	{ BTRFS_STRIPE_RAID0,	"RAID0" },
+	{ BTRFS_STRIPE_RAID1,	"RAID1" },
+	{ BTRFS_STRIPE_RAID1C3,	"RAID1C3" },
+	{ BTRFS_STRIPE_RAID1C4, "RAID1C4" },
+	{ BTRFS_STRIPE_RAID5,	"RAID5" },
+	{ BTRFS_STRIPE_RAID6,	"RAID6" },
+	{ BTRFS_STRIPE_RAID10,	"RAID10" }
+};
+
+static const char *stripe_encoding_name(u8 encoding)
+{
+	for (int i = 0; i < ARRAY_SIZE(raid_map); i++) {
+		if (raid_map[i].encoding == encoding)
+			return raid_map[i].name;
+	}
+
+	return "UNKNOWN";
+}
+
+static void print_raid_stripe_key(struct extent_buffer *eb,
+				  u32 item_size,
+				  struct btrfs_stripe_extent *stripe)
+{
+	int num_stripes;
+	u8 encoding = btrfs_stripe_extent_encoding(eb, stripe);
+	int i;
+
+	num_stripes = (item_size - offsetof(struct btrfs_stripe_extent, strides)) /
+		sizeof(struct btrfs_raid_stride);
+
+	printf("\t\t\tencoding: %s\n", stripe_encoding_name(encoding));
+	for (i = 0; i < num_stripes; i++)
+		printf("\t\t\tstripe %d devid %llu physical %llu length %llu\n", i,
+		       (unsigned long long)btrfs_raid_stride_devid_nr(eb, stripe, i),
+		       (unsigned long long)btrfs_raid_stride_offset_nr(eb, stripe, i),
+		       (unsigned long long)btrfs_raid_stride_length_nr(eb, stripe, i));
+}
+
 void print_key_type(FILE *stream, u64 objectid, u8 type)
 {
 	static const char* key_to_str[256] = {
@@ -681,6 +726,7 @@ void print_key_type(FILE *stream, u64 objectid, u8 type)
 		[BTRFS_PERSISTENT_ITEM_KEY]	= "PERSISTENT_ITEM",
 		[BTRFS_UUID_KEY_SUBVOL]		= "UUID_KEY_SUBVOL",
 		[BTRFS_UUID_KEY_RECEIVED_SUBVOL] = "UUID_KEY_RECEIVED_SUBVOL",
+		[BTRFS_RAID_STRIPE_KEY]		= "RAID_STRIPE_KEY",
 	};
 
 	if (type == 0 && objectid == BTRFS_FREE_SPACE_OBJECTID) {
@@ -793,6 +839,9 @@ void print_objectid(FILE *stream, u64 objectid, u8 type)
 	case BTRFS_CSUM_CHANGE_OBJECTID:
 		fprintf(stream, "CSUM_CHANGE");
 		break;
+	case  BTRFS_RAID_STRIPE_TREE_OBJECTID:
+		fprintf(stream, "RAID_STRIPE_TREE");
+		break;
 	case (u64)-1:
 		fprintf(stream, "-1");
 		break;
@@ -1469,6 +1518,9 @@ void btrfs_print_leaf(struct extent_buffer *eb, unsigned int mode)
 		case BTRFS_TEMPORARY_ITEM_KEY:
 			print_temporary_item(eb, ptr, objectid, offset);
 			break;
+		case BTRFS_RAID_STRIPE_KEY:
+			print_raid_stripe_key(eb, item_size, ptr);
+			break;
 		};
 		fflush(stdout);
 	}

-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 4/6] btrfs-progs: allow zoned RAID
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
                   ` (2 preceding siblings ...)
  2023-09-14 16:05 ` [PATCH v4 3/6] btrfs-progs: add dump tree support for the raid stripe tree Johannes Thumshirn
@ 2023-09-14 16:05 ` Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 5/6] btrfs-progs: load zone info for all zoned devices Johannes Thumshirn
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

Allow for RAID levels 0, 1 and 10 on zoned devices if the RAID stripe tree
is used.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 common/fsfeatures.c   |  8 ++++++
 kernel-shared/ctree.h |  3 +-
 kernel-shared/zoned.c | 34 ++++++++++++++++++++--
 kernel-shared/zoned.h |  4 +--
 mkfs/main.c           | 79 ++++++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/common/fsfeatures.c b/common/fsfeatures.c
index 00658fa5159f..2658f5072af4 100644
--- a/common/fsfeatures.c
+++ b/common/fsfeatures.c
@@ -189,6 +189,14 @@ static const struct btrfs_feature mkfs_features[] = {
 		VERSION_NULL(safe),
 		VERSION_NULL(default),
 		.desc		= "new extent tree format"
+	} , {
+		.name		= "raid-stripe-tree",
+		.incompat_flag	= BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE,
+		.sysfs_name	= NULL,
+		VERSION_NULL(compat),
+		VERSION_NULL(safe),
+		VERSION_NULL(default),
+		.desc		= "raid stripe tree"
 	},
 #endif
 	/* Keep this one last */
diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h
index de09c15ca0eb..f6ee467adaab 100644
--- a/kernel-shared/ctree.h
+++ b/kernel-shared/ctree.h
@@ -102,7 +102,8 @@ static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 nodesize)
 	 BTRFS_FEATURE_INCOMPAT_RAID1C34 |		\
 	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID |		\
 	 BTRFS_FEATURE_INCOMPAT_ZONED |			\
-	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
+	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 |	\
+	 BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE)
 #else
 #define BTRFS_FEATURE_INCOMPAT_SUPP			\
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
diff --git a/kernel-shared/zoned.c b/kernel-shared/zoned.c
index d187c5763406..d8fad4319e44 100644
--- a/kernel-shared/zoned.c
+++ b/kernel-shared/zoned.c
@@ -737,7 +737,7 @@ out:
 	return ret;
 }
 
-bool zoned_profile_supported(u64 map_type)
+bool zoned_profile_supported(u64 map_type, bool rst)
 {
 	bool data = (map_type & BTRFS_BLOCK_GROUP_DATA);
 	u64 flags = (map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
@@ -746,9 +746,37 @@ bool zoned_profile_supported(u64 map_type)
 	if (flags == 0)
 		return true;
 
-	/* We can support DUP on metadata */
+#if EXPERIMENTAL
+	if (data) {
+		if ((flags & BTRFS_BLOCK_GROUP_DUP) && rst)
+			return true;
+		/* Data RAID1 needs a raid-stripe-tree */
+		if ((flags & BTRFS_BLOCK_GROUP_RAID1_MASK) && rst)
+			return true;
+		/* Data RAID0 needs a raid-stripe-tree */
+		if ((flags & BTRFS_BLOCK_GROUP_RAID0) && rst)
+			return true;
+		/* Data RAID10 needs a raid-stripe-tree */
+		if ((flags & BTRFS_BLOCK_GROUP_RAID10) && rst)
+			return true;
+	} else {
+		/* We can support DUP on metadata/system */
+		if (flags & BTRFS_BLOCK_GROUP_DUP)
+			return true;
+		/* We can support RAID1 on metadata/system */
+		if (flags & BTRFS_BLOCK_GROUP_RAID1_MASK)
+			return true;
+		/* We can support RAID0 on metadata/system */
+		if (flags & BTRFS_BLOCK_GROUP_RAID0)
+			return true;
+		/* We can support RAID10 on metadata/system */
+		if (flags & BTRFS_BLOCK_GROUP_RAID10)
+			return true;
+	}
+#else
 	if (!data && (flags & BTRFS_BLOCK_GROUP_DUP))
 		return true;
+#endif
 
 	/* All other profiles are not supported yet */
 	return false;
@@ -863,7 +891,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	if (!zoned_profile_supported(map->type)) {
+	if (!zoned_profile_supported(map->type, !!fs_info->stripe_root)) {
 		error("zoned: profile %s not yet supported",
 		      btrfs_group_profile_str(map->type));
 		ret = -EINVAL;
diff --git a/kernel-shared/zoned.h b/kernel-shared/zoned.h
index 9e4162cf25c5..6efc60281bc9 100644
--- a/kernel-shared/zoned.h
+++ b/kernel-shared/zoned.h
@@ -133,7 +133,7 @@ static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
 	return zinfo->zones[zno].cond == BLK_ZONE_COND_EMPTY;
 }
 
-bool zoned_profile_supported(u64 map_type);
+bool zoned_profile_supported(u64 map_type, bool rst);
 int btrfs_reset_dev_zone(int fd, struct blk_zone *zone);
 u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 				 u64 hole_end, u64 num_bytes);
@@ -214,7 +214,7 @@ static inline int btrfs_wipe_temporary_sb(struct btrfs_fs_devices *fs_devices)
 	return 0;
 }
 
-static inline bool zoned_profile_supported(u64 map_type)
+static inline bool zoned_profile_supported(u64 map_type, bool rst)
 {
 	return false;
 }
diff --git a/mkfs/main.c b/mkfs/main.c
index 7acd39ec6531..7d07ba1e7001 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -962,6 +962,38 @@ fail:
 	return ret;
 }
 
+static int setup_raid_stripe_tree_root(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *stripe_root;
+	struct btrfs_key key = {
+		.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID,
+		.type = BTRFS_ROOT_ITEM_KEY,
+	};
+	int ret;
+
+	trans = btrfs_start_transaction(fs_info->tree_root, 0);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	stripe_root = btrfs_create_tree(trans, fs_info, &key);
+	if (IS_ERR(stripe_root))  {
+		ret =  PTR_ERR(stripe_root);
+		goto abort;
+	}
+	fs_info->stripe_root = stripe_root;
+	add_root_to_dirty_list(stripe_root);
+
+	ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+	if (ret)
+		return ret;
+
+	return 0;
+abort:
+	btrfs_abort_transaction(trans, ret);
+	return ret;
+}
+
 /* Thread callback for device preparation */
 static void *prepare_one_device(void *ctx)
 {
@@ -1472,10 +1504,39 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
 	if (ret)
 		goto error;
 
-	if (opt_zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
-		      !zoned_profile_supported(BTRFS_BLOCK_GROUP_DATA | data_profile))) {
-		error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
-		goto error;
+#if EXPERIMENTAL
+	if (opt_zoned && device_count) {
+		switch (data_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+		case BTRFS_BLOCK_GROUP_DUP:
+		case BTRFS_BLOCK_GROUP_RAID1:
+		case BTRFS_BLOCK_GROUP_RAID1C3:
+		case BTRFS_BLOCK_GROUP_RAID1C4:
+		case BTRFS_BLOCK_GROUP_RAID0:
+		case BTRFS_BLOCK_GROUP_RAID10:
+			features.incompat_flags |=
+				BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE;
+			break;
+		default:
+			break;
+		}
+	}
+#endif
+
+	if (opt_zoned) {
+		u64 metadata = BTRFS_BLOCK_GROUP_METADATA | metadata_profile;
+		u64 data = BTRFS_BLOCK_GROUP_DATA | data_profile;
+#if EXPERIMENTAL
+		bool rst = features.incompat_flags &
+			BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE;
+#else
+		bool rst = false;
+#endif
+
+		if (!zoned_profile_supported(metadata, rst) ||
+		    !zoned_profile_supported(data, rst)) {
+			error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
+			goto error;
+		}
 	}
 
 	t_prepare = calloc(device_count, sizeof(*t_prepare));
@@ -1585,6 +1646,15 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
 		goto error;
 	}
 
+	if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE) {
+		ret = setup_raid_stripe_tree_root(fs_info);
+		if (ret < 0) {
+			error("failed to initialize raid-stripe-tree: %d (%m)",
+			      ret);
+			goto out;
+		}
+	}
+
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		errno = -PTR_ERR(trans);
@@ -1750,6 +1820,7 @@ raid_groups:
 			goto out;
 		}
 	}
+
 	if (bconf.verbose) {
 		char features_buf[BTRFS_FEATURE_STRING_BUF_SIZE];
 

-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 5/6] btrfs-progs: load zone info for all zoned devices
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
                   ` (3 preceding siblings ...)
  2023-09-14 16:05 ` [PATCH v4 4/6] btrfs-progs: allow zoned RAID Johannes Thumshirn
@ 2023-09-14 16:05 ` Johannes Thumshirn
  2023-09-14 16:05 ` [PATCH v4 6/6] btrfs-progs: read stripe tree when mapping blocks Johannes Thumshirn
  2023-09-14 18:43 ` [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree David Sterba
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 mkfs/main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mkfs/main.c b/mkfs/main.c
index 7d07ba1e7001..8a27704ac29e 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -1732,6 +1732,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
 		}
 	}
 
+
+	if (opt_zoned)
+		btrfs_get_dev_zone_info_all_devices(fs_info);
+
 raid_groups:
 	ret = create_raid_groups(trans, root, data_profile,
 			 metadata_profile, mixed, &allocation);

-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 6/6] btrfs-progs: read stripe tree when mapping blocks
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
                   ` (4 preceding siblings ...)
  2023-09-14 16:05 ` [PATCH v4 5/6] btrfs-progs: load zone info for all zoned devices Johannes Thumshirn
@ 2023-09-14 16:05 ` Johannes Thumshirn
  2023-09-14 18:43 ` [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree David Sterba
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Thumshirn @ 2023-09-14 16:05 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs, Johannes Thumshirn

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 kernel-shared/volumes.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 113 insertions(+), 3 deletions(-)

diff --git a/kernel-shared/volumes.c b/kernel-shared/volumes.c
index 95d5930b95d8..2081f7db088f 100644
--- a/kernel-shared/volumes.c
+++ b/kernel-shared/volumes.c
@@ -1796,6 +1796,105 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 				 multi_ret, mirror_num, raid_map_ret);
 }
 
+static bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
+					u64 map_type)
+{
+#if EXPERIMENTAL
+	const bool is_data = (map_type & BTRFS_BLOCK_GROUP_DATA);
+
+	if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE))
+		return false;
+
+	if (!fs_info->stripe_root)
+		return false;
+
+	if (!is_data)
+		return false;
+
+	if (map_type & BTRFS_BLOCK_GROUP_DUP)
+		return true;
+
+	if (map_type & BTRFS_BLOCK_GROUP_RAID1_MASK)
+		return true;
+
+	if (map_type & BTRFS_BLOCK_GROUP_RAID0)
+		return true;
+
+	if (map_type & BTRFS_BLOCK_GROUP_RAID10)
+		return true;
+
+#endif
+	return false;
+}
+
+static int btrfs_stripe_tree_logical_to_physical(struct btrfs_fs_info *fs_info,
+						u64 logical,
+						struct btrfs_bio_stripe *stripe)
+{
+	struct btrfs_root *root = fs_info->stripe_root;
+	struct btrfs_path path = { 0 };
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	int slot;
+	int ret;
+
+	key.objectid = logical;
+	key.type = BTRFS_RAID_STRIPE_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	while (1) {
+		struct btrfs_key found_key;
+		struct btrfs_stripe_extent *extent;
+		int num_stripes;
+		u32 item_size;
+		int i;
+
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, &path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto error;
+			break;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		if (found_key.type != BTRFS_RAID_STRIPE_KEY)
+			goto next;
+
+		extent = btrfs_item_ptr(leaf, slot,
+					struct btrfs_stripe_extent);
+		item_size = btrfs_item_size(leaf, slot);
+		num_stripes = (item_size -
+			offsetof(struct btrfs_stripe_extent, strides)) /
+			sizeof(struct btrfs_raid_stride);
+
+		for (i = 0; i < num_stripes; i++) {
+			if (stripe->dev->devid !=
+				btrfs_raid_stride_devid_nr(leaf, extent, i))
+				continue;
+			stripe->physical = btrfs_raid_stride_offset_nr(leaf, extent, i);
+			btrfs_release_path(&path);
+			return 0;
+		}
+
+next:
+		path.slots[0]++;
+	}
+
+	btrfs_release_path(&path);
+error:
+	return ret;
+}
+
 int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		      u64 logical, u64 *length, u64 *type,
 		      struct btrfs_multi_bio **multi_ret, int mirror_num,
@@ -1988,10 +2087,21 @@ again:
 	BUG_ON(stripe_index >= map->num_stripes);
 
 	for (i = 0; i < multi->num_stripes; i++) {
-		multi->stripes[i].physical =
-			map->stripes[stripe_index].physical + stripe_offset +
-			stripe_nr * map->stripe_len;
 		multi->stripes[i].dev = map->stripes[stripe_index].dev;
+
+		if (stripes_allocated &&
+			btrfs_need_stripe_tree_update(fs_info, map->type)) {
+			int ret;
+
+			ret = btrfs_stripe_tree_logical_to_physical(fs_info, logical,
+								&multi->stripes[i]);
+			if (ret)
+				return ret;
+		} else {
+			multi->stripes[i].physical =
+				map->stripes[stripe_index].physical +
+				stripe_offset + stripe_nr * map->stripe_len;
+		}
 		stripe_index++;
 	}
 	*multi_ret = multi;

-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree
  2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
                   ` (5 preceding siblings ...)
  2023-09-14 16:05 ` [PATCH v4 6/6] btrfs-progs: read stripe tree when mapping blocks Johannes Thumshirn
@ 2023-09-14 18:43 ` David Sterba
  6 siblings, 0 replies; 8+ messages in thread
From: David Sterba @ 2023-09-14 18:43 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: David Sterba, linux-btrfs

On Thu, Sep 14, 2023 at 09:05:31AM -0700, Johannes Thumshirn wrote:
> This series adds support for the RAID stripe tree to btrfs-progs.
> 
> RST is hidden behind the --enable-experimental config option.
> 
> This series survived 'make test' with and without experimental enabled.
> 
> ---
> Changes in v4:
> - Adapt to on-disk format changes
> - Link to v3: https://lore.kernel.org/r/20230911-raid-stripe-tree-v1-0-c8337f7444b5@wdc.com

I've added the series to devel, with some fixups. Please send any
updates as incremental patches. Thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-09-14 18:50 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-09-14 16:05 [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree Johannes Thumshirn
2023-09-14 16:05 ` [PATCH v4 1/6] btrfs-progs: add raid-stripe-tree definitions Johannes Thumshirn
2023-09-14 16:05 ` [PATCH v4 2/6] btrfs-progs: read fs with stripe tree from disk Johannes Thumshirn
2023-09-14 16:05 ` [PATCH v4 3/6] btrfs-progs: add dump tree support for the raid stripe tree Johannes Thumshirn
2023-09-14 16:05 ` [PATCH v4 4/6] btrfs-progs: allow zoned RAID Johannes Thumshirn
2023-09-14 16:05 ` [PATCH v4 5/6] btrfs-progs: load zone info for all zoned devices Johannes Thumshirn
2023-09-14 16:05 ` [PATCH v4 6/6] btrfs-progs: read stripe tree when mapping blocks Johannes Thumshirn
2023-09-14 18:43 ` [PATCH v4 0/6] btrfs-progs: add support for RAID stripe tree David Sterba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).