public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 1/5] btrfs-progs: introduce the basic support for RAID56J feature
Date: Sun, 15 May 2022 18:54:56 +0800	[thread overview]
Message-ID: <c80eda7127082ae9eefc9ed3c8258fafff12e776.1652611957.git.wqu@suse.com> (raw)
In-Reply-To: <cover.1652611957.git.wqu@suse.com>

This patch will cross-port the RAID56J feature from the WIP kernel
patch to btrfs-progs, allowing us to create a fs with RAID56J.

The RAID56J feature itself is pretty much the same as regular RAID56,
with extra btrfs_chunk::per_dev_reserved bytes reserved for each stripe.

The reserved space will be used for write-ahead journal to address the
write-hole problem.

Thankfully for btrfs-progs, there isn't much need to fully implement the
journal yet.

This patch will just allow chunk allocation/deletion to take the extra
reservation into consideration.

And the new feature will only be enabled with experimental feature.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 check/common.h              |   6 ++-
 cmds/filesystem-usage.c     |   6 ++-
 cmds/rescue-chunk-recover.c |  13 +++--
 common/utils.c              |   6 ++-
 kernel-shared/ctree.h       |  42 +++++++++++++--
 kernel-shared/extent-tree.c |  18 +++++--
 kernel-shared/volumes.c     | 105 +++++++++++++++++++++++++++++++-----
 kernel-shared/volumes.h     |   2 +
 8 files changed, 162 insertions(+), 36 deletions(-)

diff --git a/check/common.h b/check/common.h
index ba4e291e8d0d..f6e6eece37aa 100644
--- a/check/common.h
+++ b/check/common.h
@@ -133,9 +133,11 @@ static inline int check_num_stripes(u64 type, int num_stripes)
 {
 	if (num_stripes == 0)
 		return -1;
-	if (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes <= 1)
+	if (type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID5J) &&
+	    num_stripes <= 1)
 		return -1;
-	if (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes <= 2)
+	if (type & (BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID6J) &&
+	    num_stripes <= 2)
 		return -1;
 	return 0;
 }
diff --git a/cmds/filesystem-usage.c b/cmds/filesystem-usage.c
index 01729e1886ac..4bdb07eeba86 100644
--- a/cmds/filesystem-usage.c
+++ b/cmds/filesystem-usage.c
@@ -356,11 +356,13 @@ static void get_raid56_space_info(struct btrfs_ioctl_space_args *sargs,
 		double l_data_ratio, l_metadata_ratio, l_system_ratio, rt;
 
 		parities_count = btrfs_bg_type_to_nparity(info_ptr->type);
-		if (info_ptr->type & BTRFS_BLOCK_GROUP_RAID5) {
+		if ((BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID5J) &
+		    info_ptr->type) {
 			l_data_ratio = l_data_ratio_r5;
 			l_metadata_ratio = l_metadata_ratio_r5;
 			l_system_ratio = l_system_ratio_r5;
-		} else if (info_ptr->type & BTRFS_BLOCK_GROUP_RAID6) {
+		} else if ((BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID6J) &
+		    info_ptr->type) {
 			l_data_ratio = l_data_ratio_r6;
 			l_metadata_ratio = l_metadata_ratio_r6;
 			l_system_ratio = l_system_ratio_r6;
diff --git a/cmds/rescue-chunk-recover.c b/cmds/rescue-chunk-recover.c
index ec5c206f85e7..67a7bd595b5d 100644
--- a/cmds/rescue-chunk-recover.c
+++ b/cmds/rescue-chunk-recover.c
@@ -2093,8 +2093,8 @@ next_csum:
 
 	if (list_empty(&candidates)) {
 		num_unordered = count_devext_records(&unordered);
-		if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6
-					&& num_unordered == 2) {
+		if ((BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID6J) &
+		    chunk->type && num_unordered == 2) {
 			btrfs_release_path(&path);
 			ret = fill_chunk_up(chunk, &unordered, rc);
 			return ret;
@@ -2139,12 +2139,11 @@ out:
 		if (ret)
 			goto fail_out;
 	} else {
-		if ((num_unordered == 2 && chunk->type_flags
-			& BTRFS_BLOCK_GROUP_RAID5)
-		 || (num_unordered == 3 && chunk->type_flags
-			& BTRFS_BLOCK_GROUP_RAID6)) {
+		if ((num_unordered == 2 && chunk->type_flags &
+		    (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID5J))
+		 || (num_unordered == 3 && chunk->type_flags &
+		    (BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID6J)))
 			ret = fill_chunk_up(chunk, &unordered, rc);
-		}
 	}
 fail_out:
 	ret = !!ret || (list_empty(&unordered) ? 0 : 1);
diff --git a/common/utils.c b/common/utils.c
index 1ed5571f7c1c..e609cca50cde 100644
--- a/common/utils.c
+++ b/common/utils.c
@@ -602,10 +602,12 @@ int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile,
 		return 1;
 	}
 
-	if (dev_cnt == 3 && profile & BTRFS_BLOCK_GROUP_RAID6) {
+	if (dev_cnt == 3 && profile &
+	    (BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID6J)) {
 		warning("RAID6 is not recommended on filesystem with 3 devices only");
 	}
-	if (dev_cnt == 2 && profile & BTRFS_BLOCK_GROUP_RAID5) {
+	if (dev_cnt == 2 && profile &
+	    (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID5J)) {
 		warning("RAID5 is not recommended on filesystem with 2 devices only");
 	}
 	warning_on(!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP) && ssd,
diff --git a/kernel-shared/ctree.h b/kernel-shared/ctree.h
index 68943ff294cc..4ad7cd9948a2 100644
--- a/kernel-shared/ctree.h
+++ b/kernel-shared/ctree.h
@@ -266,6 +266,10 @@ struct btrfs_dev_item {
 
 struct btrfs_stripe {
 	__le64 devid;
+	/*
+	 * Where the real stripe starts on the device, excluding the per-dev
+	 * reserved bytes.
+	 */
 	__le64 offset;
 	u8 dev_uuid[BTRFS_UUID_SIZE];
 } __attribute__ ((__packed__));
@@ -280,8 +284,19 @@ struct btrfs_chunk {
 	__le64 stripe_len;
 	__le64 type;
 
-	/* optimal io alignment for this chunk */
-	__le32 io_align;
+	union {
+		/*
+		 * For non-journaled profiles, optimal io alignment for this
+		 * chunk, not really utilized though.
+		 */
+		__le32 io_align;
+
+		/*
+		 * For journaled profiles, per-device-extent reserved bytes
+		 * before the real data starts.
+		 */
+		__le32 per_dev_reserved;
+	};
 
 	/* optimal io width for this chunk */
 	__le32 io_width;
@@ -512,6 +527,7 @@ BUILD_ASSERT(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
 #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
 #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
+#define BTRFS_FEATURE_INCOMPAT_RAID56_JOURNAL	(1ULL << 14)
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 
@@ -539,7 +555,8 @@ BUILD_ASSERT(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 	 BTRFS_FEATURE_INCOMPAT_RAID1C34 |		\
 	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID |		\
 	 BTRFS_FEATURE_INCOMPAT_ZONED |			\
-	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
+	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 |	\
+	 BTRFS_FEATURE_INCOMPAT_RAID56_JOURNAL)
 #else
 #define BTRFS_FEATURE_INCOMPAT_SUPP			\
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
@@ -1017,6 +1034,8 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_RAID6    	(1ULL << 8)
 #define BTRFS_BLOCK_GROUP_RAID1C3    	(1ULL << 9)
 #define BTRFS_BLOCK_GROUP_RAID1C4    	(1ULL << 10)
+#define BTRFS_BLOCK_GROUP_RAID5J    	(1ULL << 11)
+#define BTRFS_BLOCK_GROUP_RAID6J    	(1ULL << 12)
 #define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
 					 BTRFS_SPACE_INFO_GLOBAL_RSV)
 
@@ -1030,6 +1049,8 @@ enum btrfs_raid_types {
 	BTRFS_RAID_RAID6,
 	BTRFS_RAID_RAID1C3,
 	BTRFS_RAID_RAID1C4,
+	BTRFS_RAID_RAID5J,
+	BTRFS_RAID_RAID6J,
 	BTRFS_NR_RAID_TYPES
 };
 
@@ -1041,13 +1062,20 @@ enum btrfs_raid_types {
 					 BTRFS_BLOCK_GROUP_RAID1 |   \
 					 BTRFS_BLOCK_GROUP_RAID5 |   \
 					 BTRFS_BLOCK_GROUP_RAID6 |   \
+					 BTRFS_BLOCK_GROUP_RAID5J |   \
+					 BTRFS_BLOCK_GROUP_RAID6J |   \
 					 BTRFS_BLOCK_GROUP_RAID1C3 | \
 					 BTRFS_BLOCK_GROUP_RAID1C4 | \
 					 BTRFS_BLOCK_GROUP_DUP |     \
 					 BTRFS_BLOCK_GROUP_RAID10)
 
-#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |	\
-                                         BTRFS_BLOCK_GROUP_RAID6)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5  |	\
+					 BTRFS_BLOCK_GROUP_RAID5J |	\
+					 BTRFS_BLOCK_GROUP_RAID6  |	\
+                                         BTRFS_BLOCK_GROUP_RAID6J)
+
+#define BTRFS_BLOCK_GROUP_JOURNAL_MASK	(BTRFS_BLOCK_GROUP_RAID5J |	\
+                                         BTRFS_BLOCK_GROUP_RAID6J)
 
 #define BTRFS_BLOCK_GROUP_RAID1_MASK    (BTRFS_BLOCK_GROUP_RAID1 |	\
                                          BTRFS_BLOCK_GROUP_RAID1C3 |	\
@@ -1652,6 +1680,8 @@ BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
 BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
 BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
 BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_per_dev_reserved, struct btrfs_chunk, per_dev_reserved,
+		   32);
 BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
 BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
 BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
@@ -1671,6 +1701,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
 			 stripe_len, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
 			 io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_per_dev_reserved, struct btrfs_chunk,
+			 per_dev_reserved, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
 			 io_width, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
diff --git a/kernel-shared/extent-tree.c b/kernel-shared/extent-tree.c
index 697a8a1e4dec..92655fe32fb0 100644
--- a/kernel-shared/extent-tree.c
+++ b/kernel-shared/extent-tree.c
@@ -3004,7 +3004,9 @@ static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root= fs_info->chunk_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
+	bool is_journal;
 	u16 num_stripes;
+	u32 per_dev_reserved = 0;
 	int i;
 	int ret;
 
@@ -3025,19 +3027,24 @@ static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
 	}
 	chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
 			       struct btrfs_chunk);
+	is_journal = btrfs_bg_type_is_journal(btrfs_chunk_type(path->nodes[0],
+					      chunk));
+	if (is_journal)
+		per_dev_reserved = btrfs_chunk_per_dev_reserved(path->nodes[0],
+								chunk);
 	num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
 	for (i = 0; i < num_stripes; i++) {
 		u64 devid = btrfs_stripe_devid_nr(path->nodes[0], chunk, i);
 		u64 offset = btrfs_stripe_offset_nr(path->nodes[0], chunk, i);
 		u64 length = btrfs_stripe_length(fs_info, path->nodes[0], chunk);
 
+		ASSERT(offset > per_dev_reserved);
 		ret = btrfs_reset_chunk_zones(fs_info, devid, offset, length);
 		if (ret < 0)
 			goto out;
 
-		ret = free_dev_extent_item(trans, fs_info,
-			btrfs_stripe_devid_nr(path->nodes[0], chunk, i),
-			btrfs_stripe_offset_nr(path->nodes[0], chunk, i));
+		ret = free_dev_extent_item(trans, fs_info, devid,
+					   offset - per_dev_reserved);
 		if (ret < 0)
 			goto out;
 	}
@@ -3146,6 +3153,8 @@ static u64 get_dev_extent_len(struct map_lookup *map)
 		break;
 	case BTRFS_BLOCK_GROUP_RAID5:
 	case BTRFS_BLOCK_GROUP_RAID6:
+	case BTRFS_BLOCK_GROUP_RAID5J:
+	case BTRFS_BLOCK_GROUP_RAID6J:
 		div = map->num_stripes - btrfs_bg_type_to_nparity(map->type);
 		break;
 	case BTRFS_BLOCK_GROUP_RAID10:
@@ -3198,7 +3207,8 @@ static int free_block_group_cache(struct btrfs_trans_handle *trans,
 		struct btrfs_device *device;
 
 		device = map->stripes[i].dev;
-		device->bytes_used -= get_dev_extent_len(map);
+		device->bytes_used -= get_dev_extent_len(map) +
+				      map->per_dev_reserved;
 		ret = btrfs_update_device(trans, device);
 		if (ret < 0)
 			goto out;
diff --git a/kernel-shared/volumes.c b/kernel-shared/volumes.c
index 97c09a1a4931..e0f31d089707 100644
--- a/kernel-shared/volumes.c
+++ b/kernel-shared/volumes.c
@@ -33,6 +33,14 @@
 #include "common/device-utils.h"
 #include "kernel-lib/raid56.h"
 
+/*
+ * The extra space for journal based profiles (raid56j).
+ *
+ * Each device will have this amount of bytes reserved before the real
+ * stripe begins.
+ */
+#define JOURNAL_RESERVED		(SZ_1M)
+
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = {
 		.sub_stripes	= 2,
@@ -164,6 +172,34 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 		.bg_flag	= BTRFS_BLOCK_GROUP_RAID6,
 		.mindev_error	= BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
 	},
+	[BTRFS_RAID_RAID5J] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 0,
+		.devs_min	= 2,
+		.tolerated_failures = 1,
+		.devs_increment	= 1,
+		.ncopies	= 1,
+		.nparity        = 1,
+		.lower_name	= "raid5j",
+		.upper_name	= "RAID5J",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID5J,
+		.mindev_error	= BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
+	},
+	[BTRFS_RAID_RAID6J] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 0,
+		.devs_min	= 3,
+		.tolerated_failures = 2,
+		.devs_increment	= 1,
+		.ncopies	= 1,
+		.nparity        = 2,
+		.lower_name	= "raid6j",
+		.upper_name	= "RAID6J",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID6J,
+		.mindev_error	= BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
+	},
 };
 
 struct alloc_chunk_ctl {
@@ -173,6 +209,8 @@ struct alloc_chunk_ctl {
 	int max_stripes;
 	int min_stripes;
 	int sub_stripes;
+	u32 per_dev_reserved;
+	/* This stripe_size is excluding above per_dev_reserved */
 	u64 stripe_size;
 	u64 min_stripe_size;
 	u64 num_bytes;
@@ -210,6 +248,10 @@ enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
 		return BTRFS_RAID_RAID5;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
 		return BTRFS_RAID_RAID6;
+	if (flags & BTRFS_BLOCK_GROUP_RAID5J)
+		return BTRFS_RAID_RAID5J;
+	if (flags & BTRFS_BLOCK_GROUP_RAID6J)
+		return BTRFS_RAID_RAID6J;
 
 	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
 }
@@ -270,6 +312,11 @@ bool btrfs_bg_type_is_stripey(u64 flags)
 	return btrfs_raid_array[index].devs_max == 0;
 }
 
+bool btrfs_bg_type_is_journal(u64 flags)
+{
+	return (flags & BTRFS_BLOCK_GROUP_JOURNAL_MASK);
+}
+
 u64 btrfs_bg_flags_for_device_num(int number)
 {
 	int i;
@@ -1256,6 +1303,7 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_info *info,
 				 struct alloc_chunk_ctl *ctl)
 {
 	enum btrfs_raid_types type = btrfs_bg_flags_to_raid_index(ctl->type);
+	bool is_journal = btrfs_bg_type_is_journal(ctl->type);
 
 	ctl->num_stripes = btrfs_raid_array[type].dev_stripes;
 	ctl->min_stripes = btrfs_raid_array[type].devs_min;
@@ -1268,6 +1316,10 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_info *info,
 	ctl->dev_offset = 0;
 	ctl->nparity = btrfs_raid_array[type].nparity;
 	ctl->ncopies = btrfs_raid_array[type].ncopies;
+	if (is_journal)
+		ctl->per_dev_reserved = JOURNAL_RESERVED;
+	else
+		ctl->per_dev_reserved = 0;
 
 	switch (info->fs_devices->chunk_alloc_policy) {
 	case BTRFS_CHUNK_ALLOC_REGULAR:
@@ -1293,6 +1345,8 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_info *info,
 	case BTRFS_RAID_RAID10:
 	case BTRFS_RAID_RAID5:
 	case BTRFS_RAID_RAID6:
+	case BTRFS_RAID_RAID5J:
+	case BTRFS_RAID_RAID6J:
 		ctl->num_stripes = min(ctl->max_stripes, ctl->total_devs);
 		if (type == BTRFS_RAID_RAID10)
 			ctl->num_stripes &= ~(u32)1;
@@ -1320,6 +1374,7 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl)
 
 static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl)
 {
+	ASSERT(!btrfs_bg_type_is_journal(ctl->type));
 	if (chunk_bytes_by_type(ctl) > ctl->max_chunk_size) {
 		/* stripe_size is fixed in ZONED, reduce num_stripes instead */
 		ctl->num_stripes = ctl->max_chunk_size * ctl->ncopies /
@@ -1358,6 +1413,7 @@ static int create_chunk(struct btrfs_trans_handle *trans,
 	int ret;
 	int index;
 	struct btrfs_key key;
+	bool is_journal = btrfs_bg_type_is_journal(ctl->type);
 	u64 offset;
 	u64 zone_size = info->zone_size;
 
@@ -1401,29 +1457,31 @@ static int create_chunk(struct btrfs_trans_handle *trans,
 
 		if (!ctl->dev_offset) {
 			ret = btrfs_alloc_dev_extent(trans, device, key.offset,
-					ctl->stripe_size, &dev_offset);
+					ctl->stripe_size + ctl->per_dev_reserved,
+					&dev_offset);
 			if (ret < 0)
 				goto out_chunk_map;
 		} else {
-			dev_offset = ctl->dev_offset;
+			dev_offset = ctl->dev_offset - ctl->per_dev_reserved;
 			ret = btrfs_insert_dev_extent(trans, device, key.offset,
-						      ctl->stripe_size,
-						      ctl->dev_offset);
+					ctl->stripe_size + ctl->per_dev_reserved,
+					ctl->dev_offset);
 			BUG_ON(ret);
 		}
 
 		ASSERT(!zone_size || IS_ALIGNED(dev_offset, zone_size));
 
-		device->bytes_used += ctl->stripe_size;
+		device->bytes_used += ctl->stripe_size + ctl->per_dev_reserved;
 		ret = btrfs_update_device(trans, device);
 		if (ret < 0)
 			goto out_chunk_map;
 
 		map->stripes[index].dev = device;
-		map->stripes[index].physical = dev_offset;
+		map->stripes[index].physical = dev_offset + ctl->per_dev_reserved;
 		stripe = stripes + index;
 		btrfs_set_stack_stripe_devid(stripe, device->devid);
-		btrfs_set_stack_stripe_offset(stripe, dev_offset);
+		btrfs_set_stack_stripe_offset(stripe, dev_offset +
+						      ctl->per_dev_reserved);
 		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
 		index++;
 	}
@@ -1435,7 +1493,11 @@ static int create_chunk(struct btrfs_trans_handle *trans,
 	btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
 	btrfs_set_stack_chunk_type(chunk, ctl->type);
 	btrfs_set_stack_chunk_num_stripes(chunk, ctl->num_stripes);
-	btrfs_set_stack_chunk_io_align(chunk, BTRFS_STRIPE_LEN);
+	if (is_journal)
+		btrfs_set_stack_chunk_per_dev_reserved(chunk,
+				ctl->per_dev_reserved);
+	else
+		btrfs_set_stack_chunk_io_align(chunk, BTRFS_STRIPE_LEN);
 	btrfs_set_stack_chunk_io_width(chunk, BTRFS_STRIPE_LEN);
 	btrfs_set_stack_chunk_sector_size(chunk, info->sectorsize);
 	btrfs_set_stack_chunk_sub_stripes(chunk, ctl->sub_stripes);
@@ -1446,6 +1508,7 @@ static int create_chunk(struct btrfs_trans_handle *trans,
 	map->type = ctl->type;
 	map->num_stripes = ctl->num_stripes;
 	map->sub_stripes = ctl->sub_stripes;
+	map->per_dev_reserved = ctl->per_dev_reserved;
 
 	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
 				btrfs_chunk_item_size(ctl->num_stripes));
@@ -1552,7 +1615,8 @@ again:
 			if (ctl.type & BTRFS_BLOCK_GROUP_DUP)
 				ctl.stripe_size = max_avail / 2;
 			else
-				ctl.stripe_size = max_avail;
+				ctl.stripe_size = max_avail -
+						  ctl.per_dev_reserved;
 			goto again;
 		}
 		return -ENOSPC;
@@ -1592,7 +1656,7 @@ int btrfs_alloc_data_chunk(struct btrfs_trans_handle *trans,
 	struct list_head *dev_list = &info->fs_devices->devices;
 	struct list_head private_devs;
 	struct btrfs_device *device;
-	struct alloc_chunk_ctl ctl;
+	struct alloc_chunk_ctl ctl = {0};
 
 	if (*start != round_down(*start, info->sectorsize)) {
 		error("DATA chunk start not sectorsize aligned: %llu",
@@ -1649,9 +1713,11 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 		ret = map->num_stripes;
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
 		ret = map->sub_stripes;
-	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+	else if ((BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID5J) &
+		 map->type)
 		ret = 2;
-	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+	else if ((BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID6J) &
+		 map->type)
 		ret = 3;
 	else
 		ret = 1;
@@ -1953,7 +2019,8 @@ again:
 					ce->start + (tmp + i) * map->stripe_len;
 
 			raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
-			if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+			if (map->type & (BTRFS_BLOCK_GROUP_RAID6 |
+					 BTRFS_BLOCK_GROUP_RAID6J))
 				raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
 
 			*length = map->stripe_len;
@@ -2235,6 +2302,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 	u64 length;
 	u64 devid;
 	u8 uuid[BTRFS_UUID_SIZE];
+	bool is_journal;
 	int num_stripes;
 	int ret;
 	int i;
@@ -2262,11 +2330,18 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 	if (!map)
 		return -ENOMEM;
 
+	is_journal = btrfs_bg_type_is_journal(btrfs_chunk_type(leaf, chunk));
 	map->ce.start = logical;
 	map->ce.size = length;
 	map->num_stripes = num_stripes;
 	map->io_width = btrfs_chunk_io_width(leaf, chunk);
-	map->io_align = btrfs_chunk_io_align(leaf, chunk);
+	if (is_journal) {
+		map->io_align = map->io_width;
+		map->per_dev_reserved = btrfs_chunk_per_dev_reserved(leaf, chunk);
+	} else {
+		map->io_align = btrfs_chunk_io_align(leaf, chunk);
+		map->per_dev_reserved = 0;
+	}
 	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
 	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 	map->type = btrfs_chunk_type(leaf, chunk);
@@ -2772,6 +2847,8 @@ u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
 		break;
 	case BTRFS_BLOCK_GROUP_RAID5:
 	case BTRFS_BLOCK_GROUP_RAID6:
+	case BTRFS_BLOCK_GROUP_RAID5J:
+	case BTRFS_BLOCK_GROUP_RAID6J:
 		stripe_len = chunk_len / (num_stripes - btrfs_bg_type_to_nparity(profile));
 		break;
 	case BTRFS_BLOCK_GROUP_RAID10:
diff --git a/kernel-shared/volumes.h b/kernel-shared/volumes.h
index 6e9103a933b7..e1bf0bbe2978 100644
--- a/kernel-shared/volumes.h
+++ b/kernel-shared/volumes.h
@@ -120,6 +120,7 @@ struct map_lookup {
 	int sector_size;
 	int num_stripes;
 	int sub_stripes;
+	u32 per_dev_reserved;
 	struct btrfs_bio_stripe stripes[];
 };
 
@@ -315,5 +316,6 @@ int btrfs_bg_type_to_nparity(u64 flags);
 int btrfs_bg_type_to_sub_stripes(u64 flags);
 u64 btrfs_bg_flags_for_device_num(int number);
 bool btrfs_bg_type_is_stripey(u64 flags);
+bool btrfs_bg_type_is_journal(u64 flags);
 
 #endif
-- 
2.36.1


  reply	other threads:[~2022-05-15 10:55 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-15 10:54 [PATCH 0/5] btrfs-progs: almost full support for RAID56J profiles Qu Wenruo
2022-05-15 10:54 ` Qu Wenruo [this message]
2022-05-15 10:54 ` [PATCH 2/5] btrfs-progs: mkfs: add support for RAID56J creation Qu Wenruo
2022-05-15 10:54 ` [PATCH 3/5] btrfs-progs: check: take per device reservation into consideration Qu Wenruo
2022-05-15 10:54 ` [PATCH 4/5] btrfs-progs: print-tree: add support for per_dev_reserved of chunk item Qu Wenruo
2022-05-15 10:55 ` [PATCH 5/5] btrfs-progs: check/lowmem: fix path leakage when dev extents are invalid Qu Wenruo
2022-05-15 18:15   ` Nikolay Borisov
2022-05-17 19:03     ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c80eda7127082ae9eefc9ed3c8258fafff12e776.1652611957.git.wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox